vieval 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/cli/index.d.mts +1 -1
  4. package/dist/cli/index.mjs +1 -1
  5. package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
  6. package/dist/cli-uzS81IPd.mjs.map +1 -0
  7. package/dist/config.d.mts +1 -1
  8. package/dist/core/assertions/index.d.mts +156 -156
  9. package/dist/core/assertions/index.mjs +82 -82
  10. package/dist/core/assertions/index.mjs.map +1 -1
  11. package/dist/core/inference-executors/index.d.mts +37 -37
  12. package/dist/core/inference-executors/index.mjs +53 -52
  13. package/dist/core/inference-executors/index.mjs.map +1 -1
  14. package/dist/core/processors/results/index.d.mts +18 -18
  15. package/dist/core/processors/results/index.mjs.map +1 -1
  16. package/dist/core/runner/index.d.mts +2 -2
  17. package/dist/core/runner/index.mjs +258 -258
  18. package/dist/core/runner/index.mjs.map +1 -1
  19. package/dist/core/scheduler/index.d.mts +1 -1
  20. package/dist/core/scheduler/index.mjs +64 -64
  21. package/dist/core/scheduler/index.mjs.map +1 -1
  22. package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
  23. package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
  24. package/dist/env-egxaJtNn.mjs.map +1 -0
  25. package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  26. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  27. package/dist/expect.mjs +1 -1
  28. package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
  29. package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
  30. package/dist/index.d.mts +207 -195
  31. package/dist/index.mjs +147 -147
  32. package/dist/index.mjs.map +1 -1
  33. package/dist/models-CaCOUPZw.mjs.map +1 -1
  34. package/dist/plugins/chat-models/index.d.mts +279 -279
  35. package/dist/plugins/chat-models/index.mjs +359 -359
  36. package/dist/plugins/chat-models/index.mjs.map +1 -1
  37. package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
  38. package/dist/registry-BK7k6X81.mjs.map +1 -0
  39. package/dist/testing/expect-extensions.d.mts +27 -27
  40. package/dist/testing/expect-extensions.mjs +1 -1
  41. package/package.json +3 -3
  42. package/dist/cli-CHFCF8UR.mjs.map +0 -1
  43. package/dist/env-BVYeJhGA.mjs.map +0 -1
  44. package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
  45. package/dist/registry-BSyjwZFx.mjs.map +0 -1
@@ -1,48 +1,40 @@
1
1
  //#region src/core/assertions/index.ts
2
2
  /**
3
- * Normalizes text for matching.
4
- *
5
- * Before: `" Hello\nWorld "`
6
- * After: `"hello world"`
3
+ * Returns failing assertion outcomes in original order.
7
4
  */
8
- function normalizeMatchText(value, caseSensitive) {
9
- const compactedWhitespace = value.trim().replaceAll(/\s+/g, " ");
10
- if (caseSensitive) return compactedWhitespace;
11
- return compactedWhitespace.toLowerCase();
12
- }
13
- function clampScore(score) {
14
- if (Number.isNaN(score)) return 0;
15
- if (score < 0) return 0;
16
- if (score > 1) return 1;
17
- return score;
5
+ function collectFailedAssertions(outcomes) {
6
+ return outcomes.filter((outcome) => !outcome.pass);
18
7
  }
19
- function createOutcome(id, scoreKind, pass, score, reason) {
20
- return {
21
- id,
22
- pass,
23
- reason,
24
- score: clampScore(score),
25
- scoreKind
8
+ /**
9
+ * Executes assertion list and returns all outcomes.
10
+ *
11
+ * Call stack:
12
+ *
13
+ * {@link evaluateAssertions}
14
+ * -> `assertion(context)`
15
+ * -> {@link AssertionOutcome}[]
16
+ */
17
+ async function evaluateAssertions(assertions, context) {
18
+ const normalizedContext = {
19
+ state: context.state ?? /* @__PURE__ */ new Map(),
20
+ structuredOutput: context.structuredOutput,
21
+ text: context.text,
22
+ toolCalls: context.toolCalls
26
23
  };
24
+ const outcomes = [];
25
+ for (const assertion of assertions) outcomes.push(await assertion(normalizedContext));
26
+ return outcomes;
27
27
  }
28
28
  /**
29
- * Creates an assertion that requires specific keywords in model text.
29
+ * Creates a custom assertion with fully user-defined logic.
30
30
  *
31
31
  * Example:
32
- * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`
32
+ * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`
33
33
  */
34
- function expectMustInclude(options) {
34
+ function expectCustom(options) {
35
35
  return async (context) => {
36
- if (options.keywords.length === 0) return createOutcome(options.id, "exact", true, 1, "No required keywords configured.");
37
- const caseSensitive = options.caseSensitive ?? false;
38
- const normalizedText = normalizeMatchText(context.text, caseSensitive);
39
- const matches = options.keywords.filter((keyword) => {
40
- const normalizedKeyword = normalizeMatchText(keyword, caseSensitive);
41
- return normalizedText.includes(normalizedKeyword);
42
- });
43
- const pass = (options.mode ?? "all") === "all" ? matches.length === options.keywords.length : matches.length > 0;
44
- const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length;
45
- return createOutcome(options.id, "exact", pass, score, pass ? `Matched ${matches.length}/${options.keywords.length} required keywords.` : `Matched ${matches.length}/${options.keywords.length} required keywords.`);
36
+ const result = await options.evaluate(context);
37
+ return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason);
46
38
  };
47
39
  }
48
40
  /**
@@ -66,41 +58,47 @@ function expectMustExclude(options) {
66
58
  };
67
59
  }
68
60
  /**
69
- * Creates an assertion based on a regular expression.
61
+ * Creates an assertion that requires specific keywords in model text.
70
62
  *
71
63
  * Example:
72
- * `expectRegex({ id: 'starts-with-act', pattern: /^<\|ACT:/ })`
64
+ * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`
73
65
  */
74
- function expectRegex(options) {
66
+ function expectMustInclude(options) {
75
67
  return async (context) => {
76
- const pass = options.pattern.test(context.text);
77
- return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? "Regex matched response text." : `Regex did not match: ${options.pattern}`);
68
+ if (options.keywords.length === 0) return createOutcome(options.id, "exact", true, 1, "No required keywords configured.");
69
+ const caseSensitive = options.caseSensitive ?? false;
70
+ const normalizedText = normalizeMatchText(context.text, caseSensitive);
71
+ const matches = options.keywords.filter((keyword) => {
72
+ const normalizedKeyword = normalizeMatchText(keyword, caseSensitive);
73
+ return normalizedText.includes(normalizedKeyword);
74
+ });
75
+ const pass = (options.mode ?? "all") === "all" ? matches.length === options.keywords.length : matches.length > 0;
76
+ const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length;
77
+ return createOutcome(options.id, "exact", pass, score, pass ? `Matched ${matches.length}/${options.keywords.length} required keywords.` : `Matched ${matches.length}/${options.keywords.length} required keywords.`);
78
78
  };
79
79
  }
80
80
  /**
81
- * Creates an assertion for structured model output.
81
+ * Creates an inverse assertion.
82
82
  *
83
83
  * Example:
84
- * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`
84
+ * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`
85
85
  */
86
- function expectStructuredOutput(options) {
86
+ function expectNot(assertion, options) {
87
87
  return async (context) => {
88
- const pass = options.validate(context.structuredOutput);
89
- return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? "Structured output matched validator." : options.failureReason ?? "Structured output validation failed.");
88
+ const baseOutcome = await assertion(context);
89
+ return createOutcome(options.id, baseOutcome.scoreKind, !baseOutcome.pass, 1 - baseOutcome.score, `NOT(${baseOutcome.id}): ${baseOutcome.reason}`);
90
90
  };
91
91
  }
92
92
  /**
93
- * Creates an assertion for validating tool-call arguments.
93
+ * Creates an assertion based on a regular expression.
94
94
  *
95
95
  * Example:
96
- * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`
96
+ * `expectRegex({ id: 'starts-with-act', pattern: /^<\|ACT:/ })`
97
97
  */
98
- function expectToolCallArgs(options) {
98
+ function expectRegex(options) {
99
99
  return async (context) => {
100
- const targetCall = (context.toolCalls ?? []).find((call) => call.name === options.toolName);
101
- if (targetCall == null) return createOutcome(options.id, "exact", false, 0, `Missing tool call: ${options.toolName}`);
102
- const pass = options.validate(targetCall.args);
103
- return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`);
100
+ const pass = options.pattern.test(context.text);
101
+ return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? "Regex matched response text." : `Regex did not match: ${options.pattern}`);
104
102
  };
105
103
  }
106
104
  /**
@@ -119,48 +117,41 @@ function expectRubric(options) {
119
117
  };
120
118
  }
121
119
  /**
122
- * Creates a custom assertion with fully user-defined logic.
120
+ * Creates an assertion for structured model output.
123
121
  *
124
122
  * Example:
125
- * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`
123
+ * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`
126
124
  */
127
- function expectCustom(options) {
125
+ function expectStructuredOutput(options) {
128
126
  return async (context) => {
129
- const result = await options.evaluate(context);
130
- return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason);
127
+ const pass = options.validate(context.structuredOutput);
128
+ return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? "Structured output matched validator." : options.failureReason ?? "Structured output validation failed.");
131
129
  };
132
130
  }
133
131
  /**
134
- * Creates an inverse assertion.
132
+ * Creates an assertion for validating tool-call arguments.
135
133
  *
136
134
  * Example:
137
- * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`
135
+ * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`
138
136
  */
139
- function expectNot(assertion, options) {
137
+ function expectToolCallArgs(options) {
140
138
  return async (context) => {
141
- const baseOutcome = await assertion(context);
142
- return createOutcome(options.id, baseOutcome.scoreKind, !baseOutcome.pass, 1 - baseOutcome.score, `NOT(${baseOutcome.id}): ${baseOutcome.reason}`);
139
+ const targetCall = (context.toolCalls ?? []).find((call) => call.name === options.toolName);
140
+ if (targetCall == null) return createOutcome(options.id, "exact", false, 0, `Missing tool call: ${options.toolName}`);
141
+ const pass = options.validate(targetCall.args);
142
+ return createOutcome(options.id, "exact", pass, pass ? 1 : 0, pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`);
143
143
  };
144
144
  }
145
145
  /**
146
- * Executes assertion list and returns all outcomes.
147
- *
148
- * Call stack:
146
+ * Normalizes text for matching.
149
147
  *
150
- * {@link evaluateAssertions}
151
- * -> `assertion(context)`
152
- * -> {@link AssertionOutcome}[]
148
+ * Before: `" Hello\nWorld "`
149
+ * After: `"hello world"`
153
150
  */
154
- async function evaluateAssertions(assertions, context) {
155
- const normalizedContext = {
156
- state: context.state ?? /* @__PURE__ */ new Map(),
157
- structuredOutput: context.structuredOutput,
158
- text: context.text,
159
- toolCalls: context.toolCalls
160
- };
161
- const outcomes = [];
162
- for (const assertion of assertions) outcomes.push(await assertion(normalizedContext));
163
- return outcomes;
151
+ function normalizeMatchText(value, caseSensitive) {
152
+ const compactedWhitespace = value.trim().replaceAll(/\s+/g, " ");
153
+ if (caseSensitive) return compactedWhitespace;
154
+ return compactedWhitespace.toLowerCase();
164
155
  }
165
156
  /**
166
157
  * Converts assertion outcomes to run-score tuples consumed by aggregation.
@@ -171,11 +162,20 @@ function toRunScores(outcomes) {
171
162
  score: outcome.score
172
163
  }));
173
164
  }
174
- /**
175
- * Returns failing assertion outcomes in original order.
176
- */
177
- function collectFailedAssertions(outcomes) {
178
- return outcomes.filter((outcome) => !outcome.pass);
165
+ function clampScore(score) {
166
+ if (Number.isNaN(score)) return 0;
167
+ if (score < 0) return 0;
168
+ if (score > 1) return 1;
169
+ return score;
170
+ }
171
+ function createOutcome(id, scoreKind, pass, score, reason) {
172
+ return {
173
+ id,
174
+ pass,
175
+ reason,
176
+ score: clampScore(score),
177
+ scoreKind
178
+ };
179
179
  }
180
180
  //#endregion
181
181
  export { collectFailedAssertions, evaluateAssertions, expectCustom, expectMustExclude, expectMustInclude, expectNot, expectRegex, expectRubric, expectStructuredOutput, expectToolCallArgs, normalizeMatchText, toRunScores };
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/assertions/index.ts"],"sourcesContent":["import type { RunScore, RunScoreKind } from '../runner/aggregate'\n\n/**\n * Stores mutable evaluation state for stateful assertion flows.\n *\n * Use when:\n * - assertions need to share counters, rolling metrics, or memoized values\n * - a scenario evaluates multiple steps and expects state-aware checks\n */\nexport type AssertionState = Map<string, unknown>\n\n/**\n * Represents one tool call emitted by a model response.\n */\nexport interface ToolCall {\n /**\n * Tool name used by the call.\n */\n name: string\n /**\n * Tool arguments payload.\n */\n args: unknown\n}\n\n/**\n * Normalized assertion context for one model output.\n */\nexport interface AssertionContext {\n /**\n * Plain text model output used by text assertions.\n */\n text: string\n /**\n * Optional structured output parsed from the model response.\n */\n structuredOutput?: unknown\n /**\n * Optional tool calls extracted from the model response.\n */\n toolCalls?: readonly ToolCall[]\n /**\n * Shared mutable state for stateful assertion measurement.\n */\n state: AssertionState\n}\n\n/**\n * Result for one assertion evaluation.\n */\nexport interface AssertionOutcome {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Assertion family emitted as run score kind.\n */\n scoreKind: RunScoreKind\n /**\n * Whether the assertion passed.\n */\n pass: boolean\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n /**\n * Human-readable reason for logs and reports.\n */\n reason: string\n}\n\n/**\n * Async assertion function used by eval scenarios.\n */\nexport type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>\n\n/**\n * Normalizes text for matching.\n *\n * Before: `\" Hello\\nWorld \"`\n * After: `\"hello world\"`\n */\nexport function normalizeMatchText(value: string, caseSensitive: boolean): string {\n const compactedWhitespace = value.trim().replaceAll(/\\s+/g, ' ')\n\n if (caseSensitive) {\n return compactedWhitespace\n }\n\n return compactedWhitespace.toLowerCase()\n}\n\nfunction clampScore(score: number): number {\n if (Number.isNaN(score)) {\n return 0\n }\n\n if (score < 0) {\n return 0\n }\n\n if (score > 1) {\n return 1\n }\n\n return score\n}\n\nfunction createOutcome(\n id: string,\n scoreKind: RunScoreKind,\n pass: boolean,\n score: number,\n reason: string,\n): AssertionOutcome {\n return {\n id,\n pass,\n reason,\n score: clampScore(score),\n scoreKind,\n }\n}\n\n/**\n * Options for include-keyword assertions.\n */\nexport interface MustIncludeAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Keywords that must be present.\n */\n keywords: readonly string[]\n /**\n * Match mode for keywords.\n *\n * @default 'all'\n */\n mode?: 'all' | 'any'\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n}\n\n/**\n * Creates an assertion that requires specific keywords in model text.\n *\n * Example:\n * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`\n */\nexport function expectMustInclude(options: MustIncludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No required keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const matches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all'\n ? matches.length === options.keywords.length\n : matches.length > 0\n\n const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? `Matched ${matches.length}/${options.keywords.length} required keywords.`\n : `Matched ${matches.length}/${options.keywords.length} required keywords.`,\n )\n }\n}\n\n/**\n * Options for exclude-keyword assertions.\n */\nexport interface MustExcludeAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Keywords that must not appear.\n */\n keywords: readonly string[]\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n}\n\n/**\n * Creates an assertion that forbids specific keywords.\n *\n * Example:\n * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`\n */\nexport function expectMustExclude(options: MustExcludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No excluded keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const forbiddenMatches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const pass = forbiddenMatches.length === 0\n const score = pass ? 1 : 0\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? 'No forbidden keywords found.'\n : `Forbidden keywords found: ${forbiddenMatches.join(', ')}`,\n )\n }\n}\n\n/**\n * Options for regular-expression assertions.\n */\nexport interface RegexAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Pattern to apply to model text.\n */\n pattern: RegExp\n}\n\n/**\n * Creates an assertion based on a regular expression.\n *\n * Example:\n * `expectRegex({ id: 'starts-with-act', pattern: /^<\\|ACT:/ })`\n */\nexport function expectRegex(options: RegexAssertionOptions): Assertion {\n return async (context) => {\n const pass = options.pattern.test(context.text)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Regex matched response text.' : `Regex did not match: ${options.pattern}`,\n )\n }\n}\n\n/**\n * Options for structured-output assertions.\n */\nexport interface StructuredOutputAssertionOptions<TValue> {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Runtime validator for structured output.\n */\n validate: (value: unknown) => value is TValue\n /**\n * Optional failure reason.\n */\n failureReason?: string\n}\n\n/**\n * Creates an assertion for structured model output.\n *\n * Example:\n * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`\n */\nexport function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion {\n return async (context) => {\n const pass = options.validate(context.structuredOutput)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Structured output matched validator.' : (options.failureReason ?? 'Structured output validation failed.'),\n )\n }\n}\n\n/**\n * Options for tool-call argument assertions.\n */\nexport interface ToolCallArgsAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Tool name to inspect.\n */\n toolName: string\n /**\n * Runtime validator for tool arguments.\n */\n validate: (args: unknown) => boolean\n}\n\n/**\n * Creates an assertion for validating tool-call arguments.\n *\n * Example:\n * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`\n */\nexport function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion {\n return async (context) => {\n const targetCall = (context.toolCalls ?? []).find(call => call.name === options.toolName)\n\n if (targetCall == null) {\n return createOutcome(options.id, 'exact', false, 0, `Missing tool call: ${options.toolName}`)\n }\n\n const pass = options.validate(targetCall.args)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`,\n )\n }\n}\n\n/**\n * Rubric judge result returned by teacher-model or rubric logic.\n */\nexport interface RubricJudgeResult {\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n /**\n * Judge explanation text.\n */\n reason: string\n /**\n * Optional judge model id.\n */\n judgeModel?: string\n}\n\n/**\n * Options for rubric assertions.\n */\nexport interface RubricAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Async rubric judge callback.\n */\n judge: (context: AssertionContext) => Promise<RubricJudgeResult>\n /**\n * Minimum passing score.\n *\n * @default 0.7\n */\n minScore?: number\n}\n\n/**\n * Creates a rubric assertion driven by teacher-model style scoring.\n *\n * Example:\n * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`\n */\nexport function expectRubric(options: RubricAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.judge(context)\n const minScore = options.minScore ?? 0.7\n const normalizedScore = clampScore(result.score)\n const pass = normalizedScore >= minScore\n\n return createOutcome(\n options.id,\n 'judge',\n pass,\n normalizedScore,\n `${result.reason}${result.judgeModel ? ` (judge: ${result.judgeModel})` : ''}`,\n )\n }\n}\n\n/**\n * Options for custom assertions.\n */\nexport interface CustomAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Score family emitted by this custom assertion.\n */\n scoreKind: RunScoreKind\n /**\n * Custom evaluator callback.\n */\n evaluate: (context: AssertionContext) => Promise<{ pass: boolean, reason: string, score: number }> | { pass: boolean, reason: string, score: number }\n}\n\n/**\n * Creates a custom assertion with fully user-defined logic.\n *\n * Example:\n * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`\n */\nexport function expectCustom(options: CustomAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.evaluate(context)\n\n return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason)\n }\n}\n\n/**\n * Creates an inverse assertion.\n *\n * Example:\n * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`\n */\nexport function expectNot(assertion: Assertion, options: { id: string }): Assertion {\n return async (context) => {\n const baseOutcome = await assertion(context)\n\n return createOutcome(\n options.id,\n baseOutcome.scoreKind,\n !baseOutcome.pass,\n 1 - baseOutcome.score,\n `NOT(${baseOutcome.id}): ${baseOutcome.reason}`,\n )\n }\n}\n\n/**\n * Executes assertion list and returns all outcomes.\n *\n * Call stack:\n *\n * {@link evaluateAssertions}\n * -> `assertion(context)`\n * -> {@link AssertionOutcome}[]\n */\nexport async function evaluateAssertions(\n assertions: readonly Assertion[],\n context: Omit<AssertionContext, 'state'> & { state?: AssertionState },\n): Promise<AssertionOutcome[]> {\n const state = context.state ?? new Map<string, unknown>()\n const normalizedContext: AssertionContext = {\n state,\n structuredOutput: context.structuredOutput,\n text: context.text,\n toolCalls: context.toolCalls,\n }\n\n const outcomes: AssertionOutcome[] = []\n\n for (const assertion of assertions) {\n outcomes.push(await assertion(normalizedContext))\n }\n\n return outcomes\n}\n\n/**\n * Converts assertion outcomes to run-score tuples consumed by aggregation.\n */\nexport function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[] {\n return outcomes.map(outcome => ({\n kind: outcome.scoreKind,\n score: outcome.score,\n }))\n}\n\n/**\n * Returns failing assertion outcomes in original order.\n */\nexport function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[] {\n return outcomes.filter(outcome => !outcome.pass)\n}\n"],"mappings":";;;;;;;AAoFA,SAAgB,mBAAmB,OAAe,eAAgC;CAChF,MAAM,sBAAsB,MAAM,KAAK,CAAC,CAAC,WAAW,QAAQ,GAAG;CAE/D,IAAI,eACF,OAAO;CAGT,OAAO,oBAAoB,YAAY;AACzC;AAEA,SAAS,WAAW,OAAuB;CACzC,IAAI,OAAO,MAAM,KAAK,GACpB,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,OAAO;AACT;AAEA,SAAS,cACP,IACA,WACA,MACA,OACA,QACkB;CAClB,OAAO;EACL;EACA;EACA;EACA,OAAO,WAAW,KAAK;EACvB;CACF;AACF;;;;;;;AAkCA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,UAAU,QAAQ,SAAS,QAAQ,YAAY;GACnD,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAGD,MAAM,QADO,QAAQ,QAAQ,WACP,QAClB,QAAQ,WAAW,QAAQ,SAAS,SACpC,QAAQ,SAAS;EAErB,MAAM,QAAQ,QAAQ,SAAS,WAAW,IAAI,IAAI,QAAQ,SAAS,QAAQ,SAAS;EAEpF,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,uBACrD,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,oBAC3D;CACF;AACF;;;;;;;AA4BA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,mBAAmB,QAAQ,SAAS,QAAQ,YAAY;GAC5D,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAED,MAAM,OAAO,iBAAiB,WAAW;EACzC,MAAM,QAAQ,OAAO,IAAI;EAEzB,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,iCACA,6BAA6B,iBAAiB,KAAK,IAAI,GAC7D;CACF;AACF;;;;;;;AAsBA,SAAgB,YAAY,SAA2C;CACrE,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,QAAQ,KAAK,QAAQ,IAAI;EAE9C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,iCAAiC,wBAAwB,QAAQ,SAC1E;CACF;AACF;;;;;;;AA0BA,SAAgB,uBAA+B,SAA8D;CAC3G,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,SAAS,QAAQ,gBAAgB;EAEtD,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,yCAA0C,QAAQ,iBAAiB,sCAC5E;CACF;AACF;;;;;;;AA0BA,SAAgB,mBAAmB,SAAkD;CACnF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,QAAQ,aAAa,CAAC,EAAA,CAAG,MAAK,SAAQ,KAAK,SAAS,QAAQ,QAAQ;EAExF,IAAI,cAAc,MAChB,OAAO,cAAc,QAAQ,IAAI,SAAS,OAAO,GAAG,sBAAsB,QAAQ,UAAU;EAG9F,MAAM,OAAO,QAAQ,SAAS,WAAW,IAAI;EAE7C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,gCAAgC,QAAQ,SAAS,KAAK,wCAAwC,QAAQ,SAAS,EACxH;CACF;AACF;;;;;;;AA8CA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,MAAM,OAAO;EAC1C,MAAM,WAAW,QAAQ,YAAY;EACrC,MAAM,kBAAkB,WAAW,OAAO,KAAK;EAC/C,MAAM,OAAO,mBAAmB;EAEhC,OAAO,cACL,QAAQ,IACR,SACA,MACA,iBACA,GAAG,OAAO,SAAS,OAAO,aAAa,YAAY,OAAO,WAAW,KAAK,IAC5E;CACF;AACF;;;;;;;AA0BA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,SAAS,OAAO;EAE7C,OAAO,cAAc,QAAQ,IAAI,QAAQ,WAAW,OAAO,MAAM,OAAO,OAAO,OAAO,MAAM;CAC9F;AACF;;;;;;;AAQA,SAAgB,UAAU,WAAsB,SAAoC;CAClF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,MAAM,UAAU,OAAO;EAE3C,OAAO,cACL,QAAQ,IACR,YAAY,WACZ,CAAC,YAAY,MACb,IAAI,YAAY,OAChB,OAAO,YAAY,GAAG,KAAK,YAAY,QACzC;CACF;AACF;;;;;;;;;;AAWA,eAAsB,mBACpB,YACA,SAC6B;CAE7B,MAAM,oBAAsC;EAC1C,OAFY,QAAQ,yBAAS,IAAI,IAAqB;EAGtD,kBAAkB,QAAQ;EAC1B,MAAM,QAAQ;EACd,WAAW,QAAQ;CACrB;CAEA,MAAM,WAA+B,CAAC;CAEtC,KAAK,MAAM,aAAa,YACtB,SAAS,KAAK,MAAM,UAAU,iBAAiB,CAAC;CAGlD,OAAO;AACT;;;;AAKA,SAAgB,YAAY,UAAmD;CAC7E,OAAO,SAAS,KAAI,aAAY;EAC9B,MAAM,QAAQ;EACd,OAAO,QAAQ;CACjB,EAAE;AACJ;;;;AAKA,SAAgB,wBAAwB,UAA2D;CACjG,OAAO,SAAS,QAAO,YAAW,CAAC,QAAQ,IAAI;AACjD"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/assertions/index.ts"],"sourcesContent":["import type { RunScore, RunScoreKind } from '../runner/aggregate'\n\n/**\n * Async assertion function used by eval scenarios.\n */\nexport type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>\n\n/**\n * Normalized assertion context for one model output.\n */\nexport interface AssertionContext {\n /**\n * Shared mutable state for stateful assertion measurement.\n */\n state: AssertionState\n /**\n * Optional structured output parsed from the model response.\n */\n structuredOutput?: unknown\n /**\n * Plain text model output used by text assertions.\n */\n text: string\n /**\n * Optional tool calls extracted from the model response.\n */\n toolCalls?: readonly ToolCall[]\n}\n\n/**\n * Result for one assertion evaluation.\n */\nexport interface AssertionOutcome {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Whether the assertion passed.\n */\n pass: boolean\n /**\n * Human-readable reason for logs and reports.\n */\n reason: string\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n /**\n * Assertion family emitted as run score kind.\n */\n scoreKind: RunScoreKind\n}\n\n/**\n * Stores mutable evaluation state for stateful assertion flows.\n *\n * Use when:\n * - assertions need to share counters, rolling metrics, or memoized values\n * - a scenario evaluates multiple steps and expects state-aware checks\n */\nexport type AssertionState = Map<string, unknown>\n\n/**\n * Options for custom assertions.\n */\nexport interface CustomAssertionOptions {\n /**\n * Custom evaluator callback.\n */\n evaluate: (context: AssertionContext) => Promise<{ pass: boolean, reason: string, score: number }> | { pass: boolean, reason: string, score: number }\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Score family emitted by this custom assertion.\n */\n scoreKind: RunScoreKind\n}\n\n/**\n * Options for exclude-keyword assertions.\n */\nexport interface MustExcludeAssertionOptions {\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Keywords that must not appear.\n */\n keywords: readonly string[]\n}\n\n/**\n * Options for include-keyword assertions.\n */\nexport interface MustIncludeAssertionOptions {\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Keywords that must be present.\n */\n keywords: readonly string[]\n /**\n * Match mode for keywords.\n *\n * @default 'all'\n */\n mode?: 'all' | 'any'\n}\n\n/**\n * Options for regular-expression assertions.\n */\nexport interface RegexAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Pattern to apply to model text.\n */\n pattern: RegExp\n}\n\n/**\n * Options for rubric assertions.\n */\nexport interface RubricAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Async rubric judge callback.\n */\n judge: (context: AssertionContext) => Promise<RubricJudgeResult>\n /**\n * Minimum passing score.\n *\n * @default 0.7\n */\n minScore?: number\n}\n\n/**\n * Rubric judge result returned by teacher-model or rubric logic.\n */\nexport interface RubricJudgeResult {\n /**\n * Optional judge model id.\n */\n judgeModel?: string\n /**\n * Judge explanation text.\n */\n reason: string\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Options for structured-output assertions.\n */\nexport interface StructuredOutputAssertionOptions<TValue> {\n /**\n * Optional failure reason.\n */\n failureReason?: string\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Runtime validator for structured output.\n */\n validate: (value: unknown) => value is TValue\n}\n\n/**\n * Represents one tool call emitted by a model response.\n */\nexport interface ToolCall {\n /**\n * Tool arguments payload.\n */\n args: unknown\n /**\n * Tool name used by the call.\n */\n name: string\n}\n\n/**\n * Options for tool-call argument assertions.\n */\nexport interface ToolCallArgsAssertionOptions {\n /**\n * Stable assertion id.\n */\n id: string\n /**\n * Tool name to inspect.\n */\n toolName: string\n /**\n * Runtime validator for tool arguments.\n */\n validate: (args: unknown) => boolean\n}\n\n/**\n * Returns failing assertion outcomes in original order.\n */\nexport function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[] {\n return outcomes.filter(outcome => !outcome.pass)\n}\n\n/**\n * Executes assertion list and returns all outcomes.\n *\n * Call stack:\n *\n * {@link evaluateAssertions}\n * -> `assertion(context)`\n * -> {@link AssertionOutcome}[]\n */\nexport async function evaluateAssertions(\n assertions: readonly Assertion[],\n context: Omit<AssertionContext, 'state'> & { state?: AssertionState },\n): Promise<AssertionOutcome[]> {\n const state = context.state ?? new Map<string, unknown>()\n const normalizedContext: AssertionContext = {\n state,\n structuredOutput: context.structuredOutput,\n text: context.text,\n toolCalls: context.toolCalls,\n }\n\n const outcomes: AssertionOutcome[] = []\n\n for (const assertion of assertions) {\n outcomes.push(await assertion(normalizedContext))\n }\n\n return outcomes\n}\n\n/**\n * Creates a custom assertion with fully user-defined logic.\n *\n * Example:\n * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`\n */\nexport function expectCustom(options: CustomAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.evaluate(context)\n\n return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason)\n }\n}\n\n/**\n * Creates an assertion that forbids specific keywords.\n *\n * Example:\n * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`\n */\nexport function expectMustExclude(options: MustExcludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No excluded keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const forbiddenMatches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const pass = forbiddenMatches.length === 0\n const score = pass ? 1 : 0\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? 'No forbidden keywords found.'\n : `Forbidden keywords found: ${forbiddenMatches.join(', ')}`,\n )\n }\n}\n\n/**\n * Creates an assertion that requires specific keywords in model text.\n *\n * Example:\n * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`\n */\nexport function expectMustInclude(options: MustIncludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No required keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const matches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all'\n ? matches.length === options.keywords.length\n : matches.length > 0\n\n const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? `Matched ${matches.length}/${options.keywords.length} required keywords.`\n : `Matched ${matches.length}/${options.keywords.length} required keywords.`,\n )\n }\n}\n\n/**\n * Creates an inverse assertion.\n *\n * Example:\n * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`\n */\nexport function expectNot(assertion: Assertion, options: { id: string }): Assertion {\n return async (context) => {\n const baseOutcome = await assertion(context)\n\n return createOutcome(\n options.id,\n baseOutcome.scoreKind,\n !baseOutcome.pass,\n 1 - baseOutcome.score,\n `NOT(${baseOutcome.id}): ${baseOutcome.reason}`,\n )\n }\n}\n\n/**\n * Creates an assertion based on a regular expression.\n *\n * Example:\n * `expectRegex({ id: 'starts-with-act', pattern: /^<\\|ACT:/ })`\n */\nexport function expectRegex(options: RegexAssertionOptions): Assertion {\n return async (context) => {\n const pass = options.pattern.test(context.text)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Regex matched response text.' : `Regex did not match: ${options.pattern}`,\n )\n }\n}\n\n/**\n * Creates a rubric assertion driven by teacher-model style scoring.\n *\n * Example:\n * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`\n */\nexport function expectRubric(options: RubricAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.judge(context)\n const minScore = options.minScore ?? 0.7\n const normalizedScore = clampScore(result.score)\n const pass = normalizedScore >= minScore\n\n return createOutcome(\n options.id,\n 'judge',\n pass,\n normalizedScore,\n `${result.reason}${result.judgeModel ? ` (judge: ${result.judgeModel})` : ''}`,\n )\n }\n}\n\n/**\n * Creates an assertion for structured model output.\n *\n * Example:\n * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`\n */\nexport function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion {\n return async (context) => {\n const pass = options.validate(context.structuredOutput)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Structured output matched validator.' : (options.failureReason ?? 'Structured output validation failed.'),\n )\n }\n}\n\n/**\n * Creates an assertion for validating tool-call arguments.\n *\n * Example:\n * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`\n */\nexport function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion {\n return async (context) => {\n const targetCall = (context.toolCalls ?? []).find(call => call.name === options.toolName)\n\n if (targetCall == null) {\n return createOutcome(options.id, 'exact', false, 0, `Missing tool call: ${options.toolName}`)\n }\n\n const pass = options.validate(targetCall.args)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`,\n )\n }\n}\n\n/**\n * Normalizes text for matching.\n *\n * Before: `\" Hello\\nWorld \"`\n * After: `\"hello world\"`\n */\nexport function normalizeMatchText(value: string, caseSensitive: boolean): string {\n const compactedWhitespace = value.trim().replaceAll(/\\s+/g, ' ')\n\n if (caseSensitive) {\n return compactedWhitespace\n }\n\n return compactedWhitespace.toLowerCase()\n}\n\n/**\n * Converts assertion outcomes to run-score tuples consumed by aggregation.\n */\nexport function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[] {\n return outcomes.map(outcome => ({\n kind: outcome.scoreKind,\n score: outcome.score,\n }))\n}\n\nfunction clampScore(score: number): number {\n if (Number.isNaN(score)) {\n return 0\n }\n\n if (score < 0) {\n return 0\n }\n\n if (score > 1) {\n return 1\n }\n\n return score\n}\n\nfunction createOutcome(\n id: string,\n scoreKind: RunScoreKind,\n pass: boolean,\n score: number,\n reason: string,\n): AssertionOutcome {\n return {\n id,\n pass,\n reason,\n score: clampScore(score),\n scoreKind,\n }\n}\n"],"mappings":";;;;AAyOA,SAAgB,wBAAwB,UAA2D;CACjG,OAAO,SAAS,QAAO,YAAW,CAAC,QAAQ,IAAI;AACjD;;;;;;;;;;AAWA,eAAsB,mBACpB,YACA,SAC6B;CAE7B,MAAM,oBAAsC;EAC1C,OAFY,QAAQ,yBAAS,IAAI,IAAqB;EAGtD,kBAAkB,QAAQ;EAC1B,MAAM,QAAQ;EACd,WAAW,QAAQ;CACrB;CAEA,MAAM,WAA+B,CAAC;CAEtC,KAAK,MAAM,aAAa,YACtB,SAAS,KAAK,MAAM,UAAU,iBAAiB,CAAC;CAGlD,OAAO;AACT;;;;;;;AAQA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,SAAS,OAAO;EAE7C,OAAO,cAAc,QAAQ,IAAI,QAAQ,WAAW,OAAO,MAAM,OAAO,OAAO,OAAO,MAAM;CAC9F;AACF;;;;;;;AAQA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,mBAAmB,QAAQ,SAAS,QAAQ,YAAY;GAC5D,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAED,MAAM,OAAO,iBAAiB,WAAW;EACzC,MAAM,QAAQ,OAAO,IAAI;EAEzB,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,iCACA,6BAA6B,iBAAiB,KAAK,IAAI,GAC7D;CACF;AACF;;;;;;;AAQA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,UAAU,QAAQ,SAAS,QAAQ,YAAY;GACnD,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAGD,MAAM,QADO,QAAQ,QAAQ,WACP,QAClB,QAAQ,WAAW,QAAQ,SAAS,SACpC,QAAQ,SAAS;EAErB,MAAM,QAAQ,QAAQ,SAAS,WAAW,IAAI,IAAI,QAAQ,SAAS,QAAQ,SAAS;EAEpF,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,uBACrD,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,oBAC3D;CACF;AACF;;;;;;;AAQA,SAAgB,UAAU,WAAsB,SAAoC;CAClF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,MAAM,UAAU,OAAO;EAE3C,OAAO,cACL,QAAQ,IACR,YAAY,WACZ,CAAC,YAAY,MACb,IAAI,YAAY,OAChB,OAAO,YAAY,GAAG,KAAK,YAAY,QACzC;CACF;AACF;;;;;;;AAQA,SAAgB,YAAY,SAA2C;CACrE,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,QAAQ,KAAK,QAAQ,IAAI;EAE9C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,iCAAiC,wBAAwB,QAAQ,SAC1E;CACF;AACF;;;;;;;AAQA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,MAAM,OAAO;EAC1C,MAAM,WAAW,QAAQ,YAAY;EACrC,MAAM,kBAAkB,WAAW,OAAO,KAAK;EAC/C,MAAM,OAAO,mBAAmB;EAEhC,OAAO,cACL,QAAQ,IACR,SACA,MACA,iBACA,GAAG,OAAO,SAAS,OAAO,aAAa,YAAY,OAAO,WAAW,KAAK,IAC5E;CACF;AACF;;;;;;;AAQA,SAAgB,uBAA+B,SAA8D;CAC3G,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,SAAS,QAAQ,gBAAgB;EAEtD,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,yCAA0C,QAAQ,iBAAiB,sCAC5E;CACF;AACF;;;;;;;AAQA,SAAgB,mBAAmB,SAAkD;CACnF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,QAAQ,aAAa,CAAC,EAAA,CAAG,MAAK,SAAQ,KAAK,SAAS,QAAQ,QAAQ;EAExF,IAAI,cAAc,MAChB,OAAO,cAAc,QAAQ,IAAI,SAAS,OAAO,GAAG,sBAAsB,QAAQ,UAAU;EAG9F,MAAM,OAAO,QAAQ,SAAS,WAAW,IAAI;EAE7C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,gCAAgC,QAAQ,SAAS,KAAK,wCAAwC,QAAQ,SAAS,EACxH;CACF;AACF;;;;;;;AAQA,SAAgB,mBAAmB,OAAe,eAAgC;CAChF,MAAM,sBAAsB,MAAM,KAAK,CAAC,CAAC,WAAW,QAAQ,GAAG;CAE/D,IAAI,eACF,OAAO;CAGT,OAAO,oBAAoB,YAAY;AACzC;;;;AAKA,SAAgB,YAAY,UAAmD;CAC7E,OAAO,SAAS,KAAI,aAAY;EAC9B,MAAM,QAAQ;EACd,OAAO,QAAQ;CACjB,EAAE;AACJ;AAEA,SAAS,WAAW,OAAuB;CACzC,IAAI,OAAO,MAAM,KAAK,GACpB,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,OAAO;AACT;AAEA,SAAS,cACP,IACA,WACA,MACA,OACA,QACkB;CAClB,OAAO;EACL;EACA;EACA;EACA,OAAO,WAAW,KAAK;EACvB;CACF;AACF"}
@@ -1,4 +1,4 @@
1
- import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-bRH0K6fU.mjs";
1
+ import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-Br6jaWGL.mjs";
2
2
  import { createOpenAI } from "@xsai-ext/providers/create";
3
3
 
4
4
  //#region src/core/inference-executors/retry-policy.d.ts
@@ -11,14 +11,14 @@ import { createOpenAI } from "@xsai-ext/providers/create";
11
11
  * attempt -> run request -> non-retriable failure -> throw
12
12
  */
13
13
  interface RetryPolicy {
14
- /**
15
- * Maximum number of total attempts, including the first try.
16
- */
17
- maxAttempts: number;
18
14
  /**
19
15
  * Returns the wait time for a retry attempt.
20
16
  */
21
17
  delayMs: (attempt: number) => number;
18
+ /**
19
+ * Maximum number of total attempts, including the first try.
20
+ */
21
+ maxAttempts: number;
22
22
  /**
23
23
  * Determines whether an error can be retried safely.
24
24
  */
@@ -40,18 +40,18 @@ interface RetryPolicy {
40
40
  * - `delayMs` to return a non-negative wait time in milliseconds
41
41
  */
42
42
  interface RetryPolicyOptions {
43
- /**
44
- * Maximum total attempts, including the first request.
45
- *
46
- * @default 3
47
- */
48
- maxAttempts?: number;
49
43
  /**
50
44
  * Computes the delay for a retry attempt.
51
45
  *
52
46
  * The attempt number starts at `1` for the first retry.
53
47
  */
54
48
  delayMs?: (attempt: number) => number;
49
+ /**
50
+ * Maximum total attempts, including the first request.
51
+ *
52
+ * @default 3
53
+ */
54
+ maxAttempts?: number;
55
55
  /**
56
56
  * Overrides the retry classifier.
57
57
  */
@@ -61,6 +61,7 @@ interface RetryPolicyOptions {
61
61
  */
62
62
  sleep?: (milliseconds: number) => Promise<void>;
63
63
  }
64
+ declare function createRetryPolicy(options?: RetryPolicyOptions): RetryPolicy;
64
65
  /**
65
66
  * Returns true when a provider failure is temporary and a retry is reasonable.
66
67
  *
@@ -71,7 +72,6 @@ interface RetryPolicyOptions {
71
72
  * - provider errors to expose a status code, name, or message when possible
72
73
  */
73
74
  declare function isRetriableProviderError(error: unknown): boolean;
74
- declare function createRetryPolicy(options?: RetryPolicyOptions): RetryPolicy;
75
75
  /**
76
76
  * Runs an operation with bounded retries.
77
77
  *
@@ -118,24 +118,10 @@ interface ProviderAdapter<TProvider> {
118
118
  declare function createProviderAdapter<TProvider>(provider: TProvider, options?: RetryPolicyOptions): ProviderAdapter<TProvider>;
119
119
  //#endregion
120
120
  //#region src/core/inference-executors/remote-providers/openai/index.d.ts
121
- /**
122
- * Represents the OpenAI provider instance returned by xsai.
123
- */
124
- type OpenAIProvider = ReturnType<typeof createOpenAI>;
125
- /**
126
- * Represents the OpenAI adapter used by vieval.
127
- */
128
- type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>;
129
121
  /**
130
122
  * Configures env key names and source for OpenAI provider setup.
131
123
  */
132
124
  interface OpenAIEnvSourceOptions {
133
- /**
134
- * Environment object used for variable lookup.
135
- *
136
- * @default process.env
137
- */
138
- env?: NodeJS.ProcessEnv;
139
125
  /**
140
126
  * Env key name for API key.
141
127
  *
@@ -148,6 +134,12 @@ interface OpenAIEnvSourceOptions {
148
134
  * @default 'OPENAI_BASE_URL'
149
135
  */
150
136
  baseURL?: string;
137
+ /**
138
+ * Environment object used for variable lookup.
139
+ *
140
+ * @default process.env
141
+ */
142
+ env?: NodeJS.ProcessEnv;
151
143
  /**
152
144
  * Env key name for model.
153
145
  *
@@ -185,6 +177,14 @@ interface OpenAIFromEnvResult {
185
177
  baseURL?: string;
186
178
  model: string;
187
179
  }
180
+ /**
181
+ * Represents the OpenAI provider instance returned by xsai.
182
+ */
183
+ type OpenAIProvider = ReturnType<typeof createOpenAI>;
184
+ /**
185
+ * Represents the OpenAI adapter used by vieval.
186
+ */
187
+ type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>;
188
188
  /**
189
189
  * Minimal response shape returned by text-generation calls.
190
190
  */
@@ -194,18 +194,8 @@ interface OpenAITextGenerationResult {
194
194
  *
195
195
  * Some OpenAI-compatible implementations may return `null`.
196
196
  */
197
- text?: string | null;
197
+ text?: null | string;
198
198
  }
199
- /**
200
- * Normalizes provider text output to a safe string.
201
- *
202
- * Before: `{ text: null }`
203
- * After: `''`
204
- *
205
- * Before: `{ text: 'hello' }`
206
- * After: `'hello'`
207
- */
208
- declare function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string;
209
199
  /**
210
200
  * Creates an OpenAI provider adapter using environment variables with defaults.
211
201
  *
@@ -224,6 +214,16 @@ declare function createOpenAIFromEnv(source?: OpenAIEnvSourceOptions, defaults?:
224
214
  * - `retryOptions` to follow the same invariants as `createRetryPolicy`
225
215
  */
226
216
  declare function createOpenAIProviderAdapter(apiKey: string, baseURL?: string, retryOptions?: RetryPolicyOptions): OpenAIProviderAdapter;
217
+ /**
218
+ * Normalizes provider text output to a safe string.
219
+ *
220
+ * Before: `{ text: null }`
221
+ * After: `''`
222
+ *
223
+ * Before: `{ text: 'hello' }`
224
+ * After: `'hello'`
225
+ */
226
+ declare function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string;
227
227
  //#endregion
228
228
  export { EnvFromOptions, EnvValueType, OpenAIEnvSourceOptions, OpenAIFromEnvDefaultOptions, OpenAIFromEnvResult, OpenAIProvider, OpenAIProviderAdapter, OpenAITextGenerationResult, ProviderAdapter, RequiredEnvFromOptions, RetryPolicy, RetryPolicyOptions, createOpenAIFromEnv, createOpenAIProviderAdapter, createProviderAdapter, createRetryPolicy, envFrom, isRetriableProviderError, normalizeOpenAITextOutput, requiredEnvFrom, runWithRetry };
229
229
  //# sourceMappingURL=index.d.mts.map
@@ -1,4 +1,4 @@
1
- import { n as requiredEnvFrom, t as envFrom } from "../../env-BVYeJhGA.mjs";
1
+ import { n as requiredEnvFrom, t as envFrom } from "../../env-egxaJtNn.mjs";
2
2
  import process from "node:process";
3
3
  import { errorMessageFrom, errorNameFrom, sleep } from "@moeru/std";
4
4
  import { createOpenAI } from "@xsai-ext/providers/create";
@@ -12,7 +12,7 @@ const retryableStatusCodes = /* @__PURE__ */ new Set([
12
12
  503,
13
13
  504
14
14
  ]);
15
- const retryableErrorNames = /* @__PURE__ */ new Set(["TimeoutError", "FetchError"]);
15
+ const retryableErrorNames = /* @__PURE__ */ new Set(["FetchError", "TimeoutError"]);
16
16
  const retryableMessagePatterns = [
17
17
  /rate limit/i,
18
18
  /rate-limited/i,
@@ -29,16 +29,14 @@ const retryableMessagePatterns = [
29
29
  /timed out/i,
30
30
  /timeout/i
31
31
  ];
32
- function getStatusCode(error) {
33
- if (error == null || typeof error !== "object") return;
34
- const maybeStatusCode = error.statusCode;
35
- if (typeof maybeStatusCode === "number") return maybeStatusCode;
36
- const maybeStatus = error.status;
37
- if (typeof maybeStatus === "number") return maybeStatus;
38
- const response = error.response;
39
- if (response == null || typeof response !== "object") return;
40
- const responseStatus = response.status;
41
- return typeof responseStatus === "number" ? responseStatus : void 0;
32
+ function createRetryPolicy(options = {}) {
33
+ const maxAttempts = assertValidMaxAttempts(options.maxAttempts ?? 3);
34
+ return {
35
+ delayMs: options.delayMs ?? defaultDelayMs,
36
+ maxAttempts,
37
+ shouldRetry: options.shouldRetry ?? isRetriableProviderError,
38
+ sleep: options.sleep ?? sleep
39
+ };
42
40
  }
43
41
  /**
44
42
  * Returns true when a provider failure is temporary and a retry is reasonable.
@@ -58,34 +56,6 @@ function isRetriableProviderError(error) {
58
56
  if (errorMessage == null) return false;
59
57
  return retryableMessagePatterns.some((pattern) => pattern.test(errorMessage));
60
58
  }
61
- function defaultDelayMs(attempt) {
62
- return 500 * 2 ** (attempt - 1);
63
- }
64
- /**
65
- * Creates a retry policy for provider work.
66
- *
67
- * Use when:
68
- * - you need a reusable retry runner for eval-time provider calls
69
- * - you want to keep retry behavior deterministic in tests
70
- *
71
- * Expects:
72
- * - callers to treat `maxAttempts` as total attempts, not retries
73
- *
74
- * Throws:
75
- * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`
76
- */
77
- function assertValidMaxAttempts(value) {
78
- if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) throw new RangeError("maxAttempts must be a finite integer greater than or equal to 1.");
79
- return value;
80
- }
81
- function createRetryPolicy(options = {}) {
82
- return {
83
- maxAttempts: assertValidMaxAttempts(options.maxAttempts ?? 3),
84
- delayMs: options.delayMs ?? defaultDelayMs,
85
- shouldRetry: options.shouldRetry ?? isRetriableProviderError,
86
- sleep: options.sleep ?? sleep
87
- };
88
- }
89
59
  /**
90
60
  * Runs an operation with bounded retries.
91
61
  *
@@ -106,6 +76,37 @@ async function runWithRetry(operation, policy = createRetryPolicy()) {
106
76
  }
107
77
  throw new Error("Retry loop exited without returning a value.");
108
78
  }
79
+ /**
80
+ * Creates a retry policy for provider work.
81
+ *
82
+ * Use when:
83
+ * - you need a reusable retry runner for eval-time provider calls
84
+ * - you want to keep retry behavior deterministic in tests
85
+ *
86
+ * Expects:
87
+ * - callers to treat `maxAttempts` as total attempts, not retries
88
+ *
89
+ * Throws:
90
+ * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`
91
+ */
92
+ function assertValidMaxAttempts(value) {
93
+ if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) throw new RangeError("maxAttempts must be a finite integer greater than or equal to 1.");
94
+ return value;
95
+ }
96
+ function defaultDelayMs(attempt) {
97
+ return 500 * 2 ** (attempt - 1);
98
+ }
99
+ function getStatusCode(error) {
100
+ if (error == null || typeof error !== "object") return;
101
+ const maybeStatusCode = error.statusCode;
102
+ if (typeof maybeStatusCode === "number") return maybeStatusCode;
103
+ const maybeStatus = error.status;
104
+ if (typeof maybeStatus === "number") return maybeStatus;
105
+ const response = error.response;
106
+ if (response == null || typeof response !== "object") return;
107
+ const responseStatus = response.status;
108
+ return typeof responseStatus === "number" ? responseStatus : void 0;
109
+ }
109
110
  //#endregion
110
111
  //#region src/core/inference-executors/adapters.ts
111
112
  /**
@@ -128,18 +129,6 @@ function createProviderAdapter(provider, options = {}) {
128
129
  //#endregion
129
130
  //#region src/core/inference-executors/remote-providers/openai/index.ts
130
131
  /**
131
- * Normalizes provider text output to a safe string.
132
- *
133
- * Before: `{ text: null }`
134
- * After: `''`
135
- *
136
- * Before: `{ text: 'hello' }`
137
- * After: `'hello'`
138
- */
139
- function normalizeOpenAITextOutput(result) {
140
- return typeof result.text === "string" ? result.text : "";
141
- }
142
- /**
143
132
  * Creates an OpenAI provider adapter using environment variables with defaults.
144
133
  *
145
134
  * Example:
@@ -188,6 +177,18 @@ function createOpenAIFromEnv(source = {}, defaults = {}) {
188
177
  function createOpenAIProviderAdapter(apiKey, baseURL, retryOptions = {}) {
189
178
  return createProviderAdapter(createOpenAI(apiKey, baseURL), retryOptions);
190
179
  }
180
+ /**
181
+ * Normalizes provider text output to a safe string.
182
+ *
183
+ * Before: `{ text: null }`
184
+ * After: `''`
185
+ *
186
+ * Before: `{ text: 'hello' }`
187
+ * After: `'hello'`
188
+ */
189
+ function normalizeOpenAITextOutput(result) {
190
+ return typeof result.text === "string" ? result.text : "";
191
+ }
191
192
  //#endregion
192
193
  export { createOpenAIFromEnv, createOpenAIProviderAdapter, createProviderAdapter, createRetryPolicy, envFrom, isRetriableProviderError, normalizeOpenAITextOutput, requiredEnvFrom, runWithRetry };
193
194