vieval 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +6 -3
  2. package/dist/bin/vieval.d.mts +1 -0
  3. package/dist/bin/vieval.mjs +33 -0
  4. package/dist/bin/vieval.mjs.map +1 -0
  5. package/dist/cli/index.d.mts +32 -0
  6. package/dist/cli/index.mjs +1 -2582
  7. package/dist/cli-DayPXzHX.mjs +2593 -0
  8. package/dist/cli-DayPXzHX.mjs.map +1 -0
  9. package/dist/config.d.mts +1 -1
  10. package/dist/config.mjs +17 -2
  11. package/dist/config.mjs.map +1 -0
  12. package/dist/core/assertions/index.d.mts +314 -2
  13. package/dist/core/assertions/index.mjs +182 -1
  14. package/dist/core/assertions/index.mjs.map +1 -0
  15. package/dist/core/inference-executors/index.d.mts +1 -1
  16. package/dist/core/inference-executors/index.mjs +1 -1
  17. package/dist/core/processors/results/index.d.mts +1 -1
  18. package/dist/core/runner/index.d.mts +1 -1
  19. package/dist/core/runner/index.mjs +635 -1
  20. package/dist/core/runner/index.mjs.map +1 -0
  21. package/dist/{env-C7X81PWa.mjs → env-BFSjny07.mjs} +1 -1
  22. package/dist/{env-C7X81PWa.mjs.map → env-BFSjny07.mjs.map} +1 -1
  23. package/dist/{env-DtpjACOW.d.mts → env-BTq3dV7C.d.mts} +1 -1
  24. package/dist/{expect-extensions-BOzwV5EJ.mjs → expect-extensions-QLXESWjn.mjs} +2 -2
  25. package/dist/{expect-extensions-BOzwV5EJ.mjs.map → expect-extensions-QLXESWjn.mjs.map} +1 -1
  26. package/dist/expect.d.mts +10 -2
  27. package/dist/expect.mjs +16 -1
  28. package/dist/expect.mjs.map +1 -0
  29. package/dist/{index-BDMEAmf2.d.mts → index-OEdqjQSe.d.mts} +2 -2
  30. package/dist/index.d.mts +3 -3
  31. package/dist/index.mjs +4 -4
  32. package/dist/{models-DIGdOUpJ.mjs → models-D_MsBtYw.mjs} +1 -1
  33. package/dist/{models-DIGdOUpJ.mjs.map → models-D_MsBtYw.mjs.map} +1 -1
  34. package/dist/plugins/chat-models/index.d.mts +1 -1
  35. package/dist/plugins/chat-models/index.mjs +1 -1
  36. package/dist/{registry-CHJcTN2W.mjs → registry-CwcMMjnZ.mjs} +3 -3
  37. package/dist/{registry-CHJcTN2W.mjs.map → registry-CwcMMjnZ.mjs.map} +1 -1
  38. package/dist/testing/expect-extensions.d.mts +1 -1
  39. package/dist/testing/expect-extensions.mjs +1 -1
  40. package/package.json +3 -3
  41. package/dist/assertions-DcAjfVDA.mjs +0 -183
  42. package/dist/assertions-DcAjfVDA.mjs.map +0 -1
  43. package/dist/cli/index.mjs.map +0 -1
  44. package/dist/config-CHN24egi.mjs +0 -17
  45. package/dist/config-CHN24egi.mjs.map +0 -1
  46. package/dist/expect-B2vaoRVZ.d.mts +0 -10
  47. package/dist/expect-CaXiUkwY.mjs +0 -17
  48. package/dist/expect-CaXiUkwY.mjs.map +0 -1
  49. package/dist/index-C3gPFmcR.d.mts +0 -314
  50. package/dist/runner-Dpy-eivM.mjs +0 -636
  51. package/dist/runner-Dpy-eivM.mjs.map +0 -1
@@ -1,314 +0,0 @@
1
- import { U as RunScore, W as RunScoreKind } from "./index-BDMEAmf2.mjs";
2
-
3
- //#region src/core/assertions/index.d.ts
4
- /**
5
- * Stores mutable evaluation state for stateful assertion flows.
6
- *
7
- * Use when:
8
- * - assertions need to share counters, rolling metrics, or memoized values
9
- * - a scenario evaluates multiple steps and expects state-aware checks
10
- */
11
- type AssertionState = Map<string, unknown>;
12
- /**
13
- * Represents one tool call emitted by a model response.
14
- */
15
- interface ToolCall {
16
- /**
17
- * Tool name used by the call.
18
- */
19
- name: string;
20
- /**
21
- * Tool arguments payload.
22
- */
23
- args: unknown;
24
- }
25
- /**
26
- * Normalized assertion context for one model output.
27
- */
28
- interface AssertionContext {
29
- /**
30
- * Plain text model output used by text assertions.
31
- */
32
- text: string;
33
- /**
34
- * Optional structured output parsed from the model response.
35
- */
36
- structuredOutput?: unknown;
37
- /**
38
- * Optional tool calls extracted from the model response.
39
- */
40
- toolCalls?: readonly ToolCall[];
41
- /**
42
- * Shared mutable state for stateful assertion measurement.
43
- */
44
- state: AssertionState;
45
- }
46
- /**
47
- * Result for one assertion evaluation.
48
- */
49
- interface AssertionOutcome {
50
- /**
51
- * Stable assertion id.
52
- */
53
- id: string;
54
- /**
55
- * Assertion family emitted as run score kind.
56
- */
57
- scoreKind: RunScoreKind;
58
- /**
59
- * Whether the assertion passed.
60
- */
61
- pass: boolean;
62
- /**
63
- * Normalized score in the `0..1` range.
64
- */
65
- score: number;
66
- /**
67
- * Human-readable reason for logs and reports.
68
- */
69
- reason: string;
70
- }
71
- /**
72
- * Async assertion function used by eval scenarios.
73
- */
74
- type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>;
75
- /**
76
- * Normalizes text for matching.
77
- *
78
- * Before: `" Hello\nWorld "`
79
- * After: `"hello world"`
80
- */
81
- declare function normalizeMatchText(value: string, caseSensitive: boolean): string;
82
- /**
83
- * Options for include-keyword assertions.
84
- */
85
- interface MustIncludeAssertionOptions {
86
- /**
87
- * Stable assertion id.
88
- */
89
- id: string;
90
- /**
91
- * Keywords that must be present.
92
- */
93
- keywords: readonly string[];
94
- /**
95
- * Match mode for keywords.
96
- *
97
- * @default 'all'
98
- */
99
- mode?: 'all' | 'any';
100
- /**
101
- * Case-sensitive matching toggle.
102
- *
103
- * @default false
104
- */
105
- caseSensitive?: boolean;
106
- }
107
- /**
108
- * Creates an assertion that requires specific keywords in model text.
109
- *
110
- * Example:
111
- * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`
112
- */
113
- declare function expectMustInclude(options: MustIncludeAssertionOptions): Assertion;
114
- /**
115
- * Options for exclude-keyword assertions.
116
- */
117
- interface MustExcludeAssertionOptions {
118
- /**
119
- * Stable assertion id.
120
- */
121
- id: string;
122
- /**
123
- * Keywords that must not appear.
124
- */
125
- keywords: readonly string[];
126
- /**
127
- * Case-sensitive matching toggle.
128
- *
129
- * @default false
130
- */
131
- caseSensitive?: boolean;
132
- }
133
- /**
134
- * Creates an assertion that forbids specific keywords.
135
- *
136
- * Example:
137
- * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`
138
- */
139
- declare function expectMustExclude(options: MustExcludeAssertionOptions): Assertion;
140
- /**
141
- * Options for regular-expression assertions.
142
- */
143
- interface RegexAssertionOptions {
144
- /**
145
- * Stable assertion id.
146
- */
147
- id: string;
148
- /**
149
- * Pattern to apply to model text.
150
- */
151
- pattern: RegExp;
152
- }
153
- /**
154
- * Creates an assertion based on a regular expression.
155
- *
156
- * Example:
157
- * `expectRegex({ id: 'starts-with-act', pattern: /^<\|ACT:/ })`
158
- */
159
- declare function expectRegex(options: RegexAssertionOptions): Assertion;
160
- /**
161
- * Options for structured-output assertions.
162
- */
163
- interface StructuredOutputAssertionOptions<TValue> {
164
- /**
165
- * Stable assertion id.
166
- */
167
- id: string;
168
- /**
169
- * Runtime validator for structured output.
170
- */
171
- validate: (value: unknown) => value is TValue;
172
- /**
173
- * Optional failure reason.
174
- */
175
- failureReason?: string;
176
- }
177
- /**
178
- * Creates an assertion for structured model output.
179
- *
180
- * Example:
181
- * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`
182
- */
183
- declare function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion;
184
- /**
185
- * Options for tool-call argument assertions.
186
- */
187
- interface ToolCallArgsAssertionOptions {
188
- /**
189
- * Stable assertion id.
190
- */
191
- id: string;
192
- /**
193
- * Tool name to inspect.
194
- */
195
- toolName: string;
196
- /**
197
- * Runtime validator for tool arguments.
198
- */
199
- validate: (args: unknown) => boolean;
200
- }
201
- /**
202
- * Creates an assertion for validating tool-call arguments.
203
- *
204
- * Example:
205
- * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`
206
- */
207
- declare function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion;
208
- /**
209
- * Rubric judge result returned by teacher-model or rubric logic.
210
- */
211
- interface RubricJudgeResult {
212
- /**
213
- * Normalized score in the `0..1` range.
214
- */
215
- score: number;
216
- /**
217
- * Judge explanation text.
218
- */
219
- reason: string;
220
- /**
221
- * Optional judge model id.
222
- */
223
- judgeModel?: string;
224
- }
225
- /**
226
- * Options for rubric assertions.
227
- */
228
- interface RubricAssertionOptions {
229
- /**
230
- * Stable assertion id.
231
- */
232
- id: string;
233
- /**
234
- * Async rubric judge callback.
235
- */
236
- judge: (context: AssertionContext) => Promise<RubricJudgeResult>;
237
- /**
238
- * Minimum passing score.
239
- *
240
- * @default 0.7
241
- */
242
- minScore?: number;
243
- }
244
- /**
245
- * Creates a rubric assertion driven by teacher-model style scoring.
246
- *
247
- * Example:
248
- * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`
249
- */
250
- declare function expectRubric(options: RubricAssertionOptions): Assertion;
251
- /**
252
- * Options for custom assertions.
253
- */
254
- interface CustomAssertionOptions {
255
- /**
256
- * Stable assertion id.
257
- */
258
- id: string;
259
- /**
260
- * Score family emitted by this custom assertion.
261
- */
262
- scoreKind: RunScoreKind;
263
- /**
264
- * Custom evaluator callback.
265
- */
266
- evaluate: (context: AssertionContext) => Promise<{
267
- pass: boolean;
268
- reason: string;
269
- score: number;
270
- }> | {
271
- pass: boolean;
272
- reason: string;
273
- score: number;
274
- };
275
- }
276
- /**
277
- * Creates a custom assertion with fully user-defined logic.
278
- *
279
- * Example:
280
- * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`
281
- */
282
- declare function expectCustom(options: CustomAssertionOptions): Assertion;
283
- /**
284
- * Creates an inverse assertion.
285
- *
286
- * Example:
287
- * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`
288
- */
289
- declare function expectNot(assertion: Assertion, options: {
290
- id: string;
291
- }): Assertion;
292
- /**
293
- * Executes assertion list and returns all outcomes.
294
- *
295
- * Call stack:
296
- *
297
- * {@link evaluateAssertions}
298
- * -> `assertion(context)`
299
- * -> {@link AssertionOutcome}[]
300
- */
301
- declare function evaluateAssertions(assertions: readonly Assertion[], context: Omit<AssertionContext, 'state'> & {
302
- state?: AssertionState;
303
- }): Promise<AssertionOutcome[]>;
304
- /**
305
- * Converts assertion outcomes to run-score tuples consumed by aggregation.
306
- */
307
- declare function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[];
308
- /**
309
- * Returns failing assertion outcomes in original order.
310
- */
311
- declare function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[];
312
- //#endregion
313
- export { expectToolCallArgs as C, expectStructuredOutput as S, toRunScores as T, expectMustExclude as _, CustomAssertionOptions as a, expectRegex as b, RegexAssertionOptions as c, StructuredOutputAssertionOptions as d, ToolCall as f, expectCustom as g, evaluateAssertions as h, AssertionState as i, RubricAssertionOptions as l, collectFailedAssertions as m, AssertionContext as n, MustExcludeAssertionOptions as o, ToolCallArgsAssertionOptions as p, AssertionOutcome as r, MustIncludeAssertionOptions as s, Assertion as t, RubricJudgeResult as u, expectMustInclude as v, normalizeMatchText as w, expectRubric as x, expectNot as y };
314
- //# sourceMappingURL=index-C3gPFmcR.d.mts.map