vieval 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +290 -0
  2. package/dist/assertions-DcAjfVDA.mjs +183 -0
  3. package/dist/assertions-DcAjfVDA.mjs.map +1 -0
  4. package/dist/cli/index.d.mts +11 -0
  5. package/dist/cli/index.mjs +1434 -0
  6. package/dist/cli/index.mjs.map +1 -0
  7. package/dist/config-D2fe1SnT.mjs +17 -0
  8. package/dist/config-D2fe1SnT.mjs.map +1 -0
  9. package/dist/config.d.mts +3 -0
  10. package/dist/config.mjs +3 -0
  11. package/dist/core/assertions/index.d.mts +2 -0
  12. package/dist/core/assertions/index.mjs +2 -0
  13. package/dist/core/inference-executors/index.d.mts +273 -0
  14. package/dist/core/inference-executors/index.mjs +225 -0
  15. package/dist/core/inference-executors/index.mjs.map +1 -0
  16. package/dist/core/processors/results/index.d.mts +96 -0
  17. package/dist/core/processors/results/index.mjs +64 -0
  18. package/dist/core/processors/results/index.mjs.map +1 -0
  19. package/dist/core/runner/index.d.mts +2 -0
  20. package/dist/core/runner/index.mjs +2 -0
  21. package/dist/expect-0jPJ7Zio.d.mts +2318 -0
  22. package/dist/expect-extensions-CwPtgTz8.mjs +13471 -0
  23. package/dist/expect-extensions-CwPtgTz8.mjs.map +1 -0
  24. package/dist/expect-i9WZWGrA.mjs +17 -0
  25. package/dist/expect-i9WZWGrA.mjs.map +1 -0
  26. package/dist/expect.d.mts +2 -0
  27. package/dist/expect.mjs +2 -0
  28. package/dist/index-DP7jsORl.d.mts +947 -0
  29. package/dist/index-oSXhM1zx.d.mts +314 -0
  30. package/dist/index.d.mts +92 -0
  31. package/dist/index.mjs +150 -0
  32. package/dist/index.mjs.map +1 -0
  33. package/dist/magic-string.es-CH1jwzMg.mjs +1013 -0
  34. package/dist/magic-string.es-CH1jwzMg.mjs.map +1 -0
  35. package/dist/models-D_MsBtYw.mjs +14 -0
  36. package/dist/models-D_MsBtYw.mjs.map +1 -0
  37. package/dist/plugin-DVaRZY2x.d.mts +84 -0
  38. package/dist/plugins/chat-models/index.d.mts +90 -0
  39. package/dist/plugins/chat-models/index.mjs +48 -0
  40. package/dist/plugins/chat-models/index.mjs.map +1 -0
  41. package/dist/registry-ChOjjdEC.mjs +245 -0
  42. package/dist/registry-ChOjjdEC.mjs.map +1 -0
  43. package/dist/runner-4ZsOveoY.mjs +480 -0
  44. package/dist/runner-4ZsOveoY.mjs.map +1 -0
  45. package/dist/testing/expect-extensions.d.mts +86 -0
  46. package/dist/testing/expect-extensions.mjs +2 -0
  47. package/package.json +88 -0
@@ -0,0 +1,480 @@
1
+ import { t as resolveModelByName } from "./models-D_MsBtYw.mjs";
2
+ import { createRequire } from "node:module";
3
+ import { basename, dirname, relative } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import { errorMessageFrom } from "@moeru/std";
6
+ //#region src/core/runner/aggregate.ts
7
/**
 * Copies a scheduled task matrix so that its `eval`, `meta`, and `run`
 * sections are fresh objects.
 *
 * Each section is copied one level deep; values inside a section are shared
 * with the source matrix.
 */
function cloneScheduledTaskMatrix(matrix) {
	const evalSelection = { ...matrix.eval };
	const meta = { ...matrix.meta };
	const runSelection = { ...matrix.run };
	return {
		eval: evalSelection,
		meta,
		run: runSelection
	};
}
14
/**
 * Returns `kind` unchanged when it is one of the supported score families.
 *
 * Throws:
 * - `TypeError` for any value other than `"exact"` or `"judge"`
 */
function assertKnownScoreKind(kind) {
	switch (kind) {
		case "exact":
		case "judge":
			return kind;
		default:
			throw new TypeError(`Unknown eval score kind "${kind}".`);
	}
}
18
/**
 * Arithmetic mean of `scores`, or `null` when the list is empty.
 */
function average(scores) {
	const count = scores.length;
	if (count === 0) return null;
	let total = 0;
	scores.forEach((score) => {
		total += score;
	});
	return total / count;
}
22
/**
 * Combines the exact and judge averages into one hybrid value.
 *
 * Returns:
 * - the midpoint of both averages when both are present
 * - the single available average when only one is present
 * - `null` when neither is present
 */
function createHybridAverage(exactAverage, judgeAverage) {
	const hasExact = exactAverage != null;
	const hasJudge = judgeAverage != null;
	if (hasExact && hasJudge) return (exactAverage + judgeAverage) / 2;
	if (hasExact) return exactAverage;
	return hasJudge ? judgeAverage : null;
}
28
/**
 * Splits score entries into `exact` and `judge` value lists, preserving the
 * input order inside each list.
 *
 * Throws:
 * - `TypeError` (from `assertKnownScoreKind`) when an entry has any other kind
 */
function collectScoreBuckets(scores) {
	const exact = [];
	const judge = [];
	for (const entry of scores) {
		const target = assertKnownScoreKind(entry.kind) === "exact" ? exact : judge;
		target.push(entry.score);
	}
	return {
		exact,
		judge
	};
}
42
/**
 * Builds the per-run summary for one run result.
 *
 * The exact and judge averages are computed from this run's scores only, and
 * the matrix is copied so the summary does not share section objects with the
 * input result.
 */
function createRunSummary(result) {
	const { exact, judge } = collectScoreBuckets(result.scores);
	const exactAverage = average(exact);
	const judgeAverage = average(judge);
	const hybridAverage = createHybridAverage(exactAverage, judgeAverage);
	return {
		entryId: result.entryId,
		exactAverage,
		hybridAverage,
		id: result.id,
		judgeAverage,
		matrix: cloneScheduledTaskMatrix(result.matrix),
		inferenceExecutorId: result.inferenceExecutorId
	};
}
56
/**
 * Builds one aggregate summary over a group of run results.
 *
 * Scores from every result are pooled per family before averaging, so each
 * average is a mean over individual scores rather than a mean of per-run
 * means. `runCount` is the number of results in the group.
 */
function createProviderSummary(inferenceExecutorId, results) {
	const pooledExact = [];
	const pooledJudge = [];
	for (const { scores } of results) {
		const perRun = collectScoreBuckets(scores);
		for (const value of perRun.exact) pooledExact.push(value);
		for (const value of perRun.judge) pooledJudge.push(value);
	}
	const exactAverage = average(pooledExact);
	const judgeAverage = average(pooledJudge);
	const hybridAverage = createHybridAverage(exactAverage, judgeAverage);
	return {
		exactAverage,
		hybridAverage,
		judgeAverage,
		inferenceExecutorId,
		runCount: results.length
	};
}
74
/**
 * Summarizes a batch of run results per run, per inferenceExecutor, and overall.
 *
 * Call stack:
 *
 * {@link runScheduledTasks}
 *   -> {@link aggregateRunResults}
 *     -> {@link createRunSummary}
 *     -> {@link createProviderSummary}
 *       -> `report output`
 *
 * Use when:
 * - a runner batch mixes deterministic exact checks with judge-based grading
 * - inferenceExecutor comparison should keep both score families plus one hybrid view
 *
 * Expects:
 * - each score to be normalized to the `0..1` range before aggregation (no
 *   range validation happens here)
 * - `scores.kind` to use only `'exact'` or `'judge'`; any other kind makes the
 *   summary helpers throw a `TypeError`
 *
 * Returns:
 * - `runs` in input order
 * - `inferenceExecutors` sorted by id with `localeCompare`
 * - `overall` pooled across every result
 */
function aggregateRunResults(results) {
	const runs = results.map(createRunSummary);
	// Group results by executor in one pass instead of re-filtering the whole
	// batch once per executor. Map preserves first-seen order, and each group
	// keeps the original result order, so the summaries are unchanged.
	const resultsByExecutor = new Map();
	for (const result of results) {
		const group = resultsByExecutor.get(result.inferenceExecutorId);
		if (group === undefined) resultsByExecutor.set(result.inferenceExecutorId, [result]);
		else group.push(result);
	}
	const inferenceExecutors = Array.from(resultsByExecutor, ([inferenceExecutorId, executorResults]) => {
		return createProviderSummary(inferenceExecutorId, executorResults);
	}).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
	// "overall" is only a placeholder id; it is dropped from the returned shape.
	const overall = createProviderSummary("overall", results);
	return {
		overall: {
			exactAverage: overall.exactAverage,
			hybridAverage: overall.hybridAverage,
			judgeAverage: overall.judgeAverage,
			runCount: overall.runCount
		},
		inferenceExecutors,
		runs
	};
}
110
+ //#endregion
111
+ //#region src/core/runner/collect.ts
112
/** File-name suffix a module path must end with to be collected as an eval entry. */
const evalFileSuffix = ".eval.ts";
/**
 * Matches paths that start with a Windows drive (`C:/`), a POSIX root (`/`),
 * or two backslashes (UNC-style prefix). Case-insensitive.
 */
const absolutePathPattern = /^(?:[A-Z]:\/|\/|\\\\)/i;
114
/**
 * Rewrites every backslash in `value` to a forward slash.
 */
function normalizePath(value) {
	const segments = value.split("\\");
	return segments.join("/");
}
117
/**
 * Turns a file path into a forward-slash, project-relative path when the file
 * lives under `context.projectRootDirectory`; otherwise returns the
 * forward-slash form of the original path.
 *
 * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
 * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
 *
 * Before (root has no drive, or a different drive): `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
 * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
 */
function asProjectRelativePath(filePath, context) {
	const windowsDrivePattern = /^[A-Z]:\//i;
	const slashedFilePath = normalizePath(filePath);
	const slashedProjectRoot = normalizePath(context.projectRootDirectory);
	const fileDrive = slashedFilePath.match(windowsDrivePattern)?.[0];
	const rootDrive = slashedProjectRoot.match(windowsDrivePattern)?.[0];
	if (fileDrive != null) {
		// A drive-qualified file can only be relative to a root on the same drive
		// (drive letters compare case-insensitively).
		const onSameDrive = rootDrive != null && fileDrive.toLowerCase() === rootDrive.toLowerCase();
		if (!onSameDrive) return slashedFilePath;
	}
	const candidate = normalizePath(relative(context.projectRootDirectory, filePath));
	// Keep the relative form only when it stays inside the project root: it
	// must not be absolute, must not be the parent itself, and must not climb.
	const staysInsideRoot = !absolutePathPattern.test(candidate) && candidate !== ".." && !candidate.startsWith("../");
	return staysInsideRoot ? candidate : slashedFilePath;
}
141
/**
 * Converts a `file:` module href into a file-system path.
 *
 * Returns:
 * - the path produced by `fileURLToPath`
 * - `null` when the href does not start with `file:` or cannot be converted
 */
function resolveModuleFilePath(moduleHref) {
	let resolvedPath = null;
	if (moduleHref.startsWith("file:")) {
		try {
			resolvedPath = fileURLToPath(moduleHref);
		} catch {
			// Malformed file URLs are treated as "not a file module".
			resolvedPath = null;
		}
	}
	return resolvedPath;
}
149
/**
 * Builds one collected eval entry from a loaded module, or returns `null`
 * when the module should not be collected.
 *
 * Returns `null` when:
 * - `moduleHref` is not a resolvable `file:` URL
 * - the project-relative path does not end with the eval file suffix
 * - the file name is nothing but the suffix (no entry name remains)
 *
 * Otherwise the module's default export is spread first and then
 * `directory`, `filePath`, `id`, and `name` are set on top of it, so those
 * four fields always reflect the file location.
 */
function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
	const filePath = resolveModuleFilePath(moduleHref);
	if (!filePath) return null;
	const relativeFilePath = asProjectRelativePath(filePath, context);
	if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
	// Strip the suffix by hand. `basename(path, suffix)` leaves the suffix in
	// place when the file name consists only of the suffix and the path has a
	// directory part (`dir/.eval.ts` -> `.eval.ts`), which let suffix-only
	// files in subdirectories slip past the empty-name guard below.
	const entryName = basename(relativeFilePath).slice(0, -evalFileSuffix.length);
	if (entryName.length === 0) return null;
	const relativeDirectory = dirname(relativeFilePath);
	const directory = relativeDirectory === "." ? "" : relativeDirectory;
	return {
		...moduleDefinition.default,
		directory,
		filePath,
		id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
		name: entryName
	};
}
166
/**
 * Turns a map of loaded modules into runner entries sorted by id.
 *
 * Call stack:
 *
 * `import.meta.glob(...)`
 *   -> {@link collectEvalEntries}
 *     -> {@link createCollectedEvalEntry}
 *       -> {@link CollectedEvalEntry}[]
 *
 * Use when:
 * - the runner has already loaded candidate eval modules
 * - downstream scheduling needs stable entry ids and directory metadata
 *
 * Modules that `createCollectedEvalEntry` rejects (it returns `null`) are
 * skipped. Ordering uses `localeCompare` on the entry id.
 */
function collectEvalEntries(modules, context) {
	const entries = [];
	for (const [moduleHref, moduleDefinition] of Object.entries(modules)) {
		const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context);
		if (entry) entries.push(entry);
	}
	return entries.sort((left, right) => left.id.localeCompare(right.id));
}
187
+ //#endregion
188
+ //#region src/core/runner/run.ts
189
/**
 * Fallback execution context used when the caller supplies no context factory.
 *
 * Its `model()` resolver always throws, because no model registry is
 * available: the message names the requested model when one was given, and
 * the task's inferenceExecutor id otherwise.
 */
function createDefaultExecutionContext(task) {
	return {
		model(options) {
			let requestedModelName;
			if (typeof options === "string") requestedModelName = options;
			else requestedModelName = options?.name;
			const message = requestedModelName != null
				? `No model registry configured. Requested model: ${requestedModelName}`
				: `No model registry configured for task inferenceExecutor id "${task.inferenceExecutor.id}".`;
			throw new Error(message);
		}
	};
}
196
/**
 * Error raised when a scheduled task fails before yielding a normalized result.
 *
 * The message embeds the task id and the text extracted from the original
 * failure; the original failure itself is kept on `cause`.
 */
var RunnerExecutionError = class extends Error {
	/**
	 * Stable id of the task that failed.
	 */
	taskId;
	constructor(taskId, cause) {
		const reason = errorMessageFrom(cause) ?? "Unknown runner execution failure.";
		super(`Runner task "${taskId}" failed: ${reason}`);
		this.name = "RunnerExecutionError";
		this.taskId = taskId;
		this.cause = cause;
	}
};
212
/**
 * Wraps `cause` in a `RunnerExecutionError` for `taskId`.
 *
 * A cause that is already a `RunnerExecutionError` for the same task is
 * returned as-is, so errors are not wrapped twice.
 */
function createRunnerExecutionError(taskId, cause) {
	const alreadyWrapped = cause instanceof RunnerExecutionError && cause.taskId === taskId;
	return alreadyWrapped ? cause : new RunnerExecutionError(taskId, cause);
}
216
/**
 * Runs scheduled tasks one after another and aggregates their results.
 *
 * Call stack:
 *
 * {@link createRunnerSchedule}
 *   -> {@link runScheduledTasks}
 *     -> `executor(task)`
 *       -> {@link aggregateRunResults}
 *
 * Use when:
 * - the caller already expanded the runner matrix
 * - task execution should stay deterministic and easy to debug
 *
 * Expects:
 * - `executor` to return normalized `0..1` scores
 * - callers to handle concurrency outside this helper when needed
 * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers
 *   (their return values are not awaited)
 *
 * Throws:
 * - `RunnerExecutionError` when task setup, hooks, or the executor throws;
 *   the first failure stops the batch
 */
async function runScheduledTasks(tasks, executor, options = {}) {
	const results = [];
	for (const task of tasks) {
		let executionContext;
		try {
			executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task);
			options.onTaskStart?.(task);
		} catch (error) {
			throw createRunnerExecutionError(task.id, error);
		}
		let result;
		try {
			result = await executor(task, executionContext);
		} catch (error) {
			// Observer failures are deliberately ignored here so they cannot
			// replace the executor error for this task.
			try {
				options.onTaskEnd?.(task, "failed");
			} catch {}
			throw createRunnerExecutionError(task.id, error);
		}
		results.push(result);
		try {
			options.onTaskEnd?.(task, "passed");
		} catch (error) {
			throw createRunnerExecutionError(task.id, error);
		}
	}
	return aggregateRunResults(results);
}
269
+ //#endregion
270
+ //#region src/core/runner/runtime-context.ts
271
// CommonJS-style loader scoped to this module; used below to load the
// workspace lookup package.
const require = createRequire(import.meta.url);
/**
 * Resolves the project root directory used for runner path normalization.
 *
 * Call stack:
 *
 * {@link createRunnerRuntimeContext}
 *   -> `findWorkspaceDir(cwd)`
 *     -> `resolve projectRootDirectory`
 *       -> `{ projectRootDirectory }`
 *
 * Use when:
 * - initializing runner infrastructure before collecting eval modules
 * - tests need deterministic root resolution behavior
 *
 * Options:
 * - `cwd`: directory handed to `findWorkspaceDir`; defaults to this module's directory
 * - `fallbackProjectRootDirectory`: used when `findWorkspaceDir` yields
 *   `null`/`undefined`; defaults to three directory levels above this module
 *
 * NOTE(review): the default fallback is computed relative to this module's
 * URL; presumably `../../../` was chosen for the unbundled source layout —
 * confirm it still points at the intended directory in the bundled output.
 */
async function createRunnerRuntimeContext(options = {}) {
	const searchStartDirectory = options.cwd ?? dirname(fileURLToPath(import.meta.url));
	const fallbackRoot = options.fallbackProjectRootDirectory ?? fileURLToPath(new URL("../../../", import.meta.url));
	const { findWorkspaceDir } = require("@pnpm/find-workspace-dir");
	const workspaceDirectory = await findWorkspaceDir(searchStartDirectory);
	return { projectRootDirectory: workspaceDirectory ?? fallbackRoot };
}
292
+ //#endregion
293
+ //#region src/core/runner/schedule.ts
294
/**
 * Reserved top-level keys that mark a matrix definition as a layer
 * (`disable` / `extend` / `override`) rather than a plain set of axes.
 */
const matrixLayerKeys = new Set(["disable", "extend", "override"]);
/** Message thrown when a matrix definition mixes reserved layer keys with axis keys. */
const ambiguousMatrixDefinitionErrorMessage = "Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.";
300
/**
 * Percent-encodes one id segment with `encodeURIComponent`, so characters
 * such as `=`, `&`, `:` and `/` cannot clash with the separators used when
 * ids are assembled.
 */
function encodeTaskIdSegment(value) {
	const encoded = encodeURIComponent(value);
	return encoded;
}
303
/**
 * Converts a matrix axis value to its string form via `String(...)`.
 */
function stringifyMatrixValue(value) {
	const text = String(value);
	return text;
}
306
/**
 * Returns a shallow copy of one matrix selection (axis -> chosen value).
 */
function cloneMatrixSelection(matrix) {
	const copy = { ...matrix };
	return copy;
}
309
/**
 * Packages one run selection and one eval selection into the matrix attached
 * to a scheduled task.
 *
 * Both selections are copied, and `meta` carries the stable row id derived
 * from each selection.
 */
function createScheduledTaskMatrix(runMatrix, evalMatrix) {
	const evalSelection = cloneMatrixSelection(evalMatrix);
	const meta = {
		evalRowId: createStableRowId(evalMatrix),
		runRowId: createStableRowId(runMatrix)
	};
	const runSelection = cloneMatrixSelection(runMatrix);
	return {
		eval: evalSelection,
		meta,
		run: runSelection
	};
}
319
/**
 * Reports whether `matrix` is written in layer form: it has at least one key
 * and every key is a reserved layer key.
 */
function isMatrixLayer(matrix) {
	const keys = Object.keys(matrix);
	if (keys.length === 0) return false;
	for (const key of keys) {
		if (!matrixLayerKeys.has(key)) return false;
	}
	return true;
}
323
/**
 * Rejects matrix definitions that mix reserved layer keys with axis keys.
 *
 * Throws:
 * - `TypeError` when both kinds of key appear on the same object
 */
function assertNonAmbiguousMatrixDefinition(matrix) {
	let sawReservedKey = false;
	let sawAxisKey = false;
	for (const key of Object.keys(matrix)) {
		if (matrixLayerKeys.has(key)) sawReservedKey = true;
		else sawAxisKey = true;
	}
	if (sawReservedKey && sawAxisKey) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
}
329
/**
 * Normalizes a matrix input into layer form.
 *
 * Returns:
 * - `undefined` when the input is `null`/`undefined`
 * - the input itself when it is already a layer
 * - `{ extend: input }` when it is a plain axis map
 *
 * Throws:
 * - `TypeError` when the input mixes layer keys with axis keys
 */
function normalizeLayerInputToAxes(matrix) {
	if (matrix == null) return;
	assertNonAmbiguousMatrixDefinition(matrix);
	return isMatrixLayer(matrix) ? matrix : { extend: matrix };
}
335
/**
 * Stringifies axis values and drops duplicates, keeping first-seen order.
 */
function dedupeAxisValues(values) {
	const asStrings = values.map(stringifyMatrixValue);
	const unique = new Set(asStrings);
	return Array.from(unique);
}
338
/**
 * Applies one axis definition to `axes` in place.
 *
 * - `mode === "extend"`: new values are appended to the axis's existing
 *   values, without duplicates
 * - any other mode: the axis's values are replaced
 *
 * A `null`/`undefined` definition is a no-op.
 */
function applyAxisValues(axes, definition, mode) {
	if (definition == null) return;
	const shouldMerge = mode === "extend";
	for (const [axis, values] of Object.entries(definition)) {
		const incoming = dedupeAxisValues(values);
		if (!shouldMerge) {
			axes.set(axis, incoming);
			continue;
		}
		const current = axes.get(axis) ?? [];
		const merged = new Set([...current, ...incoming]);
		axes.set(axis, Array.from(merged));
	}
}
350
/**
 * Returns a new axis map with `layer` applied on top of `baseAxes`.
 *
 * `baseAxes` is not mutated: its value lists are copied first. The layer is
 * applied in the order disable -> extend -> override.
 */
function applyLayer(baseAxes, layer) {
	const nextAxes = new Map();
	for (const [axis, values] of baseAxes.entries()) nextAxes.set(axis, [...values]);
	const disabledAxes = layer?.disable ?? [];
	for (const axis of disabledAxes) nextAxes.delete(axis);
	applyAxisValues(nextAxes, layer?.extend, "extend");
	applyAxisValues(nextAxes, layer?.override, "override");
	return nextAxes;
}
357
/**
 * Expands an axis map into every combination of axis values.
 *
 * Axes are combined in the map's iteration order, with earlier axes varying
 * slowest.
 *
 * Returns:
 * - `[{}]` when there are no axes (one row with no selections)
 * - `[]` when any axis has no values
 */
function expandAxesToRows(axes) {
	let rows = [{}];
	for (const [axis, values] of Array.from(axes.entries())) {
		if (values.length === 0) return [];
		rows = rows.flatMap((row) => Array.from(values, (value) => ({
			...row,
			[axis]: value
		})));
	}
	return rows;
}
372
/**
 * Derives a row id from a matrix selection.
 *
 * Axes are ordered by name with `localeCompare`, each rendered as
 * `encodedAxis=encodedValue`, and joined with `&`. An empty selection
 * yields `"default"`.
 */
function createStableRowId(matrix) {
	const pairs = Object.entries(matrix);
	if (pairs.length === 0) return "default";
	pairs.sort((left, right) => left[0].localeCompare(right[0]));
	const segments = [];
	for (const [axis, value] of pairs) {
		segments.push(`${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`);
	}
	return segments.join("&");
}
377
/**
 * Assembles a task id of the form
 * `<entry>::<inferenceExecutor>::run=<runRow>::eval=<evalRow>`,
 * percent-encoding each part.
 */
function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
	const entrySegment = encodeTaskIdSegment(entryId);
	const executorSegment = encodeTaskIdSegment(inferenceExecutorId);
	const runSegment = encodeTaskIdSegment(runRowId);
	const evalSegment = encodeTaskIdSegment(evalRowId);
	return `${entrySegment}::${executorSegment}::run=${runSegment}::eval=${evalSegment}`;
}
385
/**
 * Resolves the run-matrix axes for one entry by layering, in order: the
 * schedule-level `runMatrix`, the entry's `matrix.runMatrix`, and the entry's
 * `task.matrix.runMatrix`. Later layers are applied on top of earlier ones.
 */
function createResolvedRunAxes(entry, runMatrix) {
	const layerInputs = [
		runMatrix,
		entry.matrix?.runMatrix,
		entry.task?.matrix?.runMatrix
	];
	return layerInputs.reduce((axes, layerInput) => {
		return applyLayer(axes, normalizeLayerInputToAxes(layerInput));
	}, /* @__PURE__ */ new Map());
}
394
/**
 * Resolves the eval-matrix axes for one entry by layering, in order: the
 * schedule-level `evalMatrix`, the entry's `matrix.evalMatrix`, and the
 * entry's `task.matrix.evalMatrix`. Later layers are applied on top of
 * earlier ones.
 */
function createResolvedEvalAxes(entry, evalMatrix) {
	const layerInputs = [
		evalMatrix,
		entry.matrix?.evalMatrix,
		entry.task?.matrix?.evalMatrix
	];
	return layerInputs.reduce((axes, layerInput) => {
		return applyLayer(axes, normalizeLayerInputToAxes(layerInput));
	}, /* @__PURE__ */ new Map());
}
403
/**
 * Expands collected entries into the ordered list of tasks to run.
 *
 * Call stack:
 *
 * {@link collectEvalEntries} (`../runner`)
 *   -> {@link createRunnerSchedule}
 *     -> {@link expandAxesToRows}
 *       -> {@link ScheduledTask}[]
 *
 * Use when:
 * - the runner already knows which eval entries are available
 * - each entry must run against multiple inferenceExecutors or matrix variants
 *
 * Expects:
 * - `entries` and `inferenceExecutors` to be provided in the desired execution order
 * - matrix axes to use insertion order when generating combinations
 *
 * Returns:
 * - one task per entry x inferenceExecutor x run row x eval row, nested in
 *   that order; an entry whose run or eval matrix expands to no rows is skipped
 * - `[]` when there are no entries or no inferenceExecutors
 */
function createRunnerSchedule(options) {
	const { entries, inferenceExecutors } = options;
	if (entries.length === 0 || inferenceExecutors.length === 0) return [];
	const tasks = [];
	for (const entry of entries) {
		const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
		const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
		if (runSelections.length === 0 || evalSelections.length === 0) continue;
		for (const inferenceExecutor of inferenceExecutors) {
			for (const runSelection of runSelections) {
				for (const evalSelection of evalSelections) {
					// Each task gets its own matrix object so tasks never share selections.
					const matrix = createScheduledTaskMatrix(runSelection, evalSelection);
					const { runRowId, evalRowId } = matrix.meta;
					tasks.push({
						entry,
						id: createTaskId(entry.id, inferenceExecutor.id, runRowId, evalRowId),
						matrix,
						inferenceExecutor
					});
				}
			}
		}
	}
	return tasks;
}
441
+ //#endregion
442
+ //#region src/core/runner/task-context.ts
443
/**
 * Picks the model a task uses when `model()` is called without a selection.
 *
 * Resolution order:
 * 1. `task.matrix.run.model`, when set — it must resolve, otherwise this throws
 * 2. a configured model resolved from the task's inferenceExecutor id
 * 3. the first configured model
 *
 * Throws:
 * - `Error` when the matrix names an unknown model, or when nothing resolves
 */
function resolveDefaultTaskModel(models, task) {
	const matrixModelName = task.matrix.run.model;
	if (matrixModelName != null) {
		const matrixModel = resolveModelByName(models, matrixModelName);
		if (matrixModel == null) throw new Error(`Unknown configured model "${matrixModelName}" from task.matrix.run.model.`);
		return matrixModel;
	}
	const executorModel = resolveModelByName(models, task.inferenceExecutor.id);
	if (executorModel != null) return executorModel;
	const firstModel = models.length > 0 ? models[0] : void 0;
	if (firstModel != null) return firstModel;
	throw new Error(`No configured model found for inferenceExecutor id "${task.inferenceExecutor.id}".`);
}
458
/**
 * Builds the task-scoped context whose `model()` resolver looks models up in
 * `options.models`.
 *
 * Call stack:
 *
 * {@link runScheduledTasks}
 *   -> {@link createTaskExecutionContext}
 *     -> {@link resolveModelByName}
 *       -> `task.model()` / `task.model({ name })`
 *
 * `model()` behavior:
 * - no selection: falls back to the task's default model
 * - a string or `{ name }`: resolves that name, throwing `Error` when it is
 *   not a configured model
 */
function createTaskExecutionContext(options) {
	return {
		model(selection) {
			if (selection == null) return resolveDefaultTaskModel(options.models, options.task);
			let name;
			if (typeof selection === "string") name = selection;
			else name = selection.name;
			const namedModel = resolveModelByName(options.models, name);
			if (namedModel != null) return namedModel;
			throw new Error(`Unknown configured model "${name}".`);
		}
	};
}
477
+ //#endregion
478
+ export { runScheduledTasks as a, aggregateRunResults as c, RunnerExecutionError as i, createRunnerSchedule as n, asProjectRelativePath as o, createRunnerRuntimeContext as r, collectEvalEntries as s, createTaskExecutionContext as t };
479
+
480
+ //# sourceMappingURL=runner-4ZsOveoY.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner-4ZsOveoY.mjs","names":[],"sources":["../src/core/runner/aggregate.ts","../src/core/runner/collect.ts","../src/core/runner/run.ts","../src/core/runner/runtime-context.ts","../src/core/runner/schedule.ts","../src/core/runner/task-context.ts"],"sourcesContent":["import type { ScheduledTaskMatrix } from './schedule'\n\n/**\n * Identifies the scoring family for a single eval score.\n */\nexport type RunScoreKind = 'exact' | 'judge'\n\n/**\n * Represents one normalized score emitted by a completed eval run.\n */\nexport interface RunScore {\n /**\n * Score family used for aggregation.\n */\n kind: RunScoreKind\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Captures the output of one scheduled runner task.\n */\nexport interface RunResult {\n /**\n * Stable run id, usually copied from the scheduled task id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Raw scores emitted by the eval.\n */\n scores: readonly RunScore[]\n}\n\n/**\n * Stores the per-run score averages after normalization.\n */\nexport interface AggregatedRunSummary {\n /**\n * Stable run id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Mean of exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average. 
Uses both families when present, otherwise falls back to the\n * single available family.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores inferenceExecutor-level score aggregates across multiple runs.\n */\nexport interface AggregatedProviderSummary {\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Number of runs included in this inferenceExecutor bucket.\n */\n runCount: number\n /**\n * Mean of all exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of all judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average derived from the inferenceExecutor exact and judge means.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores the final aggregation output for a batch of runner results.\n */\nexport interface AggregatedRunResults {\n /**\n * Per-run normalized score summaries.\n */\n runs: AggregatedRunSummary[]\n /**\n * Provider-level summaries sorted by inferenceExecutor id.\n */\n inferenceExecutors: AggregatedProviderSummary[]\n /**\n * Overall summary across every run.\n */\n overall: {\n exactAverage: number | null\n judgeAverage: number | null\n hybridAverage: number | null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' || kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number | null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number | null, 
judgeAverage: number | null): number | null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/**\n * Aggregates exact-match and judge-based scores into hybrid runner summaries.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> 
`report output`\n *\n * Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n *\n * Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n */\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/|\\/|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/**\n * Converts a file path into a project-relative path when possible.\n *\n * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n *\n * Before: 
`D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n */\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string | null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry | null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n 
const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/**\n * Collects loaded vieval modules into sorted runner entries with stable ids.\n *\n * Call stack:\n *\n * `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n *\n * Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n */\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { aggregateRunResults } from './aggregate'\n\n/**\n * Executes one scheduled runner task and returns a normalized run result.\n *\n * Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n *\n * Expects:\n * - the task context to be ready for model resolution and task-scoped work\n *\n * Returns:\n * - a normalized run result with score entries ready for aggregation\n */\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/**\n * Terminal task state reported by runner lifecycle hooks.\n *\n * 
Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n *\n * Expects:\n * - hooks treat the value as final for the completed task\n */\nexport type RunnerTaskState = 'passed' | 'failed'\n\n/**\n * Optional runner execution hooks used while processing scheduled tasks.\n *\n * Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n *\n * Expects:\n * - hook functions are synchronous lifecycle observers\n */\nexport interface RunScheduledTasksOptions {\n /**\n * Creates per-task execution context.\n *\n * Use when:\n * - executor code needs per-task model resolution or other task-scoped data\n */\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /**\n * Runs before the executor starts handling a task.\n *\n * Use when:\n * - callers want to observe task activation before execution begins\n *\n * Expects:\n * - thrown errors abort the task before executor work starts\n */\n onTaskStart?: (task: ScheduledTask) => void\n /**\n * Runs after the executor settles for a task.\n *\n * Use when:\n * - callers want to observe successful and failed task completion\n *\n * Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n */\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n}\n\nfunction createDefaultExecutionContext(task: ScheduledTask): TaskExecutionContext {\n return {\n model(options) {\n const requestedModelName = typeof options === 'string' ? options : options?.name\n if (requestedModelName != null) {\n throw new Error(`No model registry configured. 
Requested model: ${requestedModelName}`)\n }\n\n throw new Error(`No model registry configured for task inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n },\n }\n}\n\n/**\n * Error thrown when a scheduled run fails before producing a normalized result.\n */\nexport class RunnerExecutionError extends Error {\n /**\n * Stable task id that failed.\n */\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/**\n * Executes runner tasks sequentially and aggregates the normalized results.\n *\n * Call stack:\n *\n * {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n *\n * Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n *\n * Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n *\n * Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n */\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n const results: RunResult[] = []\n\n for (const task of tasks) {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = 
options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n results.push(await executor(task, executionContext))\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n }\n\n return aggregateRunResults(results)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/**\n * Shared runtime context used by the vieval runner.\n *\n * Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n */\nexport interface RunnerRuntimeContext {\n /**\n * Absolute project root directory used for path normalization.\n */\n projectRootDirectory: string\n}\n\n/**\n * Options used to construct the runner runtime context.\n */\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /**\n * Directory used to search for the nearest pnpm workspace.\n *\n * @default directory of this module file\n */\n cwd?: string\n /**\n * Absolute fallback directory when a pnpm workspace root is not found.\n *\n * @default package root directory (`packages/vieval`)\n */\n fallbackProjectRootDirectory?: string\n}\n\n/**\n * Creates a side-effect-free runtime context for runner path normalization.\n *\n * Call stack:\n *\n * {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n *\n * 
Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n */\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string | undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? 
fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/**\n * Describes the inferenceExecutor target for a scheduled eval run.\n */\nexport interface InferenceExecutor {\n /**\n * Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n */\n id: string\n}\n\n/**\n * Stores the selected value for each matrix axis.\n */\nexport type RunnerMatrixSelection = Record<string, string>\n\n/**\n * Stores stable row ids for one resolved scheduled task matrix.\n */\nexport interface ScheduledTaskMatrixMeta {\n /**\n * Stable row id for the resolved run matrix selection.\n */\n runRowId: string\n /**\n * Stable row id for the resolved eval matrix selection.\n */\n evalRowId: string\n}\n\n/**\n * Stores the structured matrix payload for one scheduled task.\n */\nexport interface ScheduledTaskMatrix {\n /**\n * Runtime matrix selection visible to task code.\n */\n run: RunnerMatrixSelection\n /**\n * Eval-time matrix selection visible to task code.\n */\n eval: RunnerMatrixSelection\n /**\n * Stable row ids for both scopes.\n */\n meta: ScheduledTaskMatrixMeta\n}\n\n/**\n * Maps matrix axis names to the values that should be expanded.\n */\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/**\n * Accepts either flat axis definitions or one layered matrix object.\n */\nexport type RunnerMatrixInput = RunnerMatrixDefinition | MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/**\n * Represents one fully expanded runner task.\n */\nexport interface ScheduledTask {\n /**\n * Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n */\n id: string\n /**\n * The collected eval entry to execute.\n */\n entry: CollectedEvalEntry\n /**\n * The inferenceExecutor 
selected for this task.\n */\n inferenceExecutor: InferenceExecutor\n /**\n * The concrete scoped matrix selection for this task.\n */\n matrix: ScheduledTaskMatrix\n}\n\n/**\n * Configures how the runner should expand its execution matrix.\n */\nexport interface CreateRunnerScheduleOptions {\n /**\n * Collected eval entries that should be scheduled.\n */\n entries: readonly CollectedEvalEntry[]\n /**\n * Providers that should run each entry.\n */\n inferenceExecutors: readonly InferenceExecutor[]\n /**\n * Optional run-time matrix axes expanded as a cartesian product.\n */\n runMatrix?: RunnerMatrixInput\n /**\n * Optional eval-time matrix axes expanded as a cartesian product.\n */\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction 
normalizeLayerInputToAxes(matrix: RunnerMatrixInput | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition | undefined,\n mode: 'extend' | 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer | undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? 
[]) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: 
CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/**\n * Expands collected entries into a stable runner schedule.\n *\n * Call stack:\n *\n * {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n *\n * Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n *\n * Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n */\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 || evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { 
ModelDefinition } from '../../config/models'\nimport type { ScheduledTask } from './schedule'\n\nimport { resolveModelByName } from '../../config/models'\n\n/**\n * Options for selecting a model from the execution context.\n */\nexport interface TaskModelSelectionOptions {\n /**\n * Model id or alias name.\n */\n name: string\n}\n\n/**\n * Task-scoped execution context exposed to runner executors.\n */\nexport interface TaskExecutionContext {\n /**\n * Resolves model configuration for the current task.\n *\n * Use when:\n * - no arguments are provided to use the model selected by run matrix/inferenceExecutor\n * - `name` is provided to resolve a specific model id or alias\n */\n model: (\n selection?: string | TaskModelSelectionOptions,\n ) => ModelDefinition\n}\n\n/**\n * Inputs used to build task execution context.\n */\nexport interface CreateTaskExecutionContextOptions {\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction resolveDefaultTaskModel(\n models: readonly ModelDefinition[],\n task: ScheduledTask,\n): ModelDefinition {\n const runMatrixModelName = task.matrix.run.model\n if (runMatrixModelName != null) {\n const matrixSelectedModel = resolveModelByName(models, runMatrixModelName)\n if (matrixSelectedModel != null) {\n return matrixSelectedModel\n }\n\n throw new Error(`Unknown configured model \"${runMatrixModelName}\" from task.matrix.run.model.`)\n }\n\n const matched = resolveModelByName(models, task.inferenceExecutor.id)\n if (matched != null) {\n return matched\n }\n\n if (models.length > 0) {\n const firstModel = models[0]\n if (firstModel != null) {\n return firstModel\n }\n }\n\n throw new Error(`No configured model found for inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n}\n\n/**\n * Creates task-scoped model resolver context for runner execution.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> {@link resolveModelByName}\n * -> `task.model()` / 
`task.model({ name })`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n model(selection) {\n if (selection == null) {\n return resolveDefaultTaskModel(options.models, options.task)\n }\n\n const name = typeof selection === 'string' ? selection : selection.name\n\n const namedModel = resolveModelByName(options.models, name)\n if (namedModel == null) {\n throw new Error(`Unknown configured model \"${name}\".`)\n }\n\n return namedModel\n },\n }\n}\n"],"mappings":";;;;;;AAwIA,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAA
K,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;ACtD3D,SAAS,8BAA8B,MAA2C;AAChF,QAAO,EACL,MAAM,SAAS;EACb,MAAM,qBAAqB,OAAO,YAAY,WAAW,UAAU,SAAS;AAC5E,MAAI,sBAAsB,KACxB,OAAM,IAAI,MAAM,kDAAkD,qBAAqB;AAGzF,QAAM,IAAI,MAAM,+DAA+D,KAAK,kBAAkB,GAAG,IAAI;IAEhH;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAA
Q;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,MAAM,UAAuB,EAAE;AAE/B,MAAK,MAAM,QAAQ,OAAO;EACxB,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,8BAA8B,KAAK;WAE3F,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,KAAK,MAAM,SAAS,MAAM,iBAAiB,CAAC;WAE/C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;;AAIpD,QAAO,oBAAoB,QAAQ;;;;ACxLrC,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OA
AO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC
,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;AC9TT,SAAS,wBACP,QACA,MACiB;CACjB,MAAM,qBAAqB,KAAK,OAAO,IAAI;AAC3C,KAAI,sBAAsB,MAAM;EAC9B,MAAM,sBAAsB,mBAAmB,QAAQ,mBAAmB;AAC1E,MAAI,uBAAuB,KACzB,QAAO;AAGT,QAAM,IAAI,MAAM,6BAA6B,mBAAmB,+BAA+B;;CAGjG,MAAM,UAAU,mBAAmB,QAAQ,KAAK,kBAAkB,GAAG;AACrE,KAAI,WAAW,KACb,QAAO;AAGT,KAAI,OAAO,SAAS,GAAG;EACrB,MAAM,aAAa,OAAO;AAC1B,MAAI,cAAc,KAChB,QAAO;;AAIX,OAAM,IAAI,MAAM,uDAAuD,KAAK,kBAAkB,GAAG,IAAI;;;;;;;;;;;;AAavG,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO,EACL,MAAM,WAAW;AACf,MAAI,aAAa,KACf,QAAO,wBAAwB,QAAQ,QAAQ,QAAQ,KAAK;EAG9D,MAAM,OAAO,OAAO,cAAc,WAAW,YAAY,UAAU;EAEnE,MAAM,aAAa,mBAAmB,QAAQ,QAAQ,KAAK;AAC3D,MAAI,cAAc,KAChB,OAAM,IAAI,MAAM,6BAA6B,KAAK,IAAI;AAGxD,SAAO;IAEV"}
@@ -0,0 +1,86 @@
1
+ import { f as ToolCall } from "../index-oSXhM1zx.mjs";
2
+
3
+ //#region src/testing/expect-extensions.d.ts
4
+ /**
5
+ * Options for keyword-based matcher behavior; accepted as the optional second argument of `toMustInclude` and `toMustExclude`.
6
+ */
7
+ interface KeywordMatcherOptions {
8
+ /**
9
+ * Case-sensitive matching toggle. NOTE(review): with the default `false`, matching is presumably case-insensitive; confirm against the matcher implementation.
10
+ *
11
+ * @default false
12
+ */
13
+ caseSensitive?: boolean;
14
+ /**
15
+ * Match mode. NOTE(review): presumably `'all'` requires every keyword to match and `'any'` at least one; confirm against the matcher implementation.
16
+ *
17
+ * @default 'all'
18
+ */
19
+ mode?: 'all' | 'any';
20
+ }
21
+ /**
22
+ * Shape used by tool-call matchers; the `toSatisfyToolCallArgs` example passes a value of this form (`expect({ toolCalls })`).
23
+ */
24
+ interface ToolCallContainer {
25
+ /**
26
+ * Tool calls to inspect. Optional and read-only; elements are the `ToolCall` type imported at the top of this file.
27
+ */
28
+ toolCalls?: readonly ToolCall[];
29
+ }
30
+ /**
31
+ * Registers vieval custom matchers on Vitest `expect`. Takes no arguments and returns nothing.
32
+ *
33
+ * Call stack:
34
+ *
35
+ * {@link installVievalExpectMatchers}
36
+ * -> `expect.extend(...)`
37
+ * -> `expect(received).toMustInclude(...)`
38
+ * -> `expect(received).toScoreRubricGreaterThan(...)` (only two matchers are listed here; the `Assertion` augmentation in this file declares five)
39
+ *
40
+ * Use when:
41
+ * - eval suites need domain assertions while preserving native Vitest ergonomics
42
+ * - callers want native `.not` chaining with the same matchers
43
+ */
44
+ declare function installVievalExpectMatchers(): void;
45
+ declare module 'vitest' { // Module augmentation: merges the vieval matchers into Vitest's own `Assertion` interface.
46
+ interface Assertion<T = any> { // Type parameters must match Vitest's `Assertion<T = any>` or the merge fails with TS2428; `T` is intentionally unused.
47
+ /**
48
+ * Asserts that text includes required keywords.
49
+ *
50
+ * Example:
51
+ * `expect('calm answer').toMustInclude(['calm'])`
52
+ */
53
+ toMustInclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void;
54
+ /**
55
+ * Asserts that text excludes forbidden keywords.
56
+ *
57
+ * Example:
58
+ * `expect('calm answer').toMustExclude(['bestmove'])`
59
+ */
60
+ toMustExclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void;
61
+ /**
62
+ * Asserts that the rubric score is greater than a threshold.
63
+ *
64
+ * Example:
65
+ * `expect({ score: 0.91 }).toScoreRubricGreaterThan(0.8)`
66
+ */
67
+ toScoreRubricGreaterThan: (threshold: number) => void;
68
+ /**
69
+ * Asserts that structured output satisfies a type-guard validator.
70
+ *
71
+ * Example:
72
+ * `expect(value).toSatisfyStructuredOutput(isMyShape)`
73
+ */
74
+ toSatisfyStructuredOutput: <TValue>(validator: (value: unknown) => value is TValue) => void;
75
+ /**
76
+ * Asserts that the args of the selected tool call (presumably the one named `toolName`) satisfy the validator.
77
+ *
78
+ * Example:
79
+ * `expect({ toolCalls }).toSatisfyToolCallArgs('builtIn_sparkCommand', isSparkArgs)`
80
+ */
81
+ toSatisfyToolCallArgs: (toolName: string, validator: (args: unknown) => boolean) => void;
82
+ }
83
+ }
84
+ //#endregion
85
+ export { KeywordMatcherOptions, ToolCallContainer, installVievalExpectMatchers };
86
+ //# sourceMappingURL=expect-extensions.d.mts.map
@@ -0,0 +1,2 @@
1
+ import { t as installVievalExpectMatchers } from "../expect-extensions-CwPtgTz8.mjs";
2
+ export { installVievalExpectMatchers };