vieval 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +6 -3
  2. package/dist/bin/vieval.d.mts +1 -0
  3. package/dist/bin/vieval.mjs +33 -0
  4. package/dist/bin/vieval.mjs.map +1 -0
  5. package/dist/cli/index.d.mts +32 -0
  6. package/dist/cli/index.mjs +1 -2576
  7. package/dist/cli-DayPXzHX.mjs +2593 -0
  8. package/dist/cli-DayPXzHX.mjs.map +1 -0
  9. package/dist/config.d.mts +1 -1
  10. package/dist/config.mjs +17 -2
  11. package/dist/config.mjs.map +1 -0
  12. package/dist/core/assertions/index.d.mts +314 -2
  13. package/dist/core/assertions/index.mjs +182 -1
  14. package/dist/core/assertions/index.mjs.map +1 -0
  15. package/dist/core/inference-executors/index.d.mts +1 -1
  16. package/dist/core/inference-executors/index.mjs +1 -1
  17. package/dist/core/processors/results/index.d.mts +1 -1
  18. package/dist/core/runner/index.d.mts +1 -1
  19. package/dist/core/runner/index.mjs +635 -1
  20. package/dist/core/runner/index.mjs.map +1 -0
  21. package/dist/{env-C7X81PWa.mjs → env-BFSjny07.mjs} +1 -1
  22. package/dist/{env-C7X81PWa.mjs.map → env-BFSjny07.mjs.map} +1 -1
  23. package/dist/{env-DtpjACOW.d.mts → env-BTq3dV7C.d.mts} +1 -1
  24. package/dist/{expect-extensions-BOzwV5EJ.mjs → expect-extensions-QLXESWjn.mjs} +2 -2
  25. package/dist/{expect-extensions-BOzwV5EJ.mjs.map → expect-extensions-QLXESWjn.mjs.map} +1 -1
  26. package/dist/expect.d.mts +10 -2
  27. package/dist/expect.mjs +16 -1
  28. package/dist/expect.mjs.map +1 -0
  29. package/dist/{index-BDMEAmf2.d.mts → index-OEdqjQSe.d.mts} +2 -2
  30. package/dist/index.d.mts +3 -3
  31. package/dist/index.mjs +4 -4
  32. package/dist/{models-DIGdOUpJ.mjs → models-D_MsBtYw.mjs} +1 -1
  33. package/dist/{models-DIGdOUpJ.mjs.map → models-D_MsBtYw.mjs.map} +1 -1
  34. package/dist/plugins/chat-models/index.d.mts +1 -1
  35. package/dist/plugins/chat-models/index.mjs +1 -1
  36. package/dist/{registry-CHJcTN2W.mjs → registry-CwcMMjnZ.mjs} +3 -3
  37. package/dist/{registry-CHJcTN2W.mjs.map → registry-CwcMMjnZ.mjs.map} +1 -1
  38. package/dist/testing/expect-extensions.d.mts +1 -1
  39. package/dist/testing/expect-extensions.mjs +1 -1
  40. package/package.json +3 -3
  41. package/dist/assertions-DcAjfVDA.mjs +0 -183
  42. package/dist/assertions-DcAjfVDA.mjs.map +0 -1
  43. package/dist/cli/index.mjs.map +0 -1
  44. package/dist/config-CHN24egi.mjs +0 -17
  45. package/dist/config-CHN24egi.mjs.map +0 -1
  46. package/dist/expect-B2vaoRVZ.d.mts +0 -10
  47. package/dist/expect-CaXiUkwY.mjs +0 -17
  48. package/dist/expect-CaXiUkwY.mjs.map +0 -1
  49. package/dist/index-C3gPFmcR.d.mts +0 -314
  50. package/dist/runner-Dpy-eivM.mjs +0 -636
  51. package/dist/runner-Dpy-eivM.mjs.map +0 -1
@@ -1,636 +0,0 @@
1
- import { t as resolveModelByName } from "./models-DIGdOUpJ.mjs";
2
- import { createRequire } from "node:module";
3
- import process from "node:process";
4
- import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
5
- import { basename, dirname, join, relative } from "node:path";
6
- import { fileURLToPath } from "node:url";
7
- import { errorMessageFrom } from "@moeru/std";
8
- import { createReadStream, createWriteStream } from "node:fs";
9
- import { Buffer } from "node:buffer";
10
- import { limitConcurrency } from "@vitest/runner/utils";
11
- //#region src/core/cache/filesystem.ts
12
- function sanitizePathSegment(value) {
13
- const normalized = value.trim();
14
- if (normalized.length === 0) return "default";
15
- return normalized.replace(/[^\w.-]+/g, "-");
16
- }
17
- function normalizeExtension(extension, mediaType) {
18
- if (extension != null && extension.length > 0) return extension.startsWith(".") ? extension.slice(1) : extension;
19
- if (mediaType == null || mediaType.length === 0) return;
20
- if (mediaType === "application/json") return "json";
21
- if (mediaType === "text/plain") return "txt";
22
- if (mediaType === "audio/wav") return "wav";
23
- }
24
- /**
25
- * Normalizes cache file options into deterministic relative path segments.
26
- *
27
- * Before:
28
- * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`
29
- *
30
- * After:
31
- * - `['cases', 'dataset-hash', 'v1.json']`
32
- */
33
- function normalizeCacheFilePathSegments(options) {
34
- const sanitizedKey = options.key.map((segment) => sanitizePathSegment(segment));
35
- const extension = normalizeExtension(options.ext, options.mediaType);
36
- if (sanitizedKey.length === 0) return extension == null ? ["artifact"] : [`artifact.${extension}`];
37
- if (extension == null) return sanitizedKey;
38
- const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1));
39
- const tail = sanitizedKey[sanitizedKey.length - 1] ?? "artifact";
40
- return [...withoutTail, `${tail}.${extension}`];
41
- }
42
- async function writeAtomically(path, content) {
43
- const directory = dirname(path);
44
- const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
45
- await mkdir(directory, { recursive: true });
46
- await writeFile(temporaryPath, content);
47
- await rename(temporaryPath, path);
48
- }
49
- function createCacheFileHandle(path) {
50
- return {
51
- path,
52
- async exists() {
53
- try {
54
- await access(path);
55
- return true;
56
- } catch {
57
- return false;
58
- }
59
- },
60
- openReadStream() {
61
- return createReadStream(path);
62
- },
63
- async openWriteStream() {
64
- await mkdir(dirname(path), { recursive: true });
65
- return createWriteStream(path);
66
- },
67
- async readBuffer() {
68
- return await readFile(path);
69
- },
70
- async writeBuffer(value) {
71
- await writeAtomically(path, value);
72
- },
73
- async readText(encoding = "utf-8") {
74
- return await readFile(path, encoding);
75
- },
76
- async writeText(value, encoding = "utf-8") {
77
- await writeAtomically(path, Buffer.from(value, encoding));
78
- },
79
- async readJson() {
80
- return JSON.parse(await readFile(path, "utf-8"));
81
- },
82
- async writeJson(value) {
83
- await writeAtomically(path, `${JSON.stringify(value, null, 2)}\n`);
84
- },
85
- async loadAsCasesInput() {
86
- return await this.readJson();
87
- },
88
- async loadAsExpectFixture() {
89
- return await this.readJson();
90
- }
91
- };
92
- }
93
- function createCacheNamespace(baseDirectory, namespace) {
94
- return { file(options) {
95
- const relativePathSegments = normalizeCacheFilePathSegments(options);
96
- return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments));
97
- } };
98
- }
99
- /**
100
- * Creates a deterministic filesystem-backed task cache runtime.
101
- *
102
- * Use when:
103
- * - eval tasks need reproducible cache paths for expensive pre-processing outputs
104
- * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes
105
- *
106
- * Expects:
107
- * - `cacheRootDirectory` to be writable by the running process
108
- * - `workspaceId` + `projectName` to stay stable for reproducible paths
109
- *
110
- * Returns:
111
- * - task cache runtime that resolves namespaced file handles under:
112
- * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
113
- */
114
- function createFilesystemTaskCacheRuntime(options) {
115
- const workspaceDirectory = sanitizePathSegment(options.workspaceId);
116
- const projectDirectory = sanitizePathSegment(options.projectName);
117
- const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory);
118
- return { namespace(name) {
119
- return createCacheNamespace(baseDirectory, name);
120
- } };
121
- }
122
- //#endregion
123
- //#region src/core/runner/aggregate.ts
124
- function cloneScheduledTaskMatrix(matrix) {
125
- return {
126
- eval: { ...matrix.eval },
127
- meta: { ...matrix.meta },
128
- run: { ...matrix.run }
129
- };
130
- }
131
- function assertKnownScoreKind(kind) {
132
- if (kind === "exact" || kind === "judge") return kind;
133
- throw new TypeError(`Unknown eval score kind "${kind}".`);
134
- }
135
- function average(scores) {
136
- if (scores.length === 0) return null;
137
- return scores.reduce((sum, score) => sum + score, 0) / scores.length;
138
- }
139
- function createHybridAverage(exactAverage, judgeAverage) {
140
- if (exactAverage != null && judgeAverage != null) return (exactAverage + judgeAverage) / 2;
141
- if (exactAverage != null) return exactAverage;
142
- if (judgeAverage != null) return judgeAverage;
143
- return null;
144
- }
145
- function collectScoreBuckets(scores) {
146
- const buckets = {
147
- exact: [],
148
- judge: []
149
- };
150
- for (const score of scores) {
151
- if (assertKnownScoreKind(score.kind) === "exact") {
152
- buckets.exact.push(score.score);
153
- continue;
154
- }
155
- buckets.judge.push(score.score);
156
- }
157
- return buckets;
158
- }
159
- function createRunSummary(result) {
160
- const buckets = collectScoreBuckets(result.scores);
161
- const exactAverage = average(buckets.exact);
162
- const judgeAverage = average(buckets.judge);
163
- return {
164
- entryId: result.entryId,
165
- exactAverage,
166
- hybridAverage: createHybridAverage(exactAverage, judgeAverage),
167
- id: result.id,
168
- judgeAverage,
169
- matrix: cloneScheduledTaskMatrix(result.matrix),
170
- inferenceExecutorId: result.inferenceExecutorId
171
- };
172
- }
173
- function createProviderSummary(inferenceExecutorId, results) {
174
- const exactScores = [];
175
- const judgeScores = [];
176
- for (const result of results) {
177
- const buckets = collectScoreBuckets(result.scores);
178
- exactScores.push(...buckets.exact);
179
- judgeScores.push(...buckets.judge);
180
- }
181
- const exactAverage = average(exactScores);
182
- const judgeAverage = average(judgeScores);
183
- return {
184
- exactAverage,
185
- hybridAverage: createHybridAverage(exactAverage, judgeAverage),
186
- judgeAverage,
187
- inferenceExecutorId,
188
- runCount: results.length
189
- };
190
- }
191
- /**
192
- * Aggregates exact-match and judge-based scores into hybrid runner summaries.
193
- *
194
- * Call stack:
195
- *
196
- * {@link runScheduledTasks}
197
- * -> {@link aggregateRunResults}
198
- * -> {@link createRunSummary}
199
- * -> {@link createProviderSummary}
200
- * -> `report output`
201
- *
202
- * Use when:
203
- * - a runner batch mixes deterministic exact checks with judge-based grading
204
- * - inferenceExecutor comparison should preserve both score families and one hybrid view
205
- *
206
- * Expects:
207
- * - each score to be normalized to the `0..1` range before aggregation
208
- * - `scores.kind` to use only `'exact'` or `'judge'`
209
- */
210
- function aggregateRunResults(results) {
211
- const runs = results.map(createRunSummary);
212
- const inferenceExecutors = Array.from(new Set(results.map((result) => result.inferenceExecutorId))).map((inferenceExecutorId) => {
213
- return createProviderSummary(inferenceExecutorId, results.filter((result) => result.inferenceExecutorId === inferenceExecutorId));
214
- }).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
215
- const overall = createProviderSummary("overall", results);
216
- return {
217
- overall: {
218
- exactAverage: overall.exactAverage,
219
- hybridAverage: overall.hybridAverage,
220
- judgeAverage: overall.judgeAverage,
221
- runCount: overall.runCount
222
- },
223
- inferenceExecutors,
224
- runs
225
- };
226
- }
227
- //#endregion
228
- //#region src/core/runner/collect.ts
229
- const evalFileSuffix = ".eval.ts";
230
- const absolutePathPattern = /^(?:[A-Z]:\/|\/|\\\\)/i;
231
- function normalizePath(value) {
232
- return value.replaceAll("\\", "/");
233
- }
234
- /**
235
- * Converts a file path into a project-relative path when possible.
236
- *
237
- * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
238
- * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
239
- *
240
- * Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
241
- * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`
242
- */
243
- function asProjectRelativePath(filePath, context) {
244
- const normalizedFilePath = normalizePath(filePath);
245
- const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory);
246
- const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\//i)?.[0];
247
- const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\//i)?.[0];
248
- if (filePathWindowsDrive != null && projectRootWindowsDrive == null) return normalizedFilePath;
249
- if (filePathWindowsDrive != null && projectRootWindowsDrive != null && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()) return normalizedFilePath;
250
- const projectRootDirectory = context.projectRootDirectory;
251
- const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath));
252
- if (!absolutePathPattern.test(relativeFilePath)) {
253
- if (relativeFilePath === "..") return normalizePath(filePath);
254
- if (!relativeFilePath.startsWith("../")) return relativeFilePath;
255
- }
256
- return normalizePath(filePath);
257
- }
258
- function resolveModuleFilePath(moduleHref) {
259
- if (!moduleHref.startsWith("file:")) return null;
260
- try {
261
- return fileURLToPath(moduleHref);
262
- } catch {
263
- return null;
264
- }
265
- }
266
- function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
267
- const filePath = resolveModuleFilePath(moduleHref);
268
- if (!filePath) return null;
269
- const relativeFilePath = asProjectRelativePath(filePath, context);
270
- if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
271
- const entryName = basename(relativeFilePath, evalFileSuffix);
272
- if (entryName.length === 0) return null;
273
- const relativeDirectory = dirname(relativeFilePath);
274
- const directory = relativeDirectory === "." ? "" : relativeDirectory;
275
- return {
276
- ...moduleDefinition.default,
277
- directory,
278
- filePath,
279
- id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
280
- name: entryName
281
- };
282
- }
283
- /**
284
- * Collects loaded vieval modules into sorted runner entries with stable ids.
285
- *
286
- * Call stack:
287
- *
288
- * `import.meta.glob(...)`
289
- * -> {@link collectEvalEntries}
290
- * -> {@link createCollectedEvalEntry}
291
- * -> {@link CollectedEvalEntry}[]
292
- *
293
- * Use when:
294
- * - the runner has already loaded candidate eval modules
295
- * - downstream scheduling needs stable entry ids and directory metadata
296
- */
297
- function collectEvalEntries(modules, context) {
298
- return Object.entries(modules).flatMap(([moduleHref, moduleDefinition]) => {
299
- const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context);
300
- if (!entry) return [];
301
- return [entry];
302
- }).sort((left, right) => left.id.localeCompare(right.id));
303
- }
304
- //#endregion
305
- //#region src/core/runner/run.ts
306
- function createDefaultExecutionContext(task) {
307
- return {
308
- cache: { namespace(name) {
309
- return { file(options) {
310
- const key = options.key.join("/");
311
- throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
312
- } };
313
- } },
314
- model(options) {
315
- const requestedModelName = typeof options === "string" ? options : options?.name;
316
- if (requestedModelName != null) throw new Error(`No model registry configured. Requested model: ${requestedModelName}`);
317
- throw new Error(`No model registry configured for task inferenceExecutor id "${task.inferenceExecutor.id}".`);
318
- }
319
- };
320
- }
321
- /**
322
- * Error thrown when a scheduled run fails before producing a normalized result.
323
- */
324
- var RunnerExecutionError = class extends Error {
325
- /**
326
- * Stable task id that failed.
327
- */
328
- taskId;
329
- constructor(taskId, cause) {
330
- const message = errorMessageFrom(cause) ?? "Unknown runner execution failure.";
331
- super(`Runner task "${taskId}" failed: ${message}`);
332
- this.name = "RunnerExecutionError";
333
- this.taskId = taskId;
334
- this.cause = cause;
335
- }
336
- };
337
- function createRunnerExecutionError(taskId, cause) {
338
- if (cause instanceof RunnerExecutionError && cause.taskId === taskId) return cause;
339
- return new RunnerExecutionError(taskId, cause);
340
- }
341
- /**
342
- * Executes runner tasks sequentially and aggregates the normalized results.
343
- *
344
- * Call stack:
345
- *
346
- * {@link createRunnerSchedule}
347
- * -> {@link runScheduledTasks}
348
- * -> `executor(task)`
349
- * -> {@link aggregateRunResults}
350
- *
351
- * Use when:
352
- * - the caller already expanded the runner matrix
353
- * - task execution should stay deterministic and easy to debug
354
- *
355
- * Expects:
356
- * - `executor` to return normalized `0..1` scores
357
- * - callers to handle concurrency outside this helper when needed
358
- * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers
359
- *
360
- * Throws:
361
- * - `RunnerExecutionError` when task setup, hooks, or the executor throws
362
- */
363
- async function runScheduledTasks(tasks, executor, options = {}) {
364
- if (tasks.length === 0) return aggregateRunResults([]);
365
- async function executeScheduledTask(task) {
366
- let executionContext;
367
- try {
368
- executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task);
369
- } catch (error) {
370
- throw createRunnerExecutionError(task.id, error);
371
- }
372
- try {
373
- options.onTaskStart?.(task);
374
- } catch (error) {
375
- throw createRunnerExecutionError(task.id, error);
376
- }
377
- let runResult;
378
- try {
379
- runResult = await executor(task, executionContext);
380
- } catch (error) {
381
- try {
382
- options.onTaskEnd?.(task, "failed");
383
- } catch {}
384
- throw createRunnerExecutionError(task.id, error);
385
- }
386
- try {
387
- options.onTaskEnd?.(task, "passed");
388
- } catch (error) {
389
- throw createRunnerExecutionError(task.id, error);
390
- }
391
- return runResult;
392
- }
393
- const maxConcurrency = options.maxConcurrency ?? 1;
394
- if (maxConcurrency <= 1) {
395
- const results = [];
396
- for (const task of tasks) results.push(await executeScheduledTask(task));
397
- return aggregateRunResults(results);
398
- }
399
- const runWithLimit = limitConcurrency(maxConcurrency);
400
- return aggregateRunResults((await Promise.all(tasks.map(async (task, index) => {
401
- return {
402
- index,
403
- result: await runWithLimit(async () => executeScheduledTask(task))
404
- };
405
- }))).sort((left, right) => left.index - right.index).map((item) => item.result));
406
- }
407
- //#endregion
408
- //#region src/core/runner/runtime-context.ts
409
- const require = createRequire(import.meta.url);
410
- /**
411
- * Creates a side-effect-free runtime context for runner path normalization.
412
- *
413
- * Call stack:
414
- *
415
- * {@link createRunnerRuntimeContext}
416
- * -> `findWorkspaceDir(cwd)`
417
- * -> `resolve projectRootDirectory`
418
- * -> `{ projectRootDirectory }`
419
- *
420
- * Use when:
421
- * - initializing runner infrastructure before collecting eval modules
422
- * - tests need deterministic root resolution behavior
423
- */
424
- async function createRunnerRuntimeContext(options = {}) {
425
- const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url));
426
- const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory ?? fileURLToPath(new URL("../../../", import.meta.url));
427
- const { findWorkspaceDir } = require("@pnpm/find-workspace-dir");
428
- return { projectRootDirectory: await findWorkspaceDir(cwd) ?? fallbackProjectRootDirectory };
429
- }
430
- //#endregion
431
- //#region src/core/runner/schedule.ts
432
- const matrixLayerKeys = new Set([
433
- "disable",
434
- "extend",
435
- "override"
436
- ]);
437
- const ambiguousMatrixDefinitionErrorMessage = "Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.";
438
- function encodeTaskIdSegment(value) {
439
- return encodeURIComponent(value);
440
- }
441
- function stringifyMatrixValue(value) {
442
- return String(value);
443
- }
444
- function cloneMatrixSelection(matrix) {
445
- return { ...matrix };
446
- }
447
- function createScheduledTaskMatrix(runMatrix, evalMatrix) {
448
- return {
449
- eval: cloneMatrixSelection(evalMatrix),
450
- meta: {
451
- evalRowId: createStableRowId(evalMatrix),
452
- runRowId: createStableRowId(runMatrix)
453
- },
454
- run: cloneMatrixSelection(runMatrix)
455
- };
456
- }
457
- function isMatrixLayer(matrix) {
458
- const matrixKeys = Object.keys(matrix);
459
- return matrixKeys.length > 0 && matrixKeys.every((key) => matrixLayerKeys.has(key));
460
- }
461
- function assertNonAmbiguousMatrixDefinition(matrix) {
462
- const matrixKeys = Object.keys(matrix);
463
- const hasReservedKeys = matrixKeys.some((key) => matrixLayerKeys.has(key));
464
- const hasAxisKeys = matrixKeys.some((key) => !matrixLayerKeys.has(key));
465
- if (hasReservedKeys && hasAxisKeys) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
466
- }
467
- function normalizeLayerInputToAxes(matrix) {
468
- if (matrix == null) return;
469
- assertNonAmbiguousMatrixDefinition(matrix);
470
- if (isMatrixLayer(matrix)) return matrix;
471
- return { extend: matrix };
472
- }
473
- function dedupeAxisValues(values) {
474
- return Array.from(new Set(values.map(stringifyMatrixValue)));
475
- }
476
- function applyAxisValues(axes, definition, mode) {
477
- if (definition == null) return;
478
- for (const [axis, values] of Object.entries(definition)) {
479
- const nextValues = dedupeAxisValues(values);
480
- if (mode === "extend") {
481
- const existingValues = axes.get(axis) ?? [];
482
- axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])));
483
- continue;
484
- }
485
- axes.set(axis, nextValues);
486
- }
487
- }
488
- function applyLayer(baseAxes, layer) {
489
- const nextAxes = new Map(Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]));
490
- for (const axis of layer?.disable ?? []) nextAxes.delete(axis);
491
- applyAxisValues(nextAxes, layer?.extend, "extend");
492
- applyAxisValues(nextAxes, layer?.override, "override");
493
- return nextAxes;
494
- }
495
- function expandAxesToRows(axes) {
496
- if (axes.size === 0) return [{}];
497
- const dimensions = Array.from(axes.entries());
498
- let selections = [{}];
499
- for (const [axis, values] of dimensions) {
500
- if (values.length === 0) return [];
501
- const nextSelections = [];
502
- for (const selection of selections) for (const value of values) nextSelections.push({
503
- ...selection,
504
- [axis]: value
505
- });
506
- selections = nextSelections;
507
- }
508
- return selections;
509
- }
510
- function createStableRowId(matrix) {
511
- const segments = Object.entries(matrix).sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis)).map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`);
512
- if (segments.length === 0) return "default";
513
- return segments.join("&");
514
- }
515
- function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
516
- return [
517
- encodeTaskIdSegment(entryId),
518
- encodeTaskIdSegment(inferenceExecutorId),
519
- `run=${encodeTaskIdSegment(runRowId)}`,
520
- `eval=${encodeTaskIdSegment(evalRowId)}`
521
- ].join("::");
522
- }
523
- function createResolvedRunAxes(entry, runMatrix) {
524
- let resolvedAxes = /* @__PURE__ */ new Map();
525
- for (const layerInput of [
526
- runMatrix,
527
- entry.matrix?.runMatrix,
528
- entry.task?.matrix?.runMatrix
529
- ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
530
- return resolvedAxes;
531
- }
532
- function createResolvedEvalAxes(entry, evalMatrix) {
533
- let resolvedAxes = /* @__PURE__ */ new Map();
534
- for (const layerInput of [
535
- evalMatrix,
536
- entry.matrix?.evalMatrix,
537
- entry.task?.matrix?.evalMatrix
538
- ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
539
- return resolvedAxes;
540
- }
541
- /**
542
- * Expands collected entries into a stable runner schedule.
543
- *
544
- * Call stack:
545
- *
546
- * {@link collectEvalEntries} (`../runner`)
547
- * -> {@link createRunnerSchedule}
548
- * -> {@link expandAxesToRows}
549
- * -> {@link ScheduledTask}[]
550
- *
551
- * Use when:
552
- * - the runner already knows which eval entries are available
553
- * - each entry must run against multiple inferenceExecutors or matrix variants
554
- *
555
- * Expects:
556
- * - `entries` and `inferenceExecutors` to be provided in the desired execution order
557
- * - matrix axes to use insertion order when generating combinations
558
- */
559
- function createRunnerSchedule(options) {
560
- if (options.entries.length === 0) return [];
561
- if (options.inferenceExecutors.length === 0) return [];
562
- const tasks = [];
563
- for (const entry of options.entries) {
564
- const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
565
- const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
566
- if (runSelections.length === 0 || evalSelections.length === 0) continue;
567
- for (const inferenceExecutor of options.inferenceExecutors) for (const runMatrix of runSelections) for (const evalMatrix of evalSelections) {
568
- const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix);
569
- tasks.push({
570
- entry,
571
- id: createTaskId(entry.id, inferenceExecutor.id, isolatedMatrix.meta.runRowId, isolatedMatrix.meta.evalRowId),
572
- matrix: isolatedMatrix,
573
- inferenceExecutor
574
- });
575
- }
576
- }
577
- return tasks;
578
- }
579
- //#endregion
580
- //#region src/core/runner/task-context.ts
581
- function createNoopTaskCacheRuntime() {
582
- return { namespace(name) {
583
- return { file(options) {
584
- const key = options.key.join("/");
585
- throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
586
- } };
587
- } };
588
- }
589
- function resolveDefaultTaskModel(models, task) {
590
- const runMatrixModelName = task.matrix.run.model;
591
- if (runMatrixModelName != null) {
592
- const matrixSelectedModel = resolveModelByName(models, runMatrixModelName);
593
- if (matrixSelectedModel != null) return matrixSelectedModel;
594
- throw new Error(`Unknown configured model "${runMatrixModelName}" from task.matrix.run.model.`);
595
- }
596
- const matched = resolveModelByName(models, task.inferenceExecutor.id);
597
- if (matched != null) return matched;
598
- if (models.length > 1) throw new Error([
599
- `Multiple configured models are available, but no default model is selected for inferenceExecutor "${task.inferenceExecutor.id}".`,
600
- "Select one model explicitly by either:",
601
- "- setting runMatrix.override.model (or task matrix run.model)",
602
- "- setting project.inferenceExecutors to a matching model id",
603
- "- calling context.model({ name: \"your-model-id-or-alias\" })"
604
- ].join("\n"));
605
- if (models.length === 1) {
606
- const firstModel = models[0];
607
- if (firstModel != null) return firstModel;
608
- }
609
- throw new Error(`No configured model found for inferenceExecutor id "${task.inferenceExecutor.id}".`);
610
- }
611
- /**
612
- * Creates task-scoped model resolver context for runner execution.
613
- *
614
- * Call stack:
615
- *
616
- * {@link runScheduledTasks}
617
- * -> {@link createTaskExecutionContext}
618
- * -> {@link resolveModelByName}
619
- * -> `task.model()` / `task.model({ name })`
620
- */
621
- function createTaskExecutionContext(options) {
622
- return {
623
- cache: options.cache ?? createNoopTaskCacheRuntime(),
624
- model(selection) {
625
- if (selection == null) return resolveDefaultTaskModel(options.models, options.task);
626
- const name = typeof selection === "string" ? selection : selection.name;
627
- const namedModel = resolveModelByName(options.models, name);
628
- if (namedModel == null) throw new Error(`Unknown configured model "${name}".`);
629
- return namedModel;
630
- }
631
- };
632
- }
633
- //#endregion
634
- export { runScheduledTasks as a, aggregateRunResults as c, RunnerExecutionError as i, createFilesystemTaskCacheRuntime as l, createRunnerSchedule as n, asProjectRelativePath as o, createRunnerRuntimeContext as r, collectEvalEntries as s, createTaskExecutionContext as t, normalizeCacheFilePathSegments as u };
635
-
636
- //# sourceMappingURL=runner-Dpy-eivM.mjs.map