vieval 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +6 -3
  2. package/dist/bin/vieval.d.mts +1 -0
  3. package/dist/bin/vieval.mjs +33 -0
  4. package/dist/bin/vieval.mjs.map +1 -0
  5. package/dist/cli/index.d.mts +32 -0
  6. package/dist/cli/index.mjs +1 -2582
  7. package/dist/cli-sanbKtQq.mjs +2821 -0
  8. package/dist/cli-sanbKtQq.mjs.map +1 -0
  9. package/dist/config.d.mts +2 -2
  10. package/dist/config.mjs +16 -1
  11. package/dist/config.mjs.map +1 -0
  12. package/dist/core/assertions/index.d.mts +314 -2
  13. package/dist/core/assertions/index.mjs +182 -1
  14. package/dist/core/assertions/index.mjs.map +1 -0
  15. package/dist/core/inference-executors/index.d.mts +1 -1
  16. package/dist/core/inference-executors/index.mjs +1 -1
  17. package/dist/core/processors/results/index.d.mts +1 -1
  18. package/dist/core/runner/index.d.mts +3 -2
  19. package/dist/core/runner/index.mjs +637 -2
  20. package/dist/core/runner/index.mjs.map +1 -0
  21. package/dist/core/scheduler/index.d.mts +2 -0
  22. package/dist/core/scheduler/index.mjs +188 -0
  23. package/dist/core/scheduler/index.mjs.map +1 -0
  24. package/dist/{env-C7X81PWa.mjs → env--94B0UtW.mjs} +1 -1
  25. package/dist/{env-C7X81PWa.mjs.map → env--94B0UtW.mjs.map} +1 -1
  26. package/dist/{env-DtpjACOW.d.mts → env-BeHv_5mo.d.mts} +1 -1
  27. package/dist/{expect-extensions-BOzwV5EJ.mjs → expect-extensions-DCSqlneN.mjs} +2 -2
  28. package/dist/{expect-extensions-BOzwV5EJ.mjs.map → expect-extensions-DCSqlneN.mjs.map} +1 -1
  29. package/dist/expect.d.mts +10 -2
  30. package/dist/expect.mjs +16 -1
  31. package/dist/expect.mjs.map +1 -0
  32. package/dist/{index-BDMEAmf2.d.mts → index-DBZKkpBe.d.mts} +106 -4
  33. package/dist/index-fakXoZEe.d.mts +147 -0
  34. package/dist/index.d.mts +111 -12
  35. package/dist/index.mjs +216 -55
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/models-DIGdOUpJ.mjs.map +1 -1
  38. package/dist/plugins/chat-models/index.d.mts +21 -1
  39. package/dist/plugins/chat-models/index.mjs +27 -1
  40. package/dist/plugins/chat-models/index.mjs.map +1 -1
  41. package/dist/queue-DsZQkZO_.mjs +21 -0
  42. package/dist/queue-DsZQkZO_.mjs.map +1 -0
  43. package/dist/{registry-CHJcTN2W.mjs → registry-CcKZqDJY.mjs} +27 -5
  44. package/dist/registry-CcKZqDJY.mjs.map +1 -0
  45. package/dist/testing/expect-extensions.d.mts +1 -1
  46. package/dist/testing/expect-extensions.mjs +1 -1
  47. package/package.json +9 -3
  48. package/dist/assertions-DcAjfVDA.mjs +0 -183
  49. package/dist/assertions-DcAjfVDA.mjs.map +0 -1
  50. package/dist/cli/index.mjs.map +0 -1
  51. package/dist/config-CHN24egi.mjs +0 -17
  52. package/dist/config-CHN24egi.mjs.map +0 -1
  53. package/dist/expect-B2vaoRVZ.d.mts +0 -10
  54. package/dist/expect-CaXiUkwY.mjs +0 -17
  55. package/dist/expect-CaXiUkwY.mjs.map +0 -1
  56. package/dist/index-C3gPFmcR.d.mts +0 -314
  57. package/dist/registry-CHJcTN2W.mjs.map +0 -1
  58. package/dist/runner-Dpy-eivM.mjs +0 -636
  59. package/dist/runner-Dpy-eivM.mjs.map +0 -1
@@ -1,2 +1,637 @@
1
- import { a as runScheduledTasks, c as aggregateRunResults, i as RunnerExecutionError, l as createFilesystemTaskCacheRuntime, n as createRunnerSchedule, o as asProjectRelativePath, r as createRunnerRuntimeContext, s as collectEvalEntries, t as createTaskExecutionContext, u as normalizeCacheFilePathSegments } from "../../runner-Dpy-eivM.mjs";
2
- export { RunnerExecutionError, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createTaskExecutionContext, normalizeCacheFilePathSegments, runScheduledTasks };
1
+ import { createSchedulerRuntime, getActiveScopes } from "../scheduler/index.mjs";
2
+ import { t as resolveModelByName } from "../../models-DIGdOUpJ.mjs";
3
+ import { createRequire } from "node:module";
4
+ import process from "node:process";
5
+ import { errorMessageFrom } from "@moeru/std";
6
+ import { basename, dirname, join, relative } from "node:path";
7
+ import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
8
+ import { fileURLToPath } from "node:url";
9
+ import { Buffer } from "node:buffer";
10
+ import { createReadStream, createWriteStream } from "node:fs";
11
+ import { limitConcurrency } from "@vitest/runner/utils";
12
+ //#region src/core/cache/filesystem.ts
13
/**
 * Turns an arbitrary string into one filesystem-safe path segment.
 *
 * - Surrounding whitespace is trimmed; an empty result becomes `"default"`.
 * - Every run of characters outside `[A-Za-z0-9_.-]` collapses into a single `-`.
 * - A segment made only of dots (`.`, `..`, ...) has each dot replaced by `-`, so a
 *   cache key or namespace can never act as "current/parent directory" once joined.
 *
 * @param {string} value Raw segment text.
 * @returns {string} Sanitized, non-empty segment that is never a dot-only name.
 */
function sanitizePathSegment(value) {
  const normalized = value.trim();
  if (normalized.length === 0) return "default";
  const sanitized = normalized.replace(/[^\w.-]+/g, "-");
  // Dots are allowed inside names ("v1.0"), but a dot-only segment would be resolved
  // by `path.join` as a relative navigation step and escape the cache directory.
  if (/^\.+$/.test(sanitized)) return sanitized.replaceAll(".", "-");
  return sanitized;
}
18
/**
 * Resolves the file extension (without a leading dot) for a cache artifact.
 *
 * An explicit `extension` wins. It is trimmed, leading dots are stripped, and any run
 * of characters outside `[A-Za-z0-9_.-]` becomes `-`, so the extension cannot inject
 * path separators. If nothing usable remains (for example `"."`), the media type is
 * consulted instead.
 *
 * Media types are compared case-insensitively and without parameters, so
 * `"Application/JSON; charset=utf-8"` resolves like `"application/json"`.
 *
 * @param {string | null | undefined} extension Explicit extension, with or without a leading dot.
 * @param {string | null | undefined} mediaType Media type used when no extension is given.
 * @returns {string | undefined} Extension without a dot, or `undefined` when none can be derived.
 */
function normalizeExtension(extension, mediaType) {
  if (extension != null && extension.length > 0) {
    const cleaned = extension.trim().replace(/^\.+/, "").replace(/[^\w.-]+/g, "-");
    // An extension that was only dots/whitespace would produce a "name." file; fall through.
    if (cleaned.length > 0) return cleaned;
  }
  if (mediaType == null || mediaType.length === 0) return;
  // Drop parameters such as "; charset=utf-8" and ignore letter case.
  const essence = mediaType.split(";", 1)[0].trim().toLowerCase();
  if (essence === "application/json") return "json";
  if (essence === "text/plain") return "txt";
  if (essence === "audio/wav") return "wav";
}
25
/**
 * Converts cache file options into deterministic relative path segments.
 *
 * Every key segment is sanitized; the resolved extension (from `ext`, else from
 * `mediaType`) is appended to the final segment. An empty key falls back to the
 * file name `artifact`.
 *
 * Example:
 * - input:  `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`
 * - output: `['cases', 'dataset-hash', 'v1.json']`
 *
 * @param {{ key: string[], ext?: string, mediaType?: string }} options Cache file options.
 * @returns {string[]} Relative path segments, last one being the file name.
 */
function normalizeCacheFilePathSegments(options) {
  const segments = options.key.map((part) => sanitizePathSegment(part));
  const suffix = normalizeExtension(options.ext, options.mediaType);
  const hasSuffix = suffix != null;
  if (segments.length === 0) return [hasSuffix ? `artifact.${suffix}` : "artifact"];
  if (!hasSuffix) return segments;
  const fileName = segments.at(-1) ?? "artifact";
  return [...segments.slice(0, -1), `${fileName}.${suffix}`];
}
43
/**
 * Writes `content` to `path` through a temporary sibling file plus `rename`, so
 * readers never observe a partially written file.
 *
 * The parent directory is created when missing. If writing or renaming fails, the
 * temporary file is removed (best effort) before the original error is rethrown, so
 * failed writes do not leave `*.tmp-*` files behind in the cache.
 *
 * @param {string} path Final destination path.
 * @param {string | Uint8Array} content Data handed to `writeFile`.
 * @returns {Promise<void>}
 * @throws Whatever `mkdir`, `writeFile`, or `rename` rejects with.
 */
async function writeAtomically(path, content) {
  const directory = dirname(path);
  // pid + timestamp + random suffix keeps concurrent writers from sharing a temp file.
  const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  await mkdir(directory, { recursive: true });
  try {
    await writeFile(temporaryPath, content);
    await rename(temporaryPath, path);
  } catch (error) {
    // Imported here so this block does not depend on the file-level import list.
    const { rm: removeFile } = await import("node:fs/promises");
    try {
      await removeFile(temporaryPath, { force: true });
    } catch {
      // Cleanup is best effort; the write/rename failure is the error callers need.
    }
    throw error;
  }
}
50
/**
 * Builds the artifact handle for a single cache file.
 *
 * Reads go straight to `path`. Buffer, text, and JSON writes go through
 * `writeAtomically`; `openWriteStream` writes directly to `path` after creating the
 * parent directory.
 *
 * No method depends on `this`, so members can be destructured or passed as callbacks
 * (`const { loadAsCasesInput } = handle`) without breaking.
 *
 * @param {string} path Absolute or relative path of the cache file.
 * @returns {object} Handle exposing `path`, `exists`, stream openers, and
 *   buffer/text/JSON read and write helpers.
 */
function createCacheFileHandle(path) {
  // Shared by `readJson` and both `loadAs*` helpers; a closure instead of
  // `this.readJson()` so the helpers survive being detached from the handle.
  const readJson = async () => JSON.parse(await readFile(path, "utf-8"));
  return {
    path,
    async exists() {
      try {
        await access(path);
        return true;
      } catch {
        // Any access failure is reported as "not there".
        return false;
      }
    },
    openReadStream() {
      return createReadStream(path);
    },
    async openWriteStream() {
      await mkdir(dirname(path), { recursive: true });
      return createWriteStream(path);
    },
    async readBuffer() {
      return await readFile(path);
    },
    async writeBuffer(value) {
      await writeAtomically(path, value);
    },
    async readText(encoding = "utf-8") {
      return await readFile(path, encoding);
    },
    async writeText(value, encoding = "utf-8") {
      await writeAtomically(path, Buffer.from(value, encoding));
    },
    readJson,
    async writeJson(value) {
      // Pretty-printed with a trailing newline.
      await writeAtomically(path, `${JSON.stringify(value, null, 2)}\n`);
    },
    async loadAsCasesInput() {
      return await readJson();
    },
    async loadAsExpectFixture() {
      return await readJson();
    }
  };
}
94
/**
 * Creates a namespace view over the cache directory.
 *
 * @param {string} baseDirectory Directory that contains all namespaces.
 * @param {string} namespace Namespace name; sanitized before becoming a directory name.
 * @returns {{ file(options: object): object }} Factory for file handles inside the namespace.
 */
function createCacheNamespace(baseDirectory, namespace) {
  function file(options) {
    const fileSegments = normalizeCacheFilePathSegments(options);
    const namespaceDirectory = sanitizePathSegment(namespace);
    const absolutePath = join(baseDirectory, namespaceDirectory, ...fileSegments);
    return createCacheFileHandle(absolutePath);
  }
  return { file };
}
100
/**
 * Creates a filesystem-backed task cache runtime with deterministic paths.
 *
 * File handles resolve under:
 * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
 * where `workspaceId`, `projectName`, and the namespace are sanitized into single
 * path segments.
 *
 * Expects:
 * - `cacheRootDirectory` to be writable by the running process
 * - `workspaceId` and `projectName` to stay stable if paths must be reproducible
 *
 * @param {{ cacheRootDirectory: string, workspaceId: string, projectName: string }} options
 * @returns {{ namespace(name: string): object }} Runtime that hands out cache namespaces.
 */
function createFilesystemTaskCacheRuntime(options) {
  const scopeSegments = [
    sanitizePathSegment(options.workspaceId),
    sanitizePathSegment(options.projectName)
  ];
  const baseDirectory = join(options.cacheRootDirectory, ...scopeSegments);
  const namespace = (name) => createCacheNamespace(baseDirectory, name);
  return { namespace };
}
123
+ //#endregion
124
+ //#region src/core/runner/aggregate.ts
125
/**
 * Copies a scheduled task matrix one level deep, so the summary owns its own
 * `eval`, `meta`, and `run` objects.
 *
 * @param {{ eval: object, meta: object, run: object }} matrix Matrix to copy.
 * @returns {{ eval: object, meta: object, run: object }} Shallow copies of each section.
 */
function cloneScheduledTaskMatrix(matrix) {
  const { eval: evalSelection, meta, run } = matrix;
  return {
    eval: { ...evalSelection },
    meta: { ...meta },
    run: { ...run }
  };
}
132
/**
 * Validates a score kind.
 *
 * @param {string} kind Candidate kind.
 * @returns {"exact" | "judge"} The same value when it is a known kind.
 * @throws {TypeError} For any other value.
 */
function assertKnownScoreKind(kind) {
  switch (kind) {
    case "exact":
    case "judge":
      return kind;
    default:
      throw new TypeError(`Unknown eval score kind "${kind}".`);
  }
}
136
/**
 * Computes the arithmetic mean of a score list.
 *
 * @param {number[]} scores Scores to average.
 * @returns {number | null} Mean value, or `null` for an empty list.
 */
function average(scores) {
  const count = scores.length;
  if (count === 0) return null;
  let total = 0;
  scores.forEach((score) => {
    total += score;
  });
  return total / count;
}
140
/**
 * Combines the exact and judge averages into one hybrid value.
 *
 * When both are present the result is their midpoint; when only one is present it
 * is returned as-is; when neither is present the result is `null`.
 *
 * @param {number | null | undefined} exactAverage Average of exact scores.
 * @param {number | null | undefined} judgeAverage Average of judge scores.
 * @returns {number | null} Hybrid average.
 */
function createHybridAverage(exactAverage, judgeAverage) {
  const hasExact = exactAverage != null;
  const hasJudge = judgeAverage != null;
  if (hasExact && hasJudge) return (exactAverage + judgeAverage) / 2;
  if (hasExact) return exactAverage;
  return hasJudge ? judgeAverage : null;
}
146
/**
 * Splits score records into `exact` and `judge` value lists, preserving input order.
 *
 * @param {Array<{ kind: string, score: number }>} scores Score records.
 * @returns {{ exact: number[], judge: number[] }} Score values grouped by kind.
 * @throws {TypeError} When a record has an unknown `kind` (via `assertKnownScoreKind`).
 */
function collectScoreBuckets(scores) {
  const exact = [];
  const judge = [];
  for (const record of scores) {
    const bucket = assertKnownScoreKind(record.kind) === "exact" ? exact : judge;
    bucket.push(record.score);
  }
  return { exact, judge };
}
160
/**
 * Builds the per-run summary: exact, judge, and hybrid averages plus identifying
 * fields and a copy of the task matrix.
 *
 * @param {object} result Run result with `entryId`, `id`, `inferenceExecutorId`, `matrix`, `scores`.
 * @returns {object} Summary for that run.
 */
function createRunSummary(result) {
  const { exact, judge } = collectScoreBuckets(result.scores);
  const exactAverage = average(exact);
  const judgeAverage = average(judge);
  const hybridAverage = createHybridAverage(exactAverage, judgeAverage);
  const matrix = cloneScheduledTaskMatrix(result.matrix);
  return {
    entryId: result.entryId,
    exactAverage,
    hybridAverage,
    id: result.id,
    judgeAverage,
    matrix,
    inferenceExecutorId: result.inferenceExecutorId
  };
}
174
/**
 * Builds one summary over a group of run results by pooling all of their scores.
 *
 * Averages are computed over the pooled scores, not over per-run averages.
 *
 * @param {string} inferenceExecutorId Label stored on the summary.
 * @param {object[]} results Run results belonging to the group.
 * @returns {object} Pooled exact/judge/hybrid averages plus `runCount`.
 */
function createProviderSummary(inferenceExecutorId, results) {
  const perRunBuckets = results.map((result) => collectScoreBuckets(result.scores));
  const exactAverage = average(perRunBuckets.flatMap((buckets) => buckets.exact));
  const judgeAverage = average(perRunBuckets.flatMap((buckets) => buckets.judge));
  const hybridAverage = createHybridAverage(exactAverage, judgeAverage);
  return {
    exactAverage,
    hybridAverage,
    judgeAverage,
    inferenceExecutorId,
    runCount: results.length
  };
}
192
/**
 * Aggregates exact-match and judge-based scores into runner summaries.
 *
 * Produces three views of the same results:
 * - `runs`: one summary per result, in input order
 * - `inferenceExecutors`: one pooled summary per executor id, sorted by id
 * - `overall`: one pooled summary across every result
 *
 * Expects:
 * - each score to already be normalized to the `0..1` range
 * - `scores[].kind` to be `'exact'` or `'judge'` (anything else throws)
 *
 * @param {object[]} results Normalized run results.
 * @returns {{ overall: object, inferenceExecutors: object[], runs: object[] }}
 */
function aggregateRunResults(results) {
  const runs = results.map((result) => createRunSummary(result));

  // Group in one pass; Map keeps first-seen order of executor ids.
  const resultsByExecutor = new Map();
  for (const result of results) {
    const group = resultsByExecutor.get(result.inferenceExecutorId);
    if (group == null) resultsByExecutor.set(result.inferenceExecutorId, [result]);
    else group.push(result);
  }
  const inferenceExecutors = Array.from(
    resultsByExecutor,
    ([inferenceExecutorId, group]) => createProviderSummary(inferenceExecutorId, group)
  ).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));

  // The "overall" label is only a placeholder id; it is not part of the output.
  const pooled = createProviderSummary("overall", results);
  return {
    overall: {
      exactAverage: pooled.exactAverage,
      hybridAverage: pooled.hybridAverage,
      judgeAverage: pooled.judgeAverage,
      runCount: pooled.runCount
    },
    inferenceExecutors,
    runs
  };
}
228
+ //#endregion
229
+ //#region src/core/runner/collect.ts
230
/** File-name suffix that marks a module as an eval entry. */
const evalFileSuffix = ".eval.ts";
/** Matches paths that start with a drive letter (`C:/`), a `/`, or a `\\` prefix. */
const absolutePathPattern = /^(?:[A-Z]:\/|\/|\\\\)/i;
/**
 * Rewrites every backslash in a path to a forward slash.
 *
 * @param {string} value Path text.
 * @returns {string} Path using `/` separators only.
 */
function normalizePath(value) {
  return value.split("\\").join("/");
}
235
/**
 * Converts a file path into a project-relative path when the file lives inside the
 * project root; otherwise returns the path unchanged apart from `/` separators.
 *
 * Inside the root:
 * - `/repo/plugins/chess/evals/commentary.eval.ts` with root `/repo`
 *   becomes `plugins/chess/evals/commentary.eval.ts`
 *
 * Left as-is (only separators normalized):
 * - a drive-letter path when the root has no drive letter or a different one
 * - any path whose relative form is `..`, starts with `../`, or is still absolute
 *
 * @param {string} filePath File path to convert.
 * @param {{ projectRootDirectory: string }} context Runner runtime context.
 * @returns {string} Project-relative path, or the normalized original path.
 */
function asProjectRelativePath(filePath, context) {
  const drivePrefixPattern = /^[A-Z]:\//i;
  const normalizedFilePath = normalizePath(filePath);
  const normalizedRoot = normalizePath(context.projectRootDirectory);
  const fileDrive = normalizedFilePath.match(drivePrefixPattern)?.[0];
  const rootDrive = normalizedRoot.match(drivePrefixPattern)?.[0];

  if (fileDrive != null) {
    // A drive-qualified file cannot be expressed relative to a drive-less root
    // or to a root on another drive.
    const sameDrive = rootDrive != null && fileDrive.toLowerCase() === rootDrive.toLowerCase();
    if (!sameDrive) return normalizedFilePath;
  }

  const relativeFilePath = normalizePath(relative(context.projectRootDirectory, filePath));
  const escapesProjectRoot = absolutePathPattern.test(relativeFilePath)
    || relativeFilePath === ".."
    || relativeFilePath.startsWith("../");
  return escapesProjectRoot ? normalizedFilePath : relativeFilePath;
}
259
/**
 * Converts a `file:` module href into a filesystem path.
 *
 * @param {string} moduleHref Module URL string.
 * @returns {string | null} File path, or `null` when the href is not a `file:` URL
 *   or `fileURLToPath` rejects it.
 */
function resolveModuleFilePath(moduleHref) {
  const isFileUrl = moduleHref.startsWith("file:");
  if (isFileUrl) {
    try {
      return fileURLToPath(moduleHref);
    } catch {
      // Malformed file URLs are treated the same as non-file hrefs.
    }
  }
  return null;
}
267
/**
 * Turns one loaded module into a collected eval entry.
 *
 * The entry keeps everything from the module's default export and adds path-derived
 * fields: `directory` (project-relative, `""` at the root), `filePath`, `name` (file
 * name without `.eval.ts`), and `id` (`directory/name`, or just `name` at the root).
 *
 * @param {string} moduleHref Module URL string.
 * @param {{ default: object }} moduleDefinition Loaded module namespace.
 * @param {{ projectRootDirectory: string }} context Runner runtime context.
 * @returns {object | null} Entry, or `null` when the module is not a `file:` URL,
 *   does not end in `.eval.ts`, or has an empty entry name.
 */
function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
  const filePath = resolveModuleFilePath(moduleHref);
  if (!filePath) return null;

  const relativeFilePath = asProjectRelativePath(filePath, context);
  const name = relativeFilePath.endsWith(evalFileSuffix)
    ? basename(relativeFilePath, evalFileSuffix)
    : "";
  if (name.length === 0) return null;

  const parentDirectory = dirname(relativeFilePath);
  const directory = parentDirectory === "." ? "" : parentDirectory;
  const id = directory.length === 0 ? name : `${directory}/${name}`;
  return {
    ...moduleDefinition.default,
    directory,
    filePath,
    id,
    name
  };
}
284
/**
 * Collects loaded eval modules into runner entries sorted by id.
 *
 * Modules that do not yield an entry (see `createCollectedEvalEntry`) are skipped.
 *
 * Use when:
 * - candidate eval modules are already loaded, keyed by module href
 * - downstream scheduling needs stable entry ids and directory metadata
 *
 * @param {Record<string, { default: object }>} modules Loaded modules keyed by href.
 * @param {{ projectRootDirectory: string }} context Runner runtime context.
 * @returns {object[]} Entries ordered by `id` using `localeCompare`.
 */
function collectEvalEntries(modules, context) {
  const collected = [];
  for (const [moduleHref, moduleDefinition] of Object.entries(modules)) {
    const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context);
    if (entry) collected.push(entry);
  }
  return collected.sort((left, right) => left.id.localeCompare(right.id));
}
305
+ //#endregion
306
+ //#region src/core/runner/run.ts
307
/**
 * Creates the fallback execution context used when the caller supplies none.
 *
 * Nothing is configured in this context: asking for a cache file or a model always
 * throws an `Error` that names what was requested.
 *
 * @param {{ inferenceExecutor: { id: string } }} task Task the context belongs to.
 * @returns {{ cache: object, model(options?: string | { name?: string }): never }}
 */
function createDefaultExecutionContext(task) {
  const cache = {
    namespace(name) {
      return {
        file(options) {
          const key = options.key.join("/");
          throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
        }
      };
    }
  };
  return {
    cache,
    model(options) {
      const requestedModelName = typeof options === "string" ? options : options?.name;
      if (requestedModelName == null) {
        throw new Error(`No model registry configured for task inferenceExecutor id "${task.inferenceExecutor.id}".`);
      }
      throw new Error(`No model registry configured. Requested model: ${requestedModelName}`);
    }
  };
}
322
/**
 * Error raised when a scheduled task fails before producing a normalized result.
 *
 * The message embeds the task id and the text derived from `cause` through
 * `errorMessageFrom`; the original failure stays available on `cause`.
 */
var RunnerExecutionError = class extends Error {
  /**
   * Stable id of the task that failed.
   */
  taskId;
  /**
   * @param {string} taskId Stable id of the failed task.
   * @param {unknown} cause Original failure.
   */
  constructor(taskId, cause) {
    const reason = errorMessageFrom(cause) ?? "Unknown runner execution failure.";
    super(`Runner task "${taskId}" failed: ${reason}`);
    Object.assign(this, { name: "RunnerExecutionError", taskId, cause });
  }
};
338
/**
 * Wraps a failure in a `RunnerExecutionError`, reusing `cause` when it is already a
 * `RunnerExecutionError` for the same task (avoids double wrapping).
 *
 * @param {string} taskId Stable id of the failed task.
 * @param {unknown} cause Original failure.
 * @returns {RunnerExecutionError}
 */
function createRunnerExecutionError(taskId, cause) {
  const alreadyWrapped = cause instanceof RunnerExecutionError && cause.taskId === taskId;
  return alreadyWrapped ? cause : new RunnerExecutionError(taskId, cause);
}
342
/**
 * Executes runner tasks and aggregates the normalized results.
 *
 * Tasks run one at a time by default. When `options.maxConcurrency` is greater than
 * 1, up to that many tasks run at once via `limitConcurrency`. Results are
 * aggregated in the order of `tasks` in both modes.
 *
 * Call stack:
 *
 * {@link createRunnerSchedule}
 *   -> {@link runScheduledTasks}
 *     -> `executor(task, executionContext)`
 *       -> {@link aggregateRunResults}
 *
 * Expects:
 * - `executor` to return normalized `0..1` scores
 * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers
 *
 * Failure behavior:
 * - sequential mode stops at the first failing task
 * - concurrent mode rejects on the first failure; tasks already started are not
 *   cancelled by this function
 * - an exception thrown by `onTaskEnd(task, "failed")` is ignored so the executor's
 *   own error is the one reported
 *
 * @param {object[]} tasks Scheduled tasks, already expanded from the matrix.
 * @param {(task: object, context: object) => unknown} executor Runs one task.
 * @param {object} [options]
 * @param {(task: object) => object | null | undefined} [options.createExecutionContext]
 *   Per-task context factory; a nullish result selects the default context.
 * @param {(task: object) => void} [options.onTaskStart]
 * @param {(task: object, state: "passed" | "failed") => void} [options.onTaskEnd]
 * @param {number} [options.maxConcurrency=1] Values that are not greater than 1
 *   (including `NaN`) select sequential execution.
 * @returns {Promise<object>} Aggregated run results.
 * @throws {RunnerExecutionError} When task setup, a hook, or the executor throws.
 */
async function runScheduledTasks(tasks, executor, options = {}) {
  if (tasks.length === 0) return aggregateRunResults([]);

  async function executeScheduledTask(task) {
    let executionContext;
    try {
      executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task);
    } catch (error) {
      throw createRunnerExecutionError(task.id, error);
    }
    try {
      options.onTaskStart?.(task);
    } catch (error) {
      throw createRunnerExecutionError(task.id, error);
    }
    let runResult;
    try {
      runResult = await executor(task, executionContext);
    } catch (error) {
      try {
        options.onTaskEnd?.(task, "failed");
      } catch {
        // Deliberately ignored: the executor failure below must not be masked.
      }
      throw createRunnerExecutionError(task.id, error);
    }
    try {
      options.onTaskEnd?.(task, "passed");
    } catch (error) {
      throw createRunnerExecutionError(task.id, error);
    }
    return runResult;
  }

  const maxConcurrency = options.maxConcurrency ?? 1;
  // Written as `!(x > 1)` instead of `x <= 1` so that NaN also takes the sequential
  // path rather than being handed to the concurrency limiter.
  if (!(maxConcurrency > 1)) {
    const results = [];
    for (const task of tasks) results.push(await executeScheduledTask(task));
    return aggregateRunResults(results);
  }

  const runWithLimit = limitConcurrency(maxConcurrency);
  // Promise.all resolves in input order, so no re-sorting by index is needed.
  const results = await Promise.all(
    tasks.map((task) => runWithLimit(async () => executeScheduledTask(task)))
  );
  return aggregateRunResults(results);
}
408
+ //#endregion
409
+ //#region src/core/runner/runtime-context.ts
410
// CommonJS-style loader bound to this module, used to load `@pnpm/find-workspace-dir`.
const require = createRequire(import.meta.url);
/**
 * Creates the runtime context used for runner path normalization.
 *
 * The project root is whatever `findWorkspaceDir(cwd)` from
 * `@pnpm/find-workspace-dir` resolves to; when that is nullish, the fallback
 * directory is used instead.
 *
 * Use when:
 * - initializing runner infrastructure before collecting eval modules
 * - tests need deterministic root resolution (pass `cwd` and/or the fallback)
 *
 * @param {object} [options]
 * @param {string} [options.cwd] Search start; defaults to this module's directory.
 * @param {string} [options.fallbackProjectRootDirectory] Defaults to the directory
 *   three levels above this module (`../../../` relative to `import.meta.url`).
 * @returns {Promise<{ projectRootDirectory: string }>}
 */
async function createRunnerRuntimeContext(options = {}) {
  const searchStart = options.cwd ?? dirname(fileURLToPath(import.meta.url));
  const fallbackRoot = options.fallbackProjectRootDirectory
    ?? fileURLToPath(new URL("../../../", import.meta.url));
  const { findWorkspaceDir } = require("@pnpm/find-workspace-dir");
  const workspaceDirectory = await findWorkspaceDir(searchStart);
  return { projectRootDirectory: workspaceDirectory ?? fallbackRoot };
}
431
+ //#endregion
432
+ //#region src/core/runner/schedule.ts
433
/** Keys that mark a matrix definition as a layer rather than a plain set of axes. */
const matrixLayerKeys = new Set(["disable", "extend", "override"]);
/** Message for definitions that mix layer keys with axis keys. */
const ambiguousMatrixDefinitionErrorMessage = "Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.";
/**
 * Percent-encodes one task id segment.
 *
 * @param {string} value Raw segment.
 * @returns {string} `encodeURIComponent` form of the segment.
 */
function encodeTaskIdSegment(value) {
  const encoded = encodeURIComponent(value);
  return encoded;
}
/**
 * Converts a matrix axis value to its string form.
 *
 * @param {unknown} value Axis value.
 * @returns {string}
 */
function stringifyMatrixValue(value) {
  const text = String(value);
  return text;
}
/**
 * Shallow-copies a matrix selection.
 *
 * @param {Record<string, string>} matrix Selection to copy.
 * @returns {Record<string, string>} New object with the same own enumerable entries.
 */
function cloneMatrixSelection(matrix) {
  const copy = { ...matrix };
  return copy;
}
448
/**
 * Builds the matrix stored on a scheduled task: copies of the run and eval
 * selections plus the stable row ids derived from them.
 *
 * @param {Record<string, string>} runMatrix Selected run-axis values.
 * @param {Record<string, string>} evalMatrix Selected eval-axis values.
 * @returns {{ eval: object, meta: { evalRowId: string, runRowId: string }, run: object }}
 */
function createScheduledTaskMatrix(runMatrix, evalMatrix) {
  const evalSelection = cloneMatrixSelection(evalMatrix);
  const meta = {
    evalRowId: createStableRowId(evalMatrix),
    runRowId: createStableRowId(runMatrix)
  };
  const runSelection = cloneMatrixSelection(runMatrix);
  return { eval: evalSelection, meta, run: runSelection };
}
458
/**
 * Tells whether a matrix definition is a layer, i.e. it has at least one key and
 * every key is one of the reserved layer keys.
 *
 * @param {object} matrix Matrix definition.
 * @returns {boolean}
 */
function isMatrixLayer(matrix) {
  const keys = Object.keys(matrix);
  if (keys.length === 0) return false;
  return keys.every((key) => matrixLayerKeys.has(key));
}
462
/**
 * Rejects matrix definitions that mix reserved layer keys with axis keys.
 *
 * @param {object} matrix Matrix definition.
 * @throws {TypeError} When both kinds of key are present.
 */
function assertNonAmbiguousMatrixDefinition(matrix) {
  let sawReservedKey = false;
  let sawAxisKey = false;
  for (const key of Object.keys(matrix)) {
    if (matrixLayerKeys.has(key)) sawReservedKey = true;
    else sawAxisKey = true;
  }
  if (sawReservedKey && sawAxisKey) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
}
468
/**
 * Normalizes a matrix input into layer form.
 *
 * A layer is returned as-is; a plain axis map is treated as `{ extend: <axes> }`.
 *
 * @param {object | null | undefined} matrix Matrix input.
 * @returns {object | undefined} Layer, or `undefined` when no input was given.
 * @throws {TypeError} When the input mixes layer keys and axis keys.
 */
function normalizeLayerInputToAxes(matrix) {
  if (matrix == null) return undefined;
  assertNonAmbiguousMatrixDefinition(matrix);
  return isMatrixLayer(matrix) ? matrix : { extend: matrix };
}
474
/**
 * Stringifies axis values and removes duplicates, keeping first-seen order.
 *
 * @param {unknown[]} values Raw axis values.
 * @returns {string[]} Unique string values.
 */
function dedupeAxisValues(values) {
  const asStrings = values.map((value) => String(value));
  return [...new Set(asStrings)];
}
477
/**
 * Applies an axis definition onto `axes` in place.
 *
 * In `"extend"` mode new values are appended to the existing ones (without
 * duplicates); in any other mode the axis values are replaced.
 *
 * @param {Map<string, string[]>} axes Axis map to mutate.
 * @param {Record<string, unknown[]> | null | undefined} definition Axis values to apply.
 * @param {string} mode `"extend"` to merge; anything else replaces.
 */
function applyAxisValues(axes, definition, mode) {
  if (definition == null) return;
  const shouldMerge = mode === "extend";
  Object.entries(definition).forEach(([axis, values]) => {
    const incoming = dedupeAxisValues(values);
    const resolved = shouldMerge
      ? Array.from(new Set([...(axes.get(axis) ?? []), ...incoming]))
      : incoming;
    axes.set(axis, resolved);
  });
}
489
/**
 * Produces a new axis map by applying one layer on top of `baseAxes`.
 *
 * Order of operations: drop `disable`d axes, merge `extend`, then apply `override`.
 * `baseAxes` and its value arrays are left untouched.
 *
 * @param {Map<string, string[]>} baseAxes Axes resolved so far.
 * @param {{ disable?: Iterable<string>, extend?: object, override?: object } | undefined} layer
 * @returns {Map<string, string[]>} Axes after the layer.
 */
function applyLayer(baseAxes, layer) {
  const nextAxes = new Map();
  for (const [axis, values] of baseAxes.entries()) nextAxes.set(axis, [...values]);
  const disabledAxes = layer?.disable ?? [];
  for (const axis of disabledAxes) nextAxes.delete(axis);
  applyAxisValues(nextAxes, layer?.extend, "extend");
  applyAxisValues(nextAxes, layer?.override, "override");
  return nextAxes;
}
496
/**
 * Expands axes into every combination of their values (cartesian product).
 *
 * Axes are visited in insertion order and earlier axes vary slowest. No axes yields
 * one empty row; any axis without values yields no rows at all.
 *
 * @param {Map<string, string[]>} axes Axis name to candidate values.
 * @returns {Array<Record<string, string>>} One object per combination.
 */
function expandAxesToRows(axes) {
  let rows = [{}];
  for (const [axis, values] of axes.entries()) {
    if (values.length === 0) return [];
    rows = rows.flatMap((row) => values.map((value) => ({ ...row, [axis]: value })));
  }
  return rows;
}
511
/**
 * Derives a row id from a matrix selection that does not depend on key order.
 *
 * Entries are sorted by axis name and rendered as `axis=value` pairs (both sides
 * percent-encoded) joined with `&`. An empty selection yields `"default"`.
 *
 * @param {Record<string, string>} matrix Matrix selection.
 * @returns {string} Row id.
 */
function createStableRowId(matrix) {
  const pairs = Object.entries(matrix);
  if (pairs.length === 0) return "default";
  pairs.sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis));
  return pairs
    .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)
    .join("&");
}
516
/**
 * Composes the task id as
 * `<entry>::<executor>::run=<runRow>::eval=<evalRow>`, percent-encoding each part.
 *
 * @param {string} entryId Eval entry id.
 * @param {string} inferenceExecutorId Inference executor id.
 * @param {string} runRowId Row id of the run matrix selection.
 * @param {string} evalRowId Row id of the eval matrix selection.
 * @returns {string} Task id.
 */
function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
  const entryPart = encodeTaskIdSegment(entryId);
  const executorPart = encodeTaskIdSegment(inferenceExecutorId);
  const runPart = encodeTaskIdSegment(runRowId);
  const evalPart = encodeTaskIdSegment(evalRowId);
  return `${entryPart}::${executorPart}::run=${runPart}::eval=${evalPart}`;
}
524
/**
 * Resolves the run axes for an entry by layering, in order: the schedule-level run
 * matrix, the entry's `matrix.runMatrix`, and the entry's `task.matrix.runMatrix`.
 *
 * @param {object} entry Collected eval entry.
 * @param {object | undefined} runMatrix Schedule-level run matrix.
 * @returns {Map<string, string[]>} Resolved run axes.
 */
function createResolvedRunAxes(entry, runMatrix) {
  const layerInputs = [runMatrix, entry.matrix?.runMatrix, entry.task?.matrix?.runMatrix];
  return layerInputs.reduce(
    (axes, layerInput) => applyLayer(axes, normalizeLayerInputToAxes(layerInput)),
    /* @__PURE__ */ new Map()
  );
}
533
/**
 * Resolves the eval axes for an entry by layering, in order: the schedule-level eval
 * matrix, the entry's `matrix.evalMatrix`, and the entry's `task.matrix.evalMatrix`.
 *
 * @param {object} entry Collected eval entry.
 * @param {object | undefined} evalMatrix Schedule-level eval matrix.
 * @returns {Map<string, string[]>} Resolved eval axes.
 */
function createResolvedEvalAxes(entry, evalMatrix) {
  const layerInputs = [evalMatrix, entry.matrix?.evalMatrix, entry.task?.matrix?.evalMatrix];
  return layerInputs.reduce(
    (axes, layerInput) => applyLayer(axes, normalizeLayerInputToAxes(layerInput)),
    /* @__PURE__ */ new Map()
  );
}
542
/**
 * Expands collected entries into the list of tasks to run.
 *
 * For every entry, one task is produced per combination of inference executor, run
 * matrix row, and eval matrix row. Entries whose run or eval axes expand to zero
 * rows are skipped.
 *
 * Use when:
 * - the runner already knows which eval entries are available
 * - each entry must run against several inference executors or matrix variants
 *
 * Expects:
 * - `entries` and `inferenceExecutors` in the desired execution order
 * - matrix axes to use insertion order when generating combinations
 *
 * @param {object} options
 * @param {object[]} options.entries Collected eval entries.
 * @param {Array<{ id: string }>} options.inferenceExecutors Executors to schedule against.
 * @param {object} [options.runMatrix] Schedule-level run matrix.
 * @param {object} [options.evalMatrix] Schedule-level eval matrix.
 * @returns {object[]} Tasks shaped `{ entry, id, matrix, inferenceExecutor }`.
 */
function createRunnerSchedule(options) {
  const { entries, inferenceExecutors } = options;
  if (entries.length === 0 || inferenceExecutors.length === 0) return [];

  const schedule = [];
  for (const entry of entries) {
    const runRows = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
    const evalRows = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
    if (runRows.length === 0 || evalRows.length === 0) continue;

    for (const inferenceExecutor of inferenceExecutors) {
      for (const runRow of runRows) {
        for (const evalRow of evalRows) {
          const matrix = createScheduledTaskMatrix(runRow, evalRow);
          const id = createTaskId(entry.id, inferenceExecutor.id, matrix.meta.runRowId, matrix.meta.evalRowId);
          schedule.push({ entry, id, matrix, inferenceExecutor });
        }
      }
    }
  }
  return schedule;
}
580
+ //#endregion
581
+ //#region src/core/runner/task-context.ts
582
/**
 * Creates a placeholder cache runtime for contexts without a configured cache.
 *
 * Namespaces can be obtained, but requesting any file throws an `Error` naming the
 * namespace and the `/`-joined key.
 *
 * @returns {{ namespace(name: string): { file(options: { key: string[] }): never } }}
 */
function createNoopTaskCacheRuntime() {
  const namespace = (name) => ({
    file(options) {
      const key = options.key.join("/");
      throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
    }
  });
  return { namespace };
}
590
/**
 * Picks the model a task uses when it does not name one.
 *
 * Resolution order:
 * 1. `task.matrix.run.model`, when set (an unknown name is an error)
 * 2. a model matching the task's inference executor id
 * 3. the only configured model, when exactly one exists
 *
 * @param {object[]} models Configured models.
 * @param {object} task Scheduled task.
 * @returns {object} Resolved model.
 * @throws {Error} When the matrix names an unknown model, when several models exist
 *   and none matches, or when no model is configured.
 */
function resolveDefaultTaskModel(models, task) {
  const matrixModelName = task.matrix.run.model;
  if (matrixModelName != null) {
    const fromMatrix = resolveModelByName(models, matrixModelName);
    if (fromMatrix == null) {
      throw new Error(`Unknown configured model "${matrixModelName}" from task.matrix.run.model.`);
    }
    return fromMatrix;
  }

  const fromExecutor = resolveModelByName(models, task.inferenceExecutor.id);
  if (fromExecutor != null) return fromExecutor;

  if (models.length > 1) {
    const guidance = [
      `Multiple configured models are available, but no default model is selected for inferenceExecutor "${task.inferenceExecutor.id}".`,
      "Select one model explicitly by either:",
      "- setting runMatrix.override.model (or task matrix run.model)",
      "- setting project.inferenceExecutors to a matching model id",
      "- calling context.model({ name: \"your-model-id-or-alias\" })"
    ];
    throw new Error(guidance.join("\n"));
  }

  const soleModel = models.length === 1 ? models[0] : null;
  if (soleModel != null) return soleModel;
  throw new Error(`No configured model found for inferenceExecutor id "${task.inferenceExecutor.id}".`);
}
612
/**
 * Creates the task-scoped execution context handed to executors.
 *
 * - `cache` is the supplied cache runtime, or a placeholder that throws on use.
 * - `model()` resolves the task's default model; `model("name")` or
 *   `model({ name })` resolves a configured model by name via `resolveModelByName`.
 *
 * @param {object} options
 * @param {object} [options.cache] Task cache runtime.
 * @param {object[]} options.models Configured models.
 * @param {object} options.task Scheduled task the context belongs to.
 * @returns {{ cache: object, model(selection?: string | { name: string }): object }}
 */
function createTaskExecutionContext(options) {
  const cache = options.cache ?? createNoopTaskCacheRuntime();
  const model = (selection) => {
    if (selection == null) return resolveDefaultTaskModel(options.models, options.task);
    const name = typeof selection === "string" ? selection : selection.name;
    const resolved = resolveModelByName(options.models, name);
    if (resolved != null) return resolved;
    throw new Error(`Unknown configured model "${name}".`);
  };
  return { cache, model };
}
634
+ //#endregion
635
+ export { RunnerExecutionError, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
636
+
637
+ //# sourceMappingURL=index.mjs.map