vieval 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/bin/vieval.mjs.map +1 -1
  4. package/dist/cli/index.d.mts +1 -1
  5. package/dist/cli/index.mjs +1 -1
  6. package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
  7. package/dist/cli-uzS81IPd.mjs.map +1 -0
  8. package/dist/config.d.mts +1 -1
  9. package/dist/config.mjs +1 -1
  10. package/dist/config.mjs.map +1 -1
  11. package/dist/core/assertions/index.d.mts +156 -156
  12. package/dist/core/assertions/index.mjs +82 -82
  13. package/dist/core/assertions/index.mjs.map +1 -1
  14. package/dist/core/inference-executors/index.d.mts +37 -37
  15. package/dist/core/inference-executors/index.mjs +54 -53
  16. package/dist/core/inference-executors/index.mjs.map +1 -1
  17. package/dist/core/processors/results/index.d.mts +18 -18
  18. package/dist/core/processors/results/index.mjs.map +1 -1
  19. package/dist/core/runner/index.d.mts +2 -2
  20. package/dist/core/runner/index.mjs +259 -259
  21. package/dist/core/runner/index.mjs.map +1 -1
  22. package/dist/core/scheduler/index.d.mts +1 -1
  23. package/dist/core/scheduler/index.mjs +65 -65
  24. package/dist/core/scheduler/index.mjs.map +1 -1
  25. package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
  26. package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
  27. package/dist/env-egxaJtNn.mjs.map +1 -0
  28. package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  29. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  30. package/dist/expect.d.mts +1 -3
  31. package/dist/expect.mjs +1 -1
  32. package/dist/expect.mjs.map +1 -1
  33. package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
  34. package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
  35. package/dist/index.d.mts +208 -197
  36. package/dist/index.mjs +148 -148
  37. package/dist/index.mjs.map +1 -1
  38. package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
  39. package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
  40. package/dist/plugins/chat-models/index.d.mts +279 -279
  41. package/dist/plugins/chat-models/index.mjs +360 -360
  42. package/dist/plugins/chat-models/index.mjs.map +1 -1
  43. package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
  44. package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
  45. package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
  46. package/dist/registry-BK7k6X81.mjs.map +1 -0
  47. package/dist/testing/expect-extensions.d.mts +27 -27
  48. package/dist/testing/expect-extensions.mjs +1 -1
  49. package/package.json +12 -12
  50. package/dist/cli-DTDgaqeI.mjs.map +0 -1
  51. package/dist/env-nV5rVErX.mjs.map +0 -1
  52. package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
  53. package/dist/registry-DMnwE_mY.mjs.map +0 -1
@@ -9,17 +9,28 @@ import { createReadStream, createWriteStream } from "node:fs";
9
9
  import { Buffer } from "node:buffer";
10
10
  import { limitConcurrency } from "@vitest/runner/utils";
11
11
  //#region src/core/cache/filesystem.ts
12
- function sanitizePathSegment(value) {
13
- const normalized = value.trim();
14
- if (normalized.length === 0) return "default";
15
- return normalized.replace(/[^\w.-]+/g, "-");
16
- }
17
- function normalizeExtension(extension, mediaType) {
18
- if (extension != null && extension.length > 0) return extension.startsWith(".") ? extension.slice(1) : extension;
19
- if (mediaType == null || mediaType.length === 0) return;
20
- if (mediaType === "application/json") return "json";
21
- if (mediaType === "text/plain") return "txt";
22
- if (mediaType === "audio/wav") return "wav";
12
+ /**
13
+ * Creates a deterministic filesystem-backed task cache runtime.
14
+ *
15
+ * Use when:
16
+ * - eval tasks need reproducible cache paths for expensive pre-processing outputs
17
+ * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes
18
+ *
19
+ * Expects:
20
+ * - `cacheRootDirectory` to be writable by the running process
21
+ * - `workspaceId` + `projectName` to stay stable for reproducible paths
22
+ *
23
+ * Returns:
24
+ * - task cache runtime that resolves namespaced file handles under:
25
+ * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
26
+ */
27
+ function createFilesystemTaskCacheRuntime(options) {
28
+ const workspaceDirectory = sanitizePathSegment(options.workspaceId);
29
+ const projectDirectory = sanitizePathSegment(options.projectName);
30
+ const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory);
31
+ return { namespace(name) {
32
+ return createCacheNamespace(baseDirectory, name);
33
+ } };
23
34
  }
24
35
  /**
25
36
  * Normalizes cache file options into deterministic relative path segments.
@@ -39,16 +50,8 @@ function normalizeCacheFilePathSegments(options) {
39
50
  const tail = sanitizedKey[sanitizedKey.length - 1] ?? "artifact";
40
51
  return [...withoutTail, `${tail}.${extension}`];
41
52
  }
42
- async function writeAtomically(path, content) {
43
- const directory = dirname(path);
44
- const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
45
- await mkdir(directory, { recursive: true });
46
- await writeFile(temporaryPath, content);
47
- await rename(temporaryPath, path);
48
- }
49
53
  function createCacheFileHandle(path) {
50
54
  return {
51
- path,
52
55
  async exists() {
53
56
  try {
54
57
  await access(path);
@@ -57,6 +60,12 @@ function createCacheFileHandle(path) {
57
60
  return false;
58
61
  }
59
62
  },
63
+ async loadAsCasesInput() {
64
+ return await this.readJson();
65
+ },
66
+ async loadAsExpectFixture() {
67
+ return await this.readJson();
68
+ },
60
69
  openReadStream() {
61
70
  return createReadStream(path);
62
71
  },
@@ -64,29 +73,24 @@ function createCacheFileHandle(path) {
64
73
  await mkdir(dirname(path), { recursive: true });
65
74
  return createWriteStream(path);
66
75
  },
76
+ path,
67
77
  async readBuffer() {
68
78
  return await readFile(path);
69
79
  },
70
- async writeBuffer(value) {
71
- await writeAtomically(path, value);
80
+ async readJson() {
81
+ return JSON.parse(await readFile(path, "utf-8"));
72
82
  },
73
83
  async readText(encoding = "utf-8") {
74
84
  return await readFile(path, encoding);
75
85
  },
76
- async writeText(value, encoding = "utf-8") {
77
- await writeAtomically(path, Buffer.from(value, encoding));
78
- },
79
- async readJson() {
80
- return JSON.parse(await readFile(path, "utf-8"));
86
+ async writeBuffer(value) {
87
+ await writeAtomically(path, value);
81
88
  },
82
89
  async writeJson(value) {
83
90
  await writeAtomically(path, `${JSON.stringify(value, null, 2)}\n`);
84
91
  },
85
- async loadAsCasesInput() {
86
- return await this.readJson();
87
- },
88
- async loadAsExpectFixture() {
89
- return await this.readJson();
92
+ async writeText(value, encoding = "utf-8") {
93
+ await writeAtomically(path, Buffer.from(value, encoding));
90
94
  }
91
95
  };
92
96
  }
@@ -96,36 +100,61 @@ function createCacheNamespace(baseDirectory, namespace) {
96
100
  return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments));
97
101
  } };
98
102
  }
103
+ function normalizeExtension(extension, mediaType) {
104
+ if (extension != null && extension.length > 0) return extension.startsWith(".") ? extension.slice(1) : extension;
105
+ if (mediaType == null || mediaType.length === 0) return;
106
+ if (mediaType === "application/json") return "json";
107
+ if (mediaType === "text/plain") return "txt";
108
+ if (mediaType === "audio/wav") return "wav";
109
+ }
110
+ function sanitizePathSegment(value) {
111
+ const normalized = value.trim();
112
+ if (normalized.length === 0) return "default";
113
+ return normalized.replace(/[^\w.-]+/g, "-");
114
+ }
115
+ async function writeAtomically(path, content) {
116
+ const directory = dirname(path);
117
+ const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
118
+ await mkdir(directory, { recursive: true });
119
+ await writeFile(temporaryPath, content);
120
+ await rename(temporaryPath, path);
121
+ }
122
+ //#endregion
123
+ //#region src/core/runner/aggregate.ts
99
124
  /**
100
- * Creates a deterministic filesystem-backed task cache runtime.
125
+ * Aggregates exact-match and judge-based scores into hybrid runner summaries.
126
+ *
127
+ * Call stack:
128
+ *
129
+ * {@link runScheduledTasks}
130
+ * -> {@link aggregateRunResults}
131
+ * -> {@link createRunSummary}
132
+ * -> {@link createProviderSummary}
133
+ * -> `report output`
101
134
  *
102
135
  * Use when:
103
- * - eval tasks need reproducible cache paths for expensive pre-processing outputs
104
- * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes
136
+ * - a runner batch mixes deterministic exact checks with judge-based grading
137
+ * - inferenceExecutor comparison should preserve both score families and one hybrid view
105
138
  *
106
139
  * Expects:
107
- * - `cacheRootDirectory` to be writable by the running process
108
- * - `workspaceId` + `projectName` to stay stable for reproducible paths
109
- *
110
- * Returns:
111
- * - task cache runtime that resolves namespaced file handles under:
112
- * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
140
+ * - each score to be normalized to the `0..1` range before aggregation
141
+ * - `scores.kind` to use only `'exact'` or `'judge'`
113
142
  */
114
- function createFilesystemTaskCacheRuntime(options) {
115
- const workspaceDirectory = sanitizePathSegment(options.workspaceId);
116
- const projectDirectory = sanitizePathSegment(options.projectName);
117
- const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory);
118
- return { namespace(name) {
119
- return createCacheNamespace(baseDirectory, name);
120
- } };
121
- }
122
- //#endregion
123
- //#region src/core/runner/aggregate.ts
124
- function cloneScheduledTaskMatrix(matrix) {
143
+ function aggregateRunResults(results) {
144
+ const runs = results.map(createRunSummary);
145
+ const inferenceExecutors = Array.from(new Set(results.map((result) => result.inferenceExecutorId))).map((inferenceExecutorId) => {
146
+ return createProviderSummary(inferenceExecutorId, results.filter((result) => result.inferenceExecutorId === inferenceExecutorId));
147
+ }).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
148
+ const overall = createProviderSummary("overall", results);
125
149
  return {
126
- eval: { ...matrix.eval },
127
- meta: { ...matrix.meta },
128
- run: { ...matrix.run }
150
+ inferenceExecutors,
151
+ overall: {
152
+ exactAverage: overall.exactAverage,
153
+ hybridAverage: overall.hybridAverage,
154
+ judgeAverage: overall.judgeAverage,
155
+ runCount: overall.runCount
156
+ },
157
+ runs
129
158
  };
130
159
  }
131
160
  function assertKnownScoreKind(kind) {
@@ -136,11 +165,12 @@ function average(scores) {
136
165
  if (scores.length === 0) return null;
137
166
  return scores.reduce((sum, score) => sum + score, 0) / scores.length;
138
167
  }
139
- function createHybridAverage(exactAverage, judgeAverage) {
140
- if (exactAverage != null && judgeAverage != null) return (exactAverage + judgeAverage) / 2;
141
- if (exactAverage != null) return exactAverage;
142
- if (judgeAverage != null) return judgeAverage;
143
- return null;
168
+ function cloneScheduledTaskMatrix(matrix) {
169
+ return {
170
+ eval: { ...matrix.eval },
171
+ meta: { ...matrix.meta },
172
+ run: { ...matrix.run }
173
+ };
144
174
  }
145
175
  function collectScoreBuckets(scores) {
146
176
  const buckets = {
@@ -156,19 +186,11 @@ function collectScoreBuckets(scores) {
156
186
  }
157
187
  return buckets;
158
188
  }
159
- function createRunSummary(result) {
160
- const buckets = collectScoreBuckets(result.scores);
161
- const exactAverage = average(buckets.exact);
162
- const judgeAverage = average(buckets.judge);
163
- return {
164
- entryId: result.entryId,
165
- exactAverage,
166
- hybridAverage: createHybridAverage(exactAverage, judgeAverage),
167
- id: result.id,
168
- judgeAverage,
169
- matrix: cloneScheduledTaskMatrix(result.matrix),
170
- inferenceExecutorId: result.inferenceExecutorId
171
- };
189
+ function createHybridAverage(exactAverage, judgeAverage) {
190
+ if (exactAverage != null && judgeAverage != null) return (exactAverage + judgeAverage) / 2;
191
+ if (exactAverage != null) return exactAverage;
192
+ if (judgeAverage != null) return judgeAverage;
193
+ return null;
172
194
  }
173
195
  function createProviderSummary(inferenceExecutorId, results) {
174
196
  const exactScores = [];
@@ -183,54 +205,29 @@ function createProviderSummary(inferenceExecutorId, results) {
183
205
  return {
184
206
  exactAverage,
185
207
  hybridAverage: createHybridAverage(exactAverage, judgeAverage),
186
- judgeAverage,
187
208
  inferenceExecutorId,
209
+ judgeAverage,
188
210
  runCount: results.length
189
211
  };
190
212
  }
191
- /**
192
- * Aggregates exact-match and judge-based scores into hybrid runner summaries.
193
- *
194
- * Call stack:
195
- *
196
- * {@link runScheduledTasks}
197
- * -> {@link aggregateRunResults}
198
- * -> {@link createRunSummary}
199
- * -> {@link createProviderSummary}
200
- * -> `report output`
201
- *
202
- * Use when:
203
- * - a runner batch mixes deterministic exact checks with judge-based grading
204
- * - inferenceExecutor comparison should preserve both score families and one hybrid view
205
- *
206
- * Expects:
207
- * - each score to be normalized to the `0..1` range before aggregation
208
- * - `scores.kind` to use only `'exact'` or `'judge'`
209
- */
210
- function aggregateRunResults(results) {
211
- const runs = results.map(createRunSummary);
212
- const inferenceExecutors = Array.from(new Set(results.map((result) => result.inferenceExecutorId))).map((inferenceExecutorId) => {
213
- return createProviderSummary(inferenceExecutorId, results.filter((result) => result.inferenceExecutorId === inferenceExecutorId));
214
- }).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
215
- const overall = createProviderSummary("overall", results);
213
+ function createRunSummary(result) {
214
+ const buckets = collectScoreBuckets(result.scores);
215
+ const exactAverage = average(buckets.exact);
216
+ const judgeAverage = average(buckets.judge);
216
217
  return {
217
- overall: {
218
- exactAverage: overall.exactAverage,
219
- hybridAverage: overall.hybridAverage,
220
- judgeAverage: overall.judgeAverage,
221
- runCount: overall.runCount
222
- },
223
- inferenceExecutors,
224
- runs
218
+ entryId: result.entryId,
219
+ exactAverage,
220
+ hybridAverage: createHybridAverage(exactAverage, judgeAverage),
221
+ id: result.id,
222
+ inferenceExecutorId: result.inferenceExecutorId,
223
+ judgeAverage,
224
+ matrix: cloneScheduledTaskMatrix(result.matrix)
225
225
  };
226
226
  }
227
227
  //#endregion
228
228
  //#region src/core/runner/collect.ts
229
229
  const evalFileSuffix = ".eval.ts";
230
230
  const absolutePathPattern = /^(?:[A-Z]:\/|\/|\\\\)/i;
231
- function normalizePath(value) {
232
- return value.replaceAll("\\", "/");
233
- }
234
231
  /**
235
232
  * Converts a file path into a project-relative path when possible.
236
233
  *
@@ -255,31 +252,6 @@ function asProjectRelativePath(filePath, context) {
255
252
  }
256
253
  return normalizePath(filePath);
257
254
  }
258
- function resolveModuleFilePath(moduleHref) {
259
- if (!moduleHref.startsWith("file:")) return null;
260
- try {
261
- return fileURLToPath(moduleHref);
262
- } catch {
263
- return null;
264
- }
265
- }
266
- function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
267
- const filePath = resolveModuleFilePath(moduleHref);
268
- if (!filePath) return null;
269
- const relativeFilePath = asProjectRelativePath(filePath, context);
270
- if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
271
- const entryName = basename(relativeFilePath, evalFileSuffix);
272
- if (entryName.length === 0) return null;
273
- const relativeDirectory = dirname(relativeFilePath);
274
- const directory = relativeDirectory === "." ? "" : relativeDirectory;
275
- return {
276
- ...moduleDefinition.default,
277
- directory,
278
- filePath,
279
- id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
280
- name: entryName
281
- };
282
- }
283
255
  /**
284
256
  * Collects loaded vieval modules into sorted runner entries with stable ids.
285
257
  *
@@ -301,19 +273,36 @@ function collectEvalEntries(modules, context) {
301
273
  return [entry];
302
274
  }).sort((left, right) => left.id.localeCompare(right.id));
303
275
  }
304
- //#endregion
305
- //#region src/core/runner/run.ts
306
- function createDefaultExecutionContext() {
276
+ function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
277
+ const filePath = resolveModuleFilePath(moduleHref);
278
+ if (!filePath) return null;
279
+ const relativeFilePath = asProjectRelativePath(filePath, context);
280
+ if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
281
+ const entryName = basename(relativeFilePath, evalFileSuffix);
282
+ if (entryName.length === 0) return null;
283
+ const relativeDirectory = dirname(relativeFilePath);
284
+ const directory = relativeDirectory === "." ? "" : relativeDirectory;
307
285
  return {
308
- cache: { namespace(name) {
309
- return { file(options) {
310
- const key = options.key.join("/");
311
- throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
312
- } };
313
- } },
314
- models: []
286
+ ...moduleDefinition.default,
287
+ directory,
288
+ filePath,
289
+ id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
290
+ name: entryName
315
291
  };
316
292
  }
293
+ function normalizePath(value) {
294
+ return value.replaceAll("\\", "/");
295
+ }
296
+ function resolveModuleFilePath(moduleHref) {
297
+ if (!moduleHref.startsWith("file:")) return null;
298
+ try {
299
+ return fileURLToPath(moduleHref);
300
+ } catch {
301
+ return null;
302
+ }
303
+ }
304
+ //#endregion
305
+ //#region src/core/runner/run.ts
317
306
  /**
318
307
  * Error thrown when a scheduled run fails before producing a normalized result.
319
308
  */
@@ -330,10 +319,6 @@ var RunnerExecutionError = class extends Error {
330
319
  this.cause = cause;
331
320
  }
332
321
  };
333
- function createRunnerExecutionError(taskId, cause) {
334
- if (cause instanceof RunnerExecutionError && cause.taskId === taskId) return cause;
335
- return new RunnerExecutionError(taskId, cause);
336
- }
337
322
  /**
338
323
  * Executes runner tasks sequentially and aggregates the normalized results.
339
324
  *
@@ -400,6 +385,21 @@ async function runScheduledTasks(tasks, executor, options = {}) {
400
385
  };
401
386
  }))).sort((left, right) => left.index - right.index).map((item) => item.result));
402
387
  }
388
+ function createDefaultExecutionContext() {
389
+ return {
390
+ cache: { namespace(name) {
391
+ return { file(options) {
392
+ const key = options.key.join("/");
393
+ throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
394
+ } };
395
+ } },
396
+ models: []
397
+ };
398
+ }
399
+ function createRunnerExecutionError(taskId, cause) {
400
+ if (cause instanceof RunnerExecutionError && cause.taskId === taskId) return cause;
401
+ return new RunnerExecutionError(taskId, cause);
402
+ }
403
403
  //#endregion
404
404
  //#region src/core/runner/runtime-context.ts
405
405
  const require = createRequire(import.meta.url);
@@ -425,21 +425,96 @@ async function createRunnerRuntimeContext(options = {}) {
425
425
  }
426
426
  //#endregion
427
427
  //#region src/core/runner/schedule.ts
428
- const matrixLayerKeys = new Set([
428
+ const matrixLayerKeys = /* @__PURE__ */ new Set([
429
429
  "disable",
430
430
  "extend",
431
431
  "override"
432
432
  ]);
433
433
  const ambiguousMatrixDefinitionErrorMessage = "Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.";
434
- function encodeTaskIdSegment(value) {
435
- return encodeURIComponent(value);
434
+ /**
435
+ * Expands collected entries into a stable runner schedule.
436
+ *
437
+ * Call stack:
438
+ *
439
+ * {@link collectEvalEntries} (`../runner`)
440
+ * -> {@link createRunnerSchedule}
441
+ * -> {@link expandAxesToRows}
442
+ * -> {@link ScheduledTask}[]
443
+ *
444
+ * Use when:
445
+ * - the runner already knows which eval entries are available
446
+ * - each entry must run against multiple inferenceExecutors or matrix variants
447
+ *
448
+ * Expects:
449
+ * - `entries` and `inferenceExecutors` to be provided in the desired execution order
450
+ * - matrix axes to use insertion order when generating combinations
451
+ */
452
+ function createRunnerSchedule(options) {
453
+ if (options.entries.length === 0) return [];
454
+ if (options.inferenceExecutors.length === 0) return [];
455
+ const tasks = [];
456
+ for (const entry of options.entries) {
457
+ const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
458
+ const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
459
+ if (runSelections.length === 0 || evalSelections.length === 0) continue;
460
+ for (const inferenceExecutor of options.inferenceExecutors) for (const runMatrix of runSelections) for (const evalMatrix of evalSelections) {
461
+ const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix);
462
+ tasks.push({
463
+ entry,
464
+ id: createTaskId(entry.id, inferenceExecutor.id, isolatedMatrix.meta.runRowId, isolatedMatrix.meta.evalRowId),
465
+ inferenceExecutor,
466
+ matrix: isolatedMatrix
467
+ });
468
+ }
469
+ }
470
+ return tasks;
436
471
  }
437
- function stringifyMatrixValue(value) {
438
- return String(value);
472
+ function applyAxisValues(axes, definition, mode) {
473
+ if (definition == null) return;
474
+ for (const [axis, values] of Object.entries(definition)) {
475
+ const nextValues = dedupeAxisValues(values);
476
+ if (mode === "extend") {
477
+ const existingValues = axes.get(axis) ?? [];
478
+ axes.set(axis, Array.from(/* @__PURE__ */ new Set([...existingValues, ...nextValues])));
479
+ continue;
480
+ }
481
+ axes.set(axis, nextValues);
482
+ }
483
+ }
484
+ function applyLayer(baseAxes, layer) {
485
+ const nextAxes = new Map(Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]));
486
+ for (const axis of layer?.disable ?? []) nextAxes.delete(axis);
487
+ applyAxisValues(nextAxes, layer?.extend, "extend");
488
+ applyAxisValues(nextAxes, layer?.override, "override");
489
+ return nextAxes;
490
+ }
491
+ function assertNonAmbiguousMatrixDefinition(matrix) {
492
+ const matrixKeys = Object.keys(matrix);
493
+ const hasReservedKeys = matrixKeys.some((key) => matrixLayerKeys.has(key));
494
+ const hasAxisKeys = matrixKeys.some((key) => !matrixLayerKeys.has(key));
495
+ if (hasReservedKeys && hasAxisKeys) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
439
496
  }
440
497
  function cloneMatrixSelection(matrix) {
441
498
  return { ...matrix };
442
499
  }
500
+ function createResolvedEvalAxes(entry, evalMatrix) {
501
+ let resolvedAxes = /* @__PURE__ */ new Map();
502
+ for (const layerInput of [
503
+ evalMatrix,
504
+ entry.matrix?.evalMatrix,
505
+ entry.task?.matrix?.evalMatrix
506
+ ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
507
+ return resolvedAxes;
508
+ }
509
+ function createResolvedRunAxes(entry, runMatrix) {
510
+ let resolvedAxes = /* @__PURE__ */ new Map();
511
+ for (const layerInput of [
512
+ runMatrix,
513
+ entry.matrix?.runMatrix,
514
+ entry.task?.matrix?.runMatrix
515
+ ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
516
+ return resolvedAxes;
517
+ }
443
518
  function createScheduledTaskMatrix(runMatrix, evalMatrix) {
444
519
  return {
445
520
  eval: cloneMatrixSelection(evalMatrix),
@@ -450,43 +525,24 @@ function createScheduledTaskMatrix(runMatrix, evalMatrix) {
450
525
  run: cloneMatrixSelection(runMatrix)
451
526
  };
452
527
  }
453
- function isMatrixLayer(matrix) {
454
- const matrixKeys = Object.keys(matrix);
455
- return matrixKeys.length > 0 && matrixKeys.every((key) => matrixLayerKeys.has(key));
456
- }
457
- function assertNonAmbiguousMatrixDefinition(matrix) {
458
- const matrixKeys = Object.keys(matrix);
459
- const hasReservedKeys = matrixKeys.some((key) => matrixLayerKeys.has(key));
460
- const hasAxisKeys = matrixKeys.some((key) => !matrixLayerKeys.has(key));
461
- if (hasReservedKeys && hasAxisKeys) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
528
+ function createStableRowId(matrix) {
529
+ const segments = Object.entries(matrix).sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis)).map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`);
530
+ if (segments.length === 0) return "default";
531
+ return segments.join("&");
462
532
  }
463
- function normalizeLayerInputToAxes(matrix) {
464
- if (matrix == null) return;
465
- assertNonAmbiguousMatrixDefinition(matrix);
466
- if (isMatrixLayer(matrix)) return matrix;
467
- return { extend: matrix };
533
+ function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
534
+ return [
535
+ encodeTaskIdSegment(entryId),
536
+ encodeTaskIdSegment(inferenceExecutorId),
537
+ `run=${encodeTaskIdSegment(runRowId)}`,
538
+ `eval=${encodeTaskIdSegment(evalRowId)}`
539
+ ].join("::");
468
540
  }
469
541
  function dedupeAxisValues(values) {
470
542
  return Array.from(new Set(values.map(stringifyMatrixValue)));
471
543
  }
472
- function applyAxisValues(axes, definition, mode) {
473
- if (definition == null) return;
474
- for (const [axis, values] of Object.entries(definition)) {
475
- const nextValues = dedupeAxisValues(values);
476
- if (mode === "extend") {
477
- const existingValues = axes.get(axis) ?? [];
478
- axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])));
479
- continue;
480
- }
481
- axes.set(axis, nextValues);
482
- }
483
- }
484
- function applyLayer(baseAxes, layer) {
485
- const nextAxes = new Map(Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]));
486
- for (const axis of layer?.disable ?? []) nextAxes.delete(axis);
487
- applyAxisValues(nextAxes, layer?.extend, "extend");
488
- applyAxisValues(nextAxes, layer?.override, "override");
489
- return nextAxes;
544
+ function encodeTaskIdSegment(value) {
545
+ return encodeURIComponent(value);
490
546
  }
491
547
  function expandAxesToRows(axes) {
492
548
  if (axes.size === 0) return [{}];
@@ -503,85 +559,21 @@ function expandAxesToRows(axes) {
503
559
  }
504
560
  return selections;
505
561
  }
506
- function createStableRowId(matrix) {
507
- const segments = Object.entries(matrix).sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis)).map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`);
508
- if (segments.length === 0) return "default";
509
- return segments.join("&");
510
- }
511
- function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
512
- return [
513
- encodeTaskIdSegment(entryId),
514
- encodeTaskIdSegment(inferenceExecutorId),
515
- `run=${encodeTaskIdSegment(runRowId)}`,
516
- `eval=${encodeTaskIdSegment(evalRowId)}`
517
- ].join("::");
518
- }
519
- function createResolvedRunAxes(entry, runMatrix) {
520
- let resolvedAxes = /* @__PURE__ */ new Map();
521
- for (const layerInput of [
522
- runMatrix,
523
- entry.matrix?.runMatrix,
524
- entry.task?.matrix?.runMatrix
525
- ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
526
- return resolvedAxes;
562
+ function isMatrixLayer(matrix) {
563
+ const matrixKeys = Object.keys(matrix);
564
+ return matrixKeys.length > 0 && matrixKeys.every((key) => matrixLayerKeys.has(key));
527
565
  }
528
- function createResolvedEvalAxes(entry, evalMatrix) {
529
- let resolvedAxes = /* @__PURE__ */ new Map();
530
- for (const layerInput of [
531
- evalMatrix,
532
- entry.matrix?.evalMatrix,
533
- entry.task?.matrix?.evalMatrix
534
- ]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
535
- return resolvedAxes;
566
+ function normalizeLayerInputToAxes(matrix) {
567
+ if (matrix == null) return;
568
+ assertNonAmbiguousMatrixDefinition(matrix);
569
+ if (isMatrixLayer(matrix)) return matrix;
570
+ return { extend: matrix };
536
571
  }
537
- /**
538
- * Expands collected entries into a stable runner schedule.
539
- *
540
- * Call stack:
541
- *
542
- * {@link collectEvalEntries} (`../runner`)
543
- * -> {@link createRunnerSchedule}
544
- * -> {@link expandAxesToRows}
545
- * -> {@link ScheduledTask}[]
546
- *
547
- * Use when:
548
- * - the runner already knows which eval entries are available
549
- * - each entry must run against multiple inferenceExecutors or matrix variants
550
- *
551
- * Expects:
552
- * - `entries` and `inferenceExecutors` to be provided in the desired execution order
553
- * - matrix axes to use insertion order when generating combinations
554
- */
555
- function createRunnerSchedule(options) {
556
- if (options.entries.length === 0) return [];
557
- if (options.inferenceExecutors.length === 0) return [];
558
- const tasks = [];
559
- for (const entry of options.entries) {
560
- const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
561
- const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
562
- if (runSelections.length === 0 || evalSelections.length === 0) continue;
563
- for (const inferenceExecutor of options.inferenceExecutors) for (const runMatrix of runSelections) for (const evalMatrix of evalSelections) {
564
- const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix);
565
- tasks.push({
566
- entry,
567
- id: createTaskId(entry.id, inferenceExecutor.id, isolatedMatrix.meta.runRowId, isolatedMatrix.meta.evalRowId),
568
- matrix: isolatedMatrix,
569
- inferenceExecutor
570
- });
571
- }
572
- }
573
- return tasks;
572
+ function stringifyMatrixValue(value) {
573
+ return String(value);
574
574
  }
575
575
  //#endregion
576
576
  //#region src/core/runner/task-context.ts
577
- function createNoopTaskCacheRuntime() {
578
- return { namespace(name) {
579
- return { file(options) {
580
- const key = options.key.join("/");
581
- throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
582
- } };
583
- } };
584
- }
585
577
  /**
586
578
  * Creates task-scoped context data for runner execution.
587
579
  *
@@ -597,6 +589,14 @@ function createTaskExecutionContext(options) {
597
589
  models: options.models
598
590
  };
599
591
  }
592
+ function createNoopTaskCacheRuntime() {
593
+ return { namespace(name) {
594
+ return { file(options) {
595
+ const key = options.key.join("/");
596
+ throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
597
+ } };
598
+ } };
599
+ }
600
600
  //#endregion
601
601
  export { RunnerExecutionError, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
602
602