vieval 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/bin/vieval.mjs.map +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/config.mjs.map +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +54 -53
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +259 -259
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +65 -65
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.d.mts +1 -3
- package/dist/expect.mjs +1 -1
- package/dist/expect.mjs.map +1 -1
- package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +208 -197
- package/dist/index.mjs +148 -148
- package/dist/index.mjs.map +1 -1
- package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
- package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +360 -360
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
- package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
- package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +12 -12
- package/dist/cli-DTDgaqeI.mjs.map +0 -1
- package/dist/env-nV5rVErX.mjs.map +0 -1
- package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
- package/dist/registry-DMnwE_mY.mjs.map +0 -1
|
@@ -9,17 +9,28 @@ import { createReadStream, createWriteStream } from "node:fs";
|
|
|
9
9
|
import { Buffer } from "node:buffer";
|
|
10
10
|
import { limitConcurrency } from "@vitest/runner/utils";
|
|
11
11
|
//#region src/core/cache/filesystem.ts
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
12
|
+
/**
|
|
13
|
+
* Creates a deterministic filesystem-backed task cache runtime.
|
|
14
|
+
*
|
|
15
|
+
* Use when:
|
|
16
|
+
* - eval tasks need reproducible cache paths for expensive pre-processing outputs
|
|
17
|
+
* - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes
|
|
18
|
+
*
|
|
19
|
+
* Expects:
|
|
20
|
+
* - `cacheRootDirectory` to be writable by the running process
|
|
21
|
+
* - `workspaceId` + `projectName` to stay stable for reproducible paths
|
|
22
|
+
*
|
|
23
|
+
* Returns:
|
|
24
|
+
* - task cache runtime that resolves namespaced file handles under:
|
|
25
|
+
* `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
|
|
26
|
+
*/
|
|
27
|
+
function createFilesystemTaskCacheRuntime(options) {
|
|
28
|
+
const workspaceDirectory = sanitizePathSegment(options.workspaceId);
|
|
29
|
+
const projectDirectory = sanitizePathSegment(options.projectName);
|
|
30
|
+
const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory);
|
|
31
|
+
return { namespace(name) {
|
|
32
|
+
return createCacheNamespace(baseDirectory, name);
|
|
33
|
+
} };
|
|
23
34
|
}
|
|
24
35
|
/**
|
|
25
36
|
* Normalizes cache file options into deterministic relative path segments.
|
|
@@ -39,16 +50,8 @@ function normalizeCacheFilePathSegments(options) {
|
|
|
39
50
|
const tail = sanitizedKey[sanitizedKey.length - 1] ?? "artifact";
|
|
40
51
|
return [...withoutTail, `${tail}.${extension}`];
|
|
41
52
|
}
|
|
42
|
-
async function writeAtomically(path, content) {
|
|
43
|
-
const directory = dirname(path);
|
|
44
|
-
const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
|
|
45
|
-
await mkdir(directory, { recursive: true });
|
|
46
|
-
await writeFile(temporaryPath, content);
|
|
47
|
-
await rename(temporaryPath, path);
|
|
48
|
-
}
|
|
49
53
|
function createCacheFileHandle(path) {
|
|
50
54
|
return {
|
|
51
|
-
path,
|
|
52
55
|
async exists() {
|
|
53
56
|
try {
|
|
54
57
|
await access(path);
|
|
@@ -57,6 +60,12 @@ function createCacheFileHandle(path) {
|
|
|
57
60
|
return false;
|
|
58
61
|
}
|
|
59
62
|
},
|
|
63
|
+
async loadAsCasesInput() {
|
|
64
|
+
return await this.readJson();
|
|
65
|
+
},
|
|
66
|
+
async loadAsExpectFixture() {
|
|
67
|
+
return await this.readJson();
|
|
68
|
+
},
|
|
60
69
|
openReadStream() {
|
|
61
70
|
return createReadStream(path);
|
|
62
71
|
},
|
|
@@ -64,29 +73,24 @@ function createCacheFileHandle(path) {
|
|
|
64
73
|
await mkdir(dirname(path), { recursive: true });
|
|
65
74
|
return createWriteStream(path);
|
|
66
75
|
},
|
|
76
|
+
path,
|
|
67
77
|
async readBuffer() {
|
|
68
78
|
return await readFile(path);
|
|
69
79
|
},
|
|
70
|
-
async
|
|
71
|
-
await
|
|
80
|
+
async readJson() {
|
|
81
|
+
return JSON.parse(await readFile(path, "utf-8"));
|
|
72
82
|
},
|
|
73
83
|
async readText(encoding = "utf-8") {
|
|
74
84
|
return await readFile(path, encoding);
|
|
75
85
|
},
|
|
76
|
-
async
|
|
77
|
-
await writeAtomically(path,
|
|
78
|
-
},
|
|
79
|
-
async readJson() {
|
|
80
|
-
return JSON.parse(await readFile(path, "utf-8"));
|
|
86
|
+
async writeBuffer(value) {
|
|
87
|
+
await writeAtomically(path, value);
|
|
81
88
|
},
|
|
82
89
|
async writeJson(value) {
|
|
83
90
|
await writeAtomically(path, `${JSON.stringify(value, null, 2)}\n`);
|
|
84
91
|
},
|
|
85
|
-
async
|
|
86
|
-
|
|
87
|
-
},
|
|
88
|
-
async loadAsExpectFixture() {
|
|
89
|
-
return await this.readJson();
|
|
92
|
+
async writeText(value, encoding = "utf-8") {
|
|
93
|
+
await writeAtomically(path, Buffer.from(value, encoding));
|
|
90
94
|
}
|
|
91
95
|
};
|
|
92
96
|
}
|
|
@@ -96,36 +100,61 @@ function createCacheNamespace(baseDirectory, namespace) {
|
|
|
96
100
|
return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments));
|
|
97
101
|
} };
|
|
98
102
|
}
|
|
103
|
+
function normalizeExtension(extension, mediaType) {
|
|
104
|
+
if (extension != null && extension.length > 0) return extension.startsWith(".") ? extension.slice(1) : extension;
|
|
105
|
+
if (mediaType == null || mediaType.length === 0) return;
|
|
106
|
+
if (mediaType === "application/json") return "json";
|
|
107
|
+
if (mediaType === "text/plain") return "txt";
|
|
108
|
+
if (mediaType === "audio/wav") return "wav";
|
|
109
|
+
}
|
|
110
|
+
function sanitizePathSegment(value) {
|
|
111
|
+
const normalized = value.trim();
|
|
112
|
+
if (normalized.length === 0) return "default";
|
|
113
|
+
return normalized.replace(/[^\w.-]+/g, "-");
|
|
114
|
+
}
|
|
115
|
+
async function writeAtomically(path, content) {
|
|
116
|
+
const directory = dirname(path);
|
|
117
|
+
const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
|
|
118
|
+
await mkdir(directory, { recursive: true });
|
|
119
|
+
await writeFile(temporaryPath, content);
|
|
120
|
+
await rename(temporaryPath, path);
|
|
121
|
+
}
|
|
122
|
+
//#endregion
|
|
123
|
+
//#region src/core/runner/aggregate.ts
|
|
99
124
|
/**
|
|
100
|
-
*
|
|
125
|
+
* Aggregates exact-match and judge-based scores into hybrid runner summaries.
|
|
126
|
+
*
|
|
127
|
+
* Call stack:
|
|
128
|
+
*
|
|
129
|
+
* {@link runScheduledTasks}
|
|
130
|
+
* -> {@link aggregateRunResults}
|
|
131
|
+
* -> {@link createRunSummary}
|
|
132
|
+
* -> {@link createProviderSummary}
|
|
133
|
+
* -> `report output`
|
|
101
134
|
*
|
|
102
135
|
* Use when:
|
|
103
|
-
* -
|
|
104
|
-
* -
|
|
136
|
+
* - a runner batch mixes deterministic exact checks with judge-based grading
|
|
137
|
+
* - inferenceExecutor comparison should preserve both score families and one hybrid view
|
|
105
138
|
*
|
|
106
139
|
* Expects:
|
|
107
|
-
* -
|
|
108
|
-
* - `
|
|
109
|
-
*
|
|
110
|
-
* Returns:
|
|
111
|
-
* - task cache runtime that resolves namespaced file handles under:
|
|
112
|
-
* `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
|
|
140
|
+
* - each score to be normalized to the `0..1` range before aggregation
|
|
141
|
+
* - `scores.kind` to use only `'exact'` or `'judge'`
|
|
113
142
|
*/
|
|
114
|
-
function
|
|
115
|
-
const
|
|
116
|
-
const
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
} };
|
|
121
|
-
}
|
|
122
|
-
//#endregion
|
|
123
|
-
//#region src/core/runner/aggregate.ts
|
|
124
|
-
function cloneScheduledTaskMatrix(matrix) {
|
|
143
|
+
function aggregateRunResults(results) {
|
|
144
|
+
const runs = results.map(createRunSummary);
|
|
145
|
+
const inferenceExecutors = Array.from(new Set(results.map((result) => result.inferenceExecutorId))).map((inferenceExecutorId) => {
|
|
146
|
+
return createProviderSummary(inferenceExecutorId, results.filter((result) => result.inferenceExecutorId === inferenceExecutorId));
|
|
147
|
+
}).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
|
|
148
|
+
const overall = createProviderSummary("overall", results);
|
|
125
149
|
return {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
150
|
+
inferenceExecutors,
|
|
151
|
+
overall: {
|
|
152
|
+
exactAverage: overall.exactAverage,
|
|
153
|
+
hybridAverage: overall.hybridAverage,
|
|
154
|
+
judgeAverage: overall.judgeAverage,
|
|
155
|
+
runCount: overall.runCount
|
|
156
|
+
},
|
|
157
|
+
runs
|
|
129
158
|
};
|
|
130
159
|
}
|
|
131
160
|
function assertKnownScoreKind(kind) {
|
|
@@ -136,11 +165,12 @@ function average(scores) {
|
|
|
136
165
|
if (scores.length === 0) return null;
|
|
137
166
|
return scores.reduce((sum, score) => sum + score, 0) / scores.length;
|
|
138
167
|
}
|
|
139
|
-
function
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
168
|
+
function cloneScheduledTaskMatrix(matrix) {
|
|
169
|
+
return {
|
|
170
|
+
eval: { ...matrix.eval },
|
|
171
|
+
meta: { ...matrix.meta },
|
|
172
|
+
run: { ...matrix.run }
|
|
173
|
+
};
|
|
144
174
|
}
|
|
145
175
|
function collectScoreBuckets(scores) {
|
|
146
176
|
const buckets = {
|
|
@@ -156,19 +186,11 @@ function collectScoreBuckets(scores) {
|
|
|
156
186
|
}
|
|
157
187
|
return buckets;
|
|
158
188
|
}
|
|
159
|
-
function
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
return
|
|
164
|
-
entryId: result.entryId,
|
|
165
|
-
exactAverage,
|
|
166
|
-
hybridAverage: createHybridAverage(exactAverage, judgeAverage),
|
|
167
|
-
id: result.id,
|
|
168
|
-
judgeAverage,
|
|
169
|
-
matrix: cloneScheduledTaskMatrix(result.matrix),
|
|
170
|
-
inferenceExecutorId: result.inferenceExecutorId
|
|
171
|
-
};
|
|
189
|
+
function createHybridAverage(exactAverage, judgeAverage) {
|
|
190
|
+
if (exactAverage != null && judgeAverage != null) return (exactAverage + judgeAverage) / 2;
|
|
191
|
+
if (exactAverage != null) return exactAverage;
|
|
192
|
+
if (judgeAverage != null) return judgeAverage;
|
|
193
|
+
return null;
|
|
172
194
|
}
|
|
173
195
|
function createProviderSummary(inferenceExecutorId, results) {
|
|
174
196
|
const exactScores = [];
|
|
@@ -183,54 +205,29 @@ function createProviderSummary(inferenceExecutorId, results) {
|
|
|
183
205
|
return {
|
|
184
206
|
exactAverage,
|
|
185
207
|
hybridAverage: createHybridAverage(exactAverage, judgeAverage),
|
|
186
|
-
judgeAverage,
|
|
187
208
|
inferenceExecutorId,
|
|
209
|
+
judgeAverage,
|
|
188
210
|
runCount: results.length
|
|
189
211
|
};
|
|
190
212
|
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
*
|
|
196
|
-
* {@link runScheduledTasks}
|
|
197
|
-
* -> {@link aggregateRunResults}
|
|
198
|
-
* -> {@link createRunSummary}
|
|
199
|
-
* -> {@link createProviderSummary}
|
|
200
|
-
* -> `report output`
|
|
201
|
-
*
|
|
202
|
-
* Use when:
|
|
203
|
-
* - a runner batch mixes deterministic exact checks with judge-based grading
|
|
204
|
-
* - inferenceExecutor comparison should preserve both score families and one hybrid view
|
|
205
|
-
*
|
|
206
|
-
* Expects:
|
|
207
|
-
* - each score to be normalized to the `0..1` range before aggregation
|
|
208
|
-
* - `scores.kind` to use only `'exact'` or `'judge'`
|
|
209
|
-
*/
|
|
210
|
-
function aggregateRunResults(results) {
|
|
211
|
-
const runs = results.map(createRunSummary);
|
|
212
|
-
const inferenceExecutors = Array.from(new Set(results.map((result) => result.inferenceExecutorId))).map((inferenceExecutorId) => {
|
|
213
|
-
return createProviderSummary(inferenceExecutorId, results.filter((result) => result.inferenceExecutorId === inferenceExecutorId));
|
|
214
|
-
}).sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId));
|
|
215
|
-
const overall = createProviderSummary("overall", results);
|
|
213
|
+
function createRunSummary(result) {
|
|
214
|
+
const buckets = collectScoreBuckets(result.scores);
|
|
215
|
+
const exactAverage = average(buckets.exact);
|
|
216
|
+
const judgeAverage = average(buckets.judge);
|
|
216
217
|
return {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
runs
|
|
218
|
+
entryId: result.entryId,
|
|
219
|
+
exactAverage,
|
|
220
|
+
hybridAverage: createHybridAverage(exactAverage, judgeAverage),
|
|
221
|
+
id: result.id,
|
|
222
|
+
inferenceExecutorId: result.inferenceExecutorId,
|
|
223
|
+
judgeAverage,
|
|
224
|
+
matrix: cloneScheduledTaskMatrix(result.matrix)
|
|
225
225
|
};
|
|
226
226
|
}
|
|
227
227
|
//#endregion
|
|
228
228
|
//#region src/core/runner/collect.ts
|
|
229
229
|
const evalFileSuffix = ".eval.ts";
|
|
230
230
|
const absolutePathPattern = /^(?:[A-Z]:\/|\/|\\\\)/i;
|
|
231
|
-
function normalizePath(value) {
|
|
232
|
-
return value.replaceAll("\\", "/");
|
|
233
|
-
}
|
|
234
231
|
/**
|
|
235
232
|
* Converts a file path into a project-relative path when possible.
|
|
236
233
|
*
|
|
@@ -255,31 +252,6 @@ function asProjectRelativePath(filePath, context) {
|
|
|
255
252
|
}
|
|
256
253
|
return normalizePath(filePath);
|
|
257
254
|
}
|
|
258
|
-
function resolveModuleFilePath(moduleHref) {
|
|
259
|
-
if (!moduleHref.startsWith("file:")) return null;
|
|
260
|
-
try {
|
|
261
|
-
return fileURLToPath(moduleHref);
|
|
262
|
-
} catch {
|
|
263
|
-
return null;
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
|
|
267
|
-
const filePath = resolveModuleFilePath(moduleHref);
|
|
268
|
-
if (!filePath) return null;
|
|
269
|
-
const relativeFilePath = asProjectRelativePath(filePath, context);
|
|
270
|
-
if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
|
|
271
|
-
const entryName = basename(relativeFilePath, evalFileSuffix);
|
|
272
|
-
if (entryName.length === 0) return null;
|
|
273
|
-
const relativeDirectory = dirname(relativeFilePath);
|
|
274
|
-
const directory = relativeDirectory === "." ? "" : relativeDirectory;
|
|
275
|
-
return {
|
|
276
|
-
...moduleDefinition.default,
|
|
277
|
-
directory,
|
|
278
|
-
filePath,
|
|
279
|
-
id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
|
|
280
|
-
name: entryName
|
|
281
|
-
};
|
|
282
|
-
}
|
|
283
255
|
/**
|
|
284
256
|
* Collects loaded vieval modules into sorted runner entries with stable ids.
|
|
285
257
|
*
|
|
@@ -301,19 +273,36 @@ function collectEvalEntries(modules, context) {
|
|
|
301
273
|
return [entry];
|
|
302
274
|
}).sort((left, right) => left.id.localeCompare(right.id));
|
|
303
275
|
}
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
276
|
+
function createCollectedEvalEntry(moduleHref, moduleDefinition, context) {
|
|
277
|
+
const filePath = resolveModuleFilePath(moduleHref);
|
|
278
|
+
if (!filePath) return null;
|
|
279
|
+
const relativeFilePath = asProjectRelativePath(filePath, context);
|
|
280
|
+
if (!relativeFilePath.endsWith(evalFileSuffix)) return null;
|
|
281
|
+
const entryName = basename(relativeFilePath, evalFileSuffix);
|
|
282
|
+
if (entryName.length === 0) return null;
|
|
283
|
+
const relativeDirectory = dirname(relativeFilePath);
|
|
284
|
+
const directory = relativeDirectory === "." ? "" : relativeDirectory;
|
|
307
285
|
return {
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
} },
|
|
314
|
-
models: []
|
|
286
|
+
...moduleDefinition.default,
|
|
287
|
+
directory,
|
|
288
|
+
filePath,
|
|
289
|
+
id: directory.length === 0 ? entryName : `${directory}/${entryName}`,
|
|
290
|
+
name: entryName
|
|
315
291
|
};
|
|
316
292
|
}
|
|
293
|
+
function normalizePath(value) {
|
|
294
|
+
return value.replaceAll("\\", "/");
|
|
295
|
+
}
|
|
296
|
+
function resolveModuleFilePath(moduleHref) {
|
|
297
|
+
if (!moduleHref.startsWith("file:")) return null;
|
|
298
|
+
try {
|
|
299
|
+
return fileURLToPath(moduleHref);
|
|
300
|
+
} catch {
|
|
301
|
+
return null;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
//#endregion
|
|
305
|
+
//#region src/core/runner/run.ts
|
|
317
306
|
/**
|
|
318
307
|
* Error thrown when a scheduled run fails before producing a normalized result.
|
|
319
308
|
*/
|
|
@@ -330,10 +319,6 @@ var RunnerExecutionError = class extends Error {
|
|
|
330
319
|
this.cause = cause;
|
|
331
320
|
}
|
|
332
321
|
};
|
|
333
|
-
function createRunnerExecutionError(taskId, cause) {
|
|
334
|
-
if (cause instanceof RunnerExecutionError && cause.taskId === taskId) return cause;
|
|
335
|
-
return new RunnerExecutionError(taskId, cause);
|
|
336
|
-
}
|
|
337
322
|
/**
|
|
338
323
|
* Executes runner tasks sequentially and aggregates the normalized results.
|
|
339
324
|
*
|
|
@@ -400,6 +385,21 @@ async function runScheduledTasks(tasks, executor, options = {}) {
|
|
|
400
385
|
};
|
|
401
386
|
}))).sort((left, right) => left.index - right.index).map((item) => item.result));
|
|
402
387
|
}
|
|
388
|
+
function createDefaultExecutionContext() {
|
|
389
|
+
return {
|
|
390
|
+
cache: { namespace(name) {
|
|
391
|
+
return { file(options) {
|
|
392
|
+
const key = options.key.join("/");
|
|
393
|
+
throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
|
|
394
|
+
} };
|
|
395
|
+
} },
|
|
396
|
+
models: []
|
|
397
|
+
};
|
|
398
|
+
}
|
|
399
|
+
function createRunnerExecutionError(taskId, cause) {
|
|
400
|
+
if (cause instanceof RunnerExecutionError && cause.taskId === taskId) return cause;
|
|
401
|
+
return new RunnerExecutionError(taskId, cause);
|
|
402
|
+
}
|
|
403
403
|
//#endregion
|
|
404
404
|
//#region src/core/runner/runtime-context.ts
|
|
405
405
|
const require = createRequire(import.meta.url);
|
|
@@ -425,21 +425,96 @@ async function createRunnerRuntimeContext(options = {}) {
|
|
|
425
425
|
}
|
|
426
426
|
//#endregion
|
|
427
427
|
//#region src/core/runner/schedule.ts
|
|
428
|
-
const matrixLayerKeys = new Set([
|
|
428
|
+
const matrixLayerKeys = /* @__PURE__ */ new Set([
|
|
429
429
|
"disable",
|
|
430
430
|
"extend",
|
|
431
431
|
"override"
|
|
432
432
|
]);
|
|
433
433
|
const ambiguousMatrixDefinitionErrorMessage = "Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.";
|
|
434
|
-
|
|
435
|
-
|
|
434
|
+
/**
|
|
435
|
+
* Expands collected entries into a stable runner schedule.
|
|
436
|
+
*
|
|
437
|
+
* Call stack:
|
|
438
|
+
*
|
|
439
|
+
* {@link collectEvalEntries} (`../runner`)
|
|
440
|
+
* -> {@link createRunnerSchedule}
|
|
441
|
+
* -> {@link expandAxesToRows}
|
|
442
|
+
* -> {@link ScheduledTask}[]
|
|
443
|
+
*
|
|
444
|
+
* Use when:
|
|
445
|
+
* - the runner already knows which eval entries are available
|
|
446
|
+
* - each entry must run against multiple inferenceExecutors or matrix variants
|
|
447
|
+
*
|
|
448
|
+
* Expects:
|
|
449
|
+
* - `entries` and `inferenceExecutors` to be provided in the desired execution order
|
|
450
|
+
* - matrix axes to use insertion order when generating combinations
|
|
451
|
+
*/
|
|
452
|
+
function createRunnerSchedule(options) {
|
|
453
|
+
if (options.entries.length === 0) return [];
|
|
454
|
+
if (options.inferenceExecutors.length === 0) return [];
|
|
455
|
+
const tasks = [];
|
|
456
|
+
for (const entry of options.entries) {
|
|
457
|
+
const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
|
|
458
|
+
const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
|
|
459
|
+
if (runSelections.length === 0 || evalSelections.length === 0) continue;
|
|
460
|
+
for (const inferenceExecutor of options.inferenceExecutors) for (const runMatrix of runSelections) for (const evalMatrix of evalSelections) {
|
|
461
|
+
const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix);
|
|
462
|
+
tasks.push({
|
|
463
|
+
entry,
|
|
464
|
+
id: createTaskId(entry.id, inferenceExecutor.id, isolatedMatrix.meta.runRowId, isolatedMatrix.meta.evalRowId),
|
|
465
|
+
inferenceExecutor,
|
|
466
|
+
matrix: isolatedMatrix
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
return tasks;
|
|
436
471
|
}
|
|
437
|
-
function
|
|
438
|
-
|
|
472
|
+
function applyAxisValues(axes, definition, mode) {
|
|
473
|
+
if (definition == null) return;
|
|
474
|
+
for (const [axis, values] of Object.entries(definition)) {
|
|
475
|
+
const nextValues = dedupeAxisValues(values);
|
|
476
|
+
if (mode === "extend") {
|
|
477
|
+
const existingValues = axes.get(axis) ?? [];
|
|
478
|
+
axes.set(axis, Array.from(/* @__PURE__ */ new Set([...existingValues, ...nextValues])));
|
|
479
|
+
continue;
|
|
480
|
+
}
|
|
481
|
+
axes.set(axis, nextValues);
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
function applyLayer(baseAxes, layer) {
|
|
485
|
+
const nextAxes = new Map(Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]));
|
|
486
|
+
for (const axis of layer?.disable ?? []) nextAxes.delete(axis);
|
|
487
|
+
applyAxisValues(nextAxes, layer?.extend, "extend");
|
|
488
|
+
applyAxisValues(nextAxes, layer?.override, "override");
|
|
489
|
+
return nextAxes;
|
|
490
|
+
}
|
|
491
|
+
function assertNonAmbiguousMatrixDefinition(matrix) {
|
|
492
|
+
const matrixKeys = Object.keys(matrix);
|
|
493
|
+
const hasReservedKeys = matrixKeys.some((key) => matrixLayerKeys.has(key));
|
|
494
|
+
const hasAxisKeys = matrixKeys.some((key) => !matrixLayerKeys.has(key));
|
|
495
|
+
if (hasReservedKeys && hasAxisKeys) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
|
|
439
496
|
}
|
|
440
497
|
function cloneMatrixSelection(matrix) {
|
|
441
498
|
return { ...matrix };
|
|
442
499
|
}
|
|
500
|
+
function createResolvedEvalAxes(entry, evalMatrix) {
|
|
501
|
+
let resolvedAxes = /* @__PURE__ */ new Map();
|
|
502
|
+
for (const layerInput of [
|
|
503
|
+
evalMatrix,
|
|
504
|
+
entry.matrix?.evalMatrix,
|
|
505
|
+
entry.task?.matrix?.evalMatrix
|
|
506
|
+
]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
|
|
507
|
+
return resolvedAxes;
|
|
508
|
+
}
|
|
509
|
+
function createResolvedRunAxes(entry, runMatrix) {
|
|
510
|
+
let resolvedAxes = /* @__PURE__ */ new Map();
|
|
511
|
+
for (const layerInput of [
|
|
512
|
+
runMatrix,
|
|
513
|
+
entry.matrix?.runMatrix,
|
|
514
|
+
entry.task?.matrix?.runMatrix
|
|
515
|
+
]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
|
|
516
|
+
return resolvedAxes;
|
|
517
|
+
}
|
|
443
518
|
function createScheduledTaskMatrix(runMatrix, evalMatrix) {
|
|
444
519
|
return {
|
|
445
520
|
eval: cloneMatrixSelection(evalMatrix),
|
|
@@ -450,43 +525,24 @@ function createScheduledTaskMatrix(runMatrix, evalMatrix) {
|
|
|
450
525
|
run: cloneMatrixSelection(runMatrix)
|
|
451
526
|
};
|
|
452
527
|
}
|
|
453
|
-
function
|
|
454
|
-
const
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
function assertNonAmbiguousMatrixDefinition(matrix) {
|
|
458
|
-
const matrixKeys = Object.keys(matrix);
|
|
459
|
-
const hasReservedKeys = matrixKeys.some((key) => matrixLayerKeys.has(key));
|
|
460
|
-
const hasAxisKeys = matrixKeys.some((key) => !matrixLayerKeys.has(key));
|
|
461
|
-
if (hasReservedKeys && hasAxisKeys) throw new TypeError(ambiguousMatrixDefinitionErrorMessage);
|
|
528
|
+
function createStableRowId(matrix) {
|
|
529
|
+
const segments = Object.entries(matrix).sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis)).map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`);
|
|
530
|
+
if (segments.length === 0) return "default";
|
|
531
|
+
return segments.join("&");
|
|
462
532
|
}
|
|
463
|
-
function
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
533
|
+
function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
|
|
534
|
+
return [
|
|
535
|
+
encodeTaskIdSegment(entryId),
|
|
536
|
+
encodeTaskIdSegment(inferenceExecutorId),
|
|
537
|
+
`run=${encodeTaskIdSegment(runRowId)}`,
|
|
538
|
+
`eval=${encodeTaskIdSegment(evalRowId)}`
|
|
539
|
+
].join("::");
|
|
468
540
|
}
|
|
469
541
|
function dedupeAxisValues(values) {
|
|
470
542
|
return Array.from(new Set(values.map(stringifyMatrixValue)));
|
|
471
543
|
}
|
|
472
|
-
function
|
|
473
|
-
|
|
474
|
-
for (const [axis, values] of Object.entries(definition)) {
|
|
475
|
-
const nextValues = dedupeAxisValues(values);
|
|
476
|
-
if (mode === "extend") {
|
|
477
|
-
const existingValues = axes.get(axis) ?? [];
|
|
478
|
-
axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])));
|
|
479
|
-
continue;
|
|
480
|
-
}
|
|
481
|
-
axes.set(axis, nextValues);
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
function applyLayer(baseAxes, layer) {
|
|
485
|
-
const nextAxes = new Map(Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]));
|
|
486
|
-
for (const axis of layer?.disable ?? []) nextAxes.delete(axis);
|
|
487
|
-
applyAxisValues(nextAxes, layer?.extend, "extend");
|
|
488
|
-
applyAxisValues(nextAxes, layer?.override, "override");
|
|
489
|
-
return nextAxes;
|
|
544
|
+
function encodeTaskIdSegment(value) {
|
|
545
|
+
return encodeURIComponent(value);
|
|
490
546
|
}
|
|
491
547
|
function expandAxesToRows(axes) {
|
|
492
548
|
if (axes.size === 0) return [{}];
|
|
@@ -503,85 +559,21 @@ function expandAxesToRows(axes) {
|
|
|
503
559
|
}
|
|
504
560
|
return selections;
|
|
505
561
|
}
|
|
506
|
-
function
|
|
507
|
-
const
|
|
508
|
-
|
|
509
|
-
return segments.join("&");
|
|
510
|
-
}
|
|
511
|
-
function createTaskId(entryId, inferenceExecutorId, runRowId, evalRowId) {
|
|
512
|
-
return [
|
|
513
|
-
encodeTaskIdSegment(entryId),
|
|
514
|
-
encodeTaskIdSegment(inferenceExecutorId),
|
|
515
|
-
`run=${encodeTaskIdSegment(runRowId)}`,
|
|
516
|
-
`eval=${encodeTaskIdSegment(evalRowId)}`
|
|
517
|
-
].join("::");
|
|
518
|
-
}
|
|
519
|
-
function createResolvedRunAxes(entry, runMatrix) {
|
|
520
|
-
let resolvedAxes = /* @__PURE__ */ new Map();
|
|
521
|
-
for (const layerInput of [
|
|
522
|
-
runMatrix,
|
|
523
|
-
entry.matrix?.runMatrix,
|
|
524
|
-
entry.task?.matrix?.runMatrix
|
|
525
|
-
]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
|
|
526
|
-
return resolvedAxes;
|
|
562
|
+
function isMatrixLayer(matrix) {
|
|
563
|
+
const matrixKeys = Object.keys(matrix);
|
|
564
|
+
return matrixKeys.length > 0 && matrixKeys.every((key) => matrixLayerKeys.has(key));
|
|
527
565
|
}
|
|
528
|
-
function
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
entry.task?.matrix?.evalMatrix
|
|
534
|
-
]) resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput));
|
|
535
|
-
return resolvedAxes;
|
|
566
|
+
function normalizeLayerInputToAxes(matrix) {
|
|
567
|
+
if (matrix == null) return;
|
|
568
|
+
assertNonAmbiguousMatrixDefinition(matrix);
|
|
569
|
+
if (isMatrixLayer(matrix)) return matrix;
|
|
570
|
+
return { extend: matrix };
|
|
536
571
|
}
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
*
|
|
540
|
-
* Call stack:
|
|
541
|
-
*
|
|
542
|
-
* {@link collectEvalEntries} (`../runner`)
|
|
543
|
-
* -> {@link createRunnerSchedule}
|
|
544
|
-
* -> {@link expandAxesToRows}
|
|
545
|
-
* -> {@link ScheduledTask}[]
|
|
546
|
-
*
|
|
547
|
-
* Use when:
|
|
548
|
-
* - the runner already knows which eval entries are available
|
|
549
|
-
* - each entry must run against multiple inferenceExecutors or matrix variants
|
|
550
|
-
*
|
|
551
|
-
* Expects:
|
|
552
|
-
* - `entries` and `inferenceExecutors` to be provided in the desired execution order
|
|
553
|
-
* - matrix axes to use insertion order when generating combinations
|
|
554
|
-
*/
|
|
555
|
-
function createRunnerSchedule(options) {
|
|
556
|
-
if (options.entries.length === 0) return [];
|
|
557
|
-
if (options.inferenceExecutors.length === 0) return [];
|
|
558
|
-
const tasks = [];
|
|
559
|
-
for (const entry of options.entries) {
|
|
560
|
-
const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix));
|
|
561
|
-
const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix));
|
|
562
|
-
if (runSelections.length === 0 || evalSelections.length === 0) continue;
|
|
563
|
-
for (const inferenceExecutor of options.inferenceExecutors) for (const runMatrix of runSelections) for (const evalMatrix of evalSelections) {
|
|
564
|
-
const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix);
|
|
565
|
-
tasks.push({
|
|
566
|
-
entry,
|
|
567
|
-
id: createTaskId(entry.id, inferenceExecutor.id, isolatedMatrix.meta.runRowId, isolatedMatrix.meta.evalRowId),
|
|
568
|
-
matrix: isolatedMatrix,
|
|
569
|
-
inferenceExecutor
|
|
570
|
-
});
|
|
571
|
-
}
|
|
572
|
-
}
|
|
573
|
-
return tasks;
|
|
572
|
+
function stringifyMatrixValue(value) {
|
|
573
|
+
return String(value);
|
|
574
574
|
}
|
|
575
575
|
//#endregion
|
|
576
576
|
//#region src/core/runner/task-context.ts
|
|
577
|
-
function createNoopTaskCacheRuntime() {
|
|
578
|
-
return { namespace(name) {
|
|
579
|
-
return { file(options) {
|
|
580
|
-
const key = options.key.join("/");
|
|
581
|
-
throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
|
|
582
|
-
} };
|
|
583
|
-
} };
|
|
584
|
-
}
|
|
585
577
|
/**
|
|
586
578
|
* Creates task-scoped context data for runner execution.
|
|
587
579
|
*
|
|
@@ -597,6 +589,14 @@ function createTaskExecutionContext(options) {
|
|
|
597
589
|
models: options.models
|
|
598
590
|
};
|
|
599
591
|
}
|
|
592
|
+
function createNoopTaskCacheRuntime() {
|
|
593
|
+
return { namespace(name) {
|
|
594
|
+
return { file(options) {
|
|
595
|
+
const key = options.key.join("/");
|
|
596
|
+
throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
|
|
597
|
+
} };
|
|
598
|
+
} };
|
|
599
|
+
}
|
|
600
600
|
//#endregion
|
|
601
601
|
export { RunnerExecutionError, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
|
|
602
602
|
|