vieval 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +53 -52
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +258 -258
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +64 -64
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.mjs +1 -1
- package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +207 -195
- package/dist/index.mjs +147 -147
- package/dist/index.mjs.map +1 -1
- package/dist/models-CaCOUPZw.mjs.map +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +359 -359
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +3 -3
- package/dist/cli-CHFCF8UR.mjs.map +0 -1
- package/dist/env-BVYeJhGA.mjs.map +0 -1
- package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
- package/dist/registry-BSyjwZFx.mjs.map +0 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as createOpenTelemetryRuntime, c as detectCliConfigMode, d as loadVievalCliConfig, n as consumeModuleRegistrations, o as createNoopTelemetryRuntime, r as endModuleRegistration, t as beginModuleRegistration, u as loadRawVievalConfig } from "./registry-
|
|
1
|
+
import { a as createOpenTelemetryRuntime, c as detectCliConfigMode, d as loadVievalCliConfig, n as consumeModuleRegistrations, o as createNoopTelemetryRuntime, r as endModuleRegistration, t as beginModuleRegistration, u as loadRawVievalConfig } from "./registry-BK7k6X81.mjs";
|
|
2
2
|
import { createSchedulerRuntime } from "./core/scheduler/index.mjs";
|
|
3
3
|
import { RunnerExecutionError, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createTaskExecutionContext, runScheduledTasks } from "./core/runner/index.mjs";
|
|
4
4
|
import process from "node:process";
|
|
@@ -27,40 +27,44 @@ const supportedWorkspaceConfigFileNames = [
|
|
|
27
27
|
"vieval.config.cjs",
|
|
28
28
|
"vieval.config.json"
|
|
29
29
|
];
|
|
30
|
-
|
|
30
|
+
/**
|
|
31
|
+
* Loads and validates comparison-mode data from `vieval.config.*`.
|
|
32
|
+
*/
|
|
33
|
+
async function loadVievalComparisonConfig(options = {}) {
|
|
34
|
+
const cwd = options.cwd ?? process.cwd();
|
|
31
35
|
try {
|
|
32
|
-
await
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
const loaded = await loadRawVievalConfig({
|
|
37
|
+
configFilePath: options.configFilePath,
|
|
38
|
+
cwd
|
|
39
|
+
});
|
|
40
|
+
if (loaded.configFilePath == null || loaded.config == null) throw new Error("Failed to find vieval config. Expected vieval.config.*");
|
|
41
|
+
assertComparisonMode(loaded.config);
|
|
42
|
+
const selectedComparison = selectComparisonConfig(loaded.config.comparisons, options.comparisonId);
|
|
43
|
+
const configDirectory = dirname(loaded.configFilePath);
|
|
44
|
+
const explicitMethods = (selectedComparison.methods ?? []).map((method, index) => normalizeMethodShape(method, configDirectory, index));
|
|
45
|
+
const discoveredMethods = await discoverMethodsFromWorkspaceGlobs({
|
|
46
|
+
comparison: selectedComparison,
|
|
47
|
+
configDirectory
|
|
48
|
+
});
|
|
49
|
+
const methods = [...explicitMethods, ...discoveredMethods];
|
|
50
|
+
if (methods.length === 0) throw new Error("Comparison config resolved zero methods. Configure methods or includesWorkspaces.");
|
|
51
|
+
validateMethodIdsAreUnique(methods);
|
|
52
|
+
return {
|
|
53
|
+
config: {
|
|
54
|
+
benchmark: normalizeBenchmark(selectedComparison),
|
|
55
|
+
methods
|
|
56
|
+
},
|
|
57
|
+
configFilePath: loaded.configFilePath
|
|
58
|
+
};
|
|
59
|
+
} catch (error) {
|
|
60
|
+
const errorMessage = errorMessageFrom(error) ?? "Unknown comparison config loading error.";
|
|
61
|
+
const resolvedPath = options.configFilePath ?? "vieval.config";
|
|
62
|
+
throw new Error(`Failed to load comparison config "${resolvedPath}": ${errorMessage}`);
|
|
36
63
|
}
|
|
37
64
|
}
|
|
38
|
-
function
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
}
|
|
42
|
-
function normalizeMethodShape(method, configDirectory, index) {
|
|
43
|
-
const id = method.id.trim();
|
|
44
|
-
const workspace = method.workspace.trim();
|
|
45
|
-
const project = method.project.trim();
|
|
46
|
-
const configFilePath = method.configFilePath?.trim();
|
|
47
|
-
if (id.length === 0) throw new Error(`Comparison method #${index + 1} is missing id.`);
|
|
48
|
-
if (workspace.length === 0) throw new Error(`Comparison method "${id}" is missing workspace.`);
|
|
49
|
-
if (project.length === 0) throw new Error(`Comparison method "${id}" is missing project.`);
|
|
50
|
-
const resolvedWorkspace = isAbsolute(workspace) ? workspace : resolve(configDirectory, workspace);
|
|
51
|
-
return {
|
|
52
|
-
configFilePath: configFilePath == null || configFilePath.length === 0 ? void 0 : isAbsolute(configFilePath) ? configFilePath : resolve(configDirectory, configFilePath),
|
|
53
|
-
id,
|
|
54
|
-
project,
|
|
55
|
-
workspace: resolvedWorkspace
|
|
56
|
-
};
|
|
57
|
-
}
|
|
58
|
-
async function findWorkspaceConfigFile(workspaceDirectory) {
|
|
59
|
-
for (const fileName of supportedWorkspaceConfigFileNames) {
|
|
60
|
-
const candidate = join(workspaceDirectory, fileName);
|
|
61
|
-
if (await isReadableFile(candidate)) return candidate;
|
|
62
|
-
}
|
|
63
|
-
return null;
|
|
65
|
+
function assertComparisonMode(config) {
|
|
66
|
+
const mode = detectCliConfigMode(config);
|
|
67
|
+
if (mode !== "comparisons") throw new Error(`Expected comparison-mode config, but received ${mode}-mode config.`);
|
|
64
68
|
}
|
|
65
69
|
function createDiscoveredMethodId(configDirectory, workspace, projectName) {
|
|
66
70
|
const relativeWorkspace = relative(configDirectory, workspace);
|
|
@@ -92,24 +96,20 @@ async function discoverMethodsFromWorkspaceGlobs(args) {
|
|
|
92
96
|
}
|
|
93
97
|
return methods;
|
|
94
98
|
}
|
|
95
|
-
function
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
const mode = detectCliConfigMode(config);
|
|
102
|
-
if (mode !== "comparisons") throw new Error(`Expected comparison-mode config, but received ${mode}-mode config.`);
|
|
99
|
+
async function findWorkspaceConfigFile(workspaceDirectory) {
|
|
100
|
+
for (const fileName of supportedWorkspaceConfigFileNames) {
|
|
101
|
+
const candidate = join(workspaceDirectory, fileName);
|
|
102
|
+
if (await isReadableFile(candidate)) return candidate;
|
|
103
|
+
}
|
|
104
|
+
return null;
|
|
103
105
|
}
|
|
104
|
-
function
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
106
|
+
async function isReadableFile(filePath) {
|
|
107
|
+
try {
|
|
108
|
+
await access(filePath);
|
|
109
|
+
return true;
|
|
110
|
+
} catch {
|
|
111
|
+
return false;
|
|
109
112
|
}
|
|
110
|
-
const selected = comparisons.find((item) => item.id === comparisonId);
|
|
111
|
-
if (selected == null) throw new Error(`Unknown comparison id "${comparisonId}".`);
|
|
112
|
-
return selected;
|
|
113
113
|
}
|
|
114
114
|
function normalizeBenchmark(comparison) {
|
|
115
115
|
const benchmarkId = comparison.benchmark.id.trim();
|
|
@@ -121,40 +121,40 @@ function normalizeBenchmark(comparison) {
|
|
|
121
121
|
sharedCaseNamespace
|
|
122
122
|
};
|
|
123
123
|
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
methods
|
|
150
|
-
},
|
|
151
|
-
configFilePath: loaded.configFilePath
|
|
152
|
-
};
|
|
153
|
-
} catch (error) {
|
|
154
|
-
const errorMessage = errorMessageFrom(error) ?? "Unknown comparison config loading error.";
|
|
155
|
-
const resolvedPath = options.configFilePath ?? "vieval.config";
|
|
156
|
-
throw new Error(`Failed to load comparison config "${resolvedPath}": ${errorMessage}`);
|
|
124
|
+
function normalizeGlobInput(patterns) {
|
|
125
|
+
if (patterns == null) return [];
|
|
126
|
+
return (typeof patterns === "string" ? [patterns] : patterns).map((pattern) => pattern.trim()).filter((pattern) => pattern.length > 0);
|
|
127
|
+
}
|
|
128
|
+
function normalizeMethodShape(method, configDirectory, index) {
|
|
129
|
+
const id = method.id.trim();
|
|
130
|
+
const workspace = method.workspace.trim();
|
|
131
|
+
const project = method.project.trim();
|
|
132
|
+
const configFilePath = method.configFilePath?.trim();
|
|
133
|
+
if (id.length === 0) throw new Error(`Comparison method #${index + 1} is missing id.`);
|
|
134
|
+
if (workspace.length === 0) throw new Error(`Comparison method "${id}" is missing workspace.`);
|
|
135
|
+
if (project.length === 0) throw new Error(`Comparison method "${id}" is missing project.`);
|
|
136
|
+
const resolvedWorkspace = isAbsolute(workspace) ? workspace : resolve(configDirectory, workspace);
|
|
137
|
+
return {
|
|
138
|
+
configFilePath: configFilePath == null || configFilePath.length === 0 ? void 0 : isAbsolute(configFilePath) ? configFilePath : resolve(configDirectory, configFilePath),
|
|
139
|
+
id,
|
|
140
|
+
project,
|
|
141
|
+
workspace: resolvedWorkspace
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
function selectComparisonConfig(comparisons, comparisonId) {
|
|
145
|
+
if (comparisons.length === 0) throw new Error("Comparison config requires at least one comparisons entry.");
|
|
146
|
+
if (comparisonId == null || comparisonId.trim().length === 0) {
|
|
147
|
+
if (comparisons.length > 1) throw new Error(`Multiple comparisons found. Provide --comparison. Available ids: ${comparisons.map((item) => item.id).join(", ")}`);
|
|
148
|
+
return comparisons[0];
|
|
157
149
|
}
|
|
150
|
+
const selected = comparisons.find((item) => item.id === comparisonId);
|
|
151
|
+
if (selected == null) throw new Error(`Unknown comparison id "${comparisonId}".`);
|
|
152
|
+
return selected;
|
|
153
|
+
}
|
|
154
|
+
function validateMethodIdsAreUnique(methods) {
|
|
155
|
+
const methodIds = methods.map((method) => method.id);
|
|
156
|
+
const duplicatedMethodId = methodIds.find((methodId, index) => methodIds.indexOf(methodId) !== index);
|
|
157
|
+
if (duplicatedMethodId != null) throw new Error(`Duplicate comparison method id "${duplicatedMethodId}".`);
|
|
158
158
|
}
|
|
159
159
|
//#endregion
|
|
160
160
|
//#region src/cli/report-records.ts
|
|
@@ -242,51 +242,39 @@ function encodeJsonl(records) {
|
|
|
242
242
|
if (records.length === 0) return "";
|
|
243
243
|
return `${records.map((record) => JSON.stringify(record)).join("\n")}\n`;
|
|
244
244
|
}
|
|
245
|
-
function
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
245
|
+
function addRecordScores(summary, record) {
|
|
246
|
+
for (const [kind, score] of Object.entries(record.scores)) {
|
|
247
|
+
if (!Number.isFinite(score)) continue;
|
|
248
|
+
summary[kind] ??= {
|
|
249
|
+
average: 0,
|
|
250
|
+
count: 0,
|
|
251
|
+
sum: 0
|
|
252
|
+
};
|
|
253
|
+
summary[kind].count += 1;
|
|
254
|
+
summary[kind].sum += score;
|
|
255
|
+
}
|
|
250
256
|
}
|
|
251
|
-
function
|
|
257
|
+
function applyCaseEnd(draft, event) {
|
|
252
258
|
const data = asRecord(event.data);
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
runId: stringFrom(data?.runId) ?? event.runId ?? args.runId,
|
|
259
|
-
taskId: stringFrom(data?.taskId) ?? event.taskId ?? "",
|
|
260
|
-
workspaceId: stringFrom(data?.workspaceId) ?? event.workspaceId ?? args.workspaceId
|
|
261
|
-
};
|
|
259
|
+
draft.caseName = extractCaseName(event) ?? draft.caseName;
|
|
260
|
+
draft.endedAt = stringFrom(data?.endedAt) ?? event.timestamp ?? draft.endedAt;
|
|
261
|
+
draft.output = data != null && "output" in data ? data.output : draft.output;
|
|
262
|
+
draft.state = normalizeState(stringFrom(data?.state)) ?? "failed";
|
|
263
|
+
draft.scores.exact ??= draft.state === "passed" ? 1 : 0;
|
|
262
264
|
}
|
|
263
|
-
function
|
|
264
|
-
const
|
|
265
|
-
const
|
|
266
|
-
if (
|
|
267
|
-
const
|
|
268
|
-
|
|
269
|
-
caseId: ids.caseId,
|
|
270
|
-
caseName: extractCaseName(event) ?? ids.caseId,
|
|
271
|
-
experimentId: ids.experimentId,
|
|
272
|
-
metrics: {},
|
|
273
|
-
projectName: ids.projectName || args.projectName,
|
|
274
|
-
retryCount: 0,
|
|
275
|
-
runId: ids.runId,
|
|
276
|
-
scores: {},
|
|
277
|
-
startCount: 0,
|
|
278
|
-
taskId: ids.taskId,
|
|
279
|
-
workspaceId: ids.workspaceId
|
|
280
|
-
};
|
|
281
|
-
drafts.set(key, draft);
|
|
282
|
-
return draft;
|
|
265
|
+
function applyCaseMetric(draft, event) {
|
|
266
|
+
const data = asRecord(event.data);
|
|
267
|
+
const name = stringFrom(data?.name);
|
|
268
|
+
if (name == null) return;
|
|
269
|
+
const value = data?.value;
|
|
270
|
+
if (isCaseMetricValue(value)) draft.metrics[name] = value;
|
|
283
271
|
}
|
|
284
|
-
function
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
draft.
|
|
272
|
+
function applyCaseScore(draft, event) {
|
|
273
|
+
const data = asRecord(event.data);
|
|
274
|
+
const kind = stringFrom(data?.kind) ?? stringFrom(data?.name) ?? stringFrom(data?.["vieval.score.kind"]);
|
|
275
|
+
const score = numberFrom(data?.score) ?? numberFrom(data?.value) ?? numberFrom(data?.["vieval.score.value"]);
|
|
276
|
+
if (kind == null || score == null) return;
|
|
277
|
+
draft.scores[kind] = score;
|
|
290
278
|
}
|
|
291
279
|
function applyCaseStart(draft, event) {
|
|
292
280
|
const data = asRecord(event.data);
|
|
@@ -307,66 +295,55 @@ function applyCaseStart(draft, event) {
|
|
|
307
295
|
}
|
|
308
296
|
draft.retryCount = Math.max(draft.retryCount, draft.startCount - 1);
|
|
309
297
|
}
|
|
310
|
-
function
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
298
|
+
function applyIdentity(draft, ids, event, args) {
|
|
299
|
+
draft.attemptId = ids.attemptId || args.attemptId;
|
|
300
|
+
draft.experimentId = ids.experimentId || args.experimentId;
|
|
301
|
+
draft.projectName = extractExplicitProjectName(event) ?? draft.projectName;
|
|
302
|
+
draft.runId = ids.runId || args.runId;
|
|
303
|
+
draft.workspaceId = ids.workspaceId || args.workspaceId;
|
|
316
304
|
}
|
|
317
|
-
function
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
const score = numberFrom(data?.score) ?? numberFrom(data?.value) ?? numberFrom(data?.["vieval.score.value"]);
|
|
321
|
-
if (kind == null || score == null) return;
|
|
322
|
-
draft.scores[kind] = score;
|
|
305
|
+
function asRecord(value) {
|
|
306
|
+
if (value == null || typeof value !== "object" || Array.isArray(value)) return;
|
|
307
|
+
return value;
|
|
323
308
|
}
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
309
|
+
/**
|
|
310
|
+
* Normalizes duration timestamps.
|
|
311
|
+
*
|
|
312
|
+
* Before:
|
|
313
|
+
* - `startedAt="2026-05-08T00:00:00.000Z"`, `endedAt="2026-05-08T00:00:01.250Z"`
|
|
314
|
+
* - `startedAt="bad"`, `endedAt="2026-05-08T00:00:01.250Z"`
|
|
315
|
+
*
|
|
316
|
+
* After:
|
|
317
|
+
* - `1250`
|
|
318
|
+
* - `0`
|
|
319
|
+
*/
|
|
320
|
+
function calculateDurationMs(startedAt, endedAt) {
|
|
321
|
+
const started = Date.parse(startedAt);
|
|
322
|
+
const ended = Date.parse(endedAt);
|
|
323
|
+
if (!Number.isFinite(started) || !Number.isFinite(ended)) return 0;
|
|
324
|
+
return Math.max(0, ended - started);
|
|
331
325
|
}
|
|
332
|
-
function
|
|
333
|
-
|
|
334
|
-
|
|
326
|
+
function createCaseKey(taskId, caseId) {
|
|
327
|
+
return `${taskId}\u0000${caseId}`;
|
|
328
|
+
}
|
|
329
|
+
function extractCaseName(event) {
|
|
330
|
+
const data = asRecord(event.data);
|
|
331
|
+
return stringFrom(data?.caseName) ?? stringFrom(data?.name);
|
|
332
|
+
}
|
|
333
|
+
function extractEventIds(event, args) {
|
|
334
|
+
const data = asRecord(event.data);
|
|
335
335
|
return {
|
|
336
|
-
attemptId:
|
|
337
|
-
caseId:
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
metrics: draft.metrics,
|
|
344
|
-
...draft.output === void 0 ? {} : { output: draft.output },
|
|
345
|
-
projectName: draft.projectName,
|
|
346
|
-
retryCount: draft.retryCount,
|
|
347
|
-
runId: draft.runId,
|
|
348
|
-
schemaVersion: 1,
|
|
349
|
-
scores: draft.scores,
|
|
350
|
-
startedAt,
|
|
351
|
-
state: draft.state ?? "failed",
|
|
352
|
-
taskId: draft.taskId,
|
|
353
|
-
workspaceId: draft.workspaceId
|
|
336
|
+
attemptId: stringFrom(data?.attemptId) ?? event.attemptId ?? args.attemptId,
|
|
337
|
+
caseId: stringFrom(data?.caseId) ?? event.caseId ?? "",
|
|
338
|
+
experimentId: stringFrom(data?.experimentId) ?? event.experimentId ?? args.experimentId,
|
|
339
|
+
projectName: stringFrom(data?.projectName) ?? event.projectName ?? event.projectId ?? args.projectName,
|
|
340
|
+
runId: stringFrom(data?.runId) ?? event.runId ?? args.runId,
|
|
341
|
+
taskId: stringFrom(data?.taskId) ?? event.taskId ?? "",
|
|
342
|
+
workspaceId: stringFrom(data?.workspaceId) ?? event.workspaceId ?? args.workspaceId
|
|
354
343
|
};
|
|
355
344
|
}
|
|
356
|
-
function
|
|
357
|
-
|
|
358
|
-
if (!Number.isFinite(score)) continue;
|
|
359
|
-
summary[kind] ??= {
|
|
360
|
-
average: 0,
|
|
361
|
-
count: 0,
|
|
362
|
-
sum: 0
|
|
363
|
-
};
|
|
364
|
-
summary[kind].count += 1;
|
|
365
|
-
summary[kind].sum += score;
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
function finalizeSummaryGroups(groups) {
|
|
369
|
-
return Object.fromEntries(Object.entries(groups).map(([key, summary]) => [key, finalizeScoreSummary(summary)]));
|
|
345
|
+
function extractExplicitProjectName(event) {
|
|
346
|
+
return stringFrom(asRecord(event.data)?.projectName) ?? event.projectName ?? event.projectId;
|
|
370
347
|
}
|
|
371
348
|
function finalizeScoreSummary(summary) {
|
|
372
349
|
return Object.fromEntries(Object.entries(summary).map(([kind, bucket]) => [kind, {
|
|
@@ -375,6 +352,9 @@ function finalizeScoreSummary(summary) {
|
|
|
375
352
|
sum: bucket.sum
|
|
376
353
|
}]));
|
|
377
354
|
}
|
|
355
|
+
function finalizeSummaryGroups(groups) {
|
|
356
|
+
return Object.fromEntries(Object.entries(groups).map(([key, summary]) => [key, finalizeScoreSummary(summary)]));
|
|
357
|
+
}
|
|
378
358
|
function getGroupValue(record, key) {
|
|
379
359
|
if (Object.hasOwn(record.metrics, key)) return {
|
|
380
360
|
exists: true,
|
|
@@ -386,49 +366,69 @@ function getGroupValue(record, key) {
|
|
|
386
366
|
value: directValue
|
|
387
367
|
} : { exists: false };
|
|
388
368
|
}
|
|
389
|
-
function
|
|
390
|
-
const
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
369
|
+
function getOrCreateDraft(drafts, ids, event, args) {
|
|
370
|
+
const key = createCaseKey(ids.taskId, ids.caseId);
|
|
371
|
+
const existing = drafts.get(key);
|
|
372
|
+
if (existing != null) return existing;
|
|
373
|
+
const draft = {
|
|
374
|
+
attemptId: ids.attemptId,
|
|
375
|
+
caseId: ids.caseId,
|
|
376
|
+
caseName: extractCaseName(event) ?? ids.caseId,
|
|
377
|
+
experimentId: ids.experimentId,
|
|
378
|
+
metrics: {},
|
|
379
|
+
projectName: ids.projectName || args.projectName,
|
|
380
|
+
retryCount: 0,
|
|
381
|
+
runId: ids.runId,
|
|
382
|
+
scores: {},
|
|
383
|
+
startCount: 0,
|
|
384
|
+
taskId: ids.taskId,
|
|
385
|
+
workspaceId: ids.workspaceId
|
|
386
|
+
};
|
|
387
|
+
drafts.set(key, draft);
|
|
388
|
+
return draft;
|
|
395
389
|
}
|
|
396
|
-
function
|
|
397
|
-
return
|
|
390
|
+
function isCaseMetricValue(value) {
|
|
391
|
+
if (value == null || typeof value === "boolean" || typeof value === "number" || typeof value === "string") return true;
|
|
392
|
+
return Array.isArray(value);
|
|
398
393
|
}
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
* - `startedAt="bad"`, `endedAt="2026-05-08T00:00:01.250Z"`
|
|
405
|
-
*
|
|
406
|
-
* After:
|
|
407
|
-
* - `1250`
|
|
408
|
-
* - `0`
|
|
409
|
-
*/
|
|
410
|
-
function calculateDurationMs(startedAt, endedAt) {
|
|
411
|
-
const started = Date.parse(startedAt);
|
|
412
|
-
const ended = Date.parse(endedAt);
|
|
413
|
-
if (!Number.isFinite(started) || !Number.isFinite(ended)) return 0;
|
|
414
|
-
return Math.max(0, ended - started);
|
|
394
|
+
function normalizeCaseEventName(eventName) {
|
|
395
|
+
if (eventName === "task.case.start" || eventName === "CaseStarted") return "start";
|
|
396
|
+
if (eventName === "task.case.metric") return "metric";
|
|
397
|
+
if (eventName === "task.case.score") return "score";
|
|
398
|
+
if (eventName === "task.case.end" || eventName === "CaseEnded") return "end";
|
|
415
399
|
}
|
|
416
400
|
function normalizeState(value) {
|
|
417
401
|
if (value === "failed" || value === "passed" || value === "skipped" || value === "timeout") return value;
|
|
418
402
|
}
|
|
419
|
-
function
|
|
420
|
-
|
|
421
|
-
return Array.isArray(value);
|
|
422
|
-
}
|
|
423
|
-
function asRecord(value) {
|
|
424
|
-
if (value == null || typeof value !== "object" || Array.isArray(value)) return;
|
|
425
|
-
return value;
|
|
403
|
+
function numberFrom(value) {
|
|
404
|
+
return typeof value === "number" && Number.isFinite(value) ? value : void 0;
|
|
426
405
|
}
|
|
427
406
|
function stringFrom(value) {
|
|
428
407
|
return typeof value === "string" ? value : void 0;
|
|
429
408
|
}
|
|
430
|
-
function
|
|
431
|
-
|
|
409
|
+
function toCaseRecord(draft) {
|
|
410
|
+
const startedAt = draft.startedAt ?? draft.endedAt ?? "";
|
|
411
|
+
const endedAt = draft.endedAt ?? startedAt;
|
|
412
|
+
return {
|
|
413
|
+
attemptId: draft.attemptId,
|
|
414
|
+
caseId: draft.caseId,
|
|
415
|
+
caseName: draft.caseName,
|
|
416
|
+
durationMs: calculateDurationMs(startedAt, endedAt),
|
|
417
|
+
endedAt,
|
|
418
|
+
experimentId: draft.experimentId,
|
|
419
|
+
...draft.input === void 0 ? {} : { input: draft.input },
|
|
420
|
+
metrics: draft.metrics,
|
|
421
|
+
...draft.output === void 0 ? {} : { output: draft.output },
|
|
422
|
+
projectName: draft.projectName,
|
|
423
|
+
retryCount: draft.retryCount,
|
|
424
|
+
runId: draft.runId,
|
|
425
|
+
schemaVersion: 1,
|
|
426
|
+
scores: draft.scores,
|
|
427
|
+
startedAt,
|
|
428
|
+
state: draft.state ?? "failed",
|
|
429
|
+
taskId: draft.taskId,
|
|
430
|
+
workspaceId: draft.workspaceId
|
|
431
|
+
};
|
|
432
432
|
}
|
|
433
433
|
//#endregion
|
|
434
434
|
//#region src/cli/report-selectors.ts
|
|
@@ -493,6 +493,28 @@ const reportCasesHelpText = `
|
|
|
493
493
|
--group-by Case field, score name, or metric name used for grouped score summaries
|
|
494
494
|
`;
|
|
495
495
|
/**
|
|
496
|
+
* Builds filtered case inspection output.
|
|
497
|
+
*
|
|
498
|
+
* Use when:
|
|
499
|
+
* - `vieval report cases` needs deterministic JSON/table output
|
|
500
|
+
* - tests need pure filtering and grouping behavior without process I/O
|
|
501
|
+
*
|
|
502
|
+
* Expects:
|
|
503
|
+
* - `where` filters use `key=value`
|
|
504
|
+
* - lookup keys may target direct case fields, score names, or metric names
|
|
505
|
+
*
|
|
506
|
+
* Returns:
|
|
507
|
+
* - filtered records plus grouped score summaries when `groupBy` is present
|
|
508
|
+
*/
|
|
509
|
+
function buildReportCasesOutput(records, options) {
|
|
510
|
+
const whereFilters = (options.where ?? []).map(parseSelector);
|
|
511
|
+
const filteredRecords = records.filter((record) => matchesWhereFilters(record, whereFilters));
|
|
512
|
+
return {
|
|
513
|
+
groups: options.groupBy == null ? void 0 : buildCaseGroups(filteredRecords, options.groupBy),
|
|
514
|
+
records: [...filteredRecords]
|
|
515
|
+
};
|
|
516
|
+
}
|
|
517
|
+
/**
|
|
496
518
|
* Reads normalized case records from one report run directory or report root.
|
|
497
519
|
*
|
|
498
520
|
* Use when:
|
|
@@ -524,28 +546,6 @@ async function readCaseRecordsFromReport(reportPath) {
|
|
|
524
546
|
return records;
|
|
525
547
|
}
|
|
526
548
|
/**
|
|
527
|
-
* Builds filtered case inspection output.
|
|
528
|
-
*
|
|
529
|
-
* Use when:
|
|
530
|
-
* - `vieval report cases` needs deterministic JSON/table output
|
|
531
|
-
* - tests need pure filtering and grouping behavior without process I/O
|
|
532
|
-
*
|
|
533
|
-
* Expects:
|
|
534
|
-
* - `where` filters use `key=value`
|
|
535
|
-
* - lookup keys may target direct case fields, score names, or metric names
|
|
536
|
-
*
|
|
537
|
-
* Returns:
|
|
538
|
-
* - filtered records plus grouped score summaries when `groupBy` is present
|
|
539
|
-
*/
|
|
540
|
-
function buildReportCasesOutput(records, options) {
|
|
541
|
-
const whereFilters = (options.where ?? []).map(parseSelector);
|
|
542
|
-
const filteredRecords = records.filter((record) => matchesWhereFilters(record, whereFilters));
|
|
543
|
-
return {
|
|
544
|
-
groups: options.groupBy == null ? void 0 : buildCaseGroups(filteredRecords, options.groupBy),
|
|
545
|
-
records: [...filteredRecords]
|
|
546
|
-
};
|
|
547
|
-
}
|
|
548
|
-
/**
|
|
549
549
|
* Runs the `vieval report cases` command.
|
|
550
550
|
*
|
|
551
551
|
* Call stack:
|
|
@@ -583,66 +583,16 @@ async function runReportCasesCli(argv) {
|
|
|
583
583
|
process.exitCode = 1;
|
|
584
584
|
}
|
|
585
585
|
}
|
|
586
|
-
function
|
|
587
|
-
const
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
format: {
|
|
597
|
-
default: "table",
|
|
598
|
-
type: "string"
|
|
599
|
-
},
|
|
600
|
-
groupBy: { type: "string" },
|
|
601
|
-
where: {
|
|
602
|
-
isMultiple: true,
|
|
603
|
-
type: "string"
|
|
604
|
-
}
|
|
605
|
-
},
|
|
606
|
-
importMeta: import.meta
|
|
607
|
-
});
|
|
608
|
-
const reportPath = cli.input[0];
|
|
609
|
-
if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
|
|
610
|
-
return {
|
|
611
|
-
format: normalizeReportCasesFormat(cli.flags.format),
|
|
612
|
-
groupBy: cli.flags.groupBy,
|
|
613
|
-
reportPath,
|
|
614
|
-
where: cli.flags.where
|
|
615
|
-
};
|
|
616
|
-
}
|
|
617
|
-
function normalizeReportCasesFormat(value) {
|
|
618
|
-
const normalized = value.toLowerCase();
|
|
619
|
-
if (normalized === "json") return "json";
|
|
620
|
-
if (normalized === "jsonl") return "jsonl";
|
|
621
|
-
return "table";
|
|
622
|
-
}
|
|
623
|
-
async function resolveCaseRecordPaths(reportPath) {
|
|
624
|
-
const absoluteReportPath = resolve(reportPath);
|
|
625
|
-
const directCaseFilePath = resolve(absoluteReportPath, "cases.jsonl");
|
|
626
|
-
if (existsSync(absoluteReportPath) && absoluteReportPath.endsWith(".jsonl")) return [absoluteReportPath];
|
|
627
|
-
if (existsSync(directCaseFilePath)) return [directCaseFilePath];
|
|
628
|
-
return (await glob("**/cases.jsonl", {
|
|
629
|
-
absolute: true,
|
|
630
|
-
cwd: absoluteReportPath
|
|
631
|
-
})).sort((left, right) => left.localeCompare(right));
|
|
632
|
-
}
|
|
633
|
-
function matchesWhereFilters(record, whereFilters) {
|
|
634
|
-
return whereFilters.every((parsed) => {
|
|
635
|
-
const resolved = getCaseSelectorValue(record, parsed.key);
|
|
636
|
-
return resolved.exists && String(resolved.value) === parsed.value;
|
|
637
|
-
});
|
|
638
|
-
}
|
|
639
|
-
function parseSelector(selector) {
|
|
640
|
-
const separatorIndex = selector.indexOf("=");
|
|
641
|
-
if (separatorIndex <= 0 || separatorIndex === selector.length - 1) throw new Error(`Invalid selector "${selector}". Expected "key=value".`);
|
|
642
|
-
return {
|
|
643
|
-
key: selector.slice(0, separatorIndex).trim(),
|
|
644
|
-
value: selector.slice(separatorIndex + 1).trim()
|
|
645
|
-
};
|
|
586
|
+
function addScores(summary, scores) {
|
|
587
|
+
for (const [scoreName, value] of Object.entries(scores)) {
|
|
588
|
+
summary[scoreName] ??= {
|
|
589
|
+
average: 0,
|
|
590
|
+
count: 0,
|
|
591
|
+
sum: 0
|
|
592
|
+
};
|
|
593
|
+
summary[scoreName].count += 1;
|
|
594
|
+
summary[scoreName].sum += value;
|
|
595
|
+
}
|
|
646
596
|
}
|
|
647
597
|
function buildCaseGroups(records, groupBy) {
|
|
648
598
|
const groups = {};
|
|
@@ -662,17 +612,6 @@ function buildCaseGroups(records, groupBy) {
|
|
|
662
612
|
scores: finalizeScores(group.scores)
|
|
663
613
|
}]));
|
|
664
614
|
}
|
|
665
|
-
function addScores(summary, scores) {
|
|
666
|
-
for (const [scoreName, value] of Object.entries(scores)) {
|
|
667
|
-
summary[scoreName] ??= {
|
|
668
|
-
average: 0,
|
|
669
|
-
count: 0,
|
|
670
|
-
sum: 0
|
|
671
|
-
};
|
|
672
|
-
summary[scoreName].count += 1;
|
|
673
|
-
summary[scoreName].sum += value;
|
|
674
|
-
}
|
|
675
|
-
}
|
|
676
615
|
function finalizeScores(summary) {
|
|
677
616
|
return Object.fromEntries(Object.entries(summary).sort(([left], [right]) => left.localeCompare(right)).map(([scoreName, bucket]) => [scoreName, {
|
|
678
617
|
average: bucket.count === 0 ? 0 : bucket.sum / bucket.count,
|
|
@@ -691,6 +630,67 @@ function formatCasesTable(output) {
|
|
|
691
630
|
}
|
|
692
631
|
return lines.join("\n");
|
|
693
632
|
}
|
|
633
|
+
function matchesWhereFilters(record, whereFilters) {
|
|
634
|
+
return whereFilters.every((parsed) => {
|
|
635
|
+
const resolved = getCaseSelectorValue(record, parsed.key);
|
|
636
|
+
return resolved.exists && String(resolved.value) === parsed.value;
|
|
637
|
+
});
|
|
638
|
+
}
|
|
639
|
+
function normalizeCliArgv$6(argv) {
|
|
640
|
+
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
641
|
+
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "cases") return normalizedArgv.slice(2);
|
|
642
|
+
if (normalizedArgv[0] === "cases") return normalizedArgv.slice(1);
|
|
643
|
+
return normalizedArgv;
|
|
644
|
+
}
|
|
645
|
+
function normalizeReportCasesFormat(value) {
|
|
646
|
+
const normalized = value.toLowerCase();
|
|
647
|
+
if (normalized === "json") return "json";
|
|
648
|
+
if (normalized === "jsonl") return "jsonl";
|
|
649
|
+
return "table";
|
|
650
|
+
}
|
|
651
|
+
function parseReportCasesCliArguments(argv) {
|
|
652
|
+
const cli = meow(reportCasesHelpText, {
|
|
653
|
+
argv: normalizeCliArgv$6(argv),
|
|
654
|
+
flags: {
|
|
655
|
+
format: {
|
|
656
|
+
default: "table",
|
|
657
|
+
type: "string"
|
|
658
|
+
},
|
|
659
|
+
groupBy: { type: "string" },
|
|
660
|
+
where: {
|
|
661
|
+
isMultiple: true,
|
|
662
|
+
type: "string"
|
|
663
|
+
}
|
|
664
|
+
},
|
|
665
|
+
importMeta: import.meta
|
|
666
|
+
});
|
|
667
|
+
const reportPath = cli.input[0];
|
|
668
|
+
if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
|
|
669
|
+
return {
|
|
670
|
+
format: normalizeReportCasesFormat(cli.flags.format),
|
|
671
|
+
groupBy: cli.flags.groupBy,
|
|
672
|
+
reportPath,
|
|
673
|
+
where: cli.flags.where
|
|
674
|
+
};
|
|
675
|
+
}
|
|
676
|
+
function parseSelector(selector) {
|
|
677
|
+
const separatorIndex = selector.indexOf("=");
|
|
678
|
+
if (separatorIndex <= 0 || separatorIndex === selector.length - 1) throw new Error(`Invalid selector "${selector}". Expected "key=value".`);
|
|
679
|
+
return {
|
|
680
|
+
key: selector.slice(0, separatorIndex).trim(),
|
|
681
|
+
value: selector.slice(separatorIndex + 1).trim()
|
|
682
|
+
};
|
|
683
|
+
}
|
|
684
|
+
async function resolveCaseRecordPaths(reportPath) {
|
|
685
|
+
const absoluteReportPath = resolve(reportPath);
|
|
686
|
+
const directCaseFilePath = resolve(absoluteReportPath, "cases.jsonl");
|
|
687
|
+
if (existsSync(absoluteReportPath) && absoluteReportPath.endsWith(".jsonl")) return [absoluteReportPath];
|
|
688
|
+
if (existsSync(directCaseFilePath)) return [directCaseFilePath];
|
|
689
|
+
return (await glob("**/cases.jsonl", {
|
|
690
|
+
absolute: true,
|
|
691
|
+
cwd: absoluteReportPath
|
|
692
|
+
})).sort((left, right) => left.localeCompare(right));
|
|
693
|
+
}
|
|
694
694
|
//#endregion
|
|
695
695
|
//#region src/cli/report-compare.ts
|
|
696
696
|
/**
|
|
@@ -735,17 +735,26 @@ function buildCompareReportArtifact(args) {
|
|
|
735
735
|
reportPath: args.reportPath
|
|
736
736
|
};
|
|
737
737
|
}
|
|
738
|
+
/**
|
|
739
|
+
* Writes compare report artifact as JSON.
|
|
740
|
+
*/
|
|
741
|
+
async function writeCompareReportArtifact(args) {
|
|
742
|
+
const outputPath = resolve(args.outputPath);
|
|
743
|
+
await mkdir(dirname(outputPath), { recursive: true });
|
|
744
|
+
await writeFile(outputPath, `${JSON.stringify(args.artifact, null, 2)}\n`, "utf-8");
|
|
745
|
+
return outputPath;
|
|
746
|
+
}
|
|
738
747
|
function countCasesForProject(caseRecords, projectName) {
|
|
739
748
|
return caseRecords.filter((record) => record.projectName === projectName).length;
|
|
740
749
|
}
|
|
741
|
-
function countDistinctCasesForProject(caseRecords, projectName) {
|
|
742
|
-
return countDistinctCases(caseRecords.filter((record) => record.projectName === projectName));
|
|
743
|
-
}
|
|
744
750
|
function countDistinctCases(caseRecords) {
|
|
745
751
|
const caseKeys = /* @__PURE__ */ new Set();
|
|
746
752
|
for (const record of caseRecords) caseKeys.add(`${record.projectName}:${record.taskId}:${record.caseId}`);
|
|
747
753
|
return caseKeys.size;
|
|
748
754
|
}
|
|
755
|
+
function countDistinctCasesForProject(caseRecords, projectName) {
|
|
756
|
+
return countDistinctCases(caseRecords.filter((record) => record.projectName === projectName));
|
|
757
|
+
}
|
|
749
758
|
function createWeightedAverage(projects, selectAverage) {
|
|
750
759
|
let weightedScoreTotal = 0;
|
|
751
760
|
let weightTotal = 0;
|
|
@@ -758,15 +767,6 @@ function createWeightedAverage(projects, selectAverage) {
|
|
|
758
767
|
if (weightTotal === 0) return null;
|
|
759
768
|
return weightedScoreTotal / weightTotal;
|
|
760
769
|
}
|
|
761
|
-
/**
|
|
762
|
-
* Writes compare report artifact as JSON.
|
|
763
|
-
*/
|
|
764
|
-
async function writeCompareReportArtifact(args) {
|
|
765
|
-
const outputPath = resolve(args.outputPath);
|
|
766
|
-
await mkdir(dirname(outputPath), { recursive: true });
|
|
767
|
-
await writeFile(outputPath, `${JSON.stringify(args.artifact, null, 2)}\n`, "utf-8");
|
|
768
|
-
return outputPath;
|
|
769
|
-
}
|
|
770
770
|
//#endregion
|
|
771
771
|
//#region src/cli/discovery.ts
|
|
772
772
|
/**
|
|
@@ -927,21 +927,22 @@ function buildLocalOtlpProjection(args) {
|
|
|
927
927
|
}] }] }
|
|
928
928
|
};
|
|
929
929
|
}
|
|
930
|
-
function
|
|
931
|
-
return
|
|
932
|
-
key,
|
|
933
|
-
value: toAnyValue(value)
|
|
934
|
-
}));
|
|
930
|
+
function collectProjectNames(records) {
|
|
931
|
+
return [...new Set(records.map((record) => record.projectName))].sort((left, right) => left.localeCompare(right));
|
|
935
932
|
}
|
|
936
|
-
function
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
933
|
+
function collectScoreKinds(records) {
|
|
934
|
+
return [...new Set(records.flatMap((record) => Object.keys(record.scores)))].sort((left, right) => left.localeCompare(right));
|
|
935
|
+
}
|
|
936
|
+
function collectTasks(records) {
|
|
937
|
+
const tasks = /* @__PURE__ */ new Map();
|
|
938
|
+
for (const record of records) tasks.set(`${record.projectName}\0${record.taskId}`, {
|
|
939
|
+
projectName: record.projectName,
|
|
940
|
+
taskId: record.taskId
|
|
941
|
+
});
|
|
942
|
+
return [...tasks.values()].sort((left, right) => {
|
|
943
|
+
const projectOrder = left.projectName.localeCompare(right.projectName);
|
|
944
|
+
return projectOrder === 0 ? left.taskId.localeCompare(right.taskId) : projectOrder;
|
|
945
|
+
});
|
|
945
946
|
}
|
|
946
947
|
function isAttributeScalar(value) {
|
|
947
948
|
return value == null || typeof value === "boolean" || typeof value === "number" || typeof value === "string";
|
|
@@ -958,43 +959,32 @@ function isoToUnixNano(value) {
|
|
|
958
959
|
if (!Number.isFinite(unixMilliseconds)) return "0";
|
|
959
960
|
return String(BigInt(unixMilliseconds) * 1000000n);
|
|
960
961
|
}
|
|
961
|
-
function
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
962
|
+
function toAnyValue(value) {
|
|
963
|
+
if (Array.isArray(value)) return { arrayValue: { values: value.map((item) => toAnyValue(item)) } };
|
|
964
|
+
if (isAttributeScalar(value)) {
|
|
965
|
+
if (typeof value === "boolean") return { boolValue: value };
|
|
966
|
+
if (typeof value === "number") return Number.isFinite(value) ? { doubleValue: value } : { stringValue: String(value) };
|
|
967
|
+
if (value == null) return { stringValue: "null" };
|
|
968
|
+
return { stringValue: value };
|
|
969
|
+
}
|
|
970
|
+
return { stringValue: stableStringify(value) };
|
|
966
971
|
}
|
|
967
|
-
function
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
});
|
|
973
|
-
return [...tasks.values()].sort((left, right) => {
|
|
974
|
-
const projectOrder = left.projectName.localeCompare(right.projectName);
|
|
975
|
-
return projectOrder === 0 ? left.taskId.localeCompare(right.taskId) : projectOrder;
|
|
976
|
-
});
|
|
972
|
+
function toAttributes(attributes) {
|
|
973
|
+
return Object.entries(attributes).filter(([, value]) => value !== void 0).sort(([leftKey], [rightKey]) => leftKey.localeCompare(rightKey)).map(([key, value]) => ({
|
|
974
|
+
key,
|
|
975
|
+
value: toAnyValue(value)
|
|
976
|
+
}));
|
|
977
977
|
}
|
|
978
978
|
//#endregion
|
|
979
979
|
//#region src/cli/report-artifacts.ts
|
|
980
980
|
/**
|
|
981
|
-
*
|
|
981
|
+
* Reads all run artifacts found under `reportPath`.
|
|
982
982
|
*
|
|
983
983
|
* Use when:
|
|
984
|
-
* - callers
|
|
985
|
-
*
|
|
986
|
-
* Returns:
|
|
987
|
-
* - sorted absolute summary file paths
|
|
984
|
+
* - callers need multi-run analysis from a directory root
|
|
988
985
|
*/
|
|
989
|
-
async function
|
|
990
|
-
|
|
991
|
-
const directSummaryPath = resolve(absoluteReportPath, "run-summary.json");
|
|
992
|
-
if (existsSync(absoluteReportPath) && absoluteReportPath.endsWith(".json")) return [absoluteReportPath];
|
|
993
|
-
if (existsSync(directSummaryPath)) return [directSummaryPath];
|
|
994
|
-
return (await glob("**/run-summary.json", {
|
|
995
|
-
absolute: true,
|
|
996
|
-
cwd: absoluteReportPath
|
|
997
|
-
})).sort((left, right) => left.localeCompare(right));
|
|
986
|
+
async function readReportArtifacts(reportPath) {
|
|
987
|
+
return (await resolveRunSummaryPaths(reportPath)).map((summaryFilePath) => readReportRunArtifact(summaryFilePath));
|
|
998
988
|
}
|
|
999
989
|
/**
|
|
1000
990
|
* Reads one run report artifact set from `run-summary.json` and sibling `events.jsonl`.
|
|
@@ -1031,13 +1021,23 @@ function readReportRunArtifact(summaryFilePath) {
|
|
|
1031
1021
|
};
|
|
1032
1022
|
}
|
|
1033
1023
|
/**
|
|
1034
|
-
*
|
|
1024
|
+
* Resolves one or more `run-summary.json` paths from a report location.
|
|
1035
1025
|
*
|
|
1036
1026
|
* Use when:
|
|
1037
|
-
* - callers
|
|
1027
|
+
* - callers may pass a run directory, summary file path, or a report root
|
|
1028
|
+
*
|
|
1029
|
+
* Returns:
|
|
1030
|
+
* - sorted absolute summary file paths
|
|
1038
1031
|
*/
|
|
1039
|
-
async function
|
|
1040
|
-
|
|
1032
|
+
async function resolveRunSummaryPaths(reportPath) {
|
|
1033
|
+
const absoluteReportPath = resolve(reportPath);
|
|
1034
|
+
const directSummaryPath = resolve(absoluteReportPath, "run-summary.json");
|
|
1035
|
+
if (existsSync(absoluteReportPath) && absoluteReportPath.endsWith(".json")) return [absoluteReportPath];
|
|
1036
|
+
if (existsSync(directSummaryPath)) return [directSummaryPath];
|
|
1037
|
+
return (await glob("**/run-summary.json", {
|
|
1038
|
+
absolute: true,
|
|
1039
|
+
cwd: absoluteReportPath
|
|
1040
|
+
})).sort((left, right) => left.localeCompare(right));
|
|
1041
1041
|
}
|
|
1042
1042
|
/**
|
|
1043
1043
|
* Creates a compact summary row for one run artifact.
|
|
@@ -1138,14 +1138,14 @@ function sanitizeIdentitySegment$1(value) {
|
|
|
1138
1138
|
*/
|
|
1139
1139
|
function createNoopReporter() {
|
|
1140
1140
|
return {
|
|
1141
|
-
|
|
1142
|
-
onTaskQueued(_payload) {},
|
|
1143
|
-
onTaskStart(_payload) {},
|
|
1144
|
-
onCaseStart(_payload) {},
|
|
1141
|
+
dispose() {},
|
|
1145
1142
|
onCaseEnd(_payload) {},
|
|
1146
|
-
|
|
1143
|
+
onCaseStart(_payload) {},
|
|
1147
1144
|
onRunEnd(_payload) {},
|
|
1148
|
-
|
|
1145
|
+
onRunStart(_payload) {},
|
|
1146
|
+
onTaskEnd(_payload) {},
|
|
1147
|
+
onTaskQueued(_payload) {},
|
|
1148
|
+
onTaskStart(_payload) {}
|
|
1149
1149
|
};
|
|
1150
1150
|
}
|
|
1151
1151
|
//#endregion
|
|
@@ -1154,72 +1154,91 @@ const POINTER = "❯";
|
|
|
1154
1154
|
const TREE_NODE_END = "└";
|
|
1155
1155
|
const TREE_NODE_MIDDLE = "├";
|
|
1156
1156
|
var SummaryReporterStateMachine = class {
|
|
1157
|
-
options;
|
|
1158
|
-
taskCounters = createCounterState();
|
|
1159
1157
|
caseCounters = createCounterState();
|
|
1160
|
-
|
|
1158
|
+
options;
|
|
1161
1159
|
queueOrderCounter = 0;
|
|
1162
1160
|
startedAtMs = 0;
|
|
1163
1161
|
startTime = "";
|
|
1162
|
+
taskCounters = createCounterState();
|
|
1163
|
+
tasks = /* @__PURE__ */ new Map();
|
|
1164
1164
|
constructor(options) {
|
|
1165
1165
|
this.options = options;
|
|
1166
1166
|
}
|
|
1167
1167
|
/**
|
|
1168
|
-
*
|
|
1168
|
+
* Releases reporter resources.
|
|
1169
1169
|
*
|
|
1170
1170
|
* Use when:
|
|
1171
|
-
* -
|
|
1171
|
+
* - CLI cleanup runs from a `finally` block
|
|
1172
1172
|
*
|
|
1173
1173
|
* Expects:
|
|
1174
|
-
* -
|
|
1174
|
+
* - repeated calls are safe
|
|
1175
1175
|
*
|
|
1176
1176
|
* Returns:
|
|
1177
1177
|
* - no direct value
|
|
1178
1178
|
*/
|
|
1179
|
-
|
|
1180
|
-
this.tasks.clear();
|
|
1181
|
-
this.queueOrderCounter = 0;
|
|
1182
|
-
resetCounterState(this.taskCounters, payload.totalTasks);
|
|
1183
|
-
resetCounterState(this.caseCounters, 0);
|
|
1184
|
-
this.startedAtMs = this.options.getNow();
|
|
1185
|
-
this.startTime = formatTimeString(new Date(this.options.getWallClockNow()));
|
|
1186
|
-
}
|
|
1179
|
+
dispose() {}
|
|
1187
1180
|
/**
|
|
1188
|
-
*
|
|
1181
|
+
* Builds the current live summary window rows.
|
|
1189
1182
|
*
|
|
1190
1183
|
* Use when:
|
|
1191
|
-
* -
|
|
1184
|
+
* - the live reporter or tests need a snapshot of the active window
|
|
1192
1185
|
*
|
|
1193
1186
|
* Expects:
|
|
1194
|
-
* - `
|
|
1187
|
+
* - `maxRows`, when present, keeps footer rows visible
|
|
1195
1188
|
*
|
|
1196
1189
|
* Returns:
|
|
1197
|
-
* -
|
|
1190
|
+
* - terminal rows in display order
|
|
1198
1191
|
*/
|
|
1199
|
-
|
|
1200
|
-
const
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1192
|
+
getWindowRows(options) {
|
|
1193
|
+
const activeRows = this.createActiveRows();
|
|
1194
|
+
const footerRows = this.createFooterRows();
|
|
1195
|
+
const maxRows = options?.maxRows;
|
|
1196
|
+
const footerBlock = [...footerRows, ""];
|
|
1197
|
+
if (maxRows == null || maxRows <= 0) return [...[
|
|
1198
|
+
"",
|
|
1199
|
+
...activeRows,
|
|
1200
|
+
...activeRows.length > 0 ? [""] : []
|
|
1201
|
+
], ...footerBlock];
|
|
1202
|
+
if (maxRows <= footerBlock.length) return footerBlock.slice(-maxRows);
|
|
1203
|
+
return [...createBoundedActiveBlock(activeRows, Math.max(0, maxRows - footerBlock.length)), ...footerBlock];
|
|
1204
|
+
}
|
|
1205
|
+
/**
|
|
1206
|
+
* Handles case completion.
|
|
1207
|
+
*
|
|
1208
|
+
* Use when:
|
|
1209
|
+
* - a running case settles and counters must advance
|
|
1211
1210
|
*
|
|
1212
1211
|
* Expects:
|
|
1213
|
-
* -
|
|
1212
|
+
* - duplicate completion for the same `caseId` is ignored
|
|
1214
1213
|
*
|
|
1215
1214
|
* Returns:
|
|
1216
1215
|
* - no direct value
|
|
1217
1216
|
*/
|
|
1218
|
-
|
|
1217
|
+
onCaseEnd(payload) {
|
|
1219
1218
|
const task = this.getOrCreateTaskState(payload.taskId);
|
|
1220
1219
|
if (task.state === "finished") return;
|
|
1221
|
-
task.
|
|
1222
|
-
|
|
1220
|
+
if (task.settledCaseIds.has(payload.caseId)) {
|
|
1221
|
+
task.runningCases.delete(payload.caseId);
|
|
1222
|
+
return;
|
|
1223
|
+
}
|
|
1224
|
+
task.settledCaseIds.add(payload.caseId);
|
|
1225
|
+
task.runningCases.delete(payload.caseId);
|
|
1226
|
+
task.completedCases += 1;
|
|
1227
|
+
this.syncTaskTotalCases(task);
|
|
1228
|
+
this.caseCounters.completed += 1;
|
|
1229
|
+
if (payload.state === "passed") {
|
|
1230
|
+
this.caseCounters.passed += 1;
|
|
1231
|
+
return;
|
|
1232
|
+
}
|
|
1233
|
+
if (payload.state === "failed") {
|
|
1234
|
+
this.caseCounters.failed += 1;
|
|
1235
|
+
return;
|
|
1236
|
+
}
|
|
1237
|
+
if (payload.state === "timeout") {
|
|
1238
|
+
this.caseCounters.timeout += 1;
|
|
1239
|
+
return;
|
|
1240
|
+
}
|
|
1241
|
+
this.caseCounters.skipped += 1;
|
|
1223
1242
|
}
|
|
1224
1243
|
/**
|
|
1225
1244
|
* Handles case start events.
|
|
@@ -1258,42 +1277,43 @@ var SummaryReporterStateMachine = class {
|
|
|
1258
1277
|
this.syncTaskTotalCases(task);
|
|
1259
1278
|
}
|
|
1260
1279
|
/**
|
|
1261
|
-
* Handles
|
|
1280
|
+
* Handles run completion.
|
|
1262
1281
|
*
|
|
1263
1282
|
* Use when:
|
|
1264
|
-
* -
|
|
1283
|
+
* - the caller has final task totals and wants the footer normalized
|
|
1265
1284
|
*
|
|
1266
1285
|
* Expects:
|
|
1267
|
-
* -
|
|
1286
|
+
* - payload counters are final terminal task totals
|
|
1268
1287
|
*
|
|
1269
1288
|
* Returns:
|
|
1270
1289
|
* - no direct value
|
|
1271
1290
|
*/
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
this.
|
|
1291
|
+
onRunEnd(payload) {
|
|
1292
|
+
this.taskCounters.total = payload.totalTasks;
|
|
1293
|
+
this.taskCounters.passed = payload.passedTasks;
|
|
1294
|
+
this.taskCounters.failed = payload.failedTasks;
|
|
1295
|
+
this.taskCounters.skipped = payload.skippedTasks;
|
|
1296
|
+
this.taskCounters.completed = payload.passedTasks + payload.failedTasks + payload.skippedTasks;
|
|
1297
|
+
}
|
|
1298
|
+
/**
|
|
1299
|
+
* Handles run startup.
|
|
1300
|
+
*
|
|
1301
|
+
* Use when:
|
|
1302
|
+
* - a new CLI run is starting and the summary state must reset
|
|
1303
|
+
*
|
|
1304
|
+
* Expects:
|
|
1305
|
+
* - `totalTasks` matches the scheduled task count for the run
|
|
1306
|
+
*
|
|
1307
|
+
* Returns:
|
|
1308
|
+
* - no direct value
|
|
1309
|
+
*/
|
|
1310
|
+
onRunStart(payload) {
|
|
1311
|
+
this.tasks.clear();
|
|
1312
|
+
this.queueOrderCounter = 0;
|
|
1313
|
+
resetCounterState(this.taskCounters, payload.totalTasks);
|
|
1314
|
+
resetCounterState(this.caseCounters, 0);
|
|
1315
|
+
this.startedAtMs = this.options.getNow();
|
|
1316
|
+
this.startTime = formatTimeString(new Date(this.options.getWallClockNow()));
|
|
1297
1317
|
}
|
|
1298
1318
|
/**
|
|
1299
1319
|
* Handles task completion.
|
|
@@ -1326,61 +1346,41 @@ var SummaryReporterStateMachine = class {
|
|
|
1326
1346
|
this.taskCounters.skipped += 1;
|
|
1327
1347
|
}
|
|
1328
1348
|
/**
|
|
1329
|
-
* Handles
|
|
1349
|
+
* Handles task queue events.
|
|
1330
1350
|
*
|
|
1331
1351
|
* Use when:
|
|
1332
|
-
* -
|
|
1352
|
+
* - a scheduled task becomes visible in the live summary before it starts
|
|
1333
1353
|
*
|
|
1334
1354
|
* Expects:
|
|
1335
|
-
* -
|
|
1355
|
+
* - `taskId` is stable across later lifecycle events
|
|
1336
1356
|
*
|
|
1337
1357
|
* Returns:
|
|
1338
1358
|
* - no direct value
|
|
1339
1359
|
*/
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
this.
|
|
1360
|
+
onTaskQueued(payload) {
|
|
1361
|
+
const task = this.getOrCreateTaskState(payload.taskId);
|
|
1362
|
+
if (task.state === "finished") return;
|
|
1363
|
+
task.displayName = payload.displayName ?? task.displayName;
|
|
1364
|
+
task.projectName = payload.projectName ?? task.projectName;
|
|
1365
|
+
this.syncTaskTotalCases(task, payload.totalCases);
|
|
1346
1366
|
}
|
|
1347
1367
|
/**
|
|
1348
|
-
*
|
|
1368
|
+
* Handles task start events.
|
|
1349
1369
|
*
|
|
1350
1370
|
* Use when:
|
|
1351
|
-
* -
|
|
1371
|
+
* - a queued task begins executing
|
|
1352
1372
|
*
|
|
1353
1373
|
* Expects:
|
|
1354
|
-
* -
|
|
1374
|
+
* - the task was previously queued or can be synthesized from its identifier
|
|
1355
1375
|
*
|
|
1356
1376
|
* Returns:
|
|
1357
1377
|
* - no direct value
|
|
1358
1378
|
*/
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
* - the live reporter or tests need a snapshot of the active window
|
|
1365
|
-
*
|
|
1366
|
-
* Expects:
|
|
1367
|
-
* - `maxRows`, when present, keeps footer rows visible
|
|
1368
|
-
*
|
|
1369
|
-
* Returns:
|
|
1370
|
-
* - terminal rows in display order
|
|
1371
|
-
*/
|
|
1372
|
-
getWindowRows(options) {
|
|
1373
|
-
const activeRows = this.createActiveRows();
|
|
1374
|
-
const footerRows = this.createFooterRows();
|
|
1375
|
-
const maxRows = options?.maxRows;
|
|
1376
|
-
const footerBlock = [...footerRows, ""];
|
|
1377
|
-
if (maxRows == null || maxRows <= 0) return [...[
|
|
1378
|
-
"",
|
|
1379
|
-
...activeRows,
|
|
1380
|
-
...activeRows.length > 0 ? [""] : []
|
|
1381
|
-
], ...footerBlock];
|
|
1382
|
-
if (maxRows <= footerBlock.length) return footerBlock.slice(-maxRows);
|
|
1383
|
-
return [...createBoundedActiveBlock(activeRows, Math.max(0, maxRows - footerBlock.length)), ...footerBlock];
|
|
1379
|
+
onTaskStart(payload) {
|
|
1380
|
+
const task = this.getOrCreateTaskState(payload.taskId);
|
|
1381
|
+
if (task.state === "finished") return;
|
|
1382
|
+
task.state = "running";
|
|
1383
|
+
task.startedAt ??= this.options.getNow();
|
|
1384
1384
|
}
|
|
1385
1385
|
createActiveRows() {
|
|
1386
1386
|
const activeTasks = Array.from(this.tasks.values()).filter((task) => task.state !== "finished").sort(compareActiveTasks);
|
|
@@ -1449,6 +1449,49 @@ var SummaryReporterStateMachine = class {
|
|
|
1449
1449
|
}
|
|
1450
1450
|
};
|
|
1451
1451
|
/**
|
|
1452
|
+
* Creates the live summary reporter state machine for `vieval` CLI runs.
|
|
1453
|
+
*
|
|
1454
|
+
* Use when:
|
|
1455
|
+
* - the CLI wants Vitest-style active rows and live counters
|
|
1456
|
+
* - tests need a deterministic reporter surface without touching the terminal
|
|
1457
|
+
*
|
|
1458
|
+
* Expects:
|
|
1459
|
+
* - queue/start/end events describe task lifecycle in order
|
|
1460
|
+
* - `getNow()` remains monotonic within one run
|
|
1461
|
+
* - `getWallClockNow()` returns the wall-clock run start timestamp
|
|
1462
|
+
*
|
|
1463
|
+
* Returns:
|
|
1464
|
+
* - a reporter compatible with the base CLI lifecycle plus `getWindowRows()`
|
|
1465
|
+
*
|
|
1466
|
+
* Call stack:
|
|
1467
|
+
*
|
|
1468
|
+
* {@link createSummaryReporter}
|
|
1469
|
+
* -> {@link SummaryReporterStateMachine.onTaskQueued}
|
|
1470
|
+
* -> {@link SummaryReporterStateMachine.onCaseStart}
|
|
1471
|
+
* -> {@link SummaryReporterStateMachine.getWindowRows}
|
|
1472
|
+
*/
|
|
1473
|
+
function createSummaryReporter(options) {
|
|
1474
|
+
return new SummaryReporterStateMachine(options);
|
|
1475
|
+
}
|
|
1476
|
+
function compareActiveTasks(left, right) {
|
|
1477
|
+
const leftProject = left.projectName ?? "";
|
|
1478
|
+
const rightProject = right.projectName ?? "";
|
|
1479
|
+
if (leftProject !== rightProject) return leftProject.localeCompare(rightProject);
|
|
1480
|
+
const displayNameOrder = left.displayName.localeCompare(right.displayName);
|
|
1481
|
+
if (displayNameOrder !== 0) return displayNameOrder;
|
|
1482
|
+
return left.queueOrder - right.queueOrder;
|
|
1483
|
+
}
|
|
1484
|
+
function countRunningCases(tasks) {
|
|
1485
|
+
let runningCount = 0;
|
|
1486
|
+
for (const task of tasks) runningCount += task.runningCases.size;
|
|
1487
|
+
return runningCount;
|
|
1488
|
+
}
|
|
1489
|
+
function countRunningTasks(tasks) {
|
|
1490
|
+
let runningCount = 0;
|
|
1491
|
+
for (const task of tasks) if (task.state === "running") runningCount += 1;
|
|
1492
|
+
return runningCount;
|
|
1493
|
+
}
|
|
1494
|
+
/**
|
|
1452
1495
|
* Creates the active task block while keeping room for summary footer rows.
|
|
1453
1496
|
*
|
|
1454
1497
|
* Use when:
|
|
@@ -1481,31 +1524,6 @@ function createBoundedActiveBlock(activeRows, maxRows) {
|
|
|
1481
1524
|
c.dim(` ${TREE_NODE_END} ... ${hiddenRows} more running rows hidden`)
|
|
1482
1525
|
];
|
|
1483
1526
|
}
|
|
1484
|
-
/**
|
|
1485
|
-
* Creates the live summary reporter state machine for `vieval` CLI runs.
|
|
1486
|
-
*
|
|
1487
|
-
* Use when:
|
|
1488
|
-
* - the CLI wants Vitest-style active rows and live counters
|
|
1489
|
-
* - tests need a deterministic reporter surface without touching the terminal
|
|
1490
|
-
*
|
|
1491
|
-
* Expects:
|
|
1492
|
-
* - queue/start/end events describe task lifecycle in order
|
|
1493
|
-
* - `getNow()` remains monotonic within one run
|
|
1494
|
-
* - `getWallClockNow()` returns the wall-clock run start timestamp
|
|
1495
|
-
*
|
|
1496
|
-
* Returns:
|
|
1497
|
-
* - a reporter compatible with the base CLI lifecycle plus `getWindowRows()`
|
|
1498
|
-
*
|
|
1499
|
-
* Call stack:
|
|
1500
|
-
*
|
|
1501
|
-
* {@link createSummaryReporter}
|
|
1502
|
-
* -> {@link SummaryReporterStateMachine.onTaskQueued}
|
|
1503
|
-
* -> {@link SummaryReporterStateMachine.onCaseStart}
|
|
1504
|
-
* -> {@link SummaryReporterStateMachine.getWindowRows}
|
|
1505
|
-
*/
|
|
1506
|
-
function createSummaryReporter(options) {
|
|
1507
|
-
return new SummaryReporterStateMachine(options);
|
|
1508
|
-
}
|
|
1509
1527
|
function createCounterState() {
|
|
1510
1528
|
return {
|
|
1511
1529
|
completed: 0,
|
|
@@ -1516,29 +1534,17 @@ function createCounterState() {
|
|
|
1516
1534
|
total: 0
|
|
1517
1535
|
};
|
|
1518
1536
|
}
|
|
1519
|
-
function
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
counter.passed = 0;
|
|
1523
|
-
counter.skipped = 0;
|
|
1524
|
-
counter.timeout = 0;
|
|
1525
|
-
counter.total = total;
|
|
1526
|
-
}
|
|
1527
|
-
function sumTaskCaseTotals(tasks) {
|
|
1528
|
-
let total = 0;
|
|
1529
|
-
for (const task of tasks) total += task.totalCases;
|
|
1530
|
-
return total;
|
|
1537
|
+
function estimateTaskDurationMs(task, now) {
|
|
1538
|
+
if (task.startedAt == null) return;
|
|
1539
|
+
return estimateTotalDurationMs(task.completedCases, task.totalCases, Math.max(0, now - task.startedAt));
|
|
1531
1540
|
}
|
|
1532
|
-
function
|
|
1533
|
-
|
|
1534
|
-
const
|
|
1535
|
-
|
|
1536
|
-
const displayNameOrder = left.displayName.localeCompare(right.displayName);
|
|
1537
|
-
if (displayNameOrder !== 0) return displayNameOrder;
|
|
1538
|
-
return left.queueOrder - right.queueOrder;
|
|
1541
|
+
function estimateTotalDurationMs(completedCount, totalCount, elapsedDurationMs) {
|
|
1542
|
+
if (completedCount === 0 || totalCount === 0) return;
|
|
1543
|
+
const averageDurationMs = elapsedDurationMs / completedCount;
|
|
1544
|
+
return Math.round(averageDurationMs * totalCount);
|
|
1539
1545
|
}
|
|
1540
|
-
function
|
|
1541
|
-
return `${c.dim(
|
|
1546
|
+
function formatActiveConcurrencyState(options) {
|
|
1547
|
+
return [options.taskRunningCount > 0 ? c.bold(c.yellow(`${options.taskRunningCount} ${pluralize("task", options.taskRunningCount)} running`)) : c.dim("0 tasks running"), options.caseRunningCount > 0 ? c.bold(c.yellow(`${options.caseRunningCount} ${pluralize("case", options.caseRunningCount)} running`)) : c.dim("0 cases running")].join(c.dim(" | "));
|
|
1542
1548
|
}
|
|
1543
1549
|
function formatCounterState(counter, runningCount, timing) {
|
|
1544
1550
|
const plannedCount = Math.max(0, counter.total - counter.completed - runningCount);
|
|
@@ -1551,19 +1557,6 @@ function formatCounterState(counter, runningCount, timing) {
|
|
|
1551
1557
|
counter.skipped > 0 ? c.yellow(`${counter.skipped} skipped`) : c.dim(`${counter.skipped} skipped`)
|
|
1552
1558
|
].join(c.dim(" | ")) + c.gray(` (${counter.total})`) + formatTimingSuffix(timing);
|
|
1553
1559
|
}
|
|
1554
|
-
function formatActiveConcurrencyState(options) {
|
|
1555
|
-
return [options.taskRunningCount > 0 ? c.bold(c.yellow(`${options.taskRunningCount} ${pluralize("task", options.taskRunningCount)} running`)) : c.dim("0 tasks running"), options.caseRunningCount > 0 ? c.bold(c.yellow(`${options.caseRunningCount} ${pluralize("case", options.caseRunningCount)} running`)) : c.dim("0 cases running")].join(c.dim(" | "));
|
|
1556
|
-
}
|
|
1557
|
-
function pluralize(noun, count) {
|
|
1558
|
-
return count === 1 ? noun : `${noun}s`;
|
|
1559
|
-
}
|
|
1560
|
-
function formatRetrySuffix(activeCase) {
|
|
1561
|
-
if (activeCase.retryIndex == null || activeCase.retryIndex <= 0 || activeCase.autoRetry == null || activeCase.autoRetry <= 0) return "";
|
|
1562
|
-
return c.dim(` retry ${activeCase.retryIndex}/${activeCase.autoRetry}`);
|
|
1563
|
-
}
|
|
1564
|
-
function formatTimeString(date) {
|
|
1565
|
-
return date.toTimeString().split(" ")[0] ?? "";
|
|
1566
|
-
}
|
|
1567
1560
|
function formatDuration$2(durationMs) {
|
|
1568
1561
|
return formatHumanDuration(durationMs);
|
|
1569
1562
|
}
|
|
@@ -1595,24 +1588,9 @@ function formatProjectBadge(projectName, isTTY) {
|
|
|
1595
1588
|
const background = backgroundPool[projectName.split("").reduce((accumulator, character, index) => accumulator + character.charCodeAt(0) + index, 0) % backgroundPool.length];
|
|
1596
1589
|
return `${c.black(background(` ${projectName} `))} `;
|
|
1597
1590
|
}
|
|
1598
|
-
function
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
return runningCount;
|
|
1602
|
-
}
|
|
1603
|
-
function countRunningTasks(tasks) {
|
|
1604
|
-
let runningCount = 0;
|
|
1605
|
-
for (const task of tasks) if (task.state === "running") runningCount += 1;
|
|
1606
|
-
return runningCount;
|
|
1607
|
-
}
|
|
1608
|
-
function estimateTaskDurationMs(task, now) {
|
|
1609
|
-
if (task.startedAt == null) return;
|
|
1610
|
-
return estimateTotalDurationMs(task.completedCases, task.totalCases, Math.max(0, now - task.startedAt));
|
|
1611
|
-
}
|
|
1612
|
-
function estimateTotalDurationMs(completedCount, totalCount, elapsedDurationMs) {
|
|
1613
|
-
if (completedCount === 0 || totalCount === 0) return;
|
|
1614
|
-
const averageDurationMs = elapsedDurationMs / completedCount;
|
|
1615
|
-
return Math.round(averageDurationMs * totalCount);
|
|
1591
|
+
function formatRetrySuffix(activeCase) {
|
|
1592
|
+
if (activeCase.retryIndex == null || activeCase.retryIndex <= 0 || activeCase.autoRetry == null || activeCase.autoRetry <= 0) return "";
|
|
1593
|
+
return c.dim(` retry ${activeCase.retryIndex}/${activeCase.autoRetry}`);
|
|
1616
1594
|
}
|
|
1617
1595
|
function formatTaskProgressSuffix(task, now) {
|
|
1618
1596
|
const elapsedDurationMs = task.startedAt == null ? 0 : Math.max(0, now - task.startedAt);
|
|
@@ -1621,11 +1599,33 @@ function formatTaskProgressSuffix(task, now) {
|
|
|
1621
1599
|
estimatedDurationMs: estimateTaskDurationMs(task, now)
|
|
1622
1600
|
})}`;
|
|
1623
1601
|
}
|
|
1602
|
+
function formatTimeString(date) {
|
|
1603
|
+
return date.toTimeString().split(" ")[0] ?? "";
|
|
1604
|
+
}
|
|
1624
1605
|
function formatTimingSuffix(timing) {
|
|
1625
1606
|
const parts = [`elapsed ${formatHumanDuration(timing.elapsedDurationMs)}`];
|
|
1626
1607
|
if (timing.estimatedDurationMs != null) parts.push(`estimated ${formatHumanDuration(timing.estimatedDurationMs)}`);
|
|
1627
1608
|
return ` (${parts.join(", ")})`;
|
|
1628
1609
|
}
|
|
1610
|
+
function padSummaryTitle(label) {
|
|
1611
|
+
return `${c.dim(label.padEnd(8))} `;
|
|
1612
|
+
}
|
|
1613
|
+
function pluralize(noun, count) {
|
|
1614
|
+
return count === 1 ? noun : `${noun}s`;
|
|
1615
|
+
}
|
|
1616
|
+
function resetCounterState(counter, total) {
|
|
1617
|
+
counter.completed = 0;
|
|
1618
|
+
counter.failed = 0;
|
|
1619
|
+
counter.passed = 0;
|
|
1620
|
+
counter.skipped = 0;
|
|
1621
|
+
counter.timeout = 0;
|
|
1622
|
+
counter.total = total;
|
|
1623
|
+
}
|
|
1624
|
+
function sumTaskCaseTotals(tasks) {
|
|
1625
|
+
let total = 0;
|
|
1626
|
+
for (const task of tasks) total += task.totalCases;
|
|
1627
|
+
return total;
|
|
1628
|
+
}
|
|
1629
1629
|
//#endregion
|
|
1630
1630
|
//#region src/cli/reporters/index.ts
|
|
1631
1631
|
/**
|
|
@@ -1676,14 +1676,14 @@ const SYNC_END = `${ESC}?2026l`;
|
|
|
1676
1676
|
* -> {@link WindowRenderer.renderWindow}
|
|
1677
1677
|
*/
|
|
1678
1678
|
var WindowRenderer = class {
|
|
1679
|
+
bufferedOutput = "";
|
|
1680
|
+
finished = false;
|
|
1679
1681
|
options;
|
|
1680
1682
|
renderInterval;
|
|
1681
1683
|
renderScheduled = false;
|
|
1682
1684
|
renderScheduleVersion = 0;
|
|
1683
|
-
windowHeight = 0;
|
|
1684
1685
|
started = false;
|
|
1685
|
-
|
|
1686
|
-
bufferedOutput = "";
|
|
1686
|
+
windowHeight = 0;
|
|
1687
1687
|
constructor(options) {
|
|
1688
1688
|
if (options.createInterval && options.clearInterval) {
|
|
1689
1689
|
this.options = {
|
|
@@ -1714,26 +1714,41 @@ var WindowRenderer = class {
|
|
|
1714
1714
|
};
|
|
1715
1715
|
}
|
|
1716
1716
|
/**
|
|
1717
|
-
*
|
|
1717
|
+
* Stops the renderer and clears any visible window state.
|
|
1718
1718
|
*
|
|
1719
1719
|
* Use when:
|
|
1720
|
-
* -
|
|
1720
|
+
* - cleanup needs to happen from a `finally` block or interrupted run
|
|
1721
1721
|
*
|
|
1722
1722
|
* Expects:
|
|
1723
|
-
* -
|
|
1723
|
+
* - callers may invoke it more than once
|
|
1724
1724
|
*
|
|
1725
1725
|
* Returns:
|
|
1726
1726
|
* - no direct value
|
|
1727
1727
|
*/
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1728
|
+
dispose() {
|
|
1729
|
+
this.finish();
|
|
1730
|
+
}
|
|
1731
|
+
/**
|
|
1732
|
+
* Clears the rendered window and stops the refresh loop.
|
|
1733
|
+
*
|
|
1734
|
+
* Use when:
|
|
1735
|
+
* - the live reporter is transitioning to final static output
|
|
1736
|
+
*
|
|
1737
|
+
* Expects:
|
|
1738
|
+
* - repeated calls are safe
|
|
1739
|
+
*
|
|
1740
|
+
* Returns:
|
|
1741
|
+
* - no direct value
|
|
1742
|
+
*/
|
|
1743
|
+
finish() {
|
|
1744
|
+
if (this.finished) return;
|
|
1745
|
+
this.finished = true;
|
|
1746
|
+
this.started = false;
|
|
1732
1747
|
this.renderScheduleVersion += 1;
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1748
|
+
this.renderScheduled = false;
|
|
1749
|
+
this.stopInterval();
|
|
1750
|
+
this.clearWindow();
|
|
1751
|
+
this.flushBufferedOutput();
|
|
1737
1752
|
}
|
|
1738
1753
|
/**
|
|
1739
1754
|
* Queues a render if one is not already in flight.
|
|
@@ -1758,41 +1773,26 @@ var WindowRenderer = class {
|
|
|
1758
1773
|
});
|
|
1759
1774
|
}
|
|
1760
1775
|
/**
|
|
1761
|
-
*
|
|
1776
|
+
* Starts the periodic refresh loop.
|
|
1762
1777
|
*
|
|
1763
1778
|
* Use when:
|
|
1764
|
-
* - the live reporter is
|
|
1779
|
+
* - the live reporter is about to emit in-place updates
|
|
1765
1780
|
*
|
|
1766
1781
|
* Expects:
|
|
1767
|
-
* - repeated calls are
|
|
1782
|
+
* - repeated calls are harmless and keep the existing timer
|
|
1768
1783
|
*
|
|
1769
1784
|
* Returns:
|
|
1770
1785
|
* - no direct value
|
|
1771
1786
|
*/
|
|
1772
|
-
|
|
1773
|
-
if (this.finished) return;
|
|
1774
|
-
this.
|
|
1775
|
-
this.
|
|
1787
|
+
start() {
|
|
1788
|
+
if (this.started && !this.finished) return;
|
|
1789
|
+
this.started = true;
|
|
1790
|
+
this.finished = false;
|
|
1776
1791
|
this.renderScheduleVersion += 1;
|
|
1777
|
-
this.
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
}
|
|
1782
|
-
/**
|
|
1783
|
-
* Stops the renderer and clears any visible window state.
|
|
1784
|
-
*
|
|
1785
|
-
* Use when:
|
|
1786
|
-
* - cleanup needs to happen from a `finally` block or interrupted run
|
|
1787
|
-
*
|
|
1788
|
-
* Expects:
|
|
1789
|
-
* - callers may invoke it more than once
|
|
1790
|
-
*
|
|
1791
|
-
* Returns:
|
|
1792
|
-
* - no direct value
|
|
1793
|
-
*/
|
|
1794
|
-
dispose() {
|
|
1795
|
-
this.finish();
|
|
1792
|
+
if (!this.renderInterval) {
|
|
1793
|
+
this.renderInterval = this.options.createInterval(() => this.schedule(), this.options.intervalMs);
|
|
1794
|
+
this.renderInterval.unref?.();
|
|
1795
|
+
}
|
|
1796
1796
|
}
|
|
1797
1797
|
/**
|
|
1798
1798
|
* Alias for disposal to match Vitest's renderer lifecycle naming.
|
|
@@ -1830,6 +1830,20 @@ var WindowRenderer = class {
|
|
|
1830
1830
|
}
|
|
1831
1831
|
this.bufferedOutput += message;
|
|
1832
1832
|
}
|
|
1833
|
+
clearWindow() {
|
|
1834
|
+
if (!this.options.supportsAnsiWindowing || this.windowHeight === 0) return;
|
|
1835
|
+
this.writeOutput(`${CARRIAGE_RETURN}${CLEAR_LINE}`);
|
|
1836
|
+
for (let rowIndex = 1; rowIndex < this.windowHeight; rowIndex += 1) this.writeOutput(`${CARRIAGE_RETURN}${MOVE_CURSOR_ONE_ROW_UP}${CLEAR_LINE}`);
|
|
1837
|
+
this.windowHeight = 0;
|
|
1838
|
+
}
|
|
1839
|
+
flushBufferedOutput() {
|
|
1840
|
+
if (this.bufferedOutput.length === 0) return;
|
|
1841
|
+
this.writeOutput(this.bufferedOutput);
|
|
1842
|
+
this.bufferedOutput = "";
|
|
1843
|
+
}
|
|
1844
|
+
isActiveWindowMode() {
|
|
1845
|
+
return this.started && !this.finished && this.options.supportsAnsiWindowing;
|
|
1846
|
+
}
|
|
1833
1847
|
renderWindow() {
|
|
1834
1848
|
const windowContent = this.options.getWindow();
|
|
1835
1849
|
const rowCount = getRenderedRowCount(windowContent, this.options.getColumns());
|
|
@@ -1847,12 +1861,6 @@ var WindowRenderer = class {
|
|
|
1847
1861
|
this.writeOutput("\n");
|
|
1848
1862
|
this.windowHeight = 0;
|
|
1849
1863
|
}
|
|
1850
|
-
clearWindow() {
|
|
1851
|
-
if (!this.options.supportsAnsiWindowing || this.windowHeight === 0) return;
|
|
1852
|
-
this.writeOutput(`${CARRIAGE_RETURN}${CLEAR_LINE}`);
|
|
1853
|
-
for (let rowIndex = 1; rowIndex < this.windowHeight; rowIndex += 1) this.writeOutput(`${CARRIAGE_RETURN}${MOVE_CURSOR_ONE_ROW_UP}${CLEAR_LINE}`);
|
|
1854
|
-
this.windowHeight = 0;
|
|
1855
|
-
}
|
|
1856
1864
|
stopInterval() {
|
|
1857
1865
|
if (!this.renderInterval) return;
|
|
1858
1866
|
this.renderInterval.clear();
|
|
@@ -1861,14 +1869,6 @@ var WindowRenderer = class {
|
|
|
1861
1869
|
writeOutput(message) {
|
|
1862
1870
|
this.options.writeOutput(message);
|
|
1863
1871
|
}
|
|
1864
|
-
flushBufferedOutput() {
|
|
1865
|
-
if (this.bufferedOutput.length === 0) return;
|
|
1866
|
-
this.writeOutput(this.bufferedOutput);
|
|
1867
|
-
this.bufferedOutput = "";
|
|
1868
|
-
}
|
|
1869
|
-
isActiveWindowMode() {
|
|
1870
|
-
return this.started && !this.finished && this.options.supportsAnsiWindowing;
|
|
1871
|
-
}
|
|
1872
1872
|
};
|
|
1873
1873
|
function defaultCreateInterval(callback, intervalMs) {
|
|
1874
1874
|
const timer = globalThis.setInterval(callback, intervalMs);
|
|
@@ -1895,40 +1895,6 @@ function getTextDisplayWidth(text) {
|
|
|
1895
1895
|
}
|
|
1896
1896
|
//#endregion
|
|
1897
1897
|
//#region src/cli/reporters/vitest-compat-reporter.ts
|
|
1898
|
-
function isReporterReferenceTuple(reference) {
|
|
1899
|
-
return Array.isArray(reference);
|
|
1900
|
-
}
|
|
1901
|
-
function isAbsoluteLikePath(value) {
|
|
1902
|
-
return value.startsWith("/") || value.startsWith("./") || value.startsWith("../") || /^[A-Z]:[\\/]/i.test(value);
|
|
1903
|
-
}
|
|
1904
|
-
async function loadReporterModule(path) {
|
|
1905
|
-
if (isAbsoluteLikePath(path)) return import(pathToFileURL(path).href);
|
|
1906
|
-
return import(path);
|
|
1907
|
-
}
|
|
1908
|
-
function normalizeReporterReference(reference) {
|
|
1909
|
-
if (isReporterReferenceTuple(reference)) return {
|
|
1910
|
-
options: reference[1],
|
|
1911
|
-
value: reference[0]
|
|
1912
|
-
};
|
|
1913
|
-
return {
|
|
1914
|
-
options: void 0,
|
|
1915
|
-
value: reference
|
|
1916
|
-
};
|
|
1917
|
-
}
|
|
1918
|
-
function createReporterInstance(moduleValue, options) {
|
|
1919
|
-
const value = moduleValue.default ?? moduleValue;
|
|
1920
|
-
if (value == null) return null;
|
|
1921
|
-
if (typeof value === "function") return new value(options);
|
|
1922
|
-
if (typeof value === "object") return value;
|
|
1923
|
-
return null;
|
|
1924
|
-
}
|
|
1925
|
-
async function emitToReporters(reporters, callback) {
|
|
1926
|
-
await Promise.all(reporters.map(async (reporter) => {
|
|
1927
|
-
try {
|
|
1928
|
-
await callback(reporter);
|
|
1929
|
-
} catch {}
|
|
1930
|
-
}));
|
|
1931
|
-
}
|
|
1932
1898
|
/**
|
|
1933
1899
|
* Creates a project-level vitest-compatible reporter bridge.
|
|
1934
1900
|
*
|
|
@@ -2020,82 +1986,337 @@ async function createVievalVitestCompatReporterBridge(options) {
|
|
|
2020
1986
|
}
|
|
2021
1987
|
};
|
|
2022
1988
|
}
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
return output.projects.some((project) => {
|
|
2030
|
-
if (project.errorMessage != null) return true;
|
|
2031
|
-
if (project.caseSummary != null && (project.caseSummary.failed > 0 || project.caseSummary.timeout > 0)) return true;
|
|
2032
|
-
return (project.caseFailures?.length ?? 0) > 0;
|
|
2033
|
-
});
|
|
2034
|
-
}
|
|
2035
|
-
function resolveCappedConcurrency(defaultConcurrency, cliConcurrency, fallback) {
|
|
2036
|
-
const effectiveDefault = defaultConcurrency ?? fallback;
|
|
2037
|
-
if (cliConcurrency == null) return effectiveDefault;
|
|
2038
|
-
return Math.min(effectiveDefault, cliConcurrency);
|
|
2039
|
-
}
|
|
2040
|
-
function resolveOptionalRuntimeTaskConcurrency(defaultConcurrency, cliConcurrency) {
|
|
2041
|
-
return cliConcurrency ?? defaultConcurrency;
|
|
1989
|
+
function createReporterInstance(moduleValue, options) {
|
|
1990
|
+
const value = moduleValue.default ?? moduleValue;
|
|
1991
|
+
if (value == null) return null;
|
|
1992
|
+
if (typeof value === "function") return new value(options);
|
|
1993
|
+
if (typeof value === "object") return value;
|
|
1994
|
+
return null;
|
|
2042
1995
|
}
|
|
2043
|
-
function
|
|
2044
|
-
|
|
1996
|
+
async function emitToReporters(reporters, callback) {
|
|
1997
|
+
await Promise.all(reporters.map(async (reporter) => {
|
|
1998
|
+
try {
|
|
1999
|
+
await callback(reporter);
|
|
2000
|
+
} catch {}
|
|
2001
|
+
}));
|
|
2045
2002
|
}
|
|
2046
|
-
function
|
|
2047
|
-
return
|
|
2003
|
+
function isAbsoluteLikePath(value) {
|
|
2004
|
+
return value.startsWith("/") || value.startsWith("./") || value.startsWith("../") || /^[A-Z]:[\\/]/i.test(value);
|
|
2048
2005
|
}
|
|
2049
|
-
function
|
|
2050
|
-
return
|
|
2006
|
+
function isReporterReferenceTuple(reference) {
|
|
2007
|
+
return Array.isArray(reference);
|
|
2051
2008
|
}
|
|
2052
|
-
function
|
|
2053
|
-
|
|
2009
|
+
async function loadReporterModule(path) {
|
|
2010
|
+
if (isAbsoluteLikePath(path)) return import(pathToFileURL(path).href);
|
|
2011
|
+
return import(path);
|
|
2054
2012
|
}
|
|
2055
|
-
function
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
return {
|
|
2060
|
-
attempt,
|
|
2061
|
-
case: caseConcurrency
|
|
2013
|
+
function normalizeReporterReference(reference) {
|
|
2014
|
+
if (isReporterReferenceTuple(reference)) return {
|
|
2015
|
+
options: reference[1],
|
|
2016
|
+
value: reference[0]
|
|
2062
2017
|
};
|
|
2063
|
-
}
|
|
2064
|
-
function createScheduledTaskWithRuntimeConcurrency(task, project, options) {
|
|
2065
|
-
const taskDefinition = task.entry.task;
|
|
2066
|
-
if (taskDefinition == null) return task;
|
|
2067
|
-
const concurrency = resolveRuntimeTaskConcurrency(taskDefinition.concurrency, project, options);
|
|
2068
2018
|
return {
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2019
|
+
options: void 0,
|
|
2020
|
+
value: reference
|
|
2021
|
+
};
|
|
2022
|
+
}
|
|
2023
|
+
//#endregion
|
|
2024
|
+
//#region src/cli/run.ts
|
|
2025
|
+
/**
|
|
2026
|
+
* Formats CLI run output as human-readable lines.
|
|
2027
|
+
*/
|
|
2028
|
+
function formatVievalCliRunOutput(output) {
|
|
2029
|
+
const colorEnabled = shouldUseColor();
|
|
2030
|
+
const colors = createColorPalette(colorEnabled);
|
|
2031
|
+
const lines = [];
|
|
2032
|
+
lines.push(` ${colors.dim("RUN")} ${colors.yellow("vieval")}`);
|
|
2033
|
+
lines.push(` ${colors.dim("Config")} ${output.configFilePath ?? "(not found, using defaults)"}`);
|
|
2034
|
+
lines.push("");
|
|
2035
|
+
let passedProjects = 0;
|
|
2036
|
+
let skippedProjects = 0;
|
|
2037
|
+
let failedProjects = 0;
|
|
2038
|
+
let totalTasks = 0;
|
|
2039
|
+
let executedTasks = 0;
|
|
2040
|
+
function formatMatrixSummary(summary) {
|
|
2041
|
+
if (summary == null) return null;
|
|
2042
|
+
const runAxesLabel = summary.runAxes.length === 0 ? "-" : summary.runAxes.join("|");
|
|
2043
|
+
const evalAxesLabel = summary.evalAxes.length === 0 ? "-" : summary.evalAxes.join("|");
|
|
2044
|
+
return `matrix run ${summary.runRows} [${runAxesLabel}] / eval ${summary.evalRows} [${evalAxesLabel}]`;
|
|
2045
|
+
}
|
|
2046
|
+
function formatScheduleBreakdown(project) {
|
|
2047
|
+
const summary = project.matrixSummary;
|
|
2048
|
+
if (summary == null) return null;
|
|
2049
|
+
if (project.taskCount <= 0 || project.entryCount <= 0 || summary.runRows <= 0 || summary.evalRows <= 0) return null;
|
|
2050
|
+
const denominator = project.entryCount * summary.runRows * summary.evalRows;
|
|
2051
|
+
if (denominator <= 0 || project.taskCount % denominator !== 0) return null;
|
|
2052
|
+
const providerCount = project.taskCount / denominator;
|
|
2053
|
+
return [
|
|
2054
|
+
colors.dim("schedule "),
|
|
2055
|
+
colors.yellow(String(project.entryCount)),
|
|
2056
|
+
colors.dim(" entries × "),
|
|
2057
|
+
colors.yellow(String(providerCount)),
|
|
2058
|
+
colors.dim(" inferenceExecutors × "),
|
|
2059
|
+
colors.yellow(String(summary.runRows)),
|
|
2060
|
+
colors.dim(" run rows × "),
|
|
2061
|
+
colors.yellow(String(summary.evalRows)),
|
|
2062
|
+
colors.dim(" eval rows = "),
|
|
2063
|
+
colors.green(String(project.taskCount)),
|
|
2064
|
+
colors.dim(" tasks")
|
|
2065
|
+
].join("");
|
|
2066
|
+
}
|
|
2067
|
+
for (const project of output.projects) {
|
|
2068
|
+
totalTasks += project.taskCount;
|
|
2069
|
+
executedTasks += project.result?.overall.runCount ?? 0;
|
|
2070
|
+
const badge = createProjectBadge(project.name, colors, colorEnabled);
|
|
2071
|
+
const isFailed = project.errorMessage != null;
|
|
2072
|
+
const hasFailedCases = (project.caseSummary?.failed ?? 0) > 0 || (project.caseSummary?.timeout ?? 0) > 0 || (project.caseFailures?.length ?? 0) > 0;
|
|
2073
|
+
if (isFailed) {
|
|
2074
|
+
failedProjects += 1;
|
|
2075
|
+
lines.push(` ${colors.red("❯")} ${badge}${formatDuration$1(project.durationMs, colors)}`);
|
|
2076
|
+
lines.push(` ${project.errorMessage}`);
|
|
2077
|
+
continue;
|
|
2078
|
+
}
|
|
2079
|
+
if (!project.executed) {
|
|
2080
|
+
skippedProjects += 1;
|
|
2081
|
+
const countLabel = colors.dim(`(${project.taskCount} tasks)`);
|
|
2082
|
+
const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, 0 runs, hybrid n/a`);
|
|
2083
|
+
const matrixSummary = formatMatrixSummary(project.matrixSummary);
|
|
2084
|
+
lines.push(` ${colors.dim("○")} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
|
|
2085
|
+
if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
|
|
2086
|
+
const scheduleBreakdown = formatScheduleBreakdown(project);
|
|
2087
|
+
if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
|
|
2088
|
+
continue;
|
|
2089
|
+
}
|
|
2090
|
+
if (hasFailedCases) failedProjects += 1;
|
|
2091
|
+
else passedProjects += 1;
|
|
2092
|
+
const hybridAverageLabel = formatHybridAverage(project.result?.overall.hybridAverage);
|
|
2093
|
+
const runCount = project.result?.overall.runCount ?? 0;
|
|
2094
|
+
const countLabel = colors.dim(`(${project.taskCount} tasks)`);
|
|
2095
|
+
const caseSummaryLabel = project.caseSummary == null ? "" : `, cases ${project.caseSummary.passed} passed | ${project.caseSummary.failed} failed | ${project.caseSummary.timeout} timeout`;
|
|
2096
|
+
const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, ${runCount} runs${caseSummaryLabel}, hybrid ${hybridAverageLabel}`);
|
|
2097
|
+
const matrixSummary = formatMatrixSummary(project.matrixSummary);
|
|
2098
|
+
lines.push(` ${hasFailedCases ? colors.red("❯") : colors.green("✓")} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
|
|
2099
|
+
if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
|
|
2100
|
+
const scheduleBreakdown = formatScheduleBreakdown(project);
|
|
2101
|
+
if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
|
|
2102
|
+
if ((project.caseFailures?.length ?? 0) > 0) {
|
|
2103
|
+
lines.push(` ${colors.red("Failed cases:")}`);
|
|
2104
|
+
for (const failure of project.caseFailures.slice(0, 5)) {
|
|
2105
|
+
lines.push(` ${colors.red(`- ${failure.caseName} (${failure.taskId})`)}`);
|
|
2106
|
+
for (const line of failure.errorMessage.split("\n")) lines.push(` ${colors.red(line)}`);
|
|
2075
2107
|
}
|
|
2108
|
+
if (project.caseFailures.length > 5) lines.push(` ${colors.dim(`... ${project.caseFailures.length - 5} more failed cases`)}`);
|
|
2109
|
+
}
|
|
2110
|
+
}
|
|
2111
|
+
lines.push("");
|
|
2112
|
+
if (failedProjects > 0 || skippedProjects > 0) {
|
|
2113
|
+
const summarySegments = [`${colors.green(String(passedProjects))} passed`];
|
|
2114
|
+
if (skippedProjects > 0) summarySegments.push(`${colors.dim(String(skippedProjects))} skipped`);
|
|
2115
|
+
if (failedProjects > 0) summarySegments.push(`${colors.red(String(failedProjects))} failed`);
|
|
2116
|
+
lines.push(` ${colors.dim("Projects")} ${summarySegments.join(" | ")} (${output.projects.length})`);
|
|
2117
|
+
} else lines.push(` ${colors.dim("Projects")} ${colors.green(String(passedProjects))} passed (${output.projects.length})`);
|
|
2118
|
+
lines.push(` ${colors.dim("Tasks")} ${executedTasks} executed / ${totalTasks} scheduled`);
|
|
2119
|
+
return lines.join("\n");
|
|
2120
|
+
}
|
|
2121
|
+
/**
|
|
2122
|
+
* Returns true when output contains at least one failing project/task/case outcome.
|
|
2123
|
+
*/
|
|
2124
|
+
function hasRunFailures(output) {
|
|
2125
|
+
return output.projects.some((project) => {
|
|
2126
|
+
if (project.errorMessage != null) return true;
|
|
2127
|
+
if (project.caseSummary != null && (project.caseSummary.failed > 0 || project.caseSummary.timeout > 0)) return true;
|
|
2128
|
+
return (project.caseFailures?.length ?? 0) > 0;
|
|
2129
|
+
});
|
|
2130
|
+
}
|
|
2131
|
+
/**
|
|
2132
|
+
* Runs vieval orchestration from config and returns project-level summaries.
|
|
2133
|
+
*
|
|
2134
|
+
* Call stack:
|
|
2135
|
+
*
|
|
2136
|
+
* {@link runVievalCli}
|
|
2137
|
+
* -> {@link loadVievalCliConfig}
|
|
2138
|
+
* -> {@link discoverEvalFiles}
|
|
2139
|
+
* -> {@link collectEvalEntries}
|
|
2140
|
+
* -> {@link createRunnerSchedule}
|
|
2141
|
+
* -> {@link runScheduledTasks} (optional)
|
|
2142
|
+
*
|
|
2143
|
+
* Use when:
|
|
2144
|
+
* - running eval collection and scheduling from a single command
|
|
2145
|
+
* - keeping business-agent eval files near their implementation packages
|
|
2146
|
+
*/
|
|
2147
|
+
async function runVievalCli(options = {}) {
|
|
2148
|
+
const loadedConfig = await loadVievalCliConfig({
|
|
2149
|
+
configFilePath: options.configFilePath,
|
|
2150
|
+
cwd: options.cwd
|
|
2151
|
+
});
|
|
2152
|
+
const telemetry = loadedConfig.reporting?.openTelemetry?.enabled === true ? createOpenTelemetryRuntime() : createNoopTelemetryRuntime();
|
|
2153
|
+
const onOpenTelemetryRunEnd = loadedConfig.reporting?.openTelemetry?.enabled === true ? loadedConfig.reporting.openTelemetry.onRunEnd : void 0;
|
|
2154
|
+
const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
|
|
2155
|
+
let runError;
|
|
2156
|
+
let runEndError;
|
|
2157
|
+
let output;
|
|
2158
|
+
let reporter;
|
|
2159
|
+
try {
|
|
2160
|
+
const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
|
|
2161
|
+
const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
|
|
2162
|
+
const identity = createRunIdentity(options, preparedProjects);
|
|
2163
|
+
const eventRecorder = createEventRecorder(identity);
|
|
2164
|
+
const runReporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
|
|
2165
|
+
reporter = runReporter;
|
|
2166
|
+
output = await telemetry.withSpan("vieval.run", {
|
|
2167
|
+
"vieval.attempt.id": identity.attemptId,
|
|
2168
|
+
"vieval.experiment.id": identity.experimentId,
|
|
2169
|
+
"vieval.run.id": identity.runId,
|
|
2170
|
+
"vieval.workspace.id": identity.workspaceId
|
|
2171
|
+
}, async () => {
|
|
2172
|
+
const workspaceScheduler = createSchedulerRuntime({ concurrency: { workspace: resolveWorkspaceConcurrency(loadedConfig, options) } });
|
|
2173
|
+
const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
|
|
2174
|
+
const totalTasks = preparedProjects.reduce((sum, project) => {
|
|
2175
|
+
if (project.kind === "prepared") return sum + project.prepared.tasks.length;
|
|
2176
|
+
return sum + project.summary.taskCount;
|
|
2177
|
+
}, 0);
|
|
2178
|
+
const skippedSummaryTasks = preparedProjects.reduce((sum, project) => {
|
|
2179
|
+
if (project.kind === "summary") return sum + project.summary.taskCount;
|
|
2180
|
+
return sum;
|
|
2181
|
+
}, 0);
|
|
2182
|
+
const reporterCounters = {
|
|
2183
|
+
failedTasks: 0,
|
|
2184
|
+
passedTasks: 0,
|
|
2185
|
+
skippedTasks: 0
|
|
2186
|
+
};
|
|
2187
|
+
runReporter.onRunStart({ totalTasks });
|
|
2188
|
+
for (const project of executableProjects) for (const task of project.tasks) runReporter.onTaskQueued(createTaskQueuePayload(task, project.name));
|
|
2189
|
+
const projectSummaries = (await Promise.all(preparedProjects.map(async (preparedProject, index) => {
|
|
2190
|
+
if (preparedProject.kind === "summary") return {
|
|
2191
|
+
index,
|
|
2192
|
+
summary: preparedProject.summary
|
|
2193
|
+
};
|
|
2194
|
+
return {
|
|
2195
|
+
index,
|
|
2196
|
+
summary: await telemetry.withSpan("vieval.project", {
|
|
2197
|
+
"vieval.project.name": preparedProject.prepared.name,
|
|
2198
|
+
"vieval.run.id": identity.runId
|
|
2199
|
+
}, async () => await workspaceScheduler.runCase({
|
|
2200
|
+
experimentId: identity.experimentId,
|
|
2201
|
+
projectName: preparedProject.prepared.name,
|
|
2202
|
+
scope: "workspace",
|
|
2203
|
+
workspaceId: identity.workspaceId
|
|
2204
|
+
}, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, telemetry, runReporter, reporterCounters, eventRecorder.record, options)))
|
|
2205
|
+
};
|
|
2206
|
+
}))).sort((left, right) => left.index - right.index).map((item) => item.summary);
|
|
2207
|
+
runReporter.onRunEnd({
|
|
2208
|
+
failedTasks: reporterCounters.failedTasks,
|
|
2209
|
+
passedTasks: reporterCounters.passedTasks,
|
|
2210
|
+
skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
|
|
2211
|
+
totalTasks
|
|
2212
|
+
});
|
|
2213
|
+
const output = {
|
|
2214
|
+
attemptId: identity.attemptId,
|
|
2215
|
+
configFilePath: loadedConfig.configFilePath,
|
|
2216
|
+
experimentId: identity.experimentId,
|
|
2217
|
+
projects: projectSummaries,
|
|
2218
|
+
reportDirectory: null,
|
|
2219
|
+
runId: identity.runId,
|
|
2220
|
+
workspaceId: identity.workspaceId
|
|
2221
|
+
};
|
|
2222
|
+
if (options.reportOut != null) output.reportDirectory = await writeRunReportArtifacts(output, eventRecorder.events, identity, options.reportOut);
|
|
2223
|
+
return output;
|
|
2224
|
+
});
|
|
2225
|
+
} catch (error) {
|
|
2226
|
+
runError = error;
|
|
2227
|
+
} finally {
|
|
2228
|
+
if (onOpenTelemetryRunEnd != null) try {
|
|
2229
|
+
await onOpenTelemetryRunEnd();
|
|
2230
|
+
} catch (error) {
|
|
2231
|
+
if (runError == null) runEndError = error;
|
|
2232
|
+
}
|
|
2233
|
+
reporter?.dispose();
|
|
2234
|
+
restoreEnvironment();
|
|
2235
|
+
}
|
|
2236
|
+
if (runError != null) throw runError;
|
|
2237
|
+
if (runEndError != null) throw runEndError;
|
|
2238
|
+
if (output == null) throw new Error("Vieval run finished without output.");
|
|
2239
|
+
return output;
|
|
2240
|
+
}
|
|
2241
|
+
function applyRunEnvironment(env) {
|
|
2242
|
+
const envEntries = Object.entries(env);
|
|
2243
|
+
if (envEntries.length === 0) return () => {};
|
|
2244
|
+
const snapshot = /* @__PURE__ */ new Map();
|
|
2245
|
+
for (const [key, value] of envEntries) {
|
|
2246
|
+
snapshot.set(key, {
|
|
2247
|
+
existed: Object.hasOwn(process.env, key),
|
|
2248
|
+
value: process.env[key]
|
|
2249
|
+
});
|
|
2250
|
+
if (value == null) {
|
|
2251
|
+
delete process.env[key];
|
|
2252
|
+
continue;
|
|
2253
|
+
}
|
|
2254
|
+
process.env[key] = value;
|
|
2255
|
+
}
|
|
2256
|
+
return () => {
|
|
2257
|
+
for (const [key, previous] of snapshot.entries()) {
|
|
2258
|
+
if (previous.existed) {
|
|
2259
|
+
if (previous.value == null) {
|
|
2260
|
+
delete process.env[key];
|
|
2261
|
+
continue;
|
|
2262
|
+
}
|
|
2263
|
+
process.env[key] = previous.value;
|
|
2264
|
+
continue;
|
|
2265
|
+
}
|
|
2266
|
+
delete process.env[key];
|
|
2076
2267
|
}
|
|
2077
2268
|
};
|
|
2078
2269
|
}
|
|
2079
|
-
function
|
|
2080
|
-
if (options.attemptConcurrency == null && options.caseConcurrency == null) return;
|
|
2270
|
+
function cloneScheduledTaskMatrix(task) {
|
|
2081
2271
|
return {
|
|
2082
|
-
|
|
2083
|
-
|
|
2272
|
+
eval: { ...task.matrix.eval },
|
|
2273
|
+
meta: { ...task.matrix.meta },
|
|
2274
|
+
run: { ...task.matrix.run }
|
|
2084
2275
|
};
|
|
2085
2276
|
}
|
|
2086
|
-
function
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2277
|
+
function createAutoTaskExecutor(reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
|
|
2278
|
+
return async (task, context) => {
|
|
2279
|
+
const taskDefinition = task.entry.task;
|
|
2280
|
+
if (taskDefinition == null) throw new Error(`Missing eval task definition for entry "${task.entry.id}".`);
|
|
2281
|
+
const output = await taskDefinition.run({
|
|
2282
|
+
cache: context.cache,
|
|
2283
|
+
models: context.models,
|
|
2284
|
+
reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
2285
|
+
task,
|
|
2286
|
+
telemetry: context.telemetry
|
|
2287
|
+
});
|
|
2288
|
+
return {
|
|
2289
|
+
entryId: task.entry.id,
|
|
2290
|
+
id: task.id,
|
|
2291
|
+
inferenceExecutorId: task.inferenceExecutor.id,
|
|
2292
|
+
matrix: task.matrix,
|
|
2293
|
+
scores: [...output.scores]
|
|
2294
|
+
};
|
|
2295
|
+
};
|
|
2296
|
+
}
|
|
2297
|
+
function createCliTaskExecutionContext(task, models, cacheRootDirectory, cacheProjectName, workspaceId, telemetry, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, runtimeConcurrency, vitestCompatReporter) {
|
|
2298
|
+
return {
|
|
2299
|
+
...createTaskExecutionContext({
|
|
2300
|
+
cache: createFilesystemTaskCacheRuntime({
|
|
2301
|
+
cacheRootDirectory,
|
|
2302
|
+
projectName: cacheProjectName,
|
|
2303
|
+
workspaceId
|
|
2304
|
+
}),
|
|
2305
|
+
models,
|
|
2306
|
+
task
|
|
2307
|
+
}),
|
|
2308
|
+
reporterHooks: createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
2309
|
+
runtimeConcurrency,
|
|
2310
|
+
telemetry
|
|
2311
|
+
};
|
|
2091
2312
|
}
|
|
2092
2313
|
function createColorPalette(enabled) {
|
|
2093
2314
|
if (!enabled) return {
|
|
2094
|
-
black: (value) => value,
|
|
2095
2315
|
bgCyan: (value) => value,
|
|
2096
2316
|
bgGreen: (value) => value,
|
|
2097
2317
|
bgMagenta: (value) => value,
|
|
2098
2318
|
bgYellow: (value) => value,
|
|
2319
|
+
black: (value) => value,
|
|
2099
2320
|
dim: (value) => value,
|
|
2100
2321
|
gray: (value) => value,
|
|
2101
2322
|
green: (value) => value,
|
|
@@ -2103,11 +2324,11 @@ function createColorPalette(enabled) {
|
|
|
2103
2324
|
yellow: (value) => value
|
|
2104
2325
|
};
|
|
2105
2326
|
return {
|
|
2106
|
-
black: (value) => c.black(value),
|
|
2107
2327
|
bgCyan: (value) => c.bgCyan(value),
|
|
2108
2328
|
bgGreen: (value) => c.bgGreen(value),
|
|
2109
2329
|
bgMagenta: (value) => c.bgMagenta(value),
|
|
2110
2330
|
bgYellow: (value) => c.bgYellow(value),
|
|
2331
|
+
black: (value) => c.black(value),
|
|
2111
2332
|
dim: (value) => c.dim(value),
|
|
2112
2333
|
gray: (value) => c.gray(value),
|
|
2113
2334
|
green: (value) => c.green(value),
|
|
@@ -2115,67 +2336,6 @@ function createColorPalette(enabled) {
|
|
|
2115
2336
|
yellow: (value) => c.yellow(value)
|
|
2116
2337
|
};
|
|
2117
2338
|
}
|
|
2118
|
-
function createProjectBadge(name, colors, colorEnabled) {
|
|
2119
|
-
if (!colorEnabled || !c.isColorSupported) return `|${name}| `;
|
|
2120
|
-
const labelColorPool = [
|
|
2121
|
-
colors.bgYellow,
|
|
2122
|
-
colors.bgCyan,
|
|
2123
|
-
colors.bgGreen,
|
|
2124
|
-
colors.bgMagenta
|
|
2125
|
-
];
|
|
2126
|
-
const background = labelColorPool[name.split("").reduce((accumulator, char, index) => accumulator + char.charCodeAt(0) + index, 0) % labelColorPool.length];
|
|
2127
|
-
return `${colors.black(background(` ${name} `))} `;
|
|
2128
|
-
}
|
|
2129
|
-
function formatDuration$1(durationMs, colors) {
|
|
2130
|
-
if (durationMs == null) return "";
|
|
2131
|
-
const rounded = Math.round(durationMs);
|
|
2132
|
-
return (rounded > 1e3 ? colors.yellow : colors.green)(` ${rounded}${colors.dim("ms")}`);
|
|
2133
|
-
}
|
|
2134
|
-
function formatHybridAverage(hybridAverage) {
|
|
2135
|
-
if (hybridAverage == null) return "n/a";
|
|
2136
|
-
return hybridAverage.toFixed(3).replace(/\.?0+$/, "");
|
|
2137
|
-
}
|
|
2138
|
-
function filterProjectsByName(projects, names) {
|
|
2139
|
-
if (names.length === 0) return [...projects];
|
|
2140
|
-
const nameSet = new Set(names);
|
|
2141
|
-
return projects.filter((project) => nameSet.has(project.name));
|
|
2142
|
-
}
|
|
2143
|
-
function sanitizeIdentitySegment(value) {
|
|
2144
|
-
const normalized = value.trim();
|
|
2145
|
-
if (normalized.length === 0) return "default";
|
|
2146
|
-
return normalized.replace(/[^\w.-]+/g, "-");
|
|
2147
|
-
}
|
|
2148
|
-
function createExperimentMatrixRows(tasks) {
|
|
2149
|
-
const rows = /* @__PURE__ */ new Set();
|
|
2150
|
-
for (const task of tasks) {
|
|
2151
|
-
const runRowId = task.matrix.meta.runRowId;
|
|
2152
|
-
const evalRowId = task.matrix.meta.evalRowId;
|
|
2153
|
-
if (runRowId !== "default" && evalRowId !== "default") {
|
|
2154
|
-
rows.add(`run:${runRowId}+eval:${evalRowId}`);
|
|
2155
|
-
continue;
|
|
2156
|
-
}
|
|
2157
|
-
if (runRowId !== "default") rows.add(`run:${runRowId}`);
|
|
2158
|
-
if (evalRowId !== "default") rows.add(`eval:${evalRowId}`);
|
|
2159
|
-
}
|
|
2160
|
-
return [...rows].sort((left, right) => left.localeCompare(right));
|
|
2161
|
-
}
|
|
2162
|
-
function resolveExperimentId(options, preparedProjects) {
|
|
2163
|
-
if (options.experiment != null) return sanitizeIdentitySegment(options.experiment);
|
|
2164
|
-
const matrixRows = /* @__PURE__ */ new Set();
|
|
2165
|
-
for (const project of preparedProjects) project.experimentMatrixRows.forEach((row) => matrixRows.add(row));
|
|
2166
|
-
if (matrixRows.size === 0) return "default-experiment";
|
|
2167
|
-
return sanitizeIdentitySegment(`matrix-${[...matrixRows].sort().join("--")}`);
|
|
2168
|
-
}
|
|
2169
|
-
function createRunIdentity(options, preparedProjects) {
|
|
2170
|
-
const workspaceId = sanitizeIdentitySegment(options.workspace ?? "default-workspace");
|
|
2171
|
-
const experimentId = resolveExperimentId(options, preparedProjects);
|
|
2172
|
-
return {
|
|
2173
|
-
attemptId: sanitizeIdentitySegment(options.attempt ?? `attempt-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`),
|
|
2174
|
-
experimentId,
|
|
2175
|
-
runId: `run-${Date.now()}-${randomUUID().slice(0, 8)}`,
|
|
2176
|
-
workspaceId
|
|
2177
|
-
};
|
|
2178
|
-
}
|
|
2179
2339
|
function createEventRecorder(identity) {
|
|
2180
2340
|
const events = [];
|
|
2181
2341
|
const taskProjectMap = /* @__PURE__ */ new Map();
|
|
@@ -2204,6 +2364,50 @@ function createEventRecorder(identity) {
|
|
|
2204
2364
|
}
|
|
2205
2365
|
};
|
|
2206
2366
|
}
|
|
2367
|
+
function createExperimentMatrixRows(tasks) {
|
|
2368
|
+
const rows = /* @__PURE__ */ new Set();
|
|
2369
|
+
for (const task of tasks) {
|
|
2370
|
+
const runRowId = task.matrix.meta.runRowId;
|
|
2371
|
+
const evalRowId = task.matrix.meta.evalRowId;
|
|
2372
|
+
if (runRowId !== "default" && evalRowId !== "default") {
|
|
2373
|
+
rows.add(`run:${runRowId}+eval:${evalRowId}`);
|
|
2374
|
+
continue;
|
|
2375
|
+
}
|
|
2376
|
+
if (runRowId !== "default") rows.add(`run:${runRowId}`);
|
|
2377
|
+
if (evalRowId !== "default") rows.add(`eval:${evalRowId}`);
|
|
2378
|
+
}
|
|
2379
|
+
return [...rows].sort((left, right) => left.localeCompare(right));
|
|
2380
|
+
}
|
|
2381
|
+
function createProjectBadge(name, colors, colorEnabled) {
|
|
2382
|
+
if (!colorEnabled || !c.isColorSupported) return `|${name}| `;
|
|
2383
|
+
const labelColorPool = [
|
|
2384
|
+
colors.bgYellow,
|
|
2385
|
+
colors.bgCyan,
|
|
2386
|
+
colors.bgGreen,
|
|
2387
|
+
colors.bgMagenta
|
|
2388
|
+
];
|
|
2389
|
+
const background = labelColorPool[name.split("").reduce((accumulator, char, index) => accumulator + char.charCodeAt(0) + index, 0) % labelColorPool.length];
|
|
2390
|
+
return `${colors.black(background(` ${name} `))} `;
|
|
2391
|
+
}
|
|
2392
|
+
function createProjectMatrixSummary(tasks) {
|
|
2393
|
+
if (tasks.length === 0) return null;
|
|
2394
|
+
const runAxes = /* @__PURE__ */ new Set();
|
|
2395
|
+
const evalAxes = /* @__PURE__ */ new Set();
|
|
2396
|
+
const runRows = /* @__PURE__ */ new Set();
|
|
2397
|
+
const evalRows = /* @__PURE__ */ new Set();
|
|
2398
|
+
for (const task of tasks) {
|
|
2399
|
+
Object.keys(task.matrix.run).forEach((axis) => runAxes.add(axis));
|
|
2400
|
+
Object.keys(task.matrix.eval).forEach((axis) => evalAxes.add(axis));
|
|
2401
|
+
runRows.add(task.matrix.meta.runRowId);
|
|
2402
|
+
evalRows.add(task.matrix.meta.evalRowId);
|
|
2403
|
+
}
|
|
2404
|
+
return {
|
|
2405
|
+
evalAxes: [...evalAxes].sort(),
|
|
2406
|
+
evalRows: evalRows.size,
|
|
2407
|
+
runAxes: [...runAxes].sort(),
|
|
2408
|
+
runRows: runRows.size
|
|
2409
|
+
};
|
|
2410
|
+
}
|
|
2207
2411
|
function createReporterWithEventCapture(reporter, recordEvent) {
|
|
2208
2412
|
return {
|
|
2209
2413
|
dispose() {
|
|
@@ -2239,38 +2443,16 @@ function createReporterWithEventCapture(reporter, recordEvent) {
|
|
|
2239
2443
|
}
|
|
2240
2444
|
};
|
|
2241
2445
|
}
|
|
2242
|
-
function
|
|
2243
|
-
const
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
|
|
2250
|
-
});
|
|
2251
|
-
if (value == null) {
|
|
2252
|
-
delete process.env[key];
|
|
2253
|
-
continue;
|
|
2254
|
-
}
|
|
2255
|
-
process.env[key] = value;
|
|
2256
|
-
}
|
|
2257
|
-
return () => {
|
|
2258
|
-
for (const [key, previous] of snapshot.entries()) {
|
|
2259
|
-
if (previous.existed) {
|
|
2260
|
-
if (previous.value == null) {
|
|
2261
|
-
delete process.env[key];
|
|
2262
|
-
continue;
|
|
2263
|
-
}
|
|
2264
|
-
process.env[key] = previous.value;
|
|
2265
|
-
continue;
|
|
2266
|
-
}
|
|
2267
|
-
delete process.env[key];
|
|
2268
|
-
}
|
|
2446
|
+
/**
 * Derives the identifiers that label one CLI run.
 *
 * - `workspaceId`: sanitized `options.workspace`, defaulting to "default-workspace".
 * - `experimentId`: delegated to `resolveExperimentId`.
 * - `attemptId`: sanitized `options.attempt`, defaulting to `attempt-<ISO timestamp>`
 *   with ":" and "." replaced by "-".
 * - `runId`: `run-<epoch ms>-<first 8 characters of a random UUID>`.
 *
 * @param {object} options - CLI options; reads `workspace` and `attempt`, and is forwarded to `resolveExperimentId`.
 * @param {Array<object>} preparedProjects - Forwarded to `resolveExperimentId`.
 * @returns {{attemptId: string, experimentId: string, runId: string, workspaceId: string}}
 */
function createRunIdentity(options, preparedProjects) {
  const workspaceId = sanitizeIdentitySegment(options.workspace ?? "default-workspace");
  const experimentId = resolveExperimentId(options, preparedProjects);

  let attemptLabel = options.attempt;
  if (attemptLabel == null) {
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    attemptLabel = `attempt-${timestamp}`;
  }
  const attemptId = sanitizeIdentitySegment(attemptLabel);

  const createdAtMs = Date.now();
  const uniqueSuffix = randomUUID().slice(0, 8);
  const runId = `run-${createdAtMs}-${uniqueSuffix}`;

  return {
    attemptId,
    experimentId,
    runId,
    workspaceId
  };
}
|
|
2271
|
-
function isSummaryReporter(reporter) {
|
|
2272
|
-
return "getWindowRows" in reporter;
|
|
2273
|
-
}
|
|
2274
2456
|
function createRunReporter(options) {
|
|
2275
2457
|
const getRows = options?.getRows ?? (() => process.stdout.rows);
|
|
2276
2458
|
const reporter = createCliReporter({
|
|
@@ -2342,21 +2524,23 @@ function createRunReporter(options) {
|
|
|
2342
2524
|
}
|
|
2343
2525
|
};
|
|
2344
2526
|
}
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
|
|
2527
|
+
/**
 * Returns a copy of a scheduled task whose task definition carries the
 * concurrency resolved by `resolveRuntimeTaskConcurrency`.
 *
 * The input is never mutated: the task, its entry and the task definition are
 * each shallow-copied. A task whose entry has no task definition is returned
 * as-is (same reference).
 *
 * @param {object} task - Scheduled task; reads `entry.task`.
 * @param {object} project - Project config forwarded to the resolver.
 * @param {object} options - CLI options forwarded to the resolver.
 * @returns {object} The original task or a shallow copy with `entry.task.concurrency` replaced.
 */
function createScheduledTaskWithRuntimeConcurrency(task, project, options) {
  const definition = task.entry.task;
  if (definition == null) {
    return task;
  }
  const resolvedConcurrency = resolveRuntimeTaskConcurrency(definition.concurrency, project, options);
  const patchedDefinition = { ...definition, concurrency: resolvedConcurrency };
  const patchedEntry = { ...task.entry, task: patchedDefinition };
  return { ...task, entry: patchedEntry };
}
|
|
2542
|
+
/**
 * Builds the reporter id for a case as `<index>:<URI-encoded name>`.
 *
 * @param {{index: number, name: string}} payload - Case position and display name.
 * @returns {string} Identifier combining the index with the encoded name.
 */
function createTaskCaseReporterId(payload) {
  const { index, name } = payload;
  return `${index}:${encodeURIComponent(name)}`;
}
|
|
2361
2545
|
function createTaskQueuePayload(task, projectName) {
|
|
2362
2546
|
return {
|
|
@@ -2365,9 +2549,6 @@ function createTaskQueuePayload(task, projectName) {
|
|
|
2365
2549
|
taskId: task.id
|
|
2366
2550
|
};
|
|
2367
2551
|
}
|
|
2368
|
-
function createTaskCaseReporterId(payload) {
|
|
2369
|
-
return `${payload.index}:${encodeURIComponent(payload.name)}`;
|
|
2370
|
-
}
|
|
2371
2552
|
function createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
|
|
2372
2553
|
function syncCaseTotal(total) {
|
|
2373
2554
|
reporter.onTaskQueued({
|
|
@@ -2415,8 +2596,8 @@ function createTaskReporterHooks(task, reporter, projectName, recordEvent, proje
|
|
|
2415
2596
|
reporter.onCaseStart({
|
|
2416
2597
|
autoRetry: payload.autoRetry,
|
|
2417
2598
|
caseId,
|
|
2418
|
-
input: payload.input,
|
|
2419
2599
|
caseName: payload.name,
|
|
2600
|
+
input: payload.input,
|
|
2420
2601
|
retryIndex: payload.retryIndex,
|
|
2421
2602
|
taskId: task.id
|
|
2422
2603
|
});
|
|
@@ -2434,144 +2615,6 @@ function createTaskReporterHooks(task, reporter, projectName, recordEvent, proje
|
|
|
2434
2615
|
}
|
|
2435
2616
|
};
|
|
2436
2617
|
}
|
|
2437
|
-
function createCliTaskExecutionContext(task, models, cacheRootDirectory, cacheProjectName, workspaceId, telemetry, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, runtimeConcurrency, vitestCompatReporter) {
|
|
2438
|
-
return {
|
|
2439
|
-
...createTaskExecutionContext({
|
|
2440
|
-
cache: createFilesystemTaskCacheRuntime({
|
|
2441
|
-
cacheRootDirectory,
|
|
2442
|
-
projectName: cacheProjectName,
|
|
2443
|
-
workspaceId
|
|
2444
|
-
}),
|
|
2445
|
-
models,
|
|
2446
|
-
task
|
|
2447
|
-
}),
|
|
2448
|
-
reporterHooks: createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
2449
|
-
runtimeConcurrency,
|
|
2450
|
-
telemetry
|
|
2451
|
-
};
|
|
2452
|
-
}
|
|
2453
|
-
function resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
|
|
2454
|
-
return context.reporterHooks ?? createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter);
|
|
2455
|
-
}
|
|
2456
|
-
function getFailedTaskId(error) {
|
|
2457
|
-
if (error instanceof RunnerExecutionError) return error.taskId;
|
|
2458
|
-
return null;
|
|
2459
|
-
}
|
|
2460
|
-
function createAutoTaskExecutor(reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
|
|
2461
|
-
return async (task, context) => {
|
|
2462
|
-
const taskDefinition = task.entry.task;
|
|
2463
|
-
if (taskDefinition == null) throw new Error(`Missing eval task definition for entry "${task.entry.id}".`);
|
|
2464
|
-
const output = await taskDefinition.run({
|
|
2465
|
-
cache: context.cache,
|
|
2466
|
-
models: context.models,
|
|
2467
|
-
reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
2468
|
-
task,
|
|
2469
|
-
telemetry: context.telemetry
|
|
2470
|
-
});
|
|
2471
|
-
return {
|
|
2472
|
-
entryId: task.entry.id,
|
|
2473
|
-
id: task.id,
|
|
2474
|
-
matrix: task.matrix,
|
|
2475
|
-
inferenceExecutorId: task.inferenceExecutor.id,
|
|
2476
|
-
scores: [...output.scores]
|
|
2477
|
-
};
|
|
2478
|
-
};
|
|
2479
|
-
}
|
|
2480
|
-
function cloneScheduledTaskMatrix(task) {
|
|
2481
|
-
return {
|
|
2482
|
-
eval: { ...task.matrix.eval },
|
|
2483
|
-
meta: { ...task.matrix.meta },
|
|
2484
|
-
run: { ...task.matrix.run }
|
|
2485
|
-
};
|
|
2486
|
-
}
|
|
2487
|
-
function createProjectMatrixSummary(tasks) {
|
|
2488
|
-
if (tasks.length === 0) return null;
|
|
2489
|
-
const runAxes = /* @__PURE__ */ new Set();
|
|
2490
|
-
const evalAxes = /* @__PURE__ */ new Set();
|
|
2491
|
-
const runRows = /* @__PURE__ */ new Set();
|
|
2492
|
-
const evalRows = /* @__PURE__ */ new Set();
|
|
2493
|
-
for (const task of tasks) {
|
|
2494
|
-
Object.keys(task.matrix.run).forEach((axis) => runAxes.add(axis));
|
|
2495
|
-
Object.keys(task.matrix.eval).forEach((axis) => evalAxes.add(axis));
|
|
2496
|
-
runRows.add(task.matrix.meta.runRowId);
|
|
2497
|
-
evalRows.add(task.matrix.meta.evalRowId);
|
|
2498
|
-
}
|
|
2499
|
-
return {
|
|
2500
|
-
evalAxes: [...evalAxes].sort(),
|
|
2501
|
-
evalRows: evalRows.size,
|
|
2502
|
-
runAxes: [...runAxes].sort(),
|
|
2503
|
-
runRows: runRows.size
|
|
2504
|
-
};
|
|
2505
|
-
}
|
|
2506
|
-
async function prepareProject(project) {
|
|
2507
|
-
const startedAt = Date.now();
|
|
2508
|
-
try {
|
|
2509
|
-
const runtimeContext = await createRunnerRuntimeContext({
|
|
2510
|
-
cwd: project.root,
|
|
2511
|
-
fallbackProjectRootDirectory: project.root
|
|
2512
|
-
});
|
|
2513
|
-
const evalFilePaths = await discoverEvalFiles({
|
|
2514
|
-
exclude: project.exclude,
|
|
2515
|
-
include: project.include,
|
|
2516
|
-
root: project.root
|
|
2517
|
-
});
|
|
2518
|
-
const entries = collectEvalEntries(await loadEvalModulesWithVitestRuntime(evalFilePaths, project.root), runtimeContext);
|
|
2519
|
-
const tasks = createRunnerSchedule({
|
|
2520
|
-
evalMatrix: project.evalMatrix,
|
|
2521
|
-
entries,
|
|
2522
|
-
inferenceExecutors: project.inferenceExecutors,
|
|
2523
|
-
runMatrix: project.runMatrix
|
|
2524
|
-
});
|
|
2525
|
-
const canAutoExecuteEntryTasks = entries.some((entry) => entry.task != null) && project.models.length > 0;
|
|
2526
|
-
if (project.executor == null && !canAutoExecuteEntryTasks) return {
|
|
2527
|
-
experimentMatrixRows: createExperimentMatrixRows(tasks),
|
|
2528
|
-
kind: "summary",
|
|
2529
|
-
summary: {
|
|
2530
|
-
caseSummary: null,
|
|
2531
|
-
caseFailures: [],
|
|
2532
|
-
discoveredEvalFileCount: evalFilePaths.length,
|
|
2533
|
-
durationMs: Date.now() - startedAt,
|
|
2534
|
-
entryCount: entries.length,
|
|
2535
|
-
errorMessage: null,
|
|
2536
|
-
executed: false,
|
|
2537
|
-
matrixSummary: createProjectMatrixSummary(tasks),
|
|
2538
|
-
name: project.name,
|
|
2539
|
-
result: null,
|
|
2540
|
-
taskCount: tasks.length
|
|
2541
|
-
}
|
|
2542
|
-
};
|
|
2543
|
-
return {
|
|
2544
|
-
experimentMatrixRows: createExperimentMatrixRows(tasks),
|
|
2545
|
-
kind: "prepared",
|
|
2546
|
-
prepared: {
|
|
2547
|
-
discoveredEvalFileCount: evalFilePaths.length,
|
|
2548
|
-
entryCount: entries.length,
|
|
2549
|
-
name: project.name,
|
|
2550
|
-
project,
|
|
2551
|
-
startedAt,
|
|
2552
|
-
tasks
|
|
2553
|
-
}
|
|
2554
|
-
};
|
|
2555
|
-
} catch (error) {
|
|
2556
|
-
return {
|
|
2557
|
-
experimentMatrixRows: [],
|
|
2558
|
-
kind: "summary",
|
|
2559
|
-
summary: {
|
|
2560
|
-
caseSummary: null,
|
|
2561
|
-
caseFailures: [],
|
|
2562
|
-
discoveredEvalFileCount: 0,
|
|
2563
|
-
durationMs: Date.now() - startedAt,
|
|
2564
|
-
entryCount: 0,
|
|
2565
|
-
errorMessage: errorMessageFrom(error) ?? "Unknown project execution error.",
|
|
2566
|
-
executed: false,
|
|
2567
|
-
matrixSummary: null,
|
|
2568
|
-
name: project.name,
|
|
2569
|
-
result: null,
|
|
2570
|
-
taskCount: 0
|
|
2571
|
-
}
|
|
2572
|
-
};
|
|
2573
|
-
}
|
|
2574
|
-
}
|
|
2575
2618
|
async function executePreparedProject(prepared, identity, cacheProjectName, telemetry, reporter, counters, recordEvent, options) {
|
|
2576
2619
|
const settledTaskIds = /* @__PURE__ */ new Set();
|
|
2577
2620
|
const projectCaseCounters = {
|
|
@@ -2607,6 +2650,7 @@ async function executePreparedProject(prepared, identity, cacheProjectName, tele
|
|
|
2607
2650
|
createExecutionContext(task) {
|
|
2608
2651
|
return createCliTaskExecutionContext(task, prepared.project.models, resolve(prepared.project.root, ".vieval", "cache"), cacheProjectName ?? prepared.name, identity.workspaceId, telemetry, reporter, prepared.name, recordEvent, projectCaseCounters, projectCaseFailures, resolveCliRuntimeConcurrency(options), vitestCompatReporter);
|
|
2609
2652
|
},
|
|
2653
|
+
maxConcurrency: resolveScheduledTaskConcurrency(prepared.project, options),
|
|
2610
2654
|
onTaskEnd(task, state) {
|
|
2611
2655
|
settledTaskIds.add(task.id);
|
|
2612
2656
|
reporter.onTaskEnd({
|
|
@@ -2626,11 +2670,11 @@ async function executePreparedProject(prepared, identity, cacheProjectName, tele
|
|
|
2626
2670
|
onTaskStart(task) {
|
|
2627
2671
|
reporter.onTaskStart({ taskId: task.id });
|
|
2628
2672
|
vitestCompatReporter?.onTaskStart({ taskId: task.id });
|
|
2629
|
-
}
|
|
2630
|
-
maxConcurrency: resolveScheduledTaskConcurrency(prepared.project, options)
|
|
2673
|
+
}
|
|
2631
2674
|
});
|
|
2632
2675
|
await vitestCompatReporter?.onRunEnd({ failed: false });
|
|
2633
2676
|
return {
|
|
2677
|
+
caseFailures: projectCaseFailures,
|
|
2634
2678
|
caseSummary: {
|
|
2635
2679
|
failed: projectCaseCounters.failed,
|
|
2636
2680
|
passed: projectCaseCounters.passed,
|
|
@@ -2638,7 +2682,6 @@ async function executePreparedProject(prepared, identity, cacheProjectName, tele
|
|
|
2638
2682
|
timeout: projectCaseCounters.timeout,
|
|
2639
2683
|
total: projectCaseCounters.seenCaseIds.size
|
|
2640
2684
|
},
|
|
2641
|
-
caseFailures: projectCaseFailures,
|
|
2642
2685
|
discoveredEvalFileCount: prepared.discoveredEvalFileCount,
|
|
2643
2686
|
durationMs: Date.now() - prepared.startedAt,
|
|
2644
2687
|
entryCount: prepared.entryCount,
|
|
@@ -2671,238 +2714,195 @@ async function executePreparedProject(prepared, identity, cacheProjectName, tele
|
|
|
2671
2714
|
state: "skipped",
|
|
2672
2715
|
taskId: task.id
|
|
2673
2716
|
});
|
|
2674
|
-
await vitestCompatReporter?.onTaskEnd({
|
|
2675
|
-
state: "skipped",
|
|
2676
|
-
taskId: task.id
|
|
2677
|
-
});
|
|
2678
|
-
}
|
|
2679
|
-
await vitestCompatReporter?.onRunEnd({ failed: true });
|
|
2680
|
-
return {
|
|
2681
|
-
caseSummary: {
|
|
2682
|
-
failed: projectCaseCounters.failed,
|
|
2683
|
-
passed: projectCaseCounters.passed,
|
|
2684
|
-
skipped: projectCaseCounters.skipped,
|
|
2685
|
-
timeout: projectCaseCounters.timeout,
|
|
2686
|
-
total: projectCaseCounters.seenCaseIds.size
|
|
2687
|
-
},
|
|
2688
|
-
caseFailures: projectCaseFailures,
|
|
2689
|
-
discoveredEvalFileCount: prepared.discoveredEvalFileCount,
|
|
2690
|
-
durationMs: Date.now() - prepared.startedAt,
|
|
2691
|
-
entryCount: prepared.entryCount,
|
|
2692
|
-
errorMessage: errorMessageFrom(error) ?? "Unknown project execution error.",
|
|
2693
|
-
executed: false,
|
|
2694
|
-
matrixSummary: createProjectMatrixSummary(prepared.tasks),
|
|
2695
|
-
name: prepared.name,
|
|
2696
|
-
result: null,
|
|
2697
|
-
taskCount: prepared.tasks.length
|
|
2698
|
-
};
|
|
2699
|
-
}
|
|
2700
|
-
}
|
|
2701
|
-
/**
|
|
2702
|
-
* Runs vieval orchestration from config and returns project-level summaries.
|
|
2703
|
-
*
|
|
2704
|
-
* Call stack:
|
|
2705
|
-
*
|
|
2706
|
-
* {@link runVievalCli}
|
|
2707
|
-
* -> {@link loadVievalCliConfig}
|
|
2708
|
-
* -> {@link discoverEvalFiles}
|
|
2709
|
-
* -> {@link collectEvalEntries}
|
|
2710
|
-
* -> {@link createRunnerSchedule}
|
|
2711
|
-
* -> {@link runScheduledTasks} (optional)
|
|
2712
|
-
*
|
|
2713
|
-
* Use when:
|
|
2714
|
-
* - running eval collection and scheduling from a single command
|
|
2715
|
-
* - keeping business-agent eval files near their implementation packages
|
|
2716
|
-
*/
|
|
2717
|
-
async function runVievalCli(options = {}) {
|
|
2718
|
-
const loadedConfig = await loadVievalCliConfig({
|
|
2719
|
-
configFilePath: options.configFilePath,
|
|
2720
|
-
cwd: options.cwd
|
|
2721
|
-
});
|
|
2722
|
-
const telemetry = loadedConfig.reporting?.openTelemetry?.enabled === true ? createOpenTelemetryRuntime() : createNoopTelemetryRuntime();
|
|
2723
|
-
const onOpenTelemetryRunEnd = loadedConfig.reporting?.openTelemetry?.enabled === true ? loadedConfig.reporting.openTelemetry.onRunEnd : void 0;
|
|
2724
|
-
const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
|
|
2725
|
-
let runError;
|
|
2726
|
-
let runEndError;
|
|
2727
|
-
let output;
|
|
2728
|
-
let reporter;
|
|
2729
|
-
try {
|
|
2730
|
-
const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
|
|
2731
|
-
const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
|
|
2732
|
-
const identity = createRunIdentity(options, preparedProjects);
|
|
2733
|
-
const eventRecorder = createEventRecorder(identity);
|
|
2734
|
-
const runReporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
|
|
2735
|
-
reporter = runReporter;
|
|
2736
|
-
output = await telemetry.withSpan("vieval.run", {
|
|
2737
|
-
"vieval.attempt.id": identity.attemptId,
|
|
2738
|
-
"vieval.experiment.id": identity.experimentId,
|
|
2739
|
-
"vieval.run.id": identity.runId,
|
|
2740
|
-
"vieval.workspace.id": identity.workspaceId
|
|
2741
|
-
}, async () => {
|
|
2742
|
-
const workspaceScheduler = createSchedulerRuntime({ concurrency: { workspace: resolveWorkspaceConcurrency(loadedConfig, options) } });
|
|
2743
|
-
const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
|
|
2744
|
-
const totalTasks = preparedProjects.reduce((sum, project) => {
|
|
2745
|
-
if (project.kind === "prepared") return sum + project.prepared.tasks.length;
|
|
2746
|
-
return sum + project.summary.taskCount;
|
|
2747
|
-
}, 0);
|
|
2748
|
-
const skippedSummaryTasks = preparedProjects.reduce((sum, project) => {
|
|
2749
|
-
if (project.kind === "summary") return sum + project.summary.taskCount;
|
|
2750
|
-
return sum;
|
|
2751
|
-
}, 0);
|
|
2752
|
-
const reporterCounters = {
|
|
2753
|
-
failedTasks: 0,
|
|
2754
|
-
passedTasks: 0,
|
|
2755
|
-
skippedTasks: 0
|
|
2756
|
-
};
|
|
2757
|
-
runReporter.onRunStart({ totalTasks });
|
|
2758
|
-
for (const project of executableProjects) for (const task of project.tasks) runReporter.onTaskQueued(createTaskQueuePayload(task, project.name));
|
|
2759
|
-
const projectSummaries = (await Promise.all(preparedProjects.map(async (preparedProject, index) => {
|
|
2760
|
-
if (preparedProject.kind === "summary") return {
|
|
2761
|
-
index,
|
|
2762
|
-
summary: preparedProject.summary
|
|
2763
|
-
};
|
|
2764
|
-
return {
|
|
2765
|
-
index,
|
|
2766
|
-
summary: await telemetry.withSpan("vieval.project", {
|
|
2767
|
-
"vieval.project.name": preparedProject.prepared.name,
|
|
2768
|
-
"vieval.run.id": identity.runId
|
|
2769
|
-
}, async () => await workspaceScheduler.runCase({
|
|
2770
|
-
experimentId: identity.experimentId,
|
|
2771
|
-
projectName: preparedProject.prepared.name,
|
|
2772
|
-
scope: "workspace",
|
|
2773
|
-
workspaceId: identity.workspaceId
|
|
2774
|
-
}, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, telemetry, runReporter, reporterCounters, eventRecorder.record, options)))
|
|
2775
|
-
};
|
|
2776
|
-
}))).sort((left, right) => left.index - right.index).map((item) => item.summary);
|
|
2777
|
-
runReporter.onRunEnd({
|
|
2778
|
-
failedTasks: reporterCounters.failedTasks,
|
|
2779
|
-
passedTasks: reporterCounters.passedTasks,
|
|
2780
|
-
skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
|
|
2781
|
-
totalTasks
|
|
2717
|
+
await vitestCompatReporter?.onTaskEnd({
|
|
2718
|
+
state: "skipped",
|
|
2719
|
+
taskId: task.id
|
|
2782
2720
|
});
|
|
2783
|
-
const output = {
|
|
2784
|
-
attemptId: identity.attemptId,
|
|
2785
|
-
configFilePath: loadedConfig.configFilePath,
|
|
2786
|
-
experimentId: identity.experimentId,
|
|
2787
|
-
projects: projectSummaries,
|
|
2788
|
-
reportDirectory: null,
|
|
2789
|
-
runId: identity.runId,
|
|
2790
|
-
workspaceId: identity.workspaceId
|
|
2791
|
-
};
|
|
2792
|
-
if (options.reportOut != null) output.reportDirectory = await writeRunReportArtifacts(output, eventRecorder.events, identity, options.reportOut);
|
|
2793
|
-
return output;
|
|
2794
|
-
});
|
|
2795
|
-
} catch (error) {
|
|
2796
|
-
runError = error;
|
|
2797
|
-
} finally {
|
|
2798
|
-
if (onOpenTelemetryRunEnd != null) try {
|
|
2799
|
-
await onOpenTelemetryRunEnd();
|
|
2800
|
-
} catch (error) {
|
|
2801
|
-
if (runError == null) runEndError = error;
|
|
2802
2721
|
}
|
|
2803
|
-
|
|
2804
|
-
|
|
2722
|
+
await vitestCompatReporter?.onRunEnd({ failed: true });
|
|
2723
|
+
return {
|
|
2724
|
+
caseFailures: projectCaseFailures,
|
|
2725
|
+
caseSummary: {
|
|
2726
|
+
failed: projectCaseCounters.failed,
|
|
2727
|
+
passed: projectCaseCounters.passed,
|
|
2728
|
+
skipped: projectCaseCounters.skipped,
|
|
2729
|
+
timeout: projectCaseCounters.timeout,
|
|
2730
|
+
total: projectCaseCounters.seenCaseIds.size
|
|
2731
|
+
},
|
|
2732
|
+
discoveredEvalFileCount: prepared.discoveredEvalFileCount,
|
|
2733
|
+
durationMs: Date.now() - prepared.startedAt,
|
|
2734
|
+
entryCount: prepared.entryCount,
|
|
2735
|
+
errorMessage: errorMessageFrom(error) ?? "Unknown project execution error.",
|
|
2736
|
+
executed: false,
|
|
2737
|
+
matrixSummary: createProjectMatrixSummary(prepared.tasks),
|
|
2738
|
+
name: prepared.name,
|
|
2739
|
+
result: null,
|
|
2740
|
+
taskCount: prepared.tasks.length
|
|
2741
|
+
};
|
|
2805
2742
|
}
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
if (
|
|
2809
|
-
|
|
2743
|
+
}
|
|
2744
|
+
/**
 * Selects the projects whose `name` appears in `names`.
 *
 * An empty `names` list means "no filter": a shallow copy of every project is
 * returned. Input order is preserved and unknown names are ignored.
 *
 * @param {Array<{name: string}>} projects - Candidate projects.
 * @param {string[]} names - Requested project names.
 * @returns {Array<{name: string}>} A new array of the matching projects.
 */
function filterProjectsByName(projects, names) {
  const hasFilter = names.length !== 0;
  if (!hasFilter) {
    return Array.from(projects);
  }
  const requested = new Set(names);
  return projects.filter(({ name }) => requested.has(name));
}
|
|
2749
|
+
/**
 * Formats a duration for the run summary as ` <rounded>ms`.
 *
 * The value is rounded to whole milliseconds; anything above 1000 is painted
 * with `colors.yellow`, everything else with `colors.green`. The "ms" unit is
 * passed through `colors.dim`.
 *
 * @param {number | null | undefined} durationMs - Elapsed time in milliseconds.
 * @param {object} colors - Formatter exposing `yellow`, `green` and `dim`.
 * @returns {string} Colored duration label, or "" when no duration is given.
 */
function formatDuration$1(durationMs, colors) {
  if (durationMs == null) {
    return "";
  }
  const rounded = Math.round(durationMs);
  const paint = rounded > 1e3 ? colors.yellow : colors.green;
  const unit = colors.dim("ms");
  return paint(` ${rounded}${unit}`);
}
|
|
2754
|
+
/**
 * Renders a hybrid average with at most three decimals and no trailing zeros.
 *
 * @param {number | null | undefined} hybridAverage - Score to display.
 * @returns {string} "n/a" when the score is missing, otherwise the trimmed number (e.g. "0.5", "1").
 */
function formatHybridAverage(hybridAverage) {
  if (hybridAverage == null) {
    return "n/a";
  }
  const fixed = hybridAverage.toFixed(3);
  // Strip trailing zeros, and the decimal point too when nothing follows it.
  return fixed.replace(/\.?0+$/, "");
}
|
|
2758
|
+
/**
 * Extracts the id of the task that caused a runner failure.
 *
 * @param {unknown} error - Value caught from a run.
 * @returns {*} `error.taskId` when the error is a `RunnerExecutionError`, otherwise `null`.
 */
function getFailedTaskId(error) {
  return error instanceof RunnerExecutionError ? error.taskId : null;
}
|
|
2762
|
+
/**
 * Tells whether a reporter exposes `getWindowRows`, own or inherited.
 *
 * The `in` operator throws a TypeError when its right-hand side is not an
 * object, so `null`, `undefined` and primitives are rejected up front; a
 * predicate should answer `false` for them rather than crash the caller.
 *
 * @param {unknown} reporter - Candidate reporter.
 * @returns {boolean} `true` only for objects or functions that have a `getWindowRows` property.
 */
function isSummaryReporter(reporter) {
  const kind = typeof reporter;
  if (reporter === null || (kind !== "object" && kind !== "function")) {
    return false;
  }
  return "getWindowRows" in reporter;
}
|
|
2811
2765
|
/**
|
|
2812
|
-
*
|
|
2766
|
+
* Normalizes terminal row count into the live reporter window height.
|
|
2767
|
+
*
|
|
2768
|
+
* Before:
|
|
2769
|
+
* - undefined
|
|
2770
|
+
* - 4
|
|
2771
|
+
* - 40
|
|
2772
|
+
*
|
|
2773
|
+
* After:
|
|
2774
|
+
* - 23
|
|
2775
|
+
* - 6
|
|
2776
|
+
* - 39
|
|
2813
2777
|
*/
|
|
2814
|
-
function
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
const
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
const
|
|
2837
|
-
if (
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
}
|
|
2853
|
-
for (const project of output.projects) {
|
|
2854
|
-
totalTasks += project.taskCount;
|
|
2855
|
-
executedTasks += project.result?.overall.runCount ?? 0;
|
|
2856
|
-
const badge = createProjectBadge(project.name, colors, colorEnabled);
|
|
2857
|
-
const isFailed = project.errorMessage != null;
|
|
2858
|
-
const hasFailedCases = (project.caseSummary?.failed ?? 0) > 0 || (project.caseSummary?.timeout ?? 0) > 0 || (project.caseFailures?.length ?? 0) > 0;
|
|
2859
|
-
if (isFailed) {
|
|
2860
|
-
failedProjects += 1;
|
|
2861
|
-
lines.push(` ${colors.red("❯")} ${badge}${formatDuration$1(project.durationMs, colors)}`);
|
|
2862
|
-
lines.push(` ${project.errorMessage}`);
|
|
2863
|
-
continue;
|
|
2864
|
-
}
|
|
2865
|
-
if (!project.executed) {
|
|
2866
|
-
skippedProjects += 1;
|
|
2867
|
-
const countLabel = colors.dim(`(${project.taskCount} tasks)`);
|
|
2868
|
-
const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, 0 runs, hybrid n/a`);
|
|
2869
|
-
const matrixSummary = formatMatrixSummary(project.matrixSummary);
|
|
2870
|
-
lines.push(` ${colors.dim("○")} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
|
|
2871
|
-
if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
|
|
2872
|
-
const scheduleBreakdown = formatScheduleBreakdown(project);
|
|
2873
|
-
if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
|
|
2874
|
-
continue;
|
|
2875
|
-
}
|
|
2876
|
-
if (hasFailedCases) failedProjects += 1;
|
|
2877
|
-
else passedProjects += 1;
|
|
2878
|
-
const hybridAverageLabel = formatHybridAverage(project.result?.overall.hybridAverage);
|
|
2879
|
-
const runCount = project.result?.overall.runCount ?? 0;
|
|
2880
|
-
const countLabel = colors.dim(`(${project.taskCount} tasks)`);
|
|
2881
|
-
const caseSummaryLabel = project.caseSummary == null ? "" : `, cases ${project.caseSummary.passed} passed | ${project.caseSummary.failed} failed | ${project.caseSummary.timeout} timeout`;
|
|
2882
|
-
const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, ${runCount} runs${caseSummaryLabel}, hybrid ${hybridAverageLabel}`);
|
|
2883
|
-
const matrixSummary = formatMatrixSummary(project.matrixSummary);
|
|
2884
|
-
lines.push(` ${hasFailedCases ? colors.red("❯") : colors.green("✓")} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
|
|
2885
|
-
if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
|
|
2886
|
-
const scheduleBreakdown = formatScheduleBreakdown(project);
|
|
2887
|
-
if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
|
|
2888
|
-
if ((project.caseFailures?.length ?? 0) > 0) {
|
|
2889
|
-
lines.push(` ${colors.red("Failed cases:")}`);
|
|
2890
|
-
for (const failure of project.caseFailures.slice(0, 5)) {
|
|
2891
|
-
lines.push(` ${colors.red(`- ${failure.caseName} (${failure.taskId})`)}`);
|
|
2892
|
-
for (const line of failure.errorMessage.split("\n")) lines.push(` ${colors.red(line)}`);
|
|
2778
|
+
/**
 * Converts a terminal row count into the live reporter's window height.
 *
 * A missing, non-finite or non-positive count is treated as 24 rows. One row
 * is then subtracted and the result never drops below 6.
 *
 * @param {number | null | undefined} rows - Terminal row count.
 * @returns {number} Window height (for example undefined -> 23, 4 -> 6, 40 -> 39).
 */
function normalizeLiveReporterMaxRows(rows) {
  const isUsable = rows != null && Number.isFinite(rows) && rows > 0;
  const terminalRows = isUsable ? Math.floor(rows) : 24;
  return Math.max(6, terminalRows - 1);
}
|
|
2781
|
+
/**
 * Discovers, loads and schedules a project's eval entries ahead of execution.
 *
 * Resolves to one of two shapes, both carrying `experimentMatrixRows`:
 * - `kind: "prepared"` when the project has an `executor`, or when at least
 *   one collected entry defines a task and the project lists at least one model;
 * - `kind: "summary"` with `executed: false` when nothing can be executed, or
 *   when any preparation step throws. In the failure case the counts are
 *   zeroed and `errorMessage` carries the failure text.
 *
 * @param {object} project - Project config; reads `root`, `include`, `exclude`,
 *   `evalMatrix`, `runMatrix`, `inferenceExecutors`, `models`, `executor` and `name`.
 * @returns {Promise<object>} The prepared project or a non-executed summary.
 */
async function prepareProject(project) {
  const startedAt = Date.now();
  try {
    const runtimeContext = await createRunnerRuntimeContext({
      cwd: project.root,
      fallbackProjectRootDirectory: project.root
    });
    const discoveredFiles = await discoverEvalFiles({
      exclude: project.exclude,
      include: project.include,
      root: project.root
    });
    const loadedModules = await loadEvalModulesWithVitestRuntime(discoveredFiles, project.root);
    const collectedEntries = collectEvalEntries(loadedModules, runtimeContext);
    const scheduledTasks = createRunnerSchedule({
      entries: collectedEntries,
      evalMatrix: project.evalMatrix,
      inferenceExecutors: project.inferenceExecutors,
      runMatrix: project.runMatrix
    });

    // Entries that define their own task can run without a project executor,
    // provided the project lists at least one model.
    const canAutoExecuteEntryTasks = collectedEntries.some(({ task }) => task != null) && project.models.length > 0;
    const isExecutable = project.executor != null || canAutoExecuteEntryTasks;
    const experimentMatrixRows = createExperimentMatrixRows(scheduledTasks);

    if (isExecutable) {
      return {
        experimentMatrixRows,
        kind: "prepared",
        prepared: {
          discoveredEvalFileCount: discoveredFiles.length,
          entryCount: collectedEntries.length,
          name: project.name,
          project,
          startedAt,
          tasks: scheduledTasks
        }
      };
    }

    return {
      experimentMatrixRows,
      kind: "summary",
      summary: {
        caseFailures: [],
        caseSummary: null,
        discoveredEvalFileCount: discoveredFiles.length,
        durationMs: Date.now() - startedAt,
        entryCount: collectedEntries.length,
        errorMessage: null,
        executed: false,
        matrixSummary: createProjectMatrixSummary(scheduledTasks),
        name: project.name,
        result: null,
        taskCount: scheduledTasks.length
      }
    };
  } catch (error) {
    // Preparation failures are reported as a summary so the caller still gets
    // one result per project.
    const failureSummary = {
      caseFailures: [],
      caseSummary: null,
      discoveredEvalFileCount: 0,
      durationMs: Date.now() - startedAt,
      entryCount: 0,
      errorMessage: errorMessageFrom(error) ?? "Unknown project execution error.",
      executed: false,
      matrixSummary: null,
      name: project.name,
      result: null,
      taskCount: 0
    };
    return {
      experimentMatrixRows: [],
      kind: "summary",
      summary: failureSummary
    };
  }
}
|
|
2850
|
+
/**
 * Picks a concurrency limit in which the CLI value can only lower the
 * configured one.
 *
 * @param {number | null | undefined} defaultConcurrency - Limit from configuration.
 * @param {number | null | undefined} cliConcurrency - Limit requested on the command line.
 * @param {number} fallback - Used when no configured limit exists.
 * @returns {number} The configured limit (or fallback), capped by the CLI limit when one is given.
 */
function resolveCappedConcurrency(defaultConcurrency, cliConcurrency, fallback) {
  const baseline = defaultConcurrency ?? fallback;
  return cliConcurrency == null ? baseline : Math.min(baseline, cliConcurrency);
}
|
|
2855
|
+
/**
 * Collects the attempt/case concurrency overrides given on the command line.
 *
 * @param {object} options - CLI options; reads `attemptConcurrency` and `caseConcurrency`.
 * @returns {{attempt: *, case: *} | undefined} The overrides, or `undefined` when neither was supplied.
 */
function resolveCliRuntimeConcurrency(options) {
  const { attemptConcurrency, caseConcurrency } = options;
  const nothingRequested = attemptConcurrency == null && caseConcurrency == null;
  if (nothingRequested) {
    return undefined;
  }
  return {
    attempt: attemptConcurrency,
    case: caseConcurrency
  };
}
|
|
2862
|
+
/**
 * Determines the experiment id for a run.
 *
 * An explicit `options.experiment` wins (after sanitizing). Otherwise the id
 * is derived from the union of every project's `experimentMatrixRows`, sorted
 * and joined with "--" behind a "matrix-" prefix. With no rows at all the id
 * is "default-experiment".
 *
 * @param {object} options - CLI options; reads `experiment`.
 * @param {Array<{experimentMatrixRows: string[]}>} preparedProjects - Prepared project records.
 * @returns {string} Experiment identifier.
 */
function resolveExperimentId(options, preparedProjects) {
  const { experiment } = options;
  if (experiment != null) {
    return sanitizeIdentitySegment(experiment);
  }
  const uniqueRows = new Set();
  for (const project of preparedProjects) {
    for (const row of project.experimentMatrixRows) {
      uniqueRows.add(row);
    }
  }
  if (uniqueRows.size === 0) {
    return "default-experiment";
  }
  const joinedRows = Array.from(uniqueRows).sort().join("--");
  return sanitizeIdentitySegment(`matrix-${joinedRows}`);
}
|
|
2869
|
+
/**
 * Chooses between a CLI concurrency override and the configured default.
 *
 * Unlike the capped resolver, the CLI value fully replaces the default
 * whenever it is present (not `null`/`undefined`).
 *
 * @param {*} defaultConcurrency - Value from task or project configuration.
 * @param {*} cliConcurrency - Value from the command line.
 * @returns {*} The CLI value when given, otherwise the default (which may itself be nullish).
 */
function resolveOptionalRuntimeTaskConcurrency(defaultConcurrency, cliConcurrency) {
  if (cliConcurrency != null) {
    return cliConcurrency;
  }
  return defaultConcurrency;
}
|
|
2872
|
+
/**
 * Resolves the project-level concurrency limit.
 *
 * Uses `project.concurrency.project` capped by `options.projectConcurrency`;
 * with no configured value the limit is unbounded (positive infinity) unless
 * the CLI supplies one.
 *
 * @param {object} project - Project config; reads `concurrency?.project`.
 * @param {object} options - CLI options; reads `projectConcurrency`.
 * @returns {number} Effective project concurrency.
 */
function resolveProjectConcurrency(project, options) {
  const configuredLimit = project.concurrency?.project;
  const cliLimit = options.projectConcurrency;
  return resolveCappedConcurrency(configuredLimit, cliLimit, Number.POSITIVE_INFINITY);
}
|
|
2875
|
+
/**
 * Resolves the attempt/case concurrency that applies to one task at runtime.
 *
 * For each dimension the task's own setting is preferred over the project's,
 * and a CLI override (when present) replaces both.
 *
 * @param {{attempt?: *, case?: *} | null | undefined} taskConcurrency - Concurrency declared on the task.
 * @param {object} project - Project config; reads `concurrency?.attempt` and `concurrency?.case`.
 * @param {object} options - CLI options; reads `attemptConcurrency` and `caseConcurrency`.
 * @returns {{attempt: *, case: *} | undefined} Resolved values, or `undefined` when both are nullish.
 */
function resolveRuntimeTaskConcurrency(taskConcurrency, project, options) {
  const attemptDefault = taskConcurrency?.attempt ?? project.concurrency?.attempt;
  const attempt = resolveOptionalRuntimeTaskConcurrency(attemptDefault, options.attemptConcurrency);

  const caseDefault = taskConcurrency?.case ?? project.concurrency?.case;
  const caseConcurrency = resolveOptionalRuntimeTaskConcurrency(caseDefault, options.caseConcurrency);

  const nothingResolved = attempt == null && caseConcurrency == null;
  if (nothingResolved) {
    return undefined;
  }
  return {
    attempt,
    case: caseConcurrency
  };
}
|
|
2884
|
+
/**
 * Computes how many scheduled tasks of a project may run at once: the smaller
 * of the project-level and task-level concurrency limits.
 *
 * @param {object} project - Project config forwarded to both resolvers.
 * @param {object} options - CLI options forwarded to both resolvers.
 * @returns {number} Effective scheduler concurrency.
 */
function resolveScheduledTaskConcurrency(project, options) {
  const projectLimit = resolveProjectConcurrency(project, options);
  const taskLimit = resolveTaskConcurrency(project, options);
  return Math.min(projectLimit, taskLimit);
}
|
|
2887
|
+
/**
 * Resolves the task-level concurrency limit.
 *
 * Uses `project.concurrency.task` capped by `options.taskConcurrency`, and
 * falls back to 1 when the project configures nothing.
 *
 * @param {object} project - Project config; reads `concurrency?.task`.
 * @param {object} options - CLI options; reads `taskConcurrency`.
 * @returns {number} Effective task concurrency.
 */
function resolveTaskConcurrency(project, options) {
  const configuredLimit = project.concurrency?.task;
  const cliLimit = options.taskConcurrency;
  return resolveCappedConcurrency(configuredLimit, cliLimit, 1);
}
|
|
2890
|
+
/**
 * Returns the reporter hooks for a task, preferring hooks already attached to
 * the execution context and creating fresh ones only when none exist.
 *
 * @param {object} task - Scheduled task.
 * @param {object} context - Execution context; reads `reporterHooks`.
 * @param {object} reporter - Run reporter.
 * @param {string} projectName - Owning project name.
 * @param {Function} recordEvent - Event recorder callback.
 * @param {object} projectCaseCounters - Per-project case counters.
 * @param {Array} projectCaseFailures - Per-project case failure list.
 * @param {object} [vitestCompatReporter] - Optional Vitest-compatible reporter.
 * @returns {object} Reporter hooks for the task.
 */
function resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
  const existingHooks = context.reporterHooks;
  if (existingHooks != null) {
    return existingHooks;
  }
  return createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter);
}
|
|
2893
|
+
/**
 * Resolves the workspace-level concurrency limit.
 *
 * Uses `loadedConfig.concurrency.workspace` capped by
 * `options.workspaceConcurrency`, and falls back to 1 when the config sets
 * nothing.
 *
 * @param {object} loadedConfig - Loaded CLI config; reads `concurrency?.workspace`.
 * @param {object} options - CLI options; reads `workspaceConcurrency`.
 * @returns {number} Effective workspace concurrency.
 */
function resolveWorkspaceConcurrency(loadedConfig, options) {
  const configuredLimit = loadedConfig.concurrency?.workspace;
  const cliLimit = options.workspaceConcurrency;
  return resolveCappedConcurrency(configuredLimit, cliLimit, 1);
}
|
|
2896
|
+
/**
 * Normalizes free-form text into an identifier segment.
 *
 * Surrounding whitespace is trimmed; an empty result becomes "default". Every
 * run of characters other than word characters, "." and "-" collapses into a
 * single "-".
 *
 * @param {string} value - Raw identifier text.
 * @returns {string} Sanitized segment.
 */
function sanitizeIdentitySegment(value) {
  const trimmed = value.trim();
  return trimmed.length === 0 ? "default" : trimmed.replace(/[^\w.-]+/g, "-");
}
|
|
2901
|
+
/**
 * Decides whether CLI output should be colored.
 *
 * Precedence:
 * 1. `NO_COLOR` set to any non-empty value disables color. An empty
 *    `NO_COLOR` is ignored, as the NO_COLOR convention requires the variable
 *    to be present and not an empty string.
 * 2. `FORCE_COLOR`, when set, enables color unless its value is "0".
 * 3. Otherwise color is used only when stdout is a TTY.
 *
 * @returns {boolean} `true` when colored output should be emitted.
 */
function shouldUseColor() {
  const noColor = process.env.NO_COLOR;
  if (noColor != null && noColor !== "") {
    return false;
  }
  const forceColor = process.env.FORCE_COLOR;
  if (forceColor != null) {
    return forceColor !== "0";
  }
  return process.stdout.isTTY === true;
}
|
|
2907
2907
|
//#endregion
|
|
2908
2908
|
//#region src/cli/compare.ts
|
|
@@ -2918,17 +2918,12 @@ const compareHelpText = `
|
|
|
2918
2918
|
--output Optional output artifact path
|
|
2919
2919
|
--format Console output format: table | json (default: table)
|
|
2920
2920
|
`;
|
|
2921
|
-
function normalizeCliArgv$5(argv) {
|
|
2922
|
-
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
2923
|
-
if (normalizedArgv[0] === "compare") return normalizedArgv.slice(1);
|
|
2924
|
-
return normalizedArgv;
|
|
2925
|
-
}
|
|
2926
2921
|
function parseCompareCliArguments(argv) {
|
|
2927
2922
|
const cli = meow(compareHelpText, {
|
|
2928
2923
|
argv: normalizeCliArgv$5(argv),
|
|
2929
2924
|
flags: {
|
|
2930
|
-
config: { type: "string" },
|
|
2931
2925
|
comparison: { type: "string" },
|
|
2926
|
+
config: { type: "string" },
|
|
2932
2927
|
format: {
|
|
2933
2928
|
default: "table",
|
|
2934
2929
|
type: "string"
|
|
@@ -3009,6 +3004,11 @@ async function runCompareCliOrExit(argv) {
|
|
|
3009
3004
|
process.exitCode = 1;
|
|
3010
3005
|
}
|
|
3011
3006
|
}
|
|
3007
|
+
function normalizeCliArgv$5(argv) {
|
|
3008
|
+
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3009
|
+
if (normalizedArgv[0] === "compare") return normalizedArgv.slice(1);
|
|
3010
|
+
return normalizedArgv;
|
|
3011
|
+
}
|
|
3012
3012
|
//#endregion
|
|
3013
3013
|
//#region package.json
|
|
3014
3014
|
var name = "vieval";
|
|
@@ -3034,14 +3034,6 @@ const evalRunHelpText = `
|
|
|
3034
3034
|
--report-out Report output root directory
|
|
3035
3035
|
--json Print machine-readable JSON output
|
|
3036
3036
|
`;
|
|
3037
|
-
function normalizeCliArgv$4(argv) {
|
|
3038
|
-
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3039
|
-
return normalizedArgv[0] === "run" ? normalizedArgv.slice(1) : normalizedArgv;
|
|
3040
|
-
}
|
|
3041
|
-
function normalizeProjectNames(projectNames) {
|
|
3042
|
-
if (typeof projectNames === "string") return [projectNames];
|
|
3043
|
-
return projectNames ?? [];
|
|
3044
|
-
}
|
|
3045
3037
|
/**
|
|
3046
3038
|
* Parses `vieval run` CLI arguments into one normalized execution payload.
|
|
3047
3039
|
*
|
|
@@ -3058,9 +3050,12 @@ function normalizeProjectNames(projectNames) {
|
|
|
3058
3050
|
function parseCliArguments(argv) {
|
|
3059
3051
|
const cli = meow(evalRunHelpText, {
|
|
3060
3052
|
argv: normalizeCliArgv$4(argv),
|
|
3061
|
-
importMeta: import.meta,
|
|
3062
3053
|
flags: {
|
|
3054
|
+
attempt: { type: "string" },
|
|
3055
|
+
attemptConcurrency: { type: "number" },
|
|
3056
|
+
caseConcurrency: { type: "number" },
|
|
3063
3057
|
config: { type: "string" },
|
|
3058
|
+
experiment: { type: "string" },
|
|
3064
3059
|
json: {
|
|
3065
3060
|
default: false,
|
|
3066
3061
|
type: "boolean"
|
|
@@ -3069,16 +3064,13 @@ function parseCliArguments(argv) {
|
|
|
3069
3064
|
isMultiple: true,
|
|
3070
3065
|
type: "string"
|
|
3071
3066
|
},
|
|
3072
|
-
workspace: { type: "string" },
|
|
3073
|
-
experiment: { type: "string" },
|
|
3074
|
-
attempt: { type: "string" },
|
|
3075
|
-
workspaceConcurrency: { type: "number" },
|
|
3076
3067
|
projectConcurrency: { type: "number" },
|
|
3068
|
+
reportOut: { type: "string" },
|
|
3077
3069
|
taskConcurrency: { type: "number" },
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3070
|
+
workspace: { type: "string" },
|
|
3071
|
+
workspaceConcurrency: { type: "number" }
|
|
3072
|
+
},
|
|
3073
|
+
importMeta: import.meta
|
|
3082
3074
|
});
|
|
3083
3075
|
return {
|
|
3084
3076
|
attempt: cli.flags.attempt,
|
|
@@ -3131,198 +3123,57 @@ async function runEvalRunCli(argv) {
|
|
|
3131
3123
|
attemptConcurrency: parsed.attemptConcurrency,
|
|
3132
3124
|
caseConcurrency: parsed.caseConcurrency,
|
|
3133
3125
|
configFilePath: parsed.configFilePath,
|
|
3134
|
-
experiment: parsed.experiment,
|
|
3135
|
-
project: parsed.project,
|
|
3136
|
-
projectConcurrency: parsed.projectConcurrency,
|
|
3137
|
-
reportOut: parsed.reportOut,
|
|
3138
|
-
taskConcurrency: parsed.taskConcurrency,
|
|
3139
|
-
workspace: parsed.workspace,
|
|
3140
|
-
workspaceConcurrency: parsed.workspaceConcurrency
|
|
3141
|
-
});
|
|
3142
|
-
if (parsed.json) {
|
|
3143
|
-
process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
|
|
3144
|
-
if (hasRunFailures(output)) process.exitCode = 1;
|
|
3145
|
-
return;
|
|
3146
|
-
}
|
|
3147
|
-
process.stdout.write(`${formatVievalCliRunOutput(output)}\n`);
|
|
3148
|
-
if (hasRunFailures(output)) process.exitCode = 1;
|
|
3149
|
-
} catch (error) {
|
|
3150
|
-
const errorMessage = errorMessageFrom(error) ?? "Unknown CLI failure.";
|
|
3151
|
-
process.stderr.write(`[${name}] ${errorMessage}\n`);
|
|
3152
|
-
process.exitCode = 1;
|
|
3153
|
-
}
|
|
3154
|
-
}
|
|
3155
|
-
//#endregion
|
|
3156
|
-
//#region src/cli/report-analyze.ts
|
|
3157
|
-
const reportAnalyzeHelpText = `
|
|
3158
|
-
Analyze generated vieval report artifacts.
|
|
3159
|
-
|
|
3160
|
-
Usage
|
|
3161
|
-
$ vieval report analyze <reportPath> [options]
|
|
3162
|
-
|
|
3163
|
-
Options
|
|
3164
|
-
--format Output format: table | json | jsonl | csv (default: table)
|
|
3165
|
-
--workspace Workspace id filter
|
|
3166
|
-
--project Project name filter (exact)
|
|
3167
|
-
--experiment Experiment id filter
|
|
3168
|
-
--attempt Attempt id filter
|
|
3169
|
-
--run Run id filter
|
|
3170
|
-
--task-state Keep runs containing at least one task in this state
|
|
3171
|
-
--case-state Keep runs containing at least one case in this state
|
|
3172
|
-
--contains Keep runs containing this text in event name or payload
|
|
3173
|
-
--error-contains Keep runs containing this text in project errors or event payload
|
|
3174
|
-
--run-matrix Keep runs matching run-matrix selector "key=value[,key=value]"
|
|
3175
|
-
--eval-matrix Keep runs matching eval-matrix selector "key=value[,key=value]"
|
|
3176
|
-
`;
|
|
3177
|
-
function normalizeCliArgv$3(argv) {
|
|
3178
|
-
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3179
|
-
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "analyze") return normalizedArgv.slice(2);
|
|
3180
|
-
if (normalizedArgv[0] === "analyze") return normalizedArgv.slice(1);
|
|
3181
|
-
return normalizedArgv;
|
|
3182
|
-
}
|
|
3183
|
-
function parseReportAnalyzeCliArguments(argv) {
|
|
3184
|
-
const cli = meow(reportAnalyzeHelpText, {
|
|
3185
|
-
argv: normalizeCliArgv$3(argv),
|
|
3186
|
-
flags: {
|
|
3187
|
-
attempt: { type: "string" },
|
|
3188
|
-
caseState: { type: "string" },
|
|
3189
|
-
contains: { type: "string" },
|
|
3190
|
-
evalMatrix: { type: "string" },
|
|
3191
|
-
errorContains: { type: "string" },
|
|
3192
|
-
experiment: { type: "string" },
|
|
3193
|
-
format: {
|
|
3194
|
-
default: "table",
|
|
3195
|
-
type: "string"
|
|
3196
|
-
},
|
|
3197
|
-
project: { type: "string" },
|
|
3198
|
-
runMatrix: { type: "string" },
|
|
3199
|
-
run: { type: "string" },
|
|
3200
|
-
taskState: { type: "string" },
|
|
3201
|
-
workspace: { type: "string" }
|
|
3202
|
-
},
|
|
3203
|
-
importMeta: import.meta
|
|
3204
|
-
});
|
|
3205
|
-
const reportPath = cli.input[0];
|
|
3206
|
-
if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
|
|
3207
|
-
const normalizedFormat = cli.flags.format.toLowerCase();
|
|
3208
|
-
const format = normalizedFormat === "json" ? "json" : normalizedFormat === "jsonl" ? "jsonl" : normalizedFormat === "csv" ? "csv" : "table";
|
|
3209
|
-
return {
|
|
3210
|
-
attempt: cli.flags.attempt,
|
|
3211
|
-
caseState: normalizeStateFilter(cli.flags.caseState),
|
|
3212
|
-
contains: cli.flags.contains,
|
|
3213
|
-
evalMatrix: parseMatrixSelector(cli.flags.evalMatrix),
|
|
3214
|
-
errorContains: cli.flags.errorContains,
|
|
3215
|
-
experiment: cli.flags.experiment,
|
|
3216
|
-
format,
|
|
3217
|
-
project: cli.flags.project,
|
|
3218
|
-
reportPath,
|
|
3219
|
-
runMatrix: parseMatrixSelector(cli.flags.runMatrix),
|
|
3220
|
-
run: cli.flags.run,
|
|
3221
|
-
taskState: normalizeStateFilter(cli.flags.taskState),
|
|
3222
|
-
workspace: cli.flags.workspace
|
|
3223
|
-
};
|
|
3224
|
-
}
|
|
3225
|
-
function normalizeStateFilter(value) {
|
|
3226
|
-
if (value == null) return;
|
|
3227
|
-
const normalized = value.trim().toLowerCase();
|
|
3228
|
-
if (normalized === "passed" || normalized === "failed" || normalized === "skipped") return normalized;
|
|
3229
|
-
throw new Error(`Unsupported state filter "${value}". Expected "passed", "failed", or "skipped".`);
|
|
3230
|
-
}
|
|
3231
|
-
function parseMatrixSelector(value) {
|
|
3232
|
-
if (value == null) return;
|
|
3233
|
-
const selector = {};
|
|
3234
|
-
const segments = value.split(",").map((segment) => segment.trim()).filter((segment) => segment.length > 0);
|
|
3235
|
-
for (const segment of segments) {
|
|
3236
|
-
const separatorIndex = segment.indexOf("=");
|
|
3237
|
-
if (separatorIndex <= 0 || separatorIndex === segment.length - 1) throw new Error(`Invalid matrix selector segment "${segment}". Expected "key=value".`);
|
|
3238
|
-
const key = segment.slice(0, separatorIndex).trim();
|
|
3239
|
-
const parsedValue = segment.slice(separatorIndex + 1).trim();
|
|
3240
|
-
if (key.length === 0 || parsedValue.length === 0) throw new Error(`Invalid matrix selector segment "${segment}". Expected "key=value".`);
|
|
3241
|
-
selector[key] = parsedValue;
|
|
3242
|
-
}
|
|
3243
|
-
return selector;
|
|
3244
|
-
}
|
|
3245
|
-
function filterAnalyzeRows(rows, parsed) {
|
|
3246
|
-
return rows.filter((row) => {
|
|
3247
|
-
if (parsed.workspace != null && row.workspaceId !== parsed.workspace) return false;
|
|
3248
|
-
if (parsed.experiment != null && row.experimentId !== parsed.experiment) return false;
|
|
3249
|
-
if (parsed.attempt != null && row.attemptId !== parsed.attempt) return false;
|
|
3250
|
-
if (parsed.run != null && row.runId !== parsed.run) return false;
|
|
3251
|
-
if (parsed.project != null && !row.projectNames.includes(parsed.project)) return false;
|
|
3252
|
-
return true;
|
|
3253
|
-
});
|
|
3254
|
-
}
|
|
3255
|
-
function includesNeedle(value, needle) {
|
|
3256
|
-
const normalizedNeedle = needle.trim().toLowerCase();
|
|
3257
|
-
if (normalizedNeedle.length === 0) return true;
|
|
3258
|
-
return JSON.stringify(value).toLowerCase().includes(normalizedNeedle);
|
|
3259
|
-
}
|
|
3260
|
-
function hasTaskState(artifact, targetState) {
|
|
3261
|
-
return artifact.events.some((event) => {
|
|
3262
|
-
if (event.event !== "TaskEnded") return false;
|
|
3263
|
-
return event.data?.state === targetState;
|
|
3264
|
-
});
|
|
3265
|
-
}
|
|
3266
|
-
function hasCaseState(artifact, targetState) {
|
|
3267
|
-
return artifact.events.some((event) => {
|
|
3268
|
-
if (event.event !== "CaseEnded") return false;
|
|
3269
|
-
return event.data?.state === targetState;
|
|
3270
|
-
});
|
|
3271
|
-
}
|
|
3272
|
-
function matchesMatrixSelector(matrix, selector) {
|
|
3273
|
-
return Object.entries(selector).every(([key, expectedValue]) => String(matrix[key]) === expectedValue);
|
|
3274
|
-
}
|
|
3275
|
-
function hasRunMatrixMatch(artifact, selector) {
|
|
3276
|
-
return artifact.summary.projects.some((project) => project.result?.runs.some((run) => matchesMatrixSelector(run.matrix.run, selector)) === true);
|
|
3277
|
-
}
|
|
3278
|
-
function hasEvalMatrixMatch(artifact, selector) {
|
|
3279
|
-
return artifact.summary.projects.some((project) => project.result?.runs.some((run) => matchesMatrixSelector(run.matrix.eval, selector)) === true);
|
|
3280
|
-
}
|
|
3281
|
-
function matchesOutcomeFilters(artifact, parsed) {
|
|
3282
|
-
if (parsed.runMatrix != null && !hasRunMatrixMatch(artifact, parsed.runMatrix)) return false;
|
|
3283
|
-
if (parsed.evalMatrix != null && !hasEvalMatrixMatch(artifact, parsed.evalMatrix)) return false;
|
|
3284
|
-
if (parsed.taskState != null && !hasTaskState(artifact, parsed.taskState)) return false;
|
|
3285
|
-
if (parsed.caseState != null && !hasCaseState(artifact, parsed.caseState)) return false;
|
|
3286
|
-
if (parsed.contains != null) {
|
|
3287
|
-
if (!artifact.events.some((event) => includesNeedle({
|
|
3288
|
-
data: event.data,
|
|
3289
|
-
event: event.event
|
|
3290
|
-
}, parsed.contains))) return false;
|
|
3291
|
-
}
|
|
3292
|
-
if (parsed.errorContains != null) {
|
|
3293
|
-
if (!(artifact.summary.projects.map((project) => project.errorMessage).filter((errorMessage) => errorMessage != null).some((errorMessage) => includesNeedle(errorMessage, parsed.errorContains)) || artifact.events.some((event) => includesNeedle(event.data, parsed.errorContains)))) return false;
|
|
3294
|
-
}
|
|
3295
|
-
return true;
|
|
3296
|
-
}
|
|
3297
|
-
async function readReportAnalyzeOutput(parsed) {
|
|
3298
|
-
const artifacts = await readReportArtifacts(parsed.reportPath);
|
|
3299
|
-
const rows = artifacts.map((artifact) => summarizeReportRunArtifact(artifact));
|
|
3300
|
-
const identityFilteredRows = filterAnalyzeRows(rows, parsed);
|
|
3301
|
-
const rowByDirectory = new Map(identityFilteredRows.map((row) => [row.reportDirectory, row]));
|
|
3302
|
-
const filteredRows = artifacts.filter((artifact) => rowByDirectory.has(artifact.reportDirectory)).filter((artifact) => matchesOutcomeFilters(artifact, parsed)).map((artifact) => rowByDirectory.get(artifact.reportDirectory)).filter((row) => row != null);
|
|
3303
|
-
return {
|
|
3304
|
-
experimentSummaries: buildExperimentSummaries(filteredRows),
|
|
3305
|
-
filteredRunCount: filteredRows.length,
|
|
3306
|
-
runs: filteredRows,
|
|
3307
|
-
totalRunCount: rows.length
|
|
3308
|
-
};
|
|
3309
|
-
}
|
|
3310
|
-
function roundMetric(value) {
|
|
3311
|
-
return Number(value.toFixed(6));
|
|
3312
|
-
}
|
|
3313
|
-
function computeAverage(values) {
|
|
3314
|
-
if (values.length === 0) return 0;
|
|
3315
|
-
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
3126
|
+
experiment: parsed.experiment,
|
|
3127
|
+
project: parsed.project,
|
|
3128
|
+
projectConcurrency: parsed.projectConcurrency,
|
|
3129
|
+
reportOut: parsed.reportOut,
|
|
3130
|
+
taskConcurrency: parsed.taskConcurrency,
|
|
3131
|
+
workspace: parsed.workspace,
|
|
3132
|
+
workspaceConcurrency: parsed.workspaceConcurrency
|
|
3133
|
+
});
|
|
3134
|
+
if (parsed.json) {
|
|
3135
|
+
process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
|
|
3136
|
+
if (hasRunFailures(output)) process.exitCode = 1;
|
|
3137
|
+
return;
|
|
3138
|
+
}
|
|
3139
|
+
process.stdout.write(`${formatVievalCliRunOutput(output)}\n`);
|
|
3140
|
+
if (hasRunFailures(output)) process.exitCode = 1;
|
|
3141
|
+
} catch (error) {
|
|
3142
|
+
const errorMessage = errorMessageFrom(error) ?? "Unknown CLI failure.";
|
|
3143
|
+
process.stderr.write(`[${name}] ${errorMessage}\n`);
|
|
3144
|
+
process.exitCode = 1;
|
|
3145
|
+
}
|
|
3316
3146
|
}
|
|
3317
|
-
function
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
const variance = computeAverage(values.map((value) => (value - average) ** 2));
|
|
3321
|
-
return Math.sqrt(variance);
|
|
3147
|
+
function normalizeCliArgv$4(argv) {
|
|
3148
|
+
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3149
|
+
return normalizedArgv[0] === "run" ? normalizedArgv.slice(1) : normalizedArgv;
|
|
3322
3150
|
}
|
|
3323
|
-
function
|
|
3324
|
-
|
|
3151
|
+
function normalizeProjectNames(projectNames) {
|
|
3152
|
+
if (typeof projectNames === "string") return [projectNames];
|
|
3153
|
+
return projectNames ?? [];
|
|
3325
3154
|
}
|
|
3155
|
+
//#endregion
|
|
3156
|
+
//#region src/cli/report-analyze.ts
|
|
3157
|
+
const reportAnalyzeHelpText = `
|
|
3158
|
+
Analyze generated vieval report artifacts.
|
|
3159
|
+
|
|
3160
|
+
Usage
|
|
3161
|
+
$ vieval report analyze <reportPath> [options]
|
|
3162
|
+
|
|
3163
|
+
Options
|
|
3164
|
+
--format Output format: table | json | jsonl | csv (default: table)
|
|
3165
|
+
--workspace Workspace id filter
|
|
3166
|
+
--project Project name filter (exact)
|
|
3167
|
+
--experiment Experiment id filter
|
|
3168
|
+
--attempt Attempt id filter
|
|
3169
|
+
--run Run id filter
|
|
3170
|
+
--task-state Keep runs containing at least one task in this state
|
|
3171
|
+
--case-state Keep runs containing at least one case in this state
|
|
3172
|
+
--contains Keep runs containing this text in event name or payload
|
|
3173
|
+
--error-contains Keep runs containing this text in project errors or event payload
|
|
3174
|
+
--run-matrix Keep runs matching run-matrix selector "key=value[,key=value]"
|
|
3175
|
+
--eval-matrix Keep runs matching eval-matrix selector "key=value[,key=value]"
|
|
3176
|
+
`;
|
|
3326
3177
|
/**
|
|
3327
3178
|
* Builds experiment-level rollups from filtered run rows.
|
|
3328
3179
|
*
|
|
@@ -3383,13 +3234,13 @@ function buildExperimentSummaries(rows) {
|
|
|
3383
3234
|
const stdevAttemptSuccessRate = computeStandardDeviation(attemptSuccessRates);
|
|
3384
3235
|
return {
|
|
3385
3236
|
attemptCount: attemptToRuns.size,
|
|
3386
|
-
attemptSummaries,
|
|
3387
3237
|
attemptSuccessRateStats: {
|
|
3388
3238
|
avg: roundMetric(avgAttemptSuccessRate),
|
|
3389
3239
|
max: roundMetric(maxAttemptSuccessRate),
|
|
3390
3240
|
min: roundMetric(minAttemptSuccessRate),
|
|
3391
3241
|
stdev: roundMetric(stdevAttemptSuccessRate)
|
|
3392
3242
|
},
|
|
3243
|
+
attemptSummaries,
|
|
3393
3244
|
experimentId,
|
|
3394
3245
|
failedProjects,
|
|
3395
3246
|
runCount: groupRows.length,
|
|
@@ -3404,16 +3255,94 @@ function buildExperimentSummaries(rows) {
|
|
|
3404
3255
|
return left.experimentId.localeCompare(right.experimentId);
|
|
3405
3256
|
});
|
|
3406
3257
|
}
|
|
3407
|
-
function
|
|
3408
|
-
const
|
|
3409
|
-
|
|
3410
|
-
|
|
3258
|
+
function parseReportAnalyzeCliArguments(argv) {
|
|
3259
|
+
const cli = meow(reportAnalyzeHelpText, {
|
|
3260
|
+
argv: normalizeCliArgv$3(argv),
|
|
3261
|
+
flags: {
|
|
3262
|
+
attempt: { type: "string" },
|
|
3263
|
+
caseState: { type: "string" },
|
|
3264
|
+
contains: { type: "string" },
|
|
3265
|
+
errorContains: { type: "string" },
|
|
3266
|
+
evalMatrix: { type: "string" },
|
|
3267
|
+
experiment: { type: "string" },
|
|
3268
|
+
format: {
|
|
3269
|
+
default: "table",
|
|
3270
|
+
type: "string"
|
|
3271
|
+
},
|
|
3272
|
+
project: { type: "string" },
|
|
3273
|
+
run: { type: "string" },
|
|
3274
|
+
runMatrix: { type: "string" },
|
|
3275
|
+
taskState: { type: "string" },
|
|
3276
|
+
workspace: { type: "string" }
|
|
3277
|
+
},
|
|
3278
|
+
importMeta: import.meta
|
|
3279
|
+
});
|
|
3280
|
+
const reportPath = cli.input[0];
|
|
3281
|
+
if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
|
|
3282
|
+
const normalizedFormat = cli.flags.format.toLowerCase();
|
|
3283
|
+
const format = normalizedFormat === "json" ? "json" : normalizedFormat === "jsonl" ? "jsonl" : normalizedFormat === "csv" ? "csv" : "table";
|
|
3284
|
+
return {
|
|
3285
|
+
attempt: cli.flags.attempt,
|
|
3286
|
+
caseState: normalizeStateFilter(cli.flags.caseState),
|
|
3287
|
+
contains: cli.flags.contains,
|
|
3288
|
+
errorContains: cli.flags.errorContains,
|
|
3289
|
+
evalMatrix: parseMatrixSelector(cli.flags.evalMatrix),
|
|
3290
|
+
experiment: cli.flags.experiment,
|
|
3291
|
+
format,
|
|
3292
|
+
project: cli.flags.project,
|
|
3293
|
+
reportPath,
|
|
3294
|
+
run: cli.flags.run,
|
|
3295
|
+
runMatrix: parseMatrixSelector(cli.flags.runMatrix),
|
|
3296
|
+
taskState: normalizeStateFilter(cli.flags.taskState),
|
|
3297
|
+
workspace: cli.flags.workspace
|
|
3298
|
+
};
|
|
3299
|
+
}
|
|
3300
|
+
async function runReportAnalyzeCli(argv) {
|
|
3301
|
+
try {
|
|
3302
|
+
const parsed = parseReportAnalyzeCliArguments(argv);
|
|
3303
|
+
const output = await readReportAnalyzeOutput(parsed);
|
|
3304
|
+
if (parsed.format === "json") {
|
|
3305
|
+
process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
|
|
3306
|
+
return;
|
|
3307
|
+
}
|
|
3308
|
+
if (parsed.format === "jsonl") {
|
|
3309
|
+
const jsonl = output.runs.map((run) => JSON.stringify(run)).join("\n");
|
|
3310
|
+
process.stdout.write(`${jsonl}${jsonl.length > 0 ? "\n" : ""}`);
|
|
3311
|
+
return;
|
|
3312
|
+
}
|
|
3313
|
+
if (parsed.format === "csv") {
|
|
3314
|
+
process.stdout.write(`${formatCsvOutput(output)}\n`);
|
|
3315
|
+
return;
|
|
3316
|
+
}
|
|
3317
|
+
process.stdout.write(`${formatTableOutput$1(output)}\n`);
|
|
3318
|
+
} catch (error) {
|
|
3319
|
+
const errorMessage = errorMessageFrom(error) ?? "Unknown report analyze failure.";
|
|
3320
|
+
process.stderr.write(`[vieval report analyze] ${errorMessage}\n`);
|
|
3321
|
+
process.exitCode = 1;
|
|
3322
|
+
}
|
|
3323
|
+
}
|
|
3324
|
+
function computeAverage(values) {
|
|
3325
|
+
if (values.length === 0) return 0;
|
|
3326
|
+
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
3327
|
+
}
|
|
3328
|
+
function computeStandardDeviation(values) {
|
|
3329
|
+
if (values.length === 0) return 0;
|
|
3330
|
+
const average = computeAverage(values);
|
|
3331
|
+
const variance = computeAverage(values.map((value) => (value - average) ** 2));
|
|
3332
|
+
return Math.sqrt(variance);
|
|
3333
|
+
}
|
|
3334
|
+
function createExperimentGroupKey(row) {
|
|
3335
|
+
return `${row.workspaceId ?? "unknown-workspace"}::${row.experimentId ?? "unknown-experiment"}`;
|
|
3336
|
+
}
|
|
3337
|
+
function filterAnalyzeRows(rows, parsed) {
|
|
3338
|
+
return rows.filter((row) => {
|
|
3339
|
+
if (parsed.workspace != null && row.workspaceId !== parsed.workspace) return false;
|
|
3340
|
+
if (parsed.experiment != null && row.experimentId !== parsed.experiment) return false;
|
|
3341
|
+
if (parsed.attempt != null && row.attemptId !== parsed.attempt) return false;
|
|
3342
|
+
if (parsed.run != null && row.runId !== parsed.run) return false;
|
|
3343
|
+
if (parsed.project != null && !row.projectNames.includes(parsed.project)) return false;
|
|
3344
|
+
return true;
|
|
3411
3345
|
});
|
|
3412
|
-
return [
|
|
3413
|
-
`ANALYZE vieval report: ${output.filteredRunCount}/${output.totalRunCount} runs (${output.experimentSummaries.length} experiment groups)`,
|
|
3414
|
-
header,
|
|
3415
|
-
...lines
|
|
3416
|
-
].join("\n");
|
|
3417
3346
|
}
|
|
3418
3347
|
function formatCsvOutput(output) {
|
|
3419
3348
|
return [[
|
|
@@ -3446,29 +3375,100 @@ function formatCsvOutput(output) {
|
|
|
3446
3375
|
].join(",");
|
|
3447
3376
|
})].join("\n");
|
|
3448
3377
|
}
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3454
|
-
|
|
3455
|
-
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
if (
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3378
|
+
function formatTableOutput$1(output) {
|
|
3379
|
+
const header = "Run ID | Workspace | Experiment | Attempt | Projects(executed/total) | FailedProjects | Tasks | Events";
|
|
3380
|
+
const lines = output.runs.map((row) => {
|
|
3381
|
+
return `${row.runId ?? "n/a"} | ${row.workspaceId ?? "n/a"} | ${row.experimentId ?? "n/a"} | ${row.attemptId ?? "n/a"} | ${`${row.executedProjects}/${row.totalProjects}`} | ${row.failedProjects} | ${row.totalTasks} | ${row.eventsCount}`;
|
|
3382
|
+
});
|
|
3383
|
+
return [
|
|
3384
|
+
`ANALYZE vieval report: ${output.filteredRunCount}/${output.totalRunCount} runs (${output.experimentSummaries.length} experiment groups)`,
|
|
3385
|
+
header,
|
|
3386
|
+
...lines
|
|
3387
|
+
].join("\n");
|
|
3388
|
+
}
|
|
3389
|
+
function hasCaseState(artifact, targetState) {
|
|
3390
|
+
return artifact.events.some((event) => {
|
|
3391
|
+
if (event.event !== "CaseEnded") return false;
|
|
3392
|
+
return event.data?.state === targetState;
|
|
3393
|
+
});
|
|
3394
|
+
}
|
|
3395
|
+
function hasEvalMatrixMatch(artifact, selector) {
|
|
3396
|
+
return artifact.summary.projects.some((project) => project.result?.runs.some((run) => matchesMatrixSelector(run.matrix.eval, selector)) === true);
|
|
3397
|
+
}
|
|
3398
|
+
function hasRunMatrixMatch(artifact, selector) {
|
|
3399
|
+
return artifact.summary.projects.some((project) => project.result?.runs.some((run) => matchesMatrixSelector(run.matrix.run, selector)) === true);
|
|
3400
|
+
}
|
|
3401
|
+
function hasTaskState(artifact, targetState) {
|
|
3402
|
+
return artifact.events.some((event) => {
|
|
3403
|
+
if (event.event !== "TaskEnded") return false;
|
|
3404
|
+
return event.data?.state === targetState;
|
|
3405
|
+
});
|
|
3406
|
+
}
|
|
3407
|
+
function includesNeedle(value, needle) {
|
|
3408
|
+
const normalizedNeedle = needle.trim().toLowerCase();
|
|
3409
|
+
if (normalizedNeedle.length === 0) return true;
|
|
3410
|
+
return JSON.stringify(value).toLowerCase().includes(normalizedNeedle);
|
|
3411
|
+
}
|
|
3412
|
+
function matchesMatrixSelector(matrix, selector) {
|
|
3413
|
+
return Object.entries(selector).every(([key, expectedValue]) => String(matrix[key]) === expectedValue);
|
|
3414
|
+
}
|
|
3415
|
+
function matchesOutcomeFilters(artifact, parsed) {
|
|
3416
|
+
if (parsed.runMatrix != null && !hasRunMatrixMatch(artifact, parsed.runMatrix)) return false;
|
|
3417
|
+
if (parsed.evalMatrix != null && !hasEvalMatrixMatch(artifact, parsed.evalMatrix)) return false;
|
|
3418
|
+
if (parsed.taskState != null && !hasTaskState(artifact, parsed.taskState)) return false;
|
|
3419
|
+
if (parsed.caseState != null && !hasCaseState(artifact, parsed.caseState)) return false;
|
|
3420
|
+
if (parsed.contains != null) {
|
|
3421
|
+
if (!artifact.events.some((event) => includesNeedle({
|
|
3422
|
+
data: event.data,
|
|
3423
|
+
event: event.event
|
|
3424
|
+
}, parsed.contains))) return false;
|
|
3425
|
+
}
|
|
3426
|
+
if (parsed.errorContains != null) {
|
|
3427
|
+
if (!(artifact.summary.projects.map((project) => project.errorMessage).filter((errorMessage) => errorMessage != null).some((errorMessage) => includesNeedle(errorMessage, parsed.errorContains)) || artifact.events.some((event) => includesNeedle(event.data, parsed.errorContains)))) return false;
|
|
3428
|
+
}
|
|
3429
|
+
return true;
|
|
3430
|
+
}
|
|
3431
|
+
function normalizeCliArgv$3(argv) {
|
|
3432
|
+
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3433
|
+
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "analyze") return normalizedArgv.slice(2);
|
|
3434
|
+
if (normalizedArgv[0] === "analyze") return normalizedArgv.slice(1);
|
|
3435
|
+
return normalizedArgv;
|
|
3436
|
+
}
|
|
3437
|
+
function normalizeStateFilter(value) {
|
|
3438
|
+
if (value == null) return;
|
|
3439
|
+
const normalized = value.trim().toLowerCase();
|
|
3440
|
+
if (normalized === "passed" || normalized === "failed" || normalized === "skipped") return normalized;
|
|
3441
|
+
throw new Error(`Unsupported state filter "${value}". Expected "passed", "failed", or "skipped".`);
|
|
3442
|
+
}
|
|
3443
|
+
function parseMatrixSelector(value) {
|
|
3444
|
+
if (value == null) return;
|
|
3445
|
+
const selector = {};
|
|
3446
|
+
const segments = value.split(",").map((segment) => segment.trim()).filter((segment) => segment.length > 0);
|
|
3447
|
+
for (const segment of segments) {
|
|
3448
|
+
const separatorIndex = segment.indexOf("=");
|
|
3449
|
+
if (separatorIndex <= 0 || separatorIndex === segment.length - 1) throw new Error(`Invalid matrix selector segment "${segment}". Expected "key=value".`);
|
|
3450
|
+
const key = segment.slice(0, separatorIndex).trim();
|
|
3451
|
+
const parsedValue = segment.slice(separatorIndex + 1).trim();
|
|
3452
|
+
if (key.length === 0 || parsedValue.length === 0) throw new Error(`Invalid matrix selector segment "${segment}". Expected "key=value".`);
|
|
3453
|
+
selector[key] = parsedValue;
|
|
3471
3454
|
}
|
|
3455
|
+
return selector;
|
|
3456
|
+
}
|
|
3457
|
+
async function readReportAnalyzeOutput(parsed) {
|
|
3458
|
+
const artifacts = await readReportArtifacts(parsed.reportPath);
|
|
3459
|
+
const rows = artifacts.map((artifact) => summarizeReportRunArtifact(artifact));
|
|
3460
|
+
const identityFilteredRows = filterAnalyzeRows(rows, parsed);
|
|
3461
|
+
const rowByDirectory = new Map(identityFilteredRows.map((row) => [row.reportDirectory, row]));
|
|
3462
|
+
const filteredRows = artifacts.filter((artifact) => rowByDirectory.has(artifact.reportDirectory)).filter((artifact) => matchesOutcomeFilters(artifact, parsed)).map((artifact) => rowByDirectory.get(artifact.reportDirectory)).filter((row) => row != null);
|
|
3463
|
+
return {
|
|
3464
|
+
experimentSummaries: buildExperimentSummaries(filteredRows),
|
|
3465
|
+
filteredRunCount: filteredRows.length,
|
|
3466
|
+
runs: filteredRows,
|
|
3467
|
+
totalRunCount: rows.length
|
|
3468
|
+
};
|
|
3469
|
+
}
|
|
3470
|
+
function roundMetric(value) {
|
|
3471
|
+
return Number(value.toFixed(6));
|
|
3472
3472
|
}
|
|
3473
3473
|
//#endregion
|
|
3474
3474
|
//#region src/cli/report-case-compare.ts
|
|
@@ -3545,6 +3545,50 @@ function buildCaseComparison(args) {
|
|
|
3545
3545
|
};
|
|
3546
3546
|
}
|
|
3547
3547
|
/**
|
|
3548
|
+
* Formats a case comparison as a compact human-readable table.
|
|
3549
|
+
*
|
|
3550
|
+
* Use when:
|
|
3551
|
+
* - `vieval report compare` should expose the same information as JSON output
|
|
3552
|
+
* - users need a terminal-first overview of group and per-case deltas
|
|
3553
|
+
*
|
|
3554
|
+
* Expects:
|
|
3555
|
+
* - comparison output was produced by {@link buildCaseComparison}
|
|
3556
|
+
*
|
|
3557
|
+
* Returns:
|
|
3558
|
+
* - multi-line text containing aggregate, group, top-change, case, and unmatched summaries
|
|
3559
|
+
*/
|
|
3560
|
+
function formatCaseComparisonTable(output) {
|
|
3561
|
+
const lines = [
|
|
3562
|
+
"COMPARE vieval report cases",
|
|
3563
|
+
`Matched ${output.cases.length}`,
|
|
3564
|
+
`Added ${output.added.length}`,
|
|
3565
|
+
`Removed ${output.removed.length}`,
|
|
3566
|
+
`Scores left=${output.overall.leftAverage.toFixed(3)} right=${output.overall.rightAverage.toFixed(3)} delta=${output.overall.delta.toFixed(3)}`
|
|
3567
|
+
];
|
|
3568
|
+
if (output.groups != null && Object.keys(output.groups).length > 0) {
|
|
3569
|
+
lines.push("Groups");
|
|
3570
|
+
for (const [groupKey, group] of Object.entries(output.groups)) lines.push(`${groupKey} count=${group.count} left=${group.leftAverage.toFixed(3)} right=${group.rightAverage.toFixed(3)} delta=${group.delta.toFixed(3)}`);
|
|
3571
|
+
}
|
|
3572
|
+
if (output.topImprovements.length > 0) {
|
|
3573
|
+
lines.push("Top improvements");
|
|
3574
|
+
for (const row of output.topImprovements) lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} left=${row.delta.left.toFixed(3)} right=${row.delta.right.toFixed(3)}`);
|
|
3575
|
+
}
|
|
3576
|
+
if (output.topRegressions.length > 0) {
|
|
3577
|
+
lines.push("Top regressions");
|
|
3578
|
+
for (const row of output.topRegressions) lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} left=${row.delta.left.toFixed(3)} right=${row.delta.right.toFixed(3)}`);
|
|
3579
|
+
}
|
|
3580
|
+
if (output.cases.length > 0) {
|
|
3581
|
+
lines.push("Cases");
|
|
3582
|
+
for (const row of output.cases) {
|
|
3583
|
+
const changedMetricNames = Object.keys(row.metricsChanged);
|
|
3584
|
+
lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} changedMetrics=${changedMetricNames.length === 0 ? "none" : changedMetricNames.join(",")}`);
|
|
3585
|
+
}
|
|
3586
|
+
}
|
|
3587
|
+
if (output.added.length > 0) lines.push(`Added cases ${output.added.map((record) => record.caseId).join(",")}`);
|
|
3588
|
+
if (output.removed.length > 0) lines.push(`Removed cases ${output.removed.map((record) => record.caseId).join(",")}`);
|
|
3589
|
+
return lines.join("\n");
|
|
3590
|
+
}
|
|
3591
|
+
/**
|
|
3548
3592
|
* Runs the `vieval report compare` command.
|
|
3549
3593
|
*
|
|
3550
3594
|
* Call stack:
|
|
@@ -3586,6 +3630,55 @@ async function runReportCompareCli(argv) {
|
|
|
3586
3630
|
process.exitCode = 1;
|
|
3587
3631
|
}
|
|
3588
3632
|
}
|
|
3633
|
+
function averageScore(records, scoreKind) {
|
|
3634
|
+
const values = records.map((record) => record.scores[scoreKind]).filter((value) => typeof value === "number");
|
|
3635
|
+
if (values.length === 0) return 0;
|
|
3636
|
+
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
3637
|
+
}
|
|
3638
|
+
function buildComparisonGroups(cases, groupBy) {
|
|
3639
|
+
const groupedRows = {};
|
|
3640
|
+
for (const row of cases) {
|
|
3641
|
+
const resolved = getCaseSelectorValue(row.right, groupBy);
|
|
3642
|
+
if (!resolved.exists) continue;
|
|
3643
|
+
const groupKey = `${groupBy}=${String(resolved.value)}`;
|
|
3644
|
+
groupedRows[groupKey] ??= [];
|
|
3645
|
+
groupedRows[groupKey].push(row);
|
|
3646
|
+
}
|
|
3647
|
+
return Object.fromEntries(Object.entries(groupedRows).sort(([left], [right]) => left.localeCompare(right)).map(([groupKey, rows]) => {
|
|
3648
|
+
const leftAverage = rows.reduce((sum, row) => sum + row.delta.left, 0) / rows.length;
|
|
3649
|
+
const rightAverage = rows.reduce((sum, row) => sum + row.delta.right, 0) / rows.length;
|
|
3650
|
+
return [groupKey, {
|
|
3651
|
+
count: rows.length,
|
|
3652
|
+
delta: rightAverage - leftAverage,
|
|
3653
|
+
leftAverage,
|
|
3654
|
+
rightAverage
|
|
3655
|
+
}];
|
|
3656
|
+
}));
|
|
3657
|
+
}
|
|
3658
|
+
function compareCaseRecords(left, right) {
|
|
3659
|
+
return left.caseId.localeCompare(right.caseId);
|
|
3660
|
+
}
|
|
3661
|
+
function diffMetrics(left, right) {
|
|
3662
|
+
const changed = {};
|
|
3663
|
+
const metricKeys = [.../* @__PURE__ */ new Set([...Object.keys(left), ...Object.keys(right)])].sort((leftKey, rightKey) => leftKey.localeCompare(rightKey));
|
|
3664
|
+
for (const metricKey of metricKeys) if (stableStringify(left[metricKey]) !== stableStringify(right[metricKey])) changed[metricKey] = {
|
|
3665
|
+
left: left[metricKey],
|
|
3666
|
+
right: right[metricKey]
|
|
3667
|
+
};
|
|
3668
|
+
return changed;
|
|
3669
|
+
}
|
|
3670
|
+
function getScore(record, scoreKind) {
|
|
3671
|
+
return record.scores[scoreKind] ?? 0;
|
|
3672
|
+
}
|
|
3673
|
+
function indexRecordsByCaseKey(records, caseKey, side) {
|
|
3674
|
+
const indexed = /* @__PURE__ */ new Map();
|
|
3675
|
+
for (const record of records) {
|
|
3676
|
+
const resolved = resolveCaseKey(record, caseKey);
|
|
3677
|
+
if (indexed.has(resolved)) throw new Error(`Duplicate case key "${resolved}" in ${side} report.`);
|
|
3678
|
+
indexed.set(resolved, record);
|
|
3679
|
+
}
|
|
3680
|
+
return indexed;
|
|
3681
|
+
}
|
|
3589
3682
|
function normalizeCliArgv$2(argv) {
|
|
3590
3683
|
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3591
3684
|
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "compare") return normalizedArgv.slice(2);
|
|
@@ -3621,15 +3714,6 @@ function parseReportCompareCliArguments(argv) {
|
|
|
3621
3714
|
scoreKind: cli.flags.scoreKind
|
|
3622
3715
|
};
|
|
3623
3716
|
}
|
|
3624
|
-
function indexRecordsByCaseKey(records, caseKey, side) {
|
|
3625
|
-
const indexed = /* @__PURE__ */ new Map();
|
|
3626
|
-
for (const record of records) {
|
|
3627
|
-
const resolved = resolveCaseKey(record, caseKey);
|
|
3628
|
-
if (indexed.has(resolved)) throw new Error(`Duplicate case key "${resolved}" in ${side} report.`);
|
|
3629
|
-
indexed.set(resolved, record);
|
|
3630
|
-
}
|
|
3631
|
-
return indexed;
|
|
3632
|
-
}
|
|
3633
3717
|
function resolveCaseKey(record, caseKey) {
|
|
3634
3718
|
if (caseKey != null) {
|
|
3635
3719
|
const resolved = getCaseSelectorValue(record, caseKey);
|
|
@@ -3641,90 +3725,6 @@ function resolveCaseKey(record, caseKey) {
|
|
|
3641
3725
|
const vievalCaseId = getCaseSelectorValue(record, "vieval.case.id");
|
|
3642
3726
|
return vievalCaseId.exists ? String(vievalCaseId.value) : record.caseId;
|
|
3643
3727
|
}
|
|
3644
|
-
function getScore(record, scoreKind) {
|
|
3645
|
-
return record.scores[scoreKind] ?? 0;
|
|
3646
|
-
}
|
|
3647
|
-
function averageScore(records, scoreKind) {
|
|
3648
|
-
const values = records.map((record) => record.scores[scoreKind]).filter((value) => typeof value === "number");
|
|
3649
|
-
if (values.length === 0) return 0;
|
|
3650
|
-
return values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
3651
|
-
}
|
|
3652
|
-
function diffMetrics(left, right) {
|
|
3653
|
-
const changed = {};
|
|
3654
|
-
const metricKeys = [.../* @__PURE__ */ new Set([...Object.keys(left), ...Object.keys(right)])].sort((leftKey, rightKey) => leftKey.localeCompare(rightKey));
|
|
3655
|
-
for (const metricKey of metricKeys) if (stableStringify(left[metricKey]) !== stableStringify(right[metricKey])) changed[metricKey] = {
|
|
3656
|
-
left: left[metricKey],
|
|
3657
|
-
right: right[metricKey]
|
|
3658
|
-
};
|
|
3659
|
-
return changed;
|
|
3660
|
-
}
|
|
3661
|
-
function buildComparisonGroups(cases, groupBy) {
|
|
3662
|
-
const groupedRows = {};
|
|
3663
|
-
for (const row of cases) {
|
|
3664
|
-
const resolved = getCaseSelectorValue(row.right, groupBy);
|
|
3665
|
-
if (!resolved.exists) continue;
|
|
3666
|
-
const groupKey = `${groupBy}=${String(resolved.value)}`;
|
|
3667
|
-
groupedRows[groupKey] ??= [];
|
|
3668
|
-
groupedRows[groupKey].push(row);
|
|
3669
|
-
}
|
|
3670
|
-
return Object.fromEntries(Object.entries(groupedRows).sort(([left], [right]) => left.localeCompare(right)).map(([groupKey, rows]) => {
|
|
3671
|
-
const leftAverage = rows.reduce((sum, row) => sum + row.delta.left, 0) / rows.length;
|
|
3672
|
-
const rightAverage = rows.reduce((sum, row) => sum + row.delta.right, 0) / rows.length;
|
|
3673
|
-
return [groupKey, {
|
|
3674
|
-
count: rows.length,
|
|
3675
|
-
delta: rightAverage - leftAverage,
|
|
3676
|
-
leftAverage,
|
|
3677
|
-
rightAverage
|
|
3678
|
-
}];
|
|
3679
|
-
}));
|
|
3680
|
-
}
|
|
3681
|
-
function compareCaseRecords(left, right) {
|
|
3682
|
-
return left.caseId.localeCompare(right.caseId);
|
|
3683
|
-
}
|
|
3684
|
-
/**
|
|
3685
|
-
* Formats a case comparison as a compact human-readable table.
|
|
3686
|
-
*
|
|
3687
|
-
* Use when:
|
|
3688
|
-
* - `vieval report compare` should expose the same information as JSON output
|
|
3689
|
-
* - users need a terminal-first overview of group and per-case deltas
|
|
3690
|
-
*
|
|
3691
|
-
* Expects:
|
|
3692
|
-
* - comparison output was produced by {@link buildCaseComparison}
|
|
3693
|
-
*
|
|
3694
|
-
* Returns:
|
|
3695
|
-
* - multi-line text containing aggregate, group, top-change, case, and unmatched summaries
|
|
3696
|
-
*/
|
|
3697
|
-
function formatCaseComparisonTable(output) {
|
|
3698
|
-
const lines = [
|
|
3699
|
-
"COMPARE vieval report cases",
|
|
3700
|
-
`Matched ${output.cases.length}`,
|
|
3701
|
-
`Added ${output.added.length}`,
|
|
3702
|
-
`Removed ${output.removed.length}`,
|
|
3703
|
-
`Scores left=${output.overall.leftAverage.toFixed(3)} right=${output.overall.rightAverage.toFixed(3)} delta=${output.overall.delta.toFixed(3)}`
|
|
3704
|
-
];
|
|
3705
|
-
if (output.groups != null && Object.keys(output.groups).length > 0) {
|
|
3706
|
-
lines.push("Groups");
|
|
3707
|
-
for (const [groupKey, group] of Object.entries(output.groups)) lines.push(`${groupKey} count=${group.count} left=${group.leftAverage.toFixed(3)} right=${group.rightAverage.toFixed(3)} delta=${group.delta.toFixed(3)}`);
|
|
3708
|
-
}
|
|
3709
|
-
if (output.topImprovements.length > 0) {
|
|
3710
|
-
lines.push("Top improvements");
|
|
3711
|
-
for (const row of output.topImprovements) lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} left=${row.delta.left.toFixed(3)} right=${row.delta.right.toFixed(3)}`);
|
|
3712
|
-
}
|
|
3713
|
-
if (output.topRegressions.length > 0) {
|
|
3714
|
-
lines.push("Top regressions");
|
|
3715
|
-
for (const row of output.topRegressions) lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} left=${row.delta.left.toFixed(3)} right=${row.delta.right.toFixed(3)}`);
|
|
3716
|
-
}
|
|
3717
|
-
if (output.cases.length > 0) {
|
|
3718
|
-
lines.push("Cases");
|
|
3719
|
-
for (const row of output.cases) {
|
|
3720
|
-
const changedMetricNames = Object.keys(row.metricsChanged);
|
|
3721
|
-
lines.push(`${row.caseKey} delta=${row.delta.score.toFixed(3)} changedMetrics=${changedMetricNames.length === 0 ? "none" : changedMetricNames.join(",")}`);
|
|
3722
|
-
}
|
|
3723
|
-
}
|
|
3724
|
-
if (output.added.length > 0) lines.push(`Added cases ${output.added.map((record) => record.caseId).join(",")}`);
|
|
3725
|
-
if (output.removed.length > 0) lines.push(`Removed cases ${output.removed.map((record) => record.caseId).join(",")}`);
|
|
3726
|
-
return lines.join("\n");
|
|
3727
|
-
}
|
|
3728
3728
|
//#endregion
|
|
3729
3729
|
//#region src/cli/report-index.ts
|
|
3730
3730
|
const reportIndexHelpText = `
|
|
@@ -3737,12 +3737,6 @@ const reportIndexHelpText = `
|
|
|
3737
3737
|
--output Output file path (default: <reportPath>/index/runs.jsonl)
|
|
3738
3738
|
--format Console output format: table | json | jsonl (default: table)
|
|
3739
3739
|
`;
|
|
3740
|
-
function normalizeCliArgv$1(argv) {
|
|
3741
|
-
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3742
|
-
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "index") return normalizedArgv.slice(2);
|
|
3743
|
-
if (normalizedArgv[0] === "index") return normalizedArgv.slice(1);
|
|
3744
|
-
return normalizedArgv;
|
|
3745
|
-
}
|
|
3746
3740
|
function parseReportIndexCliArguments(argv) {
|
|
3747
3741
|
const cli = meow(reportIndexHelpText, {
|
|
3748
3742
|
argv: normalizeCliArgv$1(argv),
|
|
@@ -3764,25 +3758,6 @@ function parseReportIndexCliArguments(argv) {
|
|
|
3764
3758
|
reportPath
|
|
3765
3759
|
};
|
|
3766
3760
|
}
|
|
3767
|
-
async function writeIndexFile(parsed) {
|
|
3768
|
-
const rows = (await readReportArtifacts(parsed.reportPath)).map((artifact) => summarizeReportRunArtifact(artifact));
|
|
3769
|
-
const indexFilePath = resolve(parsed.output ?? resolve(parsed.reportPath, "index", "runs.jsonl"));
|
|
3770
|
-
await mkdir(dirname(indexFilePath), { recursive: true });
|
|
3771
|
-
const indexContents = rows.map((row) => JSON.stringify(row)).join("\n");
|
|
3772
|
-
await writeFile(indexFilePath, `${indexContents}${indexContents.length > 0 ? "\n" : ""}`, "utf-8");
|
|
3773
|
-
return {
|
|
3774
|
-
indexFilePath,
|
|
3775
|
-
indexedRunCount: rows.length,
|
|
3776
|
-
rows
|
|
3777
|
-
};
|
|
3778
|
-
}
|
|
3779
|
-
function formatTableOutput(output) {
|
|
3780
|
-
return [
|
|
3781
|
-
"INDEX vieval report",
|
|
3782
|
-
`Path ${output.indexFilePath}`,
|
|
3783
|
-
`Run count ${output.indexedRunCount}`
|
|
3784
|
-
].join("\n");
|
|
3785
|
-
}
|
|
3786
3761
|
async function runReportIndexCli(argv) {
|
|
3787
3762
|
try {
|
|
3788
3763
|
const parsed = parseReportIndexCliArguments(argv);
|
|
@@ -3803,6 +3778,31 @@ async function runReportIndexCli(argv) {
|
|
|
3803
3778
|
process.exitCode = 1;
|
|
3804
3779
|
}
|
|
3805
3780
|
}
|
|
3781
|
+
function formatTableOutput(output) {
|
|
3782
|
+
return [
|
|
3783
|
+
"INDEX vieval report",
|
|
3784
|
+
`Path ${output.indexFilePath}`,
|
|
3785
|
+
`Run count ${output.indexedRunCount}`
|
|
3786
|
+
].join("\n");
|
|
3787
|
+
}
|
|
3788
|
+
function normalizeCliArgv$1(argv) {
|
|
3789
|
+
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3790
|
+
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "index") return normalizedArgv.slice(2);
|
|
3791
|
+
if (normalizedArgv[0] === "index") return normalizedArgv.slice(1);
|
|
3792
|
+
return normalizedArgv;
|
|
3793
|
+
}
|
|
3794
|
+
async function writeIndexFile(parsed) {
|
|
3795
|
+
const rows = (await readReportArtifacts(parsed.reportPath)).map((artifact) => summarizeReportRunArtifact(artifact));
|
|
3796
|
+
const indexFilePath = resolve(parsed.output ?? resolve(parsed.reportPath, "index", "runs.jsonl"));
|
|
3797
|
+
await mkdir(dirname(indexFilePath), { recursive: true });
|
|
3798
|
+
const indexContents = rows.map((row) => JSON.stringify(row)).join("\n");
|
|
3799
|
+
await writeFile(indexFilePath, `${indexContents}${indexContents.length > 0 ? "\n" : ""}`, "utf-8");
|
|
3800
|
+
return {
|
|
3801
|
+
indexedRunCount: rows.length,
|
|
3802
|
+
indexFilePath,
|
|
3803
|
+
rows
|
|
3804
|
+
};
|
|
3805
|
+
}
|
|
3806
3806
|
//#endregion
|
|
3807
3807
|
//#region src/cli/index.ts
|
|
3808
3808
|
const topLevelHelpText = `
|
|
@@ -3823,9 +3823,6 @@ const topLevelHelpText = `
|
|
|
3823
3823
|
$ vieval report analyze .vieval/reports/my-run
|
|
3824
3824
|
$ vieval report index .vieval/reports --output .vieval/reports/index/runs.jsonl
|
|
3825
3825
|
`;
|
|
3826
|
-
function normalizeCliArgv(argv) {
|
|
3827
|
-
return argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3828
|
-
}
|
|
3829
3826
|
/**
|
|
3830
3827
|
* Parses top-level `vieval` CLI arguments into one command dispatch payload.
|
|
3831
3828
|
*
|
|
@@ -3843,9 +3840,9 @@ function parseTopLevelCliArguments(argv) {
|
|
|
3843
3840
|
const normalizedArgv = normalizeCliArgv(argv);
|
|
3844
3841
|
const command = normalizedArgv[0];
|
|
3845
3842
|
meow(topLevelHelpText, {
|
|
3843
|
+
argv: normalizedArgv,
|
|
3846
3844
|
autoHelp: false,
|
|
3847
3845
|
autoVersion: false,
|
|
3848
|
-
argv: normalizedArgv,
|
|
3849
3846
|
importMeta: import.meta
|
|
3850
3847
|
});
|
|
3851
3848
|
if (command == null || command === "help" || command === "--help" || command === "-h") return {
|
|
@@ -3909,7 +3906,10 @@ async function runTopLevelCli(argv) {
|
|
|
3909
3906
|
}
|
|
3910
3907
|
await runEvalRunCli(parsed.commandArgv);
|
|
3911
3908
|
}
|
|
3909
|
+
function normalizeCliArgv(argv) {
|
|
3910
|
+
return argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
3911
|
+
}
|
|
3912
3912
|
//#endregion
|
|
3913
3913
|
export { runTopLevelCli as n, parseTopLevelCliArguments as t };
|
|
3914
3914
|
|
|
3915
|
-
//# sourceMappingURL=cli-
|
|
3915
|
+
//# sourceMappingURL=cli-uzS81IPd.mjs.map
|