vieval 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +6 -3
  2. package/dist/bin/vieval.d.mts +1 -0
  3. package/dist/bin/vieval.mjs +33 -0
  4. package/dist/bin/vieval.mjs.map +1 -0
  5. package/dist/cli/index.d.mts +32 -0
  6. package/dist/cli/index.mjs +1 -2582
  7. package/dist/cli-sanbKtQq.mjs +2821 -0
  8. package/dist/cli-sanbKtQq.mjs.map +1 -0
  9. package/dist/config.d.mts +2 -2
  10. package/dist/config.mjs +16 -1
  11. package/dist/config.mjs.map +1 -0
  12. package/dist/core/assertions/index.d.mts +314 -2
  13. package/dist/core/assertions/index.mjs +182 -1
  14. package/dist/core/assertions/index.mjs.map +1 -0
  15. package/dist/core/inference-executors/index.d.mts +1 -1
  16. package/dist/core/inference-executors/index.mjs +1 -1
  17. package/dist/core/processors/results/index.d.mts +1 -1
  18. package/dist/core/runner/index.d.mts +3 -2
  19. package/dist/core/runner/index.mjs +637 -2
  20. package/dist/core/runner/index.mjs.map +1 -0
  21. package/dist/core/scheduler/index.d.mts +2 -0
  22. package/dist/core/scheduler/index.mjs +188 -0
  23. package/dist/core/scheduler/index.mjs.map +1 -0
  24. package/dist/{env-C7X81PWa.mjs → env--94B0UtW.mjs} +1 -1
  25. package/dist/{env-C7X81PWa.mjs.map → env--94B0UtW.mjs.map} +1 -1
  26. package/dist/{env-DtpjACOW.d.mts → env-BeHv_5mo.d.mts} +1 -1
  27. package/dist/{expect-extensions-BOzwV5EJ.mjs → expect-extensions-DCSqlneN.mjs} +2 -2
  28. package/dist/{expect-extensions-BOzwV5EJ.mjs.map → expect-extensions-DCSqlneN.mjs.map} +1 -1
  29. package/dist/expect.d.mts +10 -2
  30. package/dist/expect.mjs +16 -1
  31. package/dist/expect.mjs.map +1 -0
  32. package/dist/{index-BDMEAmf2.d.mts → index-DBZKkpBe.d.mts} +106 -4
  33. package/dist/index-fakXoZEe.d.mts +147 -0
  34. package/dist/index.d.mts +111 -12
  35. package/dist/index.mjs +216 -55
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/models-DIGdOUpJ.mjs.map +1 -1
  38. package/dist/plugins/chat-models/index.d.mts +21 -1
  39. package/dist/plugins/chat-models/index.mjs +27 -1
  40. package/dist/plugins/chat-models/index.mjs.map +1 -1
  41. package/dist/queue-DsZQkZO_.mjs +21 -0
  42. package/dist/queue-DsZQkZO_.mjs.map +1 -0
  43. package/dist/{registry-CHJcTN2W.mjs → registry-CcKZqDJY.mjs} +27 -5
  44. package/dist/registry-CcKZqDJY.mjs.map +1 -0
  45. package/dist/testing/expect-extensions.d.mts +1 -1
  46. package/dist/testing/expect-extensions.mjs +1 -1
  47. package/package.json +9 -3
  48. package/dist/assertions-DcAjfVDA.mjs +0 -183
  49. package/dist/assertions-DcAjfVDA.mjs.map +0 -1
  50. package/dist/cli/index.mjs.map +0 -1
  51. package/dist/config-CHN24egi.mjs +0 -17
  52. package/dist/config-CHN24egi.mjs.map +0 -1
  53. package/dist/expect-B2vaoRVZ.d.mts +0 -10
  54. package/dist/expect-CaXiUkwY.mjs +0 -17
  55. package/dist/expect-CaXiUkwY.mjs.map +0 -1
  56. package/dist/index-C3gPFmcR.d.mts +0 -314
  57. package/dist/registry-CHJcTN2W.mjs.map +0 -1
  58. package/dist/runner-Dpy-eivM.mjs +0 -636
  59. package/dist/runner-Dpy-eivM.mjs.map +0 -1
@@ -0,0 +1,2821 @@
1
+ import { c as loadRawVievalConfig, l as loadVievalCliConfig, n as consumeModuleRegistrations, o as detectCliConfigMode, r as endModuleRegistration, t as beginModuleRegistration } from "./registry-CcKZqDJY.mjs";
2
+ import { createSchedulerRuntime } from "./core/scheduler/index.mjs";
3
+ import { RunnerExecutionError, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createTaskExecutionContext, runScheduledTasks } from "./core/runner/index.mjs";
4
+ import process from "node:process";
5
+ import { errorMessageFrom } from "@moeru/std";
6
+ import meow from "meow";
7
+ import { basename, dirname, isAbsolute, join, relative, resolve } from "node:path";
8
+ import { access, mkdir, writeFile } from "node:fs/promises";
9
+ import { glob } from "tinyglobby";
10
+ import { pathToFileURL } from "node:url";
11
+ import { randomUUID } from "node:crypto";
12
+ import c from "tinyrainbow";
13
+ import { existsSync, readFileSync } from "node:fs";
14
+ import { uniq } from "es-toolkit";
15
+ import { createVitest } from "vitest/node";
16
+ import { formatDuration, intervalToDuration } from "date-fns";
17
+ import { stripVTControlCharacters } from "node:util";
18
+ import stringWidth from "fast-string-width";
19
+ //#region src/cli/comparison-config.ts
20
// Config file names probed inside a workspace directory. Order matters: the
// first name that exists wins, so TypeScript variants come before JavaScript
// ones and JSON is the last resort.
const supportedWorkspaceConfigFileNames = [
  "ts",
  "mts",
  "cts",
  "js",
  "mjs",
  "cjs",
  "json"
].map((extension) => `vieval.config.${extension}`);
29
/**
 * Reports whether `filePath` can be accessed by the current process.
 *
 * Expects:
 * - `filePath` is any path accepted by `fs.promises.access`
 *
 * Returns:
 * - `true` when `access()` resolves, `false` when it fails for any reason
 *
 * NOTE(review): `access()` is called without a mode, so this only checks that
 * the path is visible; it does not distinguish files from directories.
 */
async function isReadableFile(filePath) {
  let accessible = true;
  try {
    await access(filePath);
  } catch {
    // Any failure (missing path, permission error, ...) means "not usable".
    accessible = false;
  }
  return accessible;
}
37
/**
 * Normalizes a glob option into a clean list of patterns.
 *
 * Expects:
 * - `patterns` is nullish, a single string, or an array of strings
 *
 * Returns:
 * - trimmed patterns in their original order, with blank entries removed
 * - an empty array when `patterns` is nullish
 */
function normalizeGlobInput(patterns) {
  if (patterns == null) return [];
  const rawPatterns = typeof patterns === "string" ? [patterns] : patterns;
  return rawPatterns.flatMap((rawPattern) => {
    const trimmed = rawPattern.trim();
    return trimmed.length > 0 ? [trimmed] : [];
  });
}
41
/**
 * Validates one explicitly configured comparison method and resolves its paths.
 *
 * Expects:
 * - `method` is the raw entry from `comparisons[].methods`
 * - `configDirectory` is the directory relative paths are resolved against
 * - `index` is the zero-based position of the entry (used in error messages)
 *
 * Returns:
 * - `{ configFilePath, id, project, workspace }` with trimmed `id`/`project`,
 *   an absolute `workspace`, and an absolute `configFilePath` (or `undefined`
 *   when none was configured)
 *
 * Throws:
 * - `Error` when `id`, `workspace`, or `project` is missing or blank
 */
function normalizeMethodShape(method, configDirectory, index) {
  // Missing or non-string fields collapse to "" so the descriptive errors
  // below fire instead of a TypeError from calling `.trim()` on undefined.
  const readTrimmed = (value) => (typeof value === "string" ? value.trim() : "");
  const toAbsolute = (path) => (isAbsolute(path) ? path : resolve(configDirectory, path));

  const id = readTrimmed(method?.id);
  const workspace = readTrimmed(method?.workspace);
  const project = readTrimmed(method?.project);
  const configFilePath = readTrimmed(method?.configFilePath);

  if (id.length === 0) throw new Error(`Comparison method #${index + 1} is missing id.`);
  if (workspace.length === 0) throw new Error(`Comparison method "${id}" is missing workspace.`);
  if (project.length === 0) throw new Error(`Comparison method "${id}" is missing project.`);

  return {
    configFilePath: configFilePath.length === 0 ? void 0 : toAbsolute(configFilePath),
    id,
    project,
    workspace: toAbsolute(workspace)
  };
}
57
/**
 * Finds the vieval config file that belongs to a workspace directory.
 *
 * Expects:
 * - `workspaceDirectory` is the directory to probe
 *
 * Returns:
 * - the path of the first supported config file name that is accessible
 * - `null` when none of the supported names exists in the directory
 */
async function findWorkspaceConfigFile(workspaceDirectory) {
  const candidatePaths = supportedWorkspaceConfigFileNames.map((fileName) => join(workspaceDirectory, fileName));
  // Probe sequentially so the priority order of the name list is respected.
  for (const candidatePath of candidatePaths) {
    const found = await isReadableFile(candidatePath);
    if (found) return candidatePath;
  }
  return null;
}
64
/**
 * Builds the method id for a project discovered through workspace globs.
 *
 * Expects:
 * - `workspace` is the discovered workspace directory
 * - `configDirectory` is the directory of the comparison config
 *
 * Returns:
 * - `<workspace label>:<projectName>`, where the label is the workspace path
 *   relative to `configDirectory` with forward slashes, or the workspace
 *   basename when both directories are the same
 */
function createDiscoveredMethodId(configDirectory, workspace, projectName) {
  let workspaceLabel = relative(configDirectory, workspace);
  if (workspaceLabel.length === 0) workspaceLabel = basename(workspace);
  // Normalize Windows separators so ids are stable across platforms.
  const portableLabel = workspaceLabel.split("\\").join("/");
  return `${portableLabel}:${projectName}`;
}
68
/**
 * Discovers comparison methods from `includesWorkspaces` globs.
 *
 * Expects:
 * - `args.comparison` carries optional `includesWorkspaces` / `excludesWorkspaces`
 * - `args.configDirectory` is the directory the globs are resolved against
 *
 * Returns:
 * - one method per project of every matched workspace that has a config file,
 *   ordered by workspace directory
 * - an empty array when no include pattern is configured
 */
async function discoverMethodsFromWorkspaceGlobs(args) {
  const { comparison, configDirectory } = args;
  const includePatterns = normalizeGlobInput(comparison.includesWorkspaces);
  if (includePatterns.length === 0) return [];

  const workspaceDirectories = await glob(includePatterns, {
    absolute: true,
    cwd: configDirectory,
    ignore: normalizeGlobInput(comparison.excludesWorkspaces),
    onlyDirectories: true
  });
  workspaceDirectories.sort((first, second) => first.localeCompare(second));

  const discovered = [];
  for (const workspace of workspaceDirectories) {
    const configFilePath = await findWorkspaceConfigFile(workspace);
    // Directories without a vieval config are not workspaces; skip them.
    if (configFilePath == null) continue;
    const { projects } = await loadVievalCliConfig({
      configFilePath,
      cwd: workspace
    });
    for (const { name } of projects) {
      discovered.push({
        configFilePath,
        id: createDiscoveredMethodId(configDirectory, workspace, name),
        project: name,
        workspace
      });
    }
  }
  return discovered;
}
94
/**
 * Ensures every comparison method has a distinct id.
 *
 * Expects:
 * - `methods` is the combined list of explicit and discovered methods
 *
 * Throws:
 * - `Error` naming the first id that appears a second time
 */
function validateMethodIdsAreUnique(methods) {
  // A single pass with a Set replaces the previous indexOf-inside-find scan,
  // which was quadratic in the number of methods.
  const seenIds = new Set();
  for (const { id } of methods) {
    if (seenIds.has(id)) throw new Error(`Duplicate comparison method id "${id}".`);
    seenIds.add(id);
  }
}
99
/**
 * Guards that a raw config is written in comparison mode.
 *
 * Throws:
 * - `Error` naming the detected mode when it is anything but "comparisons"
 */
function assertComparisonMode(config) {
  const detectedMode = detectCliConfigMode(config);
  if (detectedMode === "comparisons") return;
  throw new Error(`Expected comparison-mode config, but received ${detectedMode}-mode config.`);
}
103
/**
 * Picks the comparison entry a run should use.
 *
 * Expects:
 * - `comparisons` is the configured list of comparison entries
 * - `comparisonId` is the optional id requested through `--comparison`
 *
 * Returns:
 * - the entry whose `id` equals `comparisonId`
 * - the only entry when no id was requested
 *
 * Throws:
 * - `Error` when the list is empty, when no id was requested but several
 *   entries exist, or when the requested id is unknown
 */
function selectComparisonConfig(comparisons, comparisonId) {
  if (comparisons.length === 0) throw new Error("Comparison config requires at least one comparisons entry.");

  const hasRequestedId = comparisonId != null && comparisonId.trim().length > 0;
  if (!hasRequestedId) {
    if (comparisons.length > 1) {
      const availableIds = comparisons.map((item) => item.id).join(", ");
      throw new Error(`Multiple comparisons found. Provide --comparison. Available ids: ${availableIds}`);
    }
    return comparisons[0];
  }

  for (const candidate of comparisons) {
    if (candidate.id === comparisonId) return candidate;
  }
  throw new Error(`Unknown comparison id "${comparisonId}".`);
}
113
/**
 * Validates and trims the benchmark section of a comparison entry.
 *
 * Expects:
 * - `comparison.benchmark` carries string `id` and `sharedCaseNamespace`
 *
 * Returns:
 * - `{ id, sharedCaseNamespace }` with surrounding whitespace removed
 *
 * Throws:
 * - `Error` when either field is missing or blank
 */
function normalizeBenchmark(comparison) {
  // Missing `benchmark` or non-string fields collapse to "" so callers get the
  // descriptive errors below rather than a TypeError from `.trim()`.
  const readTrimmed = (value) => (typeof value === "string" ? value.trim() : "");
  const benchmark = comparison?.benchmark;
  const id = readTrimmed(benchmark?.id);
  const sharedCaseNamespace = readTrimmed(benchmark?.sharedCaseNamespace);
  if (id.length === 0) throw new Error("Comparison config requires benchmark.id.");
  if (sharedCaseNamespace.length === 0) throw new Error("Comparison config requires benchmark.sharedCaseNamespace.");
  return {
    id,
    sharedCaseNamespace
  };
}
123
/**
 * Loads and validates comparison-mode data from `vieval.config.*`.
 *
 * Expects:
 * - `options.cwd` defaults to `process.cwd()`
 * - `options.configFilePath` optionally points at an explicit config file
 * - `options.comparisonId` optionally selects one comparison entry
 *
 * Returns:
 * - `{ config: { benchmark, methods }, configFilePath }`, where `methods` is
 *   the explicit methods followed by the methods discovered through globs
 *
 * Throws:
 * - `Error` prefixed with `Failed to load comparison config "<path>"` for any
 *   loading or validation failure; the original error is attached as `cause`
 */
async function loadVievalComparisonConfig(options = {}) {
  const cwd = options.cwd ?? process.cwd();
  try {
    const loaded = await loadRawVievalConfig({
      configFilePath: options.configFilePath,
      cwd
    });
    if (loaded.configFilePath == null || loaded.config == null) throw new Error("Failed to find vieval config. Expected vieval.config.*");
    assertComparisonMode(loaded.config);
    const selectedComparison = selectComparisonConfig(loaded.config.comparisons, options.comparisonId);
    const configDirectory = dirname(loaded.configFilePath);
    const explicitMethods = (selectedComparison.methods ?? []).map((method, index) => normalizeMethodShape(method, configDirectory, index));
    const discoveredMethods = await discoverMethodsFromWorkspaceGlobs({
      comparison: selectedComparison,
      configDirectory
    });
    const methods = [...explicitMethods, ...discoveredMethods];
    if (methods.length === 0) throw new Error("Comparison config resolved zero methods. Configure methods or includesWorkspaces.");
    validateMethodIdsAreUnique(methods);
    return {
      config: {
        benchmark: normalizeBenchmark(selectedComparison),
        methods
      },
      configFilePath: loaded.configFilePath
    };
  } catch (error) {
    const errorMessage = errorMessageFrom(error) ?? "Unknown comparison config loading error.";
    const resolvedPath = options.configFilePath ?? "vieval.config";
    // Keep the original error reachable so its stack and type are not lost
    // behind the friendlier wrapper message.
    throw new Error(`Failed to load comparison config "${resolvedPath}": ${errorMessage}`, { cause: error });
  }
}
158
+ //#endregion
159
+ //#region src/cli/report-compare.ts
160
/**
 * Builds the compact compare report for a set of method runs.
 *
 * Expects:
 * - `args.methods[]` carry `methodId` and an `output` whose first project
 *   holds the overall result (other projects are not read)
 *
 * Returns:
 * - `{ benchmarkId, methods, reportPath }` where `methods` is sorted by
 *   hybrid average, then exact average, both descending; methods without a
 *   score sort last
 */
function buildCompareReportArtifact(args) {
  // Missing scores rank below every real score.
  const scoreOrLowest = (score) => score ?? Number.NEGATIVE_INFINITY;

  const rows = args.methods.map(({ methodId, output }) => {
    const overall = output.projects[0]?.result?.overall;
    return {
      exactAverage: overall?.exactAverage ?? null,
      hybridAverage: overall?.hybridAverage ?? null,
      methodId,
      runCount: overall?.runCount ?? 0
    };
  });

  rows.sort((left, right) => {
    const leftHybrid = scoreOrLowest(left.hybridAverage);
    const rightHybrid = scoreOrLowest(right.hybridAverage);
    if (leftHybrid === rightHybrid) {
      // Tie on hybrid score: fall back to the exact score.
      return scoreOrLowest(right.exactAverage) - scoreOrLowest(left.exactAverage);
    }
    return rightHybrid - leftHybrid;
  });

  return {
    benchmarkId: args.benchmarkId,
    methods: rows,
    reportPath: args.reportPath
  };
}
186
/**
 * Persists a compare report artifact as pretty-printed JSON.
 *
 * Expects:
 * - `args.outputPath` is the target file path (relative paths resolve
 *   against the current working directory)
 * - `args.artifact` is JSON-serializable
 *
 * Returns:
 * - the absolute path that was written; missing parent directories are
 *   created first
 */
async function writeCompareReportArtifact(args) {
  const absoluteOutputPath = resolve(args.outputPath);
  const parentDirectory = dirname(absoluteOutputPath);
  await mkdir(parentDirectory, { recursive: true });
  // Two-space indentation plus a trailing newline keeps the file diff-friendly.
  const fileContents = `${JSON.stringify(args.artifact, null, 2)}\n`;
  await writeFile(absoluteOutputPath, fileContents, "utf-8");
  return absoluteOutputPath;
}
195
+ //#endregion
196
+ //#region src/cli/discovery.ts
197
/**
 * Discovers eval files using include/exclude globs relative to the project root.
 *
 * Expects:
 * - `options.include` and `options.exclude` are iterable glob pattern lists
 * - `options.root` is the directory the patterns are resolved against
 *
 * Returns:
 * - de-duplicated absolute file paths, sorted with `localeCompare`
 */
async function discoverEvalFiles(options) {
  const matchedFiles = await glob([...options.include], {
    absolute: true,
    cwd: options.root,
    ignore: [...options.exclude],
    onlyFiles: true
  });
  const uniqueFiles = uniq(matchedFiles);
  uniqueFiles.sort((first, second) => first.localeCompare(second));
  return uniqueFiles;
}
214
+ //#endregion
215
+ //#region src/cli/module-runtime.ts
216
/**
 * Loads eval modules through a Vitest runtime and returns a normalized
 * eval-module map.
 *
 * Use when:
 * - CLI collection needs Vite/Vitest-powered module resolution and transforms
 *
 * Expects:
 * - `projectRoot` points at the project that owns the eval files
 * - each `evalFilePaths` entry is an absolute file path
 *
 * Returns:
 * - `{ [key]: { default: definition } }`, where the first definition of a
 *   file is keyed by the file href and later ones by
 *   `<href>#registration-<n>` (n starts at 2)
 * - definitions sharing name, description, and task id are kept once
 */
async function loadEvalModulesWithVitestRuntime(evalFilePaths, projectRoot) {
  const identityOf = (definition) => `${definition.name}::${definition.description}::${definition.task?.id ?? ""}`;
  const modulesByKey = {};
  const vitest = await createVitest("test", {
    config: false,
    root: projectRoot,
    run: false,
    silent: true,
    watch: false
  });
  try {
    for (const evalFilePath of evalFilePaths) {
      const moduleHref = pathToFileURL(evalFilePath).href;
      beginModuleRegistration(moduleHref);
      try {
        const importedModule = await vitest.import(moduleHref);
        // Registered definitions come first, then the default export (if any).
        const candidates = [...consumeModuleRegistrations(moduleHref)];
        if (importedModule.default != null) candidates.push(importedModule.default);

        const seenIdentities = new Set();
        let keptCount = 0;
        for (const definition of candidates) {
          const identity = identityOf(definition);
          if (seenIdentities.has(identity)) continue;
          seenIdentities.add(identity);
          const moduleKey = keptCount === 0 ? moduleHref : `${moduleHref}#registration-${keptCount + 1}`;
          modulesByKey[moduleKey] = { default: definition };
          keptCount += 1;
        }
      } finally {
        // Always close the registration scope, even when the import throws.
        endModuleRegistration();
      }
    }
  } finally {
    await vitest.close();
  }
  return modulesByKey;
}
266
+ //#endregion
267
+ //#region src/cli/reporters/noop-reporter.ts
268
/**
 * Creates a reporter whose lifecycle hooks all do nothing.
 *
 * Use when:
 * - terminal output should stay silent
 * - reporter wiring needs a safe default for tests or non-interactive runs
 *
 * Expects:
 * - callers may invoke any hook, in any order, any number of times
 *
 * Returns:
 * - a fresh reporter object with no observable side effects
 */
function createNoopReporter() {
  const reporter = {
    onRunStart: (_payload) => {},
    onTaskQueued: (_payload) => {},
    onTaskStart: (_payload) => {},
    onCaseStart: (_payload) => {},
    onCaseEnd: (_payload) => {},
    onTaskEnd: (_payload) => {},
    onRunEnd: (_payload) => {},
    dispose: () => {}
  };
  return reporter;
}
293
+ //#endregion
294
+ //#region src/cli/reporters/summary-reporter.ts
295
// Marker drawn in front of every active task row.
const POINTER = "❯";
// Tree connector for the last nested row (last slow case, truncation marker).
const TREE_NODE_END = "└";
// Tree connector for nested rows that still have siblings below them.
const TREE_NODE_MIDDLE = "├";
298
/**
 * State machine behind the live summary reporter.
 *
 * Tracks per-task and per-case lifecycle events, keeps the task and case
 * counters in sync, and renders the active rows plus footer on demand.
 *
 * Expects:
 * - `options.getNow()` / `options.getWallClockNow()` return timestamps in ms
 * - `options.slowThresholdMs` decides when a running case is listed
 * - `options.isTTY` controls project badge styling
 */
class SummaryReporterStateMachine {
  options;
  taskCounters = createCounterState();
  caseCounters = createCounterState();
  tasks = /* @__PURE__ */ new Map();
  queueOrderCounter = 0;
  startedAtMs = 0;
  startTime = "";

  constructor(options) {
    this.options = options;
  }

  /**
   * Resets all state for a new run.
   *
   * Expects:
   * - `payload.totalTasks` is the scheduled task count
   */
  onRunStart(payload) {
    const { options } = this;
    this.tasks.clear();
    this.queueOrderCounter = 0;
    resetCounterState(this.taskCounters, payload.totalTasks);
    resetCounterState(this.caseCounters, 0);
    this.startedAtMs = options.getNow();
    this.startTime = formatTimeString(new Date(options.getWallClockNow()));
  }

  /**
   * Registers a queued task so it shows up before it starts.
   *
   * Expects:
   * - `payload.taskId` stays stable across later lifecycle events
   */
  onTaskQueued(payload) {
    const task = this.getOrCreateTaskState(payload.taskId);
    if (task.state === "finished") return;
    if (payload.displayName != null) task.displayName = payload.displayName;
    if (payload.projectName != null) task.projectName = payload.projectName;
    this.syncTaskTotalCases(task, payload.totalCases);
  }

  /**
   * Marks a task as running; unknown tasks are created on the fly.
   */
  onTaskStart(payload) {
    const task = this.getOrCreateTaskState(payload.taskId);
    if (task.state === "finished") return;
    task.state = "running";
    if (task.startedAt == null) task.startedAt = this.options.getNow();
  }

  /**
   * Starts tracking one running case of a task.
   *
   * Expects:
   * - `payload.caseId` stays stable while the case runs; a repeated start
   *   for the same id only refreshes its name and retry information
   */
  onCaseStart(payload) {
    const task = this.getOrCreateTaskState(payload.taskId);
    if (task.state === "finished") return;
    task.state = "running";
    if (task.startedAt == null) task.startedAt = this.options.getNow();

    const { autoRetry, caseId, retryIndex } = payload;
    if (task.settledCaseIds.has(caseId)) return;
    const caseName = payload.caseName ?? caseId;

    const trackedCase = task.runningCases.get(caseId);
    if (trackedCase != null) {
      Object.assign(trackedCase, { autoRetry, caseName, retryIndex });
      return;
    }

    task.caseOrderCounter += 1;
    task.runningCases.set(caseId, {
      autoRetry,
      caseId,
      caseName,
      order: task.caseOrderCounter,
      retryIndex,
      startedAt: this.options.getNow()
    });
    this.syncTaskTotalCases(task);
  }

  /**
   * Settles a case and advances the case counters.
   *
   * Expects:
   * - a second completion for the same `caseId` only drops it from the
   *   running set; counters are not touched again
   */
  onCaseEnd(payload) {
    const task = this.getOrCreateTaskState(payload.taskId);
    if (task.state === "finished") return;

    const { caseId } = payload;
    const alreadySettled = task.settledCaseIds.has(caseId);
    task.runningCases.delete(caseId);
    if (alreadySettled) return;

    task.settledCaseIds.add(caseId);
    task.completedCases += 1;
    this.syncTaskTotalCases(task);

    const counters = this.caseCounters;
    counters.completed += 1;
    switch (payload.state) {
      case "passed":
        counters.passed += 1;
        break;
      case "failed":
        counters.failed += 1;
        break;
      case "timeout":
        counters.timeout += 1;
        break;
      default:
        // Every other terminal state is reported as skipped.
        counters.skipped += 1;
    }
  }

  /**
   * Finishes a task and advances the task counters.
   *
   * Expects:
   * - a second completion for the same task is ignored
   */
  onTaskEnd(payload) {
    const task = this.getOrCreateTaskState(payload.taskId);
    if (task.state === "finished") return;

    // Sync before clearing so still-running cases count toward the total.
    this.syncTaskTotalCases(task);
    task.state = "finished";
    task.taskResult = payload.state;
    task.runningCases.clear();

    const counters = this.taskCounters;
    counters.completed += 1;
    switch (payload.state) {
      case "passed":
        counters.passed += 1;
        break;
      case "failed":
        counters.failed += 1;
        break;
      default:
        counters.skipped += 1;
    }
  }

  /**
   * Overwrites the task counters with the caller's final totals.
   *
   * Expects:
   * - payload counters are the final terminal task totals
   */
  onRunEnd(payload) {
    const { failedTasks, passedTasks, skippedTasks, totalTasks } = payload;
    const counters = this.taskCounters;
    counters.total = totalTasks;
    counters.passed = passedTasks;
    counters.failed = failedTasks;
    counters.skipped = skippedTasks;
    counters.completed = passedTasks + failedTasks + skippedTasks;
  }

  /**
   * Releases reporter resources; there are none, so repeated calls are safe.
   */
  dispose() {}

  /**
   * Builds the rows of the live summary window.
   *
   * Expects:
   * - `options.maxRows`, when positive, caps the row count; footer rows are
   *   kept and active rows are truncated first
   *
   * Returns:
   * - terminal rows in display order
   */
  getWindowRows(options) {
    const activeRows = this.createActiveRows();
    const footerBlock = [...this.createFooterRows(), ""];
    const maxRows = options?.maxRows;

    const isUnbounded = maxRows == null || maxRows <= 0;
    if (isUnbounded) {
      const activeBlock = ["", ...activeRows];
      if (activeRows.length > 0) activeBlock.push("");
      return activeBlock.concat(footerBlock);
    }

    // Not even the footer fits: show as many of its last rows as allowed.
    if (maxRows <= footerBlock.length) return footerBlock.slice(-maxRows);

    const activeBudget = Math.max(0, maxRows - footerBlock.length);
    return createBoundedActiveBlock(activeRows, activeBudget).concat(footerBlock);
  }

  /**
   * Renders one row per unfinished task, followed by its slow running cases.
   */
  createActiveRows() {
    const rows = [];
    const unfinishedTasks = [...this.tasks.values()]
      .filter((task) => task.state !== "finished")
      .sort(compareActiveTasks);

    for (const task of unfinishedTasks) {
      const now = this.options.getNow();
      const progress = task.state === "queued" ? c.dim(" [queued]") : formatTaskProgressSuffix(task, now);
      const badge = formatProjectBadge(task.projectName, this.options.isTTY);
      rows.push(c.bold(c.yellow(` ${POINTER} `)) + badge + task.displayName + c.dim(progress));

      // Only cases running for at least the slow threshold get their own row.
      const slowCases = [...task.runningCases.values()]
        .filter((runningCase) => now - runningCase.startedAt >= this.options.slowThresholdMs)
        .sort((first, second) => first.order - second.order);

      slowCases.forEach((slowCase, position) => {
        const isLast = position === slowCases.length - 1;
        const connector = isLast ? TREE_NODE_END : TREE_NODE_MIDDLE;
        const elapsedMs = Math.max(0, now - slowCase.startedAt);
        rows.push(c.bold(c.yellow(` ${connector} `)) + slowCase.caseName + formatRetrySuffix(slowCase) + c.bold(c.yellow(` ${formatDuration$2(elapsedMs)}`)));
      });
    }
    return rows;
  }

  /**
   * Renders the footer: task and case counters, concurrency, start time, and
   * elapsed duration.
   */
  createFooterRows() {
    const now = this.options.getNow();
    const runElapsedMs = Math.max(0, now - this.startedAtMs);
    const runningTaskCount = countRunningTasks(this.tasks.values());
    const runningCaseCount = countRunningCases(this.tasks.values());
    const timingFor = (counters) => ({
      elapsedDurationMs: runElapsedMs,
      estimatedDurationMs: estimateTotalDurationMs(counters.completed, counters.total, runElapsedMs)
    });

    return [
      padSummaryTitle("Tasks") + formatCounterState(this.taskCounters, runningTaskCount, timingFor(this.taskCounters)),
      padSummaryTitle("Cases") + formatCounterState(this.caseCounters, runningCaseCount, timingFor(this.caseCounters)),
      padSummaryTitle("Concurrency") + formatActiveConcurrencyState({
        caseRunningCount: runningCaseCount,
        taskRunningCount: runningTaskCount
      }),
      padSummaryTitle("Start at") + this.startTime,
      padSummaryTitle("Duration") + formatHumanDuration(runElapsedMs)
    ];
  }

  /**
   * Returns the tracked state for `taskId`, creating a queued entry (named
   * after the id) the first time the task is seen.
   */
  getOrCreateTaskState(taskId) {
    let task = this.tasks.get(taskId);
    if (task == null) {
      task = {
        caseOrderCounter: 0,
        completedCases: 0,
        displayName: taskId,
        projectName: void 0,
        queueOrder: this.queueOrderCounter,
        runningCases: /* @__PURE__ */ new Map(),
        settledCaseIds: /* @__PURE__ */ new Set(),
        startedAt: void 0,
        state: "queued",
        taskId,
        taskResult: void 0,
        totalCases: 0
      };
      this.queueOrderCounter += 1;
      this.tasks.set(taskId, task);
    }
    return task;
  }

  /**
   * Raises a task's case total to the largest known value and refreshes the
   * run-wide case total. The total never shrinks.
   */
  syncTaskTotalCases(task, reportedTotalCases) {
    const observedTotalCases = task.completedCases + task.runningCases.size;
    task.totalCases = Math.max(task.totalCases, reportedTotalCases ?? 0, observedTotalCases);
    this.caseCounters.total = sumTaskCaseTotals(this.tasks.values());
  }
}
593
/**
 * Fits the active task rows into a limited number of terminal rows.
 *
 * Use when:
 * - the live window is smaller than the number of task/case rows
 *
 * Expects:
 * - `activeRows` are already formatted
 * - `maxRows` includes the leading spacer and the trailing spacer/marker
 *
 * Returns:
 * - at most `maxRows` rows, starting with an empty spacer row
 * - a dimmed "hidden rows" marker as the last row when rows were cut
 */
function createBoundedActiveBlock(activeRows, maxRows) {
  if (maxRows <= 0) return [];
  const spacer = "";
  if (activeRows.length === 0) return [spacer];

  // Everything fits: spacer, all rows, spacer.
  const requiredRows = activeRows.length + 2;
  if (requiredRows <= maxRows) return [spacer, ...activeRows, spacer];
  if (maxRows === 1) return [spacer];

  const visibleCount = Math.max(0, maxRows - 2);
  const hiddenCount = Math.max(0, activeRows.length - visibleCount);
  const truncationMarker = c.dim(` ${TREE_NODE_END} ... ${hiddenCount} more running rows hidden`);
  return [spacer].concat(activeRows.slice(0, visibleCount), truncationMarker);
}
626
/**
 * Creates the live summary reporter for `vieval` CLI runs.
 *
 * Use when:
 * - the CLI wants active task rows and live counters
 * - tests need a reporter surface that never touches the terminal
 *
 * Expects:
 * - `options` provides `getNow()`, `getWallClockNow()`, `slowThresholdMs`,
 *   and `isTTY`
 * - `getNow()` stays monotonic within one run
 *
 * Returns:
 * - a reporter with the base lifecycle hooks plus `getWindowRows()`
 */
function createSummaryReporter(options) {
  const reporter = new SummaryReporterStateMachine(options);
  return reporter;
}
651
/**
 * Creates a fresh counter record with every field set to zero.
 *
 * Returns:
 * - `{ completed, failed, passed, skipped, timeout, total }`, all `0`
 */
function createCounterState() {
  const counter = {};
  for (const field of ["completed", "failed", "passed", "skipped", "timeout", "total"]) {
    counter[field] = 0;
  }
  return counter;
}
661
/**
 * Resets a counter record in place for a new run.
 *
 * Expects:
 * - `counter` was produced by `createCounterState()`
 *
 * Returns:
 * - no direct value; all tallies become `0` and `total` becomes `total`
 */
function resetCounterState(counter, total) {
  Object.assign(counter, {
    completed: 0,
    failed: 0,
    passed: 0,
    skipped: 0,
    timeout: 0,
    total
  });
}
669
/**
 * Adds up `totalCases` over all given tasks.
 *
 * Expects:
 * - `tasks` is any iterable of task states (for example `Map#values()`)
 */
function sumTaskCaseTotals(tasks) {
  return Array.from(tasks).reduce((runningTotal, task) => runningTotal + task.totalCases, 0);
}
674
/**
 * Sort comparator for active task rows.
 *
 * Returns:
 * - ordering by project name (missing names count as ""), then display
 *   name, then queue order
 */
function compareActiveTasks(left, right) {
  const leftProject = left.projectName ?? "";
  const rightProject = right.projectName ?? "";
  if (leftProject === rightProject) {
    const byDisplayName = left.displayName.localeCompare(right.displayName);
    return byDisplayName === 0 ? left.queueOrder - right.queueOrder : byDisplayName;
  }
  return leftProject.localeCompare(rightProject);
}
682
/**
 * Formats a footer row label.
 *
 * Returns:
 * - the label padded to at least eight characters and dimmed, followed by a
 *   separator space; longer labels are not truncated
 */
function padSummaryTitle(label) {
  const paddedLabel = label.padEnd(8);
  return `${c.dim(paddedLabel)} `;
}
685
/**
 * Formats one counter footer row (tasks or cases).
 *
 * Expects:
 * - `counter` is a counter record, `runningCount` the number currently
 *   running, and `timing` the data handed to `formatTimingSuffix`
 *
 * Returns:
 * - planned/running/passed/failed/timeout/skipped segments joined by a
 *   dimmed separator, then the total in parentheses and the timing suffix;
 *   zero-valued segments are dimmed, except "passed" which is always green
 */
function formatCounterState(counter, runningCount, timing) {
  const plannedCount = Math.max(0, counter.total - counter.completed - runningCount);
  // Highlight a segment only when its count is positive.
  const segment = (count, label, highlight) => {
    const text = `${count} ${label}`;
    return count > 0 ? highlight(text) : c.dim(text);
  };
  const segments = [
    segment(plannedCount, "planned", (text) => c.bold(c.blue(text))),
    segment(runningCount, "running", (text) => c.bold(c.yellow(text))),
    c.bold(c.green(`${counter.passed} passed`)),
    segment(counter.failed, "failed", (text) => c.bold(c.red(text))),
    segment(counter.timeout, "timeout", (text) => c.bold(c.yellow(text))),
    segment(counter.skipped, "skipped", (text) => c.yellow(text))
  ];
  return segments.join(c.dim(" | ")) + c.gray(` (${counter.total})`) + formatTimingSuffix(timing);
}
696
/**
 * Formats the "Concurrency" footer row.
 *
 * Expects:
 * - `options.taskRunningCount` and `options.caseRunningCount` are counts
 *
 * Returns:
 * - "<n> task(s) running | <n> case(s) running"; positive counts are bold
 *   yellow, everything else is shown dimmed as zero
 */
function formatActiveConcurrencyState(options) {
  const describeRunning = (noun, count) => {
    if (count > 0) return c.bold(c.yellow(`${count} ${pluralize(noun, count)} running`));
    return c.dim(`0 ${noun}s running`);
  };
  const taskSegment = describeRunning("task", options.taskRunningCount);
  const caseSegment = describeRunning("case", options.caseRunningCount);
  return [taskSegment, caseSegment].join(c.dim(" | "));
}
699
/**
 * Returns `noun` for a count of exactly one, otherwise `noun` with an "s"
 * appended.
 */
function pluralize(noun, count) {
  if (count === 1) return noun;
  return `${noun}s`;
}
702
/**
 * Formats the retry marker shown after a slow case name.
 *
 * Returns:
 * - a dimmed " retry <index>/<max>" suffix
 * - "" when either `retryIndex` or `autoRetry` is missing or not positive
 */
function formatRetrySuffix(activeCase) {
  const { autoRetry, retryIndex } = activeCase;
  const isMissingOrNonPositive = (value) => value == null || value <= 0;
  if (isMissingOrNonPositive(retryIndex) || isMissingOrNonPositive(autoRetry)) return "";
  return c.dim(` retry ${retryIndex}/${autoRetry}`);
}
706
/**
 * Extracts the local clock time from a date.
 *
 * Returns:
 * - the first space-separated part of `Date#toTimeString()`, i.e.
 *   "HH:MM:SS" for a valid date
 */
function formatTimeString(date) {
  const [clockTime = ""] = date.toTimeString().split(" ");
  return clockTime;
}
709
/**
 * Formats the elapsed time of a slow case row; delegates to
 * `formatHumanDuration`.
 */
function formatDuration$2(durationMs) {
  const humanReadable = formatHumanDuration(durationMs);
  return humanReadable;
}
712
/**
 * Formats a millisecond duration for humans.
 *
 * Expects:
 * - `durationMs` is a finite millisecond count
 *
 * Returns:
 * - "<n>ms" (rounded) below one second
 * - otherwise hours, minutes, and seconds separated by spaces, with zero
 *   units omitted (for example "1 hour 2 minutes 5 seconds")
 */
function formatHumanDuration(durationMs) {
  if (durationMs < 1e3) return `${Math.round(durationMs)}ms`;
  // Split the duration arithmetically instead of via `intervalToDuration`:
  // that helper moves whole days into a `days` field, which the hour/minute/
  // second format below then drops (25 h used to render as "1 hour").
  const totalSeconds = Math.trunc(durationMs / 1e3);
  const formatted = formatDuration({
    hours: Math.trunc(totalSeconds / 3600),
    minutes: Math.trunc(totalSeconds / 60) % 60,
    seconds: totalSeconds % 60
  }, {
    delimiter: " ",
    format: [
      "hours",
      "minutes",
      "seconds"
    ],
    zero: false
  });
  return formatted.length > 0 ? formatted : "0 seconds";
}
728
/**
 * Formats the project label shown in front of reporter lines.
 *
 * Returns:
 * - an empty string when no project name is available
 * - `|name| ` when output is not a TTY or color is unsupported
 * - otherwise the name in black on a background picked from the name's characters
 */
function formatProjectBadge(projectName, isTTY) {
  if (projectName == null || projectName.length === 0) return "";
  if (!isTTY || !c.isColorSupported) return `|${projectName}| `;
  const backgroundPool = [c.bgYellow, c.bgCyan, c.bgGreen, c.bgMagenta];
  // Sum of char codes plus positions, so the same name always maps to the same pool entry.
  let nameHash = 0;
  const characters = projectName.split("");
  for (let index = 0; index < characters.length; index += 1) {
    nameHash += characters[index].charCodeAt(0) + index;
  }
  const background = backgroundPool[nameHash % backgroundPool.length];
  return `${c.black(background(` ${projectName} `))} `;
}
740
/**
 * Sums `runningCases.size` across every task in the iterable.
 */
function countRunningCases(tasks) {
  return [...tasks].reduce((runningCount, task) => runningCount + task.runningCases.size, 0);
}
745
/**
 * Counts the tasks in the iterable whose `state` is exactly "running".
 */
function countRunningTasks(tasks) {
  return [...tasks].filter((task) => task.state === "running").length;
}
750
/**
 * Estimates a task's total duration from its progress so far.
 *
 * Returns:
 * - `undefined` when the task has no `startedAt`
 * - otherwise the result of `estimateTotalDurationMs` for the non-negative elapsed time
 */
function estimateTaskDurationMs(task, now) {
  const { startedAt } = task;
  if (startedAt == null) return void 0;
  const elapsedDurationMs = Math.max(0, now - startedAt);
  return estimateTotalDurationMs(task.completedCases, task.totalCases, elapsedDurationMs);
}
754
/**
 * Extrapolates the total duration from the average time per completed item.
 *
 * Returns:
 * - `undefined` when nothing has completed yet or there is nothing to run
 * - otherwise `elapsed / completed * total`, rounded to the nearest millisecond
 */
function estimateTotalDurationMs(completedCount, totalCount, elapsedDurationMs) {
  const canEstimate = completedCount !== 0 && totalCount !== 0;
  if (!canEstimate) return void 0;
  return Math.round(elapsedDurationMs / completedCount * totalCount);
}
759
/**
 * Formats the ` <completed>/<total>, <n> case(s) running (...)` suffix for a task line.
 *
 * Expects:
 * - `task.runningCases` exposes `size`
 * - a task without `startedAt` is reported with zero elapsed time
 */
function formatTaskProgressSuffix(task, now) {
  const runningCaseCount = task.runningCases.size;
  const elapsedDurationMs = task.startedAt == null ? 0 : Math.max(0, now - task.startedAt);
  const timingSuffix = formatTimingSuffix({
    elapsedDurationMs,
    estimatedDurationMs: estimateTaskDurationMs(task, now)
  });
  const progress = `${task.completedCases}/${task.totalCases}`;
  const running = `${runningCaseCount} ${pluralize("case", runningCaseCount)} running`;
  return ` ${progress}, ${running}${timingSuffix}`;
}
766
/**
 * Formats ` (elapsed X)` or ` (elapsed X, estimated Y)`.
 *
 * Expects:
 * - `timing.elapsedDurationMs` is always present
 * - `timing.estimatedDurationMs` is optional and left out when nullish
 */
function formatTimingSuffix(timing) {
  const elapsedPart = `elapsed ${formatHumanDuration(timing.elapsedDurationMs)}`;
  const estimatedParts = timing.estimatedDurationMs != null
    ? [`estimated ${formatHumanDuration(timing.estimatedDurationMs)}`]
    : [];
  return ` (${[elapsedPart, ...estimatedParts].join(", ")})`;
}
771
+ //#endregion
772
+ //#region src/cli/reporters/index.ts
773
/**
 * Picks the CLI reporter that matches the current output mode.
 *
 * Use when:
 * - interactive terminals should get the live summary reporter
 * - non-interactive environments should get the noop reporter
 *
 * Expects:
 * - `options.isTTY` decides which reporter is created
 *
 * Returns:
 * - `createSummaryReporter(options)` for TTY runs, otherwise `createNoopReporter()`
 */
function createCliReporter(options) {
  return options.isTTY ? createSummaryReporter(options) : createNoopReporter();
}
790
+ //#endregion
791
+ //#region src/cli/reporters/renderers/windowed-renderer.ts
792
// Refresh interval used by WindowRenderer when `options.intervalMs` is not supplied (milliseconds).
+ const DEFAULT_RENDER_INTERVAL_MS = 1e3;
792
// Escape character followed by "[": the shared prefix of the control sequences built below.
+ const ESC = "\x1B[";
793
// Written before each clear sequence so clearing starts from the first column.
+ const CARRIAGE_RETURN = "\r";
794
// Clears the current row; used by WindowRenderer.clearWindow.
+ const CLEAR_LINE = `${ESC}K`;
795
// Moves the cursor up one row; used by clearWindow to walk over previously rendered rows.
+ const MOVE_CURSOR_ONE_ROW_UP = `${ESC}1A`;
796
// Written around each ANSI redraw by renderWindow (presumably terminal synchronized-output mode 2026 — confirm).
+ const SYNC_START = `${ESC}?2026h`;
797
+ const SYNC_END = `${ESC}?2026l`;
799
/**
 * Renders a dynamic window at the bottom of the terminal.
 *
 * Use when:
 * - a reporter needs in-place TTY updates with injectable output and timer dependencies
 *
 * Expects:
 * - `start()` runs before `schedule()`
 * - `finish()`, `dispose()`, and `stop()` may be called repeatedly
 *
 * Returns:
 * - no direct value; every effect goes through the injected callbacks
 *
 * Call stack:
 *
 * {@link WindowRenderer.start}
 *   -> periodic schedule callback
 *     -> {@link WindowRenderer.schedule}
 *       -> {@link WindowRenderer.renderWindow}
 */
var WindowRenderer = class {
  options;
  renderInterval;
  renderScheduled = false;
  renderScheduleVersion = 0;
  windowHeight = 0;
  started = false;
  finished = false;
  bufferedOutput = "";
  /**
   * Normalizes the supplied options.
   *
   * Injected timers are used only when both `createInterval` and `clearInterval`
   * are provided; otherwise the default interval factory is used.
   */
  constructor(options) {
    const useInjectedTimers = Boolean(options.createInterval && options.clearInterval);
    const createInterval = useInjectedTimers
      ? (callback, intervalMs) => {
        const handle = options.createInterval(callback, intervalMs);
        return {
          clear: () => options.clearInterval(handle),
          unref: handle.unref?.bind(handle)
        };
      }
      : defaultCreateInterval;
    this.options = {
      createInterval,
      getColumns: options.getColumns,
      getWindow: options.getWindow,
      intervalMs: options.intervalMs ?? DEFAULT_RENDER_INTERVAL_MS,
      queueRenderReset: options.queueRenderReset ?? defaultQueueRenderReset,
      supportsAnsiWindowing: options.supportsAnsiWindowing ?? true,
      writeOutput: options.writeOutput
    };
  }
  /**
   * Starts the periodic refresh loop.
   *
   * Expects:
   * - repeated calls while running are no-ops and keep the existing timer
   *
   * Returns:
   * - no direct value
   */
  start() {
    const alreadyRunning = this.started && !this.finished;
    if (alreadyRunning) return;
    this.started = true;
    this.finished = false;
    // Invalidate any render-reset callback queued before this start.
    this.renderScheduleVersion += 1;
    if (this.renderInterval) return;
    this.renderInterval = this.options.createInterval(() => this.schedule(), this.options.intervalMs);
    this.renderInterval.unref?.();
  }
  /**
   * Renders the window unless a render already happened within the current throttle period.
   *
   * Expects:
   * - the renderer has been started and not finished; otherwise this is a no-op
   *
   * Returns:
   * - no direct value
   */
  schedule() {
    const inactive = !this.started || this.finished;
    if (inactive || this.renderScheduled) return;
    const versionAtSchedule = this.renderScheduleVersion;
    this.renderScheduled = true;
    this.renderWindow();
    this.options.queueRenderReset(() => {
      // A later start()/finish() bumps the version; stale resets must not reopen the gate.
      if (this.renderScheduleVersion === versionAtSchedule) this.renderScheduled = false;
    });
  }
  /**
   * Clears the rendered window, stops the refresh loop, and flushes buffered output.
   *
   * Expects:
   * - repeated calls are safe; only the first one after a start has any effect
   *
   * Returns:
   * - no direct value
   */
  finish() {
    if (this.finished) return;
    this.finished = true;
    this.started = false;
    this.renderScheduleVersion += 1;
    this.renderScheduled = false;
    this.stopInterval();
    this.clearWindow();
    this.flushBufferedOutput();
  }
  /**
   * Stops the renderer; same behavior as `finish()`.
   *
   * Use when:
   * - cleanup happens from a `finally` block or an interrupted run
   *
   * Returns:
   * - no direct value
   */
  dispose() {
    this.finish();
  }
  /**
   * Alias for `dispose()`.
   *
   * Returns:
   * - no direct value
   */
  stop() {
    this.dispose();
  }
  /**
   * Writes reporter output through the renderer lifecycle.
   *
   * Expects:
   * - while the ANSI window is active, output is buffered until the next render or `finish()`
   * - otherwise output is written immediately
   *
   * Returns:
   * - no direct value
   */
  write(message) {
    if (this.isActiveWindowMode()) {
      this.bufferedOutput += message;
      return;
    }
    this.writeOutput(message);
  }
  /** Redraws the window: clear the previous frame, flush buffered output, then print the rows. */
  renderWindow() {
    const rows = this.options.getWindow();
    const renderedHeight = getRenderedRowCount(rows, this.options.getColumns());
    const ansiWindowing = this.options.supportsAnsiWindowing;
    if (ansiWindowing) {
      this.writeOutput(SYNC_START);
      this.clearWindow();
    }
    this.flushBufferedOutput();
    this.writeOutput(rows.join("\n"));
    if (!ansiWindowing) {
      // Without ANSI windowing nothing can be erased later, so end the frame with a newline.
      this.writeOutput("\n");
      this.windowHeight = 0;
      return;
    }
    this.writeOutput(SYNC_END);
    this.windowHeight = renderedHeight;
  }
  /** Erases the rows written by the previous render, walking upward from the current row. */
  clearWindow() {
    if (!this.options.supportsAnsiWindowing || this.windowHeight === 0) return;
    this.writeOutput(`${CARRIAGE_RETURN}${CLEAR_LINE}`);
    let remainingRows = this.windowHeight - 1;
    while (remainingRows > 0) {
      this.writeOutput(`${CARRIAGE_RETURN}${MOVE_CURSOR_ONE_ROW_UP}${CLEAR_LINE}`);
      remainingRows -= 1;
    }
    this.windowHeight = 0;
  }
  /** Clears the refresh timer, if one exists. */
  stopInterval() {
    const activeInterval = this.renderInterval;
    if (!activeInterval) return;
    activeInterval.clear();
    this.renderInterval = void 0;
  }
  /** Forwards a message to the injected `writeOutput` callback. */
  writeOutput(message) {
    this.options.writeOutput(message);
  }
  /** Writes everything accumulated by `write()` and empties the buffer. */
  flushBufferedOutput() {
    if (this.bufferedOutput.length === 0) return;
    this.writeOutput(this.bufferedOutput);
    this.bufferedOutput = "";
  }
  /** True while the renderer is started, not finished, and ANSI windowing is enabled. */
  isActiveWindowMode() {
    return this.started && !this.finished && this.options.supportsAnsiWindowing;
  }
};
1015
/**
 * Default interval factory backed by `globalThis.setInterval`.
 *
 * Returns:
 * - a handle with `clear()` and, when the timer supports it, a bound `unref()`
 */
function defaultCreateInterval(callback, intervalMs) {
  const handle = globalThis.setInterval(callback, intervalMs);
  const clear = () => globalThis.clearInterval(handle);
  const unref = handle.unref?.bind(handle);
  return { clear, unref };
}
1022
/**
 * Default render-throttle reset: runs `callback` after 100ms.
 *
 * The timer is unref'd when the runtime's timer handle supports it, so a pending
 * reset does not keep the process alive. `unref` is optional here, matching
 * `defaultCreateInterval`, because some `setTimeout` implementations return a
 * plain numeric id.
 */
function defaultQueueRenderReset(callback) {
  const timer = setTimeout(callback, 100);
  timer?.unref?.();
}
1025
/**
 * Calculates how many terminal rows the supplied rows occupy at the given width.
 *
 * Each row counts for at least one terminal row; wider rows count once per
 * `columns`-wide slice of their display width. `columns` is clamped to at least 1.
 */
function getRenderedRowCount(rows, columns) {
  const safeColumns = Math.max(1, columns);
  let renderedRows = 0;
  for (const row of rows) {
    const displayWidth = getTextDisplayWidth(stripVTControlCharacters(row));
    const wrappedRows = Math.ceil(displayWidth / safeColumns);
    renderedRows += Math.max(1, wrappedRows);
  }
  return renderedRows;
}
1035
/**
 * Returns the `stringWidth` of `text` after stripping VT control characters.
 */
function getTextDisplayWidth(text) {
  const visibleText = stripVTControlCharacters(text);
  return stringWidth(visibleText);
}
1038
+ //#endregion
1039
+ //#region src/cli/reporters/vitest-compat-reporter.ts
1040
/**
 * Tells whether a reporter reference is given as an array (tuple form).
 */
function isReporterReferenceTuple(reference) {
  const isTuple = Array.isArray(reference);
  return isTuple;
}
1043
/**
 * Tells whether `value` looks like a filesystem path rather than a bare specifier.
 *
 * Matches values starting with `/`, `./`, `../`, or a drive letter followed by
 * `:` and a slash or backslash.
 */
function isAbsoluteLikePath(value) {
  const pathPrefixes = ["/", "./", "../"];
  if (pathPrefixes.some((prefix) => value.startsWith(prefix))) return true;
  return /^[A-Z]:[\\/]/i.test(value);
}
1046
/**
 * Dynamically imports a reporter module.
 *
 * Path-like references are converted to a `file:` URL first; anything else is
 * passed to `import()` unchanged.
 */
async function loadReporterModule(path) {
  const specifier = isAbsoluteLikePath(path) ? pathToFileURL(path).href : path;
  return import(specifier);
}
1050
/**
 * Normalizes a reporter reference into `{ options, value }`.
 *
 * Returns:
 * - for the array form `[value, options]`, both elements
 * - for any other reference, the reference itself with `options` undefined
 */
function normalizeReporterReference(reference) {
  if (!Array.isArray(reference)) {
    return {
      options: void 0,
      value: reference
    };
  }
  const [value, options] = [reference[0], reference[1]];
  return { options, value };
}
1060
/**
 * Turns a loaded module (or inline value) into a reporter instance.
 *
 * Expects:
 * - the candidate is `moduleValue.default` when present, otherwise `moduleValue`
 *
 * Returns:
 * - `new candidate(options)` for functions, the candidate itself for objects, otherwise `null`
 */
function createReporterInstance(moduleValue, options) {
  const candidate = moduleValue.default ?? moduleValue;
  switch (typeof candidate) {
    case "function":
      return new candidate(options);
    case "object":
      // `typeof null` is "object"; a null candidate is still reported as null.
      return candidate ?? null;
    default:
      return null;
  }
}
1067
/**
 * Invokes `callback` for every reporter and waits for all of them to settle.
 *
 * Failures (sync throws and rejections) are ignored on purpose so that one
 * faulty reporter cannot break the others or the run.
 */
async function emitToReporters(reporters, callback) {
  const notifications = reporters.map(async (reporter) => {
    await callback(reporter);
  });
  await Promise.allSettled(notifications);
}
1074
/**
 * Creates a project-level bridge that forwards vieval lifecycle events to
 * vitest-style reporter callbacks.
 *
 * Use when:
 * - `vieval` should drive vitest-like reporters configured through `options.references`
 *
 * Expects:
 * - each reference is a module specifier, an inline reporter value, or a `[value, options]` tuple
 * - references that fail to load or instantiate are skipped silently
 *
 * Returns:
 * - `null` when no references are configured or none produced a reporter
 * - otherwise an object with `onRunStart`, `onRunEnd`, `onTaskQueued`, `onTaskStart`,
 *   `onTaskEnd`, `onCaseStart`, and `onCaseEnd`
 */
async function createVievalVitestCompatReporterBridge(options) {
  if (options.references.length === 0) return null;
  const loadedReporters = [];
  for (const reference of options.references) {
    const { options: reporterOptions, value } = normalizeReporterReference(reference);
    try {
      const moduleValue = typeof value === "string" ? await loadReporterModule(value) : value;
      const instance = createReporterInstance(moduleValue, reporterOptions);
      if (instance != null) loadedReporters.push(instance);
    } catch {
      // Best-effort: a reporter that cannot be loaded is left out of the bridge.
    }
  }
  if (loadedReporters.length === 0) return null;
  const modulesByTaskId = /* @__PURE__ */ new Map();
  const casesByCompositeId = /* @__PURE__ */ new Map();
  // One module record per task id, created on first use.
  const getOrCreateModule = (taskId) => {
    let moduleRecord = modulesByTaskId.get(taskId);
    if (moduleRecord == null) {
      moduleRecord = {
        id: taskId,
        name: taskId,
        projectName: options.projectName
      };
      modulesByTaskId.set(taskId, moduleRecord);
    }
    return moduleRecord;
  };
  // One case record per (task id, case id) pair, created on first use.
  const getOrCreateCase = (taskId, caseId) => {
    const compositeId = `${taskId}::${caseId}`;
    let caseRecord = casesByCompositeId.get(compositeId);
    if (caseRecord == null) {
      caseRecord = {
        id: caseId,
        module: getOrCreateModule(taskId),
        name: caseId,
        state: "pending"
      };
      casesByCompositeId.set(compositeId, caseRecord);
    }
    return caseRecord;
  };
  const broadcast = (invoke) => emitToReporters(loadedReporters, invoke);
  return {
    async onCaseEnd(payload) {
      const taskCase = getOrCreateCase(payload.taskId, payload.caseId);
      // Timeouts are reported to the reporters as failures.
      taskCase.state = payload.state === "timeout" ? "failed" : payload.state;
      await broadcast((reporter) => reporter.onTestCaseResult?.(taskCase));
    },
    async onCaseStart(payload) {
      const taskCase = getOrCreateCase(payload.taskId, payload.caseId);
      await broadcast((reporter) => reporter.onTestCaseReady?.(taskCase));
    },
    async onRunEnd(run) {
      const modules = [...modulesByTaskId.values()];
      const errors = run.failed ? [{ message: "vieval run failed" }] : [];
      const reason = run.failed ? "failed" : "passed";
      await broadcast((reporter) => reporter.onTestRunEnd?.(modules, errors, reason));
    },
    async onRunStart() {
      const specifications = [];
      for (const moduleRecord of modulesByTaskId.values()) {
        specifications.push({
          moduleId: moduleRecord.id,
          projectName: moduleRecord.projectName
        });
      }
      await broadcast((reporter) => reporter.onTestRunStart?.(specifications));
    },
    async onTaskEnd(payload) {
      const moduleRecord = getOrCreateModule(payload.taskId);
      if (payload.state === "failed") {
        // A failed task is surfaced as a synthetic failed case before the module ends.
        const syntheticCase = getOrCreateCase(payload.taskId, `${payload.taskId}:task`);
        syntheticCase.state = "failed";
        await broadcast((reporter) => reporter.onTestCaseResult?.(syntheticCase));
      }
      await broadcast((reporter) => reporter.onTestModuleEnd?.(moduleRecord));
    },
    async onTaskQueued(payload) {
      const moduleRecord = getOrCreateModule(payload.taskId);
      await broadcast((reporter) => reporter.onTestModuleQueued?.(moduleRecord));
      await broadcast((reporter) => reporter.onTestModuleCollected?.(moduleRecord));
    },
    async onTaskStart(payload) {
      const moduleRecord = getOrCreateModule(payload.taskId);
      await broadcast((reporter) => reporter.onTestModuleStart?.(moduleRecord));
    }
  };
}
1165
+ //#endregion
1166
+ //#region src/cli/run.ts
1167
/**
 * Returns true when at least one project in `output.projects` reports a failure:
 * an `errorMessage`, a case summary with failed or timed-out cases, or a
 * non-empty `caseFailures` list.
 */
function hasRunFailures(output) {
  const projectFailed = (project) => {
    if (project.errorMessage != null) return true;
    const summary = project.caseSummary;
    if (summary != null && (summary.failed > 0 || summary.timeout > 0)) return true;
    const failureCount = project.caseFailures?.length ?? 0;
    return failureCount > 0;
  };
  return output.projects.some(projectFailed);
}
1177
/**
 * Resolves a concurrency limit where the CLI value can only lower the configured one.
 *
 * Returns:
 * - the configured value (or `fallback` when it is nullish) if no CLI value is given
 * - otherwise the smaller of that value and the CLI value
 */
function resolveCappedConcurrency(defaultConcurrency, cliConcurrency, fallback) {
  const configuredLimit = defaultConcurrency ?? fallback;
  return cliConcurrency == null ? configuredLimit : Math.min(configuredLimit, cliConcurrency);
}
1182
/**
 * Resolves an optional runtime concurrency: the CLI value wins when it is not
 * nullish, otherwise the configured default (which may itself be undefined).
 */
function resolveOptionalRuntimeTaskConcurrency(defaultConcurrency, cliConcurrency) {
  if (cliConcurrency != null) return cliConcurrency;
  return defaultConcurrency;
}
1185
/**
 * Resolves workspace-level concurrency: the configured `concurrency.workspace`
 * (fallback 1), capped by `options.workspaceConcurrency` when given.
 */
function resolveWorkspaceConcurrency(loadedConfig, options) {
  const configured = loadedConfig.concurrency?.workspace;
  return resolveCappedConcurrency(configured, options.workspaceConcurrency, 1);
}
1188
/**
 * Resolves project-level concurrency: the configured `concurrency.project`
 * (unbounded by default), capped by `options.projectConcurrency` when given.
 */
function resolveProjectConcurrency(project, options) {
  const configured = project.concurrency?.project;
  return resolveCappedConcurrency(configured, options.projectConcurrency, Number.POSITIVE_INFINITY);
}
1191
/**
 * Resolves task-level concurrency: the configured `concurrency.task`
 * (fallback 1), capped by `options.taskConcurrency` when given.
 */
function resolveTaskConcurrency(project, options) {
  const configured = project.concurrency?.task;
  return resolveCappedConcurrency(configured, options.taskConcurrency, 1);
}
1194
/**
 * Resolves the concurrency used when scheduling tasks: the smaller of the
 * project-level and task-level limits.
 */
function resolveScheduledTaskConcurrency(project, options) {
  const projectLimit = resolveProjectConcurrency(project, options);
  const taskLimit = resolveTaskConcurrency(project, options);
  return Math.min(projectLimit, taskLimit);
}
1197
/**
 * Resolves the attempt/case concurrency applied while a task runs.
 *
 * Expects:
 * - task-level settings take precedence over project-level settings
 * - CLI options take precedence over both
 *
 * Returns:
 * - `undefined` when neither value is set, otherwise `{ attempt, case }`
 */
function resolveRuntimeTaskConcurrency(taskConcurrency, project, options) {
  const configuredAttempt = taskConcurrency?.attempt ?? project.concurrency?.attempt;
  const attempt = resolveOptionalRuntimeTaskConcurrency(configuredAttempt, options.attemptConcurrency);
  const configuredCase = taskConcurrency?.case ?? project.concurrency?.case;
  const caseConcurrency = resolveOptionalRuntimeTaskConcurrency(configuredCase, options.caseConcurrency);
  const nothingConfigured = attempt == null && caseConcurrency == null;
  if (nothingConfigured) return void 0;
  return {
    attempt,
    case: caseConcurrency
  };
}
1206
/**
 * Returns a copy of the scheduled task whose task definition carries the
 * resolved runtime concurrency.
 *
 * Returns:
 * - the original `task` unchanged when its entry has no task definition
 * - otherwise a shallow copy with `entry.task.concurrency` replaced
 */
function createScheduledTaskWithRuntimeConcurrency(task, project, options) {
  const { entry } = task;
  const taskDefinition = entry.task;
  if (taskDefinition == null) return task;
  const concurrency = resolveRuntimeTaskConcurrency(taskDefinition.concurrency, project, options);
  const entryWithConcurrency = {
    ...entry,
    task: {
      ...taskDefinition,
      concurrency
    }
  };
  return {
    ...task,
    entry: entryWithConcurrency
  };
}
1221
/**
 * Extracts the CLI-provided runtime concurrency.
 *
 * Returns:
 * - `undefined` when neither `attemptConcurrency` nor `caseConcurrency` is set
 * - otherwise `{ attempt, case }` taken directly from the options
 */
function resolveCliRuntimeConcurrency(options) {
  const { attemptConcurrency, caseConcurrency } = options;
  const hasOverride = attemptConcurrency != null || caseConcurrency != null;
  if (!hasOverride) return void 0;
  return {
    attempt: attemptConcurrency,
    case: caseConcurrency
  };
}
1228
/**
 * Decides whether CLI output should be colorized.
 *
 * Precedence:
 * - a non-empty `NO_COLOR` disables color; an empty `NO_COLOR=""` counts as
 *   unset, as the NO_COLOR convention specifies
 * - otherwise a defined `FORCE_COLOR` enables color unless it is exactly "0"
 * - otherwise color follows whether stdout is a TTY
 *
 * Returns:
 * - `true` when color should be used
 */
function shouldUseColor() {
  const noColor = process.env.NO_COLOR;
  if (noColor != null && noColor !== "") return false;
  const forceColor = process.env.FORCE_COLOR;
  if (forceColor != null) return forceColor !== "0";
  return process.stdout.isTTY === true;
}
1234
/**
 * Creates the color helpers used by the static CLI output.
 *
 * Returns:
 * - when `enabled` is false, helpers that return their input unchanged
 * - otherwise helpers that delegate to the matching `c` color function
 */
function createColorPalette(enabled) {
  const paletteKeys = [
    "black",
    "bgCyan",
    "bgGreen",
    "bgMagenta",
    "bgYellow",
    "dim",
    "gray",
    "green",
    "red",
    "yellow"
  ];
  const createHelper = (key) => {
    if (!enabled) return (value) => value;
    return (value) => c[key](value);
  };
  return Object.fromEntries(paletteKeys.map((key) => [key, createHelper(key)]));
}
1260
/**
 * Creates the project label used in static CLI output.
 *
 * Returns:
 * - `|name| ` when color is disabled or unsupported
 * - otherwise the name in black on a background picked from the name's characters
 */
function createProjectBadge(name, colors, colorEnabled) {
  if (!colorEnabled || !c.isColorSupported) return `|${name}| `;
  const labelColorPool = [colors.bgYellow, colors.bgCyan, colors.bgGreen, colors.bgMagenta];
  // Sum of char codes plus positions, so the same name always maps to the same pool entry.
  let nameHash = 0;
  const characters = name.split("");
  for (let index = 0; index < characters.length; index += 1) {
    nameHash += characters[index].charCodeAt(0) + index;
  }
  const background = labelColorPool[nameHash % labelColorPool.length];
  return `${colors.black(background(` ${name} `))} `;
}
1271
/**
 * Formats a duration as ` <rounded>ms` for static CLI output.
 *
 * Returns:
 * - an empty string when `durationMs` is nullish
 * - otherwise the rounded value, yellow when above 1000ms and green otherwise
 */
function formatDuration$1(durationMs, colors) {
  if (durationMs == null) return "";
  const rounded = Math.round(durationMs);
  const paint = rounded > 1e3 ? colors.yellow : colors.green;
  return paint(` ${rounded}${colors.dim("ms")}`);
}
1276
/**
 * Selects the projects whose `name` is in `names`.
 *
 * Returns:
 * - a shallow copy of all projects when `names` is empty
 * - otherwise the matching projects in their original order
 */
function filterProjectsByName(projects, names) {
  if (names.length === 0) return [...projects];
  const wantedNames = new Set(names);
  return projects.filter(({ name }) => wantedNames.has(name));
}
1281
/**
 * Normalizes a value for use as one segment of a run identity.
 *
 * Returns:
 * - `"default"` when the trimmed value is empty
 * - otherwise the trimmed value with each run of characters other than word
 *   characters, `.`, and `-` replaced by a single `-`
 */
function sanitizeIdentitySegment(value) {
  const trimmed = value.trim();
  return trimmed.length === 0 ? "default" : trimmed.replace(/[^\w.-]+/g, "-");
}
1286
/**
 * Creates the identity attached to a run and its recorded events.
 *
 * Expects:
 * - `options.workspace`, `options.experiment`, and `options.attempt` are optional strings
 *
 * Returns:
 * - sanitized `workspaceId`, `experimentId`, and `attemptId` (the attempt
 *   defaults to a timestamp-based id), plus a fresh `runId`
 */
function createRunIdentity(options) {
  const workspaceId = sanitizeIdentitySegment(options.workspace ?? "default-workspace");
  const experimentId = sanitizeIdentitySegment(options.experiment ?? "default-experiment");
  // ":" and "." in the ISO timestamp are replaced with "-" in the default attempt id.
  const createDefaultAttempt = () => `attempt-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
  const attemptId = sanitizeIdentitySegment(options.attempt ?? createDefaultAttempt());
  const runId = `run-${Date.now()}-${randomUUID().slice(0, 8)}`;
  return {
    attemptId,
    experimentId,
    runId,
    workspaceId
  };
}
1296
/**
 * Derives the project id used for a report.
 *
 * Returns:
 * - the sanitized project name when every project in the output shares one name
 * - `"multi-project"` otherwise (including when there are no projects)
 */
function deriveReportProjectId(output) {
  const distinctNames = new Set(output.projects.map((project) => project.name));
  if (distinctNames.size !== 1) return "multi-project";
  const [onlyName] = distinctNames;
  return sanitizeIdentitySegment(onlyName ?? "default-project");
}
1301
/**
 * Creates an in-memory recorder for run events.
 *
 * Expects:
 * - `identity` provides `attemptId`, `experimentId`, `runId`, and `workspaceId`
 *
 * Returns:
 * - `events`: the live array of recorded events
 * - `record(event, payload, metadata)`: appends one event; `taskId`, `caseId`,
 *   and `projectName` are read from `metadata` first, then from `payload`
 */
function createEventRecorder(identity) {
  const events = [];
  // Remembers which project a task belongs to so later events for that task can be attributed.
  const projectNameByTaskId = /* @__PURE__ */ new Map();
  const record = (event, payload, metadata) => {
    const taskId = metadata?.taskId ?? payload?.taskId;
    const caseId = metadata?.caseId ?? payload?.caseId;
    const projectName = metadata?.projectName ?? payload?.projectName;
    if (taskId != null && projectName != null) projectNameByTaskId.set(taskId, projectName);
    const projectId = taskId == null ? void 0 : projectNameByTaskId.get(taskId);
    events.push({
      attemptId: identity.attemptId,
      caseId,
      data: payload,
      event,
      experimentId: identity.experimentId,
      projectId,
      runId: identity.runId,
      schemaVersion: 1,
      taskId,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      version: 1,
      workspaceId: identity.workspaceId
    });
  };
  return { events, record };
}
1329
/**
 * Wraps a reporter so every lifecycle hook is recorded as an event before it
 * is forwarded.
 *
 * Expects:
 * - `recordEvent(eventName, payload)` is called first, then the wrapped reporter's hook
 * - `dispose()` is forwarded without recording
 *
 * Returns:
 * - a reporter exposing `dispose` and the seven lifecycle hooks
 */
function createReporterWithEventCapture(reporter, recordEvent) {
  const captureAndForward = (eventName, hookName) => (payload) => {
    recordEvent(eventName, payload);
    reporter[hookName](payload);
  };
  return {
    dispose() {
      reporter.dispose();
    },
    onCaseEnd: captureAndForward("CaseEnded", "onCaseEnd"),
    onCaseStart: captureAndForward("CaseStarted", "onCaseStart"),
    onRunEnd: captureAndForward("RunEnded", "onRunEnd"),
    onRunStart: captureAndForward("RunStarted", "onRunStart"),
    onTaskEnd: captureAndForward("TaskEnded", "onTaskEnd"),
    onTaskQueued: captureAndForward("TaskQueued", "onTaskQueued"),
    onTaskStart: captureAndForward("TaskStarted", "onTaskStart")
  };
}
1364
/**
 * Applies environment overrides to `process.env` for the duration of a run.
 *
 * Expects:
 * - a nullish value in `env` removes the variable; any other value is assigned
 *
 * Returns:
 * - a restore function that puts every touched variable back to its previous
 *   state (a no-op when `env` has no entries)
 */
function applyRunEnvironment(env) {
  const overrides = Object.entries(env);
  if (overrides.length === 0) return () => {};
  // Capture the previous state of each key before touching it.
  const previousStates = [];
  for (const [key, value] of overrides) {
    previousStates.push([key, {
      existed: Object.hasOwn(process.env, key),
      value: process.env[key]
    }]);
    if (value == null) delete process.env[key];
    else process.env[key] = value;
  }
  return () => {
    for (const [key, previous] of previousStates) {
      const hadValue = previous.existed && previous.value != null;
      if (hadValue) process.env[key] = previous.value;
      else delete process.env[key];
    }
  };
}
1393
/**
 * Tells whether a reporter exposes `getWindowRows`, i.e. can feed the live window renderer.
 */
function isSummaryReporter(reporter) {
  return Reflect.has(reporter, "getWindowRows");
}
1396
/**
 * Creates the reporter used for a CLI run.
 *
 * Expects:
 * - every option is optional; missing ones fall back to `process.stdout`,
 *   `process.stderr`, and `Date.now`
 *
 * Returns:
 * - for non-summary reporters, the reporter with `onCaseStart`/`onTaskQueued` forwarded
 * - for summary reporters, a wrapper that forwards every hook and then
 *   schedules a redraw of the live window
 */
function createRunReporter(options) {
  const getRows = options?.getRows ?? (() => process.stdout.rows);
  const reporter = createCliReporter({
    getColumns: options?.getColumns ?? (() => process.stdout.columns ?? 80),
    getNow: options?.getNow ?? (() => Date.now()),
    getWallClockNow: options?.getWallClockNow ?? (() => Date.now()),
    isTTY: options?.isTTY ?? process.stdout.isTTY === true,
    slowThresholdMs: options?.slowThresholdMs ?? 300,
    writeError: options?.writeError ?? ((value) => process.stderr.write(value)),
    writeOutput: options?.writeOutput ?? ((value) => process.stdout.write(value))
  });
  if (!isSummaryReporter(reporter)) {
    return {
      ...reporter,
      onCaseStart(payload) {
        reporter.onCaseStart(payload);
      },
      onTaskQueued(payload) {
        reporter.onTaskQueued(payload);
      }
    };
  }
  const rendererBaseOptions = {
    getColumns: options?.getColumns ?? (() => process.stdout.columns ?? 80),
    getWindow: () => reporter.getWindowRows({ maxRows: normalizeLiveReporterMaxRows(getRows()) }),
    queueRenderReset: options?.queueRenderReset,
    supportsAnsiWindowing: options?.supportsAnsiWindowing,
    writeOutput: options?.writeOutput ?? ((value) => process.stdout.write(value))
  };
  // Injected timers are passed on only when both halves are supplied.
  const hasInjectedTimers = options?.clearInterval != null && options.createInterval != null;
  const rendererOptions = hasInjectedTimers
    ? {
      ...rendererBaseOptions,
      clearInterval: options.clearInterval,
      createInterval: options.createInterval
    }
    : rendererBaseOptions;
  const renderer = new WindowRenderer(rendererOptions);
  renderer.start();
  // Forward the hook to the summary reporter, then request a redraw.
  const forwardThenRender = (hookName) => (payload) => {
    reporter[hookName](payload);
    renderer.schedule();
  };
  return {
    dispose() {
      reporter.dispose();
      renderer.dispose();
    },
    onCaseEnd: forwardThenRender("onCaseEnd"),
    onCaseStart: forwardThenRender("onCaseStart"),
    onRunEnd: forwardThenRender("onRunEnd"),
    onRunStart: forwardThenRender("onRunStart"),
    onTaskEnd: forwardThenRender("onTaskEnd"),
    onTaskQueued: forwardThenRender("onTaskQueued"),
    onTaskStart: forwardThenRender("onTaskStart")
  };
}
1467
/**
 * Normalizes the terminal row count into the live reporter window height.
 *
 * Unusable row counts (nullish, non-finite, zero, or negative) are treated as
 * 24 rows. One row is subtracted from the result, which is never below 6.
 *
 * Before:
 * - undefined
 * - 4
 * - 40
 *
 * After:
 * - 23
 * - 6
 * - 39
 */
function normalizeLiveReporterMaxRows(rows) {
  const hasUsableRows = rows != null && Number.isFinite(rows) && rows > 0;
  const terminalRows = hasUsableRows ? Math.floor(rows) : 24;
  return Math.max(6, terminalRows - 1);
}
1483
/**
 * Builds the payload announced when a task is queued: the entry name as
 * `displayName`, the project name, and the task id.
 */
function createTaskQueuePayload(task, projectName) {
  const { entry, id: taskId } = task;
  return {
    displayName: entry.name,
    projectName,
    taskId
  };
}
1490
/**
 * Builds the reporter-facing case id: `<index>:<URI-encoded name>`.
 */
function createTaskCaseReporterId(payload) {
  const { index, name } = payload;
  return [index, encodeURIComponent(name)].join(":");
}
1493
/**
 * Creates the hooks a running task uses to report case progress.
 *
 * Expects:
 * - `projectCaseCounters`, `projectCaseFailures`, and `vitestCompatReporter` are optional
 * - `projectCaseCounters.seenCaseIds` is a Set; each `<taskId>:<caseId>` is counted only once
 *
 * Returns:
 * - `onCaseStart`, `onCaseEnd`, and `onEvent` hooks bound to `task`
 */
function createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
  // Re-announce the task with the latest case total so the reporter can refresh its totals.
  const syncCaseTotal = (total) => {
    reporter.onTaskQueued({
      taskId: task.id,
      totalCases: total
    });
  };
  // Count a case outcome once per project, keyed by task id and case id.
  const tallyCaseOutcome = (caseId, state) => {
    if (projectCaseCounters == null) return;
    const projectCaseId = `${task.id}:${caseId}`;
    if (projectCaseCounters.seenCaseIds.has(projectCaseId)) return;
    projectCaseCounters.seenCaseIds.add(projectCaseId);
    switch (state) {
      case "passed":
        projectCaseCounters.passed += 1;
        break;
      case "failed":
        projectCaseCounters.failed += 1;
        break;
      case "timeout":
        projectCaseCounters.timeout += 1;
        break;
      default:
        projectCaseCounters.skipped += 1;
    }
  };
  return {
    onCaseEnd(payload) {
      const caseId = createTaskCaseReporterId(payload);
      tallyCaseOutcome(caseId, payload.state);
      syncCaseTotal(payload.total);
      const endedBadly = payload.state === "failed" || payload.state === "timeout";
      if (endedBadly && payload.errorMessage != null && projectCaseFailures != null) {
        projectCaseFailures.push({
          caseId,
          caseName: payload.name,
          errorMessage: payload.errorMessage,
          taskId: task.id
        });
      }
      reporter.onCaseEnd({
        caseId,
        errorMessage: payload.errorMessage,
        state: payload.state,
        taskId: task.id
      });
      vitestCompatReporter?.onCaseEnd({
        caseId,
        errorMessage: payload.errorMessage,
        state: payload.state,
        taskId: task.id
      });
    },
    onCaseStart(payload) {
      const caseId = createTaskCaseReporterId(payload);
      syncCaseTotal(payload.total);
      reporter.onCaseStart({
        autoRetry: payload.autoRetry,
        caseId,
        caseName: payload.name,
        retryIndex: payload.retryIndex,
        taskId: task.id
      });
      vitestCompatReporter?.onCaseStart({
        caseId,
        taskId: task.id
      });
    },
    onEvent(payload) {
      const metadata = {
        caseId: payload.caseId,
        projectName,
        taskId: task.id
      };
      recordEvent(payload.event, payload.data, metadata);
    }
  };
}
1557
/**
 * Builds the execution context handed to a task run from the CLI.
 *
 * Returns:
 * - the base context from `createTaskExecutionContext` (with a filesystem
 *   cache runtime), extended with `reporterHooks` and `runtimeConcurrency`
 */
function createCliTaskExecutionContext(task, models, cacheRootDirectory, cacheProjectName, workspaceId, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, runtimeConcurrency, vitestCompatReporter) {
  const cache = createFilesystemTaskCacheRuntime({
    cacheRootDirectory,
    projectName: cacheProjectName,
    workspaceId
  });
  const baseContext = createTaskExecutionContext({
    cache,
    models,
    task
  });
  const reporterHooks = createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter);
  return {
    ...baseContext,
    reporterHooks,
    runtimeConcurrency
  };
}
1572
/**
 * Returns the reporter hooks already attached to `context`, or creates fresh
 * ones when the context has none.
 */
function resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
  const { reporterHooks } = context;
  if (reporterHooks != null) return reporterHooks;
  return createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter);
}
1575
/**
 * Returns the `taskId` carried by a `RunnerExecutionError`, or `null` for any other error.
 */
function getFailedTaskId(error) {
  return error instanceof RunnerExecutionError ? error.taskId : null;
}
1579
/**
 * Creates the executor that runs a scheduled task through its eval task definition.
 *
 * Returns:
 * - an async `(task, context)` function that rejects when the entry has no
 *   task definition, and otherwise resolves to the task's run result
 *   (`entryId`, `id`, `matrix`, `inferenceExecutorId`, and a copy of the scores)
 */
function createAutoTaskExecutor(reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
  return async (task, context) => {
    const { entry } = task;
    const taskDefinition = entry.task;
    if (taskDefinition == null) throw new Error(`Missing eval task definition for entry "${task.entry.id}".`);
    const runInput = {
      cache: context.cache,
      model: context.model,
      reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
      task
    };
    const output = await taskDefinition.run(runInput);
    return {
      entryId: task.entry.id,
      id: task.id,
      matrix: task.matrix,
      inferenceExecutorId: task.inferenceExecutor.id,
      scores: Array.from(output.scores)
    };
  };
}
1598
/**
 * Returns a copy of `task.matrix` whose `eval`, `meta` and `run` sections are
 * shallow-copied objects (nested values are still shared).
 *
 * @param task Scheduled task carrying a `matrix` with `eval`, `meta` and `run`.
 * @returns New `{ eval, meta, run }` object.
 */
function cloneScheduledTaskMatrix(task) {
	const { eval: evalSection, meta: metaSection, run: runSection } = task.matrix;
	return {
		eval: Object.assign({}, evalSection),
		meta: Object.assign({}, metaSection),
		run: Object.assign({}, runSection)
	};
}
1605
/**
 * Summarizes the matrix dimensions covered by a list of scheduled tasks.
 *
 * @param tasks Scheduled tasks, each with `matrix.run`, `matrix.eval` and
 * `matrix.meta.{runRowId, evalRowId}`.
 * @returns `null` for an empty list; otherwise the sorted distinct run/eval
 * axis names and the number of distinct run/eval row ids.
 */
function createProjectMatrixSummary(tasks) {
	if (tasks.length === 0) return null;
	const runAxisNames = /* @__PURE__ */ new Set();
	const evalAxisNames = /* @__PURE__ */ new Set();
	const runRowIds = /* @__PURE__ */ new Set();
	const evalRowIds = /* @__PURE__ */ new Set();
	for (const { matrix } of tasks) {
		for (const axis of Object.keys(matrix.run)) runAxisNames.add(axis);
		for (const axis of Object.keys(matrix.eval)) evalAxisNames.add(axis);
		runRowIds.add(matrix.meta.runRowId);
		evalRowIds.add(matrix.meta.evalRowId);
	}
	return {
		evalAxes: Array.from(evalAxisNames).sort(),
		evalRows: evalRowIds.size,
		runAxes: Array.from(runAxisNames).sort(),
		runRows: runRowIds.size
	};
}
1624
/**
 * Discovers, loads and schedules one project without executing it.
 *
 * @param project Project config (`root`, `include`, `exclude`, matrices,
 * `inferenceExecutors`, `models`, optional `executor`, `name`).
 * @returns One of:
 * - `{ kind: "prepared", prepared }` when the project has an executor, or when
 *   at least one entry carries a task and the project has models;
 * - `{ kind: "summary", summary }` with `executed: false` when nothing can be
 *   executed, or when preparation threw (then `errorMessage` is set and all
 *   counts are zero).
 */
async function prepareProject(project) {
	const startedAt = Date.now();
	try {
		const runtimeContext = await createRunnerRuntimeContext({
			cwd: project.root,
			fallbackProjectRootDirectory: project.root
		});
		const evalFilePaths = await discoverEvalFiles({
			exclude: project.exclude,
			include: project.include,
			root: project.root
		});
		const loadedModules = await loadEvalModulesWithVitestRuntime(evalFilePaths, project.root);
		const entries = collectEvalEntries(loadedModules, runtimeContext);
		const tasks = createRunnerSchedule({
			evalMatrix: project.evalMatrix,
			entries,
			inferenceExecutors: project.inferenceExecutors,
			runMatrix: project.runMatrix
		});
		// Entry-level tasks can only run automatically when models are configured.
		const canAutoExecuteEntryTasks = entries.some((entry) => entry.task != null) && project.models.length > 0;
		const isExecutable = project.executor != null || canAutoExecuteEntryTasks;
		if (isExecutable) {
			return {
				kind: "prepared",
				prepared: {
					discoveredEvalFileCount: evalFilePaths.length,
					entryCount: entries.length,
					name: project.name,
					project,
					startedAt,
					tasks
				}
			};
		}
		// Nothing to execute: report what was scheduled as a non-executed summary.
		return {
			kind: "summary",
			summary: {
				caseSummary: null,
				caseFailures: [],
				discoveredEvalFileCount: evalFilePaths.length,
				durationMs: Date.now() - startedAt,
				entryCount: entries.length,
				errorMessage: null,
				executed: false,
				matrixSummary: createProjectMatrixSummary(tasks),
				name: project.name,
				result: null,
				taskCount: tasks.length
			}
		};
	} catch (error) {
		// Preparation failures become a failed summary instead of rejecting.
		const failedSummary = {
			caseSummary: null,
			caseFailures: [],
			discoveredEvalFileCount: 0,
			durationMs: Date.now() - startedAt,
			entryCount: 0,
			errorMessage: errorMessageFrom(error) ?? "Unknown project execution error.",
			executed: false,
			matrixSummary: null,
			name: project.name,
			result: null,
			taskCount: 0
		};
		return {
			kind: "summary",
			summary: failedSummary
		};
	}
}
1690
/**
 * Executes every scheduled task of one prepared project and returns its summary.
 *
 * Reporter callbacks (`reporter` and the optional vitest-compat bridge) are
 * notified on task queue/start/end and run start/end. `counters` is mutated
 * with passed/failed/skipped task counts.
 *
 * When `runScheduledTasks` rejects, the failing task (if identifiable and not
 * yet settled) is reported as "failed", every remaining unsettled task is
 * reported as "skipped", and a summary with `executed: false` and the error
 * message is returned instead of rethrowing.
 *
 * @param prepared Output of `prepareProject` (`name`, `project`, `tasks`, counts, `startedAt`).
 * @param identity Run identity; only `workspaceId` is read here.
 * @param cacheProjectName Optional cache namespace; falls back to `prepared.name`.
 * @param reporter Run reporter.
 * @param counters Mutable `{ passedTasks, failedTasks, skippedTasks }` shared across projects.
 * @param recordEvent Event recorder forwarded to reporter hooks.
 * @param options CLI run options used for concurrency resolution.
 * @returns Project summary object.
 */
async function executePreparedProject(prepared, identity, cacheProjectName, reporter, counters, recordEvent, options) {
	const settledTaskIds = /* @__PURE__ */ new Set();
	const projectCaseCounters = {
		failed: 0,
		passed: 0,
		seenCaseIds: /* @__PURE__ */ new Set(),
		skipped: 0,
		timeout: 0
	};
	const projectCaseFailures = [];
	const vitestCompatReporter = await createVievalVitestCompatReporterBridge({
		projectName: prepared.name,
		references: prepared.project.reporters
	});
	const rawTaskExecutor = prepared.project.executor ?? createAutoTaskExecutor(reporter, prepared.name, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter);

	// Wraps the executor so results always carry a cloned matrix of the runtime task.
	async function taskExecutor(task, context) {
		const runtimeTask = createScheduledTaskWithRuntimeConcurrency(task, prepared.project, options);
		const executorResult = await rawTaskExecutor(runtimeTask, context);
		return {
			...executorResult,
			matrix: cloneScheduledTaskMatrix(runtimeTask)
		};
	}

	// Builds the project summary; `failure` is `null` on success or `{ error }` on failure.
	const summarize = (failure, result) => ({
		caseSummary: {
			failed: projectCaseCounters.failed,
			passed: projectCaseCounters.passed,
			skipped: projectCaseCounters.skipped,
			timeout: projectCaseCounters.timeout,
			total: projectCaseCounters.seenCaseIds.size
		},
		caseFailures: projectCaseFailures,
		discoveredEvalFileCount: prepared.discoveredEvalFileCount,
		durationMs: Date.now() - prepared.startedAt,
		entryCount: prepared.entryCount,
		errorMessage: failure == null ? null : (errorMessageFrom(failure.error) ?? "Unknown project execution error."),
		executed: failure == null,
		matrixSummary: createProjectMatrixSummary(prepared.tasks),
		name: prepared.name,
		result,
		taskCount: prepared.tasks.length
	});

	// Marks a task that never reached `onTaskEnd` as settled and notifies reporters.
	const settleUnfinishedTask = async (taskId, state, counterKey) => {
		counters[counterKey] += 1;
		settledTaskIds.add(taskId);
		reporter.onTaskEnd({
			state,
			taskId
		});
		await vitestCompatReporter?.onTaskEnd({
			state,
			taskId
		});
	};

	for (const task of prepared.tasks) await vitestCompatReporter?.onTaskQueued({ taskId: task.id });
	await vitestCompatReporter?.onRunStart();
	try {
		const aggregated = await runScheduledTasks(prepared.tasks, taskExecutor, {
			createExecutionContext(task) {
				const cacheRootDirectory = resolve(prepared.project.root, ".vieval", "cache");
				return createCliTaskExecutionContext(task, prepared.project.models, cacheRootDirectory, cacheProjectName ?? prepared.name, identity.workspaceId, reporter, prepared.name, recordEvent, projectCaseCounters, projectCaseFailures, resolveCliRuntimeConcurrency(options), vitestCompatReporter);
			},
			onTaskEnd(task, state) {
				const taskId = task.id;
				settledTaskIds.add(taskId);
				reporter.onTaskEnd({
					state,
					taskId
				});
				vitestCompatReporter?.onTaskEnd({
					state,
					taskId
				});
				// Any state other than "passed" counts as a failed task here.
				if (state === "passed") counters.passedTasks += 1;
				else counters.failedTasks += 1;
			},
			onTaskStart(task) {
				const startPayload = { taskId: task.id };
				reporter.onTaskStart(startPayload);
				vitestCompatReporter?.onTaskStart({ taskId: task.id });
			},
			maxConcurrency: resolveScheduledTaskConcurrency(prepared.project, options)
		});
		await vitestCompatReporter?.onRunEnd({ failed: false });
		return summarize(null, aggregated);
	} catch (error) {
		const failedTaskId = getFailedTaskId(error);
		const failedTaskIsUnsettled = failedTaskId != null && !settledTaskIds.has(failedTaskId);
		if (failedTaskIsUnsettled) await settleUnfinishedTask(failedTaskId, "failed", "failedTasks");
		for (const task of prepared.tasks) {
			if (!settledTaskIds.has(task.id)) await settleUnfinishedTask(task.id, "skipped", "skippedTasks");
		}
		await vitestCompatReporter?.onRunEnd({ failed: true });
		return summarize({ error }, null);
	}
}
1810
/**
 * Writes `run-summary.json` and `events.jsonl` for one run.
 *
 * The target directory is
 * `<reportOut>/<workspaceId>/<projectId>/<experimentId>/<attemptId>/<runId>`
 * and is created recursively.
 *
 * @param output Run output serialized as pretty-printed JSON.
 * @param events Recorded events, written one JSON document per line.
 * @param identity Run identity providing the directory segments.
 * @param reportOut Report root directory.
 * @returns The resolved report directory.
 */
async function writeRunReportArtifacts(output, events, identity, reportOut) {
	const projectId = deriveReportProjectId(output);
	const { workspaceId, experimentId, attemptId, runId } = identity;
	const reportDirectory = resolve(reportOut, workspaceId, projectId, experimentId, attemptId, runId);
	await mkdir(reportDirectory, { recursive: true });

	const summaryPath = resolve(reportDirectory, "run-summary.json");
	const summaryJson = `${JSON.stringify(output, null, 2)}\n`;
	await writeFile(summaryPath, summaryJson, "utf-8");

	const eventsPath = resolve(reportDirectory, "events.jsonl");
	const serializedEvents = events.map((event) => JSON.stringify(event)).join("\n");
	// Only terminate with a newline when there is at least one event line.
	const eventsJsonl = events.length > 0 ? serializedEvents.concat("\n") : serializedEvents.concat("");
	await writeFile(eventsPath, eventsJsonl, "utf-8");

	return reportDirectory;
}
1818
/**
 * Runs vieval orchestration from config and returns project-level summaries.
 *
 * Call stack:
 *
 * {@link runVievalCli}
 *   -> {@link loadVievalCliConfig}
 *     -> {@link discoverEvalFiles}
 *       -> {@link collectEvalEntries}
 *         -> {@link createRunnerSchedule}
 *           -> {@link runScheduledTasks} (optional)
 *
 * Use when:
 * - running eval collection and scheduling from a single command
 * - keeping business-agent eval files near their implementation packages
 *
 * Behavior notes:
 * - The config's environment is applied for the duration of the run and is
 *   always restored, including when reporter setup or `reporter.dispose()` throws.
 * - Projects are prepared concurrently; executable projects then run through
 *   the workspace scheduler. Summaries are returned in project order.
 * - When `options.reportOut` is set, report artifacts are written and the
 *   returned `reportDirectory` points at them. The written `run-summary.json`
 *   itself still contains `reportDirectory: null`, because it is serialized
 *   before the field is assigned.
 *
 * @param options CLI run options (config path, cwd, project filter, reporter,
 * concurrency caps, `reportOut`, `cacheProjectName`, identity overrides).
 * @returns Run output with identity fields, `configFilePath`, `projects` and `reportDirectory`.
 */
async function runVievalCli(options = {}) {
	const identity = createRunIdentity(options);
	const loadedConfig = await loadVievalCliConfig({
		configFilePath: options.configFilePath,
		cwd: options.cwd
	});
	const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
	// Declared outside the `try` so `finally` can dispose it only if it was created.
	let reporter = null;
	try {
		// Reporter setup lives inside the `try`: if it throws, the environment
		// applied above is still restored.
		const eventRecorder = createEventRecorder(identity);
		reporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
		const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
		const workspaceScheduler = createSchedulerRuntime({ concurrency: { workspace: resolveWorkspaceConcurrency(loadedConfig, options) } });
		const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
		const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
		const totalTasks = preparedProjects.reduce((sum, project) => {
			if (project.kind === "prepared") return sum + project.prepared.tasks.length;
			return sum + project.summary.taskCount;
		}, 0);
		// Tasks of projects that were scheduled but cannot execute count as skipped.
		const skippedSummaryTasks = preparedProjects.reduce((sum, project) => {
			if (project.kind === "summary") return sum + project.summary.taskCount;
			return sum;
		}, 0);
		const reporterCounters = {
			failedTasks: 0,
			passedTasks: 0,
			skippedTasks: 0
		};
		reporter.onRunStart({ totalTasks });
		for (const project of executableProjects) for (const task of project.tasks) reporter.onTaskQueued(createTaskQueuePayload(task, project.name));
		const projectSummaries = (await Promise.all(preparedProjects.map(async (preparedProject, index) => {
			if (preparedProject.kind === "summary") return {
				index,
				summary: preparedProject.summary
			};
			return {
				index,
				summary: await workspaceScheduler.runCase({
					experimentId: identity.experimentId,
					projectName: preparedProject.prepared.name,
					scope: "workspace",
					workspaceId: identity.workspaceId
				}, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, reporter, reporterCounters, eventRecorder.record, options))
			};
		}))).sort((left, right) => left.index - right.index).map((item) => item.summary);
		reporter.onRunEnd({
			failedTasks: reporterCounters.failedTasks,
			passedTasks: reporterCounters.passedTasks,
			skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
			totalTasks
		});
		const output = {
			attemptId: identity.attemptId,
			configFilePath: loadedConfig.configFilePath,
			experimentId: identity.experimentId,
			projects: projectSummaries,
			reportDirectory: null,
			runId: identity.runId,
			workspaceId: identity.workspaceId
		};
		if (options.reportOut != null) output.reportDirectory = await writeRunReportArtifacts(output, eventRecorder.events, identity, options.reportOut);
		return output;
	} finally {
		// Nested `finally`: a throwing `dispose()` must not skip environment restoration.
		try {
			reporter?.dispose();
		} finally {
			restoreEnvironment();
		}
	}
}
1900
/**
 * Formats CLI run output as human-readable lines.
 *
 * Per project, one status line is emitted (failed / skipped / passed), followed
 * by optional matrix and schedule-breakdown lines and, for executed projects,
 * up to five failed cases. A project counts as failed when it has an
 * `errorMessage`, or when it executed and reports failed/timed-out cases or
 * case failures.
 *
 * @param output Run output as returned by `runVievalCli`.
 * @returns The formatted text, lines joined with "\n" (no trailing newline).
 */
function formatVievalCliRunOutput(output) {
	const colorEnabled = shouldUseColor();
	const colors = createColorPalette(colorEnabled);
	const lines = [
		` ${colors.dim("RUN")} ${colors.yellow("vieval")}`,
		` ${colors.dim("Config")} ${output.configFilePath ?? "(not found, using defaults)"}`,
		""
	];
	const projectTally = {
		failed: 0,
		passed: 0,
		skipped: 0
	};
	let totalTasks = 0;
	let executedTasks = 0;

	// Renders "matrix run N [axes] / eval M [axes]", or null without a summary.
	const describeMatrix = (summary) => {
		if (summary == null) return null;
		const runAxesLabel = summary.runAxes.length === 0 ? "-" : summary.runAxes.join("|");
		const evalAxesLabel = summary.evalAxes.length === 0 ? "-" : summary.evalAxes.join("|");
		return `matrix run ${summary.runRows} [${runAxesLabel}] / eval ${summary.evalRows} [${evalAxesLabel}]`;
	};

	// Renders "entries × inferenceExecutors × run rows × eval rows = tasks" when
	// the task count divides evenly; otherwise null.
	const describeSchedule = (project) => {
		const summary = project.matrixSummary;
		if (summary == null) return null;
		const factors = [
			project.taskCount,
			project.entryCount,
			summary.runRows,
			summary.evalRows
		];
		if (factors.some((factor) => factor <= 0)) return null;
		const denominator = project.entryCount * summary.runRows * summary.evalRows;
		if (denominator <= 0 || project.taskCount % denominator !== 0) return null;
		const providerCount = project.taskCount / denominator;
		return [
			colors.dim("schedule "),
			colors.yellow(String(project.entryCount)),
			colors.dim(" entries × "),
			colors.yellow(String(providerCount)),
			colors.dim(" inferenceExecutors × "),
			colors.yellow(String(summary.runRows)),
			colors.dim(" run rows × "),
			colors.yellow(String(summary.evalRows)),
			colors.dim(" eval rows = "),
			colors.green(String(project.taskCount)),
			colors.dim(" tasks")
		].join("");
	};

	// Appends the optional matrix and schedule lines shared by skipped and executed projects.
	const appendMatrixLines = (project) => {
		const matrixSummary = describeMatrix(project.matrixSummary);
		if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
		const scheduleBreakdown = describeSchedule(project);
		if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
	};

	for (const project of output.projects) {
		totalTasks += project.taskCount;
		executedTasks += project.result?.overall.runCount ?? 0;
		const badge = createProjectBadge(project.name, colors, colorEnabled);

		if (project.errorMessage != null) {
			projectTally.failed += 1;
			lines.push(` ${colors.red("❯")} ${badge}${formatDuration$1(project.durationMs, colors)}`);
			lines.push(` ${project.errorMessage}`);
			continue;
		}

		if (!project.executed) {
			projectTally.skipped += 1;
			const countLabel = colors.dim(`(${project.taskCount} tasks)`);
			const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, 0 runs, hybrid n/a`);
			lines.push(` ${colors.dim("○")} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
			appendMatrixLines(project);
			continue;
		}

		const caseFailureCount = project.caseFailures?.length ?? 0;
		const hasFailedCases = (project.caseSummary?.failed ?? 0) > 0 || (project.caseSummary?.timeout ?? 0) > 0 || caseFailureCount > 0;
		if (hasFailedCases) projectTally.failed += 1;
		else projectTally.passed += 1;

		const hybridAverage = project.result?.overall.hybridAverage;
		const hybridAverageLabel = hybridAverage == null ? "n/a" : String(hybridAverage);
		const runCount = project.result?.overall.runCount ?? 0;
		const countLabel = colors.dim(`(${project.taskCount} tasks)`);
		let caseSummaryLabel = "";
		if (project.caseSummary != null) caseSummaryLabel = `, cases ${project.caseSummary.passed} passed | ${project.caseSummary.failed} failed | ${project.caseSummary.timeout} timeout`;
		const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, ${runCount} runs${caseSummaryLabel}, hybrid ${hybridAverageLabel}`);
		const statusIcon = hasFailedCases ? colors.red("❯") : colors.green("✓");
		lines.push(` ${statusIcon} ${badge}${countLabel}${detailsLabel}${formatDuration$1(project.durationMs, colors)}`);
		appendMatrixLines(project);

		if (caseFailureCount > 0) {
			lines.push(` ${colors.red("Failed cases:")}`);
			// Only the first five failures are listed; the rest are counted below.
			for (const failure of project.caseFailures.slice(0, 5)) {
				lines.push(` ${colors.red(`- ${failure.caseName} (${failure.taskId})`)}`);
				for (const line of failure.errorMessage.split("\n")) lines.push(` ${colors.red(line)}`);
			}
			if (project.caseFailures.length > 5) lines.push(` ${colors.dim(`... ${project.caseFailures.length - 5} more failed cases`)}`);
		}
	}

	lines.push("");
	// With no skipped or failed projects this collapses to the plain "N passed" form.
	const summarySegments = [`${colors.green(String(projectTally.passed))} passed`];
	if (projectTally.skipped > 0) summarySegments.push(`${colors.dim(String(projectTally.skipped))} skipped`);
	if (projectTally.failed > 0) summarySegments.push(`${colors.red(String(projectTally.failed))} failed`);
	lines.push(` ${colors.dim("Projects")} ${summarySegments.join(" | ")} (${output.projects.length})`);
	lines.push(` ${colors.dim("Tasks")} ${executedTasks} executed / ${totalTasks} scheduled`);
	return lines.join("\n");
}
1997
+ //#endregion
1998
+ //#region src/cli/compare.ts
1999
+ const compareHelpText = `
2000
+ Compare multiple methods on one benchmark.
2001
+
2002
+ Usage
2003
+ $ vieval compare [--config <path>] [--comparison <id>] [--output <path>] [--format <format>]
2004
+
2005
+ Options
2006
+ --config Config file path (default: nearest vieval.config.*)
2007
+ --comparison Comparison entry id from config.comparisons
2008
+ --output Optional output artifact path
2009
+ --format Console output format: table | json (default: table)
2010
+ `;
2011
/**
 * Strips an optional leading `--` separator and an optional `compare`
 * subcommand token from `argv`.
 *
 * @param argv Raw argument list.
 * @returns A new array with the remaining arguments.
 */
function normalizeCliArgv$4(argv) {
	let offset = 0;
	if (argv[offset] === "--") offset += 1;
	if (argv[offset] === "compare") offset += 1;
	return argv.slice(offset);
}
2016
/**
 * Parses `vieval compare` CLI arguments into a normalized payload.
 *
 * @param argv Raw arguments; a leading `--` and/or `compare` token is stripped.
 * @returns `{ comparisonId, configFilePath, format, output }` where `format`
 * is `"json"` or `"table"`. Any value other than `json` (compared
 * case-insensitively) falls back to `"table"`.
 */
function parseCompareCliArguments(argv) {
	const cli = meow(compareHelpText, {
		argv: normalizeCliArgv$4(argv),
		flags: {
			config: { type: "string" },
			comparison: { type: "string" },
			format: {
				default: "table",
				type: "string"
			},
			output: { type: "string" }
		},
		importMeta: import.meta
	});
	// Lower-case before matching so `--format JSON` is honoured, consistent with
	// how `vieval report analyze` normalizes its own `--format` flag.
	const requestedFormat = String(cli.flags.format).toLowerCase();
	return {
		comparisonId: cli.flags.comparison,
		configFilePath: cli.flags.config,
		format: requestedFormat === "json" ? "json" : "table",
		output: cli.flags.output
	};
}
2037
/**
 * Runs one compare session from `vieval.config.*` comparison-mode config.
 *
 * Each configured method is executed in turn (one after another) through
 * `runVievalCli`, using the benchmark's shared case namespace as cache project
 * name and the benchmark id as workspace. The first method whose run contains
 * a project with an `errorMessage` aborts the comparison with an `Error`.
 *
 * The resulting artifact is optionally written to `--output` and always
 * printed to stdout, either as JSON or as a plain-text ranking.
 *
 * NOTE(review): `parsed.cwd` is forwarded to the config loader, but the
 * argument parser visible in this file returns no `cwd` field, so it appears
 * to always be `undefined` here — confirm whether that is intended.
 *
 * @param argv Raw `compare` subcommand arguments.
 * @returns `{ benchmarkId, methods }` with one `{ methodId, output }` per method.
 */
async function runCompareCli(argv) {
	const parsed = parseCompareCliArguments(argv);
	const loaded = await loadVievalComparisonConfig({
		comparisonId: parsed.comparisonId,
		configFilePath: parsed.configFilePath,
		cwd: parsed.cwd
	});
	const methodResults = [];
	// Methods run sequentially on purpose: each run is awaited before the next starts.
	for (const method of loaded.config.methods) {
		const methodWorkspace = resolve(method.workspace);
		const methodConfigFilePath = method.configFilePath ?? resolve(methodWorkspace, "vieval.config.ts");
		const output = await runVievalCli({
			cacheProjectName: loaded.config.benchmark.sharedCaseNamespace,
			configFilePath: methodConfigFilePath,
			cwd: methodWorkspace,
			project: [method.project],
			workspace: loaded.config.benchmark.id
		});
		for (const project of output.projects) {
			if (project.errorMessage != null) throw new Error(`Comparison method "${method.id}" failed: ${project.errorMessage}`);
		}
		methodResults.push({
			methodId: method.id,
			output
		});
	}
	const runOutput = {
		benchmarkId: loaded.config.benchmark.id,
		methods: methodResults
	};
	const artifact = buildCompareReportArtifact({
		benchmarkId: runOutput.benchmarkId,
		methods: runOutput.methods,
		reportPath: loaded.configFilePath
	});
	if (parsed.output != null) {
		await writeCompareReportArtifact({
			artifact,
			outputPath: parsed.output
		});
	}
	if (parsed.format === "json") {
		process.stdout.write(`${JSON.stringify(artifact, null, 2)}\n`);
		return runOutput;
	}
	// Averages are printed with three decimals; missing averages as "n/a".
	const formatAverage = (value) => value == null ? "n/a" : value.toFixed(3);
	const reportLines = ["COMPARE vieval", `Benchmark ${artifact.benchmarkId}`];
	artifact.methods.forEach((method, index) => {
		const hybrid = formatAverage(method.hybridAverage);
		const exact = formatAverage(method.exactAverage);
		reportLines.push(`${index + 1}. ${method.methodId} hybrid=${hybrid} exact=${exact} runs=${method.runCount}`);
	});
	process.stdout.write(`${reportLines.join("\n")}\n`);
	return runOutput;
}
2089
/**
 * Runs the compare command and converts any failure into a stderr message plus
 * a non-zero `process.exitCode` instead of a rejected promise.
 *
 * @param argv Raw `compare` subcommand arguments.
 */
async function runCompareCliOrExit(argv) {
	try {
		await runCompareCli(argv);
		return;
	} catch (error) {
		const reason = errorMessageFrom(error) ?? "Unknown compare command failure.";
		process.stderr.write(`[vieval compare] ${reason}\n`);
	}
	// Only reached after a failure was reported above.
	process.exitCode = 1;
}
2098
+ //#endregion
2099
+ //#region package.json
2100
+ var name = "vieval";
2101
+ //#endregion
2102
+ //#region src/cli/eval-run.ts
2103
+ const evalRunHelpText = `
2104
+ Execute vieval projects from discovered or explicit config.
2105
+
2106
+ Usage
2107
+ $ vieval run [--config <path>] [--project <name>] [--json] [--report-out <path>]
2108
+
2109
+ Options
2110
+ --config Config file path
2111
+ --project Project name to execute; may be repeated
2112
+ --workspace Workspace id used in report artifacts
2113
+ --experiment Experiment id used in report artifacts
2114
+ --attempt Attempt id used in report artifacts
2115
+ --workspace-concurrency Workspace scheduling cap
2116
+ --project-concurrency Project scheduling cap
2117
+ --task-concurrency Task scheduling cap
2118
+ --attempt-concurrency Attempt scheduling cap
2119
+ --case-concurrency Case scheduling cap
2120
+ --report-out Report output root directory
2121
+ --json Print machine-readable JSON output
2122
+ `;
2123
/**
 * Strips an optional leading `--` separator and an optional `run` subcommand
 * token from `argv`.
 *
 * @param argv Raw argument list.
 * @returns A new array with the remaining arguments.
 */
function normalizeCliArgv$3(argv) {
	let offset = 0;
	if (argv[offset] === "--") offset += 1;
	if (argv[offset] === "run") offset += 1;
	return argv.slice(offset);
}
2127
/**
 * Normalizes the `--project` flag value to a list.
 *
 * @param projectNames A single name, a list of names, or `null`/`undefined`.
 * @returns A one-element array for a string, the value itself when it is
 * non-nullish, otherwise an empty array.
 */
function normalizeProjectNames(projectNames) {
	if (projectNames == null) return [];
	return typeof projectNames === "string" ? [projectNames] : projectNames;
}
2131
/**
 * Parses `vieval run` CLI arguments into one normalized execution payload.
 *
 * Use when:
 * - the top-level CLI forwards `run` subcommand arguments
 * - tests need stable flag normalization without executing the runner
 *
 * Expects:
 * - argv in either direct `run` form or forwarded `-- ...` form
 *
 * Returns:
 * - normalized run options ready for {@link runVievalCli}; `json` is always a
 *   boolean and `project` is always an array
 */
function parseCliArguments(argv) {
	const stringFlag = () => ({ type: "string" });
	const numberFlag = () => ({ type: "number" });
	const { flags } = meow(evalRunHelpText, {
		argv: normalizeCliArgv$3(argv),
		importMeta: import.meta,
		flags: {
			config: stringFlag(),
			json: {
				default: false,
				type: "boolean"
			},
			project: {
				isMultiple: true,
				type: "string"
			},
			workspace: stringFlag(),
			experiment: stringFlag(),
			attempt: stringFlag(),
			workspaceConcurrency: numberFlag(),
			projectConcurrency: numberFlag(),
			taskConcurrency: numberFlag(),
			attemptConcurrency: numberFlag(),
			caseConcurrency: numberFlag(),
			reportOut: stringFlag()
		}
	});
	const {
		attempt,
		attemptConcurrency,
		caseConcurrency,
		config,
		experiment,
		json,
		project,
		projectConcurrency,
		reportOut,
		taskConcurrency,
		workspace,
		workspaceConcurrency
	} = flags;
	return {
		attempt,
		attemptConcurrency,
		caseConcurrency,
		configFilePath: config,
		experiment,
		json: json === true,
		project: normalizeProjectNames(project),
		projectConcurrency,
		reportOut,
		taskConcurrency,
		workspace,
		workspaceConcurrency
	};
}
2184
/**
 * Executes the `vieval run` subcommand.
 *
 * Call stack:
 *
 * top-level `vieval` CLI
 *   -> {@link runTopLevelCli} (`./index`)
 *     -> {@link runEvalRunCli}
 *       -> {@link parseCliArguments}
 *       -> {@link runVievalCli}
 *       -> `process.stdout.write(...)` / `process.stderr.write(...)`
 *       -> `process.exitCode`
 *
 * Use when:
 * - the published `vieval` binary needs to execute the `run` subcommand
 * - callers want one reusable implementation without a second bundled entrypoint
 *
 * Expects:
 * - argv that belongs to the `run` subcommand only
 *
 * Returns:
 * - resolves after writing CLI output and updating `process.exitCode`
 *   (set to 1 when the run reports failures or when the run throws)
 *
 * NOTICE:
 * - `src/cli/index.ts` is the only direct-execution entrypoint for the bundled
 *   CLI artifact. Keeping `eval-run.ts` reusable avoids duplicate top-level
 *   await guards once tsdown inlines both modules into `dist/cli/index.mjs`.
 */
async function runEvalRunCli(argv) {
	// Argument parsing happens outside the `try`, so parse errors propagate to the caller.
	const parsed = parseCliArguments(argv);
	// Everything except the output-format switch is forwarded to the runner.
	const { json: printJson, ...runOptions } = parsed;
	try {
		const output = await runVievalCli(runOptions);
		const rendered = printJson ? JSON.stringify(output, null, 2) : formatVievalCliRunOutput(output);
		process.stdout.write(`${rendered}\n`);
		if (hasRunFailures(output)) process.exitCode = 1;
	} catch (error) {
		const reason = errorMessageFrom(error) ?? "Unknown CLI failure.";
		process.stderr.write(`[${name}] ${reason}\n`);
		process.exitCode = 1;
	}
}
2241
+ //#endregion
2242
+ //#region src/cli/report-artifacts.ts
2243
/**
 * Resolves one or more `run-summary.json` paths from a report location.
 *
 * Use when:
 * - callers may pass a run directory, summary file path, or a report root
 *
 * Resolution order:
 * 1. an existing path ending in `.json` is returned as-is;
 * 2. a `run-summary.json` directly inside the location is returned;
 * 3. otherwise the location is searched recursively for `run-summary.json`.
 *
 * Returns:
 * - sorted absolute summary file paths
 */
async function resolveRunSummaryPaths(reportPath) {
	const reportLocation = resolve(reportPath);
	const pointsAtJsonFile = reportLocation.endsWith(".json") && existsSync(reportLocation);
	if (pointsAtJsonFile) return [reportLocation];

	const nestedSummaryPath = resolve(reportLocation, "run-summary.json");
	if (existsSync(nestedSummaryPath)) return [nestedSummaryPath];

	const discoveredPaths = await glob("**/run-summary.json", {
		absolute: true,
		cwd: reportLocation
	});
	return discoveredPaths.sort((left, right) => left.localeCompare(right));
}
2262
/**
 * Reads one run report artifact set from `run-summary.json` and sibling `events.jsonl`.
 *
 * Use when:
 * - report analysis needs both run aggregate output and event count metadata
 *
 * Blank lines in `events.jsonl` are ignored and a missing events file yields
 * an empty event list. Each event is reduced to `caseId`, `data`, `event` and
 * `taskId`.
 *
 * @param summaryFilePath Path of the `run-summary.json` file.
 * @returns `{ events, eventsCount, reportDirectory, summary, summaryFilePath }`.
 */
function readReportRunArtifact(summaryFilePath) {
	const reportDirectory = resolve(summaryFilePath, "..");
	const summary = JSON.parse(readFileSync(summaryFilePath, "utf-8"));
	const eventsFilePath = resolve(reportDirectory, "events.jsonl");
	const events = [];
	if (existsSync(eventsFilePath)) {
		for (const line of readFileSync(eventsFilePath, "utf-8").split("\n")) {
			if (line.trim().length === 0) continue;
			const { caseId, data, event, taskId } = JSON.parse(line);
			events.push({
				caseId,
				data,
				event,
				taskId
			});
		}
	}
	return {
		events,
		eventsCount: events.length,
		reportDirectory,
		summary,
		summaryFilePath
	};
}
2289
/**
 * Reads all run artifacts found under `reportPath`.
 *
 * Use when:
 * - callers need multi-run analysis from a directory root
 *
 * @param reportPath Run directory, summary file path, or report root.
 * @returns One artifact per resolved `run-summary.json`, in resolved path order.
 */
async function readReportArtifacts(reportPath) {
	const summaryFilePaths = await resolveRunSummaryPaths(reportPath);
	const artifacts = [];
	for (const summaryFilePath of summaryFilePaths) artifacts.push(readReportRunArtifact(summaryFilePath));
	return artifacts;
}
2298
/**
 * Creates a compact summary row for one run artifact.
 *
 * Use when:
 * - table/csv/jsonl exports should stay stable and cheap to parse
 *
 * A project counts as failed when its `errorMessage` is non-null and as
 * executed when its `executed` flag is truthy. Missing identity fields are
 * reported as `null`.
 *
 * @param artifact Artifact with `summary`, `eventsCount` and `reportDirectory`.
 * @returns Flat summary row.
 */
function summarizeReportRunArtifact(artifact) {
	const { summary, eventsCount, reportDirectory } = artifact;
	const { projects } = summary;
	let failedProjects = 0;
	let executedProjects = 0;
	let totalTasks = 0;
	const projectNames = [];
	for (const project of projects) {
		if (project.errorMessage != null) failedProjects += 1;
		if (project.executed) executedProjects += 1;
		totalTasks = totalTasks + project.taskCount;
		projectNames.push(project.name);
	}
	return {
		attemptId: summary.attemptId ?? null,
		eventsCount,
		executedProjects,
		experimentId: summary.experimentId ?? null,
		failedProjects,
		projectNames,
		reportDirectory,
		runId: summary.runId ?? null,
		totalProjects: projects.length,
		totalTasks,
		workspaceId: summary.workspaceId ?? null
	};
}
2324
+ //#endregion
2325
+ //#region src/cli/report-analyze.ts
2326
/**
 * Help text for `vieval report analyze`.
 *
 * Passed to `meow` as the help message by `parseReportAnalyzeCliArguments`.
 * Each option listed here has a matching flag definition in that parser.
 */
const reportAnalyzeHelpText = `
Analyze generated vieval report artifacts.

Usage
$ vieval report analyze <reportPath> [options]

Options
--format Output format: table | json | jsonl | csv (default: table)
--workspace Workspace id filter
--project Project name filter (exact)
--experiment Experiment id filter
--attempt Attempt id filter
--run Run id filter
--task-state Keep runs containing at least one task in this state
--case-state Keep runs containing at least one case in this state
--contains Keep runs containing this text in event name or payload
--error-contains Keep runs containing this text in project errors or event payload
--run-matrix Keep runs matching run-matrix selector "key=value[,key=value]"
--eval-matrix Keep runs matching eval-matrix selector "key=value[,key=value]"
`;
2346
/**
 * Strips a leading `--` separator and the `report analyze` / `analyze`
 * command prefix so only the subcommand's own arguments remain.
 *
 * Returns:
 * - a new array; the input argv is never mutated
 */
function normalizeCliArgv$2(argv) {
	const args = argv[0] === "--" ? argv.slice(1) : [...argv];
	const [first, second] = args;
	if (first === "report" && second === "analyze") return args.slice(2);
	return first === "analyze" ? args.slice(1) : args;
}
2352
/**
 * Parses `vieval report analyze` arguments into a normalized filter payload.
 *
 * Expects:
 * - argv may still carry the `report analyze` prefix; it is stripped first
 *
 * Returns:
 * - the required report path, the output format (anything other than
 *   json/jsonl/csv resolves to "table"), and the optional filters
 *
 * Throws:
 * - when `<reportPath>` is missing
 * - whatever `normalizeStateFilter` / `parseMatrixSelector` throw for
 *   malformed filter values
 */
function parseReportAnalyzeCliArguments(argv) {
	// A fresh descriptor per flag avoids sharing one object across flag definitions.
	const stringFlag = () => ({ type: "string" });
	const { input, flags } = meow(reportAnalyzeHelpText, {
		argv: normalizeCliArgv$2(argv),
		flags: {
			attempt: stringFlag(),
			caseState: stringFlag(),
			contains: stringFlag(),
			evalMatrix: stringFlag(),
			errorContains: stringFlag(),
			experiment: stringFlag(),
			format: {
				default: "table",
				type: "string"
			},
			project: stringFlag(),
			runMatrix: stringFlag(),
			run: stringFlag(),
			taskState: stringFlag(),
			workspace: stringFlag()
		},
		importMeta: import.meta
	});
	const [reportPath] = input;
	if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
	const requestedFormat = flags.format.toLowerCase();
	const structuredFormats = ["json", "jsonl", "csv"];
	const format = structuredFormats.includes(requestedFormat) ? requestedFormat : "table";
	return {
		attempt: flags.attempt,
		caseState: normalizeStateFilter(flags.caseState),
		contains: flags.contains,
		evalMatrix: parseMatrixSelector(flags.evalMatrix),
		errorContains: flags.errorContains,
		experiment: flags.experiment,
		format,
		project: flags.project,
		reportPath,
		runMatrix: parseMatrixSelector(flags.runMatrix),
		run: flags.run,
		taskState: normalizeStateFilter(flags.taskState),
		workspace: flags.workspace
	};
}
2394
/**
 * Normalizes a `--task-state` / `--case-state` value.
 *
 * Returns:
 * - `undefined` when no value was given
 * - otherwise the trimmed, lower-cased state
 *
 * Throws:
 * - when the value is not "passed", "failed", or "skipped"
 */
function normalizeStateFilter(value) {
	if (value == null) return;
	const supportedStates = ["passed", "failed", "skipped"];
	const candidate = value.trim().toLowerCase();
	if (!supportedStates.includes(candidate)) throw new Error(`Unsupported state filter "${value}". Expected "passed", "failed", or "skipped".`);
	return candidate;
}
2400
/**
 * Parses a matrix selector string of the form "key=value[,key=value]".
 *
 * Expects:
 * - segments separated by commas; blank segments are ignored
 * - each segment holds a non-empty key and value around the first `=`
 *   (later `=` characters stay part of the value)
 *
 * Returns:
 * - `undefined` when no selector was given
 * - otherwise a plain object mapping trimmed keys to trimmed string values;
 *   a repeated key keeps its last value
 *
 * Throws:
 * - when a segment has no `=`, an empty key, or an empty value
 */
function parseMatrixSelector(value) {
	if (value == null) return;
	const entries = [];
	for (const rawSegment of value.split(",")) {
		const segment = rawSegment.trim();
		if (segment.length === 0) continue;
		const separatorIndex = segment.indexOf("=");
		// The segment is already trimmed, so these two index checks are enough to
		// guarantee that both sides of the separator contain visible characters.
		if (separatorIndex <= 0 || separatorIndex === segment.length - 1) throw new Error(`Invalid matrix selector segment "${segment}". Expected "key=value".`);
		entries.push([segment.slice(0, separatorIndex).trim(), segment.slice(separatorIndex + 1).trim()]);
	}
	// Object.fromEntries defines own data properties, so a key such as
	// "__proto__" is kept as a selector entry instead of being swallowed by the
	// prototype setter (which would silently turn the filter into a no-op).
	return Object.fromEntries(entries);
}
2414
/**
 * Keeps the summary rows that satisfy every identity filter that was given.
 *
 * Use when:
 * - rows should be narrowed by workspace, experiment, attempt, run id, or
 *   exact project name before the outcome filters run
 *
 * Returns:
 * - a new array; filters left unset (null/undefined) are ignored
 */
function filterAnalyzeRows(rows, parsed) {
	// Pairs of [filter name on `parsed`, field name on the row] compared with strict equality.
	const identityChecks = [
		["workspace", "workspaceId"],
		["experiment", "experimentId"],
		["attempt", "attemptId"],
		["run", "runId"]
	];
	return rows.filter((row) => {
		for (const [filterKey, rowKey] of identityChecks) {
			const expected = parsed[filterKey];
			if (expected != null && row[rowKey] !== expected) return false;
		}
		return parsed.project == null || row.projectNames.includes(parsed.project);
	});
}
2424
/**
 * Case-insensitively checks whether the JSON serialization of `value`
 * contains `needle`.
 *
 * Returns:
 * - `true` when the trimmed needle is empty (nothing to search for)
 * - `false` when `value` has no JSON representation (for example `undefined`)
 * - otherwise whether the lower-cased JSON text includes the lower-cased needle
 */
function includesNeedle(value, needle) {
	const normalizedNeedle = needle.trim().toLowerCase();
	if (normalizedNeedle.length === 0) return true;
	// JSON.stringify yields undefined (not a string) for undefined, functions
	// and symbols. Callers pass `event.data`, which may be absent, so treat a
	// missing serialization as "no match" instead of crashing on `.toLowerCase()`.
	const serialized = JSON.stringify(value);
	if (serialized === undefined) return false;
	return serialized.toLowerCase().includes(normalizedNeedle);
}
2429
/**
 * Reports whether the artifact recorded at least one "TaskEnded" event whose
 * payload state equals `targetState`. Events without a payload never match.
 */
function hasTaskState(artifact, targetState) {
	return artifact.events.some(({ event, data }) => event === "TaskEnded" && data?.state === targetState);
}
2435
/**
 * Reports whether the artifact recorded at least one "CaseEnded" event whose
 * payload state equals `targetState`. Events without a payload never match.
 */
function hasCaseState(artifact, targetState) {
	return artifact.events.some(({ event, data }) => event === "CaseEnded" && data?.state === targetState);
}
2441
/**
 * Checks whether a matrix row satisfies every `key=value` pair of a selector.
 *
 * Expects:
 * - selector values are strings; matrix values are compared by their string
 *   form, so the number `0.5` matches the selector value "0.5"
 *
 * Returns:
 * - `true` when every selector key is an own, defined property of the matrix
 *   whose string form equals the expected value (an empty selector always
 *   matches)
 * - `false` when the matrix is missing or lacks any selected key
 */
function matchesMatrixSelector(matrix, selector) {
	return Object.entries(selector).every(([key, expectedValue]) => {
		// A missing matrix or key must not match: stringifying `undefined` would
		// otherwise equal the selector value "undefined", and a null matrix
		// would throw on property access.
		if (matrix == null || !Object.prototype.hasOwnProperty.call(matrix, key)) return false;
		const actualValue = matrix[key];
		return actualValue !== undefined && String(actualValue) === expectedValue;
	});
}
2444
/**
 * Reports whether any run of any project in the artifact has a run matrix
 * (`run.matrix.run`) that satisfies the selector. Projects without a result
 * never match.
 */
function hasRunMatrixMatch(artifact, selector) {
	const matchesRun = (run) => matchesMatrixSelector(run.matrix.run, selector);
	return artifact.summary.projects.some(({ result }) => result?.runs.some(matchesRun) === true);
}
2447
/**
 * Reports whether any run of any project in the artifact has an eval matrix
 * (`run.matrix.eval`) that satisfies the selector. Projects without a result
 * never match.
 */
function hasEvalMatrixMatch(artifact, selector) {
	const matchesRun = (run) => matchesMatrixSelector(run.matrix.eval, selector);
	return artifact.summary.projects.some(({ result }) => result?.runs.some(matchesRun) === true);
}
2450
/**
 * Applies the content-based filters (matrix selectors, task/case state, and
 * text searches) to one run artifact.
 *
 * Returns:
 * - `true` only when every filter that was given is satisfied; filters left
 *   unset (null/undefined) are skipped
 */
function matchesOutcomeFilters(artifact, parsed) {
	const { runMatrix, evalMatrix, taskState, caseState, contains, errorContains } = parsed;
	if (runMatrix != null && !hasRunMatrixMatch(artifact, runMatrix)) return false;
	if (evalMatrix != null && !hasEvalMatrixMatch(artifact, evalMatrix)) return false;
	if (taskState != null && !hasTaskState(artifact, taskState)) return false;
	if (caseState != null && !hasCaseState(artifact, caseState)) return false;
	if (contains != null) {
		// The needle is searched in the event name together with its payload.
		const foundInEvents = artifact.events.some(({ data, event }) => includesNeedle({
			data,
			event
		}, contains));
		if (!foundInEvents) return false;
	}
	if (errorContains != null) {
		// Project error messages are checked first; event payloads are only
		// scanned when no project error matched.
		const foundInProjectErrors = artifact.summary.projects.some(({ errorMessage }) => errorMessage != null && includesNeedle(errorMessage, errorContains));
		if (!foundInProjectErrors && !artifact.events.some((event) => includesNeedle(event.data, errorContains))) return false;
	}
	return true;
}
2466
/**
 * Loads every run artifact under the report path, applies identity and
 * outcome filters, and assembles the analyze payload.
 *
 * Returns:
 * - the surviving summary rows in artifact order, their experiment rollups,
 *   and the filtered/total run counts
 */
async function readReportAnalyzeOutput(parsed) {
	const artifacts = await readReportArtifacts(parsed.reportPath);
	const allRows = artifacts.map((artifact) => summarizeReportRunArtifact(artifact));
	// Rows that pass the identity filters, indexed by report directory so each
	// artifact can be paired with its row again.
	const rowByDirectory = /* @__PURE__ */ new Map();
	for (const row of filterAnalyzeRows(allRows, parsed)) rowByDirectory.set(row.reportDirectory, row);
	const runs = [];
	for (const artifact of artifacts) {
		if (!rowByDirectory.has(artifact.reportDirectory)) continue;
		if (!matchesOutcomeFilters(artifact, parsed)) continue;
		const row = rowByDirectory.get(artifact.reportDirectory);
		if (row != null) runs.push(row);
	}
	return {
		experimentSummaries: buildExperimentSummaries(runs),
		filteredRunCount: runs.length,
		runs,
		totalRunCount: allRows.length
	};
}
2479
/**
 * Rounds a metric to at most six decimal places and returns it as a number.
 */
function roundMetric(value) {
	const fixedText = value.toFixed(6);
	return Number(fixedText);
}
2482
/**
 * Computes the arithmetic mean of a list of numbers.
 *
 * Returns:
 * - `0` for an empty list, otherwise the sum divided by the element count
 */
function computeAverage(values) {
	const count = values.length;
	if (count === 0) return 0;
	let total = 0;
	for (const value of values) total += value;
	return total / count;
}
2486
/**
 * Computes the population standard deviation of a list of numbers, i.e. the
 * square root of the mean squared deviation from the mean.
 *
 * Returns:
 * - `0` for an empty list
 */
function computeStandardDeviation(values) {
	if (values.length === 0) return 0;
	const mean = computeAverage(values);
	const squaredDeviations = values.map((value) => (value - mean) ** 2);
	return Math.sqrt(computeAverage(squaredDeviations));
}
2492
/**
 * Builds the `workspace::experiment` grouping key for a summary row, using
 * placeholder names when either id is missing.
 */
function createExperimentGroupKey(row) {
	const workspacePart = row.workspaceId ?? "unknown-workspace";
	const experimentPart = row.experimentId ?? "unknown-experiment";
	return `${workspacePart}::${experimentPart}`;
}
2495
/**
 * Builds experiment-level rollups from filtered run rows.
 *
 * Use when:
 * - CLI consumers need stability and reliability summaries above per-run data
 *
 * Expects:
 * - rows shaped like the output of `summarizeReportRunArtifact`
 *
 * Returns:
 * - one summary row per `workspaceId + experimentId` group, sorted by
 *   workspace id and then experiment id
 * - each summary carries group totals, the share of runs without failed
 *   projects, and per-attempt summaries with min/max/avg/stdev of their
 *   success rates (all rates rounded via `roundMetric`)
 * - a run counts as successful when it has zero failed projects
 */
function buildExperimentSummaries(rows) {
	// Groups items into insertion-ordered buckets keyed by `keyOf(item)`.
	const groupBy = (items, keyOf) => {
		const buckets = /* @__PURE__ */ new Map();
		for (const item of items) {
			const key = keyOf(item);
			const bucket = buckets.get(key);
			if (bucket == null) buckets.set(key, [item]);
			else bucket.push(item);
		}
		return buckets;
	};
	// Sums one numeric field across rows.
	const sumBy = (items, field) => items.reduce((sum, item) => sum + item[field], 0);
	// Share of rows without failed projects; buckets are never empty.
	const successRateOf = (items) => items.filter((row) => row.failedProjects === 0).length / items.length;
	const summarizeAttempt = ([attemptId, attemptRows]) => ({
		attemptId,
		failedProjects: sumBy(attemptRows, "failedProjects"),
		runCount: attemptRows.length,
		runIds: attemptRows.map((row) => row.runId).filter((runId) => runId != null).sort((left, right) => left.localeCompare(right)),
		successRate: roundMetric(successRateOf(attemptRows)),
		totalEvents: sumBy(attemptRows, "eventsCount"),
		totalTasks: sumBy(attemptRows, "totalTasks")
	});
	const summaries = [];
	for (const groupRows of groupBy(rows, (row) => createExperimentGroupKey(row)).values()) {
		// Read the ids from a member row instead of splitting the group key:
		// splitting on "::" truncates ids that themselves contain "::".
		// The placeholders mirror the ones used by createExperimentGroupKey.
		const [firstRow] = groupRows;
		const workspaceId = firstRow.workspaceId ?? "unknown-workspace";
		const experimentId = firstRow.experimentId ?? "unknown-experiment";
		const attemptToRuns = groupBy(groupRows, (row) => row.attemptId ?? "unknown-attempt");
		const attemptSummaries = [...attemptToRuns.entries()].map(summarizeAttempt).sort((left, right) => left.attemptId.localeCompare(right.attemptId));
		const attemptSuccessRates = attemptSummaries.map((summary) => summary.successRate);
		summaries.push({
			attemptCount: attemptToRuns.size,
			attemptSummaries,
			attemptSuccessRateStats: {
				avg: roundMetric(computeAverage(attemptSuccessRates)),
				max: roundMetric(Math.max(...attemptSuccessRates)),
				min: roundMetric(Math.min(...attemptSuccessRates)),
				stdev: roundMetric(computeStandardDeviation(attemptSuccessRates))
			},
			experimentId,
			failedProjects: sumBy(groupRows, "failedProjects"),
			runCount: groupRows.length,
			successRate: roundMetric(successRateOf(groupRows)),
			totalEvents: sumBy(groupRows, "eventsCount"),
			totalTasks: sumBy(groupRows, "totalTasks"),
			workspaceId
		});
	}
	return summaries.sort((left, right) => {
		const workspaceCompare = left.workspaceId.localeCompare(right.workspaceId);
		if (workspaceCompare !== 0) return workspaceCompare;
		return left.experimentId.localeCompare(right.experimentId);
	});
}
2576
/**
 * Renders the analyze output as a pipe-separated text table: a title line
 * with run and experiment-group counts, a header line, then one line per run.
 * Missing identifiers are shown as "n/a".
 */
function formatTableOutput$1(output) {
	const orPlaceholder = (value) => value ?? "n/a";
	const rowLines = output.runs.map((row) => [
		`${orPlaceholder(row.runId)}`,
		`${orPlaceholder(row.workspaceId)}`,
		`${orPlaceholder(row.experimentId)}`,
		`${orPlaceholder(row.attemptId)}`,
		`${row.executedProjects}/${row.totalProjects}`,
		`${row.failedProjects}`,
		`${row.totalTasks}`,
		`${row.eventsCount}`
	].join(" | "));
	const title = `ANALYZE vieval report: ${output.filteredRunCount}/${output.totalRunCount} runs (${output.experimentSummaries.length} experiment groups)`;
	const header = "Run ID | Workspace | Experiment | Attempt | Projects(executed/total) | FailedProjects | Tasks | Events";
	return [title, header, ...rowLines].join("\n");
}
2587
/**
 * Renders the analyze output as CSV: a header line followed by one line per
 * run, joined with "\n" (no trailing newline).
 *
 * Escaping:
 * - `reportDirectory` and `projectNames` (joined with "|") are always quoted
 * - identifier columns are quoted only when they contain a comma, a double
 *   quote, or a line break, so ordinary ids are emitted unchanged
 * - embedded double quotes are doubled
 */
function formatCsvOutput(output) {
	const quote = (text) => `"${text.replaceAll("\"", "\"\"")}"`;
	// Identifiers come from report files; quote them whenever they would
	// otherwise break the column layout.
	const escapeIdentifier = (value) => {
		const text = String(value ?? "");
		return /[",\r\n]/.test(text) ? quote(text) : text;
	};
	const header = [
		"runId",
		"workspaceId",
		"experimentId",
		"attemptId",
		"totalProjects",
		"executedProjects",
		"failedProjects",
		"totalTasks",
		"eventsCount",
		"reportDirectory",
		"projectNames"
	].join(",");
	const lines = output.runs.map((row) => [
		escapeIdentifier(row.runId),
		escapeIdentifier(row.workspaceId),
		escapeIdentifier(row.experimentId),
		escapeIdentifier(row.attemptId),
		row.totalProjects.toString(),
		row.executedProjects.toString(),
		row.failedProjects.toString(),
		row.totalTasks.toString(),
		row.eventsCount.toString(),
		quote(row.reportDirectory),
		quote(row.projectNames.join("|"))
	].join(","));
	return [header, ...lines].join("\n");
}
2618
/**
 * Runs `vieval report analyze`: parses argv, builds the analysis, and writes
 * it to stdout in the requested format.
 *
 * Failure handling:
 * - any error is reported on stderr with a `[vieval report analyze]` prefix
 *   and `process.exitCode` is set to 1; the function itself does not throw
 */
async function runReportAnalyzeCli(argv) {
	try {
		const parsed = parseReportAnalyzeCliArguments(argv);
		const output = await readReportAnalyzeOutput(parsed);
		let rendered;
		switch (parsed.format) {
			case "json":
				rendered = `${JSON.stringify(output, null, 2)}\n`;
				break;
			case "jsonl": {
				// One JSON document per run; no trailing newline when there are no runs.
				const jsonl = output.runs.map((run) => JSON.stringify(run)).join("\n");
				rendered = `${jsonl}${jsonl.length > 0 ? "\n" : ""}`;
				break;
			}
			case "csv":
				rendered = `${formatCsvOutput(output)}\n`;
				break;
			default:
				rendered = `${formatTableOutput$1(output)}\n`;
		}
		process.stdout.write(rendered);
	} catch (error) {
		const errorMessage = errorMessageFrom(error) ?? "Unknown report analyze failure.";
		process.stderr.write(`[vieval report analyze] ${errorMessage}\n`);
		process.exitCode = 1;
	}
}
2642
+ //#endregion
2643
+ //#region src/cli/report-index.ts
2644
/**
 * Help text for `vieval report index`.
 *
 * Passed to `meow` as the help message by `parseReportIndexCliArguments`,
 * which defines the matching `output` and `format` flags.
 */
const reportIndexHelpText = `
Build report indexes from generated vieval artifacts.

Usage
$ vieval report index <reportPath> [--output <path>] [--format <format>]

Options
--output Output file path (default: <reportPath>/index/runs.jsonl)
--format Console output format: table | json | jsonl (default: table)
`;
2654
/**
 * Strips a leading `--` separator and the `report index` / `index` command
 * prefix so only the subcommand's own arguments remain.
 *
 * Returns:
 * - a new array; the input argv is never mutated
 */
function normalizeCliArgv$1(argv) {
	const args = argv[0] === "--" ? argv.slice(1) : [...argv];
	const [first, second] = args;
	if (first === "report" && second === "index") return args.slice(2);
	return first === "index" ? args.slice(1) : args;
}
2660
/**
 * Parses `vieval report index` arguments.
 *
 * Returns:
 * - the required report path, the optional output file path, and the console
 *   format (anything other than json/jsonl resolves to "table")
 *
 * Throws:
 * - when `<reportPath>` is missing
 */
function parseReportIndexCliArguments(argv) {
	const { input, flags } = meow(reportIndexHelpText, {
		argv: normalizeCliArgv$1(argv),
		flags: {
			format: {
				default: "table",
				type: "string"
			},
			output: { type: "string" }
		},
		importMeta: import.meta
	});
	const [reportPath] = input;
	if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
	const requestedFormat = flags.format.toLowerCase();
	const structuredFormats = ["json", "jsonl"];
	return {
		format: structuredFormats.includes(requestedFormat) ? requestedFormat : "table",
		output: flags.output,
		reportPath
	};
}
2681
/**
 * Summarizes every run artifact under the report path and writes the rows as
 * JSON Lines to the index file.
 *
 * Output location:
 * - `parsed.output` when given, otherwise `<reportPath>/index/runs.jsonl`;
 *   missing parent directories are created
 *
 * Returns:
 * - the resolved index file path, the number of indexed runs, and the rows
 */
async function writeIndexFile(parsed) {
	const artifacts = await readReportArtifacts(parsed.reportPath);
	const rows = artifacts.map((artifact) => summarizeReportRunArtifact(artifact));
	const requestedPath = parsed.output ?? resolve(parsed.reportPath, "index", "runs.jsonl");
	const indexFilePath = resolve(requestedPath);
	await mkdir(dirname(indexFilePath), { recursive: true });
	// One JSON document per line, each newline-terminated; an empty index is an empty file.
	const indexContents = rows.map((row) => `${JSON.stringify(row)}\n`).join("");
	await writeFile(indexFilePath, indexContents, "utf-8");
	return {
		indexFilePath,
		indexedRunCount: rows.length,
		rows
	};
}
2693
/**
 * Renders the three-line console summary for `vieval report index`:
 * a title, the index file path, and the number of indexed runs.
 */
function formatTableOutput(output) {
	const { indexFilePath, indexedRunCount } = output;
	return `INDEX vieval report\nPath ${indexFilePath}\nRun count ${indexedRunCount}`;
}
2700
/**
 * Runs `vieval report index`: parses argv, writes the index file, and prints
 * the result to stdout in the requested console format.
 *
 * Failure handling:
 * - any error is reported on stderr with a `[vieval report index]` prefix
 *   and `process.exitCode` is set to 1; the function itself does not throw
 */
async function runReportIndexCli(argv) {
	try {
		const parsed = parseReportIndexCliArguments(argv);
		const output = await writeIndexFile(parsed);
		let rendered;
		switch (parsed.format) {
			case "json":
				rendered = `${JSON.stringify(output, null, 2)}\n`;
				break;
			case "jsonl": {
				// One JSON document per row; no trailing newline when there are no rows.
				const jsonl = output.rows.map((row) => JSON.stringify(row)).join("\n");
				rendered = `${jsonl}${jsonl.length > 0 ? "\n" : ""}`;
				break;
			}
			default:
				rendered = `${formatTableOutput(output)}\n`;
		}
		process.stdout.write(rendered);
	} catch (error) {
		const errorMessage = errorMessageFrom(error) ?? "Unknown report index failure.";
		process.stderr.write(`[vieval report index] ${errorMessage}\n`);
		process.exitCode = 1;
	}
}
2720
+ //#endregion
2721
+ //#region src/cli/index.ts
2722
/**
 * Help text for the top-level `vieval` executable.
 *
 * Passed to `meow` by `parseTopLevelCliArguments` and printed (trimmed) to
 * stdout by `runTopLevelCli` when the resolved command is "help".
 */
const topLevelHelpText = `
Execute and report evaluation projects.

Usage
$ vieval <command> [options]

Commands
run Discover and execute eval projects
compare Compare multiple workspaces/methods on one benchmark
report Analyze and index generated report artifacts

Examples
$ vieval run
$ vieval run --config vieval.config.ts --project chess --json --report-out .vieval/reports
$ vieval compare --config vieval.config.ts --comparison agent-memory
$ vieval report analyze .vieval/reports/my-run
$ vieval report index .vieval/reports --output .vieval/reports/index/runs.jsonl
`;
2740
/**
 * Returns a copy of argv without a leading `--` separator, if one is present.
 */
function normalizeCliArgv(argv) {
	if (argv[0] === "--") return argv.slice(1);
	return Array.from(argv);
}
2743
/**
 * Resolves which top-level `vieval` command should run.
 *
 * Use when:
 * - the executable needs to pick a subcommand
 * - tests want argv normalization without invoking any subcommand
 *
 * Expects:
 * - argv excludes the node executable and script path
 *
 * Returns:
 * - `{ command, commandArgv }`, where `commandArgv` is everything after the
 *   command; a missing command, `help`, `--help`, or `-h` resolves to
 *   `{ command: "help", commandArgv: [] }`
 *
 * Throws:
 * - when the command is not "run", "compare", or "report"
 */
function parseTopLevelCliArguments(argv) {
	const normalizedArgv = normalizeCliArgv(argv);
	const [command] = normalizedArgv;
	// meow's return value is intentionally unused; its automatic help and
	// version handling are switched off so dispatch stays in this function.
	meow(topLevelHelpText, {
		autoHelp: false,
		autoVersion: false,
		argv: normalizedArgv,
		importMeta: import.meta
	});
	const helpAliases = ["help", "--help", "-h"];
	if (command == null || helpAliases.includes(command)) return {
		command: "help",
		commandArgv: []
	};
	const supportedCommands = ["run", "report", "compare"];
	if (!supportedCommands.includes(command)) throw new Error(`Unsupported vieval command "${command}". Expected "run", "compare", or "report".`);
	return {
		command,
		commandArgv: normalizedArgv.slice(1)
	};
}
2775
/**
 * Entry point of the `vieval` CLI: resolves the top-level command and hands
 * the remaining argv to the matching subcommand.
 *
 * Dispatch:
 * - "help"    -> prints the trimmed top-level help text to stdout
 * - "report"  -> `analyze` or `index` report CLI, chosen by the first
 *   remaining argument (which is passed along to the report CLI)
 * - "compare" -> compare CLI
 * - otherwise -> eval run CLI
 *
 * Expects:
 * - argv excludes the node executable and script path
 *
 * Returns:
 * - resolves after the selected subcommand completes
 *
 * Throws:
 * - when the top-level command or the report subcommand is unsupported
 */
async function runTopLevelCli(argv) {
	const { command, commandArgv } = parseTopLevelCliArguments(argv);
	switch (command) {
		case "help":
			process.stdout.write(`${topLevelHelpText.trim()}\n`);
			return;
		case "report": {
			const [reportSubcommand] = commandArgv;
			if (reportSubcommand === "analyze") {
				await runReportAnalyzeCli(commandArgv);
				return;
			}
			if (reportSubcommand === "index") {
				await runReportIndexCli(commandArgv);
				return;
			}
			throw new Error(`Unsupported vieval report command "${reportSubcommand ?? "(none)"}". Expected "analyze" or "index".`);
		}
		case "compare":
			await runCompareCliOrExit(commandArgv);
			return;
		default:
			await runEvalRunCli(commandArgv);
	}
}
2818
+ //#endregion
2819
+ export { runTopLevelCli as n, parseTopLevelCliArguments as t };
2820
+
2821
+ //# sourceMappingURL=cli-sanbKtQq.mjs.map