vieval 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +290 -0
  2. package/dist/assertions-DcAjfVDA.mjs +183 -0
  3. package/dist/assertions-DcAjfVDA.mjs.map +1 -0
  4. package/dist/cli/index.d.mts +11 -0
  5. package/dist/cli/index.mjs +1434 -0
  6. package/dist/cli/index.mjs.map +1 -0
  7. package/dist/config-D2fe1SnT.mjs +17 -0
  8. package/dist/config-D2fe1SnT.mjs.map +1 -0
  9. package/dist/config.d.mts +3 -0
  10. package/dist/config.mjs +3 -0
  11. package/dist/core/assertions/index.d.mts +2 -0
  12. package/dist/core/assertions/index.mjs +2 -0
  13. package/dist/core/inference-executors/index.d.mts +273 -0
  14. package/dist/core/inference-executors/index.mjs +225 -0
  15. package/dist/core/inference-executors/index.mjs.map +1 -0
  16. package/dist/core/processors/results/index.d.mts +96 -0
  17. package/dist/core/processors/results/index.mjs +64 -0
  18. package/dist/core/processors/results/index.mjs.map +1 -0
  19. package/dist/core/runner/index.d.mts +2 -0
  20. package/dist/core/runner/index.mjs +2 -0
  21. package/dist/expect-0jPJ7Zio.d.mts +2318 -0
  22. package/dist/expect-extensions-CwPtgTz8.mjs +13471 -0
  23. package/dist/expect-extensions-CwPtgTz8.mjs.map +1 -0
  24. package/dist/expect-i9WZWGrA.mjs +17 -0
  25. package/dist/expect-i9WZWGrA.mjs.map +1 -0
  26. package/dist/expect.d.mts +2 -0
  27. package/dist/expect.mjs +2 -0
  28. package/dist/index-DP7jsORl.d.mts +947 -0
  29. package/dist/index-oSXhM1zx.d.mts +314 -0
  30. package/dist/index.d.mts +92 -0
  31. package/dist/index.mjs +150 -0
  32. package/dist/index.mjs.map +1 -0
  33. package/dist/magic-string.es-CH1jwzMg.mjs +1013 -0
  34. package/dist/magic-string.es-CH1jwzMg.mjs.map +1 -0
  35. package/dist/models-D_MsBtYw.mjs +14 -0
  36. package/dist/models-D_MsBtYw.mjs.map +1 -0
  37. package/dist/plugin-DVaRZY2x.d.mts +84 -0
  38. package/dist/plugins/chat-models/index.d.mts +90 -0
  39. package/dist/plugins/chat-models/index.mjs +48 -0
  40. package/dist/plugins/chat-models/index.mjs.map +1 -0
  41. package/dist/registry-ChOjjdEC.mjs +245 -0
  42. package/dist/registry-ChOjjdEC.mjs.map +1 -0
  43. package/dist/runner-4ZsOveoY.mjs +480 -0
  44. package/dist/runner-4ZsOveoY.mjs.map +1 -0
  45. package/dist/testing/expect-extensions.d.mts +86 -0
  46. package/dist/testing/expect-extensions.mjs +2 -0
  47. package/package.json +88 -0
@@ -0,0 +1,1434 @@
1
+ #!/usr/bin/env node
2
+ import { n as consumeModuleRegistrations, r as endModuleRegistration, s as loadVievalCliConfig, t as beginModuleRegistration } from "../registry-ChOjjdEC.mjs";
3
+ import { a as runScheduledTasks, i as RunnerExecutionError, n as createRunnerSchedule, r as createRunnerRuntimeContext, s as collectEvalEntries, t as createTaskExecutionContext } from "../runner-4ZsOveoY.mjs";
4
+ import process from "node:process";
5
+ import path from "node:path";
6
+ import { fileURLToPath, pathToFileURL } from "node:url";
7
+ import { errorMessageFrom } from "@moeru/std";
8
+ import c from "tinyrainbow";
9
+ import meow from "meow";
10
+ import { uniq } from "es-toolkit";
11
+ import { glob } from "tinyglobby";
12
+ import { stripVTControlCharacters } from "node:util";
13
+ import stringWidth from "fast-string-width";
14
+ //#region package.json
15
// Package name inlined from package.json by the bundler; never reassigned, so bind it with `const`.
const name = "vieval";
16
+ //#endregion
17
+ //#region src/cli/discovery.ts
18
/**
 * Resolves the eval files selected by the include/exclude glob lists.
 *
 * Use when:
 * - the CLI needs the concrete file list for one project root
 *
 * Expects:
 * - `options.include` and `options.exclude` are iterables of glob patterns
 * - `options.root` is the directory the patterns are resolved against
 *
 * Returns:
 * - absolute file paths, de-duplicated and ordered with `localeCompare`
 */
async function discoverEvalFiles(options) {
	const matchedPaths = await glob([...options.include], {
		absolute: true,
		cwd: options.root,
		ignore: [...options.exclude],
		onlyFiles: true
	});
	const uniquePaths = uniq(matchedPaths);
	uniquePaths.sort((left, right) => left.localeCompare(right));
	return uniquePaths;
}
35
+ //#endregion
36
+ //#region src/cli/reporters/noop-reporter.ts
37
/**
 * Builds a reporter whose lifecycle hooks all do nothing.
 *
 * Use when:
 * - no terminal output is wanted
 * - a caller needs a reporter object but has nothing to report to
 *
 * Expects:
 * - hooks may be invoked in any order, any number of times
 *
 * Returns:
 * - an object exposing every lifecycle hook plus `dispose`, each returning `undefined`
 */
function createNoopReporter() {
	const payloadHooks = [
		"onRunStart",
		"onTaskQueued",
		"onTaskStart",
		"onCaseStart",
		"onCaseEnd",
		"onTaskEnd",
		"onRunEnd"
	];
	const reporter = {};
	// Each hook gets its own empty function so no hook shares identity with another.
	for (const hookName of payloadHooks) reporter[hookName] = (_payload) => {};
	reporter.dispose = () => {};
	return reporter;
}
62
+ //#endregion
63
+ //#region src/cli/reporters/summary-reporter.ts
64
// Glyph placed in front of every active task row.
const POINTER = "❯";
// Tree glyph used for the last slow-case row listed under a task.
const TREE_NODE_END = "└";
// Tree glyph used for every slow-case row except the last one.
const TREE_NODE_MIDDLE = "├";
67
/**
 * Tracks task and case lifecycle events and turns them into summary rows.
 *
 * The class keeps one state record per task id, two counter records (tasks and
 * cases), and the timestamps captured at run start. It never writes to the
 * terminal itself; `getWindowRows()` only returns strings.
 */
var SummaryReporterStateMachine = class {
	/** Injected options; this class reads `getNow`, `getWallClockNow`, `isTTY` and `slowThresholdMs`. */
	options;
	taskCounters = createCounterState();
	caseCounters = createCounterState();
	/** Task state records keyed by task id. */
	tasks = /* @__PURE__ */ new Map();
	/** Next `queueOrder` value handed to a newly created task record. */
	queueOrderCounter = 0;
	startedAtMs = 0;
	startTime = "";
	constructor(options) {
		this.options = options;
	}
	/**
	 * Resets all state for a new run.
	 *
	 * Expects:
	 * - `payload.totalTasks` is the number used as the task counter total
	 *
	 * Returns:
	 * - no direct value
	 */
	onRunStart(payload) {
		this.tasks.clear();
		this.queueOrderCounter = 0;
		resetCounterState(this.taskCounters, payload.totalTasks);
		resetCounterState(this.caseCounters, 0);
		this.startedAtMs = this.options.getNow();
		const wallClockStart = new Date(this.options.getWallClockNow());
		this.startTime = formatTimeString(wallClockStart);
	}
	/**
	 * Registers (or updates) a task that is not finished yet.
	 *
	 * Expects:
	 * - `displayName`, `projectName` and `totalCases` are optional; missing values keep the stored ones
	 *
	 * Returns:
	 * - no direct value
	 */
	onTaskQueued(payload) {
		const task = this.getOrCreateTaskState(payload.taskId);
		if (task.state === "finished") return;
		if (payload.displayName != null) task.displayName = payload.displayName;
		if (payload.projectName != null) task.projectName = payload.projectName;
		this.syncTaskTotalCases(task, payload.totalCases);
	}
	/**
	 * Marks a task as running and records its start time once.
	 *
	 * Returns:
	 * - no direct value
	 */
	onTaskStart(payload) {
		const task = this.getOrCreateTaskState(payload.taskId);
		if (task.state === "finished") return;
		task.state = "running";
		if (task.startedAt == null) task.startedAt = this.options.getNow();
	}
	/**
	 * Starts tracking one running case of a task.
	 *
	 * Expects:
	 * - a `caseId` that is already running or already settled is ignored
	 *
	 * Returns:
	 * - no direct value
	 */
	onCaseStart(payload) {
		const task = this.getOrCreateTaskState(payload.taskId);
		if (task.state === "finished") return;
		task.state = "running";
		if (task.startedAt == null) task.startedAt = this.options.getNow();
		const { caseId } = payload;
		const alreadyTracked = task.settledCaseIds.has(caseId) || task.runningCases.has(caseId);
		if (alreadyTracked) return;
		task.caseOrderCounter += 1;
		task.runningCases.set(caseId, {
			caseId,
			caseName: payload.caseName ?? caseId,
			order: task.caseOrderCounter,
			startedAt: this.options.getNow()
		});
		this.syncTaskTotalCases(task);
	}
	/**
	 * Settles one case and advances the case counters.
	 *
	 * Expects:
	 * - a second completion for the same `caseId` only removes it from the running set
	 * - any state other than "passed" or "failed" is counted as skipped
	 *
	 * Returns:
	 * - no direct value
	 */
	onCaseEnd(payload) {
		const task = this.getOrCreateTaskState(payload.taskId);
		if (task.state === "finished") return;
		const { caseId } = payload;
		const alreadySettled = task.settledCaseIds.has(caseId);
		task.runningCases.delete(caseId);
		if (alreadySettled) return;
		task.settledCaseIds.add(caseId);
		task.completedCases += 1;
		this.syncTaskTotalCases(task);
		this.caseCounters.completed += 1;
		const bucket = payload.state === "passed" ? "passed" : payload.state === "failed" ? "failed" : "skipped";
		this.caseCounters[bucket] += 1;
	}
	/**
	 * Finishes a task and advances the task counters.
	 *
	 * Expects:
	 * - events for an already finished task are ignored
	 * - any state other than "passed" or "failed" is counted as skipped
	 *
	 * Returns:
	 * - no direct value
	 */
	onTaskEnd(payload) {
		const task = this.getOrCreateTaskState(payload.taskId);
		if (task.state === "finished") return;
		// Sync before clearing the running set so cases still in flight count toward the total.
		this.syncTaskTotalCases(task);
		task.state = "finished";
		task.taskResult = payload.state;
		task.runningCases.clear();
		this.taskCounters.completed += 1;
		const bucket = payload.state === "passed" ? "passed" : payload.state === "failed" ? "failed" : "skipped";
		this.taskCounters[bucket] += 1;
	}
	/**
	 * Overwrites the task counters with the totals supplied by the caller.
	 *
	 * Returns:
	 * - no direct value
	 */
	onRunEnd(payload) {
		const { totalTasks, passedTasks, failedTasks, skippedTasks } = payload;
		const counters = this.taskCounters;
		counters.total = totalTasks;
		counters.passed = passedTasks;
		counters.failed = failedTasks;
		counters.skipped = skippedTasks;
		counters.completed = passedTasks + failedTasks + skippedTasks;
	}
	/**
	 * Cleanup hook; this reporter holds nothing to release.
	 *
	 * Returns:
	 * - no direct value
	 */
	dispose() {}
	/**
	 * Produces the rows of the live window: active task rows, then footer rows.
	 *
	 * Expects:
	 * - `options.maxRows`, when positive, caps the output; footer rows are kept
	 *   in preference to active rows
	 *
	 * Returns:
	 * - an array of row strings in display order
	 */
	getWindowRows(options) {
		const activeRows = this.createActiveRows();
		const footerBlock = [...this.createFooterRows(), ""];
		const activeBlock = activeRows.length > 0 ? ["", ...activeRows, ""] : [""];
		const maxRows = options?.maxRows;
		const unlimited = maxRows == null || maxRows <= 0;
		if (unlimited) return [...activeBlock, ...footerBlock];
		// Not enough room even for the footer: keep its trailing rows only.
		if (maxRows <= footerBlock.length) return footerBlock.slice(-maxRows);
		const roomForActive = Math.max(0, maxRows - footerBlock.length);
		return [...activeBlock.slice(0, roomForActive), ...footerBlock];
	}
	/** Builds one row per unfinished task, each followed by rows for its slow running cases. */
	createActiveRows() {
		const rows = [];
		const activeTasks = [...this.tasks.values()].filter((task) => task.state !== "finished");
		activeTasks.sort(compareActiveTasks);
		for (const task of activeTasks) {
			const isQueued = task.state === "queued";
			const suffix = isQueued ? c.dim(" [queued]") : ` ${task.completedCases}/${task.totalCases}`;
			const badge = formatProjectBadge(task.projectName, this.options.isTTY);
			const pointer = c.bold(c.yellow(` ${POINTER} `));
			rows.push(pointer + badge + task.displayName + c.dim(suffix));
			// A case is listed only once it has been running for at least `slowThresholdMs`.
			const slowCases = [];
			for (const activeCase of task.runningCases.values()) {
				const runningFor = this.options.getNow() - activeCase.startedAt;
				if (runningFor >= this.options.slowThresholdMs) slowCases.push(activeCase);
			}
			slowCases.sort((left, right) => left.order - right.order);
			const lastIndex = slowCases.length - 1;
			slowCases.forEach((activeCase, index) => {
				const icon = index === lastIndex ? TREE_NODE_END : TREE_NODE_MIDDLE;
				const elapsed = Math.max(0, this.options.getNow() - activeCase.startedAt);
				rows.push(c.bold(c.yellow(` ${icon} `)) + activeCase.caseName + c.bold(c.yellow(` ${formatDuration$1(elapsed)}`)));
			});
		}
		return rows;
	}
	/** Builds the four footer rows: task counters, case counters, start time, elapsed duration. */
	createFooterRows() {
		const tasksRow = padSummaryTitle("Tasks") + formatCounterState(this.taskCounters);
		const casesRow = padSummaryTitle("Cases") + formatCounterState(this.caseCounters);
		const startRow = padSummaryTitle("Start at") + this.startTime;
		const elapsedMs = Math.max(0, this.options.getNow() - this.startedAtMs);
		const durationRow = padSummaryTitle("Duration") + formatDuration$1(elapsedMs);
		return [
			tasksRow,
			casesRow,
			startRow,
			durationRow
		];
	}
	/** Returns the stored record for `taskId`, creating a queued one on first sight. */
	getOrCreateTaskState(taskId) {
		const known = this.tasks.get(taskId);
		if (known != null) return known;
		const queueOrder = this.queueOrderCounter;
		this.queueOrderCounter = queueOrder + 1;
		const created = {
			caseOrderCounter: 0,
			completedCases: 0,
			displayName: taskId,
			projectName: void 0,
			queueOrder,
			runningCases: /* @__PURE__ */ new Map(),
			settledCaseIds: /* @__PURE__ */ new Set(),
			startedAt: void 0,
			state: "queued",
			taskId,
			taskResult: void 0,
			totalCases: 0
		};
		this.tasks.set(taskId, created);
		return created;
	}
	/**
	 * Raises the task's case total to the largest of: the stored total, the
	 * reported total, and the number of cases seen so far (completed + running);
	 * then recomputes the overall case total across all tasks.
	 */
	syncTaskTotalCases(task, reportedTotalCases) {
		const observedTotalCases = task.completedCases + task.runningCases.size;
		const reported = reportedTotalCases ?? 0;
		task.totalCases = Math.max(task.totalCases, reported, observedTotalCases);
		this.caseCounters.total = sumTaskCaseTotals(this.tasks.values());
	}
};
336
/**
 * Factory for the live summary reporter.
 *
 * Use when:
 * - a caller wants lifecycle hooks plus `getWindowRows()` without writing to the terminal directly
 *
 * Expects:
 * - `options` is forwarded unchanged to {@link SummaryReporterStateMachine}
 *
 * Returns:
 * - a new {@link SummaryReporterStateMachine} instance
 */
function createSummaryReporter(options) {
	const reporter = new SummaryReporterStateMachine(options);
	return reporter;
}
361
/** Creates a fresh counter record with every field set to zero. */
function createCounterState() {
	const zeroed = { completed: 0, failed: 0, passed: 0, skipped: 0, total: 0 };
	return zeroed;
}
370
/**
 * Zeroes the outcome fields of `counter` in place and sets its `total`.
 *
 * Returns:
 * - no direct value
 */
function resetCounterState(counter, total) {
	Object.assign(counter, {
		completed: 0,
		failed: 0,
		passed: 0,
		skipped: 0,
		total
	});
}
377
/** Adds up `totalCases` over every task yielded by the `tasks` iterable. */
function sumTaskCaseTotals(tasks) {
	return Array.from(tasks).reduce((runningTotal, task) => runningTotal + task.totalCases, 0);
}
382
/**
 * Sort comparator for active task rows.
 *
 * Orders by project name (a missing name sorts as the empty string), then by
 * display name, then by the order in which the tasks were first seen.
 */
function compareActiveTasks(left, right) {
	const leftProject = left.projectName ?? "";
	const rightProject = right.projectName ?? "";
	if (leftProject === rightProject) {
		const byDisplayName = left.displayName.localeCompare(right.displayName);
		return byDisplayName !== 0 ? byDisplayName : left.queueOrder - right.queueOrder;
	}
	return leftProject.localeCompare(rightProject);
}
390
/** Pads a footer label to eight characters, dims it, and appends two spaces. */
function padSummaryTitle(label) {
	const paddedLabel = label.padEnd(8);
	return `${c.dim(paddedLabel)}  `;
}
393
/**
 * Formats a counter record as "N passed | N failed | N skipped (total)".
 *
 * Failed and skipped counts are dimmed while they are zero and highlighted
 * (bold red / yellow) once they are positive.
 */
function formatCounterState(counter) {
	const passedText = c.bold(c.green(`${counter.passed} passed`));
	const failedLabel = `${counter.failed} failed`;
	const failedText = counter.failed > 0 ? c.bold(c.red(failedLabel)) : c.dim(failedLabel);
	const skippedLabel = `${counter.skipped} skipped`;
	const skippedText = counter.skipped > 0 ? c.yellow(skippedLabel) : c.dim(skippedLabel);
	const separator = c.dim(" | ");
	const totalText = c.gray(` (${counter.total})`);
	return [passedText, failedText, skippedText].join(separator) + totalText;
}
400
/** Returns the part of `date.toTimeString()` that precedes the first space. */
function formatTimeString(date) {
	const [clockPart = ""] = date.toTimeString().split(" ");
	return clockPart;
}
403
/**
 * Formats a millisecond duration for the summary window.
 *
 * Values of one second or more become seconds with two decimals; anything
 * else is rounded to whole milliseconds.
 */
function formatDuration$1(durationMs) {
	const asSeconds = durationMs >= 1e3;
	return asSeconds ? `${(durationMs / 1e3).toFixed(2)}s` : `${Math.round(durationMs)}ms`;
}
407
/**
 * Formats the project label shown in front of a task name.
 *
 * Returns:
 * - an empty string when there is no project name
 * - `|name| ` when the output is not a TTY or colors are unsupported
 * - otherwise the name on a background color picked deterministically from the name
 */
function formatProjectBadge(projectName, isTTY) {
	if (projectName == null || projectName.length === 0) return "";
	if (!isTTY || !c.isColorSupported) return `|${projectName}| `;
	const backgroundPool = [
		c.bgYellow,
		c.bgCyan,
		c.bgGreen,
		c.bgMagenta
	];
	// Hash: sum of each UTF-16 code unit plus its position, so one name always maps to one color.
	let hash = 0;
	for (let position = 0; position < projectName.length; position += 1) hash += projectName.charCodeAt(position) + position;
	const background = backgroundPool[hash % backgroundPool.length];
	return `${c.black(background(` ${projectName} `))} `;
}
419
+ //#endregion
420
+ //#region src/cli/reporters/index.ts
421
/**
 * Picks the reporter implementation for the current output mode.
 *
 * Expects:
 * - `options.isTTY` tells whether the live summary reporter should be used
 *
 * Returns:
 * - a summary reporter when `options.isTTY` is truthy, otherwise a noop reporter
 */
function createCliReporter(options) {
	return options.isTTY ? createSummaryReporter(options) : createNoopReporter();
}
438
+ //#endregion
439
+ //#region src/cli/reporters/renderers/windowed-renderer.ts
440
// Refresh period applied when the caller does not supply `intervalMs`.
const DEFAULT_RENDER_INTERVAL_MS = 1e3;
// Escape prefix shared by the terminal control sequences below.
const ESC = "\x1B[";
const CARRIAGE_RETURN = "\r";
const CLEAR_LINE = ESC + "K";
const MOVE_CURSOR_ONE_ROW_UP = ESC + "1A";
// Written before and after a redraw, bracketing the window update.
const SYNC_START = ESC + "?2026h";
const SYNC_END = ESC + "?2026l";
447
/**
 * Redraws a block of rows in place and buffers log output written meanwhile.
 *
 * Use when:
 * - a reporter wants periodic in-place redraws driven by injected callbacks
 *
 * Expects:
 * - `getWindow()` returns the rows to draw and `getColumns()` the terminal width
 * - `writeOutput(text)` is the only sink the renderer writes to
 * - `createInterval` and `clearInterval` are either both supplied or both omitted
 *
 * Returns:
 * - no direct value; every effect goes through the injected callbacks
 *
 * Call stack:
 *
 * {@link WindowRenderer.start}
 *   -> interval callback
 *     -> {@link WindowRenderer.schedule}
 *       -> {@link WindowRenderer.renderWindow}
 */
var WindowRenderer = class {
	options;
	/** Handle `{ clear, unref }` of the running refresh timer, if any. */
	renderInterval;
	/** True while a render has happened and the reset callback has not fired yet. */
	renderScheduled = false;
	/** Bumped on start/finish so reset callbacks from an earlier cycle are ignored. */
	renderScheduleVersion = 0;
	/** Number of terminal rows occupied by the last ANSI-mode draw. */
	windowHeight = 0;
	started = false;
	finished = false;
	bufferedOutput = "";
	constructor(options) {
		const useInjectedTimers = options.createInterval && options.clearInterval;
		// Injected timers are wrapped into the `{ clear, unref }` handle shape the renderer uses internally.
		const createInterval = useInjectedTimers
			? (callback, intervalMs) => {
				const timer = options.createInterval(callback, intervalMs);
				return {
					clear: () => options.clearInterval(timer),
					unref: timer.unref?.bind(timer)
				};
			}
			: defaultCreateInterval;
		this.options = {
			createInterval,
			getColumns: options.getColumns,
			getWindow: options.getWindow,
			intervalMs: options.intervalMs ?? DEFAULT_RENDER_INTERVAL_MS,
			queueRenderReset: options.queueRenderReset ?? defaultQueueRenderReset,
			supportsAnsiWindowing: options.supportsAnsiWindowing ?? true,
			writeOutput: options.writeOutput
		};
	}
	/**
	 * Starts the refresh timer.
	 *
	 * Expects:
	 * - calling it while already running does nothing
	 *
	 * Returns:
	 * - no direct value
	 */
	start() {
		const alreadyRunning = this.started && !this.finished;
		if (alreadyRunning) return;
		this.started = true;
		this.finished = false;
		this.renderScheduleVersion += 1;
		if (this.renderInterval) return;
		this.renderInterval = this.options.createInterval(() => this.schedule(), this.options.intervalMs);
		this.renderInterval.unref?.();
	}
	/**
	 * Renders immediately unless a render already happened since the last reset.
	 *
	 * Expects:
	 * - ignored before `start()` and after `finish()`
	 *
	 * Returns:
	 * - no direct value
	 */
	schedule() {
		const inactive = !this.started || this.finished;
		if (inactive || this.renderScheduled) return;
		const versionAtSchedule = this.renderScheduleVersion;
		this.renderScheduled = true;
		this.renderWindow();
		this.options.queueRenderReset(() => {
			// A start/finish in between invalidates this reset.
			if (this.renderScheduleVersion === versionAtSchedule) this.renderScheduled = false;
		});
	}
	/**
	 * Stops the timer, erases the drawn window and flushes buffered output.
	 *
	 * Expects:
	 * - repeated calls are ignored
	 *
	 * Returns:
	 * - no direct value
	 */
	finish() {
		if (this.finished) return;
		this.finished = true;
		this.started = false;
		this.renderScheduleVersion += 1;
		this.renderScheduled = false;
		this.stopInterval();
		this.clearWindow();
		this.flushBufferedOutput();
	}
	/**
	 * Same as {@link WindowRenderer.finish}.
	 *
	 * Returns:
	 * - no direct value
	 */
	dispose() {
		this.finish();
	}
	/**
	 * Same as {@link WindowRenderer.dispose}.
	 *
	 * Returns:
	 * - no direct value
	 */
	stop() {
		this.dispose();
	}
	/**
	 * Emits a message through the renderer.
	 *
	 * Expects:
	 * - while running in ANSI window mode the message is buffered until the next render or `finish()`
	 * - otherwise it is written straight away
	 *
	 * Returns:
	 * - no direct value
	 */
	write(message) {
		if (this.isActiveWindowMode()) {
			this.bufferedOutput += message;
			return;
		}
		this.writeOutput(message);
	}
	/** Draws the current window; in ANSI mode the previous window is erased first. */
	renderWindow() {
		const windowContent = this.options.getWindow();
		const rowCount = getRenderedRowCount(windowContent, this.options.getColumns());
		const ansi = this.options.supportsAnsiWindowing;
		if (ansi) {
			this.writeOutput(SYNC_START);
			this.clearWindow();
		}
		this.flushBufferedOutput();
		this.writeOutput(windowContent.join("\n"));
		if (!ansi) {
			// Without ANSI windowing nothing is erased later, so end the line and track no height.
			this.writeOutput("\n");
			this.windowHeight = 0;
			return;
		}
		this.writeOutput(SYNC_END);
		this.windowHeight = rowCount;
	}
	/** Erases the rows of the last ANSI-mode draw, from the current row upwards. */
	clearWindow() {
		if (!this.options.supportsAnsiWindowing || this.windowHeight === 0) return;
		this.writeOutput(`${CARRIAGE_RETURN}${CLEAR_LINE}`);
		let remainingRows = this.windowHeight - 1;
		while (remainingRows > 0) {
			this.writeOutput(`${CARRIAGE_RETURN}${MOVE_CURSOR_ONE_ROW_UP}${CLEAR_LINE}`);
			remainingRows -= 1;
		}
		this.windowHeight = 0;
	}
	/** Clears and forgets the refresh timer, if one is running. */
	stopInterval() {
		const interval = this.renderInterval;
		if (!interval) return;
		interval.clear();
		this.renderInterval = void 0;
	}
	/** Forwards `message` to the injected `writeOutput` callback. */
	writeOutput(message) {
		this.options.writeOutput(message);
	}
	/** Writes everything buffered by `write()` in one call and empties the buffer. */
	flushBufferedOutput() {
		const pending = this.bufferedOutput;
		if (pending.length === 0) return;
		this.writeOutput(pending);
		this.bufferedOutput = "";
	}
	/** True while the renderer is running and ANSI windowing is enabled. */
	isActiveWindowMode() {
		return this.started && !this.finished && this.options.supportsAnsiWindowing;
	}
};
663
/**
 * Starts a repeating timer with the global `setInterval`.
 *
 * Returns:
 * - a handle with `clear()` to stop the timer and `unref` bound to the timer when it provides one
 */
function defaultCreateInterval(callback, intervalMs) {
	const timer = globalThis.setInterval(callback, intervalMs);
	const clear = () => globalThis.clearInterval(timer);
	const unref = timer.unref?.bind(timer);
	return { clear, unref };
}
670
/** Runs `callback` after 100 ms on a timer that does not keep the process alive. */
function defaultQueueRenderReset(callback) {
	const resetTimer = setTimeout(callback, 100);
	resetTimer.unref();
}
673
/**
 * Counts how many terminal rows `rows` occupy at the given width.
 *
 * Each row counts for at least one terminal row; a row wider than `columns`
 * counts once per started chunk of `columns` cells. Widths are measured after
 * removing control sequences, and `columns` is treated as at least 1.
 */
function getRenderedRowCount(rows, columns) {
	const safeColumns = Math.max(1, columns);
	return rows.reduce((count, row) => {
		const visibleText = stripVTControlCharacters(row);
		const wrappedRows = Math.ceil(getTextDisplayWidth(visibleText) / safeColumns);
		return count + Math.max(1, wrappedRows);
	}, 0);
}
683
/** Measures `text` with `stringWidth` after removing control sequences. */
function getTextDisplayWidth(text) {
	const visibleText = stripVTControlCharacters(text);
	return stringWidth(visibleText);
}
686
+ //#endregion
687
+ //#region src/cli/run.ts
688
/**
 * Decides whether colored output should be produced.
 *
 * Precedence:
 * - a non-empty `NO_COLOR` disables color; an empty value is treated as unset,
 *   as the NO_COLOR convention requires
 * - otherwise a present `FORCE_COLOR` enables color unless it is exactly "0"
 * - otherwise color follows whether stdout is a TTY
 *
 * Returns:
 * - `true` when color should be used
 */
function shouldUseColor() {
	const noColor = process.env.NO_COLOR;
	if (noColor != null && noColor !== "") return false;
	const forceColor = process.env.FORCE_COLOR;
	if (forceColor != null) return forceColor !== "0";
	return process.stdout.isTTY === true;
}
694
/**
 * Builds the set of color helpers used by the run output.
 *
 * Returns:
 * - when `enabled` is falsy: helpers that return their input unchanged
 * - otherwise: helpers that delegate to the matching `c` function at call time
 */
function createColorPalette(enabled) {
	const helperNames = [
		"black",
		"bgCyan",
		"bgGreen",
		"bgMagenta",
		"bgYellow",
		"dim",
		"gray",
		"green",
		"red",
		"yellow"
	];
	const palette = {};
	for (const helperName of helperNames) {
		palette[helperName] = enabled ? (value) => c[helperName](value) : (value) => value;
	}
	return palette;
}
720
/**
 * Formats the project label used in run output.
 *
 * Returns:
 * - `|name| ` when color is disabled or unsupported
 * - otherwise the name on a background from `colors`, picked deterministically from the name
 */
function createProjectBadge(name, colors, colorEnabled) {
	if (!colorEnabled || !c.isColorSupported) return `|${name}| `;
	const labelColorPool = [
		colors.bgYellow,
		colors.bgCyan,
		colors.bgGreen,
		colors.bgMagenta
	];
	// Hash: sum of each UTF-16 code unit plus its position, so one name always maps to one color.
	let hash = 0;
	for (let position = 0; position < name.length; position += 1) hash += name.charCodeAt(position) + position;
	const background = labelColorPool[hash % labelColorPool.length];
	return `${colors.black(background(` ${name} `))} `;
}
731
/**
 * Formats an optional millisecond duration as a colored ` <n>ms` suffix.
 *
 * Returns:
 * - an empty string when `durationMs` is null or undefined
 * - otherwise the rounded value, yellow when above 1000 and green otherwise, with a dimmed unit
 */
function formatDuration(durationMs, colors) {
	if (durationMs == null) return "";
	const rounded = Math.round(durationMs);
	const paint = rounded > 1e3 ? colors.yellow : colors.green;
	return paint(` ${rounded}${colors.dim("ms")}`);
}
736
/**
 * Imports each eval file and collects the definitions it provides.
 *
 * For every path the file is imported under a unique query string, bracketed
 * by `beginModuleRegistration` / `endModuleRegistration`. Definitions registered
 * during the import are combined with the module's default export (when it has
 * one), and duplicates sharing the same name, description and task id are
 * dropped, keeping the first.
 *
 * Returns:
 * - an object mapping a module key to `{ default: definition }`; the first
 *   definition of a file uses the file URL as key, later ones append
 *   `#registration-<n>`
 */
async function loadEvalModules(evalFilePaths) {
	const describeDefinition = (definition) => `${definition.name}::${definition.description}::${definition.task?.id ?? ""}`;
	const loadedModules = {};
	for (const [moduleIndex, evalFilePath] of evalFilePaths.entries()) {
		const moduleHref = pathToFileURL(evalFilePath).href;
		// The timestamp and index make the specifier unique for every load.
		const importHref = `${moduleHref}?vieval_load=${Date.now()}_${moduleIndex}`;
		beginModuleRegistration(importHref);
		try {
			const moduleValue = await import(importHref);
			const definitions = [...consumeModuleRegistrations(importHref)];
			const defaultDefinition = moduleValue.default;
			if (defaultDefinition != null) definitions.push(defaultDefinition);
			const seenKeys = /* @__PURE__ */ new Set();
			const deduplicatedDefinitions = definitions.filter((definition) => {
				const key = describeDefinition(definition);
				if (seenKeys.has(key)) return false;
				seenKeys.add(key);
				return true;
			});
			deduplicatedDefinitions.forEach((definition, definitionIndex) => {
				const moduleKey = definitionIndex === 0 ? moduleHref : `${moduleHref}#registration-${definitionIndex + 1}`;
				loadedModules[moduleKey] = { default: definition };
			});
		} finally {
			endModuleRegistration();
		}
	}
	return loadedModules;
}
762
/**
 * Selects the projects whose `name` appears in `names`.
 *
 * Returns:
 * - a copy of `projects` when `names` is empty, otherwise the matching projects in their original order
 */
function filterProjectsByName(projects, names) {
	if (names.length === 0) return [...projects];
	const wantedNames = new Set(names);
	const selected = [];
	for (const project of projects) {
		if (wantedNames.has(project.name)) selected.push(project);
	}
	return selected;
}
767
/**
 * Applies environment overrides to `process.env` and returns an undo function.
 *
 * Expects:
 * - a `null`/`undefined` override value removes the variable
 *
 * Returns:
 * - a function that puts every touched variable back to its previous state
 *   (removing it when it did not exist before); a no-op when `env` is empty
 */
function applyRunEnvironment(env) {
	const overrides = Object.entries(env);
	if (overrides.length === 0) return () => {};
	const previousValues = /* @__PURE__ */ new Map();
	for (const [key, value] of overrides) {
		// Remember the prior state before touching the variable.
		previousValues.set(key, {
			existed: Object.hasOwn(process.env, key),
			value: process.env[key]
		});
		if (value != null) process.env[key] = value;
		else delete process.env[key];
	}
	return () => {
		previousValues.forEach((previous, key) => {
			if (previous.existed && previous.value != null) process.env[key] = previous.value;
			else delete process.env[key];
		});
	};
}
796
/** Tells whether `reporter` exposes `getWindowRows` (own or inherited). */
function isSummaryReporter(reporter) {
	return Reflect.has(reporter, "getWindowRows");
}
799
/**
 * Creates the reporter used for one CLI run.
 *
 * Every option is optional; missing ones default to the process streams,
 * `Date.now()`, the stdout TTY flag and a slow-case threshold of 300 ms.
 *
 * Returns:
 * - when the underlying reporter has no `getWindowRows`: a copy of it whose
 *   `onCaseStart` / `onTaskQueued` forward to the original
 * - otherwise: a reporter that forwards each lifecycle event to the summary
 *   reporter and then asks a started {@link WindowRenderer} to redraw
 */
function createRunReporter(options) {
	const getColumns = options?.getColumns ?? (() => process.stdout.columns ?? 80);
	const writeOutput = options?.writeOutput ?? ((value) => process.stdout.write(value));
	const reporter = createCliReporter({
		getColumns,
		getNow: options?.getNow ?? (() => Date.now()),
		getWallClockNow: options?.getWallClockNow ?? (() => Date.now()),
		isTTY: options?.isTTY ?? process.stdout.isTTY === true,
		slowThresholdMs: options?.slowThresholdMs ?? 300,
		writeError: options?.writeError ?? ((value) => process.stderr.write(value)),
		writeOutput
	});
	if (!isSummaryReporter(reporter)) {
		return {
			...reporter,
			onCaseStart(payload) {
				reporter.onCaseStart(payload);
			},
			onTaskQueued(payload) {
				reporter.onTaskQueued(payload);
			}
		};
	}
	const rendererOptions = {
		getColumns,
		getWindow: () => reporter.getWindowRows(),
		queueRenderReset: options?.queueRenderReset,
		supportsAnsiWindowing: options?.supportsAnsiWindowing,
		writeOutput
	};
	// Timer overrides are passed on only as a pair; a lone override is ignored.
	if (options?.clearInterval != null && options.createInterval != null) {
		rendererOptions.clearInterval = options.clearInterval;
		rendererOptions.createInterval = options.createInterval;
	}
	const renderer = new WindowRenderer(rendererOptions);
	renderer.start();
	// Builds a hook that updates the summary state first, then requests a redraw.
	const forwardThenRender = (hookName) => (payload) => {
		reporter[hookName](payload);
		renderer.schedule();
	};
	return {
		dispose() {
			reporter.dispose();
			renderer.dispose();
		},
		onCaseEnd: forwardThenRender("onCaseEnd"),
		onCaseStart: forwardThenRender("onCaseStart"),
		onRunEnd: forwardThenRender("onRunEnd"),
		onRunStart: forwardThenRender("onRunStart"),
		onTaskEnd: forwardThenRender("onTaskEnd"),
		onTaskQueued: forwardThenRender("onTaskQueued"),
		onTaskStart: forwardThenRender("onTaskStart")
	};
}
869
/**
 * Builds the `onTaskQueued` payload that announces a scheduled task.
 *
 * @param {object} task - Scheduled task; `task.id` and `task.entry.name` are read.
 * @param {string} projectName - Name of the project that owns the task.
 * @returns {{displayName: *, projectName: string, taskId: *}} Reporter payload.
 */
function createTaskQueuePayload(task, projectName) {
  const { entry, id: taskId } = task;
  return {
    displayName: entry.name,
    projectName,
    taskId
  };
}
876
/**
 * Derives the reporter-facing case id from a case payload.
 *
 * The id is `<index>:<URI-encoded name>`.
 *
 * @param {{index: *, name: string}} payload - Case payload carrying `index` and `name`.
 * @returns {string} Case id string.
 */
function createTaskCaseReporterId(payload) {
  const caseIndex = payload.index;
  const encodedName = encodeURIComponent(payload.name);
  return `${caseIndex}:${encodedName}`;
}
879
/**
 * Creates the per-task case hooks that bridge case events to the run reporter.
 *
 * Both hooks first re-announce the task via `reporter.onTaskQueued` with the
 * latest `totalCases`, then forward the case event. `onCaseEnd` additionally
 * tallies each distinct case once into `projectCaseCounters` when counters are
 * supplied.
 *
 * @param {object} task - Scheduled task; `task.id` is read on every event.
 * @param {object} reporter - Reporter receiving `onTaskQueued` / `onCaseStart` / `onCaseEnd`.
 * @param {object|null|undefined} projectCaseCounters - Optional tally with
 *   `passed`, `failed`, `skipped` counters and a `seenCaseIds` set.
 * @returns {{onCaseEnd: Function, onCaseStart: Function}} Hook pair.
 */
function createTaskReporterHooks(task, reporter, projectCaseCounters) {
  const publishCaseTotal = (totalCases) => {
    reporter.onTaskQueued({
      taskId: task.id,
      totalCases
    });
  };

  // Count a case outcome once per `<taskId>:<caseId>`; repeats are ignored.
  const tallyCaseOutcome = (caseId, state) => {
    if (projectCaseCounters == null) return;
    const projectCaseId = `${task.id}:${caseId}`;
    if (projectCaseCounters.seenCaseIds.has(projectCaseId)) return;
    projectCaseCounters.seenCaseIds.add(projectCaseId);
    switch (state) {
      case "passed":
        projectCaseCounters.passed += 1;
        break;
      case "failed":
        projectCaseCounters.failed += 1;
        break;
      default:
        // Every state other than "passed" / "failed" is counted as skipped.
        projectCaseCounters.skipped += 1;
    }
  };

  return {
    onCaseEnd(payload) {
      const caseId = createTaskCaseReporterId(payload);
      tallyCaseOutcome(caseId, payload.state);
      publishCaseTotal(payload.total);
      reporter.onCaseEnd({
        caseId,
        state: payload.state,
        taskId: task.id
      });
    },
    onCaseStart(payload) {
      const caseId = createTaskCaseReporterId(payload);
      publishCaseTotal(payload.total);
      reporter.onCaseStart({
        caseId,
        caseName: payload.name,
        taskId: task.id
      });
    }
  };
}
916
/**
 * Creates the execution context for one task and attaches CLI reporter hooks.
 *
 * @param {object} task - Scheduled task the context is created for.
 * @param {*} models - Models passed through to `createTaskExecutionContext`.
 * @param {object} reporter - Run reporter that receives case events.
 * @param {object|null|undefined} projectCaseCounters - Optional per-project case tally.
 * @returns {object} Base execution context plus a `reporterHooks` property.
 */
function createCliTaskExecutionContext(task, models, reporter, projectCaseCounters) {
  const baseContext = createTaskExecutionContext({
    models,
    task
  });
  const reporterHooks = createTaskReporterHooks(task, reporter, projectCaseCounters);
  return { ...baseContext, reporterHooks };
}
925
/**
 * Picks the reporter hooks for a task run.
 *
 * Hooks already present on the execution context win. New hooks are created
 * only when `context.reporterHooks` is `null` or `undefined`.
 *
 * @param {object} task - Scheduled task.
 * @param {object} context - Execution context, possibly carrying `reporterHooks`.
 * @param {object} reporter - Run reporter used when hooks must be created.
 * @param {object|null|undefined} projectCaseCounters - Optional per-project case tally.
 * @returns {object} Reporter hooks to hand to the task.
 */
function resolveTaskReporterHooks(task, context, reporter, projectCaseCounters) {
  const contextHooks = context.reporterHooks;
  if (contextHooks != null) return contextHooks;
  return createTaskReporterHooks(task, reporter, projectCaseCounters);
}
928
/**
 * Extracts the failing task id from an execution error.
 *
 * @param {*} error - Value caught from a task run.
 * @returns {*} `error.taskId` for a `RunnerExecutionError`, otherwise `null`.
 */
function getFailedTaskId(error) {
  return error instanceof RunnerExecutionError ? error.taskId : null;
}
932
/**
 * Creates the default task executor, which runs the task definition attached
 * to the scheduled entry (`task.entry.task`).
 *
 * @param {object} reporter - Run reporter used when hooks must be created.
 * @param {object|null|undefined} projectCaseCounters - Optional per-project case tally.
 * @returns {Function} Async `(task, context)` executor resolving to the task result record.
 *   It rejects with an `Error` when the entry has no task definition.
 */
function createAutoTaskExecutor(reporter, projectCaseCounters) {
  return async function executeEntryTask(task, context) {
    const { entry } = task;
    const taskDefinition = entry.task;
    if (taskDefinition == null) {
      throw new Error(`Missing eval task definition for entry "${entry.id}".`);
    }
    const output = await taskDefinition.run({
      model: context.model,
      reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectCaseCounters),
      task
    });
    return {
      entryId: task.entry.id,
      id: task.id,
      matrix: task.matrix,
      inferenceExecutorId: task.inferenceExecutor.id,
      // Copy so the result does not share the task's own scores array.
      scores: [...output.scores]
    };
  };
}
950
/**
 * Makes a copy of a scheduled task's matrix, one level deep.
 *
 * The `eval`, `meta` and `run` sections are each shallow-copied, so the result
 * shares no section object with `task.matrix`.
 *
 * @param {object} task - Scheduled task with `matrix.eval`, `matrix.meta` and `matrix.run`.
 * @returns {{eval: object, meta: object, run: object}} Copied matrix.
 */
function cloneScheduledTaskMatrix(task) {
  const { eval: evalSection, meta: metaSection, run: runSection } = task.matrix;
  return {
    eval: { ...evalSection },
    meta: { ...metaSection },
    run: { ...runSection }
  };
}
957
/**
 * Summarizes the matrix dimensions covered by a list of scheduled tasks.
 *
 * @param {object[]} tasks - Scheduled tasks; each exposes `matrix.run`, `matrix.eval`
 *   and `matrix.meta.{runRowId, evalRowId}`.
 * @returns {{evalAxes: string[], evalRows: number, runAxes: string[], runRows: number}|null}
 *   Sorted distinct axis names and distinct row counts, or `null` for an empty task list.
 */
function createProjectMatrixSummary(tasks) {
  if (tasks.length === 0) return null;

  const runAxisNames = new Set();
  const evalAxisNames = new Set();
  const runRowIds = new Set();
  const evalRowIds = new Set();

  for (const { matrix } of tasks) {
    for (const axis of Object.keys(matrix.run)) runAxisNames.add(axis);
    for (const axis of Object.keys(matrix.eval)) evalAxisNames.add(axis);
    runRowIds.add(matrix.meta.runRowId);
    evalRowIds.add(matrix.meta.evalRowId);
  }

  const sortedEvalAxes = [...evalAxisNames].sort();
  const sortedRunAxes = [...runAxisNames].sort();
  return {
    evalAxes: sortedEvalAxes,
    evalRows: evalRowIds.size,
    runAxes: sortedRunAxes,
    runRows: runRowIds.size
  };
}
976
/**
 * Discovers, loads and schedules the eval tasks of one project.
 *
 * Resolves to one of two shapes:
 * - `{ kind: "prepared", prepared }` when the project has an explicit executor,
 *   or at least one entry carries a task definition and the project lists models;
 * - `{ kind: "summary", summary }` with `executed: false` otherwise, and also
 *   when any preparation step throws (the message lands in `summary.errorMessage`).
 *
 * @param {object} project - Project configuration (`root`, `include`, `exclude`,
 *   matrices, `inferenceExecutors`, `models`, optional `executor`, `name`).
 * @returns {Promise<object>} Prepared project or a not-executed summary.
 */
async function prepareProject(project) {
  const startedAt = Date.now();
  try {
    const runtimeContext = await createRunnerRuntimeContext({
      cwd: project.root,
      fallbackProjectRootDirectory: project.root
    });
    const evalFilePaths = await discoverEvalFiles({
      exclude: project.exclude,
      include: project.include,
      root: project.root
    });
    const loadedModules = await loadEvalModules(evalFilePaths);
    const entries = collectEvalEntries(loadedModules, runtimeContext);
    const tasks = createRunnerSchedule({
      evalMatrix: project.evalMatrix,
      entries,
      inferenceExecutors: project.inferenceExecutors,
      runMatrix: project.runMatrix
    });

    // Entry-level tasks can only run automatically when models are configured.
    const hasEntryTask = entries.some((entry) => entry.task != null);
    const canAutoExecuteEntryTasks = hasEntryTask && project.models.length > 0;

    if (project.executor != null || canAutoExecuteEntryTasks) {
      return {
        kind: "prepared",
        prepared: {
          discoveredEvalFileCount: evalFilePaths.length,
          entryCount: entries.length,
          name: project.name,
          project,
          startedAt,
          tasks
        }
      };
    }

    // Nothing can execute these tasks: report what was scheduled without running it.
    return {
      kind: "summary",
      summary: {
        caseSummary: null,
        discoveredEvalFileCount: evalFilePaths.length,
        durationMs: Date.now() - startedAt,
        entryCount: entries.length,
        errorMessage: null,
        executed: false,
        matrixSummary: createProjectMatrixSummary(tasks),
        name: project.name,
        result: null,
        taskCount: tasks.length
      }
    };
  } catch (failure) {
    return {
      kind: "summary",
      summary: {
        caseSummary: null,
        discoveredEvalFileCount: 0,
        durationMs: Date.now() - startedAt,
        entryCount: 0,
        errorMessage: errorMessageFrom(failure) ?? "Unknown project execution error.",
        executed: false,
        matrixSummary: null,
        name: project.name,
        result: null,
        taskCount: 0
      }
    };
  }
}
1040
/**
 * Executes every scheduled task of a prepared project and builds its summary.
 *
 * Task results are stamped with a fresh copy of the task matrix. When the run
 * throws, the task named by a `RunnerExecutionError` is reported as failed
 * (unless it already settled), every task that never settled is reported as
 * skipped, and the summary carries the error message with `executed: false`.
 *
 * @param {object} prepared - Output of project preparation (`project`, `tasks`,
 *   `name`, `startedAt`, `discoveredEvalFileCount`, `entryCount`).
 * @param {object} reporter - Run reporter receiving task lifecycle events.
 * @param {{passedTasks: number, failedTasks: number, skippedTasks: number}} counters -
 *   Run-wide task counters, mutated in place.
 * @returns {Promise<object>} Project summary.
 */
async function executePreparedProject(prepared, reporter, counters) {
  const settledTaskIds = new Set();
  const projectCaseCounters = {
    failed: 0,
    passed: 0,
    seenCaseIds: new Set(),
    skipped: 0
  };
  const rawTaskExecutor = prepared.project.executor ?? createAutoTaskExecutor(reporter, projectCaseCounters);

  // Always overwrite `matrix` with a private copy of the scheduled task's matrix.
  const taskExecutor = async (task, context) => {
    const rawResult = await rawTaskExecutor(task, context);
    return { ...rawResult, matrix: cloneScheduledTaskMatrix(task) };
  };

  // Remember that a task reached a final state and tell the reporter about it.
  const markSettled = (taskId, state) => {
    settledTaskIds.add(taskId);
    reporter.onTaskEnd({ state, taskId });
  };

  // Build the summary from the current case tallies plus the outcome-specific fields.
  const buildSummary = ({ errorMessage, executed, result }) => ({
    caseSummary: {
      failed: projectCaseCounters.failed,
      passed: projectCaseCounters.passed,
      skipped: projectCaseCounters.skipped,
      total: projectCaseCounters.seenCaseIds.size
    },
    discoveredEvalFileCount: prepared.discoveredEvalFileCount,
    durationMs: Date.now() - prepared.startedAt,
    entryCount: prepared.entryCount,
    errorMessage,
    executed,
    matrixSummary: createProjectMatrixSummary(prepared.tasks),
    name: prepared.name,
    result,
    taskCount: prepared.tasks.length
  });

  try {
    const aggregated = await runScheduledTasks(prepared.tasks, taskExecutor, {
      createExecutionContext(task) {
        return createCliTaskExecutionContext(task, prepared.project.models, reporter, projectCaseCounters);
      },
      onTaskEnd(task, state) {
        markSettled(task.id, state);
        // Any end state other than "passed" counts as a failed task here.
        if (state === "passed") counters.passedTasks += 1;
        else counters.failedTasks += 1;
      },
      onTaskStart(task) {
        reporter.onTaskStart({ taskId: task.id });
      }
    });
    return buildSummary({ errorMessage: null, executed: true, result: aggregated });
  } catch (failure) {
    const failedTaskId = getFailedTaskId(failure);
    if (failedTaskId != null && !settledTaskIds.has(failedTaskId)) {
      counters.failedTasks += 1;
      markSettled(failedTaskId, "failed");
    }
    // Whatever never reached a final state is reported as skipped.
    for (const task of prepared.tasks) {
      if (!settledTaskIds.has(task.id)) {
        counters.skippedTasks += 1;
        markSettled(task.id, "skipped");
      }
    }
    return buildSummary({
      errorMessage: errorMessageFrom(failure) ?? "Unknown project execution error.",
      executed: false,
      result: null
    });
  }
}
1131
/**
 * Runs vieval orchestration from config and returns project-level summaries.
 *
 * Call stack:
 *
 * {@link runVievalCli}
 *   -> {@link loadVievalCliConfig}
 *   -> {@link discoverEvalFiles}
 *   -> {@link collectEvalEntries}
 *   -> {@link createRunnerSchedule}
 *   -> {@link runScheduledTasks} (optional)
 *
 * Use when:
 * - running eval collection and scheduling from a single command
 * - keeping business-agent eval files near their implementation packages
 *
 * Environment overrides from the loaded config are applied for the duration of
 * the run and are always restored, even when creating or disposing the
 * reporter throws.
 *
 * @param {object} [options] - Run options.
 * @param {string} [options.configFilePath] - Explicit config file path.
 * @param {string} [options.cwd] - Working directory used for config loading.
 * @param {string[]} [options.project] - Project names to run; defaults to `[]`.
 * @param {object} [options.reporter] - Reporter overrides passed to `createRunReporter`.
 * @returns {Promise<{configFilePath: *, projects: object[]}>} Loaded config path and
 *   one summary per selected project.
 */
async function runVievalCli(options = {}) {
  const loadedConfig = await loadVievalCliConfig({
    configFilePath: options.configFilePath,
    cwd: options.cwd
  });
  const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
  try {
    // Created inside the outer try so a failing reporter setup cannot leak
    // the environment overrides applied above.
    const reporter = createRunReporter(options.reporter);
    try {
      const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
      const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
      const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
      const totalTasks = preparedProjects.reduce((sum, project) => {
        if (project.kind === "prepared") return sum + project.prepared.tasks.length;
        return sum + project.summary.taskCount;
      }, 0);
      // Tasks of projects that were summarized without execution count as skipped.
      const skippedSummaryTasks = preparedProjects.reduce((sum, project) => {
        if (project.kind === "summary") return sum + project.summary.taskCount;
        return sum;
      }, 0);
      const reporterCounters = {
        failedTasks: 0,
        passedTasks: 0,
        skippedTasks: 0
      };
      reporter.onRunStart({ totalTasks });
      // Announce every executable task up front, before any project runs.
      for (const project of executableProjects) {
        for (const task of project.tasks) reporter.onTaskQueued(createTaskQueuePayload(task, project.name));
      }
      const projectSummaries = [];
      // Projects run one after another, in the order they were selected.
      for (const preparedProject of preparedProjects) {
        if (preparedProject.kind === "summary") {
          projectSummaries.push(preparedProject.summary);
          continue;
        }
        projectSummaries.push(await executePreparedProject(preparedProject.prepared, reporter, reporterCounters));
      }
      reporter.onRunEnd({
        failedTasks: reporterCounters.failedTasks,
        passedTasks: reporterCounters.passedTasks,
        skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
        totalTasks
      });
      return {
        configFilePath: loadedConfig.configFilePath,
        projects: projectSummaries
      };
    } finally {
      reporter.dispose();
    }
  } finally {
    // Runs even when reporter creation or disposal throws.
    restoreEnvironment();
  }
}
1196
/**
 * Formats CLI run output as human-readable lines.
 *
 * Emits a header, one status line per project (failed, skipped or passed) with
 * optional matrix and schedule detail lines, then project and task totals.
 *
 * @param {{configFilePath: *, projects: object[]}} output - Result of a CLI run.
 * @returns {string} Newline-joined report text.
 */
function formatVievalCliRunOutput(output) {
  const colorEnabled = shouldUseColor();
  const colors = createColorPalette(colorEnabled);
  const lines = [
    ` ${colors.dim("RUN")} ${colors.yellow("vieval")}`,
    ` ${colors.dim("Config")} ${output.configFilePath ?? "(not found, using defaults)"}`,
    ""
  ];
  const projectTally = { failed: 0, passed: 0, skipped: 0 };
  let totalTasks = 0;
  let executedTasks = 0;

  const describeAxes = (axes) => (axes.length === 0 ? "-" : axes.join("|"));

  function formatMatrixSummary(summary) {
    if (summary == null) return null;
    const runAxesLabel = describeAxes(summary.runAxes);
    const evalAxesLabel = describeAxes(summary.evalAxes);
    return `matrix run ${summary.runRows} [${runAxesLabel}] / eval ${summary.evalRows} [${evalAxesLabel}]`;
  }

  function formatScheduleBreakdown(project) {
    const summary = project.matrixSummary;
    if (summary == null) return null;
    if (project.taskCount <= 0 || project.entryCount <= 0 || summary.runRows <= 0 || summary.evalRows <= 0) return null;
    // The breakdown is only shown when the task count divides evenly into
    // entries × run rows × eval rows; the quotient is the executor count.
    const denominator = project.entryCount * summary.runRows * summary.evalRows;
    if (denominator <= 0 || project.taskCount % denominator !== 0) return null;
    const providerCount = project.taskCount / denominator;
    const segments = [
      colors.dim("schedule "),
      colors.yellow(String(project.entryCount)),
      colors.dim(" entries × "),
      colors.yellow(String(providerCount)),
      colors.dim(" inferenceExecutors × "),
      colors.yellow(String(summary.runRows)),
      colors.dim(" run rows × "),
      colors.yellow(String(summary.evalRows)),
      colors.dim(" eval rows = "),
      colors.green(String(project.taskCount)),
      colors.dim(" tasks")
    ];
    return segments.join("");
  }

  // Append the optional matrix / schedule detail lines below a project line.
  function pushProjectDetails(project) {
    const matrixSummary = formatMatrixSummary(project.matrixSummary);
    if (matrixSummary != null) lines.push(` ${colors.dim(matrixSummary)}`);
    const scheduleBreakdown = formatScheduleBreakdown(project);
    if (scheduleBreakdown != null) lines.push(` ${scheduleBreakdown}`);
  }

  for (const project of output.projects) {
    const runCount = project.result?.overall.runCount ?? 0;
    totalTasks += project.taskCount;
    executedTasks += runCount;
    const badge = createProjectBadge(project.name, colors, colorEnabled);

    if (project.errorMessage != null) {
      projectTally.failed += 1;
      lines.push(` ${colors.red("❯")} ${badge}${formatDuration(project.durationMs, colors)}`);
      lines.push(` ${project.errorMessage}`);
      continue;
    }

    const countLabel = colors.dim(`(${project.taskCount} tasks)`);
    if (project.executed) {
      projectTally.passed += 1;
      const hybridAverage = project.result?.overall.hybridAverage;
      const hybridAverageLabel = hybridAverage == null ? "n/a" : String(hybridAverage);
      const caseSummaryLabel = project.caseSummary == null ? "" : `, cases ${project.caseSummary.passed} passed | ${project.caseSummary.failed} failed`;
      const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, ${runCount} runs${caseSummaryLabel}, hybrid ${hybridAverageLabel}`);
      lines.push(` ${colors.green("✓")} ${badge}${countLabel}${detailsLabel}${formatDuration(project.durationMs, colors)}`);
    } else {
      projectTally.skipped += 1;
      const detailsLabel = colors.dim(` ${project.discoveredEvalFileCount} files, ${project.entryCount} entries, 0 runs, hybrid n/a`);
      lines.push(` ${colors.dim("○")} ${badge}${countLabel}${detailsLabel}${formatDuration(project.durationMs, colors)}`);
    }
    pushProjectDetails(project);
  }

  lines.push("");
  // With no skipped or failed projects this collapses to the plain "N passed" form.
  const summarySegments = [`${colors.green(String(projectTally.passed))} passed`];
  if (projectTally.skipped > 0) summarySegments.push(`${colors.dim(String(projectTally.skipped))} skipped`);
  if (projectTally.failed > 0) summarySegments.push(`${colors.red(String(projectTally.failed))} failed`);
  lines.push(` ${colors.dim("Projects")} ${summarySegments.join(" | ")} (${output.projects.length})`);
  lines.push(` ${colors.dim("Tasks")} ${executedTasks} executed / ${totalTasks} scheduled`);
  return lines.join("\n");
}
1282
+ //#endregion
1283
+ //#region src/cli/eval-run.ts
1284
// Help text for the `vieval run` subcommand; it is handed to meow as the help
// message when run arguments are parsed.
+ const evalRunHelpText = `
1285
+ Execute vieval projects from discovered or explicit config.
1286
+
1287
+ Usage
1288
+ $ vieval run [--config <path>] [--project <name>] [--json]
1289
+
1290
+ Options
1291
+ --config Config file path
1292
+ --project Project name to execute; may be repeated
1293
+ --json Print machine-readable JSON output
1294
+ `;
1295
/**
 * Normalizes argv for the `run` subcommand.
 *
 * Drops one leading `--` separator, then one leading `run` token, and returns
 * the remaining arguments as a new array. The input is never mutated.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @returns {string[]} Arguments that belong to the `run` subcommand.
 */
function normalizeCliArgv$1(argv) {
  let remaining = [...argv];
  if (remaining[0] === "--") remaining = remaining.slice(1);
  if (remaining[0] === "run") remaining = remaining.slice(1);
  return remaining;
}
1299
/**
 * Normalizes the `--project` flag value into a list.
 *
 * @param {string|string[]|null|undefined} projectNames - Flag value as parsed.
 * @returns {string[]} A single name wrapped in an array, the given list as-is,
 *   or an empty array when the flag is absent.
 */
function normalizeProjectNames(projectNames) {
  if (projectNames == null) return [];
  return typeof projectNames === "string" ? [projectNames] : projectNames;
}
1303
/**
 * Parses the arguments of `vieval run` with meow.
 *
 * @param {string[]} argv - Raw CLI arguments; a leading `--` and `run` token are tolerated.
 * @returns {{configFilePath: (string|undefined), json: boolean, project: string[]}}
 *   Normalized run options.
 */
function parseCliArguments(argv) {
  const { flags } = meow(evalRunHelpText, {
    argv: normalizeCliArgv$1(argv),
    importMeta: import.meta,
    flags: {
      config: { type: "string" },
      json: {
        default: false,
        type: "boolean"
      },
      project: {
        isMultiple: true,
        type: "string"
      }
    }
  });
  const wantsJson = flags.json === true;
  return {
    configFilePath: flags.config,
    json: wantsJson,
    project: normalizeProjectNames(flags.project)
  };
}
1325
/**
 * Reports whether this module file is the script Node was started with.
 *
 * @returns {boolean} `true` when the resolved `process.argv[1]` equals this
 *   module's file path; `false` when `process.argv[1]` is empty or differs.
 */
function isDirectExecution$1() {
  const entryScript = process.argv[1];
  return entryScript ? path.resolve(entryScript) === fileURLToPath(import.meta.url) : false;
}
1329
/**
 * CLI entrypoint for `vieval run`.
 *
 * Call stack:
 *
 * {@link main}
 *   -> {@link parseCliArguments}(`process.argv`)
 *   -> {@link runVievalCli}
 *   -> `process.stdout.write(...)` / `process.stderr.write(...)`
 *   -> `process.exitCode`
 *
 * Use when:
 * - developers want project-style eval discovery and execution from one command
 * - manual `import.meta.glob` and runner wiring should stay internal
 *
 * Run failures are written to stderr and set `process.exitCode` to 1.
 * Argument parsing happens before the `try`, so its errors propagate to the caller.
 */
async function main$1() {
  const parsed = parseCliArguments(process.argv.slice(2));
  try {
    const output = await runVievalCli({
      configFilePath: parsed.configFilePath,
      project: parsed.project
    });
    // `--json` selects machine-readable output; otherwise print the formatted report.
    const rendered = parsed.json ? JSON.stringify(output, null, 2) : formatVievalCliRunOutput(output);
    process.stdout.write(`${rendered}\n`);
  } catch (failure) {
    const reason = errorMessageFrom(failure) ?? "Unknown CLI failure.";
    process.stderr.write(`[${name}] ${reason}\n`);
    process.exitCode = 1;
  }
}
1362
// Run the `vieval run` entrypoint only when this file is the script Node was
// started with.
// NOTE(review): the later top-level guard in this bundle appears to test the
// same process.argv[1] / import.meta.url pair, so direct execution may run
// main$1() and then main() — confirm this is intended.
+ if (isDirectExecution$1()) await main$1();
1363
+ //#endregion
1364
+ //#region src/cli/index.ts
1365
// Top-level help text: handed to meow when top-level arguments are parsed, and
// printed (trimmed) to stdout when the resolved command is "help".
+ const topLevelHelpText = `
1366
+ Execute and report evaluation projects.
1367
+
1368
+ Usage
1369
+ $ vieval <command> [options]
1370
+
1371
+ Commands
1372
+ run Discover and execute eval projects
1373
+
1374
+ Examples
1375
+ $ vieval run
1376
+ $ vieval run --config vieval.config.ts --project chess --json
1377
+ `;
1378
/**
 * Normalizes top-level argv by dropping one leading `--` separator.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @returns {string[]} New array without the leading separator; the input is not mutated.
 */
function normalizeCliArgv(argv) {
  if (argv[0] === "--") return argv.slice(1);
  return [...argv];
}
1381
/**
 * Resolves the top-level vieval command from argv.
 *
 * A missing command, `help`, `--help` and `-h` all resolve to the `help`
 * command; `run` resolves to itself with the remaining arguments.
 *
 * @param {string[]} argv - Raw CLI arguments.
 * @returns {{command: string, commandArgv: string[]}} Command name and its own arguments.
 * @throws {Error} When the command is anything other than `run` or a help alias.
 */
function parseTopLevelCliArguments(argv) {
  const normalizedArgv = normalizeCliArgv(argv);
  const command = normalizedArgv[0];
  // The meow result is not used here; automatic help/version handling is disabled.
  meow(topLevelHelpText, {
    autoHelp: false,
    autoVersion: false,
    argv: normalizedArgv,
    importMeta: import.meta
  });

  const helpAliases = ["help", "--help", "-h"];
  if (command == null || helpAliases.includes(command)) {
    return {
      command: "help",
      commandArgv: []
    };
  }
  if (command === "run") {
    return {
      command,
      commandArgv: normalizedArgv.slice(1)
    };
  }
  throw new Error(`Unsupported vieval command "${command ?? "(none)"}". Expected "run".`);
}
1400
/**
 * Dispatches the top-level CLI command.
 *
 * `help` prints the trimmed top-level help text. `run` parses the run
 * arguments, executes the projects, and prints JSON or the formatted report.
 * Errors are not caught here and propagate to the caller.
 *
 * @param {string[]} argv - Raw CLI arguments (without the node binary and script path).
 * @returns {Promise<void>}
 */
async function runTopLevelCli(argv) {
  const { command, commandArgv } = parseTopLevelCliArguments(argv);
  if (command === "help") {
    process.stdout.write(`${topLevelHelpText.trim()}\n`);
    return;
  }
  const runArguments = parseCliArguments(commandArgv);
  const output = await runVievalCli({
    configFilePath: runArguments.configFilePath,
    project: runArguments.project
  });
  const rendered = runArguments.json ? JSON.stringify(output, null, 2) : formatVievalCliRunOutput(output);
  process.stdout.write(`${rendered}\n`);
}
1417
/**
 * Reports whether this module file is the script Node was started with.
 *
 * @returns {boolean} `true` when the resolved `process.argv[1]` equals this
 *   module's file path; `false` when `process.argv[1]` is empty or differs.
 */
function isDirectExecution() {
  const entryScript = process.argv[1];
  return entryScript ? path.resolve(entryScript) === fileURLToPath(import.meta.url) : false;
}
1421
/**
 * Process-level entrypoint for the `vieval` binary.
 *
 * Runs the top-level CLI with the process arguments. Any failure is written to
 * stderr with a `[vieval]` prefix and sets `process.exitCode` to 1 instead of
 * rethrowing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const cliArgv = process.argv.slice(2);
    await runTopLevelCli(cliArgv);
  } catch (failure) {
    const reason = errorMessageFrom(failure) ?? "Unknown CLI failure.";
    process.stderr.write(`[vieval] ${reason}\n`);
    process.exitCode = 1;
  }
}
1430
// Run the top-level CLI only when this file is the script Node was started
// with; importing the module must not trigger a run.
// NOTE(review): an earlier top-level guard in this bundle appears to test the
// same process.argv[1] / import.meta.url pair — confirm both entrypoints are
// meant to run on direct execution.
+ if (isDirectExecution()) await main();
1431
+ //#endregion
1432
+ export { parseTopLevelCliArguments, runTopLevelCli };
1433
+
1434
+ //# sourceMappingURL=index.mjs.map