vieval 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +219 -109
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-sanbKtQq.mjs → cli-Dao25VxV.mjs} +1186 -162
- package/dist/cli-Dao25VxV.mjs.map +1 -0
- package/dist/config.d.mts +2 -2
- package/dist/config.mjs +1 -1
- package/dist/core/assertions/index.d.mts +1 -1
- package/dist/core/inference-executors/index.mjs +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +6 -40
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/{env--94B0UtW.mjs → env-BFSjny07.mjs} +1 -1
- package/dist/{env--94B0UtW.mjs.map → env-BFSjny07.mjs.map} +1 -1
- package/dist/{index-DBZKkpBe.d.mts → index-BkjyCInx.d.mts} +102 -37
- package/dist/index.d.mts +14 -6
- package/dist/index.mjs +110 -39
- package/dist/index.mjs.map +1 -1
- package/dist/{models-DIGdOUpJ.mjs → models-pBSRUZhY.mjs} +1 -1
- package/dist/{models-DIGdOUpJ.mjs.map → models-pBSRUZhY.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +69 -6
- package/dist/plugins/chat-models/index.mjs +62 -6
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-CcKZqDJY.mjs → registry-BHGMxjpA.mjs} +140 -4
- package/dist/registry-BHGMxjpA.mjs.map +1 -0
- package/package.json +2 -1
- package/dist/cli-sanbKtQq.mjs.map +0 -1
- package/dist/registry-CcKZqDJY.mjs.map +0 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { c as
|
|
1
|
+
import { a as createOpenTelemetryRuntime, c as detectCliConfigMode, d as loadVievalCliConfig, n as consumeModuleRegistrations, o as createNoopTelemetryRuntime, r as endModuleRegistration, t as beginModuleRegistration, u as loadRawVievalConfig } from "./registry-BHGMxjpA.mjs";
|
|
2
2
|
import { createSchedulerRuntime } from "./core/scheduler/index.mjs";
|
|
3
3
|
import { RunnerExecutionError, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createTaskExecutionContext, runScheduledTasks } from "./core/runner/index.mjs";
|
|
4
4
|
import process from "node:process";
|
|
@@ -264,6 +264,612 @@ async function loadEvalModulesWithVitestRuntime(evalFilePaths, projectRoot) {
|
|
|
264
264
|
return loadedModules;
|
|
265
265
|
}
|
|
266
266
|
//#endregion
|
|
267
|
+
//#region src/cli/report-selectors.ts
|
|
268
|
+
/**
 * Looks up a report selector on a case record.
 *
 * Use when:
 * - filtering, grouping, or comparing cases by a benchmark-neutral key
 *
 * Expects:
 * - `record.metrics` and `record.scores` are objects
 * - `key` names a metric, a score (optionally prefixed with `scores.`), or an own field of `record`
 *
 * Returns:
 * - `{ exists: true, value }` for the first container that owns the key, checked in the order
 *   metrics, prefixed score, score, direct field; `{ exists: false }` otherwise
 */
function getCaseSelectorValue(record, key) {
	const scorePrefix = "scores.";
	// Lookup order matters: metrics shadow scores, and scores shadow direct fields.
	const candidates = [[record.metrics, key]];
	if (key.startsWith(scorePrefix)) candidates.push([record.scores, key.slice(scorePrefix.length)]);
	candidates.push([record.scores, key], [record, key]);
	for (const [container, property] of candidates) {
		if (Object.hasOwn(container, property)) {
			return {
				exists: true,
				value: container[property]
			};
		}
	}
	return { exists: false };
}
|
|
300
|
+
/**
 * Stable-stringifies JSON-like values for report comparisons.
 *
 * Object keys are emitted in sorted order so that two structurally equal values
 * always produce the same text, regardless of property insertion order.
 *
 * Before:
 * - `{ b: 1, a: true }`
 * - `[1, undefined]`
 * - `{ a: undefined, b: 1 }`
 *
 * After:
 * - `{"a":true,"b":1}`
 * - `[1,null]`
 * - `{"b":1}`
 *
 * Expects:
 * - `value` is JSON-like; anything `JSON.stringify` cannot encode at the top level
 *   (for example `undefined`) yields `undefined`, exactly as `JSON.stringify` does
 *
 * Returns:
 * - the JSON text, or `undefined` when the top-level value has no JSON encoding
 */
function stableStringify(value) {
	if (value == null || typeof value !== "object") return JSON.stringify(value);
	if (Array.isArray(value)) {
		// Array.from visits holes as `undefined`; both holes and unencodable items become
		// `null`, matching JSON.stringify and keeping the output parseable.
		return `[${Array.from(value, (item) => stableStringify(item) ?? "null").join(",")}]`;
	}
	const record = value;
	const members = [];
	for (const key of Object.keys(record).sort((left, right) => left.localeCompare(right))) {
		const encoded = stableStringify(record[key]);
		// Properties without a JSON encoding are omitted, as JSON.stringify does.
		if (encoded !== void 0) members.push(`${JSON.stringify(key)}:${encoded}`);
	}
	return `{${members.join(",")}}`;
}
|
|
315
|
+
//#endregion
|
|
316
|
+
//#region src/cli/report-otlp.ts
|
|
317
|
+
/**
 * Projects normalized case records into OTLP-shaped trace, log, and metric JSON.
 *
 * Use when:
 * - report artifacts should be produced locally, without an OpenTelemetry Collector
 * - downstream tools want trace/log/metric-shaped JSON files
 *
 * Expects:
 * - `args.records` are case records of a single run
 * - `args.runId` is the run id attached to the run, project, and task spans
 *
 * Returns:
 * - `{ logs, metrics, traces }`, each holding one resource with one `vieval` scope
 */
function buildLocalOtlpProjection(args) {
	const { records, runId } = args;

	const projectSpans = [];
	for (const projectName of collectProjectNames(records)) {
		projectSpans.push({
			attributes: toAttributes({
				"vieval.project.name": projectName,
				"vieval.run.id": runId
			}),
			name: "vieval.project"
		});
	}

	const taskSpans = [];
	for (const { projectName, taskId } of collectTasks(records)) {
		taskSpans.push({
			attributes: toAttributes({
				"vieval.project.name": projectName,
				"vieval.run.id": runId,
				"vieval.task.id": taskId
			}),
			name: "vieval.task"
		});
	}

	// User metrics are spread first so the reserved `vieval.*` keys always win on collision.
	const toCaseSpan = (record) => ({
		attributes: toAttributes({
			...record.metrics,
			"vieval.case.duration_ms": record.durationMs,
			"vieval.case.id": record.caseId,
			"vieval.case.name": record.caseName,
			"vieval.case.retry_count": record.retryCount,
			"vieval.case.state": record.state,
			"vieval.project.name": record.projectName,
			"vieval.task.id": record.taskId
		}),
		endTimeUnixNano: isoToUnixNano(record.endedAt),
		name: "vieval.case",
		startTimeUnixNano: isoToUnixNano(record.startedAt)
	});

	const toLogRecord = (record) => ({
		attributes: toAttributes(record.metrics),
		body: { stringValue: JSON.stringify({
			caseId: record.caseId,
			scores: record.scores,
			state: record.state
		}) },
		eventName: "vieval.case",
		timeUnixNano: isoToUnixNano(record.endedAt)
	});

	// One gauge per score kind; only records carrying a numeric value for that kind contribute.
	const toScoreMetric = (kind) => {
		const dataPoints = records.filter((record) => typeof record.scores[kind] === "number").map((record) => ({
			asDouble: record.scores[kind],
			attributes: toAttributes({
				...record.metrics,
				"vieval.case.id": record.caseId,
				"vieval.task.id": record.taskId
			}),
			timeUnixNano: isoToUnixNano(record.endedAt)
		}));
		return {
			gauge: { dataPoints },
			name: `vieval.score.${kind}`
		};
	};

	const caseSpans = records.map((record) => toCaseSpan(record));
	const logRecords = records.map((record) => toLogRecord(record));
	const scoreMetrics = collectScoreKinds(records).map((kind) => toScoreMetric(kind));
	const runSpan = {
		attributes: toAttributes({ "vieval.run.id": runId }),
		name: "vieval.run"
	};

	return {
		logs: { resourceLogs: [{ scopeLogs: [{
			logRecords,
			scope: { name: "vieval" }
		}] }] },
		metrics: { resourceMetrics: [{ scopeMetrics: [{
			metrics: scoreMetrics,
			scope: { name: "vieval" }
		}] }] },
		traces: { resourceSpans: [{ scopeSpans: [{
			scope: { name: "vieval" },
			spans: [
				runSpan,
				...projectSpans,
				...taskSpans,
				...caseSpans
			]
		}] }] }
	};
}
|
|
404
|
+
/**
 * Converts a plain attribute object into a key-sorted list of `{ key, value }` pairs.
 *
 * Entries whose value is `undefined` are dropped; every remaining value is wrapped
 * through `toAnyValue`.
 */
function toAttributes(attributes) {
	const definedEntries = [];
	for (const entry of Object.entries(attributes)) {
		if (entry[1] !== void 0) definedEntries.push(entry);
	}
	// Sorting by key keeps the emitted artifact independent of insertion order.
	definedEntries.sort((left, right) => left[0].localeCompare(right[0]));
	return definedEntries.map(([key, rawValue]) => {
		return {
			key,
			value: toAnyValue(rawValue)
		};
	});
}
|
|
410
|
+
/**
 * Wraps a JavaScript value in an OTLP-style "any value" object.
 *
 * - arrays become `arrayValue` with each item wrapped recursively
 * - booleans become `boolValue`
 * - finite numbers become `doubleValue`; non-finite numbers become their string form
 * - `null`/`undefined` become the string `"null"`
 * - strings become `stringValue`
 * - anything else is stable-stringified into `stringValue`
 */
function toAnyValue(value) {
	if (Array.isArray(value)) {
		const values = value.map((item) => toAnyValue(item));
		return { arrayValue: { values } };
	}
	if (!isAttributeScalar(value)) return { stringValue: stableStringify(value) };
	switch (typeof value) {
		case "boolean":
			return { boolValue: value };
		case "number":
			// NaN and the infinities are kept as text rather than as a double.
			return Number.isFinite(value) ? { doubleValue: value } : { stringValue: String(value) };
		case "string":
			return { stringValue: value };
		default:
			// Only null and undefined reach this branch.
			return { stringValue: "null" };
	}
}
|
|
420
|
+
/**
 * Reports whether a value is `null`, `undefined`, a boolean, a number, or a string.
 */
function isAttributeScalar(value) {
	if (value === null || value === void 0) return true;
	return ["boolean", "number", "string"].includes(typeof value);
}
|
|
423
|
+
/**
 * Converts an ISO-8601 timestamp into Unix-epoch nanoseconds, returned as a decimal string.
 *
 * Timestamps with an explicit zone (`Z` or `±hh:mm`) keep up to nine fractional digits.
 * Other inputs go through `Date.parse` and therefore carry millisecond precision.
 * Unparseable input yields `"0"`.
 */
function isoToUnixNano(value) {
	const nanosPerMillisecond = 1000000n;
	const parts = /^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.(\d{1,9}))?(Z|[+-]\d{2}:\d{2})$/.exec(value);
	// For the precise form, parse whole seconds only and add the fraction back as nanoseconds.
	const parseTarget = parts == null ? value : `${parts[1]}.000${parts[3]}`;
	const unixMilliseconds = Date.parse(parseTarget);
	if (!Number.isFinite(unixMilliseconds)) return "0";
	let fractionNanos = 0n;
	if (parts != null) {
		const fractionDigits = parts[2] ?? "";
		fractionNanos = BigInt(fractionDigits.padEnd(9, "0").slice(0, 9));
	}
	return String(BigInt(unixMilliseconds) * nanosPerMillisecond + fractionNanos);
}
|
|
435
|
+
/**
 * Lists every distinct score kind found across the records, sorted by name.
 */
function collectScoreKinds(records) {
	const kinds = new Set();
	for (const kind of records.flatMap((record) => Object.keys(record.scores))) kinds.add(kind);
	return Array.from(kinds).sort((left, right) => left.localeCompare(right));
}
|
|
438
|
+
/**
 * Lists every distinct `projectName` found across the records, sorted by name.
 */
function collectProjectNames(records) {
	const names = new Set();
	for (const name of records.map((record) => record.projectName)) names.add(name);
	return Array.from(names).sort((left, right) => left.localeCompare(right));
}
|
|
441
|
+
/**
 * Lists every distinct `(projectName, taskId)` pair found across the records.
 *
 * Returns:
 * - `{ projectName, taskId }` objects sorted by project name, then by task id
 */
function collectTasks(records) {
	const uniqueTasks = new Map();
	for (const { projectName, taskId } of records) {
		// A NUL separator keeps the composite key unambiguous.
		uniqueTasks.set(`${projectName}\0${taskId}`, {
			projectName,
			taskId
		});
	}
	const tasks = Array.from(uniqueTasks.values());
	tasks.sort((left, right) => left.projectName.localeCompare(right.projectName) || left.taskId.localeCompare(right.taskId));
	return tasks;
}
|
|
452
|
+
//#endregion
|
|
453
|
+
//#region src/cli/report-records.ts
|
|
454
|
+
/**
 * Builds normalized case records from lifecycle, metric, and score events.
 *
 * Use when:
 * - `events.jsonl` should be projected into `cases.jsonl`
 * - report commands need one final record per observed case outcome
 *
 * Expects:
 * - events are ordered by occurrence where possible
 * - lifecycle events use either `task.case.start`/`task.case.end` or current CLI `CaseStarted`/`CaseEnded` names
 * - `args` carries the run identity fields used as fallbacks when an event omits them
 *
 * Returns:
 * - records for cases that emitted an end lifecycle event, in the order each case first ended
 */
function buildCaseRecords(args) {
	const drafts = new Map();
	// A Set keeps first-completion order while making the "already completed" check O(1);
	// an array with `includes` made this loop quadratic in the number of cases.
	const completedKeys = new Set();
	for (const event of args.events) {
		const normalizedEvent = normalizeCaseEventName(event.event);
		if (normalizedEvent == null) continue;
		const ids = extractEventIds(event, args);
		// Events that cannot be attributed to a task and case are ignored.
		if (ids.caseId.length === 0 || ids.taskId.length === 0) continue;
		const draft = getOrCreateDraft(drafts, ids, event, args);
		applyIdentity(draft, ids, event, args);
		if (normalizedEvent === "start") applyCaseStart(draft, event);
		else if (normalizedEvent === "metric") applyCaseMetric(draft, event);
		else if (normalizedEvent === "score") applyCaseScore(draft, event);
		else {
			applyCaseEnd(draft, event);
			completedKeys.add(createCaseKey(ids.taskId, ids.caseId));
		}
	}
	// A start event clears `endedAt`, so a case restarted after its last end is left out.
	return [...completedKeys].map((key) => drafts.get(key)).filter((draft) => draft != null && draft.endedAt != null).map(toCaseRecord);
}
|
|
489
|
+
/**
 * Aggregates record scores overall and per requested group.
 *
 * Use when:
 * - report artifacts need benchmark-neutral aggregate score views
 * - callers group by metrics such as `benchmark.category` or by direct record fields such as `taskId`
 *
 * Expects:
 * - `groupByKeys` are metric names or direct `CaseRecord` field names
 * - record score values are numeric
 *
 * Returns:
 * - `overall` score buckets plus `groups` buckets keyed by `<key>=<value>`
 */
function buildMetricsSummary(records, groupByKeys) {
	const overallBuckets = {};
	const groupBuckets = {};
	for (const record of records) {
		addRecordScores(overallBuckets, record);
		for (const groupByKey of groupByKeys) {
			const lookup = getGroupValue(record, groupByKey);
			if (lookup.exists) {
				const bucketName = `${groupByKey}=${String(lookup.value)}`;
				if (groupBuckets[bucketName] == null) groupBuckets[bucketName] = {};
				addRecordScores(groupBuckets[bucketName], record);
			}
		}
	}
	const groups = finalizeSummaryGroups(groupBuckets);
	const overall = finalizeScoreSummary(overallBuckets);
	return {
		groups,
		overall
	};
}
|
|
521
|
+
/**
 * Serializes records as newline-delimited JSON.
 *
 * Use when:
 * - writing `cases.jsonl` for command-line tools, dataframes, or streaming parsers
 *
 * Expects:
 * - `records` is an array of JSON-serializable values
 *
 * Returns:
 * - one JSON document per line, ending with a newline; the empty string for empty input
 */
function encodeJsonl(records) {
	const lines = records.map((record) => JSON.stringify(record));
	if (lines.length === 0) return "";
	return lines.join("\n") + "\n";
}
|
|
538
|
+
/**
 * Maps a raw event name onto `"start"`, `"metric"`, `"score"`, or `"end"`.
 *
 * Both the dotted `task.case.*` names and the CLI `CaseStarted`/`CaseEnded` names are
 * recognized; any other name yields `undefined`.
 */
function normalizeCaseEventName(eventName) {
	switch (eventName) {
		case "task.case.start":
		case "CaseStarted":
			return "start";
		case "task.case.metric":
			return "metric";
		case "task.case.score":
			return "score";
		case "task.case.end":
		case "CaseEnded":
			return "end";
		default:
			return void 0;
	}
}
|
|
544
|
+
/**
 * Resolves the identity fields of one event.
 *
 * Each field is taken from the event payload (`event.data`) when it is a string there,
 * otherwise from the event envelope, otherwise from the run-level fallback in `args`.
 * `caseId` and `taskId` have no run-level fallback and default to the empty string.
 */
function extractEventIds(event, args) {
	const payload = asRecord(event.data);
	const pick = (field, fallback) => stringFrom(payload?.[field]) ?? event[field] ?? fallback;
	return {
		attemptId: pick("attemptId", args.attemptId),
		caseId: pick("caseId", ""),
		experimentId: pick("experimentId", args.experimentId),
		// The envelope may carry the project under either `projectName` or `projectId`.
		projectName: pick("projectName", event.projectId ?? args.projectName),
		runId: pick("runId", args.runId),
		taskId: pick("taskId", ""),
		workspaceId: pick("workspaceId", args.workspaceId)
	};
}
|
|
556
|
+
/**
 * Returns the draft stored for the event's task/case pair, creating and registering an
 * empty one (no metrics, no scores, zero starts and retries) when none exists yet.
 */
function getOrCreateDraft(drafts, ids, event, args) {
	const draftKey = createCaseKey(ids.taskId, ids.caseId);
	let draft = drafts.get(draftKey);
	if (draft == null) {
		const { attemptId, caseId, experimentId, runId, taskId, workspaceId } = ids;
		draft = {
			attemptId,
			caseId,
			// Fall back to the case id until an event supplies a name.
			caseName: extractCaseName(event) ?? caseId,
			experimentId,
			metrics: {},
			projectName: ids.projectName || args.projectName,
			retryCount: 0,
			runId,
			scores: {},
			startCount: 0,
			taskId,
			workspaceId
		};
		drafts.set(draftKey, draft);
	}
	return draft;
}
|
|
577
|
+
/**
 * Refreshes a draft's identity fields from the latest event.
 *
 * Run-scoped ids fall back to `args` when the resolved id is empty. The project name is
 * only replaced when the event itself names a project.
 */
function applyIdentity(draft, ids, event, args) {
	for (const field of ["attemptId", "experimentId", "runId", "workspaceId"]) {
		draft[field] = ids[field] || args[field];
	}
	const explicitProjectName = extractExplicitProjectName(event);
	draft.projectName = explicitProjectName ?? draft.projectName;
}
|
|
584
|
+
/**
 * Applies a case-start event to a draft.
 *
 * Every start counts as one attempt and resets the per-attempt state (end time, input,
 * metrics, output, scores, state). The first recorded `startedAt` is kept across
 * restarts. `retryCount` only ever grows: it is raised to the payload's `retryIndex`
 * when that is a finite number, otherwise to the number of earlier starts.
 */
function applyCaseStart(draft, event) {
	const payload = asRecord(event.data);
	draft.startCount += 1;
	draft.caseName = extractCaseName(event) ?? draft.caseName;
	draft.startedAt ??= stringFrom(payload?.startedAt) ?? event.timestamp;
	draft.endedAt = void 0;
	draft.input = payload != null && "input" in payload ? payload.input : void 0;
	draft.metrics = {};
	draft.output = void 0;
	draft.scores = {};
	draft.state = void 0;
	const explicitRetryIndex = numberFrom(payload?.retryIndex);
	const observedRetries = explicitRetryIndex ?? draft.startCount - 1;
	draft.retryCount = Math.max(draft.retryCount, observedRetries);
}
|
|
603
|
+
/**
 * Stores one metric from a metric event on the draft.
 *
 * The event is ignored unless its payload has a string `name` and a `value` accepted by
 * `isCaseMetricValue`.
 */
function applyCaseMetric(draft, event) {
	const payload = asRecord(event.data);
	const metricName = stringFrom(payload?.name);
	if (metricName == null) return;
	const metricValue = payload?.value;
	if (!isCaseMetricValue(metricValue)) return;
	draft.metrics[metricName] = metricValue;
}
|
|
610
|
+
/**
 * Stores one score from a score event on the draft.
 *
 * The kind is read from `kind`, `name`, or `vieval.score.kind`, and the value from
 * `score`, `value`, or `vieval.score.value`; the first usable field wins in each list.
 * The event is ignored when either part is missing.
 */
function applyCaseScore(draft, event) {
	const payload = asRecord(event.data);
	const firstUsable = (convert, fields) => {
		for (const field of fields) {
			const converted = convert(payload?.[field]);
			if (converted != null) return converted;
		}
		return void 0;
	};
	const kind = firstUsable(stringFrom, ["kind", "name", "vieval.score.kind"]);
	const score = firstUsable(numberFrom, ["score", "value", "vieval.score.value"]);
	if (kind != null && score != null) draft.scores[kind] = score;
}
|
|
617
|
+
/**
 * Applies a case-end event to a draft.
 *
 * Records the end time, output, and final state. A missing or unrecognized state is
 * treated as `"failed"`. When no `exact` score was reported, one is derived from the
 * state: 1 for `"passed"`, 0 otherwise.
 */
function applyCaseEnd(draft, event) {
	const payload = asRecord(event.data);
	const endedName = extractCaseName(event);
	draft.caseName = endedName ?? draft.caseName;
	const reportedEnd = stringFrom(payload?.endedAt) ?? event.timestamp;
	draft.endedAt = reportedEnd ?? draft.endedAt;
	const hasOutput = payload != null && "output" in payload;
	draft.output = hasOutput ? payload.output : draft.output;
	const finalState = normalizeState(stringFrom(payload?.state)) ?? "failed";
	draft.state = finalState;
	draft.scores.exact ??= finalState === "passed" ? 1 : 0;
}
|
|
625
|
+
/**
 * Converts a finished draft into the persisted case record shape.
 *
 * Missing timestamps fall back to each other (and finally to the empty string), a
 * missing state becomes `"failed"`, and `input`/`output` are only included when set.
 * Property order is fixed because the records are serialized line by line.
 */
function toCaseRecord(draft) {
	const startedAt = draft.startedAt ?? draft.endedAt ?? "";
	const endedAt = draft.endedAt ?? startedAt;
	const optionalEntry = (field) => draft[field] === void 0 ? [] : [[field, draft[field]]];
	return Object.fromEntries([
		["attemptId", draft.attemptId],
		["caseId", draft.caseId],
		["caseName", draft.caseName],
		["durationMs", calculateDurationMs(startedAt, endedAt)],
		["endedAt", endedAt],
		["experimentId", draft.experimentId],
		...optionalEntry("input"),
		["metrics", draft.metrics],
		...optionalEntry("output"),
		["projectName", draft.projectName],
		["retryCount", draft.retryCount],
		["runId", draft.runId],
		["schemaVersion", 1],
		["scores", draft.scores],
		["startedAt", startedAt],
		["state", draft.state ?? "failed"],
		["taskId", draft.taskId],
		["workspaceId", draft.workspaceId]
	]);
}
|
|
649
|
+
/**
 * Accumulates one record's finite scores into a mutable summary.
 *
 * Expects:
 * - `summary` maps score kind to a `{ average, count, sum }` bucket and is mutated in place
 * - `record.scores` maps score kind to a number; non-finite values are skipped
 *
 * Only `count` and `sum` are updated here; `average` stays at its initial 0 until the
 * summary is finalized.
 */
function addRecordScores(summary, record) {
	for (const [kind, score] of Object.entries(record.scores)) {
		if (!Number.isFinite(score)) continue;
		// Only own properties count as existing buckets. A plain `summary[kind] ??= ...`
		// would find inherited members such as `constructor` or `toString`, skip the
		// initialization, and then write `count`/`sum` onto those shared built-ins.
		let bucket = Object.hasOwn(summary, kind) ? summary[kind] : void 0;
		if (bucket == null) {
			bucket = {
				average: 0,
				count: 0,
				sum: 0
			};
			// defineProperty creates an own data property even for names like `__proto__`.
			Object.defineProperty(summary, kind, {
				configurable: true,
				enumerable: true,
				value: bucket,
				writable: true
			});
		}
		bucket.count += 1;
		bucket.sum += score;
	}
}
|
|
661
|
+
/**
 * Finalizes every group's score buckets, keeping the group keys unchanged.
 */
function finalizeSummaryGroups(groups) {
	const finalizedEntries = [];
	for (const [groupKey, buckets] of Object.entries(groups)) {
		finalizedEntries.push([groupKey, finalizeScoreSummary(buckets)]);
	}
	return Object.fromEntries(finalizedEntries);
}
|
|
664
|
+
/**
 * Produces the final `{ average, count, sum }` bucket for every score kind.
 *
 * The average is recomputed as `sum / count`, or 0 for an empty bucket; the input
 * buckets are not modified.
 */
function finalizeScoreSummary(summary) {
	const finalizedEntries = Object.entries(summary).map(([kind, { count, sum }]) => {
		const average = count === 0 ? 0 : sum / count;
		return [kind, {
			average,
			count,
			sum
		}];
	});
	return Object.fromEntries(finalizedEntries);
}
|
|
671
|
+
/**
 * Resolves the value a record should be grouped by for one group-by key.
 *
 * Expects:
 * - `record.metrics` is an object
 * - `key` is a metric name or the name of an own field of `record`
 *
 * Returns:
 * - `{ exists: true, value }` when the key is an own metric, or an own record field
 *   holding a scalar or array value
 * - `{ exists: false }` otherwise, so callers can skip the record for this key
 */
function getGroupValue(record, key) {
	if (Object.hasOwn(record.metrics, key)) return {
		exists: true,
		value: record.metrics[key]
	};
	// An absent field reads as `undefined`, which `isCaseMetricValue` accepts; without
	// this guard every record would be grouped under `<key>=undefined`.
	if (!Object.hasOwn(record, key)) return { exists: false };
	const directValue = record[key];
	return isCaseMetricValue(directValue) ? {
		exists: true,
		value: directValue
	} : { exists: false };
}
|
|
682
|
+
/**
 * Reads a case name from an event payload: `caseName` when it is a string, otherwise
 * `name` when that is a string, otherwise `undefined`.
 */
function extractCaseName(event) {
	const payload = asRecord(event.data);
	const explicitName = stringFrom(payload?.caseName);
	return explicitName ?? stringFrom(payload?.name);
}
|
|
686
|
+
/**
 * Reads the project named by the event itself: the payload's string `projectName`,
 * otherwise the envelope's `projectName`, otherwise the envelope's `projectId`.
 */
function extractExplicitProjectName(event) {
	const payload = asRecord(event.data);
	const payloadProjectName = stringFrom(payload?.projectName);
	if (payloadProjectName != null) return payloadProjectName;
	return event.projectName ?? event.projectId;
}
|
|
689
|
+
/**
 * Builds the composite lookup key for a case: task id and case id joined by a NUL character.
 */
function createCaseKey(taskId, caseId) {
	const separator = "\u0000";
	return `${taskId}${separator}${caseId}`;
}
|
|
692
|
+
/**
 * Computes the elapsed milliseconds between two timestamps.
 *
 * Before:
 * - `startedAt="2026-05-08T00:00:00.000Z"`, `endedAt="2026-05-08T00:00:01.250Z"`
 * - `startedAt="bad"`, `endedAt="2026-05-08T00:00:01.250Z"`
 *
 * After:
 * - `1250`
 * - `0`
 *
 * A negative span (end before start) is clamped to `0`.
 */
function calculateDurationMs(startedAt, endedAt) {
	const [startMs, endMs] = [startedAt, endedAt].map((timestamp) => Date.parse(timestamp));
	if (Number.isFinite(startMs) && Number.isFinite(endMs)) return Math.max(0, endMs - startMs);
	return 0;
}
|
|
709
|
+
/**
 * Returns the value when it is one of the known case states (`failed`, `passed`,
 * `skipped`, `timeout`); otherwise `undefined`.
 */
function normalizeState(value) {
	const knownStates = ["failed", "passed", "skipped", "timeout"];
	return knownStates.includes(value) ? value : void 0;
}
|
|
712
|
+
/**
 * Reports whether a value may be stored as a case metric: `null`, `undefined`, a
 * boolean, a number, a string, or an array.
 */
function isCaseMetricValue(value) {
	if (Array.isArray(value)) return true;
	if (value === null || value === void 0) return true;
	return ["boolean", "number", "string"].includes(typeof value);
}
|
|
716
|
+
/**
 * Returns the value when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value) {
	const isObjectLike = typeof value === "object" && value !== null;
	return isObjectLike && !Array.isArray(value) ? value : void 0;
}
|
|
720
|
+
/**
 * Returns the value when it is a string (including the empty string); otherwise `undefined`.
 */
function stringFrom(value) {
	if (typeof value !== "string") return void 0;
	return value;
}
|
|
723
|
+
/**
 * Returns the value when it is a finite number; otherwise `undefined`.
 */
function numberFrom(value) {
	// Number.isFinite never coerces, so non-number inputs are rejected here as well.
	if (Number.isFinite(value)) return value;
	return void 0;
}
|
|
726
|
+
//#endregion
|
|
727
|
+
//#region src/cli/report-artifacts.ts
|
|
728
|
+
/**
 * Locates the `run-summary.json` files a report location refers to.
 *
 * Use when:
 * - callers may pass a summary file path, a run directory, or a report root
 *
 * Returns:
 * - the path itself when it exists and ends with `.json`
 * - otherwise `<path>/run-summary.json` when that file exists
 * - otherwise every `run-summary.json` found below the path, sorted
 */
async function resolveRunSummaryPaths(reportPath) {
	const reportLocation = resolve(reportPath);
	const summaryInDirectory = resolve(reportLocation, "run-summary.json");
	const pointsAtJsonFile = existsSync(reportLocation) && reportLocation.endsWith(".json");
	if (pointsAtJsonFile) return [reportLocation];
	if (existsSync(summaryInDirectory)) return [summaryInDirectory];
	const discovered = await glob("**/run-summary.json", {
		absolute: true,
		cwd: reportLocation
	});
	return discovered.sort((left, right) => left.localeCompare(right));
}
|
|
747
|
+
/**
 * Loads one run's report artifacts: the given `run-summary.json` plus the `events.jsonl`
 * next to it.
 *
 * Use when:
 * - report analysis needs both the run summary and its recorded events
 *
 * Returns:
 * - the parsed summary, the events reduced to their known envelope fields, the event
 *   count, the report directory, and the summary path; events are empty when
 *   `events.jsonl` is absent
 */
function readReportRunArtifact(summaryFilePath) {
	const reportDirectory = resolve(summaryFilePath, "..");
	const summary = JSON.parse(readFileSync(summaryFilePath, "utf-8"));
	const eventsFilePath = resolve(reportDirectory, "events.jsonl");
	const events = [];
	if (existsSync(eventsFilePath)) {
		for (const line of readFileSync(eventsFilePath, "utf-8").split("\n")) {
			// Blank lines, including the trailing one, carry no event.
			if (line.trim().length === 0) continue;
			const {
				attemptId,
				caseId,
				data,
				event,
				experimentId,
				projectId,
				projectName,
				runId,
				taskId,
				timestamp,
				workspaceId
			} = JSON.parse(line);
			// Only the known envelope fields are kept; anything else on the line is dropped.
			events.push({
				attemptId,
				caseId,
				data,
				event,
				experimentId,
				projectId,
				projectName,
				runId,
				taskId,
				timestamp,
				workspaceId
			});
		}
	}
	return {
		events,
		eventsCount: events.length,
		reportDirectory,
		summary,
		summaryFilePath
	};
}
|
|
781
|
+
/**
 * Loads every run artifact reachable from `reportPath`.
 *
 * Use when:
 * - callers need multi-run analysis from a directory root
 *
 * Returns:
 * - one artifact per resolved `run-summary.json`, in resolved-path order
 */
async function readReportArtifacts(reportPath) {
	const summaryFilePaths = await resolveRunSummaryPaths(reportPath);
	const artifacts = [];
	for (const summaryFilePath of summaryFilePaths) {
		artifacts.push(readReportRunArtifact(summaryFilePath));
	}
	return artifacts;
}
|
|
790
|
+
/**
 * Reduces one run artifact to a flat summary row.
 *
 * Use when:
 * - table/csv/jsonl exports need a compact row per run
 *
 * Returns:
 * - run identity fields (`null` when absent), project and task counts, project names,
 *   the event count, and the report directory
 */
function summarizeReportRunArtifact(artifact) {
	const { eventsCount, reportDirectory, summary } = artifact;
	const { projects } = summary;
	const projectNames = [];
	let executedProjects = 0;
	let failedProjects = 0;
	let totalTasks = 0;
	for (const project of projects) {
		projectNames.push(project.name);
		if (project.executed) executedProjects += 1;
		// A project counts as failed as soon as it carries an error message.
		if (project.errorMessage != null) failedProjects += 1;
		totalTasks += project.taskCount;
	}
	return {
		attemptId: summary.attemptId ?? null,
		eventsCount,
		executedProjects,
		experimentId: summary.experimentId ?? null,
		failedProjects,
		projectNames,
		reportDirectory,
		runId: summary.runId ?? null,
		totalProjects: projects.length,
		totalTasks,
		workspaceId: summary.workspaceId ?? null
	};
}
|
|
816
|
+
/**
 * Writes the full local artifact set for one run.
 *
 * Use when:
 * - CLI runs need local artifacts under `<reportOut>/<workspace>/<project>/<experiment>/<attempt>/<run>`
 * - report commands need normalized case, metrics, and OTLP-shaped files
 *
 * Expects:
 * - `events` are the envelopes to persist in `events.jsonl`
 * - `output` is the run summary and has a `projects` list
 * - `identity` carries `workspaceId`, `experimentId`, `attemptId`, and `runId`
 *
 * Returns:
 * - the absolute report directory that now holds `run-summary.json`, `events.jsonl`,
 *   `cases.jsonl`, `metrics-summary.json`, `otlp/*.json`, and an empty `benchmark/` directory
 */
async function writeRunReportArtifacts(output, events, identity, reportOut) {
	const { attemptId, experimentId, runId, workspaceId } = identity;
	const projectId = deriveReportProjectId(output);
	const reportDirectory = resolve(reportOut, workspaceId, projectId, experimentId, attemptId, runId);
	const inReport = (...segments) => resolve(reportDirectory, ...segments);
	const toPrettyJson = (value) => `${JSON.stringify(value, null, 2)}\n`;

	// The persisted summary also records where it was written.
	const persistedOutput = {
		...output,
		reportDirectory
	};
	await mkdir(reportDirectory, { recursive: true });
	await writeFile(inReport("run-summary.json"), toPrettyJson(persistedOutput), "utf-8");

	const eventLines = events.map((event) => JSON.stringify(event)).join("\n");
	const eventsTrailer = events.length > 0 ? "\n" : "";
	await writeFile(inReport("events.jsonl"), eventLines.concat(eventsTrailer), "utf-8");

	const caseRecords = buildCaseRecords({
		attemptId,
		events,
		experimentId,
		projectName: projectId,
		runId,
		workspaceId
	});
	const metricsSummary = buildMetricsSummary(caseRecords, []);
	const otlp = buildLocalOtlpProjection({
		records: caseRecords,
		runId
	});
	await writeFile(inReport("cases.jsonl"), encodeJsonl(caseRecords), "utf-8");
	await writeFile(inReport("metrics-summary.json"), toPrettyJson(metricsSummary), "utf-8");
	await mkdir(inReport("otlp"), { recursive: true });
	await mkdir(inReport("benchmark"), { recursive: true });
	await writeFile(inReport("otlp", "traces.json"), toPrettyJson(otlp.traces), "utf-8");
	await writeFile(inReport("otlp", "logs.json"), toPrettyJson(otlp.logs), "utf-8");
	await writeFile(inReport("otlp", "metrics.json"), toPrettyJson(otlp.metrics), "utf-8");
	return reportDirectory;
}
|
|
862
|
+
/**
 * Picks the project path segment for a run's report directory.
 *
 * When all projects in `output.projects` share one name, that name is sanitized and
 * used; in every other case the result is the literal `"multi-project"`.
 */
function deriveReportProjectId(output) {
	const distinctNames = new Set(output.projects.map((project) => project.name));
	if (distinctNames.size !== 1) return "multi-project";
	const [onlyName] = distinctNames;
	return sanitizeIdentitySegment$1(onlyName ?? "default-project");
}
|
|
867
|
+
/**
 * Makes an identity value usable as a single path segment.
 *
 * The value is trimmed; an empty result becomes `"default"`. Otherwise every run of
 * characters other than word characters, `.`, and `-` collapses into a single `-`.
 */
function sanitizeIdentitySegment$1(value) {
	const trimmed = value.trim();
	return trimmed === "" ? "default" : trimmed.replace(/[^\w.-]+/g, "-");
}
|
|
872
|
+
//#endregion
|
|
267
873
|
//#region src/cli/reporters/noop-reporter.ts
|
|
268
874
|
/**
|
|
269
875
|
* Creates a reporter that intentionally does nothing.
|
|
@@ -1273,6 +1879,10 @@ function formatDuration$1(durationMs, colors) {
|
|
|
1273
1879
|
const rounded = Math.round(durationMs);
|
|
1274
1880
|
return (rounded > 1e3 ? colors.yellow : colors.green)(` ${rounded}${colors.dim("ms")}`);
|
|
1275
1881
|
}
|
|
1882
|
+
/**
 * Formats a hybrid average for display.
 *
 * `null`/`undefined` render as `"n/a"`. Numbers are rounded to three decimals with
 * trailing zeros, and a then-dangling decimal point, removed.
 */
function formatHybridAverage(hybridAverage) {
	if (hybridAverage === null || hybridAverage === void 0) return "n/a";
	const fixed = hybridAverage.toFixed(3);
	return fixed.replace(/\.?0+$/, "");
}
|
|
1276
1886
|
function filterProjectsByName(projects, names) {
|
|
1277
1887
|
if (names.length === 0) return [...projects];
|
|
1278
1888
|
const nameSet = new Set(names);
|
|
@@ -1293,11 +1903,6 @@ function createRunIdentity(options) {
|
|
|
1293
1903
|
workspaceId
|
|
1294
1904
|
};
|
|
1295
1905
|
}
|
|
1296
|
-
/**
 * Derives one project identifier for a run output.
 *
 * Returns:
 * - the sanitized project name when all entries of `output.projects` share a single name
 * - `"multi-project"` in every other case (including an empty project list)
 */
function deriveReportProjectId(output) {
  const distinctNames = new Set();
  for (const project of output.projects) distinctNames.add(project.name);
  if (distinctNames.size !== 1) return "multi-project";
  const [soleName] = distinctNames;
  return sanitizeIdentitySegment(soleName ?? "default-project");
}
|
|
1301
1906
|
function createEventRecorder(identity) {
|
|
1302
1907
|
const events = [];
|
|
1303
1908
|
const taskProjectMap = /* @__PURE__ */ new Map();
|
|
@@ -1520,6 +2125,7 @@ function createTaskReporterHooks(task, reporter, projectName, recordEvent, proje
|
|
|
1520
2125
|
reporter.onCaseEnd({
|
|
1521
2126
|
caseId,
|
|
1522
2127
|
errorMessage: payload.errorMessage,
|
|
2128
|
+
output: payload.output,
|
|
1523
2129
|
state: payload.state,
|
|
1524
2130
|
taskId: task.id
|
|
1525
2131
|
});
|
|
@@ -1536,6 +2142,7 @@ function createTaskReporterHooks(task, reporter, projectName, recordEvent, proje
|
|
|
1536
2142
|
reporter.onCaseStart({
|
|
1537
2143
|
autoRetry: payload.autoRetry,
|
|
1538
2144
|
caseId,
|
|
2145
|
+
input: payload.input,
|
|
1539
2146
|
caseName: payload.name,
|
|
1540
2147
|
retryIndex: payload.retryIndex,
|
|
1541
2148
|
taskId: task.id
|
|
@@ -1554,7 +2161,7 @@ function createTaskReporterHooks(task, reporter, projectName, recordEvent, proje
|
|
|
1554
2161
|
}
|
|
1555
2162
|
};
|
|
1556
2163
|
}
|
|
1557
|
-
function createCliTaskExecutionContext(task, models, cacheRootDirectory, cacheProjectName, workspaceId, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, runtimeConcurrency, vitestCompatReporter) {
|
|
2164
|
+
function createCliTaskExecutionContext(task, models, cacheRootDirectory, cacheProjectName, workspaceId, telemetry, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, runtimeConcurrency, vitestCompatReporter) {
|
|
1558
2165
|
return {
|
|
1559
2166
|
...createTaskExecutionContext({
|
|
1560
2167
|
cache: createFilesystemTaskCacheRuntime({
|
|
@@ -1566,7 +2173,8 @@ function createCliTaskExecutionContext(task, models, cacheRootDirectory, cachePr
|
|
|
1566
2173
|
task
|
|
1567
2174
|
}),
|
|
1568
2175
|
reporterHooks: createTaskReporterHooks(task, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
1569
|
-
runtimeConcurrency
|
|
2176
|
+
runtimeConcurrency,
|
|
2177
|
+
telemetry
|
|
1570
2178
|
};
|
|
1571
2179
|
}
|
|
1572
2180
|
function resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter) {
|
|
@@ -1582,9 +2190,10 @@ function createAutoTaskExecutor(reporter, projectName, recordEvent, projectCaseC
|
|
|
1582
2190
|
if (taskDefinition == null) throw new Error(`Missing eval task definition for entry "${task.entry.id}".`);
|
|
1583
2191
|
const output = await taskDefinition.run({
|
|
1584
2192
|
cache: context.cache,
|
|
1585
|
-
|
|
2193
|
+
models: context.models,
|
|
1586
2194
|
reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
1587
|
-
task
|
|
2195
|
+
task,
|
|
2196
|
+
telemetry: context.telemetry
|
|
1588
2197
|
});
|
|
1589
2198
|
return {
|
|
1590
2199
|
entryId: task.entry.id,
|
|
@@ -1687,7 +2296,7 @@ async function prepareProject(project) {
|
|
|
1687
2296
|
};
|
|
1688
2297
|
}
|
|
1689
2298
|
}
|
|
1690
|
-
async function executePreparedProject(prepared, identity, cacheProjectName, reporter, counters, recordEvent, options) {
|
|
2299
|
+
async function executePreparedProject(prepared, identity, cacheProjectName, telemetry, reporter, counters, recordEvent, options) {
|
|
1691
2300
|
const settledTaskIds = /* @__PURE__ */ new Set();
|
|
1692
2301
|
const projectCaseCounters = {
|
|
1693
2302
|
failed: 0,
|
|
@@ -1705,7 +2314,13 @@ async function executePreparedProject(prepared, identity, cacheProjectName, repo
|
|
|
1705
2314
|
const taskExecutor = async (task, context) => {
|
|
1706
2315
|
const runtimeTask = createScheduledTaskWithRuntimeConcurrency(task, prepared.project, options);
|
|
1707
2316
|
return {
|
|
1708
|
-
...await
|
|
2317
|
+
...await telemetry.withSpan("vieval.task", {
|
|
2318
|
+
"vieval.project.name": prepared.name,
|
|
2319
|
+
"vieval.run.id": identity.runId,
|
|
2320
|
+
"vieval.task.entry.id": runtimeTask.entry.id,
|
|
2321
|
+
"vieval.task.id": runtimeTask.id,
|
|
2322
|
+
"vieval.task.name": runtimeTask.entry.name
|
|
2323
|
+
}, async () => await rawTaskExecutor(runtimeTask, context)),
|
|
1709
2324
|
matrix: cloneScheduledTaskMatrix(runtimeTask)
|
|
1710
2325
|
};
|
|
1711
2326
|
};
|
|
@@ -1714,7 +2329,7 @@ async function executePreparedProject(prepared, identity, cacheProjectName, repo
|
|
|
1714
2329
|
try {
|
|
1715
2330
|
const aggregated = await runScheduledTasks(prepared.tasks, taskExecutor, {
|
|
1716
2331
|
createExecutionContext(task) {
|
|
1717
|
-
return createCliTaskExecutionContext(task, prepared.project.models, resolve(prepared.project.root, ".vieval", "cache"), cacheProjectName ?? prepared.name, identity.workspaceId, reporter, prepared.name, recordEvent, projectCaseCounters, projectCaseFailures, resolveCliRuntimeConcurrency(options), vitestCompatReporter);
|
|
2332
|
+
return createCliTaskExecutionContext(task, prepared.project.models, resolve(prepared.project.root, ".vieval", "cache"), cacheProjectName ?? prepared.name, identity.workspaceId, telemetry, reporter, prepared.name, recordEvent, projectCaseCounters, projectCaseFailures, resolveCliRuntimeConcurrency(options), vitestCompatReporter);
|
|
1718
2333
|
},
|
|
1719
2334
|
onTaskEnd(task, state) {
|
|
1720
2335
|
settledTaskIds.add(task.id);
|
|
@@ -1807,14 +2422,6 @@ async function executePreparedProject(prepared, identity, cacheProjectName, repo
|
|
|
1807
2422
|
};
|
|
1808
2423
|
}
|
|
1809
2424
|
}
|
|
1810
|
-
/**
 * Writes `run-summary.json` and `events.jsonl` for one run.
 *
 * The target directory is
 * `<reportOut>/<workspaceId>/<projectId>/<experimentId>/<attemptId>/<runId>`
 * and is created recursively before the files are written.
 *
 * Returns:
 * - the absolute report directory that received the artifacts
 */
async function writeRunReportArtifacts(output, events, identity, reportOut) {
  const { workspaceId, experimentId, attemptId, runId } = identity;
  const projectId = deriveReportProjectId(output);
  const reportDirectory = resolve(reportOut, workspaceId, projectId, experimentId, attemptId, runId);
  await mkdir(reportDirectory, { recursive: true });

  const summaryText = `${JSON.stringify(output, null, 2)}\n`;
  await writeFile(resolve(reportDirectory, "run-summary.json"), summaryText, "utf-8");

  // One JSON document per line; the trailing newline is only emitted when
  // there is at least one event so an empty log stays a zero-byte file.
  const eventLines = events.map((event) => JSON.stringify(event)).join("\n");
  const eventsText = events.length > 0 ? eventLines.concat("\n") : eventLines.concat("");
  await writeFile(resolve(reportDirectory, "events.jsonl"), eventsText, "utf-8");

  return reportDirectory;
}
|
|
1818
2425
|
/**
|
|
1819
2426
|
* Runs vieval orchestration from config and returns project-level summaries.
|
|
1820
2427
|
*
|
|
@@ -1837,65 +2444,91 @@ async function runVievalCli(options = {}) {
|
|
|
1837
2444
|
configFilePath: options.configFilePath,
|
|
1838
2445
|
cwd: options.cwd
|
|
1839
2446
|
});
|
|
2447
|
+
const telemetry = loadedConfig.reporting?.openTelemetry?.enabled === true ? createOpenTelemetryRuntime() : createNoopTelemetryRuntime();
|
|
2448
|
+
const onOpenTelemetryRunEnd = loadedConfig.reporting?.openTelemetry?.enabled === true ? loadedConfig.reporting.openTelemetry.onRunEnd : void 0;
|
|
1840
2449
|
const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
|
|
1841
2450
|
const eventRecorder = createEventRecorder(identity);
|
|
1842
2451
|
const reporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
|
|
2452
|
+
let runError;
|
|
2453
|
+
let runEndError;
|
|
2454
|
+
let output;
|
|
1843
2455
|
try {
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
summary: preparedProject.summary
|
|
2456
|
+
output = await telemetry.withSpan("vieval.run", {
|
|
2457
|
+
"vieval.attempt.id": identity.attemptId,
|
|
2458
|
+
"vieval.experiment.id": identity.experimentId,
|
|
2459
|
+
"vieval.run.id": identity.runId,
|
|
2460
|
+
"vieval.workspace.id": identity.workspaceId
|
|
2461
|
+
}, async () => {
|
|
2462
|
+
const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
|
|
2463
|
+
const workspaceScheduler = createSchedulerRuntime({ concurrency: { workspace: resolveWorkspaceConcurrency(loadedConfig, options) } });
|
|
2464
|
+
const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
|
|
2465
|
+
const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
|
|
2466
|
+
const totalTasks = preparedProjects.reduce((sum, project) => {
|
|
2467
|
+
if (project.kind === "prepared") return sum + project.prepared.tasks.length;
|
|
2468
|
+
return sum + project.summary.taskCount;
|
|
2469
|
+
}, 0);
|
|
2470
|
+
const skippedSummaryTasks = preparedProjects.reduce((sum, project) => {
|
|
2471
|
+
if (project.kind === "summary") return sum + project.summary.taskCount;
|
|
2472
|
+
return sum;
|
|
2473
|
+
}, 0);
|
|
2474
|
+
const reporterCounters = {
|
|
2475
|
+
failedTasks: 0,
|
|
2476
|
+
passedTasks: 0,
|
|
2477
|
+
skippedTasks: 0
|
|
1867
2478
|
};
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
2479
|
+
reporter.onRunStart({ totalTasks });
|
|
2480
|
+
for (const project of executableProjects) for (const task of project.tasks) reporter.onTaskQueued(createTaskQueuePayload(task, project.name));
|
|
2481
|
+
const projectSummaries = (await Promise.all(preparedProjects.map(async (preparedProject, index) => {
|
|
2482
|
+
if (preparedProject.kind === "summary") return {
|
|
2483
|
+
index,
|
|
2484
|
+
summary: preparedProject.summary
|
|
2485
|
+
};
|
|
2486
|
+
return {
|
|
2487
|
+
index,
|
|
2488
|
+
summary: await telemetry.withSpan("vieval.project", {
|
|
2489
|
+
"vieval.project.name": preparedProject.prepared.name,
|
|
2490
|
+
"vieval.run.id": identity.runId
|
|
2491
|
+
}, async () => await workspaceScheduler.runCase({
|
|
2492
|
+
experimentId: identity.experimentId,
|
|
2493
|
+
projectName: preparedProject.prepared.name,
|
|
2494
|
+
scope: "workspace",
|
|
2495
|
+
workspaceId: identity.workspaceId
|
|
2496
|
+
}, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, telemetry, reporter, reporterCounters, eventRecorder.record, options)))
|
|
2497
|
+
};
|
|
2498
|
+
}))).sort((left, right) => left.index - right.index).map((item) => item.summary);
|
|
2499
|
+
reporter.onRunEnd({
|
|
2500
|
+
failedTasks: reporterCounters.failedTasks,
|
|
2501
|
+
passedTasks: reporterCounters.passedTasks,
|
|
2502
|
+
skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
|
|
2503
|
+
totalTasks
|
|
2504
|
+
});
|
|
2505
|
+
const output = {
|
|
2506
|
+
attemptId: identity.attemptId,
|
|
2507
|
+
configFilePath: loadedConfig.configFilePath,
|
|
2508
|
+
experimentId: identity.experimentId,
|
|
2509
|
+
projects: projectSummaries,
|
|
2510
|
+
reportDirectory: null,
|
|
2511
|
+
runId: identity.runId,
|
|
2512
|
+
workspaceId: identity.workspaceId
|
|
1876
2513
|
};
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
failedTasks: reporterCounters.failedTasks,
|
|
1880
|
-
passedTasks: reporterCounters.passedTasks,
|
|
1881
|
-
skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
|
|
1882
|
-
totalTasks
|
|
2514
|
+
if (options.reportOut != null) output.reportDirectory = await writeRunReportArtifacts(output, eventRecorder.events, identity, options.reportOut);
|
|
2515
|
+
return output;
|
|
1883
2516
|
});
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
configFilePath: loadedConfig.configFilePath,
|
|
1887
|
-
experimentId: identity.experimentId,
|
|
1888
|
-
projects: projectSummaries,
|
|
1889
|
-
reportDirectory: null,
|
|
1890
|
-
runId: identity.runId,
|
|
1891
|
-
workspaceId: identity.workspaceId
|
|
1892
|
-
};
|
|
1893
|
-
if (options.reportOut != null) output.reportDirectory = await writeRunReportArtifacts(output, eventRecorder.events, identity, options.reportOut);
|
|
1894
|
-
return output;
|
|
2517
|
+
} catch (error) {
|
|
2518
|
+
runError = error;
|
|
1895
2519
|
} finally {
|
|
2520
|
+
if (onOpenTelemetryRunEnd != null) try {
|
|
2521
|
+
await onOpenTelemetryRunEnd();
|
|
2522
|
+
} catch (error) {
|
|
2523
|
+
if (runError == null) runEndError = error;
|
|
2524
|
+
}
|
|
1896
2525
|
reporter.dispose();
|
|
1897
2526
|
restoreEnvironment();
|
|
1898
2527
|
}
|
|
2528
|
+
if (runError != null) throw runError;
|
|
2529
|
+
if (runEndError != null) throw runEndError;
|
|
2530
|
+
if (output == null) throw new Error("Vieval run finished without output.");
|
|
2531
|
+
return output;
|
|
1899
2532
|
}
|
|
1900
2533
|
/**
|
|
1901
2534
|
* Formats CLI run output as human-readable lines.
|
|
@@ -1964,8 +2597,7 @@ function formatVievalCliRunOutput(output) {
|
|
|
1964
2597
|
}
|
|
1965
2598
|
if (hasFailedCases) failedProjects += 1;
|
|
1966
2599
|
else passedProjects += 1;
|
|
1967
|
-
const
|
|
1968
|
-
const hybridAverageLabel = hybridAverage == null ? "n/a" : String(hybridAverage);
|
|
2600
|
+
const hybridAverageLabel = formatHybridAverage(project.result?.overall.hybridAverage);
|
|
1969
2601
|
const runCount = project.result?.overall.runCount ?? 0;
|
|
1970
2602
|
const countLabel = colors.dim(`(${project.taskCount} tasks)`);
|
|
1971
2603
|
const caseSummaryLabel = project.caseSummary == null ? "" : `, cases ${project.caseSummary.passed} passed | ${project.caseSummary.failed} failed | ${project.caseSummary.timeout} timeout`;
|
|
@@ -2008,14 +2640,14 @@ const compareHelpText = `
|
|
|
2008
2640
|
--output Optional output artifact path
|
|
2009
2641
|
--format Console output format: table | json (default: table)
|
|
2010
2642
|
`;
|
|
2011
|
-
function normalizeCliArgv$
|
|
2643
|
+
/**
 * Strips an optional leading `--` separator and an optional `compare`
 * subcommand token from `argv`, returning a new array.
 */
function normalizeCliArgv$6(argv) {
  const separatorOffset = argv[0] === "--" ? 1 : 0;
  const remaining = argv.slice(separatorOffset);
  return remaining[0] === "compare" ? remaining.slice(1) : remaining;
}
|
|
2016
2648
|
function parseCompareCliArguments(argv) {
|
|
2017
2649
|
const cli = meow(compareHelpText, {
|
|
2018
|
-
argv: normalizeCliArgv$
|
|
2650
|
+
argv: normalizeCliArgv$6(argv),
|
|
2019
2651
|
flags: {
|
|
2020
2652
|
config: { type: "string" },
|
|
2021
2653
|
comparison: { type: "string" },
|
|
@@ -2120,7 +2752,7 @@ const evalRunHelpText = `
|
|
|
2120
2752
|
--report-out Report output root directory
|
|
2121
2753
|
--json Print machine-readable JSON output
|
|
2122
2754
|
`;
|
|
2123
|
-
function normalizeCliArgv$
|
|
2755
|
+
/**
 * Strips an optional leading `--` separator and an optional `run`
 * subcommand token from `argv`, returning a new array.
 */
function normalizeCliArgv$5(argv) {
  const separatorOffset = argv[0] === "--" ? 1 : 0;
  const remaining = argv.slice(separatorOffset);
  if (remaining[0] !== "run") return remaining;
  return remaining.slice(1);
}
|
|
@@ -2143,7 +2775,7 @@ function normalizeProjectNames(projectNames) {
|
|
|
2143
2775
|
*/
|
|
2144
2776
|
function parseCliArguments(argv) {
|
|
2145
2777
|
const cli = meow(evalRunHelpText, {
|
|
2146
|
-
argv: normalizeCliArgv$
|
|
2778
|
+
argv: normalizeCliArgv$5(argv),
|
|
2147
2779
|
importMeta: import.meta,
|
|
2148
2780
|
flags: {
|
|
2149
2781
|
config: { type: "string" },
|
|
@@ -2239,89 +2871,6 @@ async function runEvalRunCli(argv) {
|
|
|
2239
2871
|
}
|
|
2240
2872
|
}
|
|
2241
2873
|
//#endregion
|
|
2242
|
-
//#region src/cli/report-artifacts.ts
|
|
2243
|
-
/**
 * Locates `run-summary.json` files for a report location.
 *
 * Use when:
 * - the caller may hand in a summary file path, a run directory, or a report root
 *
 * Resolution order:
 * 1. an existing path ending in `.json` is returned as-is
 * 2. `<reportPath>/run-summary.json`, when it exists
 * 3. every `run-summary.json` found recursively below `reportPath`
 *
 * Returns:
 * - absolute summary file paths, sorted with `localeCompare`
 */
async function resolveRunSummaryPaths(reportPath) {
  const root = resolve(reportPath);
  if (root.endsWith(".json") && existsSync(root)) return [root];

  const nestedSummaryPath = resolve(root, "run-summary.json");
  if (existsSync(nestedSummaryPath)) return [nestedSummaryPath];

  const discovered = await glob("**/run-summary.json", {
    absolute: true,
    cwd: root
  });
  return discovered.sort((left, right) => left.localeCompare(right));
}
|
|
2262
|
-
/**
 * Loads one run's artifacts: `run-summary.json` plus the sibling `events.jsonl`.
 *
 * Use when:
 * - report analysis needs the run aggregate together with its event list and count
 *
 * Notes:
 * - a missing `events.jsonl` yields an empty event list
 * - blank lines in `events.jsonl` are ignored
 * - only `caseId`, `data`, `event`, and `taskId` are kept from each event line
 */
function readReportRunArtifact(summaryFilePath) {
  const reportDirectory = resolve(summaryFilePath, "..");
  const summary = JSON.parse(readFileSync(summaryFilePath, "utf-8"));
  const eventsFilePath = resolve(reportDirectory, "events.jsonl");

  const events = [];
  if (existsSync(eventsFilePath)) {
    for (const line of readFileSync(eventsFilePath, "utf-8").split("\n")) {
      if (line.trim().length === 0) continue;
      const { caseId, data, event, taskId } = JSON.parse(line);
      events.push({
        caseId,
        data,
        event,
        taskId
      });
    }
  }

  return {
    events,
    eventsCount: events.length,
    reportDirectory,
    summary,
    summaryFilePath
  };
}
|
|
2289
|
-
/**
 * Loads every run artifact discoverable from `reportPath`.
 *
 * Use when:
 * - a directory root should be analyzed across all of its runs
 *
 * Returns:
 * - one artifact per resolved `run-summary.json`, in resolution order
 */
async function readReportArtifacts(reportPath) {
  const summaryFilePaths = await resolveRunSummaryPaths(reportPath);
  const artifacts = [];
  for (const summaryFilePath of summaryFilePaths) {
    artifacts.push(readReportRunArtifact(summaryFilePath));
  }
  return artifacts;
}
|
|
2298
|
-
/**
 * Condenses one run artifact into a flat summary row.
 *
 * Use when:
 * - table/csv/jsonl exports need a stable, cheap-to-parse shape
 *
 * Returns:
 * - identity fields (`null` when absent from the summary), project and task
 *   totals, the list of project names, and the event count
 */
function summarizeReportRunArtifact(artifact) {
  const { summary } = artifact;
  const projectNames = [];
  let failedProjects = 0;
  let executedProjects = 0;
  let totalTasks = 0;

  // Single pass over the projects collects every per-project aggregate.
  for (const project of summary.projects) {
    if (project.errorMessage != null) failedProjects += 1;
    if (project.executed) executedProjects += 1;
    totalTasks = totalTasks + project.taskCount;
    projectNames.push(project.name);
  }

  return {
    attemptId: summary.attemptId ?? null,
    eventsCount: artifact.eventsCount,
    executedProjects,
    experimentId: summary.experimentId ?? null,
    failedProjects,
    projectNames,
    reportDirectory: artifact.reportDirectory,
    runId: summary.runId ?? null,
    totalProjects: summary.projects.length,
    totalTasks,
    workspaceId: summary.workspaceId ?? null
  };
}
|
|
2324
|
-
//#endregion
|
|
2325
2874
|
//#region src/cli/report-analyze.ts
|
|
2326
2875
|
const reportAnalyzeHelpText = `
|
|
2327
2876
|
Analyze generated vieval report artifacts.
|
|
@@ -2343,7 +2892,7 @@ const reportAnalyzeHelpText = `
|
|
|
2343
2892
|
--run-matrix Keep runs matching run-matrix selector "key=value[,key=value]"
|
|
2344
2893
|
--eval-matrix Keep runs matching eval-matrix selector "key=value[,key=value]"
|
|
2345
2894
|
`;
|
|
2346
|
-
function normalizeCliArgv$
|
|
2895
|
+
function normalizeCliArgv$4(argv) {
|
|
2347
2896
|
const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
|
|
2348
2897
|
if (normalizedArgv[0] === "report" && normalizedArgv[1] === "analyze") return normalizedArgv.slice(2);
|
|
2349
2898
|
if (normalizedArgv[0] === "analyze") return normalizedArgv.slice(1);
|
|
@@ -2351,7 +2900,7 @@ function normalizeCliArgv$2(argv) {
|
|
|
2351
2900
|
}
|
|
2352
2901
|
function parseReportAnalyzeCliArguments(argv) {
|
|
2353
2902
|
const cli = meow(reportAnalyzeHelpText, {
|
|
2354
|
-
argv: normalizeCliArgv$
|
|
2903
|
+
argv: normalizeCliArgv$4(argv),
|
|
2355
2904
|
flags: {
|
|
2356
2905
|
attempt: { type: "string" },
|
|
2357
2906
|
caseState: { type: "string" },
|
|
@@ -2640,6 +3189,473 @@ async function runReportAnalyzeCli(argv) {
|
|
|
2640
3189
|
}
|
|
2641
3190
|
}
|
|
2642
3191
|
//#endregion
|
|
3192
|
+
//#region src/cli/report-cases.ts
|
|
3193
|
+
const reportCasesHelpText = `
|
|
3194
|
+
Inspect normalized case records from generated vieval report artifacts.
|
|
3195
|
+
|
|
3196
|
+
Usage
|
|
3197
|
+
$ vieval report cases <reportPath> [options]
|
|
3198
|
+
|
|
3199
|
+
Options
|
|
3200
|
+
--format Output format: table | json | jsonl (default: table)
|
|
3201
|
+
--where Equality filter "key=value"; repeatable
|
|
3202
|
+
--group-by Case field, score name, or metric name used for grouped score summaries
|
|
3203
|
+
`;
|
|
3204
|
+
/**
 * Loads normalized case records from a report location.
 *
 * Use when:
 * - CLI tooling inspects case-level data from local report artifacts
 * - the caller may hand in a run directory, a `cases.jsonl` file, or a report root
 *
 * Expects:
 * - every discovered `cases.jsonl` holds one JSON document per non-blank line
 *
 * Returns:
 * - all parsed records, file by file in discovered path order
 *
 * Throws:
 * - when no `cases.jsonl` is found
 * - when a line is not valid JSON (message names the 1-based line and file)
 */
async function readCaseRecordsFromReport(reportPath) {
  const caseFilePaths = await resolveCaseRecordPaths(reportPath);
  if (caseFilePaths.length === 0) {
    throw new Error(`No cases.jsonl files found under "${resolve(reportPath)}".`);
  }

  const records = [];
  for (const caseFilePath of caseFilePaths) {
    const rawLines = readFileSync(caseFilePath, "utf-8").split("\n");
    rawLines.forEach((rawLine, lineIndex) => {
      const text = rawLine.trim();
      if (text.length === 0) return;
      let parsed;
      try {
        parsed = JSON.parse(text);
      } catch (error) {
        throw new Error(`Invalid cases.jsonl line ${lineIndex + 1} in "${caseFilePath}": ${errorMessageFrom(error) ?? "Unknown JSON parse failure."}`);
      }
      records.push(parsed);
    });
  }
  return records;
}
|
|
3235
|
+
/**
 * Produces the filtered (and optionally grouped) case inspection result.
 *
 * Use when:
 * - `vieval report cases` needs deterministic JSON/table output
 * - tests want pure filtering and grouping without process I/O
 *
 * Expects:
 * - each `options.where` entry has the form `key=value`
 * - lookup keys may name direct case fields, score names, or metric names
 *
 * Returns:
 * - `records`: a fresh array of the records matching every filter
 * - `groups`: grouped score summaries when `options.groupBy` is set, else `undefined`
 */
function buildReportCasesOutput(records, options) {
  const selectors = [];
  for (const rawSelector of options.where ?? []) selectors.push(parseSelector(rawSelector));

  const keptRecords = [];
  for (const record of records) {
    if (matchesWhereFilters(record, selectors)) keptRecords.push(record);
  }

  const groups = options.groupBy == null ? void 0 : buildCaseGroups(keptRecords, options.groupBy);
  return {
    groups,
    records: [...keptRecords]
  };
}
|
|
3257
|
+
/**
 * Entry point of the `vieval report cases` command.
 *
 * Call stack:
 *
 * published executable (`../bin/vieval`)
 *   -> {@link import('./index').runTopLevelCli}
 *     -> {@link runReportCasesCli}
 *       -> {@link readCaseRecordsFromReport}
 *
 * Use when:
 * - the top-level CLI dispatches local case artifact inspection
 *
 * Expects:
 * - argv is either `cases <reportPath> ...` or `<reportPath> ...`
 *
 * Returns:
 * - resolves once the requested output has been written to stdout; on any
 *   failure the message goes to stderr and `process.exitCode` is set to 1
 */
async function runReportCasesCli(argv) {
  try {
    const parsed = parseReportCasesCliArguments(argv);
    const records = await readCaseRecordsFromReport(parsed.reportPath);
    const output = buildReportCasesOutput(records, parsed);

    let rendered;
    switch (parsed.format) {
      case "json":
        rendered = `${JSON.stringify(output, null, 2)}\n`;
        break;
      case "jsonl":
        rendered = encodeJsonl(output.records);
        break;
      default:
        rendered = `${formatCasesTable(output)}\n`;
    }
    process.stdout.write(rendered);
  } catch (error) {
    const errorMessage = errorMessageFrom(error) ?? "Unknown report cases failure.";
    process.stderr.write(`[vieval report cases] ${errorMessage}\n`);
    process.exitCode = 1;
  }
}
|
|
3295
|
+
/**
 * Strips an optional leading `--` separator and an optional `report cases`
 * or `cases` subcommand prefix from `argv`, returning a new array.
 */
function normalizeCliArgv$3(argv) {
  const separatorOffset = argv[0] === "--" ? 1 : 0;
  const remaining = argv.slice(separatorOffset);
  const [first, second] = remaining;
  if (first === "report" && second === "cases") return remaining.slice(2);
  return first === "cases" ? remaining.slice(1) : remaining;
}
|
|
3301
|
+
/**
 * Parses `vieval report cases` arguments with meow.
 *
 * Returns:
 * - `{ format, groupBy, reportPath, where }`, with `format` normalized to
 *   `table` | `json` | `jsonl`
 *
 * Throws:
 * - when the positional `<reportPath>` argument is missing or empty
 */
function parseReportCasesCliArguments(argv) {
  const flagSchema = {
    format: {
      default: "table",
      type: "string"
    },
    groupBy: { type: "string" },
    where: {
      isMultiple: true,
      type: "string"
    }
  };
  const cli = meow(reportCasesHelpText, {
    argv: normalizeCliArgv$3(argv),
    flags: flagSchema,
    importMeta: import.meta
  });

  const reportPath = cli.input[0];
  const hasReportPath = reportPath != null && reportPath.length !== 0;
  if (!hasReportPath) throw new Error("Missing required <reportPath> argument.");

  const { flags } = cli;
  return {
    format: normalizeReportCasesFormat(flags.format),
    groupBy: flags.groupBy,
    reportPath,
    where: flags.where
  };
}
|
|
3326
|
+
/**
 * Maps a user-supplied format string (case-insensitive) to a supported
 * output format; anything other than `json` / `jsonl` falls back to `table`.
 */
function normalizeReportCasesFormat(value) {
  switch (value.toLowerCase()) {
    case "json":
      return "json";
    case "jsonl":
      return "jsonl";
    default:
      return "table";
  }
}
|
|
3332
|
+
/**
 * Locates `cases.jsonl` files for a report location.
 *
 * Resolution order:
 * 1. an existing path ending in `.jsonl` is returned as-is
 * 2. `<reportPath>/cases.jsonl`, when it exists
 * 3. every `cases.jsonl` found recursively below `reportPath`
 *
 * Returns:
 * - absolute case file paths, sorted with `localeCompare`
 */
async function resolveCaseRecordPaths(reportPath) {
  const root = resolve(reportPath);
  if (root.endsWith(".jsonl") && existsSync(root)) return [root];

  const nestedCaseFilePath = resolve(root, "cases.jsonl");
  if (existsSync(nestedCaseFilePath)) return [nestedCaseFilePath];

  const discovered = await glob("**/cases.jsonl", {
    absolute: true,
    cwd: root
  });
  return discovered.sort((left, right) => left.localeCompare(right));
}
|
|
3342
|
+
/**
 * Tells whether `record` satisfies every parsed `key=value` filter.
 *
 * A filter matches when the selector resolves to an existing value whose
 * string form equals the filter value. An empty filter list matches everything.
 */
function matchesWhereFilters(record, whereFilters) {
  for (const filter of whereFilters) {
    const lookup = getCaseSelectorValue(record, filter.key);
    if (!lookup.exists) return false;
    if (String(lookup.value) !== filter.value) return false;
  }
  return true;
}
|
|
3348
|
+
/**
 * Parses one `key=value` selector.
 *
 * The selector is split at the first `=`; both sides are trimmed, so the
 * value may itself contain `=`.
 *
 * Returns:
 * - `{ key, value }` with surrounding whitespace removed
 *
 * Throws:
 * - when there is no `=`, or when the key or value is empty after trimming
 */
function parseSelector(selector) {
  const separatorIndex = selector.indexOf("=");
  const key = separatorIndex < 0 ? "" : selector.slice(0, separatorIndex).trim();
  const value = separatorIndex < 0 ? "" : selector.slice(separatorIndex + 1).trim();
  // Validate after trimming: checking raw positions would accept inputs such
  // as "key= " or " =value" and hand back an empty value/key.
  if (key.length === 0 || value.length === 0) throw new Error(`Invalid selector "${selector}". Expected "key=value".`);
  return {
    key,
    value
  };
}
|
|
3356
|
+
/**
 * Buckets records by the value the `groupBy` selector resolves to and
 * summarizes scores per bucket.
 *
 * Records for which the selector does not resolve are left out. Group keys
 * have the form `<groupBy>=<value>` and are emitted in `localeCompare` order.
 */
function buildCaseGroups(records, groupBy) {
  const buckets = new Map();
  for (const record of records) {
    const lookup = getCaseSelectorValue(record, groupBy);
    if (!lookup.exists) continue;
    const label = `${groupBy}=${String(lookup.value)}`;
    let bucket = buckets.get(label);
    if (bucket == null) {
      bucket = {
        count: 0,
        scores: {}
      };
      buckets.set(label, bucket);
    }
    bucket.count += 1;
    addScores(bucket.scores, record.scores);
  }

  const ordered = [...buckets.entries()].sort(([left], [right]) => left.localeCompare(right));
  return Object.fromEntries(ordered.map(([label, bucket]) => [label, {
    count: bucket.count,
    scores: finalizeScores(bucket.scores)
  }]));
}
|
|
3374
|
+
/**
 * Accumulates one record's scores into a per-score running summary.
 *
 * For every `[scoreName, value]` pair in `scores`, the matching bucket in
 * `summary` is created on first sight (`{ average: 0, count: 0, sum: 0 }`),
 * then its `count` is incremented and `value` is added to its `sum`.
 * `average` is left untouched here.
 *
 * Expects:
 * - `summary` is a mutable object owned by the caller
 * - score values are numeric — NOTE(review): not validated here
 *
 * A nullish `scores` is treated as "no scores" and leaves `summary` unchanged.
 */
function addScores(summary, scores) {
  if (scores == null) return;
  for (const [scoreName, value] of Object.entries(scores)) {
    // Only own buckets count: a score named "constructor" or "toString" must
    // not resolve to the inherited Object.prototype member, otherwise the
    // counters below would be written onto a shared built-in.
    const existing = Object.prototype.hasOwnProperty.call(summary, scoreName) ? summary[scoreName] : void 0;
    const bucket = existing ?? {
      average: 0,
      count: 0,
      sum: 0
    };
    if (bucket !== existing) {
      // defineProperty (rather than assignment) also creates a real own
      // property for the name "__proto__".
      Object.defineProperty(summary, scoreName, {
        configurable: true,
        enumerable: true,
        value: bucket,
        writable: true
      });
    }
    bucket.count += 1;
    bucket.sum += value;
  }
}
|
|
3385
|
+
/**
 * Turns running score buckets into their final form.
 *
 * Returns:
 * - a new object keyed by score name in `localeCompare` order, where each
 *   entry carries `count`, `sum`, and `average` (`sum / count`, or 0 when
 *   `count` is 0)
 */
function finalizeScores(summary) {
  const orderedNames = Object.keys(summary).sort((left, right) => left.localeCompare(right));
  return Object.fromEntries(orderedNames.map((scoreName) => {
    const { count, sum } = summary[scoreName];
    const average = count === 0 ? 0 : sum / count;
    return [scoreName, {
      average,
      count,
      sum
    }];
  }));
}
|
|
3392
|
+
/**
 * Renders the case inspection result as plain text.
 *
 * Output is a title line, a case-count line, and — when groups are present —
 * a `Groups` heading followed by one line per group with its count and
 * per-score averages fixed to three decimals.
 */
function formatCasesTable(output) {
  const headerLines = ["CASES vieval report", `Case count ${output.records.length}`];
  if (output.groups == null) return headerLines.join("\n");

  const groupLines = Object.entries(output.groups).map(([groupKey, group]) => {
    const scoreParts = [];
    for (const [scoreName, bucket] of Object.entries(group.scores)) {
      scoreParts.push(`${scoreName}=${bucket.average.toFixed(3)}`);
    }
    const scoreText = scoreParts.join(" ");
    const suffix = scoreText.length > 0 ? ` ${scoreText}` : "";
    return `${groupKey} count=${group.count}${suffix}`;
  });
  return [...headerLines, "Groups", ...groupLines].join("\n");
}
|
|
3403
|
+
//#endregion
|
|
3404
|
+
//#region src/cli/report-case-compare.ts
|
|
3405
|
+
const reportCompareHelpText = `
|
|
3406
|
+
Compare normalized case records from two generated vieval reports.
|
|
3407
|
+
|
|
3408
|
+
Usage
|
|
3409
|
+
$ vieval report compare <leftReportPath> <rightReportPath> [options]
|
|
3410
|
+
|
|
3411
|
+
Options
|
|
3412
|
+
--format Output format: table | json (default: table)
|
|
3413
|
+
--case-key Case field, score name, or metric name used to match records
|
|
3414
|
+
--score-kind Score kind used for deltas (default: exact)
|
|
3415
|
+
--group-by Case field, score name, or metric name used for grouped deltas
|
|
3416
|
+
`;
|
|
3417
|
+
/**
 * Diffs two report runs at case granularity.
 *
 * Use when:
 * - per-case score movement between two runs needs to be inspected locally
 * - grouping should work off generic case fields, scores, or metrics
 *
 * Expects:
 * - `args.left` and `args.right` are arrays of normalized `cases.jsonl` rows
 * - `args.scoreKind` (default `exact`) names a numeric score on those rows
 *
 * Returns:
 * - `cases` matched on both sides (ordered by case key), `added` / `removed` unmatched rows,
 *   `overall` averages computed over every row of each side, up to ten
 *   `topImprovements` / `topRegressions`, and `groups` when `args.groupBy` is provided
 */
function buildCaseComparison(args) {
	const scoreKind = args.scoreKind ?? "exact";
	const leftByKey = indexRecordsByCaseKey(args.left, args.caseKey, "left");
	const rightByKey = indexRecordsByCaseKey(args.right, args.caseKey, "right");
	const matched = [];
	const removed = [];
	leftByKey.forEach((leftRecord, caseKey) => {
		const rightRecord = rightByKey.get(caseKey);
		if (rightRecord == null) {
			// Present on the left only.
			removed.push(leftRecord);
			return;
		}
		const leftScore = getScore(leftRecord, scoreKind);
		const rightScore = getScore(rightRecord, scoreKind);
		matched.push({
			caseKey,
			delta: {
				left: leftScore,
				right: rightScore,
				score: rightScore - leftScore
			},
			left: leftRecord,
			metricsChanged: diffMetrics(leftRecord.metrics, rightRecord.metrics),
			right: rightRecord
		});
	});
	const added = [];
	rightByKey.forEach((rightRecord, caseKey) => {
		// Present on the right only.
		if (!leftByKey.has(caseKey)) added.push(rightRecord);
	});
	// Ranked copy (largest gain first, ties by case key); taken before `matched` is re-sorted below.
	const rankedByDelta = [...matched].sort((a, b) => {
		const byDelta = b.delta.score - a.delta.score;
		return byDelta === 0 ? a.caseKey.localeCompare(b.caseKey) : byDelta;
	});
	added.sort(compareCaseRecords);
	matched.sort((a, b) => a.caseKey.localeCompare(b.caseKey));
	// Groups are built from the key-ordered rows so per-group sums accumulate in a stable order.
	const groups = args.groupBy == null ? void 0 : buildComparisonGroups(matched, args.groupBy);
	const leftAverage = averageScore(args.left, scoreKind);
	const rightAverage = averageScore(args.right, scoreKind);
	removed.sort(compareCaseRecords);
	return {
		added,
		cases: matched,
		groups,
		overall: {
			delta: rightAverage - leftAverage,
			leftAverage,
			rightAverage
		},
		removed,
		topImprovements: rankedByDelta.filter((row) => row.delta.score > 0).slice(0, 10),
		// Reversing the ranked list puts the largest drop first.
		topRegressions: rankedByDelta.filter((row) => row.delta.score < 0).reverse().slice(0, 10)
	};
}
|
|
3477
|
+
/**
 * Entry point for the `vieval report compare` command.
 *
 * Call stack:
 *
 * published executable (`../bin/vieval`)
 *   -> {@link import('./index').runTopLevelCli}
 *     -> {@link runReportCompareCli}
 *       -> {@link readCaseRecordsFromReport}
 *       -> {@link buildCaseComparison}
 *
 * Use when:
 * - two local report artifact directories should be compared case-by-case
 *
 * Expects:
 * - argv is either `compare <left> <right> ...` or `<left> <right> ...`
 *
 * Returns:
 * - resolves after writing the requested output to stdout; on any failure the
 *   message is written to stderr and `process.exitCode` is set to 1 instead of rejecting
 */
async function runReportCompareCli(argv) {
	try {
		const { caseKey, format, groupBy, leftReportPath, rightReportPath, scoreKind } = parseReportCompareCliArguments(argv);
		// Both reports are read concurrently.
		const [left, right] = await Promise.all([readCaseRecordsFromReport(leftReportPath), readCaseRecordsFromReport(rightReportPath)]);
		const output = buildCaseComparison({
			caseKey,
			groupBy,
			left,
			right,
			scoreKind
		});
		const rendered = format === "json" ? JSON.stringify(output, null, 2) : formatCaseComparisonTable(output);
		process.stdout.write(`${rendered}\n`);
	} catch (error) {
		const errorMessage = errorMessageFrom(error) ?? "Unknown report compare failure.";
		process.stderr.write(`[vieval report compare] ${errorMessage}\n`);
		process.exitCode = 1;
	}
}
|
|
3519
|
+
/**
 * Strips routing tokens so only the compare command's own arguments remain.
 *
 * Drops a leading `--`, then a leading `report compare` or `compare` prefix.
 * Always returns a new array; the input is never mutated.
 */
function normalizeCliArgv$2(argv) {
	const tokens = argv[0] === "--" ? argv.slice(1) : Array.from(argv);
	const [first, second] = tokens;
	if (first === "compare") return tokens.slice(1);
	if (first === "report" && second === "compare") return tokens.slice(2);
	return tokens;
}
|
|
3525
|
+
/**
 * Parses `vieval report compare` arguments into a plain options object.
 *
 * Expects:
 * - two positional report paths after routing tokens are stripped
 *
 * Returns:
 * - `{ caseKey, format, groupBy, leftReportPath, rightReportPath, scoreKind }`;
 *   any `--format` value other than `json` resolves to `table`
 *
 * Throws:
 * - `Error` when either positional report path is missing or empty
 */
function parseReportCompareCliArguments(argv) {
	const flagDefinitions = {
		caseKey: { type: "string" },
		format: {
			default: "table",
			type: "string"
		},
		groupBy: { type: "string" },
		scoreKind: {
			default: "exact",
			type: "string"
		}
	};
	const { flags, input } = meow(reportCompareHelpText, {
		argv: normalizeCliArgv$2(argv),
		flags: flagDefinitions,
		importMeta: import.meta
	});
	const [leftReportPath, rightReportPath] = input;
	const isBlank = (value) => value == null || value.length === 0;
	if (isBlank(leftReportPath) || isBlank(rightReportPath)) throw new Error("Missing required <leftReportPath> and <rightReportPath> arguments.");
	return {
		caseKey: flags.caseKey,
		format: flags.format === "json" ? "json" : "table",
		groupBy: flags.groupBy,
		leftReportPath,
		rightReportPath,
		scoreKind: flags.scoreKind
	};
}
|
|
3554
|
+
/**
 * Builds a Map from resolved case key to record, preserving input order.
 *
 * Throws:
 * - `Error` naming `side` when two records resolve to the same key
 */
function indexRecordsByCaseKey(records, caseKey, side) {
	const byKey = /* @__PURE__ */ new Map();
	for (const record of records) {
		const key = resolveCaseKey(record, caseKey);
		if (!byKey.has(key)) {
			byKey.set(key, record);
			continue;
		}
		throw new Error(`Duplicate case key "${key}" in ${side} report.`);
	}
	return byKey;
}
|
|
3563
|
+
/**
 * Determines the key used to match a record across two reports.
 *
 * With an explicit `caseKey` selector the selected value is required.
 * Without one, the first existing value of `benchmark.case.id`, then
 * `vieval.case.id`, is used, falling back to `record.caseId`.
 *
 * Throws:
 * - `Error` when an explicit `caseKey` does not resolve on the record
 */
function resolveCaseKey(record, caseKey) {
	if (caseKey == null) {
		// Default lookup order; selectors are only evaluated until one exists.
		for (const selector of ["benchmark.case.id", "vieval.case.id"]) {
			const candidate = getCaseSelectorValue(record, selector);
			if (candidate.exists) return String(candidate.value);
		}
		return record.caseId;
	}
	const explicit = getCaseSelectorValue(record, caseKey);
	if (!explicit.exists) throw new Error(`Missing explicit case key "${caseKey}" for case "${record.caseId}".`);
	return String(explicit.value);
}
|
|
3574
|
+
/**
 * Reads one score from a case record for delta computation.
 *
 * Expects:
 * - `record.scores` is an object keyed by score kind
 *
 * Returns:
 * - the score when it is a number, otherwise 0
 *
 * Non-number values (missing, `null`, strings, or inherited properties such as
 * `constructor`) are treated as absent, matching the `typeof === "number"` rule
 * used by `averageScore`. This keeps deltas numeric so `.toFixed` formatting cannot throw.
 */
function getScore(record, scoreKind) {
	const value = record.scores[scoreKind];
	return typeof value === "number" ? value : 0;
}
|
|
3577
|
+
/**
 * Averages one score kind across records.
 *
 * Records whose value for `scoreKind` is not a number are left out of both
 * the sum and the divisor. Returns 0 when no record contributes.
 */
function averageScore(records, scoreKind) {
	const tally = records.reduce((running, record) => {
		const value = record.scores[scoreKind];
		if (typeof value === "number") {
			running.total += value;
			running.counted += 1;
		}
		return running;
	}, {
		counted: 0,
		total: 0
	});
	return tally.counted === 0 ? 0 : tally.total / tally.counted;
}
|
|
3582
|
+
/**
 * Lists the metrics whose values differ between two records.
 *
 * Values are compared through `stableStringify`. The result maps each differing
 * metric name (visited in `localeCompare` order) to its `{ left, right }` values;
 * a metric present on only one side has `undefined` on the other.
 */
function diffMetrics(left, right) {
	const metricNames = Array.from(new Set(Object.keys(left).concat(Object.keys(right))));
	metricNames.sort((a, b) => a.localeCompare(b));
	const changed = {};
	for (const name of metricNames) {
		if (stableStringify(left[name]) === stableStringify(right[name])) continue;
		changed[name] = {
			left: left[name],
			right: right[name]
		};
	}
	return changed;
}
|
|
3591
|
+
/**
 * Summarizes matched rows per group.
 *
 * The group value is selected from each row's right-hand record; rows where the
 * selector does not exist are left out. Group keys have the form
 * `<groupBy>=<value>` and appear in `localeCompare` order.
 *
 * Returns:
 * - an object mapping group key to `{ count, delta, leftAverage, rightAverage }`
 */
function buildComparisonGroups(cases, groupBy) {
	const rowsByGroup = /* @__PURE__ */ new Map();
	for (const row of cases) {
		const selected = getCaseSelectorValue(row.right, groupBy);
		if (!selected.exists) continue;
		const groupKey = `${groupBy}=${String(selected.value)}`;
		const bucket = rowsByGroup.get(groupKey);
		if (bucket == null) rowsByGroup.set(groupKey, [row]);
		else bucket.push(row);
	}
	const orderedGroups = [...rowsByGroup].sort(([a], [b]) => a.localeCompare(b));
	return Object.fromEntries(orderedGroups.map(([groupKey, rows]) => {
		let leftTotal = 0;
		let rightTotal = 0;
		for (const row of rows) {
			leftTotal += row.delta.left;
			rightTotal += row.delta.right;
		}
		const leftAverage = leftTotal / rows.length;
		const rightAverage = rightTotal / rows.length;
		return [groupKey, {
			count: rows.length,
			delta: rightAverage - leftAverage,
			leftAverage,
			rightAverage
		}];
	}));
}
|
|
3611
|
+
/**
 * Sort comparator ordering case records by `caseId` via `localeCompare`.
 */
function compareCaseRecords(left, right) {
	const leftId = left.caseId;
	return leftId.localeCompare(right.caseId);
}
|
|
3614
|
+
/**
 * Renders a case comparison as plain text for the terminal.
 *
 * Use when:
 * - `vieval report compare` runs with the table format
 * - a quick overview of group and per-case deltas is wanted
 *
 * Expects:
 * - `output` has the shape produced by {@link buildCaseComparison}
 *
 * Returns:
 * - newline-joined text: a fixed five-line header, then the `Groups`,
 *   `Top improvements`, `Top regressions`, `Cases`, `Added cases`, and
 *   `Removed cases` sections, each emitted only when it has content
 */
function formatCaseComparisonTable(output) {
	const { added, cases, groups, overall, removed, topImprovements, topRegressions } = output;
	const fixed = (value) => value.toFixed(3);
	// A titled section, or nothing at all when there are no rows.
	const section = (title, rows, render) => rows.length === 0 ? [] : [title, ...rows.map(render)];
	const renderTopRow = (row) => `${row.caseKey} delta=${fixed(row.delta.score)} left=${fixed(row.delta.left)} right=${fixed(row.delta.right)}`;
	const renderGroup = ([groupKey, group]) => `${groupKey} count=${group.count} left=${fixed(group.leftAverage)} right=${fixed(group.rightAverage)} delta=${fixed(group.delta)}`;
	const renderCase = (row) => {
		const changedMetricNames = Object.keys(row.metricsChanged);
		const changedText = changedMetricNames.length === 0 ? "none" : changedMetricNames.join(",");
		return `${row.caseKey} delta=${fixed(row.delta.score)} changedMetrics=${changedText}`;
	};
	const caseIds = (records) => records.map((record) => record.caseId).join(",");
	return [
		"COMPARE vieval report cases",
		`Matched ${cases.length}`,
		`Added ${added.length}`,
		`Removed ${removed.length}`,
		`Scores left=${fixed(overall.leftAverage)} right=${fixed(overall.rightAverage)} delta=${fixed(overall.delta)}`,
		...section("Groups", groups == null ? [] : Object.entries(groups), renderGroup),
		...section("Top improvements", topImprovements, renderTopRow),
		...section("Top regressions", topRegressions, renderTopRow),
		...section("Cases", cases, renderCase),
		...added.length > 0 ? [`Added cases ${caseIds(added)}`] : [],
		...removed.length > 0 ? [`Removed cases ${caseIds(removed)}`] : []
	].join("\n");
}
|
|
3658
|
+
//#endregion
|
|
2643
3659
|
//#region src/cli/report-index.ts
|
|
2644
3660
|
const reportIndexHelpText = `
|
|
2645
3661
|
Build report indexes from generated vieval artifacts.
|
|
@@ -2807,7 +3823,15 @@ async function runTopLevelCli(argv) {
|
|
|
2807
3823
|
await runReportIndexCli(parsed.commandArgv);
|
|
2808
3824
|
return;
|
|
2809
3825
|
}
|
|
2810
|
-
|
|
3826
|
+
if (reportSubcommand === "cases") {
|
|
3827
|
+
await runReportCasesCli(parsed.commandArgv);
|
|
3828
|
+
return;
|
|
3829
|
+
}
|
|
3830
|
+
if (reportSubcommand === "compare") {
|
|
3831
|
+
await runReportCompareCli(parsed.commandArgv);
|
|
3832
|
+
return;
|
|
3833
|
+
}
|
|
3834
|
+
throw new Error(`Unsupported vieval report command "${reportSubcommand ?? "(none)"}". Expected "analyze", "index", "cases", or "compare".`);
|
|
2811
3835
|
}
|
|
2812
3836
|
if (parsed.command === "compare") {
|
|
2813
3837
|
await runCompareCliOrExit(parsed.commandArgv);
|
|
@@ -2818,4 +3842,4 @@ async function runTopLevelCli(argv) {
|
|
|
2818
3842
|
//#endregion
|
|
2819
3843
|
export { runTopLevelCli as n, parseTopLevelCliArguments as t };
|
|
2820
3844
|
|
|
2821
|
-
//# sourceMappingURL=cli-
|
|
3845
|
+
//# sourceMappingURL=cli-Dao25VxV.mjs.map
|