vieval 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +27 -2
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/bin/vieval.mjs.map +1 -1
  4. package/dist/cli/index.mjs +1 -1
  5. package/dist/{cli-Dao25VxV.mjs → cli-CHFCF8UR.mjs} +670 -600
  6. package/dist/cli-CHFCF8UR.mjs.map +1 -0
  7. package/dist/config.d.mts +1 -1
  8. package/dist/config.mjs +1 -1
  9. package/dist/config.mjs.map +1 -1
  10. package/dist/core/assertions/index.d.mts +1 -1
  11. package/dist/core/assertions/index.mjs.map +1 -1
  12. package/dist/core/inference-executors/index.d.mts +1 -1
  13. package/dist/core/inference-executors/index.mjs +3 -3
  14. package/dist/core/inference-executors/index.mjs.map +1 -1
  15. package/dist/core/processors/results/index.d.mts +1 -1
  16. package/dist/core/processors/results/index.mjs.map +1 -1
  17. package/dist/core/runner/index.d.mts +2 -2
  18. package/dist/core/runner/index.mjs +4 -4
  19. package/dist/core/runner/index.mjs.map +1 -1
  20. package/dist/core/scheduler/index.d.mts +1 -1
  21. package/dist/core/scheduler/index.mjs +3 -3
  22. package/dist/core/scheduler/index.mjs.map +1 -1
  23. package/dist/{env-nV5rVErX.mjs → env-BVYeJhGA.mjs} +1 -1
  24. package/dist/{env-nV5rVErX.mjs.map → env-BVYeJhGA.mjs.map} +1 -1
  25. package/dist/{env-DfWZy_n4.d.mts → env-bRH0K6fU.d.mts} +1 -1
  26. package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-Mf1sMNBv.mjs} +1 -1
  27. package/dist/{expect-extensions-DCSqlneN.mjs.map → expect-extensions-Mf1sMNBv.mjs.map} +1 -1
  28. package/dist/expect.d.mts +1 -3
  29. package/dist/expect.mjs +1 -1
  30. package/dist/expect.mjs.map +1 -1
  31. package/dist/{index-fakXoZEe.d.mts → index-Be5I1ZJL.d.mts} +4 -3
  32. package/dist/{index-BkjyCInx.d.mts → index-CwKBlCG9.d.mts} +2 -2
  33. package/dist/index.d.mts +3 -4
  34. package/dist/index.mjs +22 -27
  35. package/dist/index.mjs.map +1 -1
  36. package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
  37. package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
  38. package/dist/plugins/chat-models/index.d.mts +1 -1
  39. package/dist/plugins/chat-models/index.mjs +2 -2
  40. package/dist/plugins/chat-models/index.mjs.map +1 -1
  41. package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
  42. package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
  43. package/dist/{registry-BHGMxjpA.mjs → registry-BSyjwZFx.mjs} +55 -11
  44. package/dist/registry-BSyjwZFx.mjs.map +1 -0
  45. package/dist/testing/expect-extensions.mjs +1 -1
  46. package/package.json +10 -10
  47. package/dist/cli-Dao25VxV.mjs.map +0 -1
  48. package/dist/registry-BHGMxjpA.mjs.map +0 -1
@@ -1,16 +1,17 @@
1
- import { a as createOpenTelemetryRuntime, c as detectCliConfigMode, d as loadVievalCliConfig, n as consumeModuleRegistrations, o as createNoopTelemetryRuntime, r as endModuleRegistration, t as beginModuleRegistration, u as loadRawVievalConfig } from "./registry-BHGMxjpA.mjs";
1
+ import { a as createOpenTelemetryRuntime, c as detectCliConfigMode, d as loadVievalCliConfig, n as consumeModuleRegistrations, o as createNoopTelemetryRuntime, r as endModuleRegistration, t as beginModuleRegistration, u as loadRawVievalConfig } from "./registry-BSyjwZFx.mjs";
2
2
  import { createSchedulerRuntime } from "./core/scheduler/index.mjs";
3
3
  import { RunnerExecutionError, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createTaskExecutionContext, runScheduledTasks } from "./core/runner/index.mjs";
4
4
  import process from "node:process";
5
5
  import { errorMessageFrom } from "@moeru/std";
6
6
  import meow from "meow";
7
+ import { access, mkdir, mkdtemp, writeFile } from "node:fs/promises";
8
+ import { tmpdir } from "node:os";
7
9
  import { basename, dirname, isAbsolute, join, relative, resolve } from "node:path";
8
- import { access, mkdir, writeFile } from "node:fs/promises";
9
10
  import { glob } from "tinyglobby";
10
11
  import { pathToFileURL } from "node:url";
12
+ import { existsSync, readFileSync } from "node:fs";
11
13
  import { randomUUID } from "node:crypto";
12
14
  import c from "tinyrainbow";
13
- import { existsSync, readFileSync } from "node:fs";
14
15
  import { uniq } from "es-toolkit";
15
16
  import { createVitest } from "vitest/node";
16
17
  import { formatDuration, intervalToDuration } from "date-fns";
@@ -156,393 +157,99 @@ async function loadVievalComparisonConfig(options = {}) {
156
157
  }
157
158
  }
158
159
  //#endregion
159
- //#region src/cli/report-compare.ts
160
- /**
161
- * Builds a compact compare report sorted by hybrid/exact score.
162
- */
163
- function buildCompareReportArtifact(args) {
164
- const rows = args.methods.map((method) => {
165
- const overall = method.output.projects[0]?.result?.overall;
166
- return {
167
- exactAverage: overall?.exactAverage ?? null,
168
- hybridAverage: overall?.hybridAverage ?? null,
169
- methodId: method.methodId,
170
- runCount: overall?.runCount ?? 0
171
- };
172
- });
173
- rows.sort((left, right) => {
174
- const leftHybrid = left.hybridAverage ?? Number.NEGATIVE_INFINITY;
175
- const rightHybrid = right.hybridAverage ?? Number.NEGATIVE_INFINITY;
176
- if (leftHybrid !== rightHybrid) return rightHybrid - leftHybrid;
177
- const leftExact = left.exactAverage ?? Number.NEGATIVE_INFINITY;
178
- return (right.exactAverage ?? Number.NEGATIVE_INFINITY) - leftExact;
179
- });
180
- return {
181
- benchmarkId: args.benchmarkId,
182
- methods: rows,
183
- reportPath: args.reportPath
184
- };
185
- }
186
- /**
187
- * Writes compare report artifact as JSON.
188
- */
189
- async function writeCompareReportArtifact(args) {
190
- const outputPath = resolve(args.outputPath);
191
- await mkdir(dirname(outputPath), { recursive: true });
192
- await writeFile(outputPath, `${JSON.stringify(args.artifact, null, 2)}\n`, "utf-8");
193
- return outputPath;
194
- }
195
- //#endregion
196
- //#region src/cli/discovery.ts
197
- /**
198
- * Discovers eval files using include/exclude globs relative to project root.
199
- *
200
- * Before:
201
- * - Absolute path file list from recursive filesystem walk
202
- *
203
- * After:
204
- * - Filtered absolute path list matching include/exclude rules
205
- */
206
- async function discoverEvalFiles(options) {
207
- return uniq(await glob([...options.include], {
208
- absolute: true,
209
- cwd: options.root,
210
- ignore: [...options.exclude],
211
- onlyFiles: true
212
- })).sort((left, right) => left.localeCompare(right));
213
- }
214
- //#endregion
215
- //#region src/cli/module-runtime.ts
160
+ //#region src/cli/report-records.ts
216
161
  /**
217
- * Loads eval modules and returns a normalized eval-module map.
162
+ * Builds normalized case records from lifecycle, metric, and score events.
218
163
  *
219
164
  * Use when:
220
- * - CLI collection needs Vite/Vitest-powered module resolution and transforms
221
- * - eval files should be imported with the same runtime semantics as Vitest
165
+ * - `events.jsonl` should be projected into `cases.jsonl`
166
+ * - report commands need one final record per observed case outcome
222
167
  *
223
168
  * Expects:
224
- * - `projectRoot` points at the project that owns the eval files
225
- * - each `evalFilePaths` entry is an absolute file path
169
+ * - events are ordered by occurrence where possible
170
+ * - lifecycle events use either `task.case.start`/`task.case.end` or current CLI `CaseStarted`/`CaseEnded` names
226
171
  *
227
172
  * Returns:
228
- * - eval modules keyed by stable file href + optional registration suffixes
173
+ * - records for cases that emitted an end lifecycle event
229
174
  */
230
- async function loadEvalModulesWithVitestRuntime(evalFilePaths, projectRoot) {
231
- const loadedModules = {};
232
- const runtime = await createVitest("test", {
233
- config: false,
234
- root: projectRoot,
235
- run: false,
236
- silent: true,
237
- watch: false
238
- });
239
- try {
240
- for (const evalFilePath of evalFilePaths) {
241
- const moduleHref = pathToFileURL(evalFilePath).href;
242
- beginModuleRegistration(moduleHref);
243
- try {
244
- const moduleValue = await runtime.import(moduleHref);
245
- const registeredDefinitions = consumeModuleRegistrations(moduleHref);
246
- const defaultDefinition = moduleValue.default;
247
- const definitions = [...registeredDefinitions, ...defaultDefinition == null ? [] : [defaultDefinition]];
248
- const deduplicatedDefinitions = definitions.filter((definition, index) => {
249
- const key = `${definition.name}::${definition.description}::${definition.task?.id ?? ""}`;
250
- return definitions.findIndex((candidate) => `${candidate.name}::${candidate.description}::${candidate.task?.id ?? ""}` === key) === index;
251
- });
252
- if (deduplicatedDefinitions.length === 0) continue;
253
- for (const [definitionIndex, definition] of deduplicatedDefinitions.entries()) {
254
- const moduleKey = definitionIndex === 0 ? moduleHref : `${moduleHref}#registration-${definitionIndex + 1}`;
255
- loadedModules[moduleKey] = { default: definition };
256
- }
257
- } finally {
258
- endModuleRegistration();
259
- }
175
+ function buildCaseRecords(args) {
176
+ const drafts = /* @__PURE__ */ new Map();
177
+ const completedKeys = [];
178
+ for (const event of args.events) {
179
+ const normalizedEvent = normalizeCaseEventName(event.event);
180
+ if (normalizedEvent == null) continue;
181
+ const ids = extractEventIds(event, args);
182
+ if (ids.caseId.length === 0 || ids.taskId.length === 0) continue;
183
+ const draft = getOrCreateDraft(drafts, ids, event, args);
184
+ applyIdentity(draft, ids, event, args);
185
+ if (normalizedEvent === "start") applyCaseStart(draft, event);
186
+ else if (normalizedEvent === "metric") applyCaseMetric(draft, event);
187
+ else if (normalizedEvent === "score") applyCaseScore(draft, event);
188
+ else {
189
+ applyCaseEnd(draft, event);
190
+ const key = createCaseKey(ids.taskId, ids.caseId);
191
+ if (!completedKeys.includes(key)) completedKeys.push(key);
260
192
  }
261
- } finally {
262
- await runtime.close();
263
193
  }
264
- return loadedModules;
194
+ return completedKeys.map((key) => drafts.get(key)).filter((draft) => draft != null && draft.endedAt != null).map(toCaseRecord);
265
195
  }
266
- //#endregion
267
- //#region src/cli/report-selectors.ts
268
196
  /**
269
- * Resolves a generic case selector from metrics, scores, then direct fields.
197
+ * Builds generic score summaries overall and grouped by arbitrary keys.
270
198
  *
271
199
  * Use when:
272
- * - report commands accept benchmark-neutral selectors such as `benchmark.case.id`
273
- * - comparisons need the same lookup semantics as filtering and grouping
200
+ * - report artifacts need benchmark-neutral aggregate score views
201
+ * - callers want to group by metrics such as `benchmark.category` or direct record fields such as `taskId`
274
202
  *
275
203
  * Expects:
276
- * - `key` is a direct `CaseRecord` field, score key, `scores.<key>`, or metric key
204
+ * - `groupByKeys` are stable metric names or direct `CaseRecord` field names
205
+ * - record score values are normalized numeric scores
277
206
  *
278
207
  * Returns:
279
- * - existence flag plus matched value when present
208
+ * - overall score buckets and group buckets keyed by `<key>=<value>`
280
209
  */
281
- function getCaseSelectorValue(record, key) {
282
- if (Object.hasOwn(record.metrics, key)) return {
283
- exists: true,
284
- value: record.metrics[key]
285
- };
286
- if (key.startsWith("scores.") && Object.hasOwn(record.scores, key.slice(7))) return {
287
- exists: true,
288
- value: record.scores[key.slice(7)]
289
- };
290
- if (Object.hasOwn(record.scores, key)) return {
291
- exists: true,
292
- value: record.scores[key]
293
- };
294
- if (Object.hasOwn(record, key)) return {
295
- exists: true,
296
- value: record[key]
210
+ function buildMetricsSummary(records, groupByKeys) {
211
+ const overall = {};
212
+ const groups = {};
213
+ for (const record of records) {
214
+ addRecordScores(overall, record);
215
+ for (const groupByKey of groupByKeys) {
216
+ const groupValue = getGroupValue(record, groupByKey);
217
+ if (!groupValue.exists) continue;
218
+ const groupKey = `${groupByKey}=${String(groupValue.value)}`;
219
+ groups[groupKey] ??= {};
220
+ addRecordScores(groups[groupKey], record);
221
+ }
222
+ }
223
+ return {
224
+ groups: finalizeSummaryGroups(groups),
225
+ overall: finalizeScoreSummary(overall)
297
226
  };
298
- return { exists: false };
299
- }
300
- /**
301
- * Stable-stringifies JSON-like values for report comparisons.
302
- *
303
- * Before:
304
- * - `{ b: 1, a: true }`
305
- *
306
- * After:
307
- * - `{"a":true,"b":1}`
308
- */
309
- function stableStringify(value) {
310
- if (value == null || typeof value !== "object") return JSON.stringify(value);
311
- if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
312
- const record = value;
313
- return `{${Object.keys(record).sort((left, right) => left.localeCompare(right)).map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(",")}}`;
314
227
  }
315
- //#endregion
316
- //#region src/cli/report-otlp.ts
317
228
  /**
318
- * Builds local OTLP-shaped JSON projections from normalized case records.
229
+ * Encodes records as newline-delimited JSON.
319
230
  *
320
231
  * Use when:
321
- * - writing deterministic report artifacts without requiring an OpenTelemetry Collector
322
- * - future tools need trace/log/metric-shaped JSON files
232
+ * - writing `cases.jsonl` for command-line tools, dataframes, or streaming parsers
233
+ * - each record should occupy exactly one JSON line
323
234
  *
324
235
  * Expects:
325
- * - records belong to one Vieval run
236
+ * - records are JSON-serializable case records
326
237
  *
327
238
  * Returns:
328
- * - trace, log, and metric containers shaped after OTLP JSON concepts
239
+ * - one JSON object per line with a trailing newline for non-empty input
329
240
  */
330
- function buildLocalOtlpProjection(args) {
331
- const projectSpans = collectProjectNames(args.records).map((projectName) => ({
332
- attributes: toAttributes({
333
- "vieval.project.name": projectName,
334
- "vieval.run.id": args.runId
335
- }),
336
- name: "vieval.project"
337
- }));
338
- const taskSpans = collectTasks(args.records).map((task) => ({
339
- attributes: toAttributes({
340
- "vieval.project.name": task.projectName,
341
- "vieval.run.id": args.runId,
342
- "vieval.task.id": task.taskId
343
- }),
344
- name: "vieval.task"
345
- }));
346
- const caseSpans = args.records.map((record) => ({
347
- attributes: toAttributes({
348
- ...record.metrics,
349
- "vieval.case.duration_ms": record.durationMs,
350
- "vieval.case.id": record.caseId,
351
- "vieval.case.name": record.caseName,
352
- "vieval.case.retry_count": record.retryCount,
353
- "vieval.case.state": record.state,
354
- "vieval.project.name": record.projectName,
355
- "vieval.task.id": record.taskId
356
- }),
357
- endTimeUnixNano: isoToUnixNano(record.endedAt),
358
- name: "vieval.case",
359
- startTimeUnixNano: isoToUnixNano(record.startedAt)
360
- }));
361
- return {
362
- logs: { resourceLogs: [{ scopeLogs: [{
363
- logRecords: args.records.map((record) => ({
364
- attributes: toAttributes(record.metrics),
365
- body: { stringValue: JSON.stringify({
366
- caseId: record.caseId,
367
- scores: record.scores,
368
- state: record.state
369
- }) },
370
- eventName: "vieval.case",
371
- timeUnixNano: isoToUnixNano(record.endedAt)
372
- })),
373
- scope: { name: "vieval" }
374
- }] }] },
375
- metrics: { resourceMetrics: [{ scopeMetrics: [{
376
- metrics: collectScoreKinds(args.records).map((kind) => ({
377
- gauge: { dataPoints: args.records.filter((record) => typeof record.scores[kind] === "number").map((record) => ({
378
- asDouble: record.scores[kind],
379
- attributes: toAttributes({
380
- ...record.metrics,
381
- "vieval.case.id": record.caseId,
382
- "vieval.task.id": record.taskId
383
- }),
384
- timeUnixNano: isoToUnixNano(record.endedAt)
385
- })) },
386
- name: `vieval.score.${kind}`
387
- })),
388
- scope: { name: "vieval" }
389
- }] }] },
390
- traces: { resourceSpans: [{ scopeSpans: [{
391
- scope: { name: "vieval" },
392
- spans: [
393
- {
394
- attributes: toAttributes({ "vieval.run.id": args.runId }),
395
- name: "vieval.run"
396
- },
397
- ...projectSpans,
398
- ...taskSpans,
399
- ...caseSpans
400
- ]
401
- }] }] }
402
- };
403
- }
404
- function toAttributes(attributes) {
405
- return Object.entries(attributes).filter(([, value]) => value !== void 0).sort(([leftKey], [rightKey]) => leftKey.localeCompare(rightKey)).map(([key, value]) => ({
406
- key,
407
- value: toAnyValue(value)
408
- }));
409
- }
410
- function toAnyValue(value) {
411
- if (Array.isArray(value)) return { arrayValue: { values: value.map((item) => toAnyValue(item)) } };
412
- if (isAttributeScalar(value)) {
413
- if (typeof value === "boolean") return { boolValue: value };
414
- if (typeof value === "number") return Number.isFinite(value) ? { doubleValue: value } : { stringValue: String(value) };
415
- if (value == null) return { stringValue: "null" };
416
- return { stringValue: value };
417
- }
418
- return { stringValue: stableStringify(value) };
419
- }
420
- function isAttributeScalar(value) {
421
- return value == null || typeof value === "boolean" || typeof value === "number" || typeof value === "string";
422
- }
423
- function isoToUnixNano(value) {
424
- const preciseMatch = /^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.(\d{1,9}))?(Z|[+-]\d{2}:\d{2})$/.exec(value);
425
- if (preciseMatch != null) {
426
- const [, secondsPart, fraction = "", zone] = preciseMatch;
427
- const unixMilliseconds = Date.parse(`${secondsPart}.000${zone}`);
428
- if (!Number.isFinite(unixMilliseconds)) return "0";
429
- return String(BigInt(unixMilliseconds) * 1000000n + BigInt(fraction.padEnd(9, "0").slice(0, 9)));
430
- }
431
- const unixMilliseconds = Date.parse(value);
432
- if (!Number.isFinite(unixMilliseconds)) return "0";
433
- return String(BigInt(unixMilliseconds) * 1000000n);
434
- }
435
- function collectScoreKinds(records) {
436
- return [...new Set(records.flatMap((record) => Object.keys(record.scores)))].sort((left, right) => left.localeCompare(right));
437
- }
438
- function collectProjectNames(records) {
439
- return [...new Set(records.map((record) => record.projectName))].sort((left, right) => left.localeCompare(right));
440
- }
441
- function collectTasks(records) {
442
- const tasks = /* @__PURE__ */ new Map();
443
- for (const record of records) tasks.set(`${record.projectName}\0${record.taskId}`, {
444
- projectName: record.projectName,
445
- taskId: record.taskId
446
- });
447
- return [...tasks.values()].sort((left, right) => {
448
- const projectOrder = left.projectName.localeCompare(right.projectName);
449
- return projectOrder === 0 ? left.taskId.localeCompare(right.taskId) : projectOrder;
450
- });
451
- }
452
- //#endregion
453
- //#region src/cli/report-records.ts
454
- /**
455
- * Builds normalized case records from lifecycle, metric, and score events.
456
- *
457
- * Use when:
458
- * - `events.jsonl` should be projected into `cases.jsonl`
459
- * - report commands need one final record per observed case outcome
460
- *
461
- * Expects:
462
- * - events are ordered by occurrence where possible
463
- * - lifecycle events use either `task.case.start`/`task.case.end` or current CLI `CaseStarted`/`CaseEnded` names
464
- *
465
- * Returns:
466
- * - records for cases that emitted an end lifecycle event
467
- */
468
- function buildCaseRecords(args) {
469
- const drafts = /* @__PURE__ */ new Map();
470
- const completedKeys = [];
471
- for (const event of args.events) {
472
- const normalizedEvent = normalizeCaseEventName(event.event);
473
- if (normalizedEvent == null) continue;
474
- const ids = extractEventIds(event, args);
475
- if (ids.caseId.length === 0 || ids.taskId.length === 0) continue;
476
- const draft = getOrCreateDraft(drafts, ids, event, args);
477
- applyIdentity(draft, ids, event, args);
478
- if (normalizedEvent === "start") applyCaseStart(draft, event);
479
- else if (normalizedEvent === "metric") applyCaseMetric(draft, event);
480
- else if (normalizedEvent === "score") applyCaseScore(draft, event);
481
- else {
482
- applyCaseEnd(draft, event);
483
- const key = createCaseKey(ids.taskId, ids.caseId);
484
- if (!completedKeys.includes(key)) completedKeys.push(key);
485
- }
486
- }
487
- return completedKeys.map((key) => drafts.get(key)).filter((draft) => draft != null && draft.endedAt != null).map(toCaseRecord);
488
- }
489
- /**
490
- * Builds generic score summaries overall and grouped by arbitrary keys.
491
- *
492
- * Use when:
493
- * - report artifacts need benchmark-neutral aggregate score views
494
- * - callers want to group by metrics such as `benchmark.category` or direct record fields such as `taskId`
495
- *
496
- * Expects:
497
- * - `groupByKeys` are stable metric names or direct `CaseRecord` field names
498
- * - record score values are normalized numeric scores
499
- *
500
- * Returns:
501
- * - overall score buckets and group buckets keyed by `<key>=<value>`
502
- */
503
- function buildMetricsSummary(records, groupByKeys) {
504
- const overall = {};
505
- const groups = {};
506
- for (const record of records) {
507
- addRecordScores(overall, record);
508
- for (const groupByKey of groupByKeys) {
509
- const groupValue = getGroupValue(record, groupByKey);
510
- if (!groupValue.exists) continue;
511
- const groupKey = `${groupByKey}=${String(groupValue.value)}`;
512
- groups[groupKey] ??= {};
513
- addRecordScores(groups[groupKey], record);
514
- }
515
- }
516
- return {
517
- groups: finalizeSummaryGroups(groups),
518
- overall: finalizeScoreSummary(overall)
519
- };
520
- }
521
- /**
522
- * Encodes records as newline-delimited JSON.
523
- *
524
- * Use when:
525
- * - writing `cases.jsonl` for command-line tools, dataframes, or streaming parsers
526
- * - each record should occupy exactly one JSON line
527
- *
528
- * Expects:
529
- * - records are JSON-serializable case records
530
- *
531
- * Returns:
532
- * - one JSON object per line with a trailing newline for non-empty input
533
- */
534
- function encodeJsonl(records) {
535
- if (records.length === 0) return "";
536
- return `${records.map((record) => JSON.stringify(record)).join("\n")}\n`;
537
- }
538
- function normalizeCaseEventName(eventName) {
539
- if (eventName === "task.case.start" || eventName === "CaseStarted") return "start";
540
- if (eventName === "task.case.metric") return "metric";
541
- if (eventName === "task.case.score") return "score";
542
- if (eventName === "task.case.end" || eventName === "CaseEnded") return "end";
543
- }
544
- function extractEventIds(event, args) {
545
- const data = asRecord(event.data);
241
+ function encodeJsonl(records) {
242
+ if (records.length === 0) return "";
243
+ return `${records.map((record) => JSON.stringify(record)).join("\n")}\n`;
244
+ }
245
+ function normalizeCaseEventName(eventName) {
246
+ if (eventName === "task.case.start" || eventName === "CaseStarted") return "start";
247
+ if (eventName === "task.case.metric") return "metric";
248
+ if (eventName === "task.case.score") return "score";
249
+ if (eventName === "task.case.end" || eventName === "CaseEnded") return "end";
250
+ }
251
+ function extractEventIds(event, args) {
252
+ const data = asRecord(event.data);
546
253
  return {
547
254
  attemptId: stringFrom(data?.attemptId) ?? event.attemptId ?? args.attemptId,
548
255
  caseId: stringFrom(data?.caseId) ?? event.caseId ?? "",
@@ -713,15 +420,560 @@ function isCaseMetricValue(value) {
713
420
  if (value == null || typeof value === "boolean" || typeof value === "number" || typeof value === "string") return true;
714
421
  return Array.isArray(value);
715
422
  }
716
- function asRecord(value) {
717
- if (value == null || typeof value !== "object" || Array.isArray(value)) return;
718
- return value;
423
+ function asRecord(value) {
424
+ if (value == null || typeof value !== "object" || Array.isArray(value)) return;
425
+ return value;
426
+ }
427
+ function stringFrom(value) {
428
+ return typeof value === "string" ? value : void 0;
429
+ }
430
+ function numberFrom(value) {
431
+ return typeof value === "number" && Number.isFinite(value) ? value : void 0;
432
+ }
433
+ //#endregion
434
+ //#region src/cli/report-selectors.ts
435
+ /**
436
+ * Resolves a generic case selector from metrics, scores, then direct fields.
437
+ *
438
+ * Use when:
439
+ * - report commands accept benchmark-neutral selectors such as `benchmark.case.id`
440
+ * - comparisons need the same lookup semantics as filtering and grouping
441
+ *
442
+ * Expects:
443
+ * - `key` is a direct `CaseRecord` field, score key, `scores.<key>`, or metric key
444
+ *
445
+ * Returns:
446
+ * - existence flag plus matched value when present
447
+ */
448
+ function getCaseSelectorValue(record, key) {
449
+ if (Object.hasOwn(record.metrics, key)) return {
450
+ exists: true,
451
+ value: record.metrics[key]
452
+ };
453
+ if (key.startsWith("scores.") && Object.hasOwn(record.scores, key.slice(7))) return {
454
+ exists: true,
455
+ value: record.scores[key.slice(7)]
456
+ };
457
+ if (Object.hasOwn(record.scores, key)) return {
458
+ exists: true,
459
+ value: record.scores[key]
460
+ };
461
+ if (Object.hasOwn(record, key)) return {
462
+ exists: true,
463
+ value: record[key]
464
+ };
465
+ return { exists: false };
466
+ }
467
+ /**
468
+ * Stable-stringifies JSON-like values for report comparisons.
469
+ *
470
+ * Before:
471
+ * - `{ b: 1, a: true }`
472
+ *
473
+ * After:
474
+ * - `{"a":true,"b":1}`
475
+ */
476
+ function stableStringify(value) {
477
+ if (value == null || typeof value !== "object") return JSON.stringify(value);
478
+ if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
479
+ const record = value;
480
+ return `{${Object.keys(record).sort((left, right) => left.localeCompare(right)).map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(",")}}`;
481
+ }
482
+ //#endregion
483
+ //#region src/cli/report-cases.ts
484
+ const reportCasesHelpText = `
485
+ Inspect normalized case records from generated vieval report artifacts.
486
+
487
+ Usage
488
+ $ vieval report cases <reportPath> [options]
489
+
490
+ Options
491
+ --format Output format: table | json | jsonl (default: table)
492
+ --where Equality filter "key=value"; repeatable
493
+ --group-by Case field, score name, or metric name used for grouped score summaries
494
+ `;
495
+ /**
496
+ * Reads normalized case records from one report run directory or report root.
497
+ *
498
+ * Use when:
499
+ * - CLI tools need case-level inspection from local report artifacts
500
+ * - callers may pass a run directory, a `cases.jsonl` file, or a report root
501
+ *
502
+ * Expects:
503
+ * - discovered `cases.jsonl` files contain one `CaseRecord` JSON object per line
504
+ *
505
+ * Returns:
506
+ * - all parsed case records sorted by discovered file path order
507
+ */
508
+ async function readCaseRecordsFromReport(reportPath) {
509
+ const caseFilePaths = await resolveCaseRecordPaths(reportPath);
510
+ if (caseFilePaths.length === 0) throw new Error(`No cases.jsonl files found under "${resolve(reportPath)}".`);
511
+ const records = [];
512
+ for (const caseFilePath of caseFilePaths) {
513
+ const lines = readFileSync(caseFilePath, "utf-8").split("\n");
514
+ for (const [index, line] of lines.entries()) {
515
+ const trimmed = line.trim();
516
+ if (trimmed.length === 0) continue;
517
+ try {
518
+ records.push(JSON.parse(trimmed));
519
+ } catch (error) {
520
+ throw new Error(`Invalid cases.jsonl line ${index + 1} in "${caseFilePath}": ${errorMessageFrom(error) ?? "Unknown JSON parse failure."}`);
521
+ }
522
+ }
523
+ }
524
+ return records;
525
+ }
526
+ /**
527
+ * Builds filtered case inspection output.
528
+ *
529
+ * Use when:
530
+ * - `vieval report cases` needs deterministic JSON/table output
531
+ * - tests need pure filtering and grouping behavior without process I/O
532
+ *
533
+ * Expects:
534
+ * - `where` filters use `key=value`
535
+ * - lookup keys may target direct case fields, score names, or metric names
536
+ *
537
+ * Returns:
538
+ * - filtered records plus grouped score summaries when `groupBy` is present
539
+ */
540
+ function buildReportCasesOutput(records, options) {
541
+ const whereFilters = (options.where ?? []).map(parseSelector);
542
+ const filteredRecords = records.filter((record) => matchesWhereFilters(record, whereFilters));
543
+ return {
544
+ groups: options.groupBy == null ? void 0 : buildCaseGroups(filteredRecords, options.groupBy),
545
+ records: [...filteredRecords]
546
+ };
547
+ }
548
+ /**
549
+ * Runs the `vieval report cases` command.
550
+ *
551
+ * Call stack:
552
+ *
553
+ * published executable (`../bin/vieval`)
554
+ * -> {@link import('./index').runTopLevelCli}
555
+ * -> {@link runReportCasesCli}
556
+ * -> {@link readCaseRecordsFromReport}
557
+ *
558
+ * Use when:
559
+ * - the top-level CLI dispatches local case artifact inspection
560
+ *
561
+ * Expects:
562
+ * - argv is either `cases <reportPath> ...` or `<reportPath> ...`
563
+ *
564
+ * Returns:
565
+ * - resolves after writing the requested output to stdout
566
+ */
567
+ async function runReportCasesCli(argv) {
568
+ try {
569
+ const parsed = parseReportCasesCliArguments(argv);
570
+ const output = buildReportCasesOutput(await readCaseRecordsFromReport(parsed.reportPath), parsed);
571
+ if (parsed.format === "json") {
572
+ process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
573
+ return;
574
+ }
575
+ if (parsed.format === "jsonl") {
576
+ process.stdout.write(encodeJsonl(output.records));
577
+ return;
578
+ }
579
+ process.stdout.write(`${formatCasesTable(output)}\n`);
580
+ } catch (error) {
581
+ const errorMessage = errorMessageFrom(error) ?? "Unknown report cases failure.";
582
+ process.stderr.write(`[vieval report cases] ${errorMessage}\n`);
583
+ process.exitCode = 1;
584
+ }
585
+ }
586
+ function normalizeCliArgv$6(argv) {
587
+ const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
588
+ if (normalizedArgv[0] === "report" && normalizedArgv[1] === "cases") return normalizedArgv.slice(2);
589
+ if (normalizedArgv[0] === "cases") return normalizedArgv.slice(1);
590
+ return normalizedArgv;
591
+ }
592
+ function parseReportCasesCliArguments(argv) {
593
+ const cli = meow(reportCasesHelpText, {
594
+ argv: normalizeCliArgv$6(argv),
595
+ flags: {
596
+ format: {
597
+ default: "table",
598
+ type: "string"
599
+ },
600
+ groupBy: { type: "string" },
601
+ where: {
602
+ isMultiple: true,
603
+ type: "string"
604
+ }
605
+ },
606
+ importMeta: import.meta
607
+ });
608
+ const reportPath = cli.input[0];
609
+ if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
610
+ return {
611
+ format: normalizeReportCasesFormat(cli.flags.format),
612
+ groupBy: cli.flags.groupBy,
613
+ reportPath,
614
+ where: cli.flags.where
615
+ };
616
+ }
617
+ function normalizeReportCasesFormat(value) {
618
+ const normalized = value.toLowerCase();
619
+ if (normalized === "json") return "json";
620
+ if (normalized === "jsonl") return "jsonl";
621
+ return "table";
622
+ }
623
/**
 * Resolves which `cases.jsonl` files a report path refers to.
 *
 * Resolution order:
 * 1. the path itself, when it exists and ends with `.jsonl`
 * 2. `<path>/cases.jsonl`, when that file exists
 * 3. every `cases.jsonl` found by globbing beneath the path, sorted with
 *    `localeCompare`
 */
async function resolveCaseRecordPaths(reportPath) {
	const reportRoot = resolve(reportPath);
	if (existsSync(reportRoot) && reportRoot.endsWith(".jsonl")) return [reportRoot];
	const nestedCaseFile = resolve(reportRoot, "cases.jsonl");
	if (existsSync(nestedCaseFile)) return [nestedCaseFile];
	const discoveredCaseFiles = await glob("**/cases.jsonl", {
		absolute: true,
		cwd: reportRoot
	});
	return discoveredCaseFiles.sort((left, right) => left.localeCompare(right));
}
633
/**
 * Checks whether a case record satisfies every parsed `key=value` filter.
 *
 * A filter passes only when the selector resolves on the record and the
 * stringified resolved value equals the filter value. An empty filter list
 * matches every record.
 */
function matchesWhereFilters(record, whereFilters) {
	for (const { key, value } of whereFilters) {
		const lookup = getCaseSelectorValue(record, key);
		if (!lookup.exists || String(lookup.value) !== value) return false;
	}
	return true;
}
639
/**
 * Parses one `key=value` selector into its trimmed parts.
 *
 * The selector is split on the first `=`, so the value may itself contain
 * `=` characters. Both sides are trimmed BEFORE validation, so selectors
 * whose key or value consists only of whitespace (for example `" =x"` or
 * `"a= "`) are rejected the same way as `"=x"` and `"a="`.
 *
 * @param {string} selector Raw selector text such as `state=passed`.
 * @returns {{ key: string, value: string }} Trimmed, non-empty key and value.
 * @throws {Error} When there is no `=` or either side is empty after trimming.
 */
function parseSelector(selector) {
	const separatorIndex = selector.indexOf("=");
	const key = separatorIndex < 0 ? "" : selector.slice(0, separatorIndex).trim();
	const value = separatorIndex < 0 ? "" : selector.slice(separatorIndex + 1).trim();
	if (key.length === 0 || value.length === 0) throw new Error(`Invalid selector "${selector}". Expected "key=value".`);
	return {
		key,
		value
	};
}
647
/**
 * Groups case records by the resolved value of `groupBy` and summarizes the
 * scores of each group.
 *
 * Records on which the selector does not resolve are left out. Group keys
 * have the form `<groupBy>=<value>` and are emitted in `localeCompare` order.
 */
function buildCaseGroups(records, groupBy) {
	const buckets = {};
	for (const record of records) {
		const lookup = getCaseSelectorValue(record, groupBy);
		if (!lookup.exists) continue;
		const label = `${groupBy}=${String(lookup.value)}`;
		buckets[label] ??= {
			count: 0,
			scores: {}
		};
		const bucket = buckets[label];
		bucket.count += 1;
		addScores(bucket.scores, record.scores);
	}
	const sortedEntries = Object.entries(buckets).sort(([left], [right]) => left.localeCompare(right));
	return Object.fromEntries(sortedEntries.map(([label, bucket]) => [label, {
		count: bucket.count,
		scores: finalizeScores(bucket.scores)
	}]));
}
665
/**
 * Accumulates one record's scores into a running per-score summary.
 *
 * Mutates `summary` in place: each score name gets a bucket whose `count`
 * and `sum` are increased. `average` stays at its initial `0` here; it is
 * derived later from `sum` and `count`.
 *
 * Case records are parsed from JSONL files without validation, so this
 * tolerates a missing `scores` object and skips values that are not finite
 * numbers rather than poisoning `sum` with NaN or string concatenation.
 *
 * @param {Record<string, { average: number, count: number, sum: number }>} summary Mutable accumulator.
 * @param {Record<string, unknown> | null | undefined} scores Scores of one case record.
 * @returns {void}
 */
function addScores(summary, scores) {
	if (scores == null) return;
	for (const [scoreName, value] of Object.entries(scores)) {
		// Number.isFinite is false for every non-number, so this also drops strings and null.
		if (!Number.isFinite(value)) continue;
		summary[scoreName] ??= {
			average: 0,
			count: 0,
			sum: 0
		};
		summary[scoreName].count += 1;
		summary[scoreName].sum += value;
	}
}
676
/**
 * Produces the final per-score summary from accumulated buckets.
 *
 * Score names are emitted in `localeCompare` order. `average` is `sum / count`,
 * or `0` for a bucket whose count is zero.
 */
function finalizeScores(summary) {
	const sortedNames = Object.keys(summary).sort((left, right) => left.localeCompare(right));
	return Object.fromEntries(sortedNames.map((scoreName) => {
		const { count, sum } = summary[scoreName];
		const average = count === 0 ? 0 : sum / count;
		return [scoreName, {
			average,
			count,
			sum
		}];
	}));
}
683
/**
 * Renders case inspection output as plain text lines.
 *
 * Always prints the header and the record count. When `output.groups` is
 * present, prints one line per group with its count and each score average
 * formatted to three decimals.
 */
function formatCasesTable(output) {
	const header = ["CASES vieval report", `Case count ${output.records.length}`];
	if (output.groups == null) return header.join("\n");
	const groupLines = Object.entries(output.groups).map(([groupKey, group]) => {
		const scoreText = Object.entries(group.scores).map(([scoreName, bucket]) => `${scoreName}=${bucket.average.toFixed(3)}`).join(" ");
		const scoreSuffix = scoreText.length > 0 ? ` ${scoreText}` : "";
		return `${groupKey} count=${group.count}${scoreSuffix}`;
	});
	return [...header, "Groups", ...groupLines].join("\n");
}
694
+ //#endregion
695
+ //#region src/cli/report-compare.ts
696
/**
 * Builds the compare report artifact: one row per method, ordered by hybrid
 * average (descending) with exact average (descending) as the tie-breaker.
 *
 * Each row carries per-project details plus totals across projects. Method
 * averages are weighted by each project's run count. Missing averages sort
 * as negative infinity, i.e. after every scored method.
 */
function buildCompareReportArtifact(args) {
	const summarizeProject = (project, caseRecords) => ({
		caseCount: countCasesForProject(caseRecords, project.name),
		distinctCaseCount: countDistinctCasesForProject(caseRecords, project.name),
		exactAverage: project.result?.overall.exactAverage ?? null,
		executed: project.executed,
		hybridAverage: project.result?.overall.hybridAverage ?? null,
		name: project.name,
		runCount: project.result?.overall.runCount ?? 0,
		taskCount: project.taskCount
	});
	const summarizeMethod = (method) => {
		const caseRecords = method.caseRecords ?? [];
		const projects = method.output.projects.map((project) => summarizeProject(project, caseRecords));
		return {
			caseCount: caseRecords.length,
			distinctCaseCount: countDistinctCases(caseRecords),
			exactAverage: createWeightedAverage(projects, (project) => project.exactAverage),
			executedProjectCount: projects.filter((project) => project.executed).length,
			hybridAverage: createWeightedAverage(projects, (project) => project.hybridAverage),
			methodId: method.methodId,
			projectCount: projects.length,
			projects,
			runCount: projects.reduce((sum, project) => sum + project.runCount, 0),
			taskCount: projects.reduce((sum, project) => sum + project.taskCount, 0)
		};
	};
	// A missing average ranks below every real score.
	const rankOf = (average) => average ?? Number.NEGATIVE_INFINITY;
	const methods = args.methods.map(summarizeMethod);
	methods.sort((left, right) => {
		const leftHybrid = rankOf(left.hybridAverage);
		const rightHybrid = rankOf(right.hybridAverage);
		if (leftHybrid !== rightHybrid) return rightHybrid - leftHybrid;
		const leftExact = rankOf(left.exactAverage);
		return rankOf(right.exactAverage) - leftExact;
	});
	return {
		benchmarkId: args.benchmarkId,
		methods,
		reportPath: args.reportPath
	};
}
738
/**
 * Counts the case records whose `projectName` strictly equals `projectName`.
 */
function countCasesForProject(caseRecords, projectName) {
	let matches = 0;
	for (const record of caseRecords) {
		if (record.projectName === projectName) matches += 1;
	}
	return matches;
}
741
/**
 * Counts distinct cases among the records that belong to one project.
 */
function countDistinctCasesForProject(caseRecords, projectName) {
	const projectRecords = caseRecords.filter((record) => record.projectName === projectName);
	return countDistinctCases(projectRecords);
}
744
/**
 * Counts distinct cases, where a case is identified by the combination of
 * `projectName`, `taskId` and `caseId`.
 *
 * The identity parts are joined with a NUL character, the same separator
 * `collectTasks` uses, so identifiers that themselves contain `:` cannot
 * collide (`"a:b" + "c"` versus `"a" + "b:c"`).
 *
 * @param {Array<{ projectName: string, taskId: string, caseId: string }>} caseRecords Case records to inspect.
 * @returns {number} Number of distinct project/task/case combinations.
 */
function countDistinctCases(caseRecords) {
	const caseKeys = /* @__PURE__ */ new Set();
	for (const record of caseRecords) caseKeys.add(`${record.projectName}\0${record.taskId}\0${record.caseId}`);
	return caseKeys.size;
}
749
/**
 * Averages a per-project score, weighting each project by its `runCount`.
 *
 * Projects with no selected average or a non-positive run count are ignored.
 * Returns `null` when no project contributes any weight.
 */
function createWeightedAverage(projects, selectAverage) {
	const totals = projects.reduce((accumulator, project) => {
		const average = selectAverage(project);
		const contributes = average != null && project.runCount > 0;
		if (!contributes) return accumulator;
		return {
			score: accumulator.score + average * project.runCount,
			weight: accumulator.weight + project.runCount
		};
	}, {
		score: 0,
		weight: 0
	});
	return totals.weight === 0 ? null : totals.score / totals.weight;
}
761
/**
 * Persists the compare report artifact as pretty-printed JSON.
 *
 * Creates the parent directory (recursively) when needed, writes the artifact
 * with two-space indentation and a trailing newline, and resolves to the
 * absolute path that was written.
 */
async function writeCompareReportArtifact(args) {
	const absoluteOutputPath = resolve(args.outputPath);
	const parentDirectory = dirname(absoluteOutputPath);
	await mkdir(parentDirectory, { recursive: true });
	const serializedArtifact = `${JSON.stringify(args.artifact, null, 2)}\n`;
	await writeFile(absoluteOutputPath, serializedArtifact, "utf-8");
	return absoluteOutputPath;
}
770
+ //#endregion
771
+ //#region src/cli/discovery.ts
772
/**
 * Discovers eval files by globbing the include patterns under the project root.
 *
 * Expects:
 * - `options.include` and `options.exclude` are iterables of glob patterns
 * - `options.root` is the directory the patterns are resolved against
 *
 * Returns:
 * - absolute file paths (directories are not matched), passed through `uniq`
 *   and sorted with `localeCompare`
 */
async function discoverEvalFiles(options) {
	const matchedPaths = await glob([...options.include], {
		absolute: true,
		cwd: options.root,
		ignore: [...options.exclude],
		onlyFiles: true
	});
	const uniquePaths = uniq(matchedPaths);
	return uniquePaths.sort((left, right) => left.localeCompare(right));
}
789
+ //#endregion
790
+ //#region src/cli/module-runtime.ts
791
/**
 * Loads eval modules and returns a normalized eval-module map.
 *
 * Use when:
 * - CLI collection needs Vite/Vitest-powered module resolution and transforms
 * - eval files should be imported with the same runtime semantics as Vitest
 *
 * Expects:
 * - `projectRoot` points at the project that owns the eval files
 * - each `evalFilePaths` entry is an absolute file path
 *
 * Returns:
 * - eval modules keyed by file href; the first definition of a file uses the
 *   bare href, later ones get a `#registration-<n>` suffix
 *
 * Definitions collected for one file (registrations first, then the default
 * export) are de-duplicated by name, description and task id, keeping the
 * first occurrence. The Vitest runtime is always closed, and module
 * registration is always ended, even when an import throws.
 */
async function loadEvalModulesWithVitestRuntime(evalFilePaths, projectRoot) {
	// Identity used to detect the same definition arriving via registration and default export.
	const identityKeyOf = (definition) => `${definition.name}::${definition.description}::${definition.task?.id ?? ""}`;
	const loadedModules = {};
	const runtime = await createVitest("test", {
		config: false,
		root: projectRoot,
		run: false,
		silent: true,
		watch: false
	});
	try {
		for (const evalFilePath of evalFilePaths) {
			const moduleHref = pathToFileURL(evalFilePath).href;
			beginModuleRegistration(moduleHref);
			try {
				const moduleValue = await runtime.import(moduleHref);
				const registeredDefinitions = consumeModuleRegistrations(moduleHref);
				const defaultDefinition = moduleValue.default;
				const definitions = [...registeredDefinitions, ...defaultDefinition == null ? [] : [defaultDefinition]];
				// Single pass with a Set keeps the first occurrence of each key without rescanning the list.
				const seenKeys = /* @__PURE__ */ new Set();
				const deduplicatedDefinitions = definitions.filter((definition) => {
					const key = identityKeyOf(definition);
					if (seenKeys.has(key)) return false;
					seenKeys.add(key);
					return true;
				});
				for (const [definitionIndex, definition] of deduplicatedDefinitions.entries()) {
					const moduleKey = definitionIndex === 0 ? moduleHref : `${moduleHref}#registration-${definitionIndex + 1}`;
					loadedModules[moduleKey] = { default: definition };
				}
			} finally {
				endModuleRegistration();
			}
		}
	} finally {
		await runtime.close();
	}
	return loadedModules;
}
841
+ //#endregion
842
+ //#region src/cli/report-otlp.ts
843
/**
 * Projects normalized case records into local OTLP-shaped JSON containers.
 *
 * Use when:
 * - report artifacts should be written without an OpenTelemetry Collector
 * - downstream tools want trace/log/metric-shaped JSON
 *
 * Expects:
 * - `args.records` are the case records of one run and `args.runId` names it
 *
 * Returns:
 * - `logs`: one log record per case, with the case id, scores and state
 *   serialized into the body
 * - `metrics`: one gauge per score kind, with a data point for every record
 *   whose score of that kind is a number
 * - `traces`: a run span followed by project, task and case spans
 */
function buildLocalOtlpProjection(args) {
	const { records, runId } = args;
	// Every container gets its own scope object so none of them share a reference.
	const createScope = () => ({ name: "vieval" });
	const projectSpans = collectProjectNames(records).map((projectName) => ({
		attributes: toAttributes({
			"vieval.project.name": projectName,
			"vieval.run.id": runId
		}),
		name: "vieval.project"
	}));
	const taskSpans = collectTasks(records).map((task) => ({
		attributes: toAttributes({
			"vieval.project.name": task.projectName,
			"vieval.run.id": runId,
			"vieval.task.id": task.taskId
		}),
		name: "vieval.task"
	}));
	const caseSpans = records.map((record) => ({
		attributes: toAttributes({
			// Record metrics come first so the vieval.* keys below take precedence on a clash.
			...record.metrics,
			"vieval.case.duration_ms": record.durationMs,
			"vieval.case.id": record.caseId,
			"vieval.case.name": record.caseName,
			"vieval.case.retry_count": record.retryCount,
			"vieval.case.state": record.state,
			"vieval.project.name": record.projectName,
			"vieval.task.id": record.taskId
		}),
		endTimeUnixNano: isoToUnixNano(record.endedAt),
		name: "vieval.case",
		startTimeUnixNano: isoToUnixNano(record.startedAt)
	}));
	const logRecords = records.map((record) => {
		const body = JSON.stringify({
			caseId: record.caseId,
			scores: record.scores,
			state: record.state
		});
		return {
			attributes: toAttributes(record.metrics),
			body: { stringValue: body },
			eventName: "vieval.case",
			timeUnixNano: isoToUnixNano(record.endedAt)
		};
	});
	const scoreMetrics = collectScoreKinds(records).map((kind) => {
		const scoredRecords = records.filter((record) => typeof record.scores[kind] === "number");
		return {
			gauge: { dataPoints: scoredRecords.map((record) => ({
				asDouble: record.scores[kind],
				attributes: toAttributes({
					...record.metrics,
					"vieval.case.id": record.caseId,
					"vieval.task.id": record.taskId
				}),
				timeUnixNano: isoToUnixNano(record.endedAt)
			})) },
			name: `vieval.score.${kind}`
		};
	});
	const runSpan = {
		attributes: toAttributes({ "vieval.run.id": runId }),
		name: "vieval.run"
	};
	return {
		logs: { resourceLogs: [{ scopeLogs: [{
			logRecords,
			scope: createScope()
		}] }] },
		metrics: { resourceMetrics: [{ scopeMetrics: [{
			metrics: scoreMetrics,
			scope: createScope()
		}] }] },
		traces: { resourceSpans: [{ scopeSpans: [{
			scope: createScope(),
			spans: [
				runSpan,
				...projectSpans,
				...taskSpans,
				...caseSpans
			]
		}] }] }
	};
}
930
/**
 * Converts a plain attribute object into a key-sorted list of
 * `{ key, value }` entries with each value wrapped by `toAnyValue`.
 *
 * Entries whose value is `undefined` are dropped. A nullish `attributes`
 * argument yields an empty list: log records pass `record.metrics` straight
 * from parsed report files, where that field may be absent.
 *
 * @param {Record<string, unknown> | null | undefined} attributes Attribute bag.
 * @returns {Array<{ key: string, value: object }>} Entries sorted by key using `localeCompare`.
 */
function toAttributes(attributes) {
	return Object.entries(attributes ?? {}).filter(([, value]) => value !== void 0).sort(([leftKey], [rightKey]) => leftKey.localeCompare(rightKey)).map(([key, value]) => ({
		key,
		value: toAnyValue(value)
	}));
}
936
/**
 * Wraps a JavaScript value in a typed value container.
 *
 * - arrays become `arrayValue`, converting each item recursively
 * - `null` and `undefined` become the string `"null"`
 * - booleans become `boolValue`
 * - finite numbers become `doubleValue`; non-finite numbers are stringified
 * - strings become `stringValue`
 * - everything else is serialized with `stableStringify`
 */
function toAnyValue(value) {
	if (Array.isArray(value)) return { arrayValue: { values: value.map((item) => toAnyValue(item)) } };
	if (value == null) return { stringValue: "null" };
	switch (typeof value) {
		case "boolean": return { boolValue: value };
		case "number": return Number.isFinite(value) ? { doubleValue: value } : { stringValue: String(value) };
		case "string": return { stringValue: value };
		default: return { stringValue: stableStringify(value) };
	}
}
946
/**
 * Tells whether a value is `null`, `undefined`, a boolean, a number or a string.
 */
function isAttributeScalar(value) {
	if (value == null) return true;
	const kind = typeof value;
	return kind === "boolean" || kind === "number" || kind === "string";
}
949
/**
 * Converts an ISO-8601 timestamp into Unix epoch nanoseconds, as a decimal string.
 *
 * Timestamps of the form `YYYY-MM-DDTHH:MM:SS[.fraction](Z|±HH:MM)` keep up
 * to nine fractional digits: the whole-second part goes through `Date.parse`
 * and the fraction is added as nanoseconds. Any other input is handed to
 * `Date.parse` as-is. Input that cannot be parsed yields `"0"`.
 */
function isoToUnixNano(value) {
	const matched = /^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.(\d{1,9}))?(Z|[+-]\d{2}:\d{2})$/.exec(value);
	// For precise matches, parse only the whole seconds so the fraction is not rounded to milliseconds.
	const parseInput = matched == null ? value : `${matched[1]}.000${matched[3]}`;
	const epochMilliseconds = Date.parse(parseInput);
	if (!Number.isFinite(epochMilliseconds)) return "0";
	const fractionNanoseconds = matched == null ? 0n : BigInt((matched[2] ?? "").padEnd(9, "0").slice(0, 9));
	return String(BigInt(epochMilliseconds) * 1000000n + fractionNanoseconds);
}
961
/**
 * Lists every score name that appears on any record, without duplicates,
 * sorted with `localeCompare`.
 */
function collectScoreKinds(records) {
	const kinds = /* @__PURE__ */ new Set();
	for (const record of records) {
		for (const kind of Object.keys(record.scores)) kinds.add(kind);
	}
	return Array.from(kinds).sort((left, right) => left.localeCompare(right));
}
720
- function stringFrom(value) {
721
- return typeof value === "string" ? value : void 0;
964
/**
 * Lists the distinct project names found on the records, sorted with
 * `localeCompare`.
 */
function collectProjectNames(records) {
	const names = /* @__PURE__ */ new Set();
	for (const record of records) names.add(record.projectName);
	return Array.from(names).sort((left, right) => left.localeCompare(right));
}
723
- function numberFrom(value) {
724
- return typeof value === "number" && Number.isFinite(value) ? value : void 0;
967
/**
 * Lists the distinct (project, task) pairs found on the records.
 *
 * Pairs are keyed by project name and task id joined with a NUL character,
 * and returned sorted by project name, then task id, using `localeCompare`.
 */
function collectTasks(records) {
	const tasksByKey = /* @__PURE__ */ new Map();
	for (const { projectName, taskId } of records) {
		tasksByKey.set(`${projectName}\0${taskId}`, {
			projectName,
			taskId
		});
	}
	const compareTasks = (left, right) => left.projectName.localeCompare(right.projectName) || left.taskId.localeCompare(right.taskId);
	return Array.from(tasksByKey.values()).sort(compareTasks);
}
726
978
  //#endregion
727
979
  //#region src/cli/report-artifacts.ts
@@ -1893,9 +2145,30 @@ function sanitizeIdentitySegment(value) {
1893
2145
  if (normalized.length === 0) return "default";
1894
2146
  return normalized.replace(/[^\w.-]+/g, "-");
1895
2147
  }
1896
- function createRunIdentity(options) {
2148
/**
 * Collects the distinct non-default matrix row labels used by the tasks.
 *
 * Per task, a `run:<id>` part is produced when its run row id is not
 * `"default"` and an `eval:<id>` part when its eval row id is not
 * `"default"`; both parts are joined with `+` when present. Tasks with only
 * default rows contribute nothing. Labels are returned sorted with
 * `localeCompare`.
 */
function createExperimentMatrixRows(tasks) {
	const labels = /* @__PURE__ */ new Set();
	for (const task of tasks) {
		const { runRowId, evalRowId } = task.matrix.meta;
		const parts = [];
		if (runRowId !== "default") parts.push(`run:${runRowId}`);
		if (evalRowId !== "default") parts.push(`eval:${evalRowId}`);
		if (parts.length > 0) labels.add(parts.join("+"));
	}
	return Array.from(labels).sort((left, right) => left.localeCompare(right));
}
2162
/**
 * Chooses the experiment id for a run.
 *
 * An explicit `options.experiment` wins and is sanitized. Otherwise the
 * distinct `experimentMatrixRows` of all prepared projects are sorted, joined
 * with `--`, prefixed with `matrix-` and sanitized. When no project has any
 * matrix row, the id is `"default-experiment"`.
 */
function resolveExperimentId(options, preparedProjects) {
	if (options.experiment != null) return sanitizeIdentitySegment(options.experiment);
	const distinctRows = /* @__PURE__ */ new Set();
	for (const project of preparedProjects) {
		for (const row of project.experimentMatrixRows) distinctRows.add(row);
	}
	if (distinctRows.size === 0) return "default-experiment";
	const joinedRows = Array.from(distinctRows).sort().join("--");
	return sanitizeIdentitySegment(`matrix-${joinedRows}`);
}
2169
+ function createRunIdentity(options, preparedProjects) {
1897
2170
  const workspaceId = sanitizeIdentitySegment(options.workspace ?? "default-workspace");
1898
- const experimentId = sanitizeIdentitySegment(options.experiment ?? "default-experiment");
2171
+ const experimentId = resolveExperimentId(options, preparedProjects);
1899
2172
  return {
1900
2173
  attemptId: sanitizeIdentitySegment(options.attempt ?? `attempt-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`),
1901
2174
  experimentId,
@@ -2251,6 +2524,7 @@ async function prepareProject(project) {
2251
2524
  });
2252
2525
  const canAutoExecuteEntryTasks = entries.some((entry) => entry.task != null) && project.models.length > 0;
2253
2526
  if (project.executor == null && !canAutoExecuteEntryTasks) return {
2527
+ experimentMatrixRows: createExperimentMatrixRows(tasks),
2254
2528
  kind: "summary",
2255
2529
  summary: {
2256
2530
  caseSummary: null,
@@ -2267,6 +2541,7 @@ async function prepareProject(project) {
2267
2541
  }
2268
2542
  };
2269
2543
  return {
2544
+ experimentMatrixRows: createExperimentMatrixRows(tasks),
2270
2545
  kind: "prepared",
2271
2546
  prepared: {
2272
2547
  discoveredEvalFileCount: evalFilePaths.length,
@@ -2279,6 +2554,7 @@ async function prepareProject(project) {
2279
2554
  };
2280
2555
  } catch (error) {
2281
2556
  return {
2557
+ experimentMatrixRows: [],
2282
2558
  kind: "summary",
2283
2559
  summary: {
2284
2560
  caseSummary: null,
@@ -2439,7 +2715,6 @@ async function executePreparedProject(prepared, identity, cacheProjectName, tele
2439
2715
  * - keeping business-agent eval files near their implementation packages
2440
2716
  */
2441
2717
  async function runVievalCli(options = {}) {
2442
- const identity = createRunIdentity(options);
2443
2718
  const loadedConfig = await loadVievalCliConfig({
2444
2719
  configFilePath: options.configFilePath,
2445
2720
  cwd: options.cwd
@@ -2447,21 +2722,24 @@ async function runVievalCli(options = {}) {
2447
2722
  const telemetry = loadedConfig.reporting?.openTelemetry?.enabled === true ? createOpenTelemetryRuntime() : createNoopTelemetryRuntime();
2448
2723
  const onOpenTelemetryRunEnd = loadedConfig.reporting?.openTelemetry?.enabled === true ? loadedConfig.reporting.openTelemetry.onRunEnd : void 0;
2449
2724
  const restoreEnvironment = applyRunEnvironment(loadedConfig.env);
2450
- const eventRecorder = createEventRecorder(identity);
2451
- const reporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
2452
2725
  let runError;
2453
2726
  let runEndError;
2454
2727
  let output;
2728
+ let reporter;
2455
2729
  try {
2730
+ const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
2731
+ const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
2732
+ const identity = createRunIdentity(options, preparedProjects);
2733
+ const eventRecorder = createEventRecorder(identity);
2734
+ const runReporter = createReporterWithEventCapture(createRunReporter(options.reporter), eventRecorder.record);
2735
+ reporter = runReporter;
2456
2736
  output = await telemetry.withSpan("vieval.run", {
2457
2737
  "vieval.attempt.id": identity.attemptId,
2458
2738
  "vieval.experiment.id": identity.experimentId,
2459
2739
  "vieval.run.id": identity.runId,
2460
2740
  "vieval.workspace.id": identity.workspaceId
2461
2741
  }, async () => {
2462
- const selectedProjects = filterProjectsByName(loadedConfig.projects, options.project ?? []);
2463
2742
  const workspaceScheduler = createSchedulerRuntime({ concurrency: { workspace: resolveWorkspaceConcurrency(loadedConfig, options) } });
2464
- const preparedProjects = await Promise.all(selectedProjects.map(async (project) => prepareProject(project)));
2465
2743
  const executableProjects = preparedProjects.filter((project) => project.kind === "prepared").map((project) => project.prepared);
2466
2744
  const totalTasks = preparedProjects.reduce((sum, project) => {
2467
2745
  if (project.kind === "prepared") return sum + project.prepared.tasks.length;
@@ -2476,8 +2754,8 @@ async function runVievalCli(options = {}) {
2476
2754
  passedTasks: 0,
2477
2755
  skippedTasks: 0
2478
2756
  };
2479
- reporter.onRunStart({ totalTasks });
2480
- for (const project of executableProjects) for (const task of project.tasks) reporter.onTaskQueued(createTaskQueuePayload(task, project.name));
2757
+ runReporter.onRunStart({ totalTasks });
2758
+ for (const project of executableProjects) for (const task of project.tasks) runReporter.onTaskQueued(createTaskQueuePayload(task, project.name));
2481
2759
  const projectSummaries = (await Promise.all(preparedProjects.map(async (preparedProject, index) => {
2482
2760
  if (preparedProject.kind === "summary") return {
2483
2761
  index,
@@ -2493,10 +2771,10 @@ async function runVievalCli(options = {}) {
2493
2771
  projectName: preparedProject.prepared.name,
2494
2772
  scope: "workspace",
2495
2773
  workspaceId: identity.workspaceId
2496
- }, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, telemetry, reporter, reporterCounters, eventRecorder.record, options)))
2774
+ }, async () => executePreparedProject(preparedProject.prepared, identity, options.cacheProjectName, telemetry, runReporter, reporterCounters, eventRecorder.record, options)))
2497
2775
  };
2498
2776
  }))).sort((left, right) => left.index - right.index).map((item) => item.summary);
2499
- reporter.onRunEnd({
2777
+ runReporter.onRunEnd({
2500
2778
  failedTasks: reporterCounters.failedTasks,
2501
2779
  passedTasks: reporterCounters.passedTasks,
2502
2780
  skippedTasks: reporterCounters.skippedTasks + skippedSummaryTasks,
@@ -2522,7 +2800,7 @@ async function runVievalCli(options = {}) {
2522
2800
  } catch (error) {
2523
2801
  if (runError == null) runEndError = error;
2524
2802
  }
2525
- reporter.dispose();
2803
+ reporter?.dispose();
2526
2804
  restoreEnvironment();
2527
2805
  }
2528
2806
  if (runError != null) throw runError;
@@ -2640,14 +2918,14 @@ const compareHelpText = `
2640
2918
  --output Optional output artifact path
2641
2919
  --format Console output format: table | json (default: table)
2642
2920
  `;
2643
- function normalizeCliArgv$6(argv) {
2921
+ function normalizeCliArgv$5(argv) {
2644
2922
  const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
2645
2923
  if (normalizedArgv[0] === "compare") return normalizedArgv.slice(1);
2646
2924
  return normalizedArgv;
2647
2925
  }
2648
2926
  function parseCompareCliArguments(argv) {
2649
2927
  const cli = meow(compareHelpText, {
2650
- argv: normalizeCliArgv$6(argv),
2928
+ argv: normalizeCliArgv$5(argv),
2651
2929
  flags: {
2652
2930
  config: { type: "string" },
2653
2931
  comparison: { type: "string" },
@@ -2677,18 +2955,22 @@ async function runCompareCli(argv) {
2677
2955
  cwd: parsed.cwd
2678
2956
  });
2679
2957
  const methodResults = [];
2958
+ const reportRoot = await mkdtemp(join(tmpdir(), "vieval-compare-"));
2680
2959
  for (const method of loaded.config.methods) {
2681
2960
  const methodWorkspace = resolve(method.workspace);
2961
+ const methodReportOut = join(reportRoot, method.id);
2682
2962
  const output = await runVievalCli({
2683
2963
  cacheProjectName: loaded.config.benchmark.sharedCaseNamespace,
2684
2964
  configFilePath: method.configFilePath ?? resolve(methodWorkspace, "vieval.config.ts"),
2685
2965
  cwd: methodWorkspace,
2686
2966
  project: [method.project],
2967
+ reportOut: methodReportOut,
2687
2968
  workspace: loaded.config.benchmark.id
2688
2969
  });
2689
2970
  const failedProject = output.projects.find((project) => project.errorMessage != null);
2690
2971
  if (failedProject != null) throw new Error(`Comparison method "${method.id}" failed: ${failedProject.errorMessage}`);
2691
2972
  methodResults.push({
2973
+ caseRecords: await readCaseRecordsFromReport(methodReportOut),
2692
2974
  methodId: method.id,
2693
2975
  output
2694
2976
  });
@@ -2752,7 +3034,7 @@ const evalRunHelpText = `
2752
3034
  --report-out Report output root directory
2753
3035
  --json Print machine-readable JSON output
2754
3036
  `;
2755
- function normalizeCliArgv$5(argv) {
3037
+ function normalizeCliArgv$4(argv) {
2756
3038
  const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
2757
3039
  return normalizedArgv[0] === "run" ? normalizedArgv.slice(1) : normalizedArgv;
2758
3040
  }
@@ -2775,7 +3057,7 @@ function normalizeProjectNames(projectNames) {
2775
3057
  */
2776
3058
  function parseCliArguments(argv) {
2777
3059
  const cli = meow(evalRunHelpText, {
2778
- argv: normalizeCliArgv$5(argv),
3060
+ argv: normalizeCliArgv$4(argv),
2779
3061
  importMeta: import.meta,
2780
3062
  flags: {
2781
3063
  config: { type: "string" },
@@ -2892,7 +3174,7 @@ const reportAnalyzeHelpText = `
2892
3174
  --run-matrix Keep runs matching run-matrix selector "key=value[,key=value]"
2893
3175
  --eval-matrix Keep runs matching eval-matrix selector "key=value[,key=value]"
2894
3176
  `;
2895
- function normalizeCliArgv$4(argv) {
3177
+ function normalizeCliArgv$3(argv) {
2896
3178
  const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
2897
3179
  if (normalizedArgv[0] === "report" && normalizedArgv[1] === "analyze") return normalizedArgv.slice(2);
2898
3180
  if (normalizedArgv[0] === "analyze") return normalizedArgv.slice(1);
@@ -2900,7 +3182,7 @@ function normalizeCliArgv$4(argv) {
2900
3182
  }
2901
3183
  function parseReportAnalyzeCliArguments(argv) {
2902
3184
  const cli = meow(reportAnalyzeHelpText, {
2903
- argv: normalizeCliArgv$4(argv),
3185
+ argv: normalizeCliArgv$3(argv),
2904
3186
  flags: {
2905
3187
  attempt: { type: "string" },
2906
3188
  caseState: { type: "string" },
@@ -3189,218 +3471,6 @@ async function runReportAnalyzeCli(argv) {
3189
3471
  }
3190
3472
  }
3191
3473
  //#endregion
3192
- //#region src/cli/report-cases.ts
3193
- const reportCasesHelpText = `
3194
- Inspect normalized case records from generated vieval report artifacts.
3195
-
3196
- Usage
3197
- $ vieval report cases <reportPath> [options]
3198
-
3199
- Options
3200
- --format Output format: table | json | jsonl (default: table)
3201
- --where Equality filter "key=value"; repeatable
3202
- --group-by Case field, score name, or metric name used for grouped score summaries
3203
- `;
3204
- /**
3205
- * Reads normalized case records from one report run directory or report root.
3206
- *
3207
- * Use when:
3208
- * - CLI tools need case-level inspection from local report artifacts
3209
- * - callers may pass a run directory, a `cases.jsonl` file, or a report root
3210
- *
3211
- * Expects:
3212
- * - discovered `cases.jsonl` files contain one `CaseRecord` JSON object per line
3213
- *
3214
- * Returns:
3215
- * - all parsed case records sorted by discovered file path order
3216
- */
3217
- async function readCaseRecordsFromReport(reportPath) {
3218
- const caseFilePaths = await resolveCaseRecordPaths(reportPath);
3219
- if (caseFilePaths.length === 0) throw new Error(`No cases.jsonl files found under "${resolve(reportPath)}".`);
3220
- const records = [];
3221
- for (const caseFilePath of caseFilePaths) {
3222
- const lines = readFileSync(caseFilePath, "utf-8").split("\n");
3223
- for (const [index, line] of lines.entries()) {
3224
- const trimmed = line.trim();
3225
- if (trimmed.length === 0) continue;
3226
- try {
3227
- records.push(JSON.parse(trimmed));
3228
- } catch (error) {
3229
- throw new Error(`Invalid cases.jsonl line ${index + 1} in "${caseFilePath}": ${errorMessageFrom(error) ?? "Unknown JSON parse failure."}`);
3230
- }
3231
- }
3232
- }
3233
- return records;
3234
- }
3235
- /**
3236
- * Builds filtered case inspection output.
3237
- *
3238
- * Use when:
3239
- * - `vieval report cases` needs deterministic JSON/table output
3240
- * - tests need pure filtering and grouping behavior without process I/O
3241
- *
3242
- * Expects:
3243
- * - `where` filters use `key=value`
3244
- * - lookup keys may target direct case fields, score names, or metric names
3245
- *
3246
- * Returns:
3247
- * - filtered records plus grouped score summaries when `groupBy` is present
3248
- */
3249
- function buildReportCasesOutput(records, options) {
3250
- const whereFilters = (options.where ?? []).map(parseSelector);
3251
- const filteredRecords = records.filter((record) => matchesWhereFilters(record, whereFilters));
3252
- return {
3253
- groups: options.groupBy == null ? void 0 : buildCaseGroups(filteredRecords, options.groupBy),
3254
- records: [...filteredRecords]
3255
- };
3256
- }
3257
- /**
3258
- * Runs the `vieval report cases` command.
3259
- *
3260
- * Call stack:
3261
- *
3262
- * published executable (`../bin/vieval`)
3263
- * -> {@link import('./index').runTopLevelCli}
3264
- * -> {@link runReportCasesCli}
3265
- * -> {@link readCaseRecordsFromReport}
3266
- *
3267
- * Use when:
3268
- * - the top-level CLI dispatches local case artifact inspection
3269
- *
3270
- * Expects:
3271
- * - argv is either `cases <reportPath> ...` or `<reportPath> ...`
3272
- *
3273
- * Returns:
3274
- * - resolves after writing the requested output to stdout
3275
- */
3276
- async function runReportCasesCli(argv) {
3277
- try {
3278
- const parsed = parseReportCasesCliArguments(argv);
3279
- const output = buildReportCasesOutput(await readCaseRecordsFromReport(parsed.reportPath), parsed);
3280
- if (parsed.format === "json") {
3281
- process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
3282
- return;
3283
- }
3284
- if (parsed.format === "jsonl") {
3285
- process.stdout.write(encodeJsonl(output.records));
3286
- return;
3287
- }
3288
- process.stdout.write(`${formatCasesTable(output)}\n`);
3289
- } catch (error) {
3290
- const errorMessage = errorMessageFrom(error) ?? "Unknown report cases failure.";
3291
- process.stderr.write(`[vieval report cases] ${errorMessage}\n`);
3292
- process.exitCode = 1;
3293
- }
3294
- }
3295
- function normalizeCliArgv$3(argv) {
3296
- const normalizedArgv = argv[0] === "--" ? argv.slice(1) : [...argv];
3297
- if (normalizedArgv[0] === "report" && normalizedArgv[1] === "cases") return normalizedArgv.slice(2);
3298
- if (normalizedArgv[0] === "cases") return normalizedArgv.slice(1);
3299
- return normalizedArgv;
3300
- }
3301
- function parseReportCasesCliArguments(argv) {
3302
- const cli = meow(reportCasesHelpText, {
3303
- argv: normalizeCliArgv$3(argv),
3304
- flags: {
3305
- format: {
3306
- default: "table",
3307
- type: "string"
3308
- },
3309
- groupBy: { type: "string" },
3310
- where: {
3311
- isMultiple: true,
3312
- type: "string"
3313
- }
3314
- },
3315
- importMeta: import.meta
3316
- });
3317
- const reportPath = cli.input[0];
3318
- if (reportPath == null || reportPath.length === 0) throw new Error("Missing required <reportPath> argument.");
3319
- return {
3320
- format: normalizeReportCasesFormat(cli.flags.format),
3321
- groupBy: cli.flags.groupBy,
3322
- reportPath,
3323
- where: cli.flags.where
3324
- };
3325
- }
3326
- function normalizeReportCasesFormat(value) {
3327
- const normalized = value.toLowerCase();
3328
- if (normalized === "json") return "json";
3329
- if (normalized === "jsonl") return "jsonl";
3330
- return "table";
3331
- }
3332
- async function resolveCaseRecordPaths(reportPath) {
3333
- const absoluteReportPath = resolve(reportPath);
3334
- const directCaseFilePath = resolve(absoluteReportPath, "cases.jsonl");
3335
- if (existsSync(absoluteReportPath) && absoluteReportPath.endsWith(".jsonl")) return [absoluteReportPath];
3336
- if (existsSync(directCaseFilePath)) return [directCaseFilePath];
3337
- return (await glob("**/cases.jsonl", {
3338
- absolute: true,
3339
- cwd: absoluteReportPath
3340
- })).sort((left, right) => left.localeCompare(right));
3341
- }
3342
- function matchesWhereFilters(record, whereFilters) {
3343
- return whereFilters.every((parsed) => {
3344
- const resolved = getCaseSelectorValue(record, parsed.key);
3345
- return resolved.exists && String(resolved.value) === parsed.value;
3346
- });
3347
- }
3348
- function parseSelector(selector) {
3349
- const separatorIndex = selector.indexOf("=");
3350
- if (separatorIndex <= 0 || separatorIndex === selector.length - 1) throw new Error(`Invalid selector "${selector}". Expected "key=value".`);
3351
- return {
3352
- key: selector.slice(0, separatorIndex).trim(),
3353
- value: selector.slice(separatorIndex + 1).trim()
3354
- };
3355
- }
3356
- function buildCaseGroups(records, groupBy) {
3357
- const groups = {};
3358
- for (const record of records) {
3359
- const resolved = getCaseSelectorValue(record, groupBy);
3360
- if (!resolved.exists) continue;
3361
- const groupKey = `${groupBy}=${String(resolved.value)}`;
3362
- groups[groupKey] ??= {
3363
- count: 0,
3364
- scores: {}
3365
- };
3366
- groups[groupKey].count += 1;
3367
- addScores(groups[groupKey].scores, record.scores);
3368
- }
3369
- return Object.fromEntries(Object.entries(groups).sort(([left], [right]) => left.localeCompare(right)).map(([groupKey, group]) => [groupKey, {
3370
- count: group.count,
3371
- scores: finalizeScores(group.scores)
3372
- }]));
3373
- }
3374
- function addScores(summary, scores) {
3375
- for (const [scoreName, value] of Object.entries(scores)) {
3376
- summary[scoreName] ??= {
3377
- average: 0,
3378
- count: 0,
3379
- sum: 0
3380
- };
3381
- summary[scoreName].count += 1;
3382
- summary[scoreName].sum += value;
3383
- }
3384
- }
3385
- function finalizeScores(summary) {
3386
- return Object.fromEntries(Object.entries(summary).sort(([left], [right]) => left.localeCompare(right)).map(([scoreName, bucket]) => [scoreName, {
3387
- average: bucket.count === 0 ? 0 : bucket.sum / bucket.count,
3388
- count: bucket.count,
3389
- sum: bucket.sum
3390
- }]));
3391
- }
3392
- function formatCasesTable(output) {
3393
- const lines = ["CASES vieval report", `Case count ${output.records.length}`];
3394
- if (output.groups != null) {
3395
- lines.push("Groups");
3396
- for (const [groupKey, group] of Object.entries(output.groups)) {
3397
- const scoreText = Object.entries(group.scores).map(([scoreName, bucket]) => `${scoreName}=${bucket.average.toFixed(3)}`).join(" ");
3398
- lines.push(`${groupKey} count=${group.count}${scoreText.length > 0 ? ` ${scoreText}` : ""}`);
3399
- }
3400
- }
3401
- return lines.join("\n");
3402
- }
3403
- //#endregion
3404
3474
  //#region src/cli/report-case-compare.ts
3405
3475
  const reportCompareHelpText = `
3406
3476
  Compare normalized case records from two generated vieval reports.
@@ -3581,7 +3651,7 @@ function averageScore(records, scoreKind) {
3581
3651
  }
3582
3652
  function diffMetrics(left, right) {
3583
3653
  const changed = {};
3584
- const metricKeys = [...new Set([...Object.keys(left), ...Object.keys(right)])].sort((leftKey, rightKey) => leftKey.localeCompare(rightKey));
3654
+ const metricKeys = [.../* @__PURE__ */ new Set([...Object.keys(left), ...Object.keys(right)])].sort((leftKey, rightKey) => leftKey.localeCompare(rightKey));
3585
3655
  for (const metricKey of metricKeys) if (stableStringify(left[metricKey]) !== stableStringify(right[metricKey])) changed[metricKey] = {
3586
3656
  left: left[metricKey],
3587
3657
  right: right[metricKey]
@@ -3842,4 +3912,4 @@ async function runTopLevelCli(argv) {
3842
3912
  //#endregion
3843
3913
  export { runTopLevelCli as n, parseTopLevelCliArguments as t };
3844
3914
 
3845
- //# sourceMappingURL=cli-Dao25VxV.mjs.map
3915
+ //# sourceMappingURL=cli-CHFCF8UR.mjs.map