agentv 4.17.1 → 4.19.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,7 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  HtmlWriter,
4
- RESULT_INDEX_FILENAME,
5
4
  TARGET_FILE_CANDIDATES,
6
- buildDefaultRunDir,
7
5
  c,
8
6
  detectFileType,
9
7
  discoverEvalFiles,
@@ -31,47 +29,58 @@ import {
31
29
  resolveEvalPaths,
32
30
  resolveResultSourcePath,
33
31
  resolveRunCacheFile,
34
- resolveRunManifestPath,
35
32
  runEvalCommand,
36
33
  selectTarget,
37
34
  syncRemoteResults,
38
- toSnakeCaseDeep,
39
35
  toTraceSummary,
40
36
  validateCasesFile,
41
37
  validateConfigFile,
42
38
  validateEvalFile,
43
39
  validateFileReferences,
44
40
  validateTargetsFile,
45
- validateWorkspacePaths,
41
+ validateWorkspacePaths
42
+ } from "./chunk-YGNBTBXW.js";
43
+ import {
44
+ RESULT_INDEX_FILENAME,
45
+ aggregateRunDir,
46
+ buildDefaultRunDir,
47
+ resolveRunManifestPath,
48
+ toSnakeCaseDeep,
46
49
  writeArtifactsFromResults
47
- } from "./chunk-ZUNYOUFO.js";
50
+ } from "./chunk-NL6P5MUH.js";
48
51
  import {
49
52
  DEFAULT_CATEGORY,
50
- DEFAULT_THRESHOLD,
51
53
  addBenchmark,
52
- createBuiltinRegistry,
53
54
  deriveCategory,
54
55
  discoverBenchmarks,
55
56
  discoverClaudeSessions,
56
57
  discoverCodexSessions,
58
+ getBenchmark,
59
+ getOutputFilenames,
60
+ loadBenchmarkRegistry,
61
+ parseClaudeSession,
62
+ parseCodexSession,
63
+ readTranscriptFile,
64
+ removeBenchmark,
65
+ scanRepoDeps,
66
+ toTranscriptJsonLines,
67
+ transpileEvalYamlFile,
68
+ trimBaselineResult
69
+ } from "./chunk-R2QDYORI.js";
70
+ import {
71
+ DEFAULT_THRESHOLD,
72
+ createBuiltinRegistry,
57
73
  discoverCopilotSessions,
58
74
  executeScript,
59
75
  getAgentvConfigDir,
60
- getBenchmark,
61
- getOutputFilenames,
62
76
  getWorkspacePoolRoot,
63
77
  isAgentSkillsFormat,
64
78
  listTargetNames,
65
- loadBenchmarkRegistry,
66
79
  loadTestSuite,
67
80
  normalizeLineEndings,
68
81
  parseAgentSkillsEvals,
69
- parseClaudeSession,
70
- parseCodexSession,
71
82
  parseCopilotEvents,
72
83
  readTargetDefinitions,
73
- readTranscriptFile,
74
- removeBenchmark,
75
84
  runContainsAllAssertion,
76
85
  runContainsAnyAssertion,
77
86
  runContainsAssertion,
@@ -83,13 +92,9 @@ import {
83
92
  runIsJsonAssertion,
84
93
  runRegexAssertion,
85
94
  runStartsWithAssertion,
86
- scanRepoDeps,
87
95
  toCamelCaseDeep,
88
- toSnakeCaseDeep as toSnakeCaseDeep2,
89
- toTranscriptJsonLines,
90
- transpileEvalYamlFile,
91
- trimBaselineResult
92
- } from "./chunk-IRU2UOWN.js";
96
+ toSnakeCaseDeep as toSnakeCaseDeep2
97
+ } from "./chunk-PTYQS37Y.js";
93
98
  import {
94
99
  __commonJS,
95
100
  __require,
@@ -3638,9 +3643,30 @@ var createCommand = subcommands({
3638
3643
  }
3639
3644
  });
3640
3645
 
3646
+ // src/commands/eval/commands/aggregate.ts
3647
+ import path3 from "node:path";
3648
+ var evalAggregateCommand = command({
3649
+ name: "aggregate",
3650
+ description: "Recompute benchmark.json and timing.json from a run directory. Deduplicates by (test_id, target), keeping the last entry.",
3651
+ args: {
3652
+ runDir: positional({
3653
+ type: string,
3654
+ displayName: "run-dir",
3655
+ description: "Path to a run directory containing index.jsonl"
3656
+ })
3657
+ },
3658
+ handler: async (args) => {
3659
+ const runDir = path3.resolve(args.runDir);
3660
+ const { benchmarkPath, timingPath, testCount, targetCount } = await aggregateRunDir(runDir);
3661
+ console.log(`Aggregated ${testCount} test result(s) across ${targetCount} target(s)`);
3662
+ console.log(` Benchmark: ${benchmarkPath}`);
3663
+ console.log(` Timing: ${timingPath}`);
3664
+ }
3665
+ });
3666
+
3641
3667
  // src/commands/eval/commands/assert.ts
3642
3668
  import { readFileSync as readFileSync2 } from "node:fs";
3643
- import path3 from "node:path";
3669
+ import path4 from "node:path";
3644
3670
  import fg from "fast-glob";
3645
3671
  var evalAssertCommand = command({
3646
3672
  name: "assert",
@@ -3671,7 +3697,7 @@ var evalAssertCommand = command({
3671
3697
  let resolvedOutput;
3672
3698
  let resolvedInput;
3673
3699
  if (file) {
3674
- const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
3700
+ const content = JSON.parse(readFileSync2(path4.resolve(file), "utf8"));
3675
3701
  resolvedOutput = content.output ?? "";
3676
3702
  resolvedInput = content.input ?? "";
3677
3703
  } else {
@@ -3737,11 +3763,11 @@ var evalAssertCommand = command({
3737
3763
  }
3738
3764
  });
3739
3765
  async function findGraderScript(graderName, startDir) {
3740
- let dir = path3.resolve(startDir);
3741
- const root = path3.parse(dir).root;
3766
+ let dir = path4.resolve(startDir);
3767
+ const root = path4.parse(dir).root;
3742
3768
  while (dir !== root) {
3743
3769
  for (const subdir of ["graders", "judges"]) {
3744
- const gradersDir = path3.join(dir, ".agentv", subdir);
3770
+ const gradersDir = path4.join(dir, ".agentv", subdir);
3745
3771
  const found = await fg([`${graderName}.{ts,js,mts,mjs}`], {
3746
3772
  cwd: gradersDir,
3747
3773
  absolute: true,
@@ -3749,7 +3775,7 @@ async function findGraderScript(graderName, startDir) {
3749
3775
  });
3750
3776
  if (found.length > 0) return found[0];
3751
3777
  }
3752
- dir = path3.dirname(dir);
3778
+ dir = path4.dirname(dir);
3753
3779
  }
3754
3780
  return null;
3755
3781
  }
@@ -3762,7 +3788,7 @@ var evalRunCommand = command({
3762
3788
  evalPaths: restPositionals({
3763
3789
  type: string,
3764
3790
  displayName: "eval-paths",
3765
- description: "Path(s) or glob(s) to evaluation .yaml file(s)"
3791
+ description: "Path(s) or glob(s) to evaluation files (.yaml, .eval.ts)"
3766
3792
  }),
3767
3793
  target: multioption({
3768
3794
  type: array(string),
@@ -3895,6 +3921,14 @@ var evalRunCommand = command({
3895
3921
  long: "retry-errors",
3896
3922
  description: "Path to a previous run workspace or index.jsonl manifest \u2014 re-run only execution_error test cases"
3897
3923
  }),
3924
+ resume: flag({
3925
+ long: "resume",
3926
+ description: "Resume an interrupted run: skip already-completed tests and append new results to --output dir"
3927
+ }),
3928
+ rerunFailed: flag({
3929
+ long: "rerun-failed",
3930
+ description: "Rerun failed/errored tests while keeping passing results. Implies --resume semantics"
3931
+ }),
3898
3932
  strict: flag({
3899
3933
  long: "strict",
3900
3934
  description: "Exit with error on version mismatch (instead of warning)"
@@ -3947,7 +3981,7 @@ var evalRunCommand = command({
3947
3981
  },
3948
3982
  handler: async (args) => {
3949
3983
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3950
- const { launchInteractiveWizard } = await import("./interactive-LFCOVXPQ.js");
3984
+ const { launchInteractiveWizard } = await import("./interactive-UZSFYMCF.js");
3951
3985
  await launchInteractiveWizard();
3952
3986
  return;
3953
3987
  }
@@ -3981,6 +4015,8 @@ var evalRunCommand = command({
3981
4015
  otelCaptureContent: args.otelCaptureContent,
3982
4016
  otelGroupTurns: args.otelGroupTurns,
3983
4017
  retryErrors: args.retryErrors,
4018
+ resume: args.resume,
4019
+ rerunFailed: args.rerunFailed,
3984
4020
  strict: args.strict,
3985
4021
  benchmarkJson: args.benchmarkJson,
3986
4022
  artifacts: args.artifacts,
@@ -4008,13 +4044,14 @@ var evalCommand = subcommands({
4008
4044
  description: "Evaluation commands",
4009
4045
  cmds: {
4010
4046
  run: evalRunCommand,
4011
- assert: evalAssertCommand
4047
+ assert: evalAssertCommand,
4048
+ aggregate: evalAggregateCommand
4012
4049
  }
4013
4050
  });
4014
4051
 
4015
4052
  // src/commands/import/claude.ts
4016
4053
  import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
4017
- import path4 from "node:path";
4054
+ import path5 from "node:path";
4018
4055
  var importClaudeCommand = command({
4019
4056
  name: "claude",
4020
4057
  description: "Import a Claude Code session transcript for offline grading",
@@ -4086,8 +4123,8 @@ var importClaudeCommand = command({
4086
4123
  const rawJsonl = await readTranscriptFile(sessionFilePath);
4087
4124
  const transcript = parseClaudeSession(rawJsonl);
4088
4125
  const shortId = (sessionId ?? transcript.source.sessionId).slice(0, 8);
4089
- const outputPath = output ?? path4.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
4090
- await mkdir2(path4.dirname(outputPath), { recursive: true });
4126
+ const outputPath = output ?? path5.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
4127
+ await mkdir2(path5.dirname(outputPath), { recursive: true });
4091
4128
  const jsonLines = toTranscriptJsonLines(transcript);
4092
4129
  await writeFile2(
4093
4130
  outputPath,
@@ -4131,7 +4168,7 @@ function formatDurationMs(ms) {
4131
4168
 
4132
4169
  // src/commands/import/codex.ts
4133
4170
  import { mkdir as mkdir3, writeFile as writeFile3 } from "node:fs/promises";
4134
- import path5 from "node:path";
4171
+ import path6 from "node:path";
4135
4172
  var importCodexCommand = command({
4136
4173
  name: "codex",
4137
4174
  description: "Import a Codex CLI session transcript for offline grading",
@@ -4203,8 +4240,8 @@ var importCodexCommand = command({
4203
4240
  const rawJsonl = await readTranscriptFile(session.filePath);
4204
4241
  const transcript = parseCodexSession(rawJsonl);
4205
4242
  const shortId = session.sessionId.slice(0, 8);
4206
- const outputPath = output ?? path5.join(".agentv", "transcripts", `codex-${shortId}.jsonl`);
4207
- await mkdir3(path5.dirname(outputPath), { recursive: true });
4243
+ const outputPath = output ?? path6.join(".agentv", "transcripts", `codex-${shortId}.jsonl`);
4244
+ await mkdir3(path6.dirname(outputPath), { recursive: true });
4208
4245
  const jsonLines = toTranscriptJsonLines(transcript);
4209
4246
  await writeFile3(
4210
4247
  outputPath,
@@ -4243,7 +4280,7 @@ function formatDurationMs2(ms) {
4243
4280
 
4244
4281
  // src/commands/import/copilot.ts
4245
4282
  import { mkdir as mkdir4, readFile, writeFile as writeFile4 } from "node:fs/promises";
4246
- import path6 from "node:path";
4283
+ import path7 from "node:path";
4247
4284
  var importCopilotCommand = command({
4248
4285
  name: "copilot",
4249
4286
  description: "Import a Copilot CLI session transcript for offline grading",
@@ -4308,7 +4345,7 @@ var importCopilotCommand = command({
4308
4345
  );
4309
4346
  process.exit(1);
4310
4347
  }
4311
- const eventsPath = path6.join(sessionDir, "events.jsonl");
4348
+ const eventsPath = path7.join(sessionDir, "events.jsonl");
4312
4349
  const rawJsonl = await readFile(eventsPath, "utf8");
4313
4350
  const parsed = parseCopilotEvents(rawJsonl);
4314
4351
  const transcript = {
@@ -4325,8 +4362,8 @@ var importCopilotCommand = command({
4325
4362
  costUsd: null
4326
4363
  };
4327
4364
  const shortId = resolvedSessionId.slice(0, 8);
4328
- const outputPath = output ?? path6.join(".agentv", "transcripts", `copilot-${shortId}.jsonl`);
4329
- await mkdir4(path6.dirname(outputPath), { recursive: true });
4365
+ const outputPath = output ?? path7.join(".agentv", "transcripts", `copilot-${shortId}.jsonl`);
4366
+ await mkdir4(path7.dirname(outputPath), { recursive: true });
4330
4367
  const jsonLines = toTranscriptJsonLines(transcript);
4331
4368
  await writeFile4(
4332
4369
  outputPath,
@@ -4371,12 +4408,12 @@ function formatDurationMs3(ms) {
4371
4408
  // src/commands/import/huggingface.ts
4372
4409
  import { execFile } from "node:child_process";
4373
4410
  import { existsSync } from "node:fs";
4374
- import path7 from "node:path";
4411
+ import path8 from "node:path";
4375
4412
  function findScript() {
4376
4413
  const candidates = [
4377
- path7.resolve(__dirname, "..", "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4378
- path7.resolve(__dirname, "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4379
- path7.resolve(process.cwd(), "scripts", "import-huggingface.py")
4414
+ path8.resolve(__dirname, "..", "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4415
+ path8.resolve(__dirname, "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4416
+ path8.resolve(process.cwd(), "scripts", "import-huggingface.py")
4380
4417
  ];
4381
4418
  for (const candidate of candidates) {
4382
4419
  if (existsSync(candidate)) return candidate;
@@ -4488,29 +4525,29 @@ var importCommand = subcommands({
4488
4525
 
4489
4526
  // src/commands/init/index.ts
4490
4527
  import { existsSync as existsSync2, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4491
- import path9 from "node:path";
4528
+ import path10 from "node:path";
4492
4529
  import * as readline from "node:readline/promises";
4493
4530
 
4494
4531
  // src/templates/index.ts
4495
4532
  import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
4496
- import path8 from "node:path";
4533
+ import path9 from "node:path";
4497
4534
  import { fileURLToPath } from "node:url";
4498
4535
  function getAgentvTemplates() {
4499
4536
  return getTemplatesFromDir(".agentv");
4500
4537
  }
4501
4538
  function getEnvExampleTemplate() {
4502
- const currentDir = path8.dirname(fileURLToPath(import.meta.url));
4503
- const templatesBase = currentDir.includes(`${path8.sep}dist`) ? path8.join(currentDir, "templates") : currentDir;
4504
- const content = readFileSync3(path8.join(templatesBase, ".env.example"), "utf-8");
4539
+ const currentDir = path9.dirname(fileURLToPath(import.meta.url));
4540
+ const templatesBase = currentDir.includes(`${path9.sep}dist`) ? path9.join(currentDir, "templates") : currentDir;
4541
+ const content = readFileSync3(path9.join(templatesBase, ".env.example"), "utf-8");
4505
4542
  return { path: ".env.example", content };
4506
4543
  }
4507
4544
  function getTemplatesFromDir(subdir) {
4508
- const currentDir = path8.dirname(fileURLToPath(import.meta.url));
4545
+ const currentDir = path9.dirname(fileURLToPath(import.meta.url));
4509
4546
  let templatesDir;
4510
- if (currentDir.includes(`${path8.sep}dist`)) {
4511
- templatesDir = path8.join(currentDir, "templates", subdir);
4547
+ if (currentDir.includes(`${path9.sep}dist`)) {
4548
+ templatesDir = path9.join(currentDir, "templates", subdir);
4512
4549
  } else {
4513
- templatesDir = path8.join(currentDir, subdir);
4550
+ templatesDir = path9.join(currentDir, subdir);
4514
4551
  }
4515
4552
  return readTemplatesRecursively(templatesDir, "");
4516
4553
  }
@@ -4518,15 +4555,15 @@ function readTemplatesRecursively(dir, relativePath) {
4518
4555
  const templates = [];
4519
4556
  const entries2 = readdirSync(dir);
4520
4557
  for (const entry of entries2) {
4521
- const fullPath = path8.join(dir, entry);
4558
+ const fullPath = path9.join(dir, entry);
4522
4559
  const stat3 = statSync(fullPath);
4523
- const entryRelativePath = relativePath ? path8.join(relativePath, entry) : entry;
4560
+ const entryRelativePath = relativePath ? path9.join(relativePath, entry) : entry;
4524
4561
  if (stat3.isDirectory()) {
4525
4562
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
4526
4563
  } else {
4527
4564
  const content = readFileSync3(fullPath, "utf-8");
4528
4565
  templates.push({
4529
- path: entryRelativePath.split(path8.sep).join("/"),
4566
+ path: entryRelativePath.split(path9.sep).join("/"),
4530
4567
  // Normalize to forward slashes
4531
4568
  content
4532
4569
  });
@@ -4555,22 +4592,22 @@ async function promptYesNo(message) {
4555
4592
  }
4556
4593
  }
4557
4594
  async function initCommand(options = {}) {
4558
- const targetPath = path9.resolve(options.targetPath ?? ".");
4559
- const agentvDir = path9.join(targetPath, ".agentv");
4595
+ const targetPath = path10.resolve(options.targetPath ?? ".");
4596
+ const agentvDir = path10.join(targetPath, ".agentv");
4560
4597
  const otherAgentvTemplates = getAgentvTemplates();
4561
4598
  const envTemplate = getEnvExampleTemplate();
4562
4599
  const existingFiles = [];
4563
4600
  if (envTemplate) {
4564
- const envFilePath = path9.join(targetPath, ".env.example");
4601
+ const envFilePath = path10.join(targetPath, ".env.example");
4565
4602
  if (existsSync2(envFilePath)) {
4566
4603
  existingFiles.push(".env.example");
4567
4604
  }
4568
4605
  }
4569
4606
  if (existsSync2(agentvDir)) {
4570
4607
  for (const template of otherAgentvTemplates) {
4571
- const targetFilePath = path9.join(agentvDir, template.path);
4608
+ const targetFilePath = path10.join(agentvDir, template.path);
4572
4609
  if (existsSync2(targetFilePath)) {
4573
- existingFiles.push(path9.relative(targetPath, targetFilePath));
4610
+ existingFiles.push(path10.relative(targetPath, targetFilePath));
4574
4611
  }
4575
4612
  }
4576
4613
  }
@@ -4592,18 +4629,18 @@ async function initCommand(options = {}) {
4592
4629
  mkdirSync(agentvDir, { recursive: true });
4593
4630
  }
4594
4631
  if (envTemplate) {
4595
- const envFilePath = path9.join(targetPath, ".env.example");
4632
+ const envFilePath = path10.join(targetPath, ".env.example");
4596
4633
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
4597
4634
  console.log("Created .env.example");
4598
4635
  }
4599
4636
  for (const template of otherAgentvTemplates) {
4600
- const targetFilePath = path9.join(agentvDir, template.path);
4601
- const targetDirPath = path9.dirname(targetFilePath);
4637
+ const targetFilePath = path10.join(agentvDir, template.path);
4638
+ const targetDirPath = path10.dirname(targetFilePath);
4602
4639
  if (!existsSync2(targetDirPath)) {
4603
4640
  mkdirSync(targetDirPath, { recursive: true });
4604
4641
  }
4605
4642
  writeFileSync2(targetFilePath, template.content, "utf-8");
4606
- console.log(`Created ${path9.relative(targetPath, targetFilePath)}`);
4643
+ console.log(`Created ${path10.relative(targetPath, targetFilePath)}`);
4607
4644
  }
4608
4645
  console.log("\nAgentV initialized successfully!");
4609
4646
  console.log("\nFiles installed to root:");
@@ -4611,7 +4648,7 @@ async function initCommand(options = {}) {
4611
4648
  console.log(" - .env.example");
4612
4649
  }
4613
4650
  console.log(`
4614
- Files installed to ${path9.relative(targetPath, agentvDir)}:`);
4651
+ Files installed to ${path10.relative(targetPath, agentvDir)}:`);
4615
4652
  for (const t of otherAgentvTemplates) {
4616
4653
  console.log(` - ${t.path}`);
4617
4654
  }
@@ -4643,13 +4680,13 @@ var initCmdTsCommand = command({
4643
4680
 
4644
4681
  // src/commands/inspect/filter.ts
4645
4682
  import { existsSync as existsSync3, readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4646
- import path10 from "node:path";
4683
+ import path11 from "node:path";
4647
4684
  function collectIndexFiles(dir) {
4648
4685
  const files = [];
4649
4686
  try {
4650
4687
  const entries2 = readdirSync2(dir, { withFileTypes: true });
4651
4688
  for (const entry of entries2) {
4652
- const fullPath = path10.join(dir, entry.name);
4689
+ const fullPath = path11.join(dir, entry.name);
4653
4690
  if (entry.isDirectory()) {
4654
4691
  files.push(...collectIndexFiles(fullPath));
4655
4692
  } else if (entry.name === "index.jsonl") {
@@ -4700,7 +4737,7 @@ function parseFilterableRecords(filePath) {
4700
4737
  }
4701
4738
  let experiment = typeof raw.experiment === "string" ? raw.experiment : void 0;
4702
4739
  if (!experiment) {
4703
- const parts = filePath.split(path10.sep);
4740
+ const parts = filePath.split(path11.sep);
4704
4741
  const runsIdx = parts.indexOf("runs");
4705
4742
  if (runsIdx !== -1 && parts.length - runsIdx >= 3) {
4706
4743
  const candidate = parts[runsIdx + 1];
@@ -4755,7 +4792,7 @@ function buildFilterPredicate(opts) {
4755
4792
  }
4756
4793
  function discoverFilterSources(searchPath, cwd) {
4757
4794
  if (searchPath) {
4758
- const resolved = path10.isAbsolute(searchPath) ? searchPath : path10.resolve(cwd, searchPath);
4795
+ const resolved = path11.isAbsolute(searchPath) ? searchPath : path11.resolve(cwd, searchPath);
4759
4796
  if (!existsSync3(resolved)) {
4760
4797
  console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
4761
4798
  process.exit(1);
@@ -4768,7 +4805,7 @@ function discoverFilterSources(searchPath, cwd) {
4768
4805
  }
4769
4806
  return [resolved];
4770
4807
  }
4771
- return collectIndexFiles(path10.join(cwd, ".agentv", "results", "runs"));
4808
+ return collectIndexFiles(path11.join(cwd, ".agentv", "results", "runs"));
4772
4809
  }
4773
4810
  function formatFilterTable(records) {
4774
4811
  const lines = [];
@@ -5042,7 +5079,7 @@ function parseAssertSpec(spec) {
5042
5079
  }
5043
5080
  default:
5044
5081
  throw new Error(
5045
- `Unsupported evaluator type: "${type}". Supported: ${SUPPORTED_TYPES.join(", ")}`
5082
+ `Unsupported grader type: "${type}". Supported: ${SUPPORTED_TYPES.join(", ")}`
5046
5083
  );
5047
5084
  }
5048
5085
  }
@@ -5164,7 +5201,7 @@ var traceScoreCommand = command({
5164
5201
  type: string,
5165
5202
  long: "assert",
5166
5203
  short: "a",
5167
- description: "Evaluator spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
5204
+ description: "Grader spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
5168
5205
  }),
5169
5206
  testId: option({
5170
5207
  type: optional(string),
@@ -5241,13 +5278,13 @@ var traceScoreCommand = command({
5241
5278
 
5242
5279
  // src/commands/inspect/search.ts
5243
5280
  import { existsSync as existsSync4, readFileSync as readFileSync5, readdirSync as readdirSync3, statSync as statSync3 } from "node:fs";
5244
- import path11 from "node:path";
5281
+ import path12 from "node:path";
5245
5282
  function collectJsonlFiles(dir) {
5246
5283
  const files = [];
5247
5284
  try {
5248
5285
  const entries2 = readdirSync3(dir, { withFileTypes: true });
5249
5286
  for (const entry of entries2) {
5250
- const fullPath = path11.join(dir, entry.name);
5287
+ const fullPath = path12.join(dir, entry.name);
5251
5288
  if (entry.isDirectory()) {
5252
5289
  files.push(...collectJsonlFiles(fullPath));
5253
5290
  } else if (entry.name.endsWith(".jsonl")) {
@@ -5306,7 +5343,7 @@ function searchJsonlFile(filePath, regex2, targetFilter, experimentFilter) {
5306
5343
  }
5307
5344
  function discoverSources(basePath, cwd) {
5308
5345
  if (basePath) {
5309
- const resolved = path11.isAbsolute(basePath) ? basePath : path11.resolve(cwd, basePath);
5346
+ const resolved = path12.isAbsolute(basePath) ? basePath : path12.resolve(cwd, basePath);
5310
5347
  if (!existsSync4(resolved)) {
5311
5348
  console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
5312
5349
  process.exit(1);
@@ -5320,8 +5357,8 @@ function discoverSources(basePath, cwd) {
5320
5357
  return [resolved];
5321
5358
  }
5322
5359
  const sources = [];
5323
- sources.push(...collectJsonlFiles(path11.join(cwd, ".agentv", "results", "runs")));
5324
- sources.push(...collectJsonlFiles(path11.join(cwd, ".agentv", "transcripts")));
5360
+ sources.push(...collectJsonlFiles(path12.join(cwd, ".agentv", "results", "runs")));
5361
+ sources.push(...collectJsonlFiles(path12.join(cwd, ".agentv", "transcripts")));
5325
5362
  return sources;
5326
5363
  }
5327
5364
  function formatSearchResults(matches, pattern) {
@@ -5873,7 +5910,7 @@ import { readFile as readFile2, readdir, writeFile as writeFile5 } from "node:fs
5873
5910
  import { join } from "node:path";
5874
5911
  var evalBenchCommand = command({
5875
5912
  name: "bench",
5876
- description: "Merge evaluator scores and produce benchmark artifacts",
5913
+ description: "Merge grader scores and produce benchmark artifacts",
5877
5914
  args: {
5878
5915
  exportDir: positional({
5879
5916
  type: string,
@@ -6862,7 +6899,7 @@ var pipelineCommand = subcommands({
6862
6899
  });
6863
6900
 
6864
6901
  // src/commands/results/export.ts
6865
- import path12 from "node:path";
6902
+ import path13 from "node:path";
6866
6903
 
6867
6904
  // src/commands/results/shared.ts
6868
6905
  import { existsSync as existsSync7 } from "node:fs";
@@ -6909,20 +6946,20 @@ async function loadResults(source, cwd) {
6909
6946
 
6910
6947
  // src/commands/results/export.ts
6911
6948
  function deriveOutputDir(cwd, sourceFile) {
6912
- if (path12.basename(sourceFile) !== RESULT_INDEX_FILENAME) {
6949
+ if (path13.basename(sourceFile) !== RESULT_INDEX_FILENAME) {
6913
6950
  throw new Error(`Expected a run manifest named ${RESULT_INDEX_FILENAME}: ${sourceFile}`);
6914
6951
  }
6915
- const runDir = path12.dirname(sourceFile);
6916
- const segments = path12.normalize(runDir).split(path12.sep).filter(Boolean);
6952
+ const runDir = path13.dirname(sourceFile);
6953
+ const segments = path13.normalize(runDir).split(path13.sep).filter(Boolean);
6917
6954
  const runsIndex = segments.lastIndexOf("runs");
6918
6955
  if (runsIndex >= 0 && runsIndex < segments.length - 1) {
6919
- return path12.join(cwd, ".agentv", "results", "export", ...segments.slice(runsIndex + 1));
6956
+ return path13.join(cwd, ".agentv", "results", "export", ...segments.slice(runsIndex + 1));
6920
6957
  }
6921
- const parentDir = path12.basename(runDir);
6958
+ const parentDir = path13.basename(runDir);
6922
6959
  if (parentDir.startsWith("eval_")) {
6923
- return path12.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
6960
+ return path13.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
6924
6961
  }
6925
- return path12.join(cwd, ".agentv", "results", "export", parentDir);
6962
+ return path13.join(cwd, ".agentv", "results", "export", parentDir);
6926
6963
  }
6927
6964
  async function loadExportSource(source, cwd) {
6928
6965
  const { sourceFile } = await resolveSourceFile(source, cwd);
@@ -6955,7 +6992,7 @@ var resultsExportCommand = command({
6955
6992
  const cwd = dir ?? process.cwd();
6956
6993
  try {
6957
6994
  const { sourceFile, results } = await loadExportSource(source, cwd);
6958
- const outputDir = out ? path12.isAbsolute(out) ? out : path12.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
6995
+ const outputDir = out ? path13.isAbsolute(out) ? out : path13.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
6959
6996
  await writeArtifactsFromResults(results, outputDir, {
6960
6997
  evalFile: sourceFile
6961
6998
  });
@@ -7016,7 +7053,7 @@ var resultsFailuresCommand = command({
7016
7053
 
7017
7054
  // src/commands/results/report.ts
7018
7055
  import { existsSync as existsSync8, mkdirSync as mkdirSync2, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
7019
- import path13 from "node:path";
7056
+ import path14 from "node:path";
7020
7057
 
7021
7058
  // src/commands/results/report-template.ts
7022
7059
  var RESULTS_REPORT_TEMPLATE = `<!DOCTYPE html>
@@ -8640,10 +8677,10 @@ function normalizeEvalFileLabel(value) {
8640
8677
  if (!trimmed) {
8641
8678
  return void 0;
8642
8679
  }
8643
- return path13.basename(trimmed).replace(/\.results\.jsonl$/i, "").replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "").replace(/\.jsonl$/i, "");
8680
+ return path14.basename(trimmed).replace(/\.results\.jsonl$/i, "").replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "").replace(/\.jsonl$/i, "");
8644
8681
  }
8645
8682
  function readBenchmarkEvalFile(sourceFile) {
8646
- const benchmarkPath = path13.join(path13.dirname(sourceFile), "benchmark.json");
8683
+ const benchmarkPath = path14.join(path14.dirname(sourceFile), "benchmark.json");
8647
8684
  if (!existsSync8(benchmarkPath)) {
8648
8685
  return void 0;
8649
8686
  }
@@ -8655,10 +8692,10 @@ function readBenchmarkEvalFile(sourceFile) {
8655
8692
  }
8656
8693
  }
8657
8694
  function deriveReportPath(sourceFile) {
8658
- return path13.join(path13.dirname(sourceFile), "report.html");
8695
+ return path14.join(path14.dirname(sourceFile), "report.html");
8659
8696
  }
8660
8697
  function serializeReportResult(result, sourceFile, manifestRecord, benchmarkEvalFile) {
8661
- const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? path13.basename(path13.dirname(sourceFile));
8698
+ const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? path14.basename(path14.dirname(sourceFile));
8662
8699
  return {
8663
8700
  timestamp: result.timestamp,
8664
8701
  test_id: result.testId,
@@ -8706,9 +8743,9 @@ function renderResultsReport(results, sourceFile, records, benchmarkEvalFile) {
8706
8743
  }
8707
8744
  async function writeResultsReport(source, outputPath, cwd) {
8708
8745
  const { sourceFile, results, records, benchmarkEvalFile } = await loadReportSource(source, cwd);
8709
- const resolvedOutputPath = outputPath ? path13.isAbsolute(outputPath) ? outputPath : path13.resolve(cwd, outputPath) : deriveReportPath(sourceFile);
8746
+ const resolvedOutputPath = outputPath ? path14.isAbsolute(outputPath) ? outputPath : path14.resolve(cwd, outputPath) : deriveReportPath(sourceFile);
8710
8747
  const html = renderResultsReport(results, sourceFile, records, benchmarkEvalFile);
8711
- mkdirSync2(path13.dirname(resolvedOutputPath), { recursive: true });
8748
+ mkdirSync2(path14.dirname(resolvedOutputPath), { recursive: true });
8712
8749
  writeFileSync3(resolvedOutputPath, html, "utf8");
8713
8750
  const written = readFileSync7(resolvedOutputPath, "utf8");
8714
8751
  if (written.includes("__DATA_PLACEHOLDER__")) {
@@ -8896,10 +8933,10 @@ var resultsSummaryCommand = command({
8896
8933
 
8897
8934
  // src/commands/results/validate.ts
8898
8935
  import { existsSync as existsSync10, readFileSync as readFileSync9, statSync as statSync4 } from "node:fs";
8899
- import path14 from "node:path";
8936
+ import path15 from "node:path";
8900
8937
  function checkDirectoryNaming(runDir) {
8901
- const dirName = path14.basename(runDir);
8902
- const pathSegments = path14.normalize(runDir).split(path14.sep).filter(Boolean);
8938
+ const dirName = path15.basename(runDir);
8939
+ const pathSegments = path15.normalize(runDir).split(path15.sep).filter(Boolean);
8903
8940
  const runsIndex = pathSegments.lastIndexOf("runs");
8904
8941
  const diagnostics = [];
8905
8942
  if (runsIndex < 0 || runsIndex >= pathSegments.length - 1) {
@@ -8929,7 +8966,7 @@ function validateRunDirectory(runDir) {
8929
8966
  return { diagnostics, entries: entries2 };
8930
8967
  }
8931
8968
  function checkIndexJsonl(runDir) {
8932
- const indexPath = path14.join(runDir, "index.jsonl");
8969
+ const indexPath = path15.join(runDir, "index.jsonl");
8933
8970
  const diagnostics = [];
8934
8971
  const entries2 = [];
8935
8972
  if (!existsSync10(indexPath)) {
@@ -8978,7 +9015,7 @@ function checkIndexJsonl(runDir) {
8978
9015
  if (!entry.scores || !Array.isArray(entry.scores) || entry.scores.length === 0) {
8979
9016
  diagnostics.push({
8980
9017
  severity: "warning",
8981
- message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'scores[]' array \u2014 dashboard may not show per-evaluator breakdown`
9018
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'scores[]' array \u2014 dashboard may not show per-grader breakdown`
8982
9019
  });
8983
9020
  } else {
8984
9021
  for (let j = 0; j < entry.scores.length; j++) {
@@ -9028,7 +9065,7 @@ function checkArtifactFiles(runDir, entries2) {
9028
9065
  for (const entry of entries2) {
9029
9066
  const testId = entry.test_id ?? "?";
9030
9067
  if (entry.grading_path) {
9031
- const gradingPath = path14.join(runDir, entry.grading_path);
9068
+ const gradingPath = path15.join(runDir, entry.grading_path);
9032
9069
  if (!existsSync10(gradingPath)) {
9033
9070
  diagnostics.push({
9034
9071
  severity: "error",
@@ -9058,7 +9095,7 @@ function checkArtifactFiles(runDir, entries2) {
9058
9095
  }
9059
9096
  }
9060
9097
  if (entry.timing_path) {
9061
- const timingPath = path14.join(runDir, entry.timing_path);
9098
+ const timingPath = path15.join(runDir, entry.timing_path);
9062
9099
  if (!existsSync10(timingPath)) {
9063
9100
  diagnostics.push({
9064
9101
  severity: "warning",
@@ -9067,7 +9104,7 @@ function checkArtifactFiles(runDir, entries2) {
9067
9104
  }
9068
9105
  }
9069
9106
  }
9070
- const benchmarkPath = path14.join(runDir, "benchmark.json");
9107
+ const benchmarkPath = path15.join(runDir, "benchmark.json");
9071
9108
  if (!existsSync10(benchmarkPath)) {
9072
9109
  diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
9073
9110
  }
@@ -9084,7 +9121,7 @@ var resultsValidateCommand = command({
9084
9121
  })
9085
9122
  },
9086
9123
  handler: async ({ runDir }) => {
9087
- const resolvedDir = path14.resolve(runDir);
9124
+ const resolvedDir = path15.resolve(runDir);
9088
9125
  if (!existsSync10(resolvedDir) || !statSync4(resolvedDir).isDirectory()) {
9089
9126
  console.error(`Error: '${runDir}' is not a directory`);
9090
9127
  process.exit(1);
@@ -9128,14 +9165,14 @@ var resultsCommand = subcommands({
9128
9165
 
9129
9166
  // src/commands/results/serve.ts
9130
9167
  import { existsSync as existsSync14, readFileSync as readFileSync12, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync6 } from "node:fs";
9131
- import path18 from "node:path";
9168
+ import path19 from "node:path";
9132
9169
  import { fileURLToPath as fileURLToPath3 } from "node:url";
9133
9170
  import { Hono } from "hono";
9134
9171
 
9135
9172
  // src/commands/results/eval-runner.ts
9136
9173
  import { execFileSync, spawn } from "node:child_process";
9137
9174
  import { existsSync as existsSync11 } from "node:fs";
9138
- import path15 from "node:path";
9175
+ import path16 from "node:path";
9139
9176
  import { fileURLToPath as fileURLToPath2 } from "node:url";
9140
9177
  var activeRuns = /* @__PURE__ */ new Map();
9141
9178
  function generateRunId() {
@@ -9157,7 +9194,7 @@ async function discoverTargetsInProject(cwd) {
9157
9194
  const repoRoot = await findRepoRoot(cwd) ?? cwd;
9158
9195
  let targetsFilePath;
9159
9196
  for (const candidate of TARGET_FILE_CANDIDATES) {
9160
- const fullPath = path15.join(cwd, candidate);
9197
+ const fullPath = path16.join(cwd, candidate);
9161
9198
  if (existsSync11(fullPath)) {
9162
9199
  targetsFilePath = fullPath;
9163
9200
  break;
@@ -9165,7 +9202,7 @@ async function discoverTargetsInProject(cwd) {
9165
9202
  }
9166
9203
  if (!targetsFilePath) {
9167
9204
  for (const candidate of TARGET_FILE_CANDIDATES) {
9168
- const fullPath = path15.join(repoRoot, candidate);
9205
+ const fullPath = path16.join(repoRoot, candidate);
9169
9206
  if (existsSync11(fullPath)) {
9170
9207
  targetsFilePath = fullPath;
9171
9208
  break;
@@ -9215,17 +9252,17 @@ function buildCliPreview(args) {
9215
9252
  }
9216
9253
  function resolveCliPath(cwd) {
9217
9254
  const candidates = [
9218
- path15.join(cwd, "apps/cli/src/cli.ts"),
9219
- path15.join(cwd, "apps/cli/dist/cli.js")
9255
+ path16.join(cwd, "apps/cli/src/cli.ts"),
9256
+ path16.join(cwd, "apps/cli/dist/cli.js")
9220
9257
  ];
9221
9258
  for (const c4 of candidates) {
9222
9259
  if (existsSync11(c4)) {
9223
9260
  return { binPath: "bun", args: [c4] };
9224
9261
  }
9225
9262
  }
9226
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path15.dirname(fileURLToPath2(import.meta.url));
9227
- const fromSrc = path15.resolve(currentDir, "../../../cli.ts");
9228
- const fromDist = path15.resolve(currentDir, "../../cli.js");
9263
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath2(import.meta.url));
9264
+ const fromSrc = path16.resolve(currentDir, "../../../cli.ts");
9265
+ const fromDist = path16.resolve(currentDir, "../../cli.js");
9229
9266
  if (existsSync11(fromSrc)) return { binPath: "bun", args: [fromSrc] };
9230
9267
  if (existsSync11(fromDist)) return { binPath: "bun", args: [fromDist] };
9231
9268
  if (isCommandAvailable("agentv")) {
@@ -9516,12 +9553,12 @@ Process error: ${err2.message}`;
9516
9553
 
9517
9554
  // src/commands/results/run-tags.ts
9518
9555
  import { existsSync as existsSync12, readFileSync as readFileSync10, unlinkSync as unlinkSync2, writeFileSync as writeFileSync4 } from "node:fs";
9519
- import path16 from "node:path";
9556
+ import path17 from "node:path";
9520
9557
  var RUN_TAGS_FILENAME = "tags.json";
9521
9558
  var MAX_TAGS_PER_RUN = 20;
9522
9559
  var MAX_TAG_LENGTH = 60;
9523
9560
  function runTagsPath(manifestPath) {
9524
- return path16.join(path16.dirname(manifestPath), RUN_TAGS_FILENAME);
9561
+ return path17.join(path17.dirname(manifestPath), RUN_TAGS_FILENAME);
9525
9562
  }
9526
9563
  function readRunTags(manifestPath) {
9527
9564
  const fp = runTagsPath(manifestPath);
@@ -9593,13 +9630,13 @@ function normalizeTags(tags) {
9593
9630
 
9594
9631
  // src/commands/results/studio-config.ts
9595
9632
  import { existsSync as existsSync13, mkdirSync as mkdirSync3, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
9596
- import path17 from "node:path";
9633
+ import path18 from "node:path";
9597
9634
  import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
9598
9635
  var DEFAULTS = {
9599
9636
  threshold: DEFAULT_THRESHOLD
9600
9637
  };
9601
9638
  function loadStudioConfig(agentvDir) {
9602
- const configPath = path17.join(agentvDir, "config.yaml");
9639
+ const configPath = path18.join(agentvDir, "config.yaml");
9603
9640
  if (!existsSync13(configPath)) {
9604
9641
  return { ...DEFAULTS };
9605
9642
  }
@@ -9628,7 +9665,7 @@ function saveStudioConfig(agentvDir, config) {
9628
9665
  if (!existsSync13(agentvDir)) {
9629
9666
  mkdirSync3(agentvDir, { recursive: true });
9630
9667
  }
9631
- const configPath = path17.join(agentvDir, "config.yaml");
9668
+ const configPath = path18.join(agentvDir, "config.yaml");
9632
9669
  let existing = {};
9633
9670
  if (existsSync13(configPath)) {
9634
9671
  const raw = readFileSync11(configPath, "utf-8");
@@ -9692,7 +9729,7 @@ function resolveDashboardMode(projectCount, options) {
9692
9729
  return { isMultiProject: projectCount > 1, showMultiWarning: false };
9693
9730
  }
9694
9731
  function feedbackPath(resultDir) {
9695
- return path18.join(resultDir, "feedback.json");
9732
+ return path19.join(resultDir, "feedback.json");
9696
9733
  }
9697
9734
  function readFeedback(cwd) {
9698
9735
  const fp = feedbackPath(cwd);
@@ -9719,8 +9756,8 @@ function buildFileTree(dirPath, relativeTo) {
9719
9756
  if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
9720
9757
  return a.name.localeCompare(b.name);
9721
9758
  }).map((entry) => {
9722
- const fullPath = path18.join(dirPath, entry.name);
9723
- const relPath = path18.relative(relativeTo, fullPath);
9759
+ const fullPath = path19.join(dirPath, entry.name);
9760
+ const relPath = path19.relative(relativeTo, fullPath);
9724
9761
  if (entry.isDirectory()) {
9725
9762
  return {
9726
9763
  name: entry.name,
@@ -9733,7 +9770,7 @@ function buildFileTree(dirPath, relativeTo) {
9733
9770
  });
9734
9771
  }
9735
9772
  function inferLanguage(filePath) {
9736
- const ext = path18.extname(filePath).toLowerCase();
9773
+ const ext = path19.extname(filePath).toLowerCase();
9737
9774
  const langMap = {
9738
9775
  ".json": "json",
9739
9776
  ".jsonl": "json",
@@ -9938,7 +9975,7 @@ async function handleEvalFiles(c4, { searchDir }) {
9938
9975
  const records = parseResultManifest(content);
9939
9976
  const record = records.find((r) => r.test_id === evalId);
9940
9977
  if (!record) return c4.json({ error: "Eval not found" }, 404);
9941
- const baseDir = path18.dirname(meta.path);
9978
+ const baseDir = path19.dirname(meta.path);
9942
9979
  const knownPaths = [
9943
9980
  record.grading_path,
9944
9981
  record.timing_path,
@@ -9947,14 +9984,14 @@ async function handleEvalFiles(c4, { searchDir }) {
9947
9984
  record.response_path
9948
9985
  ].filter((p) => !!p);
9949
9986
  if (knownPaths.length === 0) return c4.json({ files: [] });
9950
- const artifactDirs = knownPaths.map((p) => path18.dirname(p));
9987
+ const artifactDirs = knownPaths.map((p) => path19.dirname(p));
9951
9988
  let commonDir = artifactDirs[0];
9952
9989
  for (const dir of artifactDirs) {
9953
9990
  while (!dir.startsWith(commonDir)) {
9954
- commonDir = path18.dirname(commonDir);
9991
+ commonDir = path19.dirname(commonDir);
9955
9992
  }
9956
9993
  }
9957
- const artifactAbsDir = path18.join(baseDir, commonDir);
9994
+ const artifactAbsDir = path19.join(baseDir, commonDir);
9958
9995
  const files = buildFileTree(artifactAbsDir, baseDir);
9959
9996
  return c4.json({ files });
9960
9997
  } catch {
@@ -9969,9 +10006,9 @@ async function handleEvalFileContent(c4, { searchDir }) {
9969
10006
  const markerIdx = c4.req.path.indexOf(marker);
9970
10007
  const filePath = markerIdx >= 0 ? c4.req.path.slice(markerIdx + marker.length) : "";
9971
10008
  if (!filePath) return c4.json({ error: "No file path specified" }, 400);
9972
- const baseDir = path18.dirname(meta.path);
9973
- const absolutePath = path18.resolve(baseDir, filePath);
9974
- if (!absolutePath.startsWith(path18.resolve(baseDir) + path18.sep) && absolutePath !== path18.resolve(baseDir)) {
10009
+ const baseDir = path19.dirname(meta.path);
10010
+ const absolutePath = path19.resolve(baseDir, filePath);
10011
+ if (!absolutePath.startsWith(path19.resolve(baseDir) + path19.sep) && absolutePath !== path19.resolve(baseDir)) {
9975
10012
  return c4.json({ error: "Path traversal not allowed" }, 403);
9976
10013
  }
9977
10014
  if (!existsSync14(absolutePath) || !statSync5(absolutePath).isFile()) {
@@ -10191,12 +10228,12 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
10191
10228
  return c4.json({
10192
10229
  ...loadStudioConfig(agentvDir),
10193
10230
  read_only: options?.readOnly === true,
10194
- project_name: path18.basename(searchDir),
10231
+ project_name: path19.basename(searchDir),
10195
10232
  multi_project_dashboard: options?.multiProjectDashboard === true
10196
10233
  });
10197
10234
  }
10198
10235
  function handleFeedbackRead(c4, { searchDir }) {
10199
- const resultsDir = path18.join(searchDir, ".agentv", "results");
10236
+ const resultsDir = path19.join(searchDir, ".agentv", "results");
10200
10237
  return c4.json(readFeedback(existsSync14(resultsDir) ? resultsDir : searchDir));
10201
10238
  }
10202
10239
  async function handleRunTagsPut(c4, { searchDir }) {
@@ -10245,7 +10282,7 @@ async function handleRunTagsDelete(c4, { searchDir }) {
10245
10282
  }
10246
10283
  function createApp(results, resultDir, cwd, sourceFile, options) {
10247
10284
  const searchDir = cwd ?? resultDir;
10248
- const agentvDir = path18.join(searchDir, ".agentv");
10285
+ const agentvDir = path19.join(searchDir, ".agentv");
10249
10286
  const defaultCtx = { searchDir, agentvDir };
10250
10287
  const readOnly = options?.readOnly === true;
10251
10288
  const app2 = new Hono();
@@ -10256,7 +10293,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10256
10293
  }
10257
10294
  return handler(c4, {
10258
10295
  searchDir: benchmark.path,
10259
- agentvDir: path18.join(benchmark.path, ".agentv")
10296
+ agentvDir: path19.join(benchmark.path, ".agentv")
10260
10297
  });
10261
10298
  }
10262
10299
  app2.post("/api/config", async (c4) => {
@@ -10583,20 +10620,20 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10583
10620
  { readOnly }
10584
10621
  );
10585
10622
  const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
10586
- if (!studioDistPath || !existsSync14(path18.join(studioDistPath, "index.html"))) {
10623
+ if (!studioDistPath || !existsSync14(path19.join(studioDistPath, "index.html"))) {
10587
10624
  throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
10588
10625
  }
10589
10626
  app2.get("/", (c4) => {
10590
- const indexPath = path18.join(studioDistPath, "index.html");
10627
+ const indexPath = path19.join(studioDistPath, "index.html");
10591
10628
  if (existsSync14(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
10592
10629
  return c4.notFound();
10593
10630
  });
10594
10631
  app2.get("/assets/*", (c4) => {
10595
10632
  const assetPath = c4.req.path;
10596
- const filePath = path18.join(studioDistPath, assetPath);
10633
+ const filePath = path19.join(studioDistPath, assetPath);
10597
10634
  if (!existsSync14(filePath)) return c4.notFound();
10598
10635
  const content = readFileSync12(filePath);
10599
- const ext = path18.extname(filePath);
10636
+ const ext = path19.extname(filePath);
10600
10637
  const mimeTypes = {
10601
10638
  ".js": "application/javascript",
10602
10639
  ".css": "text/css",
@@ -10617,26 +10654,26 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10617
10654
  });
10618
10655
  app2.get("*", (c4) => {
10619
10656
  if (c4.req.path.startsWith("/api/")) return c4.json({ error: "Not found" }, 404);
10620
- const indexPath = path18.join(studioDistPath, "index.html");
10657
+ const indexPath = path19.join(studioDistPath, "index.html");
10621
10658
  if (existsSync14(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
10622
10659
  return c4.notFound();
10623
10660
  });
10624
10661
  return app2;
10625
10662
  }
10626
10663
  function resolveStudioDistDir() {
10627
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path18.dirname(fileURLToPath3(import.meta.url));
10664
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path19.dirname(fileURLToPath3(import.meta.url));
10628
10665
  const candidates = [
10629
10666
  // From src/commands/results/ → sibling apps/studio/dist
10630
- path18.resolve(currentDir, "../../../../studio/dist"),
10667
+ path19.resolve(currentDir, "../../../../studio/dist"),
10631
10668
  // From dist/ → sibling apps/studio/dist (monorepo dev)
10632
- path18.resolve(currentDir, "../../studio/dist"),
10669
+ path19.resolve(currentDir, "../../studio/dist"),
10633
10670
  // Bundled inside CLI dist (published package: dist/studio/)
10634
- path18.resolve(currentDir, "studio"),
10671
+ path19.resolve(currentDir, "studio"),
10635
10672
  // From dist/ in monorepo root context
10636
- path18.resolve(currentDir, "../../../apps/studio/dist")
10673
+ path19.resolve(currentDir, "../../../apps/studio/dist")
10637
10674
  ];
10638
10675
  for (const candidate of candidates) {
10639
- if (existsSync14(candidate) && existsSync14(path18.join(candidate, "index.html"))) {
10676
+ if (existsSync14(candidate) && existsSync14(path19.join(candidate, "index.html"))) {
10640
10677
  return candidate;
10641
10678
  }
10642
10679
  }
@@ -10753,7 +10790,7 @@ Discovered ${discovered.length} project(s).`);
10753
10790
  }
10754
10791
  }
10755
10792
  }
10756
- const resultDir = sourceFile ? path18.dirname(path18.resolve(sourceFile)) : cwd;
10793
+ const resultDir = sourceFile ? path19.dirname(path19.resolve(sourceFile)) : cwd;
10757
10794
  const app2 = createApp(results, resultDir, cwd, sourceFile, {
10758
10795
  readOnly,
10759
10796
  multiProjectDashboard: isMultiProject
@@ -10878,7 +10915,7 @@ var selfCommand = subcommands({
10878
10915
 
10879
10916
  // src/commands/transpile/index.ts
10880
10917
  import { writeFileSync as writeFileSync7 } from "node:fs";
10881
- import path19 from "node:path";
10918
+ import path20 from "node:path";
10882
10919
  var transpileCommand = command({
10883
10920
  name: "transpile",
10884
10921
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -10902,7 +10939,7 @@ var transpileCommand = command({
10902
10939
  handler: async ({ input, outDir, stdout }) => {
10903
10940
  let result;
10904
10941
  try {
10905
- result = transpileEvalYamlFile(path19.resolve(input));
10942
+ result = transpileEvalYamlFile(path20.resolve(input));
10906
10943
  } catch (error) {
10907
10944
  console.error(`Error: ${error.message}`);
10908
10945
  process.exit(1);
@@ -10926,11 +10963,11 @@ var transpileCommand = command({
10926
10963
  process.stdout.write("\n");
10927
10964
  return;
10928
10965
  }
10929
- const outputDir = outDir ? path19.resolve(outDir) : path19.dirname(path19.resolve(input));
10966
+ const outputDir = outDir ? path20.resolve(outDir) : path20.dirname(path20.resolve(input));
10930
10967
  const fileNames = getOutputFilenames(result);
10931
10968
  for (const [skill, evalsJson] of result.files) {
10932
10969
  const fileName = fileNames.get(skill) ?? "evals.json";
10933
- const outputPath = path19.join(outputDir, fileName);
10970
+ const outputPath = path20.join(outputDir, fileName);
10934
10971
  writeFileSync7(outputPath, `${JSON.stringify(evalsJson, null, 2)}
10935
10972
  `);
10936
10973
  console.log(`Transpiled to ${outputPath}`);
@@ -10939,7 +10976,7 @@ var transpileCommand = command({
10939
10976
  });
10940
10977
 
10941
10978
  // src/commands/trend/index.ts
10942
- import path20 from "node:path";
10979
+ import path21 from "node:path";
10943
10980
  var colors2 = {
10944
10981
  reset: "\x1B[0m",
10945
10982
  bold: "\x1B[1m",
@@ -10989,7 +11026,7 @@ function colorizeSlope(value) {
10989
11026
  }
10990
11027
  function ensureTrendIndexPath(source, cwd) {
10991
11028
  const resolved = resolveResultSourcePath(source, cwd);
10992
- if (path20.basename(resolved) !== RESULT_INDEX_FILENAME) {
11029
+ if (path21.basename(resolved) !== RESULT_INDEX_FILENAME) {
10993
11030
  throw new Error(
10994
11031
  `Unsupported result source for trend: ${source}. Use a run workspace directory or ${RESULT_INDEX_FILENAME} manifest.`
10995
11032
  );
@@ -11009,7 +11046,7 @@ function resolveTrendSources(cwd, sources, last) {
11009
11046
  if (last < 2) {
11010
11047
  throw new Error("--last must be at least 2");
11011
11048
  }
11012
- const metas = listResultFiles(cwd).filter((meta) => path20.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
11049
+ const metas = listResultFiles(cwd).filter((meta) => path21.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
11013
11050
  if (metas.length < 2) {
11014
11051
  throw new Error(
11015
11052
  "Trend analysis requires at least 2 canonical run workspaces in .agentv/results/runs/"
@@ -11024,10 +11061,10 @@ function getRunLabel(sourcePath, timestamp) {
11024
11061
  if (timestamp) {
11025
11062
  return timestamp;
11026
11063
  }
11027
- return path20.basename(path20.dirname(sourcePath));
11064
+ return path21.basename(path21.dirname(sourcePath));
11028
11065
  }
11029
11066
  function getRunSortKey(sourcePath, timestamp) {
11030
- return timestamp ?? path20.basename(path20.dirname(sourcePath));
11067
+ return timestamp ?? path21.basename(path21.dirname(sourcePath));
11031
11068
  }
11032
11069
  function mean2(values) {
11033
11070
  return values.reduce((sum, value) => sum + value, 0) / values.length;
@@ -11408,7 +11445,7 @@ function isTTY() {
11408
11445
  // src/commands/validate/validate-files.ts
11409
11446
  import { constants } from "node:fs";
11410
11447
  import { access, readdir as readdir4, stat } from "node:fs/promises";
11411
- import path21 from "node:path";
11448
+ import path22 from "node:path";
11412
11449
  import fg2 from "fast-glob";
11413
11450
  async function validateFiles(paths) {
11414
11451
  const filePaths = await expandPaths(paths);
@@ -11423,7 +11460,7 @@ async function validateFiles(paths) {
11423
11460
  };
11424
11461
  }
11425
11462
  async function validateSingleFile(filePath) {
11426
- const absolutePath = path21.resolve(filePath);
11463
+ const absolutePath = path22.resolve(filePath);
11427
11464
  const fileType = await detectFileType(absolutePath);
11428
11465
  let result;
11429
11466
  if (fileType === "eval") {
@@ -11467,7 +11504,7 @@ async function validateSingleFile(filePath) {
11467
11504
  async function expandPaths(paths) {
11468
11505
  const expanded = /* @__PURE__ */ new Set();
11469
11506
  for (const inputPath of paths) {
11470
- const absolutePath = path21.resolve(inputPath);
11507
+ const absolutePath = path22.resolve(inputPath);
11471
11508
  try {
11472
11509
  await access(absolutePath, constants.F_OK);
11473
11510
  const stats = await stat(absolutePath);
@@ -11495,7 +11532,7 @@ async function expandPaths(paths) {
11495
11532
  if (yamlMatches.length === 0) {
11496
11533
  console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
11497
11534
  }
11498
- for (const f of yamlMatches) expanded.add(path21.normalize(f));
11535
+ for (const f of yamlMatches) expanded.add(path22.normalize(f));
11499
11536
  }
11500
11537
  const sorted = Array.from(expanded);
11501
11538
  sorted.sort();
@@ -11506,7 +11543,7 @@ async function findYamlFiles(dirPath) {
11506
11543
  try {
11507
11544
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
11508
11545
  for (const entry of entries2) {
11509
- const fullPath = path21.join(dirPath, entry.name);
11546
+ const fullPath = path22.join(dirPath, entry.name);
11510
11547
  if (entry.isDirectory()) {
11511
11548
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
11512
11549
  continue;
@@ -11523,11 +11560,11 @@ async function findYamlFiles(dirPath) {
11523
11560
  return results;
11524
11561
  }
11525
11562
  function isYamlFile(filePath) {
11526
- const ext = path21.extname(filePath).toLowerCase();
11563
+ const ext = path22.extname(filePath).toLowerCase();
11527
11564
  return ext === ".yaml" || ext === ".yml";
11528
11565
  }
11529
11566
  function isEvalYamlFile(filePath) {
11530
- const lower = path21.basename(filePath).toLowerCase();
11567
+ const lower = path22.basename(filePath).toLowerCase();
11531
11568
  return lower.endsWith(".eval.yaml") || lower.endsWith(".eval.yml");
11532
11569
  }
11533
11570
 
@@ -11584,7 +11621,7 @@ var validateCommand = command({
11584
11621
  // src/commands/workspace/clean.ts
11585
11622
  import { existsSync as existsSync15 } from "node:fs";
11586
11623
  import { readFile as readFile6, readdir as readdir5, rm } from "node:fs/promises";
11587
- import path22 from "node:path";
11624
+ import path23 from "node:path";
11588
11625
  async function confirm(message) {
11589
11626
  const readline2 = await import("node:readline");
11590
11627
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -11620,8 +11657,8 @@ var cleanCommand = command({
11620
11657
  const poolDirs = entries2.filter((e) => e.isDirectory());
11621
11658
  const matchingDirs = [];
11622
11659
  for (const dir of poolDirs) {
11623
- const poolDir = path22.join(poolRoot, dir.name);
11624
- const metadataPath = path22.join(poolDir, "metadata.json");
11660
+ const poolDir = path23.join(poolRoot, dir.name);
11661
+ const metadataPath = path23.join(poolDir, "metadata.json");
11625
11662
  try {
11626
11663
  const raw = await readFile6(metadataPath, "utf-8");
11627
11664
  const metadata = JSON.parse(raw);
@@ -11652,7 +11689,7 @@ var cleanCommand = command({
11652
11689
  }
11653
11690
  for (const dir of matchingDirs) {
11654
11691
  await rm(dir, { recursive: true, force: true });
11655
- console.log(`Removed: ${path22.basename(dir).slice(0, 12)}...`);
11692
+ console.log(`Removed: ${path23.basename(dir).slice(0, 12)}...`);
11656
11693
  }
11657
11694
  console.log("Done.");
11658
11695
  } else {
@@ -11670,7 +11707,7 @@ var cleanCommand = command({
11670
11707
  });
11671
11708
 
11672
11709
  // src/commands/workspace/deps.ts
11673
- import path23 from "node:path";
11710
+ import path24 from "node:path";
11674
11711
  var depsCommand = command({
11675
11712
  name: "deps",
11676
11713
  description: "Scan eval files and list git repo dependencies needed by workspaces",
@@ -11694,7 +11731,7 @@ var depsCommand = command({
11694
11731
  const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
11695
11732
  const result = await scanRepoDeps(resolvedPaths);
11696
11733
  for (const err2 of result.errors) {
11697
- console.error(`warning: ${path23.relative(cwd, err2.file)}: ${err2.message}`);
11734
+ console.error(`warning: ${path24.relative(cwd, err2.file)}: ${err2.message}`);
11698
11735
  }
11699
11736
  const output = {
11700
11737
  repos: result.repos.map((r) => ({
@@ -11702,7 +11739,7 @@ var depsCommand = command({
11702
11739
  ...r.ref !== void 0 && { ref: r.ref },
11703
11740
  ...r.clone !== void 0 && { clone: r.clone },
11704
11741
  ...r.checkout !== void 0 && { checkout: r.checkout },
11705
- ...usedBy && { used_by: r.usedBy.map((p) => path23.relative(cwd, p)) }
11742
+ ...usedBy && { used_by: r.usedBy.map((p) => path24.relative(cwd, p)) }
11706
11743
  }))
11707
11744
  };
11708
11745
  console.log(JSON.stringify(output, null, 2));
@@ -11712,13 +11749,13 @@ var depsCommand = command({
11712
11749
  // src/commands/workspace/list.ts
11713
11750
  import { existsSync as existsSync16 } from "node:fs";
11714
11751
  import { readFile as readFile7, readdir as readdir6, stat as stat2 } from "node:fs/promises";
11715
- import path24 from "node:path";
11752
+ import path25 from "node:path";
11716
11753
  async function getDirectorySize(dirPath) {
11717
11754
  let totalSize = 0;
11718
11755
  try {
11719
11756
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
11720
11757
  for (const entry of entries2) {
11721
- const fullPath = path24.join(dirPath, entry.name);
11758
+ const fullPath = path25.join(dirPath, entry.name);
11722
11759
  if (entry.isDirectory()) {
11723
11760
  totalSize += await getDirectorySize(fullPath);
11724
11761
  } else {
@@ -11753,11 +11790,11 @@ var listCommand = command({
11753
11790
  return;
11754
11791
  }
11755
11792
  for (const dir of poolDirs) {
11756
- const poolDir = path24.join(poolRoot, dir.name);
11793
+ const poolDir = path25.join(poolRoot, dir.name);
11757
11794
  const fingerprint = dir.name;
11758
11795
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
11759
11796
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
11760
- const metadataPath = path24.join(poolDir, "metadata.json");
11797
+ const metadataPath = path25.join(poolDir, "metadata.json");
11761
11798
  let metadata = null;
11762
11799
  try {
11763
11800
  const raw = await readFile7(metadataPath, "utf-8");
@@ -11804,8 +11841,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
11804
11841
  var CONFIG_DIR = getAgentvConfigDir();
11805
11842
  var CACHE_FILE = "version-check.json";
11806
11843
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
11807
- async function getCachedUpdateInfo(path25) {
11808
- const filePath = path25 ?? join5(CONFIG_DIR, CACHE_FILE);
11844
+ async function getCachedUpdateInfo(path26) {
11845
+ const filePath = path26 ?? join5(CONFIG_DIR, CACHE_FILE);
11809
11846
  try {
11810
11847
  const raw = await readFile8(filePath, "utf-8");
11811
11848
  const data = JSON.parse(raw);
@@ -11907,7 +11944,7 @@ var app = subcommands({
11907
11944
  workspace: workspaceCommand
11908
11945
  }
11909
11946
  });
11910
- var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "assert"]);
11947
+ var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "assert", "aggregate"]);
11911
11948
  var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
11912
11949
  "import",
11913
11950
  "inspect",
@@ -11966,4 +12003,4 @@ export {
11966
12003
  preprocessArgv,
11967
12004
  runCli
11968
12005
  };
11969
- //# sourceMappingURL=chunk-ILIM6IIX.js.map
12006
+ //# sourceMappingURL=chunk-L7WOR7IR.js.map