agentv 4.17.1 → 4.18.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,7 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  HtmlWriter,
4
- RESULT_INDEX_FILENAME,
5
4
  TARGET_FILE_CANDIDATES,
6
- buildDefaultRunDir,
7
5
  c,
8
6
  detectFileType,
9
7
  discoverEvalFiles,
@@ -31,20 +29,25 @@ import {
31
29
  resolveEvalPaths,
32
30
  resolveResultSourcePath,
33
31
  resolveRunCacheFile,
34
- resolveRunManifestPath,
35
32
  runEvalCommand,
36
33
  selectTarget,
37
34
  syncRemoteResults,
38
- toSnakeCaseDeep,
39
35
  toTraceSummary,
40
36
  validateCasesFile,
41
37
  validateConfigFile,
42
38
  validateEvalFile,
43
39
  validateFileReferences,
44
40
  validateTargetsFile,
45
- validateWorkspacePaths,
41
+ validateWorkspacePaths
42
+ } from "./chunk-VRPCMCLQ.js";
43
+ import {
44
+ RESULT_INDEX_FILENAME,
45
+ aggregateRunDir,
46
+ buildDefaultRunDir,
47
+ resolveRunManifestPath,
48
+ toSnakeCaseDeep,
46
49
  writeArtifactsFromResults
47
- } from "./chunk-ZUNYOUFO.js";
50
+ } from "./chunk-HBDOJJFY.js";
48
51
  import {
49
52
  DEFAULT_CATEGORY,
50
53
  DEFAULT_THRESHOLD,
@@ -89,7 +92,7 @@ import {
89
92
  toTranscriptJsonLines,
90
93
  transpileEvalYamlFile,
91
94
  trimBaselineResult
92
- } from "./chunk-IRU2UOWN.js";
95
+ } from "./chunk-RCOAXXHP.js";
93
96
  import {
94
97
  __commonJS,
95
98
  __require,
@@ -3638,9 +3641,30 @@ var createCommand = subcommands({
3638
3641
  }
3639
3642
  });
3640
3643
 
3644
+ // src/commands/eval/commands/aggregate.ts
3645
+ import path3 from "node:path";
3646
+ var evalAggregateCommand = command({
3647
+ name: "aggregate",
3648
+ description: "Recompute benchmark.json and timing.json from a run directory. Deduplicates by (test_id, target), keeping the last entry.",
3649
+ args: {
3650
+ runDir: positional({
3651
+ type: string,
3652
+ displayName: "run-dir",
3653
+ description: "Path to a run directory containing index.jsonl"
3654
+ })
3655
+ },
3656
+ handler: async (args) => {
3657
+ const runDir = path3.resolve(args.runDir);
3658
+ const { benchmarkPath, timingPath, testCount, targetCount } = await aggregateRunDir(runDir);
3659
+ console.log(`Aggregated ${testCount} test result(s) across ${targetCount} target(s)`);
3660
+ console.log(` Benchmark: ${benchmarkPath}`);
3661
+ console.log(` Timing: ${timingPath}`);
3662
+ }
3663
+ });
3664
+
3641
3665
  // src/commands/eval/commands/assert.ts
3642
3666
  import { readFileSync as readFileSync2 } from "node:fs";
3643
- import path3 from "node:path";
3667
+ import path4 from "node:path";
3644
3668
  import fg from "fast-glob";
3645
3669
  var evalAssertCommand = command({
3646
3670
  name: "assert",
@@ -3671,7 +3695,7 @@ var evalAssertCommand = command({
3671
3695
  let resolvedOutput;
3672
3696
  let resolvedInput;
3673
3697
  if (file) {
3674
- const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
3698
+ const content = JSON.parse(readFileSync2(path4.resolve(file), "utf8"));
3675
3699
  resolvedOutput = content.output ?? "";
3676
3700
  resolvedInput = content.input ?? "";
3677
3701
  } else {
@@ -3737,11 +3761,11 @@ var evalAssertCommand = command({
3737
3761
  }
3738
3762
  });
3739
3763
  async function findGraderScript(graderName, startDir) {
3740
- let dir = path3.resolve(startDir);
3741
- const root = path3.parse(dir).root;
3764
+ let dir = path4.resolve(startDir);
3765
+ const root = path4.parse(dir).root;
3742
3766
  while (dir !== root) {
3743
3767
  for (const subdir of ["graders", "judges"]) {
3744
- const gradersDir = path3.join(dir, ".agentv", subdir);
3768
+ const gradersDir = path4.join(dir, ".agentv", subdir);
3745
3769
  const found = await fg([`${graderName}.{ts,js,mts,mjs}`], {
3746
3770
  cwd: gradersDir,
3747
3771
  absolute: true,
@@ -3749,7 +3773,7 @@ async function findGraderScript(graderName, startDir) {
3749
3773
  });
3750
3774
  if (found.length > 0) return found[0];
3751
3775
  }
3752
- dir = path3.dirname(dir);
3776
+ dir = path4.dirname(dir);
3753
3777
  }
3754
3778
  return null;
3755
3779
  }
@@ -3895,6 +3919,14 @@ var evalRunCommand = command({
3895
3919
  long: "retry-errors",
3896
3920
  description: "Path to a previous run workspace or index.jsonl manifest \u2014 re-run only execution_error test cases"
3897
3921
  }),
3922
+ resume: flag({
3923
+ long: "resume",
3924
+ description: "Resume an interrupted run: skip already-completed tests and append new results to --output dir"
3925
+ }),
3926
+ rerunFailed: flag({
3927
+ long: "rerun-failed",
3928
+ description: "Rerun failed/errored tests while keeping passing results. Implies --resume semantics"
3929
+ }),
3898
3930
  strict: flag({
3899
3931
  long: "strict",
3900
3932
  description: "Exit with error on version mismatch (instead of warning)"
@@ -3947,7 +3979,7 @@ var evalRunCommand = command({
3947
3979
  },
3948
3980
  handler: async (args) => {
3949
3981
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3950
- const { launchInteractiveWizard } = await import("./interactive-LFCOVXPQ.js");
3982
+ const { launchInteractiveWizard } = await import("./interactive-J4QEU5FG.js");
3951
3983
  await launchInteractiveWizard();
3952
3984
  return;
3953
3985
  }
@@ -3981,6 +4013,8 @@ var evalRunCommand = command({
3981
4013
  otelCaptureContent: args.otelCaptureContent,
3982
4014
  otelGroupTurns: args.otelGroupTurns,
3983
4015
  retryErrors: args.retryErrors,
4016
+ resume: args.resume,
4017
+ rerunFailed: args.rerunFailed,
3984
4018
  strict: args.strict,
3985
4019
  benchmarkJson: args.benchmarkJson,
3986
4020
  artifacts: args.artifacts,
@@ -4008,13 +4042,14 @@ var evalCommand = subcommands({
4008
4042
  description: "Evaluation commands",
4009
4043
  cmds: {
4010
4044
  run: evalRunCommand,
4011
- assert: evalAssertCommand
4045
+ assert: evalAssertCommand,
4046
+ aggregate: evalAggregateCommand
4012
4047
  }
4013
4048
  });
4014
4049
 
4015
4050
  // src/commands/import/claude.ts
4016
4051
  import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
4017
- import path4 from "node:path";
4052
+ import path5 from "node:path";
4018
4053
  var importClaudeCommand = command({
4019
4054
  name: "claude",
4020
4055
  description: "Import a Claude Code session transcript for offline grading",
@@ -4086,8 +4121,8 @@ var importClaudeCommand = command({
4086
4121
  const rawJsonl = await readTranscriptFile(sessionFilePath);
4087
4122
  const transcript = parseClaudeSession(rawJsonl);
4088
4123
  const shortId = (sessionId ?? transcript.source.sessionId).slice(0, 8);
4089
- const outputPath = output ?? path4.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
4090
- await mkdir2(path4.dirname(outputPath), { recursive: true });
4124
+ const outputPath = output ?? path5.join(".agentv", "transcripts", `claude-${shortId}.jsonl`);
4125
+ await mkdir2(path5.dirname(outputPath), { recursive: true });
4091
4126
  const jsonLines = toTranscriptJsonLines(transcript);
4092
4127
  await writeFile2(
4093
4128
  outputPath,
@@ -4131,7 +4166,7 @@ function formatDurationMs(ms) {
4131
4166
 
4132
4167
  // src/commands/import/codex.ts
4133
4168
  import { mkdir as mkdir3, writeFile as writeFile3 } from "node:fs/promises";
4134
- import path5 from "node:path";
4169
+ import path6 from "node:path";
4135
4170
  var importCodexCommand = command({
4136
4171
  name: "codex",
4137
4172
  description: "Import a Codex CLI session transcript for offline grading",
@@ -4203,8 +4238,8 @@ var importCodexCommand = command({
4203
4238
  const rawJsonl = await readTranscriptFile(session.filePath);
4204
4239
  const transcript = parseCodexSession(rawJsonl);
4205
4240
  const shortId = session.sessionId.slice(0, 8);
4206
- const outputPath = output ?? path5.join(".agentv", "transcripts", `codex-${shortId}.jsonl`);
4207
- await mkdir3(path5.dirname(outputPath), { recursive: true });
4241
+ const outputPath = output ?? path6.join(".agentv", "transcripts", `codex-${shortId}.jsonl`);
4242
+ await mkdir3(path6.dirname(outputPath), { recursive: true });
4208
4243
  const jsonLines = toTranscriptJsonLines(transcript);
4209
4244
  await writeFile3(
4210
4245
  outputPath,
@@ -4243,7 +4278,7 @@ function formatDurationMs2(ms) {
4243
4278
 
4244
4279
  // src/commands/import/copilot.ts
4245
4280
  import { mkdir as mkdir4, readFile, writeFile as writeFile4 } from "node:fs/promises";
4246
- import path6 from "node:path";
4281
+ import path7 from "node:path";
4247
4282
  var importCopilotCommand = command({
4248
4283
  name: "copilot",
4249
4284
  description: "Import a Copilot CLI session transcript for offline grading",
@@ -4308,7 +4343,7 @@ var importCopilotCommand = command({
4308
4343
  );
4309
4344
  process.exit(1);
4310
4345
  }
4311
- const eventsPath = path6.join(sessionDir, "events.jsonl");
4346
+ const eventsPath = path7.join(sessionDir, "events.jsonl");
4312
4347
  const rawJsonl = await readFile(eventsPath, "utf8");
4313
4348
  const parsed = parseCopilotEvents(rawJsonl);
4314
4349
  const transcript = {
@@ -4325,8 +4360,8 @@ var importCopilotCommand = command({
4325
4360
  costUsd: null
4326
4361
  };
4327
4362
  const shortId = resolvedSessionId.slice(0, 8);
4328
- const outputPath = output ?? path6.join(".agentv", "transcripts", `copilot-${shortId}.jsonl`);
4329
- await mkdir4(path6.dirname(outputPath), { recursive: true });
4363
+ const outputPath = output ?? path7.join(".agentv", "transcripts", `copilot-${shortId}.jsonl`);
4364
+ await mkdir4(path7.dirname(outputPath), { recursive: true });
4330
4365
  const jsonLines = toTranscriptJsonLines(transcript);
4331
4366
  await writeFile4(
4332
4367
  outputPath,
@@ -4371,12 +4406,12 @@ function formatDurationMs3(ms) {
4371
4406
  // src/commands/import/huggingface.ts
4372
4407
  import { execFile } from "node:child_process";
4373
4408
  import { existsSync } from "node:fs";
4374
- import path7 from "node:path";
4409
+ import path8 from "node:path";
4375
4410
  function findScript() {
4376
4411
  const candidates = [
4377
- path7.resolve(__dirname, "..", "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4378
- path7.resolve(__dirname, "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4379
- path7.resolve(process.cwd(), "scripts", "import-huggingface.py")
4412
+ path8.resolve(__dirname, "..", "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4413
+ path8.resolve(__dirname, "..", "..", "..", "..", "scripts", "import-huggingface.py"),
4414
+ path8.resolve(process.cwd(), "scripts", "import-huggingface.py")
4380
4415
  ];
4381
4416
  for (const candidate of candidates) {
4382
4417
  if (existsSync(candidate)) return candidate;
@@ -4488,29 +4523,29 @@ var importCommand = subcommands({
4488
4523
 
4489
4524
  // src/commands/init/index.ts
4490
4525
  import { existsSync as existsSync2, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4491
- import path9 from "node:path";
4526
+ import path10 from "node:path";
4492
4527
  import * as readline from "node:readline/promises";
4493
4528
 
4494
4529
  // src/templates/index.ts
4495
4530
  import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
4496
- import path8 from "node:path";
4531
+ import path9 from "node:path";
4497
4532
  import { fileURLToPath } from "node:url";
4498
4533
  function getAgentvTemplates() {
4499
4534
  return getTemplatesFromDir(".agentv");
4500
4535
  }
4501
4536
  function getEnvExampleTemplate() {
4502
- const currentDir = path8.dirname(fileURLToPath(import.meta.url));
4503
- const templatesBase = currentDir.includes(`${path8.sep}dist`) ? path8.join(currentDir, "templates") : currentDir;
4504
- const content = readFileSync3(path8.join(templatesBase, ".env.example"), "utf-8");
4537
+ const currentDir = path9.dirname(fileURLToPath(import.meta.url));
4538
+ const templatesBase = currentDir.includes(`${path9.sep}dist`) ? path9.join(currentDir, "templates") : currentDir;
4539
+ const content = readFileSync3(path9.join(templatesBase, ".env.example"), "utf-8");
4505
4540
  return { path: ".env.example", content };
4506
4541
  }
4507
4542
  function getTemplatesFromDir(subdir) {
4508
- const currentDir = path8.dirname(fileURLToPath(import.meta.url));
4543
+ const currentDir = path9.dirname(fileURLToPath(import.meta.url));
4509
4544
  let templatesDir;
4510
- if (currentDir.includes(`${path8.sep}dist`)) {
4511
- templatesDir = path8.join(currentDir, "templates", subdir);
4545
+ if (currentDir.includes(`${path9.sep}dist`)) {
4546
+ templatesDir = path9.join(currentDir, "templates", subdir);
4512
4547
  } else {
4513
- templatesDir = path8.join(currentDir, subdir);
4548
+ templatesDir = path9.join(currentDir, subdir);
4514
4549
  }
4515
4550
  return readTemplatesRecursively(templatesDir, "");
4516
4551
  }
@@ -4518,15 +4553,15 @@ function readTemplatesRecursively(dir, relativePath) {
4518
4553
  const templates = [];
4519
4554
  const entries2 = readdirSync(dir);
4520
4555
  for (const entry of entries2) {
4521
- const fullPath = path8.join(dir, entry);
4556
+ const fullPath = path9.join(dir, entry);
4522
4557
  const stat3 = statSync(fullPath);
4523
- const entryRelativePath = relativePath ? path8.join(relativePath, entry) : entry;
4558
+ const entryRelativePath = relativePath ? path9.join(relativePath, entry) : entry;
4524
4559
  if (stat3.isDirectory()) {
4525
4560
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
4526
4561
  } else {
4527
4562
  const content = readFileSync3(fullPath, "utf-8");
4528
4563
  templates.push({
4529
- path: entryRelativePath.split(path8.sep).join("/"),
4564
+ path: entryRelativePath.split(path9.sep).join("/"),
4530
4565
  // Normalize to forward slashes
4531
4566
  content
4532
4567
  });
@@ -4555,22 +4590,22 @@ async function promptYesNo(message) {
4555
4590
  }
4556
4591
  }
4557
4592
  async function initCommand(options = {}) {
4558
- const targetPath = path9.resolve(options.targetPath ?? ".");
4559
- const agentvDir = path9.join(targetPath, ".agentv");
4593
+ const targetPath = path10.resolve(options.targetPath ?? ".");
4594
+ const agentvDir = path10.join(targetPath, ".agentv");
4560
4595
  const otherAgentvTemplates = getAgentvTemplates();
4561
4596
  const envTemplate = getEnvExampleTemplate();
4562
4597
  const existingFiles = [];
4563
4598
  if (envTemplate) {
4564
- const envFilePath = path9.join(targetPath, ".env.example");
4599
+ const envFilePath = path10.join(targetPath, ".env.example");
4565
4600
  if (existsSync2(envFilePath)) {
4566
4601
  existingFiles.push(".env.example");
4567
4602
  }
4568
4603
  }
4569
4604
  if (existsSync2(agentvDir)) {
4570
4605
  for (const template of otherAgentvTemplates) {
4571
- const targetFilePath = path9.join(agentvDir, template.path);
4606
+ const targetFilePath = path10.join(agentvDir, template.path);
4572
4607
  if (existsSync2(targetFilePath)) {
4573
- existingFiles.push(path9.relative(targetPath, targetFilePath));
4608
+ existingFiles.push(path10.relative(targetPath, targetFilePath));
4574
4609
  }
4575
4610
  }
4576
4611
  }
@@ -4592,18 +4627,18 @@ async function initCommand(options = {}) {
4592
4627
  mkdirSync(agentvDir, { recursive: true });
4593
4628
  }
4594
4629
  if (envTemplate) {
4595
- const envFilePath = path9.join(targetPath, ".env.example");
4630
+ const envFilePath = path10.join(targetPath, ".env.example");
4596
4631
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
4597
4632
  console.log("Created .env.example");
4598
4633
  }
4599
4634
  for (const template of otherAgentvTemplates) {
4600
- const targetFilePath = path9.join(agentvDir, template.path);
4601
- const targetDirPath = path9.dirname(targetFilePath);
4635
+ const targetFilePath = path10.join(agentvDir, template.path);
4636
+ const targetDirPath = path10.dirname(targetFilePath);
4602
4637
  if (!existsSync2(targetDirPath)) {
4603
4638
  mkdirSync(targetDirPath, { recursive: true });
4604
4639
  }
4605
4640
  writeFileSync2(targetFilePath, template.content, "utf-8");
4606
- console.log(`Created ${path9.relative(targetPath, targetFilePath)}`);
4641
+ console.log(`Created ${path10.relative(targetPath, targetFilePath)}`);
4607
4642
  }
4608
4643
  console.log("\nAgentV initialized successfully!");
4609
4644
  console.log("\nFiles installed to root:");
@@ -4611,7 +4646,7 @@ async function initCommand(options = {}) {
4611
4646
  console.log(" - .env.example");
4612
4647
  }
4613
4648
  console.log(`
4614
- Files installed to ${path9.relative(targetPath, agentvDir)}:`);
4649
+ Files installed to ${path10.relative(targetPath, agentvDir)}:`);
4615
4650
  for (const t of otherAgentvTemplates) {
4616
4651
  console.log(` - ${t.path}`);
4617
4652
  }
@@ -4643,13 +4678,13 @@ var initCmdTsCommand = command({
4643
4678
 
4644
4679
  // src/commands/inspect/filter.ts
4645
4680
  import { existsSync as existsSync3, readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4646
- import path10 from "node:path";
4681
+ import path11 from "node:path";
4647
4682
  function collectIndexFiles(dir) {
4648
4683
  const files = [];
4649
4684
  try {
4650
4685
  const entries2 = readdirSync2(dir, { withFileTypes: true });
4651
4686
  for (const entry of entries2) {
4652
- const fullPath = path10.join(dir, entry.name);
4687
+ const fullPath = path11.join(dir, entry.name);
4653
4688
  if (entry.isDirectory()) {
4654
4689
  files.push(...collectIndexFiles(fullPath));
4655
4690
  } else if (entry.name === "index.jsonl") {
@@ -4700,7 +4735,7 @@ function parseFilterableRecords(filePath) {
4700
4735
  }
4701
4736
  let experiment = typeof raw.experiment === "string" ? raw.experiment : void 0;
4702
4737
  if (!experiment) {
4703
- const parts = filePath.split(path10.sep);
4738
+ const parts = filePath.split(path11.sep);
4704
4739
  const runsIdx = parts.indexOf("runs");
4705
4740
  if (runsIdx !== -1 && parts.length - runsIdx >= 3) {
4706
4741
  const candidate = parts[runsIdx + 1];
@@ -4755,7 +4790,7 @@ function buildFilterPredicate(opts) {
4755
4790
  }
4756
4791
  function discoverFilterSources(searchPath, cwd) {
4757
4792
  if (searchPath) {
4758
- const resolved = path10.isAbsolute(searchPath) ? searchPath : path10.resolve(cwd, searchPath);
4793
+ const resolved = path11.isAbsolute(searchPath) ? searchPath : path11.resolve(cwd, searchPath);
4759
4794
  if (!existsSync3(resolved)) {
4760
4795
  console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
4761
4796
  process.exit(1);
@@ -4768,7 +4803,7 @@ function discoverFilterSources(searchPath, cwd) {
4768
4803
  }
4769
4804
  return [resolved];
4770
4805
  }
4771
- return collectIndexFiles(path10.join(cwd, ".agentv", "results", "runs"));
4806
+ return collectIndexFiles(path11.join(cwd, ".agentv", "results", "runs"));
4772
4807
  }
4773
4808
  function formatFilterTable(records) {
4774
4809
  const lines = [];
@@ -5042,7 +5077,7 @@ function parseAssertSpec(spec) {
5042
5077
  }
5043
5078
  default:
5044
5079
  throw new Error(
5045
- `Unsupported evaluator type: "${type}". Supported: ${SUPPORTED_TYPES.join(", ")}`
5080
+ `Unsupported grader type: "${type}". Supported: ${SUPPORTED_TYPES.join(", ")}`
5046
5081
  );
5047
5082
  }
5048
5083
  }
@@ -5164,7 +5199,7 @@ var traceScoreCommand = command({
5164
5199
  type: string,
5165
5200
  long: "assert",
5166
5201
  short: "a",
5167
- description: "Evaluator spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
5202
+ description: "Grader spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
5168
5203
  }),
5169
5204
  testId: option({
5170
5205
  type: optional(string),
@@ -5241,13 +5276,13 @@ var traceScoreCommand = command({
5241
5276
 
5242
5277
  // src/commands/inspect/search.ts
5243
5278
  import { existsSync as existsSync4, readFileSync as readFileSync5, readdirSync as readdirSync3, statSync as statSync3 } from "node:fs";
5244
- import path11 from "node:path";
5279
+ import path12 from "node:path";
5245
5280
  function collectJsonlFiles(dir) {
5246
5281
  const files = [];
5247
5282
  try {
5248
5283
  const entries2 = readdirSync3(dir, { withFileTypes: true });
5249
5284
  for (const entry of entries2) {
5250
- const fullPath = path11.join(dir, entry.name);
5285
+ const fullPath = path12.join(dir, entry.name);
5251
5286
  if (entry.isDirectory()) {
5252
5287
  files.push(...collectJsonlFiles(fullPath));
5253
5288
  } else if (entry.name.endsWith(".jsonl")) {
@@ -5306,7 +5341,7 @@ function searchJsonlFile(filePath, regex2, targetFilter, experimentFilter) {
5306
5341
  }
5307
5342
  function discoverSources(basePath, cwd) {
5308
5343
  if (basePath) {
5309
- const resolved = path11.isAbsolute(basePath) ? basePath : path11.resolve(cwd, basePath);
5344
+ const resolved = path12.isAbsolute(basePath) ? basePath : path12.resolve(cwd, basePath);
5310
5345
  if (!existsSync4(resolved)) {
5311
5346
  console.error(`${c.red}Error:${c.reset} Path does not exist: ${resolved}`);
5312
5347
  process.exit(1);
@@ -5320,8 +5355,8 @@ function discoverSources(basePath, cwd) {
5320
5355
  return [resolved];
5321
5356
  }
5322
5357
  const sources = [];
5323
- sources.push(...collectJsonlFiles(path11.join(cwd, ".agentv", "results", "runs")));
5324
- sources.push(...collectJsonlFiles(path11.join(cwd, ".agentv", "transcripts")));
5358
+ sources.push(...collectJsonlFiles(path12.join(cwd, ".agentv", "results", "runs")));
5359
+ sources.push(...collectJsonlFiles(path12.join(cwd, ".agentv", "transcripts")));
5325
5360
  return sources;
5326
5361
  }
5327
5362
  function formatSearchResults(matches, pattern) {
@@ -5873,7 +5908,7 @@ import { readFile as readFile2, readdir, writeFile as writeFile5 } from "node:fs
5873
5908
  import { join } from "node:path";
5874
5909
  var evalBenchCommand = command({
5875
5910
  name: "bench",
5876
- description: "Merge evaluator scores and produce benchmark artifacts",
5911
+ description: "Merge grader scores and produce benchmark artifacts",
5877
5912
  args: {
5878
5913
  exportDir: positional({
5879
5914
  type: string,
@@ -6862,7 +6897,7 @@ var pipelineCommand = subcommands({
6862
6897
  });
6863
6898
 
6864
6899
  // src/commands/results/export.ts
6865
- import path12 from "node:path";
6900
+ import path13 from "node:path";
6866
6901
 
6867
6902
  // src/commands/results/shared.ts
6868
6903
  import { existsSync as existsSync7 } from "node:fs";
@@ -6909,20 +6944,20 @@ async function loadResults(source, cwd) {
6909
6944
 
6910
6945
  // src/commands/results/export.ts
6911
6946
  function deriveOutputDir(cwd, sourceFile) {
6912
- if (path12.basename(sourceFile) !== RESULT_INDEX_FILENAME) {
6947
+ if (path13.basename(sourceFile) !== RESULT_INDEX_FILENAME) {
6913
6948
  throw new Error(`Expected a run manifest named ${RESULT_INDEX_FILENAME}: ${sourceFile}`);
6914
6949
  }
6915
- const runDir = path12.dirname(sourceFile);
6916
- const segments = path12.normalize(runDir).split(path12.sep).filter(Boolean);
6950
+ const runDir = path13.dirname(sourceFile);
6951
+ const segments = path13.normalize(runDir).split(path13.sep).filter(Boolean);
6917
6952
  const runsIndex = segments.lastIndexOf("runs");
6918
6953
  if (runsIndex >= 0 && runsIndex < segments.length - 1) {
6919
- return path12.join(cwd, ".agentv", "results", "export", ...segments.slice(runsIndex + 1));
6954
+ return path13.join(cwd, ".agentv", "results", "export", ...segments.slice(runsIndex + 1));
6920
6955
  }
6921
- const parentDir = path12.basename(runDir);
6956
+ const parentDir = path13.basename(runDir);
6922
6957
  if (parentDir.startsWith("eval_")) {
6923
- return path12.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
6958
+ return path13.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
6924
6959
  }
6925
- return path12.join(cwd, ".agentv", "results", "export", parentDir);
6960
+ return path13.join(cwd, ".agentv", "results", "export", parentDir);
6926
6961
  }
6927
6962
  async function loadExportSource(source, cwd) {
6928
6963
  const { sourceFile } = await resolveSourceFile(source, cwd);
@@ -6955,7 +6990,7 @@ var resultsExportCommand = command({
6955
6990
  const cwd = dir ?? process.cwd();
6956
6991
  try {
6957
6992
  const { sourceFile, results } = await loadExportSource(source, cwd);
6958
- const outputDir = out ? path12.isAbsolute(out) ? out : path12.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
6993
+ const outputDir = out ? path13.isAbsolute(out) ? out : path13.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
6959
6994
  await writeArtifactsFromResults(results, outputDir, {
6960
6995
  evalFile: sourceFile
6961
6996
  });
@@ -7016,7 +7051,7 @@ var resultsFailuresCommand = command({
7016
7051
 
7017
7052
  // src/commands/results/report.ts
7018
7053
  import { existsSync as existsSync8, mkdirSync as mkdirSync2, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
7019
- import path13 from "node:path";
7054
+ import path14 from "node:path";
7020
7055
 
7021
7056
  // src/commands/results/report-template.ts
7022
7057
  var RESULTS_REPORT_TEMPLATE = `<!DOCTYPE html>
@@ -8640,10 +8675,10 @@ function normalizeEvalFileLabel(value) {
8640
8675
  if (!trimmed) {
8641
8676
  return void 0;
8642
8677
  }
8643
- return path13.basename(trimmed).replace(/\.results\.jsonl$/i, "").replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "").replace(/\.jsonl$/i, "");
8678
+ return path14.basename(trimmed).replace(/\.results\.jsonl$/i, "").replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "").replace(/\.jsonl$/i, "");
8644
8679
  }
8645
8680
  function readBenchmarkEvalFile(sourceFile) {
8646
- const benchmarkPath = path13.join(path13.dirname(sourceFile), "benchmark.json");
8681
+ const benchmarkPath = path14.join(path14.dirname(sourceFile), "benchmark.json");
8647
8682
  if (!existsSync8(benchmarkPath)) {
8648
8683
  return void 0;
8649
8684
  }
@@ -8655,10 +8690,10 @@ function readBenchmarkEvalFile(sourceFile) {
8655
8690
  }
8656
8691
  }
8657
8692
  function deriveReportPath(sourceFile) {
8658
- return path13.join(path13.dirname(sourceFile), "report.html");
8693
+ return path14.join(path14.dirname(sourceFile), "report.html");
8659
8694
  }
8660
8695
  function serializeReportResult(result, sourceFile, manifestRecord, benchmarkEvalFile) {
8661
- const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? path13.basename(path13.dirname(sourceFile));
8696
+ const fallbackEvalFile = normalizeEvalFileLabel(manifestRecord?.eval_file) ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? path14.basename(path14.dirname(sourceFile));
8662
8697
  return {
8663
8698
  timestamp: result.timestamp,
8664
8699
  test_id: result.testId,
@@ -8706,9 +8741,9 @@ function renderResultsReport(results, sourceFile, records, benchmarkEvalFile) {
8706
8741
  }
8707
8742
  async function writeResultsReport(source, outputPath, cwd) {
8708
8743
  const { sourceFile, results, records, benchmarkEvalFile } = await loadReportSource(source, cwd);
8709
- const resolvedOutputPath = outputPath ? path13.isAbsolute(outputPath) ? outputPath : path13.resolve(cwd, outputPath) : deriveReportPath(sourceFile);
8744
+ const resolvedOutputPath = outputPath ? path14.isAbsolute(outputPath) ? outputPath : path14.resolve(cwd, outputPath) : deriveReportPath(sourceFile);
8710
8745
  const html = renderResultsReport(results, sourceFile, records, benchmarkEvalFile);
8711
- mkdirSync2(path13.dirname(resolvedOutputPath), { recursive: true });
8746
+ mkdirSync2(path14.dirname(resolvedOutputPath), { recursive: true });
8712
8747
  writeFileSync3(resolvedOutputPath, html, "utf8");
8713
8748
  const written = readFileSync7(resolvedOutputPath, "utf8");
8714
8749
  if (written.includes("__DATA_PLACEHOLDER__")) {
@@ -8896,10 +8931,10 @@ var resultsSummaryCommand = command({
8896
8931
 
8897
8932
  // src/commands/results/validate.ts
8898
8933
  import { existsSync as existsSync10, readFileSync as readFileSync9, statSync as statSync4 } from "node:fs";
8899
- import path14 from "node:path";
8934
+ import path15 from "node:path";
8900
8935
  function checkDirectoryNaming(runDir) {
8901
- const dirName = path14.basename(runDir);
8902
- const pathSegments = path14.normalize(runDir).split(path14.sep).filter(Boolean);
8936
+ const dirName = path15.basename(runDir);
8937
+ const pathSegments = path15.normalize(runDir).split(path15.sep).filter(Boolean);
8903
8938
  const runsIndex = pathSegments.lastIndexOf("runs");
8904
8939
  const diagnostics = [];
8905
8940
  if (runsIndex < 0 || runsIndex >= pathSegments.length - 1) {
@@ -8929,7 +8964,7 @@ function validateRunDirectory(runDir) {
8929
8964
  return { diagnostics, entries: entries2 };
8930
8965
  }
8931
8966
  function checkIndexJsonl(runDir) {
8932
- const indexPath = path14.join(runDir, "index.jsonl");
8967
+ const indexPath = path15.join(runDir, "index.jsonl");
8933
8968
  const diagnostics = [];
8934
8969
  const entries2 = [];
8935
8970
  if (!existsSync10(indexPath)) {
@@ -8978,7 +9013,7 @@ function checkIndexJsonl(runDir) {
8978
9013
  if (!entry.scores || !Array.isArray(entry.scores) || entry.scores.length === 0) {
8979
9014
  diagnostics.push({
8980
9015
  severity: "warning",
8981
- message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'scores[]' array \u2014 dashboard may not show per-evaluator breakdown`
9016
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'scores[]' array \u2014 dashboard may not show per-grader breakdown`
8982
9017
  });
8983
9018
  } else {
8984
9019
  for (let j = 0; j < entry.scores.length; j++) {
@@ -9028,7 +9063,7 @@ function checkArtifactFiles(runDir, entries2) {
9028
9063
  for (const entry of entries2) {
9029
9064
  const testId = entry.test_id ?? "?";
9030
9065
  if (entry.grading_path) {
9031
- const gradingPath = path14.join(runDir, entry.grading_path);
9066
+ const gradingPath = path15.join(runDir, entry.grading_path);
9032
9067
  if (!existsSync10(gradingPath)) {
9033
9068
  diagnostics.push({
9034
9069
  severity: "error",
@@ -9058,7 +9093,7 @@ function checkArtifactFiles(runDir, entries2) {
9058
9093
  }
9059
9094
  }
9060
9095
  if (entry.timing_path) {
9061
- const timingPath = path14.join(runDir, entry.timing_path);
9096
+ const timingPath = path15.join(runDir, entry.timing_path);
9062
9097
  if (!existsSync10(timingPath)) {
9063
9098
  diagnostics.push({
9064
9099
  severity: "warning",
@@ -9067,7 +9102,7 @@ function checkArtifactFiles(runDir, entries2) {
9067
9102
  }
9068
9103
  }
9069
9104
  }
9070
- const benchmarkPath = path14.join(runDir, "benchmark.json");
9105
+ const benchmarkPath = path15.join(runDir, "benchmark.json");
9071
9106
  if (!existsSync10(benchmarkPath)) {
9072
9107
  diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
9073
9108
  }
@@ -9084,7 +9119,7 @@ var resultsValidateCommand = command({
9084
9119
  })
9085
9120
  },
9086
9121
  handler: async ({ runDir }) => {
9087
- const resolvedDir = path14.resolve(runDir);
9122
+ const resolvedDir = path15.resolve(runDir);
9088
9123
  if (!existsSync10(resolvedDir) || !statSync4(resolvedDir).isDirectory()) {
9089
9124
  console.error(`Error: '${runDir}' is not a directory`);
9090
9125
  process.exit(1);
@@ -9128,14 +9163,14 @@ var resultsCommand = subcommands({
9128
9163
 
9129
9164
  // src/commands/results/serve.ts
9130
9165
  import { existsSync as existsSync14, readFileSync as readFileSync12, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync6 } from "node:fs";
9131
- import path18 from "node:path";
9166
+ import path19 from "node:path";
9132
9167
  import { fileURLToPath as fileURLToPath3 } from "node:url";
9133
9168
  import { Hono } from "hono";
9134
9169
 
9135
9170
  // src/commands/results/eval-runner.ts
9136
9171
  import { execFileSync, spawn } from "node:child_process";
9137
9172
  import { existsSync as existsSync11 } from "node:fs";
9138
- import path15 from "node:path";
9173
+ import path16 from "node:path";
9139
9174
  import { fileURLToPath as fileURLToPath2 } from "node:url";
9140
9175
  var activeRuns = /* @__PURE__ */ new Map();
9141
9176
  function generateRunId() {
@@ -9157,7 +9192,7 @@ async function discoverTargetsInProject(cwd) {
9157
9192
  const repoRoot = await findRepoRoot(cwd) ?? cwd;
9158
9193
  let targetsFilePath;
9159
9194
  for (const candidate of TARGET_FILE_CANDIDATES) {
9160
- const fullPath = path15.join(cwd, candidate);
9195
+ const fullPath = path16.join(cwd, candidate);
9161
9196
  if (existsSync11(fullPath)) {
9162
9197
  targetsFilePath = fullPath;
9163
9198
  break;
@@ -9165,7 +9200,7 @@ async function discoverTargetsInProject(cwd) {
9165
9200
  }
9166
9201
  if (!targetsFilePath) {
9167
9202
  for (const candidate of TARGET_FILE_CANDIDATES) {
9168
- const fullPath = path15.join(repoRoot, candidate);
9203
+ const fullPath = path16.join(repoRoot, candidate);
9169
9204
  if (existsSync11(fullPath)) {
9170
9205
  targetsFilePath = fullPath;
9171
9206
  break;
@@ -9215,17 +9250,17 @@ function buildCliPreview(args) {
9215
9250
  }
9216
9251
  function resolveCliPath(cwd) {
9217
9252
  const candidates = [
9218
- path15.join(cwd, "apps/cli/src/cli.ts"),
9219
- path15.join(cwd, "apps/cli/dist/cli.js")
9253
+ path16.join(cwd, "apps/cli/src/cli.ts"),
9254
+ path16.join(cwd, "apps/cli/dist/cli.js")
9220
9255
  ];
9221
9256
  for (const c4 of candidates) {
9222
9257
  if (existsSync11(c4)) {
9223
9258
  return { binPath: "bun", args: [c4] };
9224
9259
  }
9225
9260
  }
9226
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path15.dirname(fileURLToPath2(import.meta.url));
9227
- const fromSrc = path15.resolve(currentDir, "../../../cli.ts");
9228
- const fromDist = path15.resolve(currentDir, "../../cli.js");
9261
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath2(import.meta.url));
9262
+ const fromSrc = path16.resolve(currentDir, "../../../cli.ts");
9263
+ const fromDist = path16.resolve(currentDir, "../../cli.js");
9229
9264
  if (existsSync11(fromSrc)) return { binPath: "bun", args: [fromSrc] };
9230
9265
  if (existsSync11(fromDist)) return { binPath: "bun", args: [fromDist] };
9231
9266
  if (isCommandAvailable("agentv")) {
@@ -9516,12 +9551,12 @@ Process error: ${err2.message}`;
9516
9551
 
9517
9552
  // src/commands/results/run-tags.ts
9518
9553
  import { existsSync as existsSync12, readFileSync as readFileSync10, unlinkSync as unlinkSync2, writeFileSync as writeFileSync4 } from "node:fs";
9519
- import path16 from "node:path";
9554
+ import path17 from "node:path";
9520
9555
  var RUN_TAGS_FILENAME = "tags.json";
9521
9556
  var MAX_TAGS_PER_RUN = 20;
9522
9557
  var MAX_TAG_LENGTH = 60;
9523
9558
  function runTagsPath(manifestPath) {
9524
- return path16.join(path16.dirname(manifestPath), RUN_TAGS_FILENAME);
9559
+ return path17.join(path17.dirname(manifestPath), RUN_TAGS_FILENAME);
9525
9560
  }
9526
9561
  function readRunTags(manifestPath) {
9527
9562
  const fp = runTagsPath(manifestPath);
@@ -9593,13 +9628,13 @@ function normalizeTags(tags) {
9593
9628
 
9594
9629
  // src/commands/results/studio-config.ts
9595
9630
  import { existsSync as existsSync13, mkdirSync as mkdirSync3, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
9596
- import path17 from "node:path";
9631
+ import path18 from "node:path";
9597
9632
  import { parse as parseYaml, stringify as stringifyYaml2 } from "yaml";
9598
9633
  var DEFAULTS = {
9599
9634
  threshold: DEFAULT_THRESHOLD
9600
9635
  };
9601
9636
  function loadStudioConfig(agentvDir) {
9602
- const configPath = path17.join(agentvDir, "config.yaml");
9637
+ const configPath = path18.join(agentvDir, "config.yaml");
9603
9638
  if (!existsSync13(configPath)) {
9604
9639
  return { ...DEFAULTS };
9605
9640
  }
@@ -9628,7 +9663,7 @@ function saveStudioConfig(agentvDir, config) {
9628
9663
  if (!existsSync13(agentvDir)) {
9629
9664
  mkdirSync3(agentvDir, { recursive: true });
9630
9665
  }
9631
- const configPath = path17.join(agentvDir, "config.yaml");
9666
+ const configPath = path18.join(agentvDir, "config.yaml");
9632
9667
  let existing = {};
9633
9668
  if (existsSync13(configPath)) {
9634
9669
  const raw = readFileSync11(configPath, "utf-8");
@@ -9692,7 +9727,7 @@ function resolveDashboardMode(projectCount, options) {
9692
9727
  return { isMultiProject: projectCount > 1, showMultiWarning: false };
9693
9728
  }
9694
9729
  function feedbackPath(resultDir) {
9695
- return path18.join(resultDir, "feedback.json");
9730
+ return path19.join(resultDir, "feedback.json");
9696
9731
  }
9697
9732
  function readFeedback(cwd) {
9698
9733
  const fp = feedbackPath(cwd);
@@ -9719,8 +9754,8 @@ function buildFileTree(dirPath, relativeTo) {
9719
9754
  if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
9720
9755
  return a.name.localeCompare(b.name);
9721
9756
  }).map((entry) => {
9722
- const fullPath = path18.join(dirPath, entry.name);
9723
- const relPath = path18.relative(relativeTo, fullPath);
9757
+ const fullPath = path19.join(dirPath, entry.name);
9758
+ const relPath = path19.relative(relativeTo, fullPath);
9724
9759
  if (entry.isDirectory()) {
9725
9760
  return {
9726
9761
  name: entry.name,
@@ -9733,7 +9768,7 @@ function buildFileTree(dirPath, relativeTo) {
9733
9768
  });
9734
9769
  }
9735
9770
  function inferLanguage(filePath) {
9736
- const ext = path18.extname(filePath).toLowerCase();
9771
+ const ext = path19.extname(filePath).toLowerCase();
9737
9772
  const langMap = {
9738
9773
  ".json": "json",
9739
9774
  ".jsonl": "json",
@@ -9938,7 +9973,7 @@ async function handleEvalFiles(c4, { searchDir }) {
9938
9973
  const records = parseResultManifest(content);
9939
9974
  const record = records.find((r) => r.test_id === evalId);
9940
9975
  if (!record) return c4.json({ error: "Eval not found" }, 404);
9941
- const baseDir = path18.dirname(meta.path);
9976
+ const baseDir = path19.dirname(meta.path);
9942
9977
  const knownPaths = [
9943
9978
  record.grading_path,
9944
9979
  record.timing_path,
@@ -9947,14 +9982,14 @@ async function handleEvalFiles(c4, { searchDir }) {
9947
9982
  record.response_path
9948
9983
  ].filter((p) => !!p);
9949
9984
  if (knownPaths.length === 0) return c4.json({ files: [] });
9950
- const artifactDirs = knownPaths.map((p) => path18.dirname(p));
9985
+ const artifactDirs = knownPaths.map((p) => path19.dirname(p));
9951
9986
  let commonDir = artifactDirs[0];
9952
9987
  for (const dir of artifactDirs) {
9953
9988
  while (!dir.startsWith(commonDir)) {
9954
- commonDir = path18.dirname(commonDir);
9989
+ commonDir = path19.dirname(commonDir);
9955
9990
  }
9956
9991
  }
9957
- const artifactAbsDir = path18.join(baseDir, commonDir);
9992
+ const artifactAbsDir = path19.join(baseDir, commonDir);
9958
9993
  const files = buildFileTree(artifactAbsDir, baseDir);
9959
9994
  return c4.json({ files });
9960
9995
  } catch {
@@ -9969,9 +10004,9 @@ async function handleEvalFileContent(c4, { searchDir }) {
9969
10004
  const markerIdx = c4.req.path.indexOf(marker);
9970
10005
  const filePath = markerIdx >= 0 ? c4.req.path.slice(markerIdx + marker.length) : "";
9971
10006
  if (!filePath) return c4.json({ error: "No file path specified" }, 400);
9972
- const baseDir = path18.dirname(meta.path);
9973
- const absolutePath = path18.resolve(baseDir, filePath);
9974
- if (!absolutePath.startsWith(path18.resolve(baseDir) + path18.sep) && absolutePath !== path18.resolve(baseDir)) {
10007
+ const baseDir = path19.dirname(meta.path);
10008
+ const absolutePath = path19.resolve(baseDir, filePath);
10009
+ if (!absolutePath.startsWith(path19.resolve(baseDir) + path19.sep) && absolutePath !== path19.resolve(baseDir)) {
9975
10010
  return c4.json({ error: "Path traversal not allowed" }, 403);
9976
10011
  }
9977
10012
  if (!existsSync14(absolutePath) || !statSync5(absolutePath).isFile()) {
@@ -10191,12 +10226,12 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
10191
10226
  return c4.json({
10192
10227
  ...loadStudioConfig(agentvDir),
10193
10228
  read_only: options?.readOnly === true,
10194
- project_name: path18.basename(searchDir),
10229
+ project_name: path19.basename(searchDir),
10195
10230
  multi_project_dashboard: options?.multiProjectDashboard === true
10196
10231
  });
10197
10232
  }
10198
10233
  function handleFeedbackRead(c4, { searchDir }) {
10199
- const resultsDir = path18.join(searchDir, ".agentv", "results");
10234
+ const resultsDir = path19.join(searchDir, ".agentv", "results");
10200
10235
  return c4.json(readFeedback(existsSync14(resultsDir) ? resultsDir : searchDir));
10201
10236
  }
10202
10237
  async function handleRunTagsPut(c4, { searchDir }) {
@@ -10245,7 +10280,7 @@ async function handleRunTagsDelete(c4, { searchDir }) {
10245
10280
  }
10246
10281
  function createApp(results, resultDir, cwd, sourceFile, options) {
10247
10282
  const searchDir = cwd ?? resultDir;
10248
- const agentvDir = path18.join(searchDir, ".agentv");
10283
+ const agentvDir = path19.join(searchDir, ".agentv");
10249
10284
  const defaultCtx = { searchDir, agentvDir };
10250
10285
  const readOnly = options?.readOnly === true;
10251
10286
  const app2 = new Hono();
@@ -10256,7 +10291,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10256
10291
  }
10257
10292
  return handler(c4, {
10258
10293
  searchDir: benchmark.path,
10259
- agentvDir: path18.join(benchmark.path, ".agentv")
10294
+ agentvDir: path19.join(benchmark.path, ".agentv")
10260
10295
  });
10261
10296
  }
10262
10297
  app2.post("/api/config", async (c4) => {
@@ -10583,20 +10618,20 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10583
10618
  { readOnly }
10584
10619
  );
10585
10620
  const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
10586
- if (!studioDistPath || !existsSync14(path18.join(studioDistPath, "index.html"))) {
10621
+ if (!studioDistPath || !existsSync14(path19.join(studioDistPath, "index.html"))) {
10587
10622
  throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
10588
10623
  }
10589
10624
  app2.get("/", (c4) => {
10590
- const indexPath = path18.join(studioDistPath, "index.html");
10625
+ const indexPath = path19.join(studioDistPath, "index.html");
10591
10626
  if (existsSync14(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
10592
10627
  return c4.notFound();
10593
10628
  });
10594
10629
  app2.get("/assets/*", (c4) => {
10595
10630
  const assetPath = c4.req.path;
10596
- const filePath = path18.join(studioDistPath, assetPath);
10631
+ const filePath = path19.join(studioDistPath, assetPath);
10597
10632
  if (!existsSync14(filePath)) return c4.notFound();
10598
10633
  const content = readFileSync12(filePath);
10599
- const ext = path18.extname(filePath);
10634
+ const ext = path19.extname(filePath);
10600
10635
  const mimeTypes = {
10601
10636
  ".js": "application/javascript",
10602
10637
  ".css": "text/css",
@@ -10617,26 +10652,26 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10617
10652
  });
10618
10653
  app2.get("*", (c4) => {
10619
10654
  if (c4.req.path.startsWith("/api/")) return c4.json({ error: "Not found" }, 404);
10620
- const indexPath = path18.join(studioDistPath, "index.html");
10655
+ const indexPath = path19.join(studioDistPath, "index.html");
10621
10656
  if (existsSync14(indexPath)) return c4.html(readFileSync12(indexPath, "utf8"));
10622
10657
  return c4.notFound();
10623
10658
  });
10624
10659
  return app2;
10625
10660
  }
10626
10661
  function resolveStudioDistDir() {
10627
- const currentDir = typeof __dirname !== "undefined" ? __dirname : path18.dirname(fileURLToPath3(import.meta.url));
10662
+ const currentDir = typeof __dirname !== "undefined" ? __dirname : path19.dirname(fileURLToPath3(import.meta.url));
10628
10663
  const candidates = [
10629
10664
  // From src/commands/results/ → sibling apps/studio/dist
10630
- path18.resolve(currentDir, "../../../../studio/dist"),
10665
+ path19.resolve(currentDir, "../../../../studio/dist"),
10631
10666
  // From dist/ → sibling apps/studio/dist (monorepo dev)
10632
- path18.resolve(currentDir, "../../studio/dist"),
10667
+ path19.resolve(currentDir, "../../studio/dist"),
10633
10668
  // Bundled inside CLI dist (published package: dist/studio/)
10634
- path18.resolve(currentDir, "studio"),
10669
+ path19.resolve(currentDir, "studio"),
10635
10670
  // From dist/ in monorepo root context
10636
- path18.resolve(currentDir, "../../../apps/studio/dist")
10671
+ path19.resolve(currentDir, "../../../apps/studio/dist")
10637
10672
  ];
10638
10673
  for (const candidate of candidates) {
10639
- if (existsSync14(candidate) && existsSync14(path18.join(candidate, "index.html"))) {
10674
+ if (existsSync14(candidate) && existsSync14(path19.join(candidate, "index.html"))) {
10640
10675
  return candidate;
10641
10676
  }
10642
10677
  }
@@ -10753,7 +10788,7 @@ Discovered ${discovered.length} project(s).`);
10753
10788
  }
10754
10789
  }
10755
10790
  }
10756
- const resultDir = sourceFile ? path18.dirname(path18.resolve(sourceFile)) : cwd;
10791
+ const resultDir = sourceFile ? path19.dirname(path19.resolve(sourceFile)) : cwd;
10757
10792
  const app2 = createApp(results, resultDir, cwd, sourceFile, {
10758
10793
  readOnly,
10759
10794
  multiProjectDashboard: isMultiProject
@@ -10878,7 +10913,7 @@ var selfCommand = subcommands({
10878
10913
 
10879
10914
  // src/commands/transpile/index.ts
10880
10915
  import { writeFileSync as writeFileSync7 } from "node:fs";
10881
- import path19 from "node:path";
10916
+ import path20 from "node:path";
10882
10917
  var transpileCommand = command({
10883
10918
  name: "transpile",
10884
10919
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -10902,7 +10937,7 @@ var transpileCommand = command({
10902
10937
  handler: async ({ input, outDir, stdout }) => {
10903
10938
  let result;
10904
10939
  try {
10905
- result = transpileEvalYamlFile(path19.resolve(input));
10940
+ result = transpileEvalYamlFile(path20.resolve(input));
10906
10941
  } catch (error) {
10907
10942
  console.error(`Error: ${error.message}`);
10908
10943
  process.exit(1);
@@ -10926,11 +10961,11 @@ var transpileCommand = command({
10926
10961
  process.stdout.write("\n");
10927
10962
  return;
10928
10963
  }
10929
- const outputDir = outDir ? path19.resolve(outDir) : path19.dirname(path19.resolve(input));
10964
+ const outputDir = outDir ? path20.resolve(outDir) : path20.dirname(path20.resolve(input));
10930
10965
  const fileNames = getOutputFilenames(result);
10931
10966
  for (const [skill, evalsJson] of result.files) {
10932
10967
  const fileName = fileNames.get(skill) ?? "evals.json";
10933
- const outputPath = path19.join(outputDir, fileName);
10968
+ const outputPath = path20.join(outputDir, fileName);
10934
10969
  writeFileSync7(outputPath, `${JSON.stringify(evalsJson, null, 2)}
10935
10970
  `);
10936
10971
  console.log(`Transpiled to ${outputPath}`);
@@ -10939,7 +10974,7 @@ var transpileCommand = command({
10939
10974
  });
10940
10975
 
10941
10976
  // src/commands/trend/index.ts
10942
- import path20 from "node:path";
10977
+ import path21 from "node:path";
10943
10978
  var colors2 = {
10944
10979
  reset: "\x1B[0m",
10945
10980
  bold: "\x1B[1m",
@@ -10989,7 +11024,7 @@ function colorizeSlope(value) {
10989
11024
  }
10990
11025
  function ensureTrendIndexPath(source, cwd) {
10991
11026
  const resolved = resolveResultSourcePath(source, cwd);
10992
- if (path20.basename(resolved) !== RESULT_INDEX_FILENAME) {
11027
+ if (path21.basename(resolved) !== RESULT_INDEX_FILENAME) {
10993
11028
  throw new Error(
10994
11029
  `Unsupported result source for trend: ${source}. Use a run workspace directory or ${RESULT_INDEX_FILENAME} manifest.`
10995
11030
  );
@@ -11009,7 +11044,7 @@ function resolveTrendSources(cwd, sources, last) {
11009
11044
  if (last < 2) {
11010
11045
  throw new Error("--last must be at least 2");
11011
11046
  }
11012
- const metas = listResultFiles(cwd).filter((meta) => path20.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
11047
+ const metas = listResultFiles(cwd).filter((meta) => path21.basename(meta.path) === RESULT_INDEX_FILENAME).slice(0, last);
11013
11048
  if (metas.length < 2) {
11014
11049
  throw new Error(
11015
11050
  "Trend analysis requires at least 2 canonical run workspaces in .agentv/results/runs/"
@@ -11024,10 +11059,10 @@ function getRunLabel(sourcePath, timestamp) {
11024
11059
  if (timestamp) {
11025
11060
  return timestamp;
11026
11061
  }
11027
- return path20.basename(path20.dirname(sourcePath));
11062
+ return path21.basename(path21.dirname(sourcePath));
11028
11063
  }
11029
11064
  function getRunSortKey(sourcePath, timestamp) {
11030
- return timestamp ?? path20.basename(path20.dirname(sourcePath));
11065
+ return timestamp ?? path21.basename(path21.dirname(sourcePath));
11031
11066
  }
11032
11067
  function mean2(values) {
11033
11068
  return values.reduce((sum, value) => sum + value, 0) / values.length;
@@ -11408,7 +11443,7 @@ function isTTY() {
11408
11443
  // src/commands/validate/validate-files.ts
11409
11444
  import { constants } from "node:fs";
11410
11445
  import { access, readdir as readdir4, stat } from "node:fs/promises";
11411
- import path21 from "node:path";
11446
+ import path22 from "node:path";
11412
11447
  import fg2 from "fast-glob";
11413
11448
  async function validateFiles(paths) {
11414
11449
  const filePaths = await expandPaths(paths);
@@ -11423,7 +11458,7 @@ async function validateFiles(paths) {
11423
11458
  };
11424
11459
  }
11425
11460
  async function validateSingleFile(filePath) {
11426
- const absolutePath = path21.resolve(filePath);
11461
+ const absolutePath = path22.resolve(filePath);
11427
11462
  const fileType = await detectFileType(absolutePath);
11428
11463
  let result;
11429
11464
  if (fileType === "eval") {
@@ -11467,7 +11502,7 @@ async function validateSingleFile(filePath) {
11467
11502
  async function expandPaths(paths) {
11468
11503
  const expanded = /* @__PURE__ */ new Set();
11469
11504
  for (const inputPath of paths) {
11470
- const absolutePath = path21.resolve(inputPath);
11505
+ const absolutePath = path22.resolve(inputPath);
11471
11506
  try {
11472
11507
  await access(absolutePath, constants.F_OK);
11473
11508
  const stats = await stat(absolutePath);
@@ -11495,7 +11530,7 @@ async function expandPaths(paths) {
11495
11530
  if (yamlMatches.length === 0) {
11496
11531
  console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
11497
11532
  }
11498
- for (const f of yamlMatches) expanded.add(path21.normalize(f));
11533
+ for (const f of yamlMatches) expanded.add(path22.normalize(f));
11499
11534
  }
11500
11535
  const sorted = Array.from(expanded);
11501
11536
  sorted.sort();
@@ -11506,7 +11541,7 @@ async function findYamlFiles(dirPath) {
11506
11541
  try {
11507
11542
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
11508
11543
  for (const entry of entries2) {
11509
- const fullPath = path21.join(dirPath, entry.name);
11544
+ const fullPath = path22.join(dirPath, entry.name);
11510
11545
  if (entry.isDirectory()) {
11511
11546
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
11512
11547
  continue;
@@ -11523,11 +11558,11 @@ async function findYamlFiles(dirPath) {
11523
11558
  return results;
11524
11559
  }
11525
11560
  function isYamlFile(filePath) {
11526
- const ext = path21.extname(filePath).toLowerCase();
11561
+ const ext = path22.extname(filePath).toLowerCase();
11527
11562
  return ext === ".yaml" || ext === ".yml";
11528
11563
  }
11529
11564
  function isEvalYamlFile(filePath) {
11530
- const lower = path21.basename(filePath).toLowerCase();
11565
+ const lower = path22.basename(filePath).toLowerCase();
11531
11566
  return lower.endsWith(".eval.yaml") || lower.endsWith(".eval.yml");
11532
11567
  }
11533
11568
 
@@ -11584,7 +11619,7 @@ var validateCommand = command({
11584
11619
  // src/commands/workspace/clean.ts
11585
11620
  import { existsSync as existsSync15 } from "node:fs";
11586
11621
  import { readFile as readFile6, readdir as readdir5, rm } from "node:fs/promises";
11587
- import path22 from "node:path";
11622
+ import path23 from "node:path";
11588
11623
  async function confirm(message) {
11589
11624
  const readline2 = await import("node:readline");
11590
11625
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -11620,8 +11655,8 @@ var cleanCommand = command({
11620
11655
  const poolDirs = entries2.filter((e) => e.isDirectory());
11621
11656
  const matchingDirs = [];
11622
11657
  for (const dir of poolDirs) {
11623
- const poolDir = path22.join(poolRoot, dir.name);
11624
- const metadataPath = path22.join(poolDir, "metadata.json");
11658
+ const poolDir = path23.join(poolRoot, dir.name);
11659
+ const metadataPath = path23.join(poolDir, "metadata.json");
11625
11660
  try {
11626
11661
  const raw = await readFile6(metadataPath, "utf-8");
11627
11662
  const metadata = JSON.parse(raw);
@@ -11652,7 +11687,7 @@ var cleanCommand = command({
11652
11687
  }
11653
11688
  for (const dir of matchingDirs) {
11654
11689
  await rm(dir, { recursive: true, force: true });
11655
- console.log(`Removed: ${path22.basename(dir).slice(0, 12)}...`);
11690
+ console.log(`Removed: ${path23.basename(dir).slice(0, 12)}...`);
11656
11691
  }
11657
11692
  console.log("Done.");
11658
11693
  } else {
@@ -11670,7 +11705,7 @@ var cleanCommand = command({
11670
11705
  });
11671
11706
 
11672
11707
  // src/commands/workspace/deps.ts
11673
- import path23 from "node:path";
11708
+ import path24 from "node:path";
11674
11709
  var depsCommand = command({
11675
11710
  name: "deps",
11676
11711
  description: "Scan eval files and list git repo dependencies needed by workspaces",
@@ -11694,7 +11729,7 @@ var depsCommand = command({
11694
11729
  const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
11695
11730
  const result = await scanRepoDeps(resolvedPaths);
11696
11731
  for (const err2 of result.errors) {
11697
- console.error(`warning: ${path23.relative(cwd, err2.file)}: ${err2.message}`);
11732
+ console.error(`warning: ${path24.relative(cwd, err2.file)}: ${err2.message}`);
11698
11733
  }
11699
11734
  const output = {
11700
11735
  repos: result.repos.map((r) => ({
@@ -11702,7 +11737,7 @@ var depsCommand = command({
11702
11737
  ...r.ref !== void 0 && { ref: r.ref },
11703
11738
  ...r.clone !== void 0 && { clone: r.clone },
11704
11739
  ...r.checkout !== void 0 && { checkout: r.checkout },
11705
- ...usedBy && { used_by: r.usedBy.map((p) => path23.relative(cwd, p)) }
11740
+ ...usedBy && { used_by: r.usedBy.map((p) => path24.relative(cwd, p)) }
11706
11741
  }))
11707
11742
  };
11708
11743
  console.log(JSON.stringify(output, null, 2));
@@ -11712,13 +11747,13 @@ var depsCommand = command({
11712
11747
  // src/commands/workspace/list.ts
11713
11748
  import { existsSync as existsSync16 } from "node:fs";
11714
11749
  import { readFile as readFile7, readdir as readdir6, stat as stat2 } from "node:fs/promises";
11715
- import path24 from "node:path";
11750
+ import path25 from "node:path";
11716
11751
  async function getDirectorySize(dirPath) {
11717
11752
  let totalSize = 0;
11718
11753
  try {
11719
11754
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
11720
11755
  for (const entry of entries2) {
11721
- const fullPath = path24.join(dirPath, entry.name);
11756
+ const fullPath = path25.join(dirPath, entry.name);
11722
11757
  if (entry.isDirectory()) {
11723
11758
  totalSize += await getDirectorySize(fullPath);
11724
11759
  } else {
@@ -11753,11 +11788,11 @@ var listCommand = command({
11753
11788
  return;
11754
11789
  }
11755
11790
  for (const dir of poolDirs) {
11756
- const poolDir = path24.join(poolRoot, dir.name);
11791
+ const poolDir = path25.join(poolRoot, dir.name);
11757
11792
  const fingerprint = dir.name;
11758
11793
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
11759
11794
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
11760
- const metadataPath = path24.join(poolDir, "metadata.json");
11795
+ const metadataPath = path25.join(poolDir, "metadata.json");
11761
11796
  let metadata = null;
11762
11797
  try {
11763
11798
  const raw = await readFile7(metadataPath, "utf-8");
@@ -11804,8 +11839,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
11804
11839
  var CONFIG_DIR = getAgentvConfigDir();
11805
11840
  var CACHE_FILE = "version-check.json";
11806
11841
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
11807
- async function getCachedUpdateInfo(path25) {
11808
- const filePath = path25 ?? join5(CONFIG_DIR, CACHE_FILE);
11842
+ async function getCachedUpdateInfo(path26) {
11843
+ const filePath = path26 ?? join5(CONFIG_DIR, CACHE_FILE);
11809
11844
  try {
11810
11845
  const raw = await readFile8(filePath, "utf-8");
11811
11846
  const data = JSON.parse(raw);
@@ -11907,7 +11942,7 @@ var app = subcommands({
11907
11942
  workspace: workspaceCommand
11908
11943
  }
11909
11944
  });
11910
- var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "assert"]);
11945
+ var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "assert", "aggregate"]);
11911
11946
  var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
11912
11947
  "import",
11913
11948
  "inspect",
@@ -11966,4 +12001,4 @@ export {
11966
12001
  preprocessArgv,
11967
12002
  runCli
11968
12003
  };
11969
- //# sourceMappingURL=chunk-ILIM6IIX.js.map
12004
+ //# sourceMappingURL=chunk-MCBERRMC.js.map