agentv 3.14.3 → 3.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import {
3
3
  HtmlWriter,
4
4
  RESULT_INDEX_FILENAME,
5
5
  RESULT_RUNS_DIRNAME,
6
+ buildDefaultRunDir,
6
7
  detectFileType,
7
8
  findRepoRoot,
8
9
  loadLightweightResults,
@@ -22,7 +23,7 @@ import {
22
23
  validateFileReferences,
23
24
  validateTargetsFile,
24
25
  writeArtifactsFromResults
25
- } from "./chunk-VYDUBNCD.js";
26
+ } from "./chunk-SAPEYQ5U.js";
26
27
  import {
27
28
  createBuiltinRegistry,
28
29
  executeScript,
@@ -4185,7 +4186,7 @@ var evalRunCommand = command({
4185
4186
  },
4186
4187
  handler: async (args) => {
4187
4188
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-LBVPF2CG.js");
4189
+ const { launchInteractiveWizard } = await import("./interactive-PGZ55VHT.js");
4189
4190
  await launchInteractiveWizard();
4190
4191
  return;
4191
4192
  }
@@ -4398,6 +4399,7 @@ var initCmdTsCommand = command({
4398
4399
  });
4399
4400
 
4400
4401
  // src/commands/pipeline/bench.ts
4402
+ import { existsSync as existsSync2 } from "node:fs";
4401
4403
  import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
4402
4404
  import { join } from "node:path";
4403
4405
  var evalBenchCommand = command({
@@ -4497,14 +4499,41 @@ var evalBenchCommand = command({
4497
4499
  `,
4498
4500
  "utf8"
4499
4501
  );
4502
+ const scores = evaluators.map((e) => ({
4503
+ name: e.name,
4504
+ type: e.type,
4505
+ score: e.score,
4506
+ weight: e.weight,
4507
+ verdict: e.score >= 0.5 ? "pass" : "fail",
4508
+ assertions: e.assertions.map((a) => ({
4509
+ text: a.text,
4510
+ passed: a.passed,
4511
+ evidence: a.evidence ?? ""
4512
+ }))
4513
+ }));
4514
+ let executionStatus = "ok";
4515
+ const timingPath = join(testDir, "timing.json");
4516
+ if (existsSync2(timingPath)) {
4517
+ try {
4518
+ const timing = JSON.parse(await readFile(timingPath, "utf8"));
4519
+ if (typeof timing.execution_status === "string") {
4520
+ executionStatus = timing.execution_status;
4521
+ }
4522
+ } catch {
4523
+ }
4524
+ }
4525
+ const hasResponse = existsSync2(join(testDir, "response.md"));
4500
4526
  indexLines.push(
4501
4527
  JSON.stringify({
4502
4528
  timestamp: manifest.timestamp,
4503
4529
  test_id: testId,
4504
4530
  score: Math.round(weightedScore * 1e3) / 1e3,
4505
4531
  target: targetName,
4532
+ scores,
4533
+ execution_status: executionStatus,
4506
4534
  grading_path: `${testId}/grading.json`,
4507
- timing_path: `${testId}/timing.json`
4535
+ timing_path: `${testId}/timing.json`,
4536
+ response_path: hasResponse ? `${testId}/response.md` : null
4508
4537
  })
4509
4538
  );
4510
4539
  }
@@ -4680,14 +4709,14 @@ var evalInputCommand = command({
4680
4709
  description: "Path to eval YAML file"
4681
4710
  }),
4682
4711
  out: option({
4683
- type: string,
4712
+ type: optional(string),
4684
4713
  long: "out",
4685
- description: "Output directory for extracted inputs"
4714
+ description: "Output directory for extracted inputs (default: .agentv/results/runs/eval_<timestamp>)"
4686
4715
  })
4687
4716
  },
4688
4717
  handler: async ({ evalPath, out }) => {
4689
4718
  const resolvedEvalPath = resolve(evalPath);
4690
- const outDir = resolve(out);
4719
+ const outDir = resolve(out ?? buildDefaultRunDir(process.cwd()));
4691
4720
  const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
4692
4721
  const evalDir = dirname(resolvedEvalPath);
4693
4722
  const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
@@ -4826,7 +4855,7 @@ async function writeJson(filePath, data) {
4826
4855
 
4827
4856
  // src/commands/pipeline/run.ts
4828
4857
  import { execSync } from "node:child_process";
4829
- import { existsSync as existsSync2, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4858
+ import { existsSync as existsSync3, readFileSync as readFileSync4, unlinkSync } from "node:fs";
4830
4859
  import { mkdir as mkdir4, readFile as readFile4, readdir as readdir3, writeFile as writeFile5 } from "node:fs/promises";
4831
4860
  import { tmpdir } from "node:os";
4832
4861
  import { dirname as dirname2, join as join4, resolve as resolve2 } from "node:path";
@@ -4834,7 +4863,7 @@ function loadEnvFile(dir) {
4834
4863
  let current = resolve2(dir);
4835
4864
  while (true) {
4836
4865
  const candidate = join4(current, ".env");
4837
- if (existsSync2(candidate)) {
4866
+ if (existsSync3(candidate)) {
4838
4867
  const env3 = {};
4839
4868
  for (const line of readFileSync4(candidate, "utf8").split("\n")) {
4840
4869
  const trimmed = line.trim();
@@ -4861,9 +4890,9 @@ var evalRunCommand2 = command({
4861
4890
  description: "Path to eval YAML file"
4862
4891
  }),
4863
4892
  out: option({
4864
- type: string,
4893
+ type: optional(string),
4865
4894
  long: "out",
4866
- description: "Output directory for results"
4895
+ description: "Output directory for results (default: .agentv/results/runs/eval_<timestamp>)"
4867
4896
  }),
4868
4897
  workers: option({
4869
4898
  type: optional(number),
@@ -4873,7 +4902,7 @@ var evalRunCommand2 = command({
4873
4902
  },
4874
4903
  handler: async ({ evalPath, out, workers }) => {
4875
4904
  const resolvedEvalPath = resolve2(evalPath);
4876
- const outDir = resolve2(out);
4905
+ const outDir = resolve2(out ?? buildDefaultRunDir(process.cwd()));
4877
4906
  const repoRoot = await findRepoRoot(dirname2(resolvedEvalPath));
4878
4907
  const evalDir = dirname2(resolvedEvalPath);
4879
4908
  const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
@@ -4989,7 +5018,7 @@ var evalRunCommand2 = command({
4989
5018
  });
4990
5019
  const durationMs = Math.round(performance.now() - start);
4991
5020
  let response;
4992
- if (existsSync2(outputFile)) {
5021
+ if (existsSync3(outputFile)) {
4993
5022
  response = readFileSync4(outputFile, "utf8");
4994
5023
  } else {
4995
5024
  response = "ERROR: No output file generated";
@@ -4997,7 +5026,8 @@ var evalRunCommand2 = command({
4997
5026
  await writeFile5(join4(testDir, "response.md"), response, "utf8");
4998
5027
  await writeJson2(join4(testDir, "timing.json"), {
4999
5028
  duration_ms: durationMs,
5000
- total_duration_seconds: Math.round(durationMs / 10) / 100
5029
+ total_duration_seconds: Math.round(durationMs / 10) / 100,
5030
+ execution_status: "ok"
5001
5031
  });
5002
5032
  console.log(` ${testId}: OK (${durationMs}ms, ${response.length} chars)`);
5003
5033
  } catch (error) {
@@ -5007,19 +5037,29 @@ var evalRunCommand2 = command({
5007
5037
  await writeFile5(join4(testDir, "response.md"), response, "utf8");
5008
5038
  await writeJson2(join4(testDir, "timing.json"), {
5009
5039
  duration_ms: durationMs,
5010
- total_duration_seconds: Math.round(durationMs / 10) / 100
5040
+ total_duration_seconds: Math.round(durationMs / 10) / 100,
5041
+ execution_status: "execution_error"
5011
5042
  });
5012
5043
  console.error(` ${testId}: FAILED (${durationMs}ms) \u2014 ${message.slice(0, 200)}`);
5013
5044
  } finally {
5014
5045
  try {
5015
- if (existsSync2(promptFile)) unlinkSync(promptFile);
5016
- if (existsSync2(outputFile)) unlinkSync(outputFile);
5046
+ if (existsSync3(promptFile)) unlinkSync(promptFile);
5047
+ if (existsSync3(outputFile)) unlinkSync(outputFile);
5017
5048
  } catch {
5018
5049
  }
5019
5050
  }
5020
5051
  };
5021
- const allTasks = testIds.map((testId) => invokeTarget(testId));
5022
- await Promise.all(allTasks);
5052
+ const pending = /* @__PURE__ */ new Set();
5053
+ for (const testId of testIds) {
5054
+ const task = invokeTarget(testId).then(() => {
5055
+ pending.delete(task);
5056
+ });
5057
+ pending.add(task);
5058
+ if (pending.size >= maxWorkers) {
5059
+ await Promise.race(pending);
5060
+ }
5061
+ }
5062
+ await Promise.all(pending);
5023
5063
  } else {
5024
5064
  console.log("Agent-as-target mode \u2014 skipping CLI invocation.");
5025
5065
  }
@@ -5188,7 +5228,7 @@ var pipelineCommand = subcommands({
5188
5228
  import path7 from "node:path";
5189
5229
 
5190
5230
  // src/commands/results/shared.ts
5191
- import { existsSync as existsSync3 } from "node:fs";
5231
+ import { existsSync as existsSync4 } from "node:fs";
5192
5232
 
5193
5233
  // src/commands/trace/utils.ts
5194
5234
  import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
@@ -5602,14 +5642,14 @@ async function resolveSourceFile(source, cwd) {
5602
5642
  let sourceFile;
5603
5643
  if (source) {
5604
5644
  sourceFile = resolveResultSourcePath(source, cwd);
5605
- if (!existsSync3(sourceFile)) {
5645
+ if (!existsSync4(sourceFile)) {
5606
5646
  console.error(`Error: File not found: ${sourceFile}`);
5607
5647
  process.exit(1);
5608
5648
  }
5609
5649
  } else {
5610
5650
  const cache = await loadRunCache(cwd);
5611
5651
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5612
- if (cachedFile && existsSync3(cachedFile)) {
5652
+ if (cachedFile && existsSync4(cachedFile)) {
5613
5653
  sourceFile = cachedFile;
5614
5654
  } else {
5615
5655
  const metas = listResultFiles(cwd, 1);
@@ -5821,7 +5861,7 @@ var resultsShowCommand = command({
5821
5861
  });
5822
5862
 
5823
5863
  // src/commands/results/summary.ts
5824
- import { existsSync as existsSync4, readFileSync as readFileSync6 } from "node:fs";
5864
+ import { existsSync as existsSync5, readFileSync as readFileSync6 } from "node:fs";
5825
5865
  function formatSummary(results, grading) {
5826
5866
  const total = results.length;
5827
5867
  let passed;
@@ -5872,7 +5912,7 @@ var resultsSummaryCommand = command({
5872
5912
  const { results, sourceFile } = await loadResults(source, cwd);
5873
5913
  let grading;
5874
5914
  const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
5875
- if (existsSync4(gradingPath)) {
5915
+ if (existsSync5(gradingPath)) {
5876
5916
  try {
5877
5917
  grading = JSON.parse(readFileSync6(gradingPath, "utf8"));
5878
5918
  } catch {
@@ -5886,6 +5926,217 @@ var resultsSummaryCommand = command({
5886
5926
  }
5887
5927
  });
5888
5928
 
5929
+ // src/commands/results/validate.ts
5930
+ import { existsSync as existsSync6, readFileSync as readFileSync7, statSync as statSync3 } from "node:fs";
5931
+ import path8 from "node:path";
5932
+ function checkDirectoryNaming(runDir) {
5933
+ const dirName = path8.basename(runDir);
5934
+ const parentName = path8.basename(path8.dirname(runDir));
5935
+ const diagnostics = [];
5936
+ if (parentName !== "runs") {
5937
+ diagnostics.push({
5938
+ severity: "warning",
5939
+ message: `Directory is not under a 'runs/' parent (found '${parentName}/'). Expected: .agentv/results/runs/<run-dir>`
5940
+ });
5941
+ }
5942
+ if (!/^eval_\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z$/.test(dirName)) {
5943
+ diagnostics.push({
5944
+ severity: "warning",
5945
+ message: `Directory name '${dirName}' does not match the expected pattern 'eval_<ISO-timestamp>'. Example: eval_2026-03-27T12-42-24-429Z`
5946
+ });
5947
+ }
5948
+ return diagnostics;
5949
+ }
5950
+ function checkIndexJsonl(runDir) {
5951
+ const indexPath = path8.join(runDir, "index.jsonl");
5952
+ const diagnostics = [];
5953
+ const entries2 = [];
5954
+ if (!existsSync6(indexPath)) {
5955
+ diagnostics.push({ severity: "error", message: "index.jsonl is missing" });
5956
+ return { diagnostics, entries: entries2 };
5957
+ }
5958
+ const content = readFileSync7(indexPath, "utf8");
5959
+ const lines = content.split("\n").filter((l) => l.trim().length > 0);
5960
+ if (lines.length === 0) {
5961
+ diagnostics.push({ severity: "error", message: "index.jsonl is empty" });
5962
+ return { diagnostics, entries: entries2 };
5963
+ }
5964
+ for (let i = 0; i < lines.length; i++) {
5965
+ try {
5966
+ const entry = JSON.parse(lines[i]);
5967
+ entries2.push(entry);
5968
+ if (!entry.test_id) {
5969
+ diagnostics.push({
5970
+ severity: "error",
5971
+ message: `index.jsonl line ${i + 1}: missing 'test_id'`
5972
+ });
5973
+ }
5974
+ if (entry.score === void 0 || entry.score === null) {
5975
+ diagnostics.push({
5976
+ severity: "error",
5977
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'score'`
5978
+ });
5979
+ } else if (typeof entry.score !== "number" || entry.score < 0 || entry.score > 1) {
5980
+ diagnostics.push({
5981
+ severity: "error",
5982
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): score ${entry.score} is outside [0, 1]`
5983
+ });
5984
+ }
5985
+ if (!entry.target) {
5986
+ diagnostics.push({
5987
+ severity: "error",
5988
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'target'`
5989
+ });
5990
+ }
5991
+ if (!entry.grading_path) {
5992
+ diagnostics.push({
5993
+ severity: "warning",
5994
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'grading_path'`
5995
+ });
5996
+ }
5997
+ if (!entry.scores || !Array.isArray(entry.scores) || entry.scores.length === 0) {
5998
+ diagnostics.push({
5999
+ severity: "warning",
6000
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'scores[]' array \u2014 dashboard may not show per-evaluator breakdown`
6001
+ });
6002
+ } else {
6003
+ for (let j = 0; j < entry.scores.length; j++) {
6004
+ const s = entry.scores[j];
6005
+ if (!s || typeof s !== "object") {
6006
+ diagnostics.push({
6007
+ severity: "error",
6008
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): scores[${j}] is not an object`
6009
+ });
6010
+ continue;
6011
+ }
6012
+ const missing = [];
6013
+ if (typeof s.name !== "string") missing.push("name");
6014
+ if (typeof s.type !== "string") missing.push("type");
6015
+ if (typeof s.score !== "number") missing.push("score");
6016
+ if (typeof s.verdict !== "string") missing.push("verdict");
6017
+ if (missing.length > 0) {
6018
+ diagnostics.push({
6019
+ severity: "warning",
6020
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): scores[${j}] missing fields: ${missing.join(", ")}`
6021
+ });
6022
+ }
6023
+ }
6024
+ }
6025
+ if (!entry.execution_status) {
6026
+ diagnostics.push({
6027
+ severity: "warning",
6028
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): missing 'execution_status'`
6029
+ });
6030
+ } else if (!["ok", "quality_failure", "execution_error"].includes(entry.execution_status)) {
6031
+ diagnostics.push({
6032
+ severity: "warning",
6033
+ message: `index.jsonl line ${i + 1} (${entry.test_id ?? "?"}): unknown execution_status '${entry.execution_status}' (expected: ok, quality_failure, execution_error)`
6034
+ });
6035
+ }
6036
+ } catch {
6037
+ diagnostics.push({
6038
+ severity: "error",
6039
+ message: `index.jsonl line ${i + 1}: invalid JSON`
6040
+ });
6041
+ }
6042
+ }
6043
+ return { diagnostics, entries: entries2 };
6044
+ }
6045
+ function checkArtifactFiles(runDir, entries2) {
6046
+ const diagnostics = [];
6047
+ for (const entry of entries2) {
6048
+ const testId = entry.test_id ?? "?";
6049
+ if (entry.grading_path) {
6050
+ const gradingPath = path8.join(runDir, entry.grading_path);
6051
+ if (!existsSync6(gradingPath)) {
6052
+ diagnostics.push({
6053
+ severity: "error",
6054
+ message: `${testId}: grading.json not found at '${entry.grading_path}'`
6055
+ });
6056
+ } else {
6057
+ try {
6058
+ const grading = JSON.parse(readFileSync7(gradingPath, "utf8"));
6059
+ if (!grading.assertions || !Array.isArray(grading.assertions)) {
6060
+ diagnostics.push({
6061
+ severity: "error",
6062
+ message: `${testId}: grading.json missing 'assertions' array`
6063
+ });
6064
+ }
6065
+ if (!grading.summary) {
6066
+ diagnostics.push({
6067
+ severity: "warning",
6068
+ message: `${testId}: grading.json missing 'summary' object`
6069
+ });
6070
+ }
6071
+ } catch {
6072
+ diagnostics.push({
6073
+ severity: "error",
6074
+ message: `${testId}: grading.json is not valid JSON`
6075
+ });
6076
+ }
6077
+ }
6078
+ }
6079
+ if (entry.timing_path) {
6080
+ const timingPath = path8.join(runDir, entry.timing_path);
6081
+ if (!existsSync6(timingPath)) {
6082
+ diagnostics.push({
6083
+ severity: "warning",
6084
+ message: `${testId}: timing.json not found at '${entry.timing_path}'`
6085
+ });
6086
+ }
6087
+ }
6088
+ }
6089
+ const benchmarkPath = path8.join(runDir, "benchmark.json");
6090
+ if (!existsSync6(benchmarkPath)) {
6091
+ diagnostics.push({ severity: "warning", message: "benchmark.json is missing" });
6092
+ }
6093
+ return diagnostics;
6094
+ }
6095
+ var resultsValidateCommand = command({
6096
+ name: "validate",
6097
+ description: "Validate that a run directory contains well-formed result artifacts",
6098
+ args: {
6099
+ runDir: positional({
6100
+ type: string,
6101
+ displayName: "run-dir",
6102
+ description: "Path to the run directory to validate"
6103
+ })
6104
+ },
6105
+ handler: async ({ runDir }) => {
6106
+ const resolvedDir = path8.resolve(runDir);
6107
+ if (!existsSync6(resolvedDir) || !statSync3(resolvedDir).isDirectory()) {
6108
+ console.error(`Error: '${runDir}' is not a directory`);
6109
+ process.exit(1);
6110
+ }
6111
+ const allDiagnostics = [];
6112
+ allDiagnostics.push(...checkDirectoryNaming(resolvedDir));
6113
+ const { diagnostics: indexDiags, entries: entries2 } = checkIndexJsonl(resolvedDir);
6114
+ allDiagnostics.push(...indexDiags);
6115
+ if (entries2.length > 0) {
6116
+ allDiagnostics.push(...checkArtifactFiles(resolvedDir, entries2));
6117
+ }
6118
+ const errors = allDiagnostics.filter((d) => d.severity === "error");
6119
+ const warnings = allDiagnostics.filter((d) => d.severity === "warning");
6120
+ if (allDiagnostics.length === 0) {
6121
+ console.log(`\u2713 Valid run directory: ${entries2.length} test(s), no issues found`);
6122
+ return;
6123
+ }
6124
+ for (const d of errors) {
6125
+ console.error(` ERROR: ${d.message}`);
6126
+ }
6127
+ for (const d of warnings) {
6128
+ console.warn(` WARN: ${d.message}`);
6129
+ }
6130
+ console.log(
6131
+ `
6132
+ ${entries2.length} test(s), ${errors.length} error(s), ${warnings.length} warning(s)`
6133
+ );
6134
+ if (errors.length > 0) {
6135
+ process.exit(1);
6136
+ }
6137
+ }
6138
+ });
6139
+
5889
6140
  // src/commands/results/index.ts
5890
6141
  var resultsCommand = subcommands({
5891
6142
  name: "results",
@@ -5894,24 +6145,25 @@ var resultsCommand = subcommands({
5894
6145
  export: resultsExportCommand,
5895
6146
  summary: resultsSummaryCommand,
5896
6147
  failures: resultsFailuresCommand,
5897
- show: resultsShowCommand
6148
+ show: resultsShowCommand,
6149
+ validate: resultsValidateCommand
5898
6150
  }
5899
6151
  });
5900
6152
 
5901
6153
  // src/commands/results/serve.ts
5902
- import { existsSync as existsSync5, readFileSync as readFileSync7, writeFileSync as writeFileSync3 } from "node:fs";
5903
- import path8 from "node:path";
6154
+ import { existsSync as existsSync7, readFileSync as readFileSync8, writeFileSync as writeFileSync3 } from "node:fs";
6155
+ import path9 from "node:path";
5904
6156
  import { Hono } from "hono";
5905
6157
  function feedbackPath(resultDir) {
5906
- return path8.join(resultDir, "feedback.json");
6158
+ return path9.join(resultDir, "feedback.json");
5907
6159
  }
5908
6160
  function readFeedback(cwd) {
5909
6161
  const fp = feedbackPath(cwd);
5910
- if (!existsSync5(fp)) {
6162
+ if (!existsSync7(fp)) {
5911
6163
  return { reviews: [] };
5912
6164
  }
5913
6165
  try {
5914
- return JSON.parse(readFileSync7(fp, "utf8"));
6166
+ return JSON.parse(readFileSync8(fp, "utf8"));
5915
6167
  } catch (err2) {
5916
6168
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
5917
6169
  return { reviews: [] };
@@ -6050,7 +6302,7 @@ ${SERVE_STYLES}
6050
6302
  <main id="app"></main>
6051
6303
  <script>
6052
6304
  var DATA = ${dataJson};
6053
- var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
6305
+ var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path9.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
6054
6306
  ${SERVE_SCRIPT}
6055
6307
  </script>
6056
6308
  </body>
@@ -6711,7 +6963,7 @@ var resultsServeCommand = command({
6711
6963
  let sourceFile;
6712
6964
  if (source) {
6713
6965
  const resolved = resolveResultSourcePath(source, cwd);
6714
- if (!existsSync5(resolved)) {
6966
+ if (!existsSync7(resolved)) {
6715
6967
  console.error(`Error: Source file not found: ${resolved}`);
6716
6968
  process.exit(1);
6717
6969
  }
@@ -6720,7 +6972,7 @@ var resultsServeCommand = command({
6720
6972
  } else {
6721
6973
  const cache = await loadRunCache(cwd);
6722
6974
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
6723
- if (cachedFile && existsSync5(cachedFile)) {
6975
+ if (cachedFile && existsSync7(cachedFile)) {
6724
6976
  sourceFile = cachedFile;
6725
6977
  results = patchTestIds(loadManifestResults(cachedFile));
6726
6978
  } else {
@@ -6731,7 +6983,7 @@ var resultsServeCommand = command({
6731
6983
  }
6732
6984
  }
6733
6985
  }
6734
- const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
6986
+ const resultDir = sourceFile ? path9.dirname(path9.resolve(sourceFile)) : cwd;
6735
6987
  const app2 = createApp(results, resultDir, cwd, sourceFile);
6736
6988
  if (results.length > 0 && sourceFile) {
6737
6989
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
@@ -7625,7 +7877,7 @@ var traceCommand = subcommands({
7625
7877
 
7626
7878
  // src/commands/transpile/index.ts
7627
7879
  import { writeFileSync as writeFileSync4 } from "node:fs";
7628
- import path9 from "node:path";
7880
+ import path10 from "node:path";
7629
7881
  var transpileCommand = command({
7630
7882
  name: "transpile",
7631
7883
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -7649,7 +7901,7 @@ var transpileCommand = command({
7649
7901
  handler: async ({ input, outDir, stdout }) => {
7650
7902
  let result;
7651
7903
  try {
7652
- result = transpileEvalYamlFile(path9.resolve(input));
7904
+ result = transpileEvalYamlFile(path10.resolve(input));
7653
7905
  } catch (error) {
7654
7906
  console.error(`Error: ${error.message}`);
7655
7907
  process.exit(1);
@@ -7673,11 +7925,11 @@ var transpileCommand = command({
7673
7925
  process.stdout.write("\n");
7674
7926
  return;
7675
7927
  }
7676
- const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
7928
+ const outputDir = outDir ? path10.resolve(outDir) : path10.dirname(path10.resolve(input));
7677
7929
  const fileNames = getOutputFilenames(result);
7678
7930
  for (const [skill, evalsJson] of result.files) {
7679
7931
  const fileName = fileNames.get(skill) ?? "evals.json";
7680
- const outputPath = path9.join(outputDir, fileName);
7932
+ const outputPath = path10.join(outputDir, fileName);
7681
7933
  writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7682
7934
  `);
7683
7935
  console.log(`Transpiled to ${outputPath}`);
@@ -7686,7 +7938,7 @@ var transpileCommand = command({
7686
7938
  });
7687
7939
 
7688
7940
  // src/commands/trim/index.ts
7689
- import { readFileSync as readFileSync8, writeFileSync as writeFileSync5 } from "node:fs";
7941
+ import { readFileSync as readFileSync9, writeFileSync as writeFileSync5 } from "node:fs";
7690
7942
  var trimCommand = command({
7691
7943
  name: "trim",
7692
7944
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -7705,7 +7957,7 @@ var trimCommand = command({
7705
7957
  },
7706
7958
  handler: async ({ input, out }) => {
7707
7959
  try {
7708
- const content = readFileSync8(input, "utf8");
7960
+ const content = readFileSync9(input, "utf8");
7709
7961
  const lines = content.trim().split("\n").filter((line) => line.trim());
7710
7962
  const trimmedLines = lines.map((line) => {
7711
7963
  const record = JSON.parse(line);
@@ -7812,7 +8064,7 @@ function isTTY() {
7812
8064
  // src/commands/validate/validate-files.ts
7813
8065
  import { constants } from "node:fs";
7814
8066
  import { access, readdir as readdir4, stat } from "node:fs/promises";
7815
- import path10 from "node:path";
8067
+ import path11 from "node:path";
7816
8068
  async function validateFiles(paths) {
7817
8069
  const filePaths = await expandPaths(paths);
7818
8070
  const results = [];
@@ -7830,7 +8082,7 @@ async function validateFiles(paths) {
7830
8082
  };
7831
8083
  }
7832
8084
  async function validateSingleFile(filePath) {
7833
- const absolutePath = path10.resolve(filePath);
8085
+ const absolutePath = path11.resolve(filePath);
7834
8086
  const fileType = await detectFileType(absolutePath);
7835
8087
  let result;
7836
8088
  if (fileType === "eval") {
@@ -7855,7 +8107,7 @@ async function validateSingleFile(filePath) {
7855
8107
  async function expandPaths(paths) {
7856
8108
  const expanded = [];
7857
8109
  for (const inputPath of paths) {
7858
- const absolutePath = path10.resolve(inputPath);
8110
+ const absolutePath = path11.resolve(inputPath);
7859
8111
  try {
7860
8112
  await access(absolutePath, constants.F_OK);
7861
8113
  } catch {
@@ -7879,7 +8131,7 @@ async function findYamlFiles(dirPath) {
7879
8131
  try {
7880
8132
  const entries2 = await readdir4(dirPath, { withFileTypes: true });
7881
8133
  for (const entry of entries2) {
7882
- const fullPath = path10.join(dirPath, entry.name);
8134
+ const fullPath = path11.join(dirPath, entry.name);
7883
8135
  if (entry.isDirectory()) {
7884
8136
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
7885
8137
  continue;
@@ -7896,7 +8148,7 @@ async function findYamlFiles(dirPath) {
7896
8148
  return results;
7897
8149
  }
7898
8150
  function isYamlFile(filePath) {
7899
- const ext = path10.extname(filePath).toLowerCase();
8151
+ const ext = path11.extname(filePath).toLowerCase();
7900
8152
  return ext === ".yaml" || ext === ".yml";
7901
8153
  }
7902
8154
 
@@ -7934,9 +8186,9 @@ var validateCommand = command({
7934
8186
  });
7935
8187
 
7936
8188
  // src/commands/workspace/clean.ts
7937
- import { existsSync as existsSync6 } from "node:fs";
8189
+ import { existsSync as existsSync8 } from "node:fs";
7938
8190
  import { readFile as readFile5, readdir as readdir5, rm } from "node:fs/promises";
7939
- import path11 from "node:path";
8191
+ import path12 from "node:path";
7940
8192
  async function confirm(message) {
7941
8193
  const readline2 = await import("node:readline");
7942
8194
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -7963,7 +8215,7 @@ var cleanCommand = command({
7963
8215
  },
7964
8216
  handler: async ({ repo, force }) => {
7965
8217
  const poolRoot = getWorkspacePoolRoot();
7966
- if (!existsSync6(poolRoot)) {
8218
+ if (!existsSync8(poolRoot)) {
7967
8219
  console.log("No workspace pool entries found.");
7968
8220
  return;
7969
8221
  }
@@ -7972,8 +8224,8 @@ var cleanCommand = command({
7972
8224
  const poolDirs = entries2.filter((e) => e.isDirectory());
7973
8225
  const matchingDirs = [];
7974
8226
  for (const dir of poolDirs) {
7975
- const poolDir = path11.join(poolRoot, dir.name);
7976
- const metadataPath = path11.join(poolDir, "metadata.json");
8227
+ const poolDir = path12.join(poolRoot, dir.name);
8228
+ const metadataPath = path12.join(poolDir, "metadata.json");
7977
8229
  try {
7978
8230
  const raw = await readFile5(metadataPath, "utf-8");
7979
8231
  const metadata = JSON.parse(raw);
@@ -8004,7 +8256,7 @@ var cleanCommand = command({
8004
8256
  }
8005
8257
  for (const dir of matchingDirs) {
8006
8258
  await rm(dir, { recursive: true, force: true });
8007
- console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
8259
+ console.log(`Removed: ${path12.basename(dir).slice(0, 12)}...`);
8008
8260
  }
8009
8261
  console.log("Done.");
8010
8262
  } else {
@@ -8022,15 +8274,15 @@ var cleanCommand = command({
8022
8274
  });
8023
8275
 
8024
8276
  // src/commands/workspace/list.ts
8025
- import { existsSync as existsSync7 } from "node:fs";
8277
+ import { existsSync as existsSync9 } from "node:fs";
8026
8278
  import { readFile as readFile6, readdir as readdir6, stat as stat2 } from "node:fs/promises";
8027
- import path12 from "node:path";
8279
+ import path13 from "node:path";
8028
8280
  async function getDirectorySize(dirPath) {
8029
8281
  let totalSize = 0;
8030
8282
  try {
8031
8283
  const entries2 = await readdir6(dirPath, { withFileTypes: true });
8032
8284
  for (const entry of entries2) {
8033
- const fullPath = path12.join(dirPath, entry.name);
8285
+ const fullPath = path13.join(dirPath, entry.name);
8034
8286
  if (entry.isDirectory()) {
8035
8287
  totalSize += await getDirectorySize(fullPath);
8036
8288
  } else {
@@ -8054,7 +8306,7 @@ var listCommand = command({
8054
8306
  args: {},
8055
8307
  handler: async () => {
8056
8308
  const poolRoot = getWorkspacePoolRoot();
8057
- if (!existsSync7(poolRoot)) {
8309
+ if (!existsSync9(poolRoot)) {
8058
8310
  console.log("No workspace pool entries found.");
8059
8311
  return;
8060
8312
  }
@@ -8065,11 +8317,11 @@ var listCommand = command({
8065
8317
  return;
8066
8318
  }
8067
8319
  for (const dir of poolDirs) {
8068
- const poolDir = path12.join(poolRoot, dir.name);
8320
+ const poolDir = path13.join(poolRoot, dir.name);
8069
8321
  const fingerprint = dir.name;
8070
8322
  const poolEntries = await readdir6(poolDir, { withFileTypes: true });
8071
8323
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
8072
- const metadataPath = path12.join(poolDir, "metadata.json");
8324
+ const metadataPath = path13.join(poolDir, "metadata.json");
8073
8325
  let metadata = null;
8074
8326
  try {
8075
8327
  const raw = await readFile6(metadataPath, "utf-8");
@@ -8115,8 +8367,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
8115
8367
  var AGENTV_DIR = getAgentvHome();
8116
8368
  var CACHE_FILE = "version-check.json";
8117
8369
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
8118
- async function getCachedUpdateInfo(path13) {
8119
- const filePath = path13 ?? join5(AGENTV_DIR, CACHE_FILE);
8370
+ async function getCachedUpdateInfo(path14) {
8371
+ const filePath = path14 ?? join5(AGENTV_DIR, CACHE_FILE);
8120
8372
  try {
8121
8373
  const raw = await readFile7(filePath, "utf-8");
8122
8374
  const data = JSON.parse(raw);
@@ -8273,4 +8525,4 @@ export {
8273
8525
  preprocessArgv,
8274
8526
  runCli
8275
8527
  };
8276
- //# sourceMappingURL=chunk-FSNRKR7X.js.map
8528
+ //# sourceMappingURL=chunk-3NLBBQX6.js.map