agentv 3.11.1 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +16 -12
  2. package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
  3. package/dist/{chunk-V2S5CZU3.js → chunk-2ELQ6F3C.js} +916 -523
  4. package/dist/chunk-2ELQ6F3C.js.map +1 -0
  5. package/dist/{chunk-JK6V4KVD.js → chunk-NR7QVL75.js} +32 -24
  6. package/dist/chunk-NR7QVL75.js.map +1 -0
  7. package/dist/{chunk-OYD2NB55.js → chunk-UYBLUYHN.js} +104 -15
  8. package/dist/chunk-UYBLUYHN.js.map +1 -0
  9. package/dist/{chunk-CKMAM2GD.js → chunk-VLOFRXH4.js} +461 -196
  10. package/dist/chunk-VLOFRXH4.js.map +1 -0
  11. package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
  12. package/dist/cli.js +5 -6
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{dist-VUPMLHIV.js → dist-L6R5HJ72.js} +4 -5
  15. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
  16. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
  17. package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
  18. package/dist/index.js +5 -6
  19. package/dist/{interactive-FZJANO4A.js → interactive-5X62YEEX.js} +5 -6
  20. package/dist/{interactive-FZJANO4A.js.map → interactive-5X62YEEX.js.map} +1 -1
  21. package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
  22. package/dist/{simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js} +2 -3
  23. package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
  24. package/dist/templates/.agentv/.env.example +23 -0
  25. package/dist/templates/.agentv/config.yaml +13 -4
  26. package/dist/templates/.agentv/targets.yaml +16 -0
  27. package/package.json +1 -1
  28. package/dist/chunk-2QFWRIYL.js +0 -186
  29. package/dist/chunk-2QFWRIYL.js.map +0 -1
  30. package/dist/chunk-2RMPO6LY.js +0 -747
  31. package/dist/chunk-2RMPO6LY.js.map +0 -1
  32. package/dist/chunk-3Q7WIXT4.js +0 -4846
  33. package/dist/chunk-3Q7WIXT4.js.map +0 -1
  34. package/dist/chunk-73O2DCJP.js +0 -1274
  35. package/dist/chunk-73O2DCJP.js.map +0 -1
  36. package/dist/chunk-AUKF3Y3W.js +0 -212
  37. package/dist/chunk-AUKF3Y3W.js.map +0 -1
  38. package/dist/chunk-BRH7SIDP.js +0 -133
  39. package/dist/chunk-BRH7SIDP.js.map +0 -1
  40. package/dist/chunk-BXM4I3BM.js +0 -526
  41. package/dist/chunk-BXM4I3BM.js.map +0 -1
  42. package/dist/chunk-CKMAM2GD.js.map +0 -1
  43. package/dist/chunk-FHTURHTY.js +0 -546
  44. package/dist/chunk-FHTURHTY.js.map +0 -1
  45. package/dist/chunk-GJFXQQWG.js +0 -21
  46. package/dist/chunk-GJFXQQWG.js.map +0 -1
  47. package/dist/chunk-HKMLG4KF.js +0 -38
  48. package/dist/chunk-HKMLG4KF.js.map +0 -1
  49. package/dist/chunk-JGU3PVA4.js +0 -133
  50. package/dist/chunk-JGU3PVA4.js.map +0 -1
  51. package/dist/chunk-JK6V4KVD.js.map +0 -1
  52. package/dist/chunk-LHU5FGVZ.js +0 -4804
  53. package/dist/chunk-LHU5FGVZ.js.map +0 -1
  54. package/dist/chunk-OL2WGI6E.js +0 -149
  55. package/dist/chunk-OL2WGI6E.js.map +0 -1
  56. package/dist/chunk-ONETZL6N.js +0 -15
  57. package/dist/chunk-ONETZL6N.js.map +0 -1
  58. package/dist/chunk-OYD2NB55.js.map +0 -1
  59. package/dist/chunk-QV4UGEN6.js +0 -320
  60. package/dist/chunk-QV4UGEN6.js.map +0 -1
  61. package/dist/chunk-QXLDKGF3.js +0 -46
  62. package/dist/chunk-QXLDKGF3.js.map +0 -1
  63. package/dist/chunk-U6VEM66A.js +0 -63
  64. package/dist/chunk-U6VEM66A.js.map +0 -1
  65. package/dist/chunk-UALXHIMX.js +0 -48
  66. package/dist/chunk-UALXHIMX.js.map +0 -1
  67. package/dist/chunk-UGXG73VF.js +0 -55
  68. package/dist/chunk-UGXG73VF.js.map +0 -1
  69. package/dist/chunk-UHP5KEDL.js +0 -38
  70. package/dist/chunk-UHP5KEDL.js.map +0 -1
  71. package/dist/chunk-V2S5CZU3.js.map +0 -1
  72. package/dist/chunk-WVSXFZWP.js +0 -204
  73. package/dist/chunk-WVSXFZWP.js.map +0 -1
  74. package/dist/chunk-XSUMCWKO.js +0 -30
  75. package/dist/chunk-XSUMCWKO.js.map +0 -1
  76. package/dist/chunk-XUO7ZEHU.js +0 -181
  77. package/dist/chunk-XUO7ZEHU.js.map +0 -1
  78. package/dist/chunk-YSGUX5JT.js +0 -1002
  79. package/dist/chunk-YSGUX5JT.js.map +0 -1
  80. package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
  81. package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
  82. package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
  83. package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
  84. package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
  85. package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
  86. package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
  87. package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
  88. package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
  89. package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
  90. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
  91. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
  92. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
  93. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
  94. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
  95. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
  96. package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
  97. package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
  98. package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
  99. package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
  100. package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
  101. package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
  102. package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
  103. package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
  104. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
  105. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
  106. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
  107. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
  108. package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
  109. package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
  110. package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
  111. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
  112. package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
  113. package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
  114. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
  115. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
  116. package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
  117. package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
  118. package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
  119. package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
  120. /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
  121. /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
  122. /package/dist/{dist-VUPMLHIV.js.map → dist-L6R5HJ72.js.map} +0 -0
  123. /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
  124. /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
  125. /package/dist/{esm-UYZ3HJBU.js.map → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map} +0 -0
  126. /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
@@ -1,35 +1,37 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  HtmlWriter,
4
- buildAggregateGradingArtifact,
5
- buildBenchmarkArtifact,
6
- buildGradingArtifact,
7
- buildTimingArtifact,
4
+ LEGACY_RESULTS_FILENAME,
5
+ RESULT_INDEX_FILENAME,
8
6
  detectFileType,
9
7
  findRepoRoot,
8
+ loadLightweightResults,
9
+ loadManifestResults,
10
10
  loadRunCache,
11
11
  package_default,
12
- parseJsonlResults,
13
12
  resolveEvalPaths,
13
+ resolveExistingRunPrimaryPath,
14
+ resolveResultSourcePath,
14
15
  resolveRunCacheFile,
16
+ resolveWorkspaceOrFilePath,
15
17
  runEvalCommand,
16
18
  selectTarget,
17
19
  toSnakeCaseDeep,
18
20
  validateConfigFile,
19
21
  validateEvalFile,
20
22
  validateFileReferences,
21
- validateTargetsFile
22
- } from "./chunk-CKMAM2GD.js";
23
+ validateTargetsFile,
24
+ writeArtifactsFromResults
25
+ } from "./chunk-VLOFRXH4.js";
23
26
  import {
24
27
  createBuiltinRegistry,
25
- createProvider,
26
28
  executeScript,
27
- generateRubrics,
28
29
  getAgentvHome,
29
30
  getOutputFilenames,
30
31
  getWorkspacePoolRoot,
31
32
  isAgentSkillsFormat,
32
33
  loadTestById,
34
+ loadTestSuite,
33
35
  loadTests,
34
36
  normalizeLineEndings,
35
37
  parseAgentSkillsEvals,
@@ -37,7 +39,7 @@ import {
37
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
38
40
  transpileEvalYamlFile,
39
41
  trimBaselineResult
40
- } from "./chunk-OYD2NB55.js";
42
+ } from "./chunk-UYBLUYHN.js";
41
43
  import {
42
44
  __commonJS,
43
45
  __esm,
@@ -2888,7 +2890,6 @@ function oneOf(literals) {
2888
2890
  }
2889
2891
 
2890
2892
  // src/commands/compare/index.ts
2891
- import { readFileSync } from "node:fs";
2892
2893
  var colors = {
2893
2894
  reset: "\x1B[0m",
2894
2895
  bold: "\x1B[1m",
@@ -2902,41 +2903,22 @@ var colors = {
2902
2903
  var noColor = process.env.NO_COLOR !== void 0 || !process.stdout.isTTY;
2903
2904
  var c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ""])) : colors;
2904
2905
  function loadJsonlResults(filePath) {
2905
- const content = readFileSync(filePath, "utf8");
2906
- const lines = content.trim().split("\n").filter((line) => line.trim());
2907
- return lines.map((line) => {
2908
- const record = JSON.parse(line);
2909
- const testId = record.test_id ?? record.eval_id;
2910
- if (typeof testId !== "string") {
2911
- throw new Error(`Missing test_id in result: ${line}`);
2912
- }
2913
- if (typeof record.score !== "number") {
2914
- throw new Error(`Missing or invalid score in result: ${line}`);
2915
- }
2916
- return { testId, score: record.score };
2917
- });
2906
+ return loadLightweightResults(resolveResultSourcePath(filePath)).map((record) => ({
2907
+ testId: record.testId,
2908
+ score: record.score
2909
+ }));
2918
2910
  }
2919
2911
  function loadCombinedResults(filePath) {
2920
- const content = readFileSync(filePath, "utf8");
2921
- const lines = content.trim().split("\n").filter((line) => line.trim());
2922
2912
  const groups = /* @__PURE__ */ new Map();
2923
- for (const line of lines) {
2924
- const record = JSON.parse(line);
2925
- const testId = record.test_id ?? record.eval_id;
2926
- if (typeof testId !== "string") {
2927
- throw new Error(`Missing test_id in result: ${line}`);
2928
- }
2929
- if (typeof record.score !== "number") {
2930
- throw new Error(`Missing or invalid score in result: ${line}`);
2931
- }
2913
+ for (const record of loadLightweightResults(resolveResultSourcePath(filePath))) {
2932
2914
  if (typeof record.target !== "string") {
2933
- throw new Error(`Missing target field in combined result: ${line}`);
2915
+ throw new Error(`Missing target field in combined result source: ${filePath}`);
2934
2916
  }
2935
2917
  const target = record.target;
2936
2918
  if (!groups.has(target)) {
2937
2919
  groups.set(target, []);
2938
2920
  }
2939
- groups.get(target)?.push({ testId, score: record.score });
2921
+ groups.get(target)?.push({ testId: record.testId, score: record.score });
2940
2922
  }
2941
2923
  return groups;
2942
2924
  }
@@ -3303,11 +3285,11 @@ var compareCommand = command({
3303
3285
  });
3304
3286
 
3305
3287
  // src/commands/convert/index.ts
3306
- import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
3288
+ import { readFileSync, writeFileSync } from "node:fs";
3307
3289
  import path from "node:path";
3308
3290
  import { stringify as stringifyYaml } from "yaml";
3309
3291
  async function convertJsonlToHtml(inputPath, outputPath) {
3310
- const content = readFileSync2(inputPath, "utf8");
3292
+ const content = readFileSync(inputPath, "utf8");
3311
3293
  const lines = content.trim().split("\n").filter((line) => line.trim());
3312
3294
  const writer = await HtmlWriter.open(outputPath);
3313
3295
  for (const line of lines) {
@@ -3317,7 +3299,7 @@ async function convertJsonlToHtml(inputPath, outputPath) {
3317
3299
  return lines.length;
3318
3300
  }
3319
3301
  function convertJsonlToYaml(inputPath, outputPath) {
3320
- const content = readFileSync2(inputPath, "utf8");
3302
+ const content = readFileSync(inputPath, "utf8");
3321
3303
  const lines = content.trim().split("\n").filter((line) => line.trim());
3322
3304
  let yamlOutput = "";
3323
3305
  let isFirst = true;
@@ -3336,7 +3318,7 @@ function convertJsonlToYaml(inputPath, outputPath) {
3336
3318
  return lines.length;
3337
3319
  }
3338
3320
  function convertEvalsJsonToYaml(inputPath) {
3339
- const content = readFileSync2(inputPath, "utf8");
3321
+ const content = readFileSync(inputPath, "utf8");
3340
3322
  const parsed = JSON.parse(content);
3341
3323
  if (!isAgentSkillsFormat(parsed)) {
3342
3324
  throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
@@ -3924,7 +3906,7 @@ var evalPromptCommand = subcommands({
3924
3906
  });
3925
3907
 
3926
3908
  // src/commands/eval/commands/assert.ts
3927
- import { readFileSync as readFileSync3 } from "node:fs";
3909
+ import { readFileSync as readFileSync2 } from "node:fs";
3928
3910
  import path3 from "node:path";
3929
3911
  import fg from "fast-glob";
3930
3912
  var evalAssertCommand = command({
@@ -3956,7 +3938,7 @@ var evalAssertCommand = command({
3956
3938
  let resolvedOutput;
3957
3939
  let resolvedInput;
3958
3940
  if (file) {
3959
- const content = JSON.parse(readFileSync3(path3.resolve(file), "utf8"));
3941
+ const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
3960
3942
  resolvedOutput = content.output ?? "";
3961
3943
  resolvedInput = content.input ?? "";
3962
3944
  } else {
@@ -4183,7 +4165,7 @@ var evalRunCommand = command({
4183
4165
  artifacts: option({
4184
4166
  type: optional(string),
4185
4167
  long: "artifacts",
4186
- description: "Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory"
4168
+ description: "Write companion artifacts (index.jsonl, <test>/grading.json, <test>/timing.json, timing.json, benchmark.json) to the specified directory"
4187
4169
  }),
4188
4170
  graderTarget: option({
4189
4171
  type: optional(string),
@@ -4203,7 +4185,7 @@ var evalRunCommand = command({
4203
4185
  },
4204
4186
  handler: async (args) => {
4205
4187
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4206
- const { launchInteractiveWizard } = await import("./interactive-FZJANO4A.js");
4188
+ const { launchInteractiveWizard } = await import("./interactive-5X62YEEX.js");
4207
4189
  await launchInteractiveWizard();
4208
4190
  return;
4209
4191
  }
@@ -4257,212 +4239,31 @@ var evalCommand = subcommands({
4257
4239
  }
4258
4240
  });
4259
4241
 
4260
- // src/commands/generate/rubrics.ts
4261
- import { readFile, writeFile as writeFile2 } from "node:fs/promises";
4262
- import path4 from "node:path";
4263
- import { pathToFileURL } from "node:url";
4264
- import { isMap, isSeq, parseDocument } from "yaml";
4265
- function isJsonObject(value) {
4266
- return typeof value === "object" && value !== null && !Array.isArray(value);
4267
- }
4268
- function asString(value) {
4269
- return typeof value === "string" ? value : void 0;
4270
- }
4271
- async function loadRubricGenerator() {
4272
- const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
4273
- if (customGenerator) {
4274
- const generatorPath = path4.resolve(customGenerator);
4275
- const generatorUrl = pathToFileURL(generatorPath).href;
4276
- const module = await import(generatorUrl);
4277
- return module.generateRubrics;
4278
- }
4279
- return generateRubrics;
4280
- }
4281
- async function generateRubricsCommand(options) {
4282
- const { file, target: targetOverride, verbose } = options;
4283
- console.log(`Generating rubrics for: ${file}`);
4284
- const absolutePath = path4.resolve(file);
4285
- const content = await readFile(absolutePath, "utf8");
4286
- const doc = parseDocument(content);
4287
- const parsed = doc.toJSON();
4288
- if (!isJsonObject(parsed)) {
4289
- throw new Error(`Invalid YAML file format: ${file}`);
4290
- }
4291
- const suite = parsed;
4292
- const evalcases = suite.tests;
4293
- if (!Array.isArray(evalcases)) {
4294
- throw new Error(`No tests found in ${file}`);
4295
- }
4296
- const targetSelection = await selectTarget({
4297
- testFilePath: absolutePath,
4298
- repoRoot: process.cwd(),
4299
- cwd: process.cwd(),
4300
- cliTargetName: targetOverride,
4301
- dryRun: false,
4302
- dryRunDelay: 0,
4303
- dryRunDelayMin: 0,
4304
- dryRunDelayMax: 0,
4305
- env: process.env
4306
- });
4307
- if (verbose) {
4308
- console.log(`Using target: ${targetSelection.targetName}`);
4309
- }
4310
- const provider = createProvider(targetSelection.resolvedTarget);
4311
- const generateRubricsFunc = await loadRubricGenerator();
4312
- let updatedCount = 0;
4313
- let skippedCount = 0;
4314
- const evalcasesNode = doc.getIn(["tests"]);
4315
- if (!evalcasesNode || !isSeq(evalcasesNode)) {
4316
- throw new Error("tests must be a sequence");
4317
- }
4318
- for (let i = 0; i < evalcases.length; i++) {
4319
- const rawCase = evalcases[i];
4320
- if (!isJsonObject(rawCase)) {
4321
- continue;
4322
- }
4323
- const evalCase = rawCase;
4324
- const id = asString(evalCase.id) ?? "unknown";
4325
- const expectedOutcome = asString(evalCase.criteria) ?? asString(evalCase.outcome);
4326
- if (!expectedOutcome) {
4327
- if (verbose) {
4328
- console.log(` Skipping ${id}: no criteria`);
4329
- }
4330
- skippedCount++;
4331
- continue;
4332
- }
4333
- if (evalCase.rubrics !== void 0) {
4334
- if (verbose) {
4335
- console.log(` Skipping ${id}: rubrics already defined`);
4336
- }
4337
- skippedCount++;
4338
- continue;
4339
- }
4340
- console.log(` Generating rubrics for: ${id}`);
4341
- const question = extractQuestion(evalCase);
4342
- const referenceAnswer = asString(evalCase.reference_answer);
4343
- const rubrics = await generateRubricsFunc({
4344
- criteria: expectedOutcome,
4345
- question,
4346
- referenceAnswer,
4347
- provider
4348
- });
4349
- const caseNode = evalcasesNode.items[i];
4350
- if (caseNode && isMap(caseNode)) {
4351
- caseNode.set(
4352
- "rubrics",
4353
- rubrics.filter((r) => r.outcome !== void 0).map((r) => ({
4354
- id: r.id,
4355
- outcome: r.outcome,
4356
- weight: r.weight,
4357
- required: r.required ?? true
4358
- }))
4359
- );
4360
- }
4361
- updatedCount++;
4362
- if (verbose) {
4363
- console.log(` Generated ${rubrics.length} rubric(s)`);
4364
- }
4365
- }
4366
- if (updatedCount > 0) {
4367
- const output = doc.toString();
4368
- await writeFile2(absolutePath, output, "utf8");
4369
- console.log(`
4370
- Updated ${updatedCount} test(s) with generated rubrics`);
4371
- if (skippedCount > 0) {
4372
- console.log(`Skipped ${skippedCount} test(s)`);
4373
- }
4374
- } else {
4375
- console.log("\nNo tests updated (all already have rubrics or missing criteria)");
4376
- }
4377
- }
4378
- function extractQuestion(evalCase) {
4379
- const explicitQuestion = asString(evalCase.question);
4380
- if (explicitQuestion) {
4381
- return explicitQuestion;
4382
- }
4383
- const inputMessages = evalCase.input;
4384
- if (!Array.isArray(inputMessages)) {
4385
- return void 0;
4386
- }
4387
- for (const msg of inputMessages) {
4388
- if (!isJsonObject(msg)) {
4389
- continue;
4390
- }
4391
- if (msg.role === "user" && typeof msg.content === "string") {
4392
- return msg.content;
4393
- }
4394
- }
4395
- return void 0;
4396
- }
4397
-
4398
- // src/commands/generate/index.ts
4399
- var rubricsCommand = command({
4400
- name: "rubrics",
4401
- description: "Generate rubrics from criteria in YAML eval file",
4402
- args: {
4403
- file: positional({
4404
- type: string,
4405
- displayName: "file",
4406
- description: "Path to YAML eval file"
4407
- }),
4408
- target: option({
4409
- type: optional(string),
4410
- long: "target",
4411
- short: "t",
4412
- description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
4413
- }),
4414
- verbose: flag({
4415
- long: "verbose",
4416
- short: "v",
4417
- description: "Show detailed progress"
4418
- })
4419
- },
4420
- handler: async ({ file, target, verbose }) => {
4421
- try {
4422
- await generateRubricsCommand({
4423
- file,
4424
- target,
4425
- verbose
4426
- });
4427
- } catch (error) {
4428
- console.error(`Error: ${error.message}`);
4429
- process.exit(1);
4430
- }
4431
- }
4432
- });
4433
- var generateCommand = subcommands({
4434
- name: "generate",
4435
- description: "Generate evaluation artifacts",
4436
- cmds: {
4437
- rubrics: rubricsCommand
4438
- }
4439
- });
4440
-
4441
4242
  // src/commands/init/index.ts
4442
4243
  import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4443
- import path6 from "node:path";
4244
+ import path5 from "node:path";
4444
4245
  import * as readline from "node:readline/promises";
4445
4246
 
4446
4247
  // src/templates/index.ts
4447
- import { readFileSync as readFileSync4, readdirSync, statSync } from "node:fs";
4448
- import path5 from "node:path";
4248
+ import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
4249
+ import path4 from "node:path";
4449
4250
  import { fileURLToPath } from "node:url";
4450
4251
  function getAgentvTemplates() {
4451
4252
  return getTemplatesFromDir(".agentv");
4452
4253
  }
4453
4254
  function getEnvExampleTemplate() {
4454
- const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4455
- const templatesBase = currentDir.includes(`${path5.sep}dist`) ? path5.join(currentDir, "templates") : currentDir;
4456
- const content = readFileSync4(path5.join(templatesBase, ".env.example"), "utf-8");
4255
+ const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4256
+ const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
4257
+ const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
4457
4258
  return { path: ".env.example", content };
4458
4259
  }
4459
4260
  function getTemplatesFromDir(subdir) {
4460
- const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4261
+ const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4461
4262
  let templatesDir;
4462
- if (currentDir.includes(`${path5.sep}dist`)) {
4463
- templatesDir = path5.join(currentDir, "templates", subdir);
4263
+ if (currentDir.includes(`${path4.sep}dist`)) {
4264
+ templatesDir = path4.join(currentDir, "templates", subdir);
4464
4265
  } else {
4465
- templatesDir = path5.join(currentDir, subdir);
4266
+ templatesDir = path4.join(currentDir, subdir);
4466
4267
  }
4467
4268
  return readTemplatesRecursively(templatesDir, "");
4468
4269
  }
@@ -4470,15 +4271,15 @@ function readTemplatesRecursively(dir, relativePath) {
4470
4271
  const templates = [];
4471
4272
  const entries2 = readdirSync(dir);
4472
4273
  for (const entry of entries2) {
4473
- const fullPath = path5.join(dir, entry);
4274
+ const fullPath = path4.join(dir, entry);
4474
4275
  const stat3 = statSync(fullPath);
4475
- const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
4276
+ const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
4476
4277
  if (stat3.isDirectory()) {
4477
4278
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
4478
4279
  } else {
4479
- const content = readFileSync4(fullPath, "utf-8");
4280
+ const content = readFileSync3(fullPath, "utf-8");
4480
4281
  templates.push({
4481
- path: entryRelativePath.split(path5.sep).join("/"),
4282
+ path: entryRelativePath.split(path4.sep).join("/"),
4482
4283
  // Normalize to forward slashes
4483
4284
  content
4484
4285
  });
@@ -4507,22 +4308,22 @@ async function promptYesNo(message) {
4507
4308
  }
4508
4309
  }
4509
4310
  async function initCommand(options = {}) {
4510
- const targetPath = path6.resolve(options.targetPath ?? ".");
4511
- const agentvDir = path6.join(targetPath, ".agentv");
4311
+ const targetPath = path5.resolve(options.targetPath ?? ".");
4312
+ const agentvDir = path5.join(targetPath, ".agentv");
4512
4313
  const otherAgentvTemplates = getAgentvTemplates();
4513
4314
  const envTemplate = getEnvExampleTemplate();
4514
4315
  const existingFiles = [];
4515
4316
  if (envTemplate) {
4516
- const envFilePath = path6.join(targetPath, ".env.example");
4317
+ const envFilePath = path5.join(targetPath, ".env.example");
4517
4318
  if (existsSync(envFilePath)) {
4518
4319
  existingFiles.push(".env.example");
4519
4320
  }
4520
4321
  }
4521
4322
  if (existsSync(agentvDir)) {
4522
4323
  for (const template of otherAgentvTemplates) {
4523
- const targetFilePath = path6.join(agentvDir, template.path);
4324
+ const targetFilePath = path5.join(agentvDir, template.path);
4524
4325
  if (existsSync(targetFilePath)) {
4525
- existingFiles.push(path6.relative(targetPath, targetFilePath));
4326
+ existingFiles.push(path5.relative(targetPath, targetFilePath));
4526
4327
  }
4527
4328
  }
4528
4329
  }
@@ -4544,18 +4345,18 @@ async function initCommand(options = {}) {
4544
4345
  mkdirSync(agentvDir, { recursive: true });
4545
4346
  }
4546
4347
  if (envTemplate) {
4547
- const envFilePath = path6.join(targetPath, ".env.example");
4348
+ const envFilePath = path5.join(targetPath, ".env.example");
4548
4349
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
4549
4350
  console.log("Created .env.example");
4550
4351
  }
4551
4352
  for (const template of otherAgentvTemplates) {
4552
- const targetFilePath = path6.join(agentvDir, template.path);
4553
- const targetDirPath = path6.dirname(targetFilePath);
4353
+ const targetFilePath = path5.join(agentvDir, template.path);
4354
+ const targetDirPath = path5.dirname(targetFilePath);
4554
4355
  if (!existsSync(targetDirPath)) {
4555
4356
  mkdirSync(targetDirPath, { recursive: true });
4556
4357
  }
4557
4358
  writeFileSync2(targetFilePath, template.content, "utf-8");
4558
- console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
4359
+ console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
4559
4360
  }
4560
4361
  console.log("\nAgentV initialized successfully!");
4561
4362
  console.log("\nFiles installed to root:");
@@ -4563,7 +4364,7 @@ async function initCommand(options = {}) {
4563
4364
  console.log(" - .env.example");
4564
4365
  }
4565
4366
  console.log(`
4566
- Files installed to ${path6.relative(targetPath, agentvDir)}:`);
4367
+ Files installed to ${path5.relative(targetPath, agentvDir)}:`);
4567
4368
  for (const t of otherAgentvTemplates) {
4568
4369
  console.log(` - ${t.path}`);
4569
4370
  }
@@ -4593,13 +4394,443 @@ var initCmdTsCommand = command({
4593
4394
  }
4594
4395
  });
4595
4396
 
4397
+ // src/commands/pipeline/bench.ts
4398
+ import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
4399
+ import { join } from "node:path";
4400
/**
 * `pipeline bench <export-dir>`.
 *
 * Reads `manifest.json` from the export directory, merges the per-test
 * code-grader result files with LLM grader scores supplied as JSON on stdin
 * (shape: `{ [testId]: { [graderName]: { score, assertions } } }`), and writes
 * `<testId>/grading.json`, `index.jsonl` and `benchmark.json` back into the
 * export directory.
 */
var evalBenchCommand = command({
  name: "bench",
  description: "Merge evaluator scores and produce benchmark artifacts",
  args: {
    exportDir: positional({
      type: string,
      displayName: "export-dir",
      description: "Export directory from pipeline input/grade"
    })
  },
  handler: async ({ exportDir }) => {
    // A missing grader directory simply means "nothing to merge" and stays
    // silent. Any other failure (corrupt JSON, permissions, ...) still does not
    // abort the run, but is reported so incomplete results are not a mystery.
    const reportUnlessMissing = (error, dir) => {
      const code = error?.code;
      if (code === "ENOENT" || code === "ENOTDIR") return;
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Warning: could not read all grader files in ${dir}: ${message}`);
    };
    const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
    const testIds = manifest.test_ids;
    const targetName = manifest.target?.name ?? "unknown";
    const stdinData = await readStdin();
    const llmScores = stdinData ? JSON.parse(stdinData) : {};
    const indexLines = [];
    const allPassRates = [];
    for (const testId of testIds) {
      const testDir = join(exportDir, testId);
      const evaluators = [];
      const allAssertions = [];
      // Records one evaluator and flattens its assertions into the per-test list.
      // A non-numeric score counts as 0 (same rule the grade command applies),
      // so the weighted score can never become NaN.
      const addEvaluator = (name, type, score, weight, assertions) => {
        evaluators.push({
          name,
          type,
          score: typeof score === "number" ? score : 0,
          weight,
          assertions
        });
        for (const a of assertions) {
          allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
        }
      };
      const codeResultsDir = join(testDir, "code_grader_results");
      try {
        const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
        for (const file of resultFiles) {
          const result = JSON.parse(await readFile(join(codeResultsDir, file), "utf8"));
          addEvaluator(
            result.name,
            "code-grader",
            result.score,
            result.weight ?? 1,
            result.assertions ?? []
          );
        }
      } catch (error) {
        reportUnlessMissing(error, codeResultsDir);
      }
      const testLlmScores = llmScores[testId] ?? {};
      const llmGradersDir = join(testDir, "llm_graders");
      try {
        const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
        for (const file of graderFiles) {
          const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
          const graderName = graderMeta.name;
          // Only graders that actually received a score on stdin are merged.
          const llmResult = testLlmScores[graderName];
          if (llmResult) {
            addEvaluator(
              graderName,
              "llm-grader",
              llmResult.score,
              graderMeta.weight ?? 1,
              llmResult.assertions ?? []
            );
          }
        }
      } catch (error) {
        reportUnlessMissing(error, llmGradersDir);
      }
      const totalWeight = evaluators.reduce((sum, e) => sum + e.weight, 0);
      const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
      const passed = allAssertions.filter((a) => a.passed).length;
      const failed = allAssertions.filter((a) => !a.passed).length;
      // Rates and scores are rounded to three decimals before being written.
      const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
      allPassRates.push(passRate);
      const grading = {
        assertions: allAssertions,
        summary: { passed, failed, total: allAssertions.length, pass_rate: passRate },
        execution_metrics: { tool_calls: {}, total_tool_calls: 0, errors_encountered: 0 },
        evaluators: evaluators.map((e) => ({
          name: e.name,
          type: e.type,
          score: e.score,
          reasoning: "",
          weight: e.weight
        }))
      };
      await writeFile2(
        join(testDir, "grading.json"),
        `${JSON.stringify(grading, null, 2)}\n`,
        "utf8"
      );
      indexLines.push(
        JSON.stringify({
          timestamp: manifest.timestamp,
          test_id: testId,
          score: Math.round(weightedScore * 1e3) / 1e3,
          target: targetName,
          grading_path: `${testId}/grading.json`,
          timing_path: `${testId}/timing.json`
        })
      );
    }
    await writeFile2(
      join(exportDir, "index.jsonl"),
      indexLines.length > 0 ? `${indexLines.join("\n")}\n` : "",
      "utf8"
    );
    const passRateStats = computeStats(allPassRates);
    const benchmark = {
      metadata: {
        eval_file: manifest.eval_file,
        timestamp: manifest.timestamp,
        targets: [targetName],
        tests_run: testIds
      },
      run_summary: {
        [targetName]: {
          pass_rate: passRateStats,
          // Timing and token figures are written as zero placeholders here.
          time_seconds: { mean: 0, stddev: 0 },
          tokens: { mean: 0, stddev: 0 }
        }
      },
      notes: []
    };
    await writeFile2(
      join(exportDir, "benchmark.json"),
      `${JSON.stringify(benchmark, null, 2)}\n`,
      "utf8"
    );
    console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
  }
});
4530
/**
 * Reads everything available on stdin and returns it as a trimmed UTF-8 string.
 *
 * Returns "" immediately when stdin is an interactive terminal: nothing was
 * piped in, and waiting for EOF would leave the command hanging until the user
 * presses Ctrl-D.
 *
 * @returns {Promise<string>} Trimmed stdin contents, or "" when nothing was piped.
 */
async function readStdin() {
  if (process.stdin.isTTY) return "";
  const chunks = [];
  for await (const chunk of process.stdin) {
    // Chunks are strings when an encoding has been set on the stream;
    // Buffer.concat only accepts Buffers/Uint8Arrays.
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}
4537
/**
 * Mean and population standard deviation of a list of numbers, each rounded
 * to three decimals.
 *
 * @param {number[]} values
 * @returns {{ mean: number, stddev: number }} Both fields are 0 for an empty list.
 */
function computeStats(values) {
  const count = values.length;
  if (count === 0) {
    return { mean: 0, stddev: 0 };
  }
  const roundTo3 = (x) => Math.round(x * 1e3) / 1e3;
  let total = 0;
  for (const v of values) {
    total = total + v;
  }
  const average = total / count;
  let squaredDeviations = 0;
  for (const v of values) {
    squaredDeviations = squaredDeviations + (v - average) ** 2;
  }
  const variance = squaredDeviations / count;
  return {
    mean: roundTo3(average),
    stddev: roundTo3(Math.sqrt(variance))
  };
}
4546
+
4547
+ // src/commands/pipeline/grade.ts
4548
+ import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
4549
+ import { join as join2 } from "node:path";
4550
/**
 * `pipeline grade <export-dir>`.
 *
 * For every test listed in `manifest.json`, runs each grader config found in
 * `<testId>/code_graders/*.json` through `executeScript`, feeding it a JSON
 * payload built from `response.md` and `input.json`, and stores the outcome in
 * `<testId>/code_grader_results/<graderName>.json`. A grader that fails is
 * recorded with score 0 and a single failed assertion carrying the error text.
 */
var evalGradeCommand = command({
  name: "grade",
  description: "Run code-grader assertions on responses in an export directory",
  args: {
    exportDir: positional({
      type: string,
      displayName: "export-dir",
      description: "Export directory from pipeline input"
    })
  },
  handler: async ({ exportDir }) => {
    const manifest = JSON.parse(await readFile2(join2(exportDir, "manifest.json"), "utf8"));
    let totalGraders = 0;
    let totalPassed = 0;
    for (const testId of manifest.test_ids) {
      const testDir = join2(exportDir, testId);
      const codeGradersDir = join2(testDir, "code_graders");
      const resultsDir = join2(testDir, "code_grader_results");
      let graderFiles;
      try {
        const entries = await readdir2(codeGradersDir);
        graderFiles = entries.filter((f) => f.endsWith(".json"));
      } catch {
        // No readable code_graders directory: this test has nothing to grade.
        continue;
      }
      if (graderFiles.length === 0) continue;
      await mkdir2(resultsDir, { recursive: true });
      const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
      const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
      // Writes one grader outcome next to its siblings, named after the grader.
      const saveResult = (outcome) => writeFile3(
        join2(resultsDir, `${outcome.name}.json`),
        `${JSON.stringify(outcome, null, 2)}\n`,
        "utf8"
      );
      for (const graderFile of graderFiles) {
        const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
        const graderName = graderConfig.name;
        const weight = graderConfig.weight ?? 1;
        // Fields this command cannot supply are sent as null / empty values.
        const payload = JSON.stringify({
          output: [{ role: "assistant", content: responseText }],
          input: inputData.input_messages,
          question: inputData.input_text,
          criteria: "",
          expected_output: [],
          reference_answer: "",
          input_files: [],
          trace: null,
          token_usage: null,
          cost_usd: null,
          duration_ms: null,
          start_time: null,
          end_time: null,
          file_changes: null,
          workspace_path: null,
          config: graderConfig.config ?? null,
          metadata: {},
          input_text: inputData.input_text,
          output_text: responseText,
          expected_output_text: ""
        });
        try {
          const stdout = await executeScript(graderConfig.command, payload, void 0, graderConfig.cwd);
          const parsed = JSON.parse(stdout);
          let score = 0;
          if (typeof parsed.score === "number") {
            score = parsed.score;
          }
          let assertions = [];
          if (Array.isArray(parsed.assertions)) {
            assertions = parsed.assertions;
          }
          await saveResult({
            name: graderName,
            type: "code-grader",
            score,
            weight,
            assertions,
            details: parsed.details ?? {}
          });
          totalGraders++;
          // A grader counts as passed from a score of 0.5 upwards.
          if (score >= 0.5) totalPassed++;
        } catch (error) {
          const message = error instanceof Error ? error.message : String(error);
          console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
          await saveResult({
            name: graderName,
            type: "code-grader",
            score: 0,
            weight,
            assertions: [{ text: `Error: ${message}`, passed: false }],
            details: { error: message }
          });
          totalGraders++;
        }
      }
    }
    console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
  }
});
4655
+
4656
+ // src/commands/pipeline/input.ts
4657
+ import { readFile as readFile3 } from "node:fs/promises";
4658
+ import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
4659
+ import { dirname, join as join3, resolve } from "node:path";
4660
/**
 * `pipeline input <eval-path> --out <dir>`.
 *
 * Loads the eval suite and writes, per test, `input.json`, `invoke.json`,
 * `criteria.md`, an optional `expected_output.json` and the grader configs into
 * `<out>/<test.id>/`, followed by a `manifest.json` listing every extracted
 * test id. Exits with status 1 when the suite contains no tests.
 */
var evalInputCommand = command({
  name: "input",
  description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
  args: {
    evalPath: positional({
      type: string,
      displayName: "eval-path",
      description: "Path to eval YAML file"
    }),
    out: option({
      type: string,
      long: "out",
      description: "Output directory for extracted inputs"
    })
  },
  handler: async ({ evalPath, out }) => {
    const resolvedEvalPath = resolve(evalPath);
    const outDir = resolve(out);
    const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
    const evalDir = dirname(resolvedEvalPath);
    const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
    const tests = suite.tests;
    if (tests.length === 0) {
      console.error("No tests found in eval file.");
      process.exit(1);
    }
    // Defaults describe "the agent itself is the target"; they are replaced
    // only when target selection succeeds.
    let targetInfo = null;
    let targetName = "agent";
    let targetKind = "agent";
    try {
      const selection = await selectTarget({
        testFilePath: resolvedEvalPath,
        repoRoot,
        cwd: evalDir,
        dryRun: false,
        dryRunDelay: 0,
        dryRunDelayMin: 0,
        dryRunDelayMax: 0,
        env: process.env
      });
      targetName = selection.targetName;
      if (selection.resolvedTarget.kind === "cli") {
        targetKind = "cli";
        const config = selection.resolvedTarget.config;
        targetInfo = {
          kind: "cli",
          command: config.command,
          cwd: config.cwd ?? evalDir,
          timeoutMs: config.timeoutMs ?? 3e4
        };
      }
    } catch {
      // Deliberate best-effort: if no target can be selected, fall back to the
      // agent-mode defaults set above instead of failing the extraction.
    }
    const testIds = [];
    for (const test of tests) {
      const testDir = join3(outDir, test.id);
      await mkdir3(testDir, { recursive: true });
      testIds.push(test.id);
      const inputText = test.question;
      // Keep only role and content; content is passed through unchanged
      // whether it is a plain string or structured.
      const inputMessages = test.input.map((m) => ({
        role: m.role,
        content: m.content
      }));
      await writeJson(join3(testDir, "input.json"), {
        input_text: inputText,
        input_messages: inputMessages,
        file_paths: test.file_paths,
        metadata: test.metadata ?? {}
      });
      if (targetInfo) {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "cli",
          command: targetInfo.command,
          cwd: targetInfo.cwd,
          timeout_ms: targetInfo.timeoutMs,
          env: {}
        });
      } else {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "agent",
          instructions: "Execute this task in the current workspace. The agent IS the target."
        });
      }
      await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
      // expected_output.json is written only when there is something to compare against.
      if (test.expected_output.length > 0 || test.reference_answer !== void 0 && test.reference_answer !== "") {
        await writeJson(join3(testDir, "expected_output.json"), {
          expected_output: test.expected_output,
          reference_answer: test.reference_answer ?? ""
        });
      }
      await writeGraderConfigs(testDir, test.assertions ?? [], evalDir);
    }
    await writeJson(join3(outDir, "manifest.json"), {
      eval_file: resolvedEvalPath,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      target: {
        name: targetName,
        kind: targetKind
      },
      test_ids: testIds
    });
    console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
  }
});
4764
/**
 * Writes one JSON config per grader assertion of a test.
 *
 * Code graders (`code-grader` / `code-judge`) go to `<testDir>/code_graders/`,
 * LLM graders (`llm-grader` / `llm-judge`) to `<testDir>/llm_graders/`. Each
 * directory is created only once the first grader of that kind is seen.
 * Assertions of any other type are ignored.
 *
 * @param {string} testDir Per-test output directory.
 * @param {Array<object>} assertions Assertion configs of the test.
 * @param {string} evalDir Fallback working directory for code graders.
 */
async function writeGraderConfigs(testDir, assertions, evalDir) {
  const codeGradersDir = join3(testDir, "code_graders");
  const llmGradersDir = join3(testDir, "llm_graders");
  const createdDirs = new Set();
  const ensureDir = async (dir) => {
    if (createdDirs.has(dir)) return;
    await mkdir3(dir, { recursive: true });
    createdDirs.add(dir);
  };
  for (const assertion of assertions) {
    switch (assertion.type) {
      case "code-grader":
      case "code-judge": {
        await ensureDir(codeGradersDir);
        await writeJson(join3(codeGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          command: assertion.command,
          cwd: assertion.resolvedCwd ?? assertion.cwd ?? evalDir,
          weight: assertion.weight ?? 1,
          config: assertion.config ?? {}
        });
        break;
      }
      case "llm-grader":
      case "llm-judge": {
        await ensureDir(llmGradersDir);
        // Prefer the prompt file's contents; fall back to an inline string
        // prompt when there is no file or it cannot be read.
        const inlinePrompt = typeof assertion.prompt === "string" ? assertion.prompt : "";
        let promptContent = inlinePrompt;
        if (assertion.resolvedPromptPath) {
          try {
            promptContent = await readFile3(assertion.resolvedPromptPath, "utf8");
          } catch {
            promptContent = inlinePrompt;
          }
        }
        await writeJson(join3(llmGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          prompt_content: promptContent,
          weight: assertion.weight ?? 1,
          threshold: 0.5,
          config: {}
        });
        break;
      }
      default:
        break;
    }
  }
}
4809
/**
 * Serializes `data` as 2-space-indented JSON with a trailing newline and
 * writes it to `filePath` as UTF-8.
 *
 * @param {string} filePath
 * @param {unknown} data
 */
async function writeJson(filePath, data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile4(filePath, serialized + "\n", "utf8");
}
4813
+
4814
+ // src/commands/pipeline/index.ts
4815
// `pipeline` command group: exposes the three agent-mode stages as subcommands.
var pipelineCommand = subcommands({
  name: "pipeline",
  description: "Agent-mode eval pipeline commands (input \u2192 grade \u2192 bench)",
  cmds: { input: evalInputCommand, grade: evalGradeCommand, bench: evalBenchCommand }
});
4824
+
4596
4825
  // src/commands/results/export.ts
4597
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
4598
- import path8 from "node:path";
4826
+ import path7 from "node:path";
4827
+
4828
+ // src/commands/results/shared.ts
4829
+ import { existsSync as existsSync2 } from "node:fs";
4599
4830
 
4600
4831
  // src/commands/trace/utils.ts
4601
- import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4602
- import path7 from "node:path";
4832
+ import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4833
+ import path6 from "node:path";
4603
4834
  var colors2 = {
4604
4835
  reset: "\x1B[0m",
4605
4836
  bold: "\x1B[1m",
@@ -4625,7 +4856,26 @@ function padLeft2(str, len) {
4625
4856
  return " ".repeat(Math.max(0, len - plainLen)) + str;
4626
4857
  }
4627
4858
/**
 * Loads raw result records from a result source, picking the loader from the
 * resolved path: `.json` files are read as OTLP traces, the result index file
 * as a manifest, and anything else as JSONL records.
 */
function loadResultFile(filePath) {
  const resolved = resolveTraceResultPath(filePath);
  const isJsonFile = path6.extname(resolved) === ".json";
  if (isJsonFile) {
    return loadOtlpTraceFile(resolved);
  }
  return path6.basename(resolved) === RESULT_INDEX_FILENAME
    ? loadManifestAsRawResults(resolved)
    : loadJsonlRecords(resolved);
}
4868
/**
 * Maps a user-supplied trace source to the file that should be loaded.
 *
 * A path whose basename is the legacy results filename is returned untouched;
 * every other path, whatever its extension, is handed to
 * `resolveWorkspaceOrFilePath`.
 */
function resolveTraceResultPath(filePath) {
  if (path6.basename(filePath) === LEGACY_RESULTS_FILENAME) {
    return filePath;
  }
  return resolveWorkspaceOrFilePath(filePath);
}
4877
+ function loadJsonlRecords(filePath) {
4878
+ const content = readFileSync4(filePath, "utf8");
4629
4879
  const lines = content.trim().split("\n").filter((line) => line.trim());
4630
4880
  return lines.map((line, i) => {
4631
4881
  const record = JSON.parse(line);
@@ -4635,25 +4885,274 @@ function loadResultFile(filePath) {
4635
4885
  return record;
4636
4886
  });
4637
4887
  }
4888
/** Loads manifest results and converts each one to the raw snake_case record shape. */
function loadManifestAsRawResults(filePath) {
  const manifestResults = loadManifestResults(filePath);
  return manifestResults.map(toRawResult);
}
4891
/**
 * Converts a camelCase result object into the snake_case raw record shape.
 *
 * Assertions keep only `text`, `passed` and `evidence`; score entries keep
 * `name`, `type`, `score`, their assertions and `weight`. Missing optional
 * collections and token usage stay `undefined` in the output.
 */
function toRawResult(result) {
  const toRawAssertion = ({ text, passed, evidence }) => ({ text, passed, evidence });
  const toRawScore = (entry) => ({
    name: entry.name,
    type: entry.type,
    score: entry.score,
    assertions: entry.assertions?.map((item) => toRawAssertion(item)),
    weight: entry.weight
  });
  const usage = result.tokenUsage;
  let rawUsage;
  if (usage) {
    rawUsage = { input: usage.input, output: usage.output, cached: usage.cached };
  }
  return {
    timestamp: result.timestamp,
    test_id: result.testId,
    eval_set: result.eval_set,
    conversation_id: result.conversationId,
    score: result.score,
    assertions: result.assertions?.map((item) => toRawAssertion(item)),
    target: result.target,
    error: result.error,
    scores: result.scores?.map((entry) => toRawScore(entry)),
    token_usage: rawUsage,
    cost_usd: result.costUsd,
    duration_ms: result.durationMs,
    start_time: result.startTime,
    end_time: result.endTime,
    input: result.input,
    output: result.output,
    file_changes: result.fileChanges
  };
}
4930
/**
 * Loads an OTLP JSON trace file and turns every root span into one raw result
 * record.
 *
 * A root is a span with no parent, or whose parent is absent from the file.
 * Tool/LLM call counts, durations and token usage are derived from the root's
 * descendant spans, with `agentv.trace.*` root attributes taking part as
 * overrides or fallbacks as coded below.
 *
 * @param {string} filePath Path to the OTLP JSON file.
 * @returns {Array<object>} One record per root span; empty when the file has no spans.
 * @throws {Error} When a root span carries no numeric `agentv.score` attribute.
 */
function loadOtlpTraceFile(filePath) {
  const parsed = JSON.parse(readFileSync4(filePath, "utf8"));
  const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
  if (!spans || spans.length === 0) {
    return [];
  }
  // Index spans by id and group children under their parent id.
  const spanMap = /* @__PURE__ */ new Map();
  const childMap = /* @__PURE__ */ new Map();
  for (const span of spans) {
    if (!span.spanId) continue;
    spanMap.set(span.spanId, span);
    if (span.parentSpanId) {
      const siblings = childMap.get(span.parentSpanId) ?? [];
      siblings.push(span);
      childMap.set(span.parentSpanId, siblings);
    }
  }
  const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
  return roots.map((root, index) => {
    const descendants = collectChildSpans(root.spanId, childMap);
    const rootAttrs = parseOtlpAttributes(root.attributes);
    // Attributes are parsed once per descendant and reused by every pass below.
    const parsedDescendants = descendants.map((span) => ({
      ...span,
      parsedAttributes: parseOtlpAttributes(span.attributes)
    }));
    const toolSpans = parsedDescendants.filter(
      (span) => typeof span.parsedAttributes.gen_ai_tool_name === "string"
    );
    const llmSpans = parsedDescendants.filter(
      (span) => span.parsedAttributes.gen_ai_operation_name === "chat" || typeof span.name === "string" && span.name.startsWith("chat ")
    );
    const tokenUsage = parsedDescendants.reduce(
      (acc, span) => {
        const attrs = span.parsedAttributes;
        acc.input += numberAttr(attrs.gen_ai_usage_input_tokens) ?? 0;
        acc.output += numberAttr(attrs.gen_ai_usage_output_tokens) ?? 0;
        const cached = numberAttr(attrs.gen_ai_usage_cache_read_input_tokens);
        // `cached` stays undefined unless at least one span reports a positive value.
        if (cached !== void 0 && cached > 0) {
          acc.cached = (acc.cached ?? 0) + cached;
        }
        return acc;
      },
      { input: 0, output: 0, cached: void 0 }
    );
    const traceSummary = buildDerivedTraceSummary({
      trace: {
        event_count: numberAttr(rootAttrs.agentv_trace_event_count) ?? (toolSpans.length > 0 ? toolSpans.length : void 0),
        tool_calls: countRawSpanNames(
          toolSpans.map((span) => ({
            type: "tool",
            name: String(span.parsedAttributes.gen_ai_tool_name)
          }))
        ),
        // Spans with status code 2 are counted as errors; zero becomes undefined.
        error_count: descendants.filter((span) => span.status?.code === 2).length || void 0,
        llm_call_count: numberAttr(rootAttrs.agentv_trace_llm_call_count) ?? (llmSpans.length > 0 ? llmSpans.length : void 0)
      },
      spans: [
        ...llmSpans.map((span) => ({
          type: "llm",
          name: span.name ?? "chat",
          duration_ms: durationFromSpan(span)
        })),
        ...toolSpans.map((span) => ({
          type: "tool",
          name: String(span.parsedAttributes.gen_ai_tool_name),
          duration_ms: durationFromSpan(span)
        }))
      ],
      duration_ms: numberAttr(rootAttrs.agentv_trace_duration_ms) ?? durationFromSpan(root),
      cost_usd: numberAttr(rootAttrs.agentv_trace_cost_usd),
      // Per-span token totals win; root-level totals are used only when the span totals are zero/absent.
      token_usage: tokenUsage.input || tokenUsage.output || tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_input) || numberAttr(rootAttrs.agentv_trace_token_output) || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
        input: tokenUsage.input || numberAttr(rootAttrs.agentv_trace_token_input) || 0,
        output: tokenUsage.output || numberAttr(rootAttrs.agentv_trace_token_output) || 0,
        ...tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
          cached: tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) || 0
        } : {}
      } : void 0
    });
    const score = numberAttr(rootAttrs.agentv_score);
    if (score === void 0) {
      throw new Error(
        `Unsupported OTLP trace root span at index ${index + 1}: missing agentv.score attribute`
      );
    }
    return {
      test_id: stringAttr(rootAttrs.agentv_test_id) ?? stringAttr(rootAttrs.agentv_eval_id) ?? `trace-${index + 1}`,
      eval_set: stringAttr(rootAttrs.agentv_eval_set),
      target: stringAttr(rootAttrs.agentv_target),
      score,
      error: root.status?.code === 2 ? root.status.message : void 0,
      cost_usd: traceSummary?.cost_usd,
      duration_ms: traceSummary?.duration_ms,
      token_usage: traceSummary?.token_usage,
      trace: traceSummary ? {
        event_count: traceSummary.event_count,
        tool_calls: traceSummary.tool_calls,
        error_count: traceSummary.error_count,
        tool_durations: traceSummary.tool_durations,
        llm_call_count: traceSummary.llm_call_count,
        token_usage: traceSummary.token_usage,
        cost_usd: traceSummary.cost_usd,
        duration_ms: traceSummary.duration_ms
      } : void 0,
      spans: traceSummary?.spans,
      output: stringAttr(rootAttrs.agentv_output_text),
      // Evaluator scores are carried as root-span events named "agentv.evaluator.<name>".
      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
        const attrs = parseOtlpAttributes(event.attributes);
        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
        return {
          name,
          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
        };
      })
    };
  });
}
5047
/**
 * Collects every descendant of `spanId`: direct children first, then each
 * child's own subtree in child order.
 *
 * Each span id is expanded at most once, so malformed input in which an id
 * repeats along a parent chain (for example a child reusing its parent's
 * spanId) cannot cause unbounded recursion.
 *
 * @param {string | undefined} spanId Id whose descendants are wanted.
 * @param {Map<string, object[]>} childMap Parent span id -> direct child spans.
 * @param {Set<string>} [visited] Ids already expanded (internal to the recursion).
 * @returns {object[]} Descendant spans; empty when `spanId` is falsy or already expanded.
 */
function collectChildSpans(spanId, childMap, visited = new Set()) {
  if (!spanId || visited.has(spanId)) return [];
  visited.add(spanId);
  const direct = childMap.get(spanId) ?? [];
  const all = [...direct];
  for (const child of direct) {
    // Push one by one rather than spreading, so very large subtrees cannot
    // exceed the engine's argument-count limit.
    for (const descendant of collectChildSpans(child.spanId, childMap, visited)) {
      all.push(descendant);
    }
  }
  return all;
}
5056
/**
 * Flattens an OTLP attribute list into a plain object. Dots in attribute keys
 * become underscores (`gen_ai.tool.name` -> `gen_ai_tool_name`) and each value
 * is decoded with `parseOtlpValue`. A missing list yields an empty object.
 */
function parseOtlpAttributes(attributes) {
  const flattened = {};
  for (const { key, value } of attributes ?? []) {
    const flatKey = key.split(".").join("_");
    flattened[flatKey] = parseOtlpValue(value);
  }
  return flattened;
}
5063
/**
 * Decodes one OTLP `AnyValue` wrapper into a JavaScript value.
 *
 * Checks `stringValue`, `intValue` (converted with `Number`), `doubleValue`
 * and `boolValue` in that order, then `arrayValue` (decoded element by
 * element). Any other wrapper, or a falsy input, yields `undefined`.
 */
function parseOtlpValue(value) {
  if (!value) return void 0;
  const keepAsIs = (raw) => raw;
  const scalarDecoders = [
    ["stringValue", keepAsIs],
    ["intValue", (raw) => Number(raw)],
    ["doubleValue", keepAsIs],
    ["boolValue", keepAsIs]
  ];
  for (const [field, decode] of scalarDecoders) {
    if (field in value && value[field] !== void 0) {
      return decode(value[field]);
    }
  }
  if ("arrayValue" in value) {
    const entries = value.arrayValue?.values ?? [];
    return entries.map((entry) => parseOtlpValue(entry));
  }
  return void 0;
}
5073
/**
 * Span duration in whole milliseconds, computed from its nanosecond start and
 * end timestamps. Returns `undefined` when either timestamp does not convert
 * to a finite number.
 */
function durationFromSpan(span) {
  const startNanos = Number(span.startTimeUnixNano);
  const endNanos = Number(span.endTimeUnixNano);
  const bothFinite = Number.isFinite(startNanos) && Number.isFinite(endNanos);
  if (!bothFinite) {
    return void 0;
  }
  const elapsedNanos = endNanos - startNanos;
  return Math.round(elapsedNanos / 1e6);
}
5079
/** Returns `value` when it is a string, otherwise `undefined`. */
function stringAttr(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value;
}
5082
/** Returns `value` when it is a finite number, otherwise `undefined`. */
function numberAttr(value) {
  if (typeof value !== "number") {
    return void 0;
  }
  return Number.isFinite(value) ? value : void 0;
}
5085
/**
 * Builds a trace summary for a raw result record.
 *
 * Values already present on `result.trace` win; missing tool-call counts,
 * tool durations, event count and LLM call count are derived from
 * `result.spans`. Token usage, cost and duration fall back to the top-level
 * fields of the record. Returns `undefined` when the record has no trace, no
 * spans and none of those top-level metrics.
 */
function buildDerivedTraceSummary(result) {
  const { trace, spans } = result;
  const spanList = spans ?? [];
  const toolSpans = spanList.filter((span) => span.type === "tool");
  const llmSpans = spanList.filter((span) => span.type === "llm");
  const toolCalls = trace?.tool_calls ?? countRawSpanNames(toolSpans);
  const toolDurations = trace?.tool_durations ?? groupRawSpanDurations(toolSpans);
  const hasSpanData = (spans?.length ?? 0) > 0;
  // Counts are derived only when span data exists; otherwise they stay undefined.
  const derivedEventCount = hasSpanData ? toolSpans.length : void 0;
  const derivedLlmCallCount = hasSpanData ? llmSpans.length : void 0;
  const hasTopLevelMetrics = !(result.token_usage === void 0 && result.cost_usd === void 0 && result.duration_ms === void 0);
  if (!trace && !spans?.length && !hasTopLevelMetrics) {
    return void 0;
  }
  return {
    event_count: trace?.event_count ?? derivedEventCount,
    tool_calls: toolCalls,
    error_count: trace?.error_count,
    tool_durations: toolDurations,
    llm_call_count: trace?.llm_call_count ?? derivedLlmCallCount,
    token_usage: trace?.token_usage ?? result.token_usage,
    cost_usd: trace?.cost_usd ?? result.cost_usd,
    duration_ms: trace?.duration_ms ?? result.duration_ms,
    spans
  };
}
5108
/**
 * Counts how many spans carry each name.
 *
 * Tallies in a Map so that names matching inherited object members
 * (`constructor`, `toString`, ...) are counted like any other name instead of
 * picking up the inherited value as their starting count.
 *
 * @param {Array<{ name: string }>} spans
 * @returns {Record<string, number> | undefined} Name -> count, or `undefined` for no spans.
 */
function countRawSpanNames(spans) {
  const counts = new Map();
  for (const span of spans) {
    // Object keys are strings; coerce up front so the Map groups the same way.
    const key = String(span.name);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return counts.size > 0 ? Object.fromEntries(counts) : void 0;
}
5115
/**
 * Groups span durations by span name, skipping spans without a duration.
 *
 * Groups in a Map so that names matching inherited object members
 * (`constructor`, `toString`, ...) get their own list instead of resolving to
 * the inherited value, which has no `push`.
 *
 * @param {Array<{ name: string, duration_ms?: number }>} spans
 * @returns {Record<string, number[]> | undefined} Name -> durations in input order,
 *   or `undefined` when no span has a duration.
 */
function groupRawSpanDurations(spans) {
  const grouped = new Map();
  for (const span of spans) {
    if (span.duration_ms === void 0) continue;
    // Object keys are strings; coerce up front so the Map groups the same way.
    const key = String(span.name);
    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(span.duration_ms);
    } else {
      grouped.set(key, [span.duration_ms]);
    }
  }
  return grouped.size > 0 ? Object.fromEntries(grouped) : void 0;
}
5125
/** Derived trace summary of a result without its `spans` list; `undefined` when there is none. */
function getTraceSummary(result) {
  const derived = buildDerivedTraceSummary(result);
  if (!derived) {
    return void 0;
  }
  const summary = { ...derived };
  delete summary.spans;
  return summary;
}
5131
/** Spans of a result's derived trace summary, or an empty array when there are none. */
function getTraceSpans(result) {
  const derived = buildDerivedTraceSummary(result);
  if (derived == null) {
    return [];
  }
  return derived.spans ?? [];
}
5134
/** Trace summary of a result passed through `toCamelCaseDeep`; `undefined` when there is no summary. */
function toTraceSummary(result) {
  const snakeCaseSummary = getTraceSummary(result);
  return snakeCaseSummary ? toCamelCaseDeep(snakeCaseSummary) : void 0;
}
4638
5139
  function listResultFiles(cwd, limit) {
4639
- const baseDir = path7.join(cwd, ".agentv", "results");
4640
- const rawDir = path7.join(baseDir, "raw");
5140
+ const baseDir = path6.join(cwd, ".agentv", "results");
5141
+ const rawDir = path6.join(baseDir, "raw");
4641
5142
  const files = [];
4642
5143
  try {
4643
5144
  const entries2 = readdirSync2(rawDir, { withFileTypes: true });
4644
5145
  for (const entry of entries2) {
4645
5146
  if (entry.isDirectory()) {
4646
- const jsonlPath = path7.join(rawDir, entry.name, "results.jsonl");
4647
- try {
4648
- statSync2(jsonlPath);
4649
- files.push({ filePath: jsonlPath, displayName: entry.name });
4650
- } catch {
5147
+ const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
5148
+ if (primaryPath) {
5149
+ files.push({ filePath: primaryPath, displayName: entry.name });
4651
5150
  }
4652
5151
  }
4653
5152
  }
4654
5153
  for (const entry of entries2) {
4655
5154
  if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
4656
- files.push({ filePath: path7.join(rawDir, entry.name), displayName: entry.name });
5155
+ files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
4657
5156
  }
4658
5157
  }
4659
5158
  } catch {
@@ -4661,7 +5160,7 @@ function listResultFiles(cwd, limit) {
4661
5160
  try {
4662
5161
  const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
4663
5162
  for (const entry of entries2) {
4664
- files.push({ filePath: path7.join(baseDir, entry), displayName: entry });
5163
+ files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
4665
5164
  }
4666
5165
  } catch {
4667
5166
  }
@@ -4729,84 +5228,65 @@ function formatScore(score) {
4729
5228
  return `${(score * 100).toFixed(0)}%`;
4730
5229
  }
4731
5230
 
4732
- // src/commands/results/export.ts
4733
- function exportResults(sourceFile, content, outputDir) {
4734
- const results = parseJsonlResults(content);
5231
+ // src/commands/results/shared.ts
5232
+ var sourceArg = positional({
5233
+ type: optional(string),
5234
+ displayName: "source",
5235
+ description: "Result file or workspace directory (defaults to most recent in .agentv/results/)"
5236
+ });
5237
+ async function resolveSourceFile(source, cwd) {
5238
+ let sourceFile;
5239
+ if (source) {
5240
+ sourceFile = resolveResultSourcePath(source, cwd);
5241
+ if (!existsSync2(sourceFile)) {
5242
+ console.error(`Error: File not found: ${sourceFile}`);
5243
+ process.exit(1);
5244
+ }
5245
+ } else {
5246
+ const cache = await loadRunCache(cwd);
5247
+ const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5248
+ if (cachedFile && existsSync2(cachedFile)) {
5249
+ sourceFile = cachedFile;
5250
+ } else {
5251
+ const metas = listResultFiles(cwd, 1);
5252
+ if (metas.length === 0) {
5253
+ console.error("Error: No result files found in .agentv/results/");
5254
+ console.error("Run an evaluation first: agentv eval <eval-file>");
5255
+ process.exit(1);
5256
+ }
5257
+ sourceFile = metas[0].path;
5258
+ }
5259
+ }
5260
+ return { sourceFile };
5261
+ }
5262
+ async function loadResults(source, cwd) {
5263
+ const { sourceFile } = await resolveSourceFile(source, cwd);
5264
+ const results = loadManifestResults(sourceFile);
4735
5265
  if (results.length === 0) {
4736
- throw new Error(`No results found in ${sourceFile}`);
5266
+ console.error(`No results found in ${sourceFile}`);
5267
+ process.exit(1);
4737
5268
  }
4738
- const patched = results.map((r) => {
5269
+ return { results: patchTestIds(results), sourceFile };
5270
+ }
5271
+ function patchTestIds(results) {
5272
+ return results.map((r) => {
4739
5273
  if (!r.testId && r.evalId) {
4740
5274
  return { ...r, testId: String(r.evalId) };
4741
5275
  }
4742
5276
  return r;
4743
5277
  });
4744
- mkdirSync2(outputDir, { recursive: true });
4745
- const benchmark = buildBenchmarkArtifact(patched, sourceFile);
4746
- writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
4747
- `);
4748
- const timing = buildTimingArtifact(patched);
4749
- writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
4750
- `);
4751
- const aggregateGrading = buildAggregateGradingArtifact(patched);
4752
- writeFileSync3(
4753
- path8.join(outputDir, "grading.json"),
4754
- `${JSON.stringify(aggregateGrading, null, 2)}
4755
- `
4756
- );
4757
- const gradingDir = path8.join(outputDir, "grading");
4758
- mkdirSync2(gradingDir, { recursive: true });
4759
- for (const result of patched) {
4760
- const id = safeTestId(result);
4761
- const grading = buildGradingArtifact(result);
4762
- writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
4763
- `);
4764
- }
4765
- const outputsDir = path8.join(outputDir, "outputs");
4766
- mkdirSync2(outputsDir, { recursive: true });
4767
- for (const result of patched) {
4768
- if (result.output && result.output.length > 0) {
4769
- const id = safeTestId(result);
4770
- const md = formatOutputMarkdown(result.output);
4771
- writeFileSync3(path8.join(outputsDir, `${id}.md`), md);
4772
- }
4773
- }
4774
- const inputsDir = path8.join(outputDir, "inputs");
4775
- mkdirSync2(inputsDir, { recursive: true });
4776
- for (const result of patched) {
4777
- const id = safeTestId(result);
4778
- const input = extractInput(result);
4779
- if (input) {
4780
- writeFileSync3(path8.join(inputsDir, `${id}.md`), input);
4781
- }
4782
- }
4783
- }
4784
- function formatOutputMarkdown(output) {
4785
- return output.map((msg) => `@[${msg.role}]:
4786
- ${String(msg.content ?? "")}`).join("\n\n");
4787
- }
4788
- function extractInput(result) {
4789
- const input = result.input;
4790
- if (!input) return null;
4791
- if (typeof input === "string") return input;
4792
- if (Array.isArray(input) && input.length > 0) {
4793
- return formatOutputMarkdown(input);
4794
- }
4795
- return null;
4796
- }
4797
- function safeTestId(result) {
4798
- const raw = result.testId ?? result.evalId ?? "unknown";
4799
- return String(raw).replace(/[/\\:*?"<>|]/g, "_");
4800
5278
  }
5279
+
5280
+ // src/commands/results/export.ts
4801
5281
  function deriveOutputDir(cwd, sourceFile) {
4802
- const parentDir = path8.basename(path8.dirname(sourceFile));
5282
+ const parentDir = path7.basename(path7.dirname(sourceFile));
4803
5283
  if (parentDir.startsWith("eval_")) {
4804
5284
  const dirName2 = parentDir.slice(5);
4805
- return path8.join(cwd, ".agentv", "results", "export", dirName2);
5285
+ return path7.join(cwd, ".agentv", "results", "export", dirName2);
4806
5286
  }
4807
- const basename = path8.basename(sourceFile, ".jsonl");
5287
+ const basename = path7.basename(sourceFile, ".jsonl");
4808
5288
  const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
4809
- return path8.join(cwd, ".agentv", "results", "export", dirName);
5289
+ return path7.join(cwd, ".agentv", "results", "export", dirName);
4810
5290
  }
4811
5291
  var resultsExportCommand = command({
4812
5292
  name: "export",
@@ -4833,28 +5313,13 @@ var resultsExportCommand = command({
4833
5313
  handler: async ({ source, out, dir }) => {
4834
5314
  const cwd = dir ?? process.cwd();
4835
5315
  try {
4836
- let sourceFile;
4837
- if (source) {
4838
- sourceFile = path8.isAbsolute(source) ? source : path8.resolve(cwd, source);
4839
- } else {
4840
- const cache = await loadRunCache(cwd);
4841
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
4842
- if (cachedFile && existsSync2(cachedFile)) {
4843
- sourceFile = cachedFile;
4844
- } else {
4845
- const metas = listResultFiles(cwd, 1);
4846
- if (metas.length === 0) {
4847
- console.error("Error: No result files found in .agentv/results/");
4848
- console.error("Run an evaluation first: agentv eval <eval-file>");
4849
- process.exit(1);
4850
- }
4851
- sourceFile = metas[0].path;
4852
- }
4853
- }
4854
- const content = readFileSync6(sourceFile, "utf8");
4855
- const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
4856
- exportResults(sourceFile, content, outputDir);
4857
- const results = parseJsonlResults(content);
5316
+ const { sourceFile } = await resolveSourceFile(source, cwd);
5317
+ const { results } = await loadResults(source, cwd);
5318
+ const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
5319
+ await writeArtifactsFromResults(results, outputDir, {
5320
+ evalFile: sourceFile,
5321
+ writeLegacyResults: false
5322
+ });
4858
5323
  console.log(`Exported ${results.length} test(s) to ${outputDir}`);
4859
5324
  for (const result of results) {
4860
5325
  const id = result.testId ?? result.evalId ?? "unknown";
@@ -4867,58 +5332,6 @@ var resultsExportCommand = command({
4867
5332
  }
4868
5333
  });
4869
5334
 
4870
- // src/commands/results/shared.ts
4871
- import { existsSync as existsSync3, readFileSync as readFileSync7 } from "node:fs";
4872
- import path9 from "node:path";
4873
- var sourceArg = positional({
4874
- type: optional(string),
4875
- displayName: "source",
4876
- description: "JSONL result file (defaults to most recent in .agentv/results/)"
4877
- });
4878
- async function resolveSourceFile(source, cwd) {
4879
- let sourceFile;
4880
- if (source) {
4881
- sourceFile = path9.isAbsolute(source) ? source : path9.resolve(cwd, source);
4882
- if (!existsSync3(sourceFile)) {
4883
- console.error(`Error: File not found: ${sourceFile}`);
4884
- process.exit(1);
4885
- }
4886
- } else {
4887
- const cache = await loadRunCache(cwd);
4888
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
4889
- if (cachedFile && existsSync3(cachedFile)) {
4890
- sourceFile = cachedFile;
4891
- } else {
4892
- const metas = listResultFiles(cwd, 1);
4893
- if (metas.length === 0) {
4894
- console.error("Error: No result files found in .agentv/results/");
4895
- console.error("Run an evaluation first: agentv eval <eval-file>");
4896
- process.exit(1);
4897
- }
4898
- sourceFile = metas[0].path;
4899
- }
4900
- }
4901
- const content = readFileSync7(sourceFile, "utf8");
4902
- return { sourceFile, content };
4903
- }
4904
- async function loadResults(source, cwd) {
4905
- const { sourceFile, content } = await resolveSourceFile(source, cwd);
4906
- const results = parseJsonlResults(content);
4907
- if (results.length === 0) {
4908
- console.error(`No results found in ${sourceFile}`);
4909
- process.exit(1);
4910
- }
4911
- return { results: patchTestIds(results), sourceFile };
4912
- }
4913
- function patchTestIds(results) {
4914
- return results.map((r) => {
4915
- if (!r.testId && r.evalId) {
4916
- return { ...r, testId: String(r.evalId) };
4917
- }
4918
- return r;
4919
- });
4920
- }
4921
-
4922
5335
  // src/commands/results/failures.ts
4923
5336
  function formatFailures(results) {
4924
5337
  return results.filter((r) => r.score < 1).map((r) => {
@@ -5045,7 +5458,7 @@ var resultsShowCommand = command({
5045
5458
  });
5046
5459
 
5047
5460
  // src/commands/results/summary.ts
5048
- import { existsSync as existsSync4, readFileSync as readFileSync8 } from "node:fs";
5461
+ import { existsSync as existsSync3, readFileSync as readFileSync5 } from "node:fs";
5049
5462
  function formatSummary(results, grading) {
5050
5463
  const total = results.length;
5051
5464
  let passed;
@@ -5096,9 +5509,9 @@ var resultsSummaryCommand = command({
5096
5509
  const { results, sourceFile } = await loadResults(source, cwd);
5097
5510
  let grading;
5098
5511
  const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
5099
- if (existsSync4(gradingPath)) {
5512
+ if (existsSync3(gradingPath)) {
5100
5513
  try {
5101
- grading = JSON.parse(readFileSync8(gradingPath, "utf8"));
5514
+ grading = JSON.parse(readFileSync5(gradingPath, "utf8"));
5102
5515
  } catch {
5103
5516
  }
5104
5517
  }
@@ -5123,68 +5536,26 @@ var resultsCommand = subcommands({
5123
5536
  });
5124
5537
 
5125
5538
  // src/commands/results/serve.ts
5126
- import { existsSync as existsSync5, readFileSync as readFileSync9, writeFileSync as writeFileSync4 } from "node:fs";
5127
- import path10 from "node:path";
5539
+ import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
5540
+ import path8 from "node:path";
5128
5541
  import { Hono } from "hono";
5129
- async function resolveSourceFile2(source, cwd) {
5130
- if (source) {
5131
- const resolved = path10.isAbsolute(source) ? source : path10.resolve(cwd, source);
5132
- if (!existsSync5(resolved)) {
5133
- throw new Error(`Source file not found: ${resolved}`);
5134
- }
5135
- return resolved;
5136
- }
5137
- const cache = await loadRunCache(cwd);
5138
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5139
- if (cachedFile && existsSync5(cachedFile)) {
5140
- return cachedFile;
5141
- }
5142
- const metas = listResultFiles(cwd, 10);
5143
- if (metas.length === 0) {
5144
- throw new Error(
5145
- "No result files found in .agentv/results/\nRun an evaluation first: agentv eval <eval-file>"
5146
- );
5147
- }
5148
- if (metas.length > 1) {
5149
- console.log("Available result files:");
5150
- for (const m of metas) {
5151
- console.log(` ${m.path}`);
5152
- }
5153
- console.log(`
5154
- Serving most recent: ${metas[0].path}
5155
- `);
5156
- }
5157
- return metas[0].path;
5158
- }
5159
- function loadResults2(content) {
5160
- const results = parseJsonlResults(content);
5161
- if (results.length === 0) {
5162
- throw new Error("No valid results found in JSONL content");
5163
- }
5164
- return results.map((r) => {
5165
- if (!r.testId && r.evalId) {
5166
- return { ...r, testId: String(r.evalId) };
5167
- }
5168
- return r;
5169
- });
5170
- }
5171
5542
  function feedbackPath(cwd) {
5172
- return path10.join(cwd, "feedback.json");
5543
+ return path8.join(cwd, "feedback.json");
5173
5544
  }
5174
5545
  function readFeedback(cwd) {
5175
5546
  const fp = feedbackPath(cwd);
5176
- if (!existsSync5(fp)) {
5547
+ if (!existsSync4(fp)) {
5177
5548
  return { reviews: [] };
5178
5549
  }
5179
5550
  try {
5180
- return JSON.parse(readFileSync9(fp, "utf8"));
5551
+ return JSON.parse(readFileSync6(fp, "utf8"));
5181
5552
  } catch (err2) {
5182
5553
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
5183
5554
  return { reviews: [] };
5184
5555
  }
5185
5556
  }
5186
5557
  function writeFeedback(cwd, data) {
5187
- writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5558
+ writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5188
5559
  `, "utf8");
5189
5560
  }
5190
5561
  function createApp(results, cwd) {
@@ -5854,9 +6225,7 @@ var resultsServeCommand = command({
5854
6225
  const cwd = dir ?? process.cwd();
5855
6226
  const listenPort = port ?? 3117;
5856
6227
  try {
5857
- const sourceFile = await resolveSourceFile2(source, cwd);
5858
- const content = readFileSync9(sourceFile, "utf8");
5859
- const results = loadResults2(content);
6228
+ const { results, sourceFile } = await loadResults(source, cwd);
5860
6229
  const app2 = createApp(results, cwd);
5861
6230
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
5862
6231
  console.log(`Dashboard: http://localhost:${listenPort}`);
@@ -5889,7 +6258,7 @@ function detectPackageManager() {
5889
6258
  return detectPackageManagerFromPath(process.argv[1] ?? "");
5890
6259
  }
5891
6260
  function runCommand(cmd, args) {
5892
- return new Promise((resolve, reject) => {
6261
+ return new Promise((resolve2, reject) => {
5893
6262
  const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
5894
6263
  let stdout = "";
5895
6264
  child.stdout?.on("data", (data) => {
@@ -5897,7 +6266,7 @@ function runCommand(cmd, args) {
5897
6266
  stdout += data.toString();
5898
6267
  });
5899
6268
  child.on("error", reject);
5900
- child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
6269
+ child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
5901
6270
  });
5902
6271
  }
5903
6272
  var updateCommand = command({
@@ -6109,10 +6478,6 @@ function parseAssertSpec(spec) {
6109
6478
  );
6110
6479
  }
6111
6480
  }
6112
- function toTraceSummary(raw) {
6113
- if (!raw.trace) return void 0;
6114
- return toCamelCaseDeep(raw.trace);
6115
- }
6116
6481
  function extractCandidate(raw) {
6117
6482
  if (raw.output !== void 0)
6118
6483
  return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
@@ -6224,8 +6589,8 @@ var traceScoreCommand = command({
6224
6589
  args: {
6225
6590
  file: positional({
6226
6591
  type: string,
6227
- displayName: "result-file",
6228
- description: "Path to JSONL result file"
6592
+ displayName: "trace-source",
6593
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6229
6594
  }),
6230
6595
  assert: option({
6231
6596
  type: string,
@@ -6271,11 +6636,11 @@ var traceScoreCommand = command({
6271
6636
  );
6272
6637
  if (traceRequired) {
6273
6638
  const hasTrace = results.some(
6274
- (r) => r.trace || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
6639
+ (r) => toTraceSummary(r) || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
6275
6640
  );
6276
6641
  if (!hasTrace) {
6277
6642
  console.error(
6278
- `${c2.red}Error:${c2.reset} Result file lacks trace data. Re-run eval with ${c2.bold}--trace${c2.reset} to capture trace summaries.`
6643
+ `${c2.red}Error:${c2.reset} Source lacks trace metrics. Export a trace file with ${c2.bold}--trace-file${c2.reset} or ${c2.bold}--otel-file${c2.reset}.`
6279
6644
  );
6280
6645
  process.exit(1);
6281
6646
  }
@@ -6308,7 +6673,7 @@ var traceScoreCommand = command({
6308
6673
 
6309
6674
  // src/commands/trace/show.ts
6310
6675
  function renderFlatTrace(result) {
6311
- const trace = result.trace;
6676
+ const trace = getTraceSummary(result);
6312
6677
  const parts = [];
6313
6678
  if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
6314
6679
  const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
@@ -6339,8 +6704,12 @@ function renderScores(scores) {
6339
6704
  }
6340
6705
  function renderTree(result) {
6341
6706
  const messages = result.output;
6707
+ const spans = getTraceSpans(result);
6342
6708
  if (!messages || messages.length === 0) {
6343
- if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
6709
+ if (spans.length > 0) {
6710
+ return renderSpanTree(result, spans);
6711
+ }
6712
+ if (getTraceSummary(result) || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
6344
6713
  return renderFlatTrace(result);
6345
6714
  }
6346
6715
  return `${c2.dim}No trace data available${c2.reset}`;
@@ -6406,6 +6775,30 @@ function renderTree(result) {
6406
6775
  }
6407
6776
  return lines.join("\n");
6408
6777
  }
6778
+ function renderSpanTree(result, spans) {
6779
+ const lines = [];
6780
+ const testId = result.test_id ?? result.eval_id ?? "unknown";
6781
+ const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
6782
+ const rootParts = [testId];
6783
+ if (result.duration_ms !== void 0) rootParts.push(formatDuration(result.duration_ms));
6784
+ if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
6785
+ if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
6786
+ lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
6787
+ spans.forEach((span, index) => {
6788
+ const connector = index === spans.length - 1 ? "\u2514\u2500" : "\u251C\u2500";
6789
+ const color = span.type === "llm" ? c2.cyan : c2.yellow;
6790
+ const parts = [`${color}${span.name}${c2.reset}`];
6791
+ if (span.duration_ms !== void 0) {
6792
+ parts.push(formatDuration(span.duration_ms));
6793
+ }
6794
+ lines.push(`${connector} ${parts.join(", ")}`);
6795
+ });
6796
+ if (result.scores && result.scores.length > 0) {
6797
+ lines.push("");
6798
+ lines.push(`${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
6799
+ }
6800
+ return lines.join("\n");
6801
+ }
6409
6802
  function formatResultDetail(result, index, tree) {
6410
6803
  const lines = [];
6411
6804
  const testId = result.test_id ?? result.eval_id ?? `result-${index}`;
@@ -6489,8 +6882,8 @@ var traceShowCommand = command({
6489
6882
  args: {
6490
6883
  file: positional({
6491
6884
  type: string,
6492
- displayName: "result-file",
6493
- description: "Path to JSONL result file"
6885
+ displayName: "trace-source",
6886
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6494
6887
  }),
6495
6888
  testId: option({
6496
6889
  type: optional(string),
@@ -6499,7 +6892,7 @@ var traceShowCommand = command({
6499
6892
  }),
6500
6893
  tree: flag({
6501
6894
  long: "tree",
6502
- description: "Show hierarchical trace tree (requires results with --trace output)"
6895
+ description: "Show hierarchical trace tree from output messages or exported trace spans"
6503
6896
  }),
6504
6897
  format: option({
6505
6898
  type: optional(oneOf(["table", "json"])),
@@ -6570,11 +6963,11 @@ function collectMetrics(results) {
6570
6963
  formatter: (n) => formatNumber(Math.round(n))
6571
6964
  });
6572
6965
  }
6573
- const toolCalls = results.map((r) => r.trace?.event_count).filter((v) => v !== void 0);
6966
+ const toolCalls = results.map((r) => getTraceSummary(r)?.event_count).filter((v) => v !== void 0);
6574
6967
  if (toolCalls.length > 0) {
6575
6968
  rows.push({ name: "tool_calls", values: toolCalls, formatter: (n) => String(Math.round(n)) });
6576
6969
  }
6577
- const llmCalls = results.map((r) => r.trace?.llm_call_count).filter((v) => v !== void 0);
6970
+ const llmCalls = results.map((r) => getTraceSummary(r)?.llm_call_count).filter((v) => v !== void 0);
6578
6971
  if (llmCalls.length > 0) {
6579
6972
  rows.push({ name: "llm_calls", values: llmCalls, formatter: (n) => String(Math.round(n)) });
6580
6973
  }
@@ -6668,8 +7061,8 @@ var traceStatsCommand = command({
6668
7061
  args: {
6669
7062
  file: positional({
6670
7063
  type: string,
6671
- displayName: "result-file",
6672
- description: "Path to JSONL result file"
7064
+ displayName: "trace-source",
7065
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6673
7066
  }),
6674
7067
  groupBy: option({
6675
7068
  type: optional(oneOf(["target", "eval-set", "test-id"])),
@@ -6719,8 +7112,8 @@ var traceCommand = subcommands({
6719
7112
  });
6720
7113
 
6721
7114
  // src/commands/transpile/index.ts
6722
- import { writeFileSync as writeFileSync5 } from "node:fs";
6723
- import path11 from "node:path";
7115
+ import { writeFileSync as writeFileSync4 } from "node:fs";
7116
+ import path9 from "node:path";
6724
7117
  var transpileCommand = command({
6725
7118
  name: "transpile",
6726
7119
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -6744,7 +7137,7 @@ var transpileCommand = command({
6744
7137
  handler: async ({ input, outDir, stdout }) => {
6745
7138
  let result;
6746
7139
  try {
6747
- result = transpileEvalYamlFile(path11.resolve(input));
7140
+ result = transpileEvalYamlFile(path9.resolve(input));
6748
7141
  } catch (error) {
6749
7142
  console.error(`Error: ${error.message}`);
6750
7143
  process.exit(1);
@@ -6768,12 +7161,12 @@ var transpileCommand = command({
6768
7161
  process.stdout.write("\n");
6769
7162
  return;
6770
7163
  }
6771
- const outputDir = outDir ? path11.resolve(outDir) : path11.dirname(path11.resolve(input));
7164
+ const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
6772
7165
  const fileNames = getOutputFilenames(result);
6773
7166
  for (const [skill, evalsJson] of result.files) {
6774
7167
  const fileName = fileNames.get(skill) ?? "evals.json";
6775
- const outputPath = path11.join(outputDir, fileName);
6776
- writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7168
+ const outputPath = path9.join(outputDir, fileName);
7169
+ writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
6777
7170
  `);
6778
7171
  console.log(`Transpiled to ${outputPath}`);
6779
7172
  }
@@ -6781,7 +7174,7 @@ var transpileCommand = command({
6781
7174
  });
6782
7175
 
6783
7176
  // src/commands/trim/index.ts
6784
- import { readFileSync as readFileSync10, writeFileSync as writeFileSync6 } from "node:fs";
7177
+ import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
6785
7178
  var trimCommand = command({
6786
7179
  name: "trim",
6787
7180
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -6800,7 +7193,7 @@ var trimCommand = command({
6800
7193
  },
6801
7194
  handler: async ({ input, out }) => {
6802
7195
  try {
6803
- const content = readFileSync10(input, "utf8");
7196
+ const content = readFileSync7(input, "utf8");
6804
7197
  const lines = content.trim().split("\n").filter((line) => line.trim());
6805
7198
  const trimmedLines = lines.map((line) => {
6806
7199
  const record = JSON.parse(line);
@@ -6812,7 +7205,7 @@ var trimCommand = command({
6812
7205
  const output = `${trimmedLines.join("\n")}
6813
7206
  `;
6814
7207
  if (out) {
6815
- writeFileSync6(out, output, "utf8");
7208
+ writeFileSync5(out, output, "utf8");
6816
7209
  console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
6817
7210
  } else {
6818
7211
  process.stdout.write(output);
@@ -6906,8 +7299,8 @@ function isTTY() {
6906
7299
 
6907
7300
  // src/commands/validate/validate-files.ts
6908
7301
  import { constants } from "node:fs";
6909
- import { access, readdir, stat } from "node:fs/promises";
6910
- import path12 from "node:path";
7302
+ import { access, readdir as readdir3, stat } from "node:fs/promises";
7303
+ import path10 from "node:path";
6911
7304
  async function validateFiles(paths) {
6912
7305
  const filePaths = await expandPaths(paths);
6913
7306
  const results = [];
@@ -6925,7 +7318,7 @@ async function validateFiles(paths) {
6925
7318
  };
6926
7319
  }
6927
7320
  async function validateSingleFile(filePath) {
6928
- const absolutePath = path12.resolve(filePath);
7321
+ const absolutePath = path10.resolve(filePath);
6929
7322
  const fileType = await detectFileType(absolutePath);
6930
7323
  let result;
6931
7324
  if (fileType === "eval") {
@@ -6950,7 +7343,7 @@ async function validateSingleFile(filePath) {
6950
7343
  async function expandPaths(paths) {
6951
7344
  const expanded = [];
6952
7345
  for (const inputPath of paths) {
6953
- const absolutePath = path12.resolve(inputPath);
7346
+ const absolutePath = path10.resolve(inputPath);
6954
7347
  try {
6955
7348
  await access(absolutePath, constants.F_OK);
6956
7349
  } catch {
@@ -6972,9 +7365,9 @@ async function expandPaths(paths) {
6972
7365
  async function findYamlFiles(dirPath) {
6973
7366
  const results = [];
6974
7367
  try {
6975
- const entries2 = await readdir(dirPath, { withFileTypes: true });
7368
+ const entries2 = await readdir3(dirPath, { withFileTypes: true });
6976
7369
  for (const entry of entries2) {
6977
- const fullPath = path12.join(dirPath, entry.name);
7370
+ const fullPath = path10.join(dirPath, entry.name);
6978
7371
  if (entry.isDirectory()) {
6979
7372
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
6980
7373
  continue;
@@ -6991,7 +7384,7 @@ async function findYamlFiles(dirPath) {
6991
7384
  return results;
6992
7385
  }
6993
7386
  function isYamlFile(filePath) {
6994
- const ext = path12.extname(filePath).toLowerCase();
7387
+ const ext = path10.extname(filePath).toLowerCase();
6995
7388
  return ext === ".yaml" || ext === ".yml";
6996
7389
  }
6997
7390
 
@@ -7029,14 +7422,14 @@ var validateCommand = command({
7029
7422
  });
7030
7423
 
7031
7424
  // src/commands/workspace/clean.ts
7032
- import { existsSync as existsSync6 } from "node:fs";
7033
- import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
7034
- import path13 from "node:path";
7425
+ import { existsSync as existsSync5 } from "node:fs";
7426
+ import { readFile as readFile4, readdir as readdir4, rm } from "node:fs/promises";
7427
+ import path11 from "node:path";
7035
7428
  async function confirm(message) {
7036
7429
  const readline2 = await import("node:readline");
7037
7430
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
7038
- const answer = await new Promise((resolve) => {
7039
- rl.question(`${message} [y/N] `, resolve);
7431
+ const answer = await new Promise((resolve2) => {
7432
+ rl.question(`${message} [y/N] `, resolve2);
7040
7433
  });
7041
7434
  rl.close();
7042
7435
  return answer.toLowerCase() === "y";
@@ -7058,19 +7451,19 @@ var cleanCommand = command({
7058
7451
  },
7059
7452
  handler: async ({ repo, force }) => {
7060
7453
  const poolRoot = getWorkspacePoolRoot();
7061
- if (!existsSync6(poolRoot)) {
7454
+ if (!existsSync5(poolRoot)) {
7062
7455
  console.log("No workspace pool entries found.");
7063
7456
  return;
7064
7457
  }
7065
7458
  if (repo) {
7066
- const entries2 = await readdir2(poolRoot, { withFileTypes: true });
7459
+ const entries2 = await readdir4(poolRoot, { withFileTypes: true });
7067
7460
  const poolDirs = entries2.filter((e) => e.isDirectory());
7068
7461
  const matchingDirs = [];
7069
7462
  for (const dir of poolDirs) {
7070
- const poolDir = path13.join(poolRoot, dir.name);
7071
- const metadataPath = path13.join(poolDir, "metadata.json");
7463
+ const poolDir = path11.join(poolRoot, dir.name);
7464
+ const metadataPath = path11.join(poolDir, "metadata.json");
7072
7465
  try {
7073
- const raw = await readFile2(metadataPath, "utf-8");
7466
+ const raw = await readFile4(metadataPath, "utf-8");
7074
7467
  const metadata = JSON.parse(raw);
7075
7468
  const hasRepo = metadata.repos?.some((r) => {
7076
7469
  if (r.source.type === "git" && r.source.url) {
@@ -7099,7 +7492,7 @@ var cleanCommand = command({
7099
7492
  }
7100
7493
  for (const dir of matchingDirs) {
7101
7494
  await rm(dir, { recursive: true, force: true });
7102
- console.log(`Removed: ${path13.basename(dir).slice(0, 12)}...`);
7495
+ console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
7103
7496
  }
7104
7497
  console.log("Done.");
7105
7498
  } else {
@@ -7117,15 +7510,15 @@ var cleanCommand = command({
7117
7510
  });
7118
7511
 
7119
7512
  // src/commands/workspace/list.ts
7120
- import { existsSync as existsSync7 } from "node:fs";
7121
- import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
7122
- import path14 from "node:path";
7513
+ import { existsSync as existsSync6 } from "node:fs";
7514
+ import { readFile as readFile5, readdir as readdir5, stat as stat2 } from "node:fs/promises";
7515
+ import path12 from "node:path";
7123
7516
  async function getDirectorySize(dirPath) {
7124
7517
  let totalSize = 0;
7125
7518
  try {
7126
- const entries2 = await readdir3(dirPath, { withFileTypes: true });
7519
+ const entries2 = await readdir5(dirPath, { withFileTypes: true });
7127
7520
  for (const entry of entries2) {
7128
- const fullPath = path14.join(dirPath, entry.name);
7521
+ const fullPath = path12.join(dirPath, entry.name);
7129
7522
  if (entry.isDirectory()) {
7130
7523
  totalSize += await getDirectorySize(fullPath);
7131
7524
  } else {
@@ -7149,25 +7542,25 @@ var listCommand = command({
7149
7542
  args: {},
7150
7543
  handler: async () => {
7151
7544
  const poolRoot = getWorkspacePoolRoot();
7152
- if (!existsSync7(poolRoot)) {
7545
+ if (!existsSync6(poolRoot)) {
7153
7546
  console.log("No workspace pool entries found.");
7154
7547
  return;
7155
7548
  }
7156
- const entries2 = await readdir3(poolRoot, { withFileTypes: true });
7549
+ const entries2 = await readdir5(poolRoot, { withFileTypes: true });
7157
7550
  const poolDirs = entries2.filter((e) => e.isDirectory());
7158
7551
  if (poolDirs.length === 0) {
7159
7552
  console.log("No workspace pool entries found.");
7160
7553
  return;
7161
7554
  }
7162
7555
  for (const dir of poolDirs) {
7163
- const poolDir = path14.join(poolRoot, dir.name);
7556
+ const poolDir = path12.join(poolRoot, dir.name);
7164
7557
  const fingerprint = dir.name;
7165
- const poolEntries = await readdir3(poolDir, { withFileTypes: true });
7558
+ const poolEntries = await readdir5(poolDir, { withFileTypes: true });
7166
7559
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
7167
- const metadataPath = path14.join(poolDir, "metadata.json");
7560
+ const metadataPath = path12.join(poolDir, "metadata.json");
7168
7561
  let metadata = null;
7169
7562
  try {
7170
- const raw = await readFile3(metadataPath, "utf-8");
7563
+ const raw = await readFile5(metadataPath, "utf-8");
7171
7564
  metadata = JSON.parse(raw);
7172
7565
  } catch {
7173
7566
  }
@@ -7204,16 +7597,16 @@ var workspaceCommand = subcommands({
7204
7597
 
7205
7598
  // src/update-check.ts
7206
7599
  import { spawn as spawn2 } from "node:child_process";
7207
- import { readFile as readFile4 } from "node:fs/promises";
7208
- import { join } from "node:path";
7600
+ import { readFile as readFile6 } from "node:fs/promises";
7601
+ import { join as join4 } from "node:path";
7209
7602
  var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
7210
7603
  var AGENTV_DIR = getAgentvHome();
7211
7604
  var CACHE_FILE = "version-check.json";
7212
7605
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
7213
- async function getCachedUpdateInfo(path15) {
7214
- const filePath = path15 ?? join(AGENTV_DIR, CACHE_FILE);
7606
+ async function getCachedUpdateInfo(path13) {
7607
+ const filePath = path13 ?? join4(AGENTV_DIR, CACHE_FILE);
7215
7608
  try {
7216
- const raw = await readFile4(filePath, "utf-8");
7609
+ const raw = await readFile6(filePath, "utf-8");
7217
7610
  const data = JSON.parse(raw);
7218
7611
  if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
7219
7612
  return data;
@@ -7245,7 +7638,7 @@ function buildNotice(currentVersion, latestVersion) {
7245
7638
  }
7246
7639
  function backgroundUpdateCheck() {
7247
7640
  const dir = AGENTV_DIR;
7248
- const filePath = join(dir, CACHE_FILE);
7641
+ const filePath = join4(dir, CACHE_FILE);
7249
7642
  const script = `
7250
7643
  const https = require('https');
7251
7644
  const fs = require('fs');
@@ -7299,8 +7692,8 @@ var app = subcommands({
7299
7692
  compare: compareCommand,
7300
7693
  convert: convertCommand,
7301
7694
  create: createCommand,
7302
- generate: generateCommand,
7303
7695
  init: initCmdTsCommand,
7696
+ pipeline: pipelineCommand,
7304
7697
  results: resultsCommand,
7305
7698
  self: selfCommand,
7306
7699
  serve: resultsServeCommand,
@@ -7317,8 +7710,8 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
7317
7710
  "compare",
7318
7711
  "convert",
7319
7712
  "create",
7320
- "generate",
7321
7713
  "init",
7714
+ "pipeline",
7322
7715
  "results",
7323
7716
  "self",
7324
7717
  "serve",
@@ -7368,4 +7761,4 @@ export {
7368
7761
  preprocessArgv,
7369
7762
  runCli
7370
7763
  };
7371
- //# sourceMappingURL=chunk-V2S5CZU3.js.map
7764
+ //# sourceMappingURL=chunk-2ELQ6F3C.js.map