agentv 3.11.1 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +15 -12
  2. package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
  3. package/dist/{chunk-CKMAM2GD.js → chunk-6H4IAXQH.js} +435 -198
  4. package/dist/chunk-6H4IAXQH.js.map +1 -0
  5. package/dist/{chunk-OYD2NB55.js → chunk-7OHZAFND.js} +120 -29
  6. package/dist/chunk-7OHZAFND.js.map +1 -0
  7. package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} +914 -529
  8. package/dist/chunk-DJU4C6NS.js.map +1 -0
  9. package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
  10. package/dist/cli.js +4 -6
  11. package/dist/cli.js.map +1 -1
  12. package/dist/{dist-VUPMLHIV.js → dist-SMKOBBFB.js} +3 -8
  13. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
  14. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
  15. package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
  16. package/dist/index.js +4 -6
  17. package/dist/{interactive-FZJANO4A.js → interactive-RV664PCR.js} +4 -6
  18. package/dist/{interactive-FZJANO4A.js.map → interactive-RV664PCR.js.map} +1 -1
  19. package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
  20. package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
  21. package/package.json +1 -1
  22. package/dist/chunk-2QFWRIYL.js +0 -186
  23. package/dist/chunk-2QFWRIYL.js.map +0 -1
  24. package/dist/chunk-2RMPO6LY.js +0 -747
  25. package/dist/chunk-2RMPO6LY.js.map +0 -1
  26. package/dist/chunk-3Q7WIXT4.js +0 -4846
  27. package/dist/chunk-3Q7WIXT4.js.map +0 -1
  28. package/dist/chunk-73O2DCJP.js +0 -1274
  29. package/dist/chunk-73O2DCJP.js.map +0 -1
  30. package/dist/chunk-AUKF3Y3W.js +0 -212
  31. package/dist/chunk-AUKF3Y3W.js.map +0 -1
  32. package/dist/chunk-BRH7SIDP.js +0 -133
  33. package/dist/chunk-BRH7SIDP.js.map +0 -1
  34. package/dist/chunk-BXM4I3BM.js +0 -526
  35. package/dist/chunk-BXM4I3BM.js.map +0 -1
  36. package/dist/chunk-CKMAM2GD.js.map +0 -1
  37. package/dist/chunk-FHTURHTY.js +0 -546
  38. package/dist/chunk-FHTURHTY.js.map +0 -1
  39. package/dist/chunk-GJFXQQWG.js +0 -21
  40. package/dist/chunk-GJFXQQWG.js.map +0 -1
  41. package/dist/chunk-HKMLG4KF.js +0 -38
  42. package/dist/chunk-HKMLG4KF.js.map +0 -1
  43. package/dist/chunk-JGU3PVA4.js +0 -133
  44. package/dist/chunk-JGU3PVA4.js.map +0 -1
  45. package/dist/chunk-JK6V4KVD.js +0 -114
  46. package/dist/chunk-JK6V4KVD.js.map +0 -1
  47. package/dist/chunk-LHU5FGVZ.js +0 -4804
  48. package/dist/chunk-LHU5FGVZ.js.map +0 -1
  49. package/dist/chunk-OL2WGI6E.js +0 -149
  50. package/dist/chunk-OL2WGI6E.js.map +0 -1
  51. package/dist/chunk-ONETZL6N.js +0 -15
  52. package/dist/chunk-ONETZL6N.js.map +0 -1
  53. package/dist/chunk-OYD2NB55.js.map +0 -1
  54. package/dist/chunk-QV4UGEN6.js +0 -320
  55. package/dist/chunk-QV4UGEN6.js.map +0 -1
  56. package/dist/chunk-QXLDKGF3.js +0 -46
  57. package/dist/chunk-QXLDKGF3.js.map +0 -1
  58. package/dist/chunk-U6VEM66A.js +0 -63
  59. package/dist/chunk-U6VEM66A.js.map +0 -1
  60. package/dist/chunk-UALXHIMX.js +0 -48
  61. package/dist/chunk-UALXHIMX.js.map +0 -1
  62. package/dist/chunk-UGXG73VF.js +0 -55
  63. package/dist/chunk-UGXG73VF.js.map +0 -1
  64. package/dist/chunk-UHP5KEDL.js +0 -38
  65. package/dist/chunk-UHP5KEDL.js.map +0 -1
  66. package/dist/chunk-V2S5CZU3.js.map +0 -1
  67. package/dist/chunk-WVSXFZWP.js +0 -204
  68. package/dist/chunk-WVSXFZWP.js.map +0 -1
  69. package/dist/chunk-XSUMCWKO.js +0 -30
  70. package/dist/chunk-XSUMCWKO.js.map +0 -1
  71. package/dist/chunk-XUO7ZEHU.js +0 -181
  72. package/dist/chunk-XUO7ZEHU.js.map +0 -1
  73. package/dist/chunk-YSGUX5JT.js +0 -1002
  74. package/dist/chunk-YSGUX5JT.js.map +0 -1
  75. package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
  76. package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
  77. package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
  78. package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
  79. package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
  80. package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
  81. package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
  82. package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
  83. package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
  84. package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
  85. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
  86. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
  87. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
  88. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
  89. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
  90. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
  91. package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
  92. package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
  93. package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
  94. package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
  95. package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
  96. package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
  97. package/dist/esm-UYZ3HJBU.js.map +0 -1
  98. package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
  99. package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
  100. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
  101. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
  102. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
  103. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
  104. package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
  105. package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
  106. package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
  107. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js +0 -10
  108. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
  109. package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
  110. package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
  111. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
  112. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
  113. package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
  114. package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
  115. package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
  116. package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
  117. /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
  118. /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
  119. /package/dist/{dist-VUPMLHIV.js.map → dist-SMKOBBFB.js.map} +0 -0
  120. /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
  121. /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
  122. /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
@@ -1,35 +1,36 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  HtmlWriter,
4
- buildAggregateGradingArtifact,
5
- buildBenchmarkArtifact,
6
- buildGradingArtifact,
7
- buildTimingArtifact,
4
+ RESULT_INDEX_FILENAME,
8
5
  detectFileType,
9
6
  findRepoRoot,
7
+ loadLightweightResults,
8
+ loadManifestResults,
10
9
  loadRunCache,
11
10
  package_default,
12
- parseJsonlResults,
13
11
  resolveEvalPaths,
12
+ resolveExistingRunPrimaryPath,
13
+ resolveResultSourcePath,
14
14
  resolveRunCacheFile,
15
+ resolveWorkspaceOrFilePath,
15
16
  runEvalCommand,
16
17
  selectTarget,
17
18
  toSnakeCaseDeep,
18
19
  validateConfigFile,
19
20
  validateEvalFile,
20
21
  validateFileReferences,
21
- validateTargetsFile
22
- } from "./chunk-CKMAM2GD.js";
22
+ validateTargetsFile,
23
+ writeArtifactsFromResults
24
+ } from "./chunk-6H4IAXQH.js";
23
25
  import {
24
26
  createBuiltinRegistry,
25
- createProvider,
26
27
  executeScript,
27
- generateRubrics,
28
28
  getAgentvHome,
29
29
  getOutputFilenames,
30
30
  getWorkspacePoolRoot,
31
31
  isAgentSkillsFormat,
32
32
  loadTestById,
33
+ loadTestSuite,
33
34
  loadTests,
34
35
  normalizeLineEndings,
35
36
  parseAgentSkillsEvals,
@@ -37,7 +38,7 @@ import {
37
38
  toSnakeCaseDeep as toSnakeCaseDeep2,
38
39
  transpileEvalYamlFile,
39
40
  trimBaselineResult
40
- } from "./chunk-OYD2NB55.js";
41
+ } from "./chunk-7OHZAFND.js";
41
42
  import {
42
43
  __commonJS,
43
44
  __esm,
@@ -2888,7 +2889,6 @@ function oneOf(literals) {
2888
2889
  }
2889
2890
 
2890
2891
  // src/commands/compare/index.ts
2891
- import { readFileSync } from "node:fs";
2892
2892
  var colors = {
2893
2893
  reset: "\x1B[0m",
2894
2894
  bold: "\x1B[1m",
@@ -2902,41 +2902,22 @@ var colors = {
2902
2902
  var noColor = process.env.NO_COLOR !== void 0 || !process.stdout.isTTY;
2903
2903
  var c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ""])) : colors;
2904
2904
  function loadJsonlResults(filePath) {
2905
- const content = readFileSync(filePath, "utf8");
2906
- const lines = content.trim().split("\n").filter((line) => line.trim());
2907
- return lines.map((line) => {
2908
- const record = JSON.parse(line);
2909
- const testId = record.test_id ?? record.eval_id;
2910
- if (typeof testId !== "string") {
2911
- throw new Error(`Missing test_id in result: ${line}`);
2912
- }
2913
- if (typeof record.score !== "number") {
2914
- throw new Error(`Missing or invalid score in result: ${line}`);
2915
- }
2916
- return { testId, score: record.score };
2917
- });
2905
+ return loadLightweightResults(resolveResultSourcePath(filePath)).map((record) => ({
2906
+ testId: record.testId,
2907
+ score: record.score
2908
+ }));
2918
2909
  }
2919
2910
  function loadCombinedResults(filePath) {
2920
- const content = readFileSync(filePath, "utf8");
2921
- const lines = content.trim().split("\n").filter((line) => line.trim());
2922
2911
  const groups = /* @__PURE__ */ new Map();
2923
- for (const line of lines) {
2924
- const record = JSON.parse(line);
2925
- const testId = record.test_id ?? record.eval_id;
2926
- if (typeof testId !== "string") {
2927
- throw new Error(`Missing test_id in result: ${line}`);
2928
- }
2929
- if (typeof record.score !== "number") {
2930
- throw new Error(`Missing or invalid score in result: ${line}`);
2931
- }
2912
+ for (const record of loadLightweightResults(resolveResultSourcePath(filePath))) {
2932
2913
  if (typeof record.target !== "string") {
2933
- throw new Error(`Missing target field in combined result: ${line}`);
2914
+ throw new Error(`Missing target field in combined result source: ${filePath}`);
2934
2915
  }
2935
2916
  const target = record.target;
2936
2917
  if (!groups.has(target)) {
2937
2918
  groups.set(target, []);
2938
2919
  }
2939
- groups.get(target)?.push({ testId, score: record.score });
2920
+ groups.get(target)?.push({ testId: record.testId, score: record.score });
2940
2921
  }
2941
2922
  return groups;
2942
2923
  }
@@ -3303,11 +3284,11 @@ var compareCommand = command({
3303
3284
  });
3304
3285
 
3305
3286
  // src/commands/convert/index.ts
3306
- import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
3287
+ import { readFileSync, writeFileSync } from "node:fs";
3307
3288
  import path from "node:path";
3308
3289
  import { stringify as stringifyYaml } from "yaml";
3309
3290
  async function convertJsonlToHtml(inputPath, outputPath) {
3310
- const content = readFileSync2(inputPath, "utf8");
3291
+ const content = readFileSync(inputPath, "utf8");
3311
3292
  const lines = content.trim().split("\n").filter((line) => line.trim());
3312
3293
  const writer = await HtmlWriter.open(outputPath);
3313
3294
  for (const line of lines) {
@@ -3317,7 +3298,7 @@ async function convertJsonlToHtml(inputPath, outputPath) {
3317
3298
  return lines.length;
3318
3299
  }
3319
3300
  function convertJsonlToYaml(inputPath, outputPath) {
3320
- const content = readFileSync2(inputPath, "utf8");
3301
+ const content = readFileSync(inputPath, "utf8");
3321
3302
  const lines = content.trim().split("\n").filter((line) => line.trim());
3322
3303
  let yamlOutput = "";
3323
3304
  let isFirst = true;
@@ -3336,7 +3317,7 @@ function convertJsonlToYaml(inputPath, outputPath) {
3336
3317
  return lines.length;
3337
3318
  }
3338
3319
  function convertEvalsJsonToYaml(inputPath) {
3339
- const content = readFileSync2(inputPath, "utf8");
3320
+ const content = readFileSync(inputPath, "utf8");
3340
3321
  const parsed = JSON.parse(content);
3341
3322
  if (!isAgentSkillsFormat(parsed)) {
3342
3323
  throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
@@ -3924,7 +3905,7 @@ var evalPromptCommand = subcommands({
3924
3905
  });
3925
3906
 
3926
3907
  // src/commands/eval/commands/assert.ts
3927
- import { readFileSync as readFileSync3 } from "node:fs";
3908
+ import { readFileSync as readFileSync2 } from "node:fs";
3928
3909
  import path3 from "node:path";
3929
3910
  import fg from "fast-glob";
3930
3911
  var evalAssertCommand = command({
@@ -3956,7 +3937,7 @@ var evalAssertCommand = command({
3956
3937
  let resolvedOutput;
3957
3938
  let resolvedInput;
3958
3939
  if (file) {
3959
- const content = JSON.parse(readFileSync3(path3.resolve(file), "utf8"));
3940
+ const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
3960
3941
  resolvedOutput = content.output ?? "";
3961
3942
  resolvedInput = content.input ?? "";
3962
3943
  } else {
@@ -4144,11 +4125,6 @@ var evalRunCommand = command({
4144
4125
  long: "otel-file",
4145
4126
  description: "Write OTLP JSON trace to file (importable by OTel backends)"
4146
4127
  }),
4147
- traceFile: option({
4148
- type: optional(string),
4149
- long: "trace-file",
4150
- description: "Write human-readable trace JSONL to file"
4151
- }),
4152
4128
  exportOtel: flag({
4153
4129
  long: "export-otel",
4154
4130
  description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
@@ -4183,7 +4159,7 @@ var evalRunCommand = command({
4183
4159
  artifacts: option({
4184
4160
  type: optional(string),
4185
4161
  long: "artifacts",
4186
- description: "Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory"
4162
+ description: "Write companion artifacts (index.jsonl, <test>/grading.json, <test>/timing.json, timing.json, benchmark.json) to the specified directory"
4187
4163
  }),
4188
4164
  graderTarget: option({
4189
4165
  type: optional(string),
@@ -4203,7 +4179,7 @@ var evalRunCommand = command({
4203
4179
  },
4204
4180
  handler: async (args) => {
4205
4181
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4206
- const { launchInteractiveWizard } = await import("./interactive-FZJANO4A.js");
4182
+ const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
4207
4183
  await launchInteractiveWizard();
4208
4184
  return;
4209
4185
  }
@@ -4229,7 +4205,6 @@ var evalRunCommand = command({
4229
4205
  workspacePath: args.workspacePath,
4230
4206
  trace: false,
4231
4207
  otelFile: args.otelFile,
4232
- traceFile: args.traceFile,
4233
4208
  exportOtel: args.exportOtel,
4234
4209
  otelBackend: args.otelBackend,
4235
4210
  otelCaptureContent: args.otelCaptureContent,
@@ -4257,212 +4232,31 @@ var evalCommand = subcommands({
4257
4232
  }
4258
4233
  });
4259
4234
 
4260
- // src/commands/generate/rubrics.ts
4261
- import { readFile, writeFile as writeFile2 } from "node:fs/promises";
4262
- import path4 from "node:path";
4263
- import { pathToFileURL } from "node:url";
4264
- import { isMap, isSeq, parseDocument } from "yaml";
4265
- function isJsonObject(value) {
4266
- return typeof value === "object" && value !== null && !Array.isArray(value);
4267
- }
4268
- function asString(value) {
4269
- return typeof value === "string" ? value : void 0;
4270
- }
4271
- async function loadRubricGenerator() {
4272
- const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
4273
- if (customGenerator) {
4274
- const generatorPath = path4.resolve(customGenerator);
4275
- const generatorUrl = pathToFileURL(generatorPath).href;
4276
- const module = await import(generatorUrl);
4277
- return module.generateRubrics;
4278
- }
4279
- return generateRubrics;
4280
- }
4281
- async function generateRubricsCommand(options) {
4282
- const { file, target: targetOverride, verbose } = options;
4283
- console.log(`Generating rubrics for: ${file}`);
4284
- const absolutePath = path4.resolve(file);
4285
- const content = await readFile(absolutePath, "utf8");
4286
- const doc = parseDocument(content);
4287
- const parsed = doc.toJSON();
4288
- if (!isJsonObject(parsed)) {
4289
- throw new Error(`Invalid YAML file format: ${file}`);
4290
- }
4291
- const suite = parsed;
4292
- const evalcases = suite.tests;
4293
- if (!Array.isArray(evalcases)) {
4294
- throw new Error(`No tests found in ${file}`);
4295
- }
4296
- const targetSelection = await selectTarget({
4297
- testFilePath: absolutePath,
4298
- repoRoot: process.cwd(),
4299
- cwd: process.cwd(),
4300
- cliTargetName: targetOverride,
4301
- dryRun: false,
4302
- dryRunDelay: 0,
4303
- dryRunDelayMin: 0,
4304
- dryRunDelayMax: 0,
4305
- env: process.env
4306
- });
4307
- if (verbose) {
4308
- console.log(`Using target: ${targetSelection.targetName}`);
4309
- }
4310
- const provider = createProvider(targetSelection.resolvedTarget);
4311
- const generateRubricsFunc = await loadRubricGenerator();
4312
- let updatedCount = 0;
4313
- let skippedCount = 0;
4314
- const evalcasesNode = doc.getIn(["tests"]);
4315
- if (!evalcasesNode || !isSeq(evalcasesNode)) {
4316
- throw new Error("tests must be a sequence");
4317
- }
4318
- for (let i = 0; i < evalcases.length; i++) {
4319
- const rawCase = evalcases[i];
4320
- if (!isJsonObject(rawCase)) {
4321
- continue;
4322
- }
4323
- const evalCase = rawCase;
4324
- const id = asString(evalCase.id) ?? "unknown";
4325
- const expectedOutcome = asString(evalCase.criteria) ?? asString(evalCase.outcome);
4326
- if (!expectedOutcome) {
4327
- if (verbose) {
4328
- console.log(` Skipping ${id}: no criteria`);
4329
- }
4330
- skippedCount++;
4331
- continue;
4332
- }
4333
- if (evalCase.rubrics !== void 0) {
4334
- if (verbose) {
4335
- console.log(` Skipping ${id}: rubrics already defined`);
4336
- }
4337
- skippedCount++;
4338
- continue;
4339
- }
4340
- console.log(` Generating rubrics for: ${id}`);
4341
- const question = extractQuestion(evalCase);
4342
- const referenceAnswer = asString(evalCase.reference_answer);
4343
- const rubrics = await generateRubricsFunc({
4344
- criteria: expectedOutcome,
4345
- question,
4346
- referenceAnswer,
4347
- provider
4348
- });
4349
- const caseNode = evalcasesNode.items[i];
4350
- if (caseNode && isMap(caseNode)) {
4351
- caseNode.set(
4352
- "rubrics",
4353
- rubrics.filter((r) => r.outcome !== void 0).map((r) => ({
4354
- id: r.id,
4355
- outcome: r.outcome,
4356
- weight: r.weight,
4357
- required: r.required ?? true
4358
- }))
4359
- );
4360
- }
4361
- updatedCount++;
4362
- if (verbose) {
4363
- console.log(` Generated ${rubrics.length} rubric(s)`);
4364
- }
4365
- }
4366
- if (updatedCount > 0) {
4367
- const output = doc.toString();
4368
- await writeFile2(absolutePath, output, "utf8");
4369
- console.log(`
4370
- Updated ${updatedCount} test(s) with generated rubrics`);
4371
- if (skippedCount > 0) {
4372
- console.log(`Skipped ${skippedCount} test(s)`);
4373
- }
4374
- } else {
4375
- console.log("\nNo tests updated (all already have rubrics or missing criteria)");
4376
- }
4377
- }
4378
- function extractQuestion(evalCase) {
4379
- const explicitQuestion = asString(evalCase.question);
4380
- if (explicitQuestion) {
4381
- return explicitQuestion;
4382
- }
4383
- const inputMessages = evalCase.input;
4384
- if (!Array.isArray(inputMessages)) {
4385
- return void 0;
4386
- }
4387
- for (const msg of inputMessages) {
4388
- if (!isJsonObject(msg)) {
4389
- continue;
4390
- }
4391
- if (msg.role === "user" && typeof msg.content === "string") {
4392
- return msg.content;
4393
- }
4394
- }
4395
- return void 0;
4396
- }
4397
-
4398
- // src/commands/generate/index.ts
4399
- var rubricsCommand = command({
4400
- name: "rubrics",
4401
- description: "Generate rubrics from criteria in YAML eval file",
4402
- args: {
4403
- file: positional({
4404
- type: string,
4405
- displayName: "file",
4406
- description: "Path to YAML eval file"
4407
- }),
4408
- target: option({
4409
- type: optional(string),
4410
- long: "target",
4411
- short: "t",
4412
- description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
4413
- }),
4414
- verbose: flag({
4415
- long: "verbose",
4416
- short: "v",
4417
- description: "Show detailed progress"
4418
- })
4419
- },
4420
- handler: async ({ file, target, verbose }) => {
4421
- try {
4422
- await generateRubricsCommand({
4423
- file,
4424
- target,
4425
- verbose
4426
- });
4427
- } catch (error) {
4428
- console.error(`Error: ${error.message}`);
4429
- process.exit(1);
4430
- }
4431
- }
4432
- });
4433
- var generateCommand = subcommands({
4434
- name: "generate",
4435
- description: "Generate evaluation artifacts",
4436
- cmds: {
4437
- rubrics: rubricsCommand
4438
- }
4439
- });
4440
-
4441
4235
  // src/commands/init/index.ts
4442
4236
  import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4443
- import path6 from "node:path";
4237
+ import path5 from "node:path";
4444
4238
  import * as readline from "node:readline/promises";
4445
4239
 
4446
4240
  // src/templates/index.ts
4447
- import { readFileSync as readFileSync4, readdirSync, statSync } from "node:fs";
4448
- import path5 from "node:path";
4241
+ import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
4242
+ import path4 from "node:path";
4449
4243
  import { fileURLToPath } from "node:url";
4450
4244
  function getAgentvTemplates() {
4451
4245
  return getTemplatesFromDir(".agentv");
4452
4246
  }
4453
4247
  function getEnvExampleTemplate() {
4454
- const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4455
- const templatesBase = currentDir.includes(`${path5.sep}dist`) ? path5.join(currentDir, "templates") : currentDir;
4456
- const content = readFileSync4(path5.join(templatesBase, ".env.example"), "utf-8");
4248
+ const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4249
+ const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
4250
+ const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
4457
4251
  return { path: ".env.example", content };
4458
4252
  }
4459
4253
  function getTemplatesFromDir(subdir) {
4460
- const currentDir = path5.dirname(fileURLToPath(import.meta.url));
4254
+ const currentDir = path4.dirname(fileURLToPath(import.meta.url));
4461
4255
  let templatesDir;
4462
- if (currentDir.includes(`${path5.sep}dist`)) {
4463
- templatesDir = path5.join(currentDir, "templates", subdir);
4256
+ if (currentDir.includes(`${path4.sep}dist`)) {
4257
+ templatesDir = path4.join(currentDir, "templates", subdir);
4464
4258
  } else {
4465
- templatesDir = path5.join(currentDir, subdir);
4259
+ templatesDir = path4.join(currentDir, subdir);
4466
4260
  }
4467
4261
  return readTemplatesRecursively(templatesDir, "");
4468
4262
  }
@@ -4470,15 +4264,15 @@ function readTemplatesRecursively(dir, relativePath) {
4470
4264
  const templates = [];
4471
4265
  const entries2 = readdirSync(dir);
4472
4266
  for (const entry of entries2) {
4473
- const fullPath = path5.join(dir, entry);
4267
+ const fullPath = path4.join(dir, entry);
4474
4268
  const stat3 = statSync(fullPath);
4475
- const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
4269
+ const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
4476
4270
  if (stat3.isDirectory()) {
4477
4271
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
4478
4272
  } else {
4479
- const content = readFileSync4(fullPath, "utf-8");
4273
+ const content = readFileSync3(fullPath, "utf-8");
4480
4274
  templates.push({
4481
- path: entryRelativePath.split(path5.sep).join("/"),
4275
+ path: entryRelativePath.split(path4.sep).join("/"),
4482
4276
  // Normalize to forward slashes
4483
4277
  content
4484
4278
  });
@@ -4507,22 +4301,22 @@ async function promptYesNo(message) {
4507
4301
  }
4508
4302
  }
4509
4303
  async function initCommand(options = {}) {
4510
- const targetPath = path6.resolve(options.targetPath ?? ".");
4511
- const agentvDir = path6.join(targetPath, ".agentv");
4304
+ const targetPath = path5.resolve(options.targetPath ?? ".");
4305
+ const agentvDir = path5.join(targetPath, ".agentv");
4512
4306
  const otherAgentvTemplates = getAgentvTemplates();
4513
4307
  const envTemplate = getEnvExampleTemplate();
4514
4308
  const existingFiles = [];
4515
4309
  if (envTemplate) {
4516
- const envFilePath = path6.join(targetPath, ".env.example");
4310
+ const envFilePath = path5.join(targetPath, ".env.example");
4517
4311
  if (existsSync(envFilePath)) {
4518
4312
  existingFiles.push(".env.example");
4519
4313
  }
4520
4314
  }
4521
4315
  if (existsSync(agentvDir)) {
4522
4316
  for (const template of otherAgentvTemplates) {
4523
- const targetFilePath = path6.join(agentvDir, template.path);
4317
+ const targetFilePath = path5.join(agentvDir, template.path);
4524
4318
  if (existsSync(targetFilePath)) {
4525
- existingFiles.push(path6.relative(targetPath, targetFilePath));
4319
+ existingFiles.push(path5.relative(targetPath, targetFilePath));
4526
4320
  }
4527
4321
  }
4528
4322
  }
@@ -4544,18 +4338,18 @@ async function initCommand(options = {}) {
4544
4338
  mkdirSync(agentvDir, { recursive: true });
4545
4339
  }
4546
4340
  if (envTemplate) {
4547
- const envFilePath = path6.join(targetPath, ".env.example");
4341
+ const envFilePath = path5.join(targetPath, ".env.example");
4548
4342
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
4549
4343
  console.log("Created .env.example");
4550
4344
  }
4551
4345
  for (const template of otherAgentvTemplates) {
4552
- const targetFilePath = path6.join(agentvDir, template.path);
4553
- const targetDirPath = path6.dirname(targetFilePath);
4346
+ const targetFilePath = path5.join(agentvDir, template.path);
4347
+ const targetDirPath = path5.dirname(targetFilePath);
4554
4348
  if (!existsSync(targetDirPath)) {
4555
4349
  mkdirSync(targetDirPath, { recursive: true });
4556
4350
  }
4557
4351
  writeFileSync2(targetFilePath, template.content, "utf-8");
4558
- console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
4352
+ console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
4559
4353
  }
4560
4354
  console.log("\nAgentV initialized successfully!");
4561
4355
  console.log("\nFiles installed to root:");
@@ -4563,7 +4357,7 @@ async function initCommand(options = {}) {
4563
4357
  console.log(" - .env.example");
4564
4358
  }
4565
4359
  console.log(`
4566
- Files installed to ${path6.relative(targetPath, agentvDir)}:`);
4360
+ Files installed to ${path5.relative(targetPath, agentvDir)}:`);
4567
4361
  for (const t of otherAgentvTemplates) {
4568
4362
  console.log(` - ${t.path}`);
4569
4363
  }
@@ -4593,13 +4387,443 @@ var initCmdTsCommand = command({
4593
4387
  }
4594
4388
  });
4595
4389
 
4390
+ // src/commands/pipeline/bench.ts
4391
+ import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
4392
+ import { join } from "node:path";
4393
+ var evalBenchCommand = command({
4394
+ name: "bench",
4395
+ description: "Merge evaluator scores and produce benchmark artifacts",
4396
+ args: {
4397
+ exportDir: positional({
4398
+ type: string,
4399
+ displayName: "export-dir",
4400
+ description: "Export directory from pipeline input/grade"
4401
+ })
4402
+ },
4403
+ handler: async ({ exportDir }) => {
4404
+ const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
4405
+ const testIds = manifest.test_ids;
4406
+ const targetName = manifest.target?.name ?? "unknown";
4407
+ const stdinData = await readStdin();
4408
+ const llmScores = stdinData ? JSON.parse(stdinData) : {};
4409
+ const indexLines = [];
4410
+ const allPassRates = [];
4411
+ for (const testId of testIds) {
4412
+ const testDir = join(exportDir, testId);
4413
+ const evaluators = [];
4414
+ const allAssertions = [];
4415
+ const codeResultsDir = join(testDir, "code_grader_results");
4416
+ try {
4417
+ const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
4418
+ for (const file of resultFiles) {
4419
+ const result = JSON.parse(await readFile(join(codeResultsDir, file), "utf8"));
4420
+ evaluators.push({
4421
+ name: result.name,
4422
+ type: "code-grader",
4423
+ score: result.score,
4424
+ weight: result.weight ?? 1,
4425
+ assertions: result.assertions ?? []
4426
+ });
4427
+ for (const a of result.assertions ?? []) {
4428
+ allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
4429
+ }
4430
+ }
4431
+ } catch {
4432
+ }
4433
+ const testLlmScores = llmScores[testId] ?? {};
4434
+ const llmGradersDir = join(testDir, "llm_graders");
4435
+ try {
4436
+ const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
4437
+ for (const file of graderFiles) {
4438
+ const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
4439
+ const graderName = graderMeta.name;
4440
+ const llmResult = testLlmScores[graderName];
4441
+ if (llmResult) {
4442
+ evaluators.push({
4443
+ name: graderName,
4444
+ type: "llm-grader",
4445
+ score: llmResult.score,
4446
+ weight: graderMeta.weight ?? 1,
4447
+ assertions: llmResult.assertions ?? []
4448
+ });
4449
+ for (const a of llmResult.assertions ?? []) {
4450
+ allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
4451
+ }
4452
+ }
4453
+ }
4454
+ } catch {
4455
+ }
4456
+ const totalWeight = evaluators.reduce((sum, e) => sum + e.weight, 0);
4457
+ const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
4458
+ const passed = allAssertions.filter((a) => a.passed).length;
4459
+ const failed = allAssertions.filter((a) => !a.passed).length;
4460
+ const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
4461
+ allPassRates.push(passRate);
4462
+ const grading = {
4463
+ assertions: allAssertions,
4464
+ summary: { passed, failed, total: allAssertions.length, pass_rate: passRate },
4465
+ execution_metrics: { tool_calls: {}, total_tool_calls: 0, errors_encountered: 0 },
4466
+ evaluators: evaluators.map((e) => ({
4467
+ name: e.name,
4468
+ type: e.type,
4469
+ score: e.score,
4470
+ reasoning: "",
4471
+ weight: e.weight
4472
+ }))
4473
+ };
4474
+ await writeFile2(
4475
+ join(testDir, "grading.json"),
4476
+ `${JSON.stringify(grading, null, 2)}
4477
+ `,
4478
+ "utf8"
4479
+ );
4480
+ indexLines.push(
4481
+ JSON.stringify({
4482
+ timestamp: manifest.timestamp,
4483
+ test_id: testId,
4484
+ score: Math.round(weightedScore * 1e3) / 1e3,
4485
+ target: targetName,
4486
+ grading_path: `${testId}/grading.json`,
4487
+ timing_path: `${testId}/timing.json`
4488
+ })
4489
+ );
4490
+ }
4491
+ await writeFile2(
4492
+ join(exportDir, "index.jsonl"),
4493
+ indexLines.length > 0 ? `${indexLines.join("\n")}
4494
+ ` : "",
4495
+ "utf8"
4496
+ );
4497
+ const passRateStats = computeStats(allPassRates);
4498
+ const benchmark = {
4499
+ metadata: {
4500
+ eval_file: manifest.eval_file,
4501
+ timestamp: manifest.timestamp,
4502
+ targets: [targetName],
4503
+ tests_run: testIds
4504
+ },
4505
+ run_summary: {
4506
+ [targetName]: {
4507
+ pass_rate: passRateStats,
4508
+ time_seconds: { mean: 0, stddev: 0 },
4509
+ tokens: { mean: 0, stddev: 0 }
4510
+ }
4511
+ },
4512
+ notes: []
4513
+ };
4514
+ await writeFile2(
4515
+ join(exportDir, "benchmark.json"),
4516
+ `${JSON.stringify(benchmark, null, 2)}
4517
+ `,
4518
+ "utf8"
4519
+ );
4520
+ console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
4521
+ }
4522
+ });
4523
+ async function readStdin() {
4524
+ const chunks = [];
4525
+ for await (const chunk of process.stdin) {
4526
+ chunks.push(chunk);
4527
+ }
4528
+ return Buffer.concat(chunks).toString("utf8").trim();
4529
+ }
4530
+ function computeStats(values) {
4531
+ if (values.length === 0) return { mean: 0, stddev: 0 };
4532
+ const mean2 = values.reduce((sum, v) => sum + v, 0) / values.length;
4533
+ const variance = values.reduce((sum, v) => sum + (v - mean2) ** 2, 0) / values.length;
4534
+ return {
4535
+ mean: Math.round(mean2 * 1e3) / 1e3,
4536
+ stddev: Math.round(Math.sqrt(variance) * 1e3) / 1e3
4537
+ };
4538
+ }
4539
+
4540
+ // src/commands/pipeline/grade.ts
4541
+ import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
4542
+ import { join as join2 } from "node:path";
4543
+ var evalGradeCommand = command({
4544
+ name: "grade",
4545
+ description: "Run code-grader assertions on responses in an export directory",
4546
+ args: {
4547
+ exportDir: positional({
4548
+ type: string,
4549
+ displayName: "export-dir",
4550
+ description: "Export directory from pipeline input"
4551
+ })
4552
+ },
4553
+ handler: async ({ exportDir }) => {
4554
+ const manifestPath = join2(exportDir, "manifest.json");
4555
+ const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
4556
+ const testIds = manifest.test_ids;
4557
+ let totalGraders = 0;
4558
+ let totalPassed = 0;
4559
+ for (const testId of testIds) {
4560
+ const testDir = join2(exportDir, testId);
4561
+ const codeGradersDir = join2(testDir, "code_graders");
4562
+ const resultsDir = join2(testDir, "code_grader_results");
4563
+ let graderFiles;
4564
+ try {
4565
+ graderFiles = (await readdir2(codeGradersDir)).filter((f) => f.endsWith(".json"));
4566
+ } catch {
4567
+ continue;
4568
+ }
4569
+ if (graderFiles.length === 0) continue;
4570
+ await mkdir2(resultsDir, { recursive: true });
4571
+ const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
4572
+ const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
4573
+ for (const graderFile of graderFiles) {
4574
+ const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
4575
+ const graderName = graderConfig.name;
4576
+ const payload = JSON.stringify({
4577
+ output: [{ role: "assistant", content: responseText }],
4578
+ input: inputData.input_messages,
4579
+ question: inputData.input_text,
4580
+ criteria: "",
4581
+ expected_output: [],
4582
+ reference_answer: "",
4583
+ input_files: [],
4584
+ trace: null,
4585
+ token_usage: null,
4586
+ cost_usd: null,
4587
+ duration_ms: null,
4588
+ start_time: null,
4589
+ end_time: null,
4590
+ file_changes: null,
4591
+ workspace_path: null,
4592
+ config: graderConfig.config ?? null,
4593
+ metadata: {},
4594
+ input_text: inputData.input_text,
4595
+ output_text: responseText,
4596
+ expected_output_text: ""
4597
+ });
4598
+ try {
4599
+ const stdout = await executeScript(
4600
+ graderConfig.command,
4601
+ payload,
4602
+ void 0,
4603
+ graderConfig.cwd
4604
+ );
4605
+ const parsed = JSON.parse(stdout);
4606
+ const score = typeof parsed.score === "number" ? parsed.score : 0;
4607
+ const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
4608
+ const result = {
4609
+ name: graderName,
4610
+ type: "code-grader",
4611
+ score,
4612
+ weight: graderConfig.weight ?? 1,
4613
+ assertions,
4614
+ details: parsed.details ?? {}
4615
+ };
4616
+ await writeFile3(
4617
+ join2(resultsDir, `${graderName}.json`),
4618
+ `${JSON.stringify(result, null, 2)}
4619
+ `,
4620
+ "utf8"
4621
+ );
4622
+ totalGraders++;
4623
+ if (score >= 0.5) totalPassed++;
4624
+ } catch (error) {
4625
+ const message = error instanceof Error ? error.message : String(error);
4626
+ console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
4627
+ const errorResult = {
4628
+ name: graderName,
4629
+ type: "code-grader",
4630
+ score: 0,
4631
+ weight: graderConfig.weight ?? 1,
4632
+ assertions: [{ text: `Error: ${message}`, passed: false }],
4633
+ details: { error: message }
4634
+ };
4635
+ await writeFile3(
4636
+ join2(resultsDir, `${graderName}.json`),
4637
+ `${JSON.stringify(errorResult, null, 2)}
4638
+ `,
4639
+ "utf8"
4640
+ );
4641
+ totalGraders++;
4642
+ }
4643
+ }
4644
+ }
4645
+ console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
4646
+ }
4647
+ });
4648
+
4649
+ // src/commands/pipeline/input.ts
4650
+ import { readFile as readFile3 } from "node:fs/promises";
4651
+ import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
4652
+ import { dirname, join as join3, resolve } from "node:path";
4653
+ var evalInputCommand = command({
4654
+ name: "input",
4655
+ description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
4656
+ args: {
4657
+ evalPath: positional({
4658
+ type: string,
4659
+ displayName: "eval-path",
4660
+ description: "Path to eval YAML file"
4661
+ }),
4662
+ out: option({
4663
+ type: string,
4664
+ long: "out",
4665
+ description: "Output directory for extracted inputs"
4666
+ })
4667
+ },
4668
+ handler: async ({ evalPath, out }) => {
4669
+ const resolvedEvalPath = resolve(evalPath);
4670
+ const outDir = resolve(out);
4671
+ const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
4672
+ const evalDir = dirname(resolvedEvalPath);
4673
+ const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
4674
+ const tests = suite.tests;
4675
+ if (tests.length === 0) {
4676
+ console.error("No tests found in eval file.");
4677
+ process.exit(1);
4678
+ }
4679
+ let targetInfo = null;
4680
+ let targetName = "agent";
4681
+ let targetKind = "agent";
4682
+ try {
4683
+ const selection = await selectTarget({
4684
+ testFilePath: resolvedEvalPath,
4685
+ repoRoot,
4686
+ cwd: evalDir,
4687
+ dryRun: false,
4688
+ dryRunDelay: 0,
4689
+ dryRunDelayMin: 0,
4690
+ dryRunDelayMax: 0,
4691
+ env: process.env
4692
+ });
4693
+ targetName = selection.targetName;
4694
+ if (selection.resolvedTarget.kind === "cli") {
4695
+ targetKind = "cli";
4696
+ const config = selection.resolvedTarget.config;
4697
+ targetInfo = {
4698
+ kind: "cli",
4699
+ command: config.command,
4700
+ cwd: config.cwd ?? evalDir,
4701
+ timeoutMs: config.timeoutMs ?? 3e4
4702
+ };
4703
+ }
4704
+ } catch {
4705
+ }
4706
+ const testIds = [];
4707
+ for (const test of tests) {
4708
+ const testDir = join3(outDir, test.id);
4709
+ await mkdir3(testDir, { recursive: true });
4710
+ testIds.push(test.id);
4711
+ const inputText = test.question;
4712
+ const inputMessages = test.input.map((m) => ({
4713
+ role: m.role,
4714
+ content: typeof m.content === "string" ? m.content : m.content
4715
+ }));
4716
+ await writeJson(join3(testDir, "input.json"), {
4717
+ input_text: inputText,
4718
+ input_messages: inputMessages,
4719
+ file_paths: test.file_paths,
4720
+ metadata: test.metadata ?? {}
4721
+ });
4722
+ if (targetInfo) {
4723
+ await writeJson(join3(testDir, "invoke.json"), {
4724
+ kind: "cli",
4725
+ command: targetInfo.command,
4726
+ cwd: targetInfo.cwd,
4727
+ timeout_ms: targetInfo.timeoutMs,
4728
+ env: {}
4729
+ });
4730
+ } else {
4731
+ await writeJson(join3(testDir, "invoke.json"), {
4732
+ kind: "agent",
4733
+ instructions: "Execute this task in the current workspace. The agent IS the target."
4734
+ });
4735
+ }
4736
+ await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
4737
+ if (test.expected_output.length > 0 || test.reference_answer !== void 0 && test.reference_answer !== "") {
4738
+ await writeJson(join3(testDir, "expected_output.json"), {
4739
+ expected_output: test.expected_output,
4740
+ reference_answer: test.reference_answer ?? ""
4741
+ });
4742
+ }
4743
+ await writeGraderConfigs(testDir, test.assertions ?? [], evalDir);
4744
+ }
4745
+ await writeJson(join3(outDir, "manifest.json"), {
4746
+ eval_file: resolvedEvalPath,
4747
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4748
+ target: {
4749
+ name: targetName,
4750
+ kind: targetKind
4751
+ },
4752
+ test_ids: testIds
4753
+ });
4754
+ console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
4755
+ }
4756
+ });
4757
+ async function writeGraderConfigs(testDir, assertions, evalDir) {
4758
+ const codeGradersDir = join3(testDir, "code_graders");
4759
+ const llmGradersDir = join3(testDir, "llm_graders");
4760
+ let hasCodeGraders = false;
4761
+ let hasLlmGraders = false;
4762
+ for (const assertion of assertions) {
4763
+ if (assertion.type === "code-grader" || assertion.type === "code-judge") {
4764
+ if (!hasCodeGraders) {
4765
+ await mkdir3(codeGradersDir, { recursive: true });
4766
+ hasCodeGraders = true;
4767
+ }
4768
+ const config = assertion;
4769
+ await writeJson(join3(codeGradersDir, `${config.name}.json`), {
4770
+ name: config.name,
4771
+ command: config.command,
4772
+ cwd: config.resolvedCwd ?? config.cwd ?? evalDir,
4773
+ weight: config.weight ?? 1,
4774
+ config: config.config ?? {}
4775
+ });
4776
+ } else if (assertion.type === "llm-grader" || assertion.type === "llm-judge") {
4777
+ if (!hasLlmGraders) {
4778
+ await mkdir3(llmGradersDir, { recursive: true });
4779
+ hasLlmGraders = true;
4780
+ }
4781
+ const config = assertion;
4782
+ let promptContent = "";
4783
+ if (config.resolvedPromptPath) {
4784
+ try {
4785
+ promptContent = await readFile3(config.resolvedPromptPath, "utf8");
4786
+ } catch {
4787
+ promptContent = typeof config.prompt === "string" ? config.prompt : "";
4788
+ }
4789
+ } else if (typeof config.prompt === "string") {
4790
+ promptContent = config.prompt;
4791
+ }
4792
+ await writeJson(join3(llmGradersDir, `${config.name}.json`), {
4793
+ name: config.name,
4794
+ prompt_content: promptContent,
4795
+ weight: config.weight ?? 1,
4796
+ threshold: 0.5,
4797
+ config: {}
4798
+ });
4799
+ }
4800
+ }
4801
+ }
4802
+ async function writeJson(filePath, data) {
4803
+ await writeFile4(filePath, `${JSON.stringify(data, null, 2)}
4804
+ `, "utf8");
4805
+ }
4806
+
4807
+ // src/commands/pipeline/index.ts
4808
+ var pipelineCommand = subcommands({
4809
+ name: "pipeline",
4810
+ description: "Agent-mode eval pipeline commands (input \u2192 grade \u2192 bench)",
4811
+ cmds: {
4812
+ input: evalInputCommand,
4813
+ grade: evalGradeCommand,
4814
+ bench: evalBenchCommand
4815
+ }
4816
+ });
4817
+
4596
4818
  // src/commands/results/export.ts
4597
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
4598
- import path8 from "node:path";
4819
+ import path7 from "node:path";
4820
+
4821
+ // src/commands/results/shared.ts
4822
+ import { existsSync as existsSync2 } from "node:fs";
4599
4823
 
4600
4824
  // src/commands/trace/utils.ts
4601
- import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4602
- import path7 from "node:path";
4825
+ import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
4826
+ import path6 from "node:path";
4603
4827
  var colors2 = {
4604
4828
  reset: "\x1B[0m",
4605
4829
  bold: "\x1B[1m",
@@ -4625,7 +4849,20 @@ function padLeft2(str, len) {
4625
4849
  return " ".repeat(Math.max(0, len - plainLen)) + str;
4626
4850
  }
4627
4851
  function loadResultFile(filePath) {
4628
- const content = readFileSync5(filePath, "utf8");
4852
+ const resolvedFilePath = resolveTraceResultPath(filePath);
4853
+ if (path6.extname(resolvedFilePath) === ".json") {
4854
+ return loadOtlpTraceFile(resolvedFilePath);
4855
+ }
4856
+ if (path6.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
4857
+ return loadManifestAsRawResults(resolvedFilePath);
4858
+ }
4859
+ return loadJsonlRecords(resolvedFilePath);
4860
+ }
4861
+ function resolveTraceResultPath(filePath) {
4862
+ return resolveWorkspaceOrFilePath(filePath);
4863
+ }
4864
+ function loadJsonlRecords(filePath) {
4865
+ const content = readFileSync4(filePath, "utf8");
4629
4866
  const lines = content.trim().split("\n").filter((line) => line.trim());
4630
4867
  return lines.map((line, i) => {
4631
4868
  const record = JSON.parse(line);
@@ -4635,25 +4872,280 @@ function loadResultFile(filePath) {
4635
4872
  return record;
4636
4873
  });
4637
4874
  }
4875
+ function loadManifestAsRawResults(filePath) {
4876
+ return loadManifestResults(filePath).map(toRawResult);
4877
+ }
4878
+ function toRawResult(result) {
4879
+ return {
4880
+ timestamp: result.timestamp,
4881
+ test_id: result.testId,
4882
+ eval_set: result.eval_set,
4883
+ conversation_id: result.conversationId,
4884
+ score: result.score,
4885
+ assertions: result.assertions?.map((assertion) => ({
4886
+ text: assertion.text,
4887
+ passed: assertion.passed,
4888
+ evidence: assertion.evidence
4889
+ })),
4890
+ target: result.target,
4891
+ error: result.error,
4892
+ scores: result.scores?.map((score) => ({
4893
+ name: score.name,
4894
+ type: score.type,
4895
+ score: score.score,
4896
+ assertions: score.assertions?.map((assertion) => ({
4897
+ text: assertion.text,
4898
+ passed: assertion.passed,
4899
+ evidence: assertion.evidence
4900
+ })),
4901
+ weight: score.weight
4902
+ })),
4903
+ token_usage: result.tokenUsage ? {
4904
+ input: result.tokenUsage.input,
4905
+ output: result.tokenUsage.output,
4906
+ cached: result.tokenUsage.cached
4907
+ } : void 0,
4908
+ cost_usd: result.costUsd,
4909
+ duration_ms: result.durationMs,
4910
+ start_time: result.startTime,
4911
+ end_time: result.endTime,
4912
+ input: result.input,
4913
+ output: result.output,
4914
+ file_changes: result.fileChanges
4915
+ };
4916
+ }
4917
+ function loadOtlpTraceFile(filePath) {
4918
+ const parsed = JSON.parse(readFileSync4(filePath, "utf8"));
4919
+ const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
4920
+ if (!spans || spans.length === 0) {
4921
+ return [];
4922
+ }
4923
+ const spanMap = /* @__PURE__ */ new Map();
4924
+ const childMap = /* @__PURE__ */ new Map();
4925
+ for (const span of spans) {
4926
+ if (!span.spanId) continue;
4927
+ spanMap.set(span.spanId, span);
4928
+ if (span.parentSpanId) {
4929
+ const siblings = childMap.get(span.parentSpanId) ?? [];
4930
+ siblings.push(span);
4931
+ childMap.set(span.parentSpanId, siblings);
4932
+ }
4933
+ }
4934
+ const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
4935
+ const supportedRoots = roots.filter(isAgentvEvalRoot);
4936
+ const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
4937
+ return candidateRoots.map((root, index) => {
4938
+ const descendants = collectChildSpans(root.spanId, childMap);
4939
+ const rootAttrs = parseOtlpAttributes(root.attributes);
4940
+ const parsedDescendants = descendants.map((span) => ({
4941
+ ...span,
4942
+ parsedAttributes: parseOtlpAttributes(span.attributes)
4943
+ }));
4944
+ const toolSpans = parsedDescendants.filter(
4945
+ (span) => typeof span.parsedAttributes.gen_ai_tool_name === "string"
4946
+ );
4947
+ const llmSpans = parsedDescendants.filter(
4948
+ (span) => span.parsedAttributes.gen_ai_operation_name === "chat" || typeof span.name === "string" && span.name.startsWith("chat ")
4949
+ );
4950
+ const tokenUsage = descendants.reduce(
4951
+ (acc, span) => {
4952
+ const attrs = parseOtlpAttributes(span.attributes);
4953
+ acc.input += numberAttr(attrs.gen_ai_usage_input_tokens) ?? 0;
4954
+ acc.output += numberAttr(attrs.gen_ai_usage_output_tokens) ?? 0;
4955
+ const cached = numberAttr(attrs.gen_ai_usage_cache_read_input_tokens);
4956
+ if (cached !== void 0 && cached > 0) {
4957
+ acc.cached = (acc.cached ?? 0) + cached;
4958
+ }
4959
+ return acc;
4960
+ },
4961
+ { input: 0, output: 0, cached: void 0 }
4962
+ );
4963
+ const traceSummary = buildDerivedTraceSummary({
4964
+ trace: {
4965
+ event_count: numberAttr(rootAttrs.agentv_trace_event_count) ?? (toolSpans.length > 0 ? toolSpans.length : void 0),
4966
+ tool_calls: countRawSpanNames(
4967
+ toolSpans.map((span) => ({
4968
+ type: "tool",
4969
+ name: String(span.parsedAttributes.gen_ai_tool_name)
4970
+ }))
4971
+ ),
4972
+ error_count: descendants.filter((span) => span.status?.code === 2).length || void 0,
4973
+ llm_call_count: numberAttr(rootAttrs.agentv_trace_llm_call_count) ?? (llmSpans.length > 0 ? llmSpans.length : void 0)
4974
+ },
4975
+ spans: [
4976
+ ...llmSpans.map((span) => ({
4977
+ type: "llm",
4978
+ name: span.name ?? "chat",
4979
+ duration_ms: durationFromSpan(span)
4980
+ })),
4981
+ ...toolSpans.map((span) => ({
4982
+ type: "tool",
4983
+ name: String(span.parsedAttributes.gen_ai_tool_name),
4984
+ duration_ms: durationFromSpan(span)
4985
+ }))
4986
+ ],
4987
+ duration_ms: numberAttr(rootAttrs.agentv_trace_duration_ms) ?? durationFromSpan(root),
4988
+ cost_usd: numberAttr(rootAttrs.agentv_trace_cost_usd),
4989
+ token_usage: tokenUsage.input || tokenUsage.output || tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_input) || numberAttr(rootAttrs.agentv_trace_token_output) || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
4990
+ input: tokenUsage.input || numberAttr(rootAttrs.agentv_trace_token_input) || 0,
4991
+ output: tokenUsage.output || numberAttr(rootAttrs.agentv_trace_token_output) || 0,
4992
+ ...tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
4993
+ cached: tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) || 0
4994
+ } : {}
4995
+ } : void 0
4996
+ });
4997
+ const score = numberAttr(rootAttrs.agentv_score);
4998
+ if (score === void 0) {
4999
+ throw new Error(
5000
+ `Unsupported OTLP trace root span at index ${index + 1}: missing agentv.score attribute`
5001
+ );
5002
+ }
5003
+ return {
5004
+ test_id: stringAttr(rootAttrs.agentv_test_id) ?? stringAttr(rootAttrs.agentv_eval_id) ?? `trace-${index + 1}`,
5005
+ eval_set: stringAttr(rootAttrs.agentv_eval_set),
5006
+ target: stringAttr(rootAttrs.agentv_target),
5007
+ score,
5008
+ error: root.status?.code === 2 ? root.status.message : void 0,
5009
+ cost_usd: traceSummary?.cost_usd,
5010
+ duration_ms: traceSummary?.duration_ms,
5011
+ token_usage: traceSummary?.token_usage,
5012
+ trace: traceSummary ? {
5013
+ event_count: traceSummary.event_count,
5014
+ tool_calls: traceSummary.tool_calls,
5015
+ error_count: traceSummary.error_count,
5016
+ tool_durations: traceSummary.tool_durations,
5017
+ llm_call_count: traceSummary.llm_call_count,
5018
+ token_usage: traceSummary.token_usage,
5019
+ cost_usd: traceSummary.cost_usd,
5020
+ duration_ms: traceSummary.duration_ms
5021
+ } : void 0,
5022
+ spans: traceSummary?.spans,
5023
+ output: stringAttr(rootAttrs.agentv_output_text),
5024
+ scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
5025
+ const attrs = parseOtlpAttributes(event.attributes);
5026
+ const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
5027
+ return {
5028
+ name,
5029
+ type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
5030
+ score: numberAttr(attrs.agentv_evaluator_score) ?? 0
5031
+ };
5032
+ })
5033
+ };
5034
+ });
5035
+ }
5036
+ function isAgentvEvalRoot(span) {
5037
+ const attrs = parseOtlpAttributes(span.attributes);
5038
+ return span.name === "agentv.eval" || numberAttr(attrs.agentv_score) !== void 0 || typeof stringAttr(attrs.agentv_test_id) === "string";
5039
+ }
5040
+ function collectChildSpans(spanId, childMap) {
5041
+ if (!spanId) return [];
5042
+ const direct = childMap.get(spanId) ?? [];
5043
+ const all = [...direct];
5044
+ for (const child of direct) {
5045
+ all.push(...collectChildSpans(child.spanId, childMap));
5046
+ }
5047
+ return all;
5048
+ }
5049
+ function parseOtlpAttributes(attributes) {
5050
+ const parsed = {};
5051
+ for (const attribute of attributes ?? []) {
5052
+ parsed[attribute.key.replace(/\./g, "_")] = parseOtlpValue(attribute.value);
5053
+ }
5054
+ return parsed;
5055
+ }
5056
+ function parseOtlpValue(value) {
5057
+ if (!value) return void 0;
5058
+ if ("stringValue" in value && value.stringValue !== void 0) return value.stringValue;
5059
+ if ("intValue" in value && value.intValue !== void 0) return Number(value.intValue);
5060
+ if ("doubleValue" in value && value.doubleValue !== void 0) return value.doubleValue;
5061
+ if ("boolValue" in value && value.boolValue !== void 0) return value.boolValue;
5062
+ if ("arrayValue" in value)
5063
+ return (value.arrayValue?.values ?? []).map((entry) => parseOtlpValue(entry));
5064
+ return void 0;
5065
+ }
5066
+ function durationFromSpan(span) {
5067
+ const start = Number(span.startTimeUnixNano);
5068
+ const end = Number(span.endTimeUnixNano);
5069
+ if (!Number.isFinite(start) || !Number.isFinite(end)) return void 0;
5070
+ return Math.round((end - start) / 1e6);
5071
+ }
5072
+ function stringAttr(value) {
5073
+ return typeof value === "string" ? value : void 0;
5074
+ }
5075
+ function numberAttr(value) {
5076
+ return typeof value === "number" && Number.isFinite(value) ? value : void 0;
5077
+ }
5078
+ function buildDerivedTraceSummary(result) {
5079
+ const toolSpans = (result.spans ?? []).filter((span) => span.type === "tool");
5080
+ const llmSpans = (result.spans ?? []).filter((span) => span.type === "llm");
5081
+ const toolCalls = result.trace?.tool_calls ?? countRawSpanNames(toolSpans);
5082
+ const toolDurations = result.trace?.tool_durations ?? groupRawSpanDurations(toolSpans);
5083
+ const hasSpanData = (result.spans?.length ?? 0) > 0;
5084
+ const eventCount = result.trace?.event_count ?? (hasSpanData ? toolSpans.length : void 0);
5085
+ const llmCallCount = result.trace?.llm_call_count ?? (hasSpanData ? llmSpans.length : void 0);
5086
+ if (!result.trace && !result.spans?.length && result.token_usage === void 0 && result.cost_usd === void 0 && result.duration_ms === void 0) {
5087
+ return void 0;
5088
+ }
5089
+ return {
5090
+ event_count: eventCount,
5091
+ tool_calls: toolCalls,
5092
+ error_count: result.trace?.error_count,
5093
+ tool_durations: toolDurations,
5094
+ llm_call_count: llmCallCount,
5095
+ token_usage: result.trace?.token_usage ?? result.token_usage,
5096
+ cost_usd: result.trace?.cost_usd ?? result.cost_usd,
5097
+ duration_ms: result.trace?.duration_ms ?? result.duration_ms,
5098
+ spans: result.spans
5099
+ };
5100
+ }
5101
+ function countRawSpanNames(spans) {
5102
+ const counts = {};
5103
+ for (const span of spans) {
5104
+ counts[span.name] = (counts[span.name] ?? 0) + 1;
5105
+ }
5106
+ return Object.keys(counts).length > 0 ? counts : void 0;
5107
+ }
5108
+ function groupRawSpanDurations(spans) {
5109
+ const grouped = {};
5110
+ for (const span of spans) {
5111
+ if (span.duration_ms === void 0) continue;
5112
+ const existing = grouped[span.name] ?? [];
5113
+ existing.push(span.duration_ms);
5114
+ grouped[span.name] = existing;
5115
+ }
5116
+ return Object.keys(grouped).length > 0 ? grouped : void 0;
5117
+ }
5118
+ function getTraceSummary(result) {
5119
+ const derived = buildDerivedTraceSummary(result);
5120
+ if (!derived) return void 0;
5121
+ const { spans: _spans, ...trace } = derived;
5122
+ return trace;
5123
+ }
5124
+ function getTraceSpans(result) {
5125
+ return buildDerivedTraceSummary(result)?.spans ?? [];
5126
+ }
5127
+ function toTraceSummary(result) {
5128
+ const rawTrace = getTraceSummary(result);
5129
+ if (!rawTrace) return void 0;
5130
+ return toCamelCaseDeep(rawTrace);
5131
+ }
4638
5132
  function listResultFiles(cwd, limit) {
4639
- const baseDir = path7.join(cwd, ".agentv", "results");
4640
- const rawDir = path7.join(baseDir, "raw");
5133
+ const baseDir = path6.join(cwd, ".agentv", "results");
5134
+ const rawDir = path6.join(baseDir, "raw");
4641
5135
  const files = [];
4642
5136
  try {
4643
5137
  const entries2 = readdirSync2(rawDir, { withFileTypes: true });
4644
5138
  for (const entry of entries2) {
4645
5139
  if (entry.isDirectory()) {
4646
- const jsonlPath = path7.join(rawDir, entry.name, "results.jsonl");
4647
- try {
4648
- statSync2(jsonlPath);
4649
- files.push({ filePath: jsonlPath, displayName: entry.name });
4650
- } catch {
5140
+ const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
5141
+ if (primaryPath) {
5142
+ files.push({ filePath: primaryPath, displayName: entry.name });
4651
5143
  }
4652
5144
  }
4653
5145
  }
4654
5146
  for (const entry of entries2) {
4655
5147
  if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
4656
- files.push({ filePath: path7.join(rawDir, entry.name), displayName: entry.name });
5148
+ files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
4657
5149
  }
4658
5150
  }
4659
5151
  } catch {
@@ -4661,7 +5153,7 @@ function listResultFiles(cwd, limit) {
4661
5153
  try {
4662
5154
  const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
4663
5155
  for (const entry of entries2) {
4664
- files.push({ filePath: path7.join(baseDir, entry), displayName: entry });
5156
+ files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
4665
5157
  }
4666
5158
  } catch {
4667
5159
  }
@@ -4729,84 +5221,65 @@ function formatScore(score) {
4729
5221
  return `${(score * 100).toFixed(0)}%`;
4730
5222
  }
4731
5223
 
4732
- // src/commands/results/export.ts
4733
- function exportResults(sourceFile, content, outputDir) {
4734
- const results = parseJsonlResults(content);
5224
+ // src/commands/results/shared.ts
5225
+ var sourceArg = positional({
5226
+ type: optional(string),
5227
+ displayName: "source",
5228
+ description: "Result file or workspace directory (defaults to most recent in .agentv/results/)"
5229
+ });
5230
+ async function resolveSourceFile(source, cwd) {
5231
+ let sourceFile;
5232
+ if (source) {
5233
+ sourceFile = resolveResultSourcePath(source, cwd);
5234
+ if (!existsSync2(sourceFile)) {
5235
+ console.error(`Error: File not found: ${sourceFile}`);
5236
+ process.exit(1);
5237
+ }
5238
+ } else {
5239
+ const cache = await loadRunCache(cwd);
5240
+ const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5241
+ if (cachedFile && existsSync2(cachedFile)) {
5242
+ sourceFile = cachedFile;
5243
+ } else {
5244
+ const metas = listResultFiles(cwd, 1);
5245
+ if (metas.length === 0) {
5246
+ console.error("Error: No result files found in .agentv/results/");
5247
+ console.error("Run an evaluation first: agentv eval <eval-file>");
5248
+ process.exit(1);
5249
+ }
5250
+ sourceFile = metas[0].path;
5251
+ }
5252
+ }
5253
+ return { sourceFile };
5254
+ }
5255
+ async function loadResults(source, cwd) {
5256
+ const { sourceFile } = await resolveSourceFile(source, cwd);
5257
+ const results = loadManifestResults(sourceFile);
4735
5258
  if (results.length === 0) {
4736
- throw new Error(`No results found in ${sourceFile}`);
5259
+ console.error(`No results found in ${sourceFile}`);
5260
+ process.exit(1);
4737
5261
  }
4738
- const patched = results.map((r) => {
5262
+ return { results: patchTestIds(results), sourceFile };
5263
+ }
5264
+ function patchTestIds(results) {
5265
+ return results.map((r) => {
4739
5266
  if (!r.testId && r.evalId) {
4740
5267
  return { ...r, testId: String(r.evalId) };
4741
5268
  }
4742
5269
  return r;
4743
5270
  });
4744
- mkdirSync2(outputDir, { recursive: true });
4745
- const benchmark = buildBenchmarkArtifact(patched, sourceFile);
4746
- writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
4747
- `);
4748
- const timing = buildTimingArtifact(patched);
4749
- writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
4750
- `);
4751
- const aggregateGrading = buildAggregateGradingArtifact(patched);
4752
- writeFileSync3(
4753
- path8.join(outputDir, "grading.json"),
4754
- `${JSON.stringify(aggregateGrading, null, 2)}
4755
- `
4756
- );
4757
- const gradingDir = path8.join(outputDir, "grading");
4758
- mkdirSync2(gradingDir, { recursive: true });
4759
- for (const result of patched) {
4760
- const id = safeTestId(result);
4761
- const grading = buildGradingArtifact(result);
4762
- writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
4763
- `);
4764
- }
4765
- const outputsDir = path8.join(outputDir, "outputs");
4766
- mkdirSync2(outputsDir, { recursive: true });
4767
- for (const result of patched) {
4768
- if (result.output && result.output.length > 0) {
4769
- const id = safeTestId(result);
4770
- const md = formatOutputMarkdown(result.output);
4771
- writeFileSync3(path8.join(outputsDir, `${id}.md`), md);
4772
- }
4773
- }
4774
- const inputsDir = path8.join(outputDir, "inputs");
4775
- mkdirSync2(inputsDir, { recursive: true });
4776
- for (const result of patched) {
4777
- const id = safeTestId(result);
4778
- const input = extractInput(result);
4779
- if (input) {
4780
- writeFileSync3(path8.join(inputsDir, `${id}.md`), input);
4781
- }
4782
- }
4783
- }
4784
- function formatOutputMarkdown(output) {
4785
- return output.map((msg) => `@[${msg.role}]:
4786
- ${String(msg.content ?? "")}`).join("\n\n");
4787
- }
4788
- function extractInput(result) {
4789
- const input = result.input;
4790
- if (!input) return null;
4791
- if (typeof input === "string") return input;
4792
- if (Array.isArray(input) && input.length > 0) {
4793
- return formatOutputMarkdown(input);
4794
- }
4795
- return null;
4796
- }
4797
- function safeTestId(result) {
4798
- const raw = result.testId ?? result.evalId ?? "unknown";
4799
- return String(raw).replace(/[/\\:*?"<>|]/g, "_");
4800
5271
  }
5272
+
5273
+ // src/commands/results/export.ts
4801
5274
  function deriveOutputDir(cwd, sourceFile) {
4802
- const parentDir = path8.basename(path8.dirname(sourceFile));
5275
+ const parentDir = path7.basename(path7.dirname(sourceFile));
4803
5276
  if (parentDir.startsWith("eval_")) {
4804
5277
  const dirName2 = parentDir.slice(5);
4805
- return path8.join(cwd, ".agentv", "results", "export", dirName2);
5278
+ return path7.join(cwd, ".agentv", "results", "export", dirName2);
4806
5279
  }
4807
- const basename = path8.basename(sourceFile, ".jsonl");
5280
+ const basename = path7.basename(sourceFile, ".jsonl");
4808
5281
  const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
4809
- return path8.join(cwd, ".agentv", "results", "export", dirName);
5282
+ return path7.join(cwd, ".agentv", "results", "export", dirName);
4810
5283
  }
4811
5284
  var resultsExportCommand = command({
4812
5285
  name: "export",
@@ -4833,28 +5306,12 @@ var resultsExportCommand = command({
4833
5306
  handler: async ({ source, out, dir }) => {
4834
5307
  const cwd = dir ?? process.cwd();
4835
5308
  try {
4836
- let sourceFile;
4837
- if (source) {
4838
- sourceFile = path8.isAbsolute(source) ? source : path8.resolve(cwd, source);
4839
- } else {
4840
- const cache = await loadRunCache(cwd);
4841
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
4842
- if (cachedFile && existsSync2(cachedFile)) {
4843
- sourceFile = cachedFile;
4844
- } else {
4845
- const metas = listResultFiles(cwd, 1);
4846
- if (metas.length === 0) {
4847
- console.error("Error: No result files found in .agentv/results/");
4848
- console.error("Run an evaluation first: agentv eval <eval-file>");
4849
- process.exit(1);
4850
- }
4851
- sourceFile = metas[0].path;
4852
- }
4853
- }
4854
- const content = readFileSync6(sourceFile, "utf8");
4855
- const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
4856
- exportResults(sourceFile, content, outputDir);
4857
- const results = parseJsonlResults(content);
5309
+ const { sourceFile } = await resolveSourceFile(source, cwd);
5310
+ const { results } = await loadResults(source, cwd);
5311
+ const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
5312
+ await writeArtifactsFromResults(results, outputDir, {
5313
+ evalFile: sourceFile
5314
+ });
4858
5315
  console.log(`Exported ${results.length} test(s) to ${outputDir}`);
4859
5316
  for (const result of results) {
4860
5317
  const id = result.testId ?? result.evalId ?? "unknown";
@@ -4867,58 +5324,6 @@ var resultsExportCommand = command({
4867
5324
  }
4868
5325
  });
4869
5326
 
4870
- // src/commands/results/shared.ts
4871
- import { existsSync as existsSync3, readFileSync as readFileSync7 } from "node:fs";
4872
- import path9 from "node:path";
4873
- var sourceArg = positional({
4874
- type: optional(string),
4875
- displayName: "source",
4876
- description: "JSONL result file (defaults to most recent in .agentv/results/)"
4877
- });
4878
- async function resolveSourceFile(source, cwd) {
4879
- let sourceFile;
4880
- if (source) {
4881
- sourceFile = path9.isAbsolute(source) ? source : path9.resolve(cwd, source);
4882
- if (!existsSync3(sourceFile)) {
4883
- console.error(`Error: File not found: ${sourceFile}`);
4884
- process.exit(1);
4885
- }
4886
- } else {
4887
- const cache = await loadRunCache(cwd);
4888
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
4889
- if (cachedFile && existsSync3(cachedFile)) {
4890
- sourceFile = cachedFile;
4891
- } else {
4892
- const metas = listResultFiles(cwd, 1);
4893
- if (metas.length === 0) {
4894
- console.error("Error: No result files found in .agentv/results/");
4895
- console.error("Run an evaluation first: agentv eval <eval-file>");
4896
- process.exit(1);
4897
- }
4898
- sourceFile = metas[0].path;
4899
- }
4900
- }
4901
- const content = readFileSync7(sourceFile, "utf8");
4902
- return { sourceFile, content };
4903
- }
4904
- async function loadResults(source, cwd) {
4905
- const { sourceFile, content } = await resolveSourceFile(source, cwd);
4906
- const results = parseJsonlResults(content);
4907
- if (results.length === 0) {
4908
- console.error(`No results found in ${sourceFile}`);
4909
- process.exit(1);
4910
- }
4911
- return { results: patchTestIds(results), sourceFile };
4912
- }
4913
- function patchTestIds(results) {
4914
- return results.map((r) => {
4915
- if (!r.testId && r.evalId) {
4916
- return { ...r, testId: String(r.evalId) };
4917
- }
4918
- return r;
4919
- });
4920
- }
4921
-
4922
5327
  // src/commands/results/failures.ts
4923
5328
  function formatFailures(results) {
4924
5329
  return results.filter((r) => r.score < 1).map((r) => {
@@ -5045,7 +5450,7 @@ var resultsShowCommand = command({
5045
5450
  });
5046
5451
 
5047
5452
  // src/commands/results/summary.ts
5048
- import { existsSync as existsSync4, readFileSync as readFileSync8 } from "node:fs";
5453
+ import { existsSync as existsSync3, readFileSync as readFileSync5 } from "node:fs";
5049
5454
  function formatSummary(results, grading) {
5050
5455
  const total = results.length;
5051
5456
  let passed;
@@ -5096,9 +5501,9 @@ var resultsSummaryCommand = command({
5096
5501
  const { results, sourceFile } = await loadResults(source, cwd);
5097
5502
  let grading;
5098
5503
  const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
5099
- if (existsSync4(gradingPath)) {
5504
+ if (existsSync3(gradingPath)) {
5100
5505
  try {
5101
- grading = JSON.parse(readFileSync8(gradingPath, "utf8"));
5506
+ grading = JSON.parse(readFileSync5(gradingPath, "utf8"));
5102
5507
  } catch {
5103
5508
  }
5104
5509
  }
@@ -5123,68 +5528,26 @@ var resultsCommand = subcommands({
5123
5528
  });
5124
5529
 
5125
5530
  // src/commands/results/serve.ts
5126
- import { existsSync as existsSync5, readFileSync as readFileSync9, writeFileSync as writeFileSync4 } from "node:fs";
5127
- import path10 from "node:path";
5531
+ import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
5532
+ import path8 from "node:path";
5128
5533
  import { Hono } from "hono";
5129
- async function resolveSourceFile2(source, cwd) {
5130
- if (source) {
5131
- const resolved = path10.isAbsolute(source) ? source : path10.resolve(cwd, source);
5132
- if (!existsSync5(resolved)) {
5133
- throw new Error(`Source file not found: ${resolved}`);
5134
- }
5135
- return resolved;
5136
- }
5137
- const cache = await loadRunCache(cwd);
5138
- const cachedFile = cache ? resolveRunCacheFile(cache) : "";
5139
- if (cachedFile && existsSync5(cachedFile)) {
5140
- return cachedFile;
5141
- }
5142
- const metas = listResultFiles(cwd, 10);
5143
- if (metas.length === 0) {
5144
- throw new Error(
5145
- "No result files found in .agentv/results/\nRun an evaluation first: agentv eval <eval-file>"
5146
- );
5147
- }
5148
- if (metas.length > 1) {
5149
- console.log("Available result files:");
5150
- for (const m of metas) {
5151
- console.log(` ${m.path}`);
5152
- }
5153
- console.log(`
5154
- Serving most recent: ${metas[0].path}
5155
- `);
5156
- }
5157
- return metas[0].path;
5158
- }
5159
- function loadResults2(content) {
5160
- const results = parseJsonlResults(content);
5161
- if (results.length === 0) {
5162
- throw new Error("No valid results found in JSONL content");
5163
- }
5164
- return results.map((r) => {
5165
- if (!r.testId && r.evalId) {
5166
- return { ...r, testId: String(r.evalId) };
5167
- }
5168
- return r;
5169
- });
5170
- }
5171
5534
  function feedbackPath(cwd) {
5172
- return path10.join(cwd, "feedback.json");
5535
+ return path8.join(cwd, "feedback.json");
5173
5536
  }
5174
5537
  function readFeedback(cwd) {
5175
5538
  const fp = feedbackPath(cwd);
5176
- if (!existsSync5(fp)) {
5539
+ if (!existsSync4(fp)) {
5177
5540
  return { reviews: [] };
5178
5541
  }
5179
5542
  try {
5180
- return JSON.parse(readFileSync9(fp, "utf8"));
5543
+ return JSON.parse(readFileSync6(fp, "utf8"));
5181
5544
  } catch (err2) {
5182
5545
  console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
5183
5546
  return { reviews: [] };
5184
5547
  }
5185
5548
  }
5186
5549
  function writeFeedback(cwd, data) {
5187
- writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5550
+ writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5188
5551
  `, "utf8");
5189
5552
  }
5190
5553
  function createApp(results, cwd) {
@@ -5854,9 +6217,7 @@ var resultsServeCommand = command({
5854
6217
  const cwd = dir ?? process.cwd();
5855
6218
  const listenPort = port ?? 3117;
5856
6219
  try {
5857
- const sourceFile = await resolveSourceFile2(source, cwd);
5858
- const content = readFileSync9(sourceFile, "utf8");
5859
- const results = loadResults2(content);
6220
+ const { results, sourceFile } = await loadResults(source, cwd);
5860
6221
  const app2 = createApp(results, cwd);
5861
6222
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
5862
6223
  console.log(`Dashboard: http://localhost:${listenPort}`);
@@ -5889,7 +6250,7 @@ function detectPackageManager() {
5889
6250
  return detectPackageManagerFromPath(process.argv[1] ?? "");
5890
6251
  }
5891
6252
  function runCommand(cmd, args) {
5892
- return new Promise((resolve, reject) => {
6253
+ return new Promise((resolve2, reject) => {
5893
6254
  const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
5894
6255
  let stdout = "";
5895
6256
  child.stdout?.on("data", (data) => {
@@ -5897,7 +6258,7 @@ function runCommand(cmd, args) {
5897
6258
  stdout += data.toString();
5898
6259
  });
5899
6260
  child.on("error", reject);
5900
- child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
6261
+ child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
5901
6262
  });
5902
6263
  }
5903
6264
  var updateCommand = command({
@@ -6109,10 +6470,6 @@ function parseAssertSpec(spec) {
6109
6470
  );
6110
6471
  }
6111
6472
  }
6112
- function toTraceSummary(raw) {
6113
- if (!raw.trace) return void 0;
6114
- return toCamelCaseDeep(raw.trace);
6115
- }
6116
6473
  function extractCandidate(raw) {
6117
6474
  if (raw.output !== void 0)
6118
6475
  return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
@@ -6224,8 +6581,8 @@ var traceScoreCommand = command({
6224
6581
  args: {
6225
6582
  file: positional({
6226
6583
  type: string,
6227
- displayName: "result-file",
6228
- description: "Path to JSONL result file"
6584
+ displayName: "trace-source",
6585
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6229
6586
  }),
6230
6587
  assert: option({
6231
6588
  type: string,
@@ -6271,11 +6628,11 @@ var traceScoreCommand = command({
6271
6628
  );
6272
6629
  if (traceRequired) {
6273
6630
  const hasTrace = results.some(
6274
- (r) => r.trace || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
6631
+ (r) => toTraceSummary(r) || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
6275
6632
  );
6276
6633
  if (!hasTrace) {
6277
6634
  console.error(
6278
- `${c2.red}Error:${c2.reset} Result file lacks trace data. Re-run eval with ${c2.bold}--trace${c2.reset} to capture trace summaries.`
6635
+ `${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
6279
6636
  );
6280
6637
  process.exit(1);
6281
6638
  }
@@ -6308,7 +6665,7 @@ var traceScoreCommand = command({
6308
6665
 
6309
6666
  // src/commands/trace/show.ts
6310
6667
  function renderFlatTrace(result) {
6311
- const trace = result.trace;
6668
+ const trace = getTraceSummary(result);
6312
6669
  const parts = [];
6313
6670
  if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
6314
6671
  const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
@@ -6339,8 +6696,12 @@ function renderScores(scores) {
6339
6696
  }
6340
6697
  function renderTree(result) {
6341
6698
  const messages = result.output;
6699
+ const spans = getTraceSpans(result);
6342
6700
  if (!messages || messages.length === 0) {
6343
- if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
6701
+ if (spans.length > 0) {
6702
+ return renderSpanTree(result, spans);
6703
+ }
6704
+ if (getTraceSummary(result) || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
6344
6705
  return renderFlatTrace(result);
6345
6706
  }
6346
6707
  return `${c2.dim}No trace data available${c2.reset}`;
@@ -6406,6 +6767,30 @@ function renderTree(result) {
6406
6767
  }
6407
6768
  return lines.join("\n");
6408
6769
  }
6770
+ function renderSpanTree(result, spans) {
6771
+ const lines = [];
6772
+ const testId = result.test_id ?? result.eval_id ?? "unknown";
6773
+ const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
6774
+ const rootParts = [testId];
6775
+ if (result.duration_ms !== void 0) rootParts.push(formatDuration(result.duration_ms));
6776
+ if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
6777
+ if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
6778
+ lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
6779
+ spans.forEach((span, index) => {
6780
+ const connector = index === spans.length - 1 ? "\u2514\u2500" : "\u251C\u2500";
6781
+ const color = span.type === "llm" ? c2.cyan : c2.yellow;
6782
+ const parts = [`${color}${span.name}${c2.reset}`];
6783
+ if (span.duration_ms !== void 0) {
6784
+ parts.push(formatDuration(span.duration_ms));
6785
+ }
6786
+ lines.push(`${connector} ${parts.join(", ")}`);
6787
+ });
6788
+ if (result.scores && result.scores.length > 0) {
6789
+ lines.push("");
6790
+ lines.push(`${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
6791
+ }
6792
+ return lines.join("\n");
6793
+ }
6409
6794
  function formatResultDetail(result, index, tree) {
6410
6795
  const lines = [];
6411
6796
  const testId = result.test_id ?? result.eval_id ?? `result-${index}`;
@@ -6489,8 +6874,8 @@ var traceShowCommand = command({
6489
6874
  args: {
6490
6875
  file: positional({
6491
6876
  type: string,
6492
- displayName: "result-file",
6493
- description: "Path to JSONL result file"
6877
+ displayName: "trace-source",
6878
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6494
6879
  }),
6495
6880
  testId: option({
6496
6881
  type: optional(string),
@@ -6499,7 +6884,7 @@ var traceShowCommand = command({
6499
6884
  }),
6500
6885
  tree: flag({
6501
6886
  long: "tree",
6502
- description: "Show hierarchical trace tree (requires results with --trace output)"
6887
+ description: "Show hierarchical trace tree from output messages or exported trace spans"
6503
6888
  }),
6504
6889
  format: option({
6505
6890
  type: optional(oneOf(["table", "json"])),
@@ -6570,11 +6955,11 @@ function collectMetrics(results) {
6570
6955
  formatter: (n) => formatNumber(Math.round(n))
6571
6956
  });
6572
6957
  }
6573
- const toolCalls = results.map((r) => r.trace?.event_count).filter((v) => v !== void 0);
6958
+ const toolCalls = results.map((r) => getTraceSummary(r)?.event_count).filter((v) => v !== void 0);
6574
6959
  if (toolCalls.length > 0) {
6575
6960
  rows.push({ name: "tool_calls", values: toolCalls, formatter: (n) => String(Math.round(n)) });
6576
6961
  }
6577
- const llmCalls = results.map((r) => r.trace?.llm_call_count).filter((v) => v !== void 0);
6962
+ const llmCalls = results.map((r) => getTraceSummary(r)?.llm_call_count).filter((v) => v !== void 0);
6578
6963
  if (llmCalls.length > 0) {
6579
6964
  rows.push({ name: "llm_calls", values: llmCalls, formatter: (n) => String(Math.round(n)) });
6580
6965
  }
@@ -6668,8 +7053,8 @@ var traceStatsCommand = command({
6668
7053
  args: {
6669
7054
  file: positional({
6670
7055
  type: string,
6671
- displayName: "result-file",
6672
- description: "Path to JSONL result file"
7056
+ displayName: "trace-source",
7057
+ description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
6673
7058
  }),
6674
7059
  groupBy: option({
6675
7060
  type: optional(oneOf(["target", "eval-set", "test-id"])),
@@ -6719,8 +7104,8 @@ var traceCommand = subcommands({
6719
7104
  });
6720
7105
 
6721
7106
  // src/commands/transpile/index.ts
6722
- import { writeFileSync as writeFileSync5 } from "node:fs";
6723
- import path11 from "node:path";
7107
+ import { writeFileSync as writeFileSync4 } from "node:fs";
7108
+ import path9 from "node:path";
6724
7109
  var transpileCommand = command({
6725
7110
  name: "transpile",
6726
7111
  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -6744,7 +7129,7 @@ var transpileCommand = command({
6744
7129
  handler: async ({ input, outDir, stdout }) => {
6745
7130
  let result;
6746
7131
  try {
6747
- result = transpileEvalYamlFile(path11.resolve(input));
7132
+ result = transpileEvalYamlFile(path9.resolve(input));
6748
7133
  } catch (error) {
6749
7134
  console.error(`Error: ${error.message}`);
6750
7135
  process.exit(1);
@@ -6768,12 +7153,12 @@ var transpileCommand = command({
6768
7153
  process.stdout.write("\n");
6769
7154
  return;
6770
7155
  }
6771
- const outputDir = outDir ? path11.resolve(outDir) : path11.dirname(path11.resolve(input));
7156
+ const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
6772
7157
  const fileNames = getOutputFilenames(result);
6773
7158
  for (const [skill, evalsJson] of result.files) {
6774
7159
  const fileName = fileNames.get(skill) ?? "evals.json";
6775
- const outputPath = path11.join(outputDir, fileName);
6776
- writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
7160
+ const outputPath = path9.join(outputDir, fileName);
7161
+ writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
6777
7162
  `);
6778
7163
  console.log(`Transpiled to ${outputPath}`);
6779
7164
  }
@@ -6781,7 +7166,7 @@ var transpileCommand = command({
6781
7166
  });
6782
7167
 
6783
7168
  // src/commands/trim/index.ts
6784
- import { readFileSync as readFileSync10, writeFileSync as writeFileSync6 } from "node:fs";
7169
+ import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
6785
7170
  var trimCommand = command({
6786
7171
  name: "trim",
6787
7172
  description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -6800,7 +7185,7 @@ var trimCommand = command({
6800
7185
  },
6801
7186
  handler: async ({ input, out }) => {
6802
7187
  try {
6803
- const content = readFileSync10(input, "utf8");
7188
+ const content = readFileSync7(input, "utf8");
6804
7189
  const lines = content.trim().split("\n").filter((line) => line.trim());
6805
7190
  const trimmedLines = lines.map((line) => {
6806
7191
  const record = JSON.parse(line);
@@ -6812,7 +7197,7 @@ var trimCommand = command({
6812
7197
  const output = `${trimmedLines.join("\n")}
6813
7198
  `;
6814
7199
  if (out) {
6815
- writeFileSync6(out, output, "utf8");
7200
+ writeFileSync5(out, output, "utf8");
6816
7201
  console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
6817
7202
  } else {
6818
7203
  process.stdout.write(output);
@@ -6906,8 +7291,8 @@ function isTTY() {
6906
7291
 
6907
7292
  // src/commands/validate/validate-files.ts
6908
7293
  import { constants } from "node:fs";
6909
- import { access, readdir, stat } from "node:fs/promises";
6910
- import path12 from "node:path";
7294
+ import { access, readdir as readdir3, stat } from "node:fs/promises";
7295
+ import path10 from "node:path";
6911
7296
  async function validateFiles(paths) {
6912
7297
  const filePaths = await expandPaths(paths);
6913
7298
  const results = [];
@@ -6925,7 +7310,7 @@ async function validateFiles(paths) {
6925
7310
  };
6926
7311
  }
6927
7312
  async function validateSingleFile(filePath) {
6928
- const absolutePath = path12.resolve(filePath);
7313
+ const absolutePath = path10.resolve(filePath);
6929
7314
  const fileType = await detectFileType(absolutePath);
6930
7315
  let result;
6931
7316
  if (fileType === "eval") {
@@ -6950,7 +7335,7 @@ async function validateSingleFile(filePath) {
6950
7335
  async function expandPaths(paths) {
6951
7336
  const expanded = [];
6952
7337
  for (const inputPath of paths) {
6953
- const absolutePath = path12.resolve(inputPath);
7338
+ const absolutePath = path10.resolve(inputPath);
6954
7339
  try {
6955
7340
  await access(absolutePath, constants.F_OK);
6956
7341
  } catch {
@@ -6972,9 +7357,9 @@ async function expandPaths(paths) {
6972
7357
  async function findYamlFiles(dirPath) {
6973
7358
  const results = [];
6974
7359
  try {
6975
- const entries2 = await readdir(dirPath, { withFileTypes: true });
7360
+ const entries2 = await readdir3(dirPath, { withFileTypes: true });
6976
7361
  for (const entry of entries2) {
6977
- const fullPath = path12.join(dirPath, entry.name);
7362
+ const fullPath = path10.join(dirPath, entry.name);
6978
7363
  if (entry.isDirectory()) {
6979
7364
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
6980
7365
  continue;
@@ -6991,7 +7376,7 @@ async function findYamlFiles(dirPath) {
6991
7376
  return results;
6992
7377
  }
6993
7378
  function isYamlFile(filePath) {
6994
- const ext = path12.extname(filePath).toLowerCase();
7379
+ const ext = path10.extname(filePath).toLowerCase();
6995
7380
  return ext === ".yaml" || ext === ".yml";
6996
7381
  }
6997
7382
 
@@ -7029,14 +7414,14 @@ var validateCommand = command({
7029
7414
  });
7030
7415
 
7031
7416
  // src/commands/workspace/clean.ts
7032
- import { existsSync as existsSync6 } from "node:fs";
7033
- import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
7034
- import path13 from "node:path";
7417
+ import { existsSync as existsSync5 } from "node:fs";
7418
+ import { readFile as readFile4, readdir as readdir4, rm } from "node:fs/promises";
7419
+ import path11 from "node:path";
7035
7420
  async function confirm(message) {
7036
7421
  const readline2 = await import("node:readline");
7037
7422
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
7038
- const answer = await new Promise((resolve) => {
7039
- rl.question(`${message} [y/N] `, resolve);
7423
+ const answer = await new Promise((resolve2) => {
7424
+ rl.question(`${message} [y/N] `, resolve2);
7040
7425
  });
7041
7426
  rl.close();
7042
7427
  return answer.toLowerCase() === "y";
@@ -7058,19 +7443,19 @@ var cleanCommand = command({
7058
7443
  },
7059
7444
  handler: async ({ repo, force }) => {
7060
7445
  const poolRoot = getWorkspacePoolRoot();
7061
- if (!existsSync6(poolRoot)) {
7446
+ if (!existsSync5(poolRoot)) {
7062
7447
  console.log("No workspace pool entries found.");
7063
7448
  return;
7064
7449
  }
7065
7450
  if (repo) {
7066
- const entries2 = await readdir2(poolRoot, { withFileTypes: true });
7451
+ const entries2 = await readdir4(poolRoot, { withFileTypes: true });
7067
7452
  const poolDirs = entries2.filter((e) => e.isDirectory());
7068
7453
  const matchingDirs = [];
7069
7454
  for (const dir of poolDirs) {
7070
- const poolDir = path13.join(poolRoot, dir.name);
7071
- const metadataPath = path13.join(poolDir, "metadata.json");
7455
+ const poolDir = path11.join(poolRoot, dir.name);
7456
+ const metadataPath = path11.join(poolDir, "metadata.json");
7072
7457
  try {
7073
- const raw = await readFile2(metadataPath, "utf-8");
7458
+ const raw = await readFile4(metadataPath, "utf-8");
7074
7459
  const metadata = JSON.parse(raw);
7075
7460
  const hasRepo = metadata.repos?.some((r) => {
7076
7461
  if (r.source.type === "git" && r.source.url) {
@@ -7099,7 +7484,7 @@ var cleanCommand = command({
7099
7484
  }
7100
7485
  for (const dir of matchingDirs) {
7101
7486
  await rm(dir, { recursive: true, force: true });
7102
- console.log(`Removed: ${path13.basename(dir).slice(0, 12)}...`);
7487
+ console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
7103
7488
  }
7104
7489
  console.log("Done.");
7105
7490
  } else {
@@ -7117,15 +7502,15 @@ var cleanCommand = command({
7117
7502
  });
7118
7503
 
7119
7504
  // src/commands/workspace/list.ts
7120
- import { existsSync as existsSync7 } from "node:fs";
7121
- import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
7122
- import path14 from "node:path";
7505
+ import { existsSync as existsSync6 } from "node:fs";
7506
+ import { readFile as readFile5, readdir as readdir5, stat as stat2 } from "node:fs/promises";
7507
+ import path12 from "node:path";
7123
7508
  async function getDirectorySize(dirPath) {
7124
7509
  let totalSize = 0;
7125
7510
  try {
7126
- const entries2 = await readdir3(dirPath, { withFileTypes: true });
7511
+ const entries2 = await readdir5(dirPath, { withFileTypes: true });
7127
7512
  for (const entry of entries2) {
7128
- const fullPath = path14.join(dirPath, entry.name);
7513
+ const fullPath = path12.join(dirPath, entry.name);
7129
7514
  if (entry.isDirectory()) {
7130
7515
  totalSize += await getDirectorySize(fullPath);
7131
7516
  } else {
@@ -7149,25 +7534,25 @@ var listCommand = command({
7149
7534
  args: {},
7150
7535
  handler: async () => {
7151
7536
  const poolRoot = getWorkspacePoolRoot();
7152
- if (!existsSync7(poolRoot)) {
7537
+ if (!existsSync6(poolRoot)) {
7153
7538
  console.log("No workspace pool entries found.");
7154
7539
  return;
7155
7540
  }
7156
- const entries2 = await readdir3(poolRoot, { withFileTypes: true });
7541
+ const entries2 = await readdir5(poolRoot, { withFileTypes: true });
7157
7542
  const poolDirs = entries2.filter((e) => e.isDirectory());
7158
7543
  if (poolDirs.length === 0) {
7159
7544
  console.log("No workspace pool entries found.");
7160
7545
  return;
7161
7546
  }
7162
7547
  for (const dir of poolDirs) {
7163
- const poolDir = path14.join(poolRoot, dir.name);
7548
+ const poolDir = path12.join(poolRoot, dir.name);
7164
7549
  const fingerprint = dir.name;
7165
- const poolEntries = await readdir3(poolDir, { withFileTypes: true });
7550
+ const poolEntries = await readdir5(poolDir, { withFileTypes: true });
7166
7551
  const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
7167
- const metadataPath = path14.join(poolDir, "metadata.json");
7552
+ const metadataPath = path12.join(poolDir, "metadata.json");
7168
7553
  let metadata = null;
7169
7554
  try {
7170
- const raw = await readFile3(metadataPath, "utf-8");
7555
+ const raw = await readFile5(metadataPath, "utf-8");
7171
7556
  metadata = JSON.parse(raw);
7172
7557
  } catch {
7173
7558
  }
@@ -7204,16 +7589,16 @@ var workspaceCommand = subcommands({
7204
7589
 
7205
7590
  // src/update-check.ts
7206
7591
  import { spawn as spawn2 } from "node:child_process";
7207
- import { readFile as readFile4 } from "node:fs/promises";
7208
- import { join } from "node:path";
7592
+ import { readFile as readFile6 } from "node:fs/promises";
7593
+ import { join as join4 } from "node:path";
7209
7594
  var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
7210
7595
  var AGENTV_DIR = getAgentvHome();
7211
7596
  var CACHE_FILE = "version-check.json";
7212
7597
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
7213
- async function getCachedUpdateInfo(path15) {
7214
- const filePath = path15 ?? join(AGENTV_DIR, CACHE_FILE);
7598
+ async function getCachedUpdateInfo(path13) {
7599
+ const filePath = path13 ?? join4(AGENTV_DIR, CACHE_FILE);
7215
7600
  try {
7216
- const raw = await readFile4(filePath, "utf-8");
7601
+ const raw = await readFile6(filePath, "utf-8");
7217
7602
  const data = JSON.parse(raw);
7218
7603
  if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
7219
7604
  return data;
@@ -7245,7 +7630,7 @@ function buildNotice(currentVersion, latestVersion) {
7245
7630
  }
7246
7631
  function backgroundUpdateCheck() {
7247
7632
  const dir = AGENTV_DIR;
7248
- const filePath = join(dir, CACHE_FILE);
7633
+ const filePath = join4(dir, CACHE_FILE);
7249
7634
  const script = `
7250
7635
  const https = require('https');
7251
7636
  const fs = require('fs');
@@ -7299,8 +7684,8 @@ var app = subcommands({
7299
7684
  compare: compareCommand,
7300
7685
  convert: convertCommand,
7301
7686
  create: createCommand,
7302
- generate: generateCommand,
7303
7687
  init: initCmdTsCommand,
7688
+ pipeline: pipelineCommand,
7304
7689
  results: resultsCommand,
7305
7690
  self: selfCommand,
7306
7691
  serve: resultsServeCommand,
@@ -7317,8 +7702,8 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
7317
7702
  "compare",
7318
7703
  "convert",
7319
7704
  "create",
7320
- "generate",
7321
7705
  "init",
7706
+ "pipeline",
7322
7707
  "results",
7323
7708
  "self",
7324
7709
  "serve",
@@ -7368,4 +7753,4 @@ export {
7368
7753
  preprocessArgv,
7369
7754
  runCli
7370
7755
  };
7371
- //# sourceMappingURL=chunk-V2S5CZU3.js.map
7756
+ //# sourceMappingURL=chunk-DJU4C6NS.js.map