npm - agentv - Versions diffs - 3.11.1 → 3.13.0 - Mend

agentv 3.11.1 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

package/README.md +15 -12
package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
package/dist/{chunk-CKMAM2GD.js → chunk-6H4IAXQH.js} +435 -198
package/dist/chunk-6H4IAXQH.js.map +1 -0
package/dist/{chunk-OYD2NB55.js → chunk-7OHZAFND.js} +120 -29
package/dist/chunk-7OHZAFND.js.map +1 -0
package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} +914 -529
package/dist/chunk-DJU4C6NS.js.map +1 -0
package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
package/dist/cli.js +4 -6
package/dist/cli.js.map +1 -1
package/dist/{dist-VUPMLHIV.js → dist-SMKOBBFB.js} +3 -8
package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
package/dist/index.js +4 -6
package/dist/{interactive-FZJANO4A.js → interactive-RV664PCR.js} +4 -6
package/dist/{interactive-FZJANO4A.js.map → interactive-RV664PCR.js.map} +1 -1
package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
package/package.json +1 -1
package/dist/chunk-2QFWRIYL.js +0 -186
package/dist/chunk-2QFWRIYL.js.map +0 -1
package/dist/chunk-2RMPO6LY.js +0 -747
package/dist/chunk-2RMPO6LY.js.map +0 -1
package/dist/chunk-3Q7WIXT4.js +0 -4846
package/dist/chunk-3Q7WIXT4.js.map +0 -1
package/dist/chunk-73O2DCJP.js +0 -1274
package/dist/chunk-73O2DCJP.js.map +0 -1
package/dist/chunk-AUKF3Y3W.js +0 -212
package/dist/chunk-AUKF3Y3W.js.map +0 -1
package/dist/chunk-BRH7SIDP.js +0 -133
package/dist/chunk-BRH7SIDP.js.map +0 -1
package/dist/chunk-BXM4I3BM.js +0 -526
package/dist/chunk-BXM4I3BM.js.map +0 -1
package/dist/chunk-CKMAM2GD.js.map +0 -1
package/dist/chunk-FHTURHTY.js +0 -546
package/dist/chunk-FHTURHTY.js.map +0 -1
package/dist/chunk-GJFXQQWG.js +0 -21
package/dist/chunk-GJFXQQWG.js.map +0 -1
package/dist/chunk-HKMLG4KF.js +0 -38
package/dist/chunk-HKMLG4KF.js.map +0 -1
package/dist/chunk-JGU3PVA4.js +0 -133
package/dist/chunk-JGU3PVA4.js.map +0 -1
package/dist/chunk-JK6V4KVD.js +0 -114
package/dist/chunk-JK6V4KVD.js.map +0 -1
package/dist/chunk-LHU5FGVZ.js +0 -4804
package/dist/chunk-LHU5FGVZ.js.map +0 -1
package/dist/chunk-OL2WGI6E.js +0 -149
package/dist/chunk-OL2WGI6E.js.map +0 -1
package/dist/chunk-ONETZL6N.js +0 -15
package/dist/chunk-ONETZL6N.js.map +0 -1
package/dist/chunk-OYD2NB55.js.map +0 -1
package/dist/chunk-QV4UGEN6.js +0 -320
package/dist/chunk-QV4UGEN6.js.map +0 -1
package/dist/chunk-QXLDKGF3.js +0 -46
package/dist/chunk-QXLDKGF3.js.map +0 -1
package/dist/chunk-U6VEM66A.js +0 -63
package/dist/chunk-U6VEM66A.js.map +0 -1
package/dist/chunk-UALXHIMX.js +0 -48
package/dist/chunk-UALXHIMX.js.map +0 -1
package/dist/chunk-UGXG73VF.js +0 -55
package/dist/chunk-UGXG73VF.js.map +0 -1
package/dist/chunk-UHP5KEDL.js +0 -38
package/dist/chunk-UHP5KEDL.js.map +0 -1
package/dist/chunk-V2S5CZU3.js.map +0 -1
package/dist/chunk-WVSXFZWP.js +0 -204
package/dist/chunk-WVSXFZWP.js.map +0 -1
package/dist/chunk-XSUMCWKO.js +0 -30
package/dist/chunk-XSUMCWKO.js.map +0 -1
package/dist/chunk-XUO7ZEHU.js +0 -181
package/dist/chunk-XUO7ZEHU.js.map +0 -1
package/dist/chunk-YSGUX5JT.js +0 -1002
package/dist/chunk-YSGUX5JT.js.map +0 -1
package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
package/dist/esm-UYZ3HJBU.js.map +0 -1
package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js +0 -10
package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
/package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
/package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
/package/dist/{dist-VUPMLHIV.js.map → dist-SMKOBBFB.js.map} +0 -0
/package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
/package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
/package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0

package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} RENAMED Viewed

@@ -1,35 +1,36 @@
 import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
 import {
   HtmlWriter,
-  buildAggregateGradingArtifact,
-  buildBenchmarkArtifact,
-  buildGradingArtifact,
-  buildTimingArtifact,
+  RESULT_INDEX_FILENAME,
   detectFileType,
   findRepoRoot,
+  loadLightweightResults,
+  loadManifestResults,
   loadRunCache,
   package_default,
-  parseJsonlResults,
   resolveEvalPaths,
+  resolveExistingRunPrimaryPath,
+  resolveResultSourcePath,
   resolveRunCacheFile,
+  resolveWorkspaceOrFilePath,
   runEvalCommand,
   selectTarget,
   toSnakeCaseDeep,
   validateConfigFile,
   validateEvalFile,
   validateFileReferences,
-  validateTargetsFile
-} from "./chunk-CKMAM2GD.js";
+  validateTargetsFile,
+  writeArtifactsFromResults
+} from "./chunk-6H4IAXQH.js";
 import {
   createBuiltinRegistry,
-  createProvider,
   executeScript,
-  generateRubrics,
   getAgentvHome,
   getOutputFilenames,
   getWorkspacePoolRoot,
   isAgentSkillsFormat,
   loadTestById,
+  loadTestSuite,
   loadTests,
   normalizeLineEndings,
   parseAgentSkillsEvals,
@@ -37,7 +38,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-OYD2NB55.js";
+} from "./chunk-7OHZAFND.js";
 import {
   __commonJS,
   __esm,
@@ -2888,7 +2889,6 @@ function oneOf(literals) {
 }
 // src/commands/compare/index.ts
-import { readFileSync } from "node:fs";
 var colors = {
   reset: "\x1B[0m",
   bold: "\x1B[1m",
@@ -2902,41 +2902,22 @@ var colors = {
 var noColor = process.env.NO_COLOR !== void 0 || !process.stdout.isTTY;
 var c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ""])) : colors;
 function loadJsonlResults(filePath) {
-  const content = readFileSync(filePath, "utf8");
-  const lines = content.trim().split("\n").filter((line) => line.trim());
-  return lines.map((line) => {
-    const record = JSON.parse(line);
-    const testId = record.test_id ?? record.eval_id;
-    if (typeof testId !== "string") {
-      throw new Error(`Missing test_id in result: ${line}`);
-    }
-    if (typeof record.score !== "number") {
-      throw new Error(`Missing or invalid score in result: ${line}`);
-    }
-    return { testId, score: record.score };
-  });
+  return loadLightweightResults(resolveResultSourcePath(filePath)).map((record) => ({
+    testId: record.testId,
+    score: record.score
+  }));
 }
 function loadCombinedResults(filePath) {
-  const content = readFileSync(filePath, "utf8");
-  const lines = content.trim().split("\n").filter((line) => line.trim());
   const groups = /* @__PURE__ */ new Map();
-  for (const line of lines) {
-    const record = JSON.parse(line);
-    const testId = record.test_id ?? record.eval_id;
-    if (typeof testId !== "string") {
-      throw new Error(`Missing test_id in result: ${line}`);
-    }
-    if (typeof record.score !== "number") {
-      throw new Error(`Missing or invalid score in result: ${line}`);
-    }
+  for (const record of loadLightweightResults(resolveResultSourcePath(filePath))) {
     if (typeof record.target !== "string") {
-      throw new Error(`Missing target field in combined result: ${line}`);
+      throw new Error(`Missing target field in combined result source: ${filePath}`);
     }
     const target = record.target;
     if (!groups.has(target)) {
       groups.set(target, []);
     }
-    groups.get(target)?.push({ testId, score: record.score });
+    groups.get(target)?.push({ testId: record.testId, score: record.score });
   }
   return groups;
 }
@@ -3303,11 +3284,11 @@ var compareCommand = command({
 });
 // src/commands/convert/index.ts
-import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
+import { readFileSync, writeFileSync } from "node:fs";
 import path from "node:path";
 import { stringify as stringifyYaml } from "yaml";
 async function convertJsonlToHtml(inputPath, outputPath) {
-  const content = readFileSync2(inputPath, "utf8");
+  const content = readFileSync(inputPath, "utf8");
   const lines = content.trim().split("\n").filter((line) => line.trim());
   const writer = await HtmlWriter.open(outputPath);
   for (const line of lines) {
@@ -3317,7 +3298,7 @@ async function convertJsonlToHtml(inputPath, outputPath) {
   return lines.length;
 }
 function convertJsonlToYaml(inputPath, outputPath) {
-  const content = readFileSync2(inputPath, "utf8");
+  const content = readFileSync(inputPath, "utf8");
   const lines = content.trim().split("\n").filter((line) => line.trim());
   let yamlOutput = "";
   let isFirst = true;
@@ -3336,7 +3317,7 @@ function convertJsonlToYaml(inputPath, outputPath) {
   return lines.length;
 }
 function convertEvalsJsonToYaml(inputPath) {
-  const content = readFileSync2(inputPath, "utf8");
+  const content = readFileSync(inputPath, "utf8");
   const parsed = JSON.parse(content);
   if (!isAgentSkillsFormat(parsed)) {
     throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
@@ -3924,7 +3905,7 @@ var evalPromptCommand = subcommands({
 });
 // src/commands/eval/commands/assert.ts
-import { readFileSync as readFileSync3 } from "node:fs";
+import { readFileSync as readFileSync2 } from "node:fs";
 import path3 from "node:path";
 import fg from "fast-glob";
 var evalAssertCommand = command({
@@ -3956,7 +3937,7 @@ var evalAssertCommand = command({
     let resolvedOutput;
     let resolvedInput;
     if (file) {
-      const content = JSON.parse(readFileSync3(path3.resolve(file), "utf8"));
+      const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
       resolvedOutput = content.output ?? "";
       resolvedInput = content.input ?? "";
     } else {
@@ -4144,11 +4125,6 @@ var evalRunCommand = command({
       long: "otel-file",
       description: "Write OTLP JSON trace to file (importable by OTel backends)"
     }),
-    traceFile: option({
-      type: optional(string),
-      long: "trace-file",
-      description: "Write human-readable trace JSONL to file"
-    }),
     exportOtel: flag({
       long: "export-otel",
       description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
@@ -4183,7 +4159,7 @@ var evalRunCommand = command({
     artifacts: option({
       type: optional(string),
       long: "artifacts",
-      description: "Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory"
+      description: "Write companion artifacts (index.jsonl, <test>/grading.json, <test>/timing.json, timing.json, benchmark.json) to the specified directory"
     }),
     graderTarget: option({
       type: optional(string),
@@ -4203,7 +4179,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-FZJANO4A.js");
+      const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4229,7 +4205,6 @@ var evalRunCommand = command({
       workspacePath: args.workspacePath,
       trace: false,
       otelFile: args.otelFile,
-      traceFile: args.traceFile,
       exportOtel: args.exportOtel,
       otelBackend: args.otelBackend,
       otelCaptureContent: args.otelCaptureContent,
@@ -4257,212 +4232,31 @@ var evalCommand = subcommands({
   }
 });
-// src/commands/generate/rubrics.ts
-import { readFile, writeFile as writeFile2 } from "node:fs/promises";
-import path4 from "node:path";
-import { pathToFileURL } from "node:url";
-import { isMap, isSeq, parseDocument } from "yaml";
-function isJsonObject(value) {
-  return typeof value === "object" && value !== null && !Array.isArray(value);
-}
-function asString(value) {
-  return typeof value === "string" ? value : void 0;
-}
-async function loadRubricGenerator() {
-  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
-  if (customGenerator) {
-    const generatorPath = path4.resolve(customGenerator);
-    const generatorUrl = pathToFileURL(generatorPath).href;
-    const module = await import(generatorUrl);
-    return module.generateRubrics;
-  }
-  return generateRubrics;
-}
-async function generateRubricsCommand(options) {
-  const { file, target: targetOverride, verbose } = options;
-  console.log(`Generating rubrics for: ${file}`);
-  const absolutePath = path4.resolve(file);
-  const content = await readFile(absolutePath, "utf8");
-  const doc = parseDocument(content);
-  const parsed = doc.toJSON();
-  if (!isJsonObject(parsed)) {
-    throw new Error(`Invalid YAML file format: ${file}`);
-  }
-  const suite = parsed;
-  const evalcases = suite.tests;
-  if (!Array.isArray(evalcases)) {
-    throw new Error(`No tests found in ${file}`);
-  }
-  const targetSelection = await selectTarget({
-    testFilePath: absolutePath,
-    repoRoot: process.cwd(),
-    cwd: process.cwd(),
-    cliTargetName: targetOverride,
-    dryRun: false,
-    dryRunDelay: 0,
-    dryRunDelayMin: 0,
-    dryRunDelayMax: 0,
-    env: process.env
-  });
-  if (verbose) {
-    console.log(`Using target: ${targetSelection.targetName}`);
-  }
-  const provider = createProvider(targetSelection.resolvedTarget);
-  const generateRubricsFunc = await loadRubricGenerator();
-  let updatedCount = 0;
-  let skippedCount = 0;
-  const evalcasesNode = doc.getIn(["tests"]);
-  if (!evalcasesNode || !isSeq(evalcasesNode)) {
-    throw new Error("tests must be a sequence");
-  }
-  for (let i = 0; i < evalcases.length; i++) {
-    const rawCase = evalcases[i];
-    if (!isJsonObject(rawCase)) {
-      continue;
-    }
-    const evalCase = rawCase;
-    const id = asString(evalCase.id) ?? "unknown";
-    const expectedOutcome = asString(evalCase.criteria) ?? asString(evalCase.outcome);
-    if (!expectedOutcome) {
-      if (verbose) {
-        console.log(`  Skipping ${id}: no criteria`);
-      }
-      skippedCount++;
-      continue;
-    }
-    if (evalCase.rubrics !== void 0) {
-      if (verbose) {
-        console.log(`  Skipping ${id}: rubrics already defined`);
-      }
-      skippedCount++;
-      continue;
-    }
-    console.log(`  Generating rubrics for: ${id}`);
-    const question = extractQuestion(evalCase);
-    const referenceAnswer = asString(evalCase.reference_answer);
-    const rubrics = await generateRubricsFunc({
-      criteria: expectedOutcome,
-      question,
-      referenceAnswer,
-      provider
-    });
-    const caseNode = evalcasesNode.items[i];
-    if (caseNode && isMap(caseNode)) {
-      caseNode.set(
-        "rubrics",
-        rubrics.filter((r) => r.outcome !== void 0).map((r) => ({
-          id: r.id,
-          outcome: r.outcome,
-          weight: r.weight,
-          required: r.required ?? true
-        }))
-      );
-    }
-    updatedCount++;
-    if (verbose) {
-      console.log(`    Generated ${rubrics.length} rubric(s)`);
-    }
-  }
-  if (updatedCount > 0) {
-    const output = doc.toString();
-    await writeFile2(absolutePath, output, "utf8");
-    console.log(`
-Updated ${updatedCount} test(s) with generated rubrics`);
-    if (skippedCount > 0) {
-      console.log(`Skipped ${skippedCount} test(s)`);
-    }
-  } else {
-    console.log("\nNo tests updated (all already have rubrics or missing criteria)");
-  }
-}
-function extractQuestion(evalCase) {
-  const explicitQuestion = asString(evalCase.question);
-  if (explicitQuestion) {
-    return explicitQuestion;
-  }
-  const inputMessages = evalCase.input;
-  if (!Array.isArray(inputMessages)) {
-    return void 0;
-  }
-  for (const msg of inputMessages) {
-    if (!isJsonObject(msg)) {
-      continue;
-    }
-    if (msg.role === "user" && typeof msg.content === "string") {
-      return msg.content;
-    }
-  }
-  return void 0;
-}
-// src/commands/generate/index.ts
-var rubricsCommand = command({
-  name: "rubrics",
-  description: "Generate rubrics from criteria in YAML eval file",
-  args: {
-    file: positional({
-      type: string,
-      displayName: "file",
-      description: "Path to YAML eval file"
-    }),
-    target: option({
-      type: optional(string),
-      long: "target",
-      short: "t",
-      description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
-    }),
-    verbose: flag({
-      long: "verbose",
-      short: "v",
-      description: "Show detailed progress"
-    })
-  },
-  handler: async ({ file, target, verbose }) => {
-    try {
-      await generateRubricsCommand({
-        file,
-        target,
-        verbose
-      });
-    } catch (error) {
-      console.error(`Error: ${error.message}`);
-      process.exit(1);
-    }
-  }
-});
-var generateCommand = subcommands({
-  name: "generate",
-  description: "Generate evaluation artifacts",
-  cmds: {
-    rubrics: rubricsCommand
-  }
-});
 // src/commands/init/index.ts
 import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
-import path6 from "node:path";
+import path5 from "node:path";
 import * as readline from "node:readline/promises";
 // src/templates/index.ts
-import { readFileSync as readFileSync4, readdirSync, statSync } from "node:fs";
-import path5 from "node:path";
+import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
+import path4 from "node:path";
 import { fileURLToPath } from "node:url";
 function getAgentvTemplates() {
   return getTemplatesFromDir(".agentv");
 }
 function getEnvExampleTemplate() {
-  const currentDir = path5.dirname(fileURLToPath(import.meta.url));
-  const templatesBase = currentDir.includes(`${path5.sep}dist`) ? path5.join(currentDir, "templates") : currentDir;
-  const content = readFileSync4(path5.join(templatesBase, ".env.example"), "utf-8");
+  const currentDir = path4.dirname(fileURLToPath(import.meta.url));
+  const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
+  const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
   return { path: ".env.example", content };
 }
 function getTemplatesFromDir(subdir) {
-  const currentDir = path5.dirname(fileURLToPath(import.meta.url));
+  const currentDir = path4.dirname(fileURLToPath(import.meta.url));
   let templatesDir;
-  if (currentDir.includes(`${path5.sep}dist`)) {
-    templatesDir = path5.join(currentDir, "templates", subdir);
+  if (currentDir.includes(`${path4.sep}dist`)) {
+    templatesDir = path4.join(currentDir, "templates", subdir);
   } else {
-    templatesDir = path5.join(currentDir, subdir);
+    templatesDir = path4.join(currentDir, subdir);
   }
   return readTemplatesRecursively(templatesDir, "");
 }
@@ -4470,15 +4264,15 @@ function readTemplatesRecursively(dir, relativePath) {
   const templates = [];
   const entries2 = readdirSync(dir);
   for (const entry of entries2) {
-    const fullPath = path5.join(dir, entry);
+    const fullPath = path4.join(dir, entry);
     const stat3 = statSync(fullPath);
-    const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
+    const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
     if (stat3.isDirectory()) {
       templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
     } else {
-      const content = readFileSync4(fullPath, "utf-8");
+      const content = readFileSync3(fullPath, "utf-8");
       templates.push({
-        path: entryRelativePath.split(path5.sep).join("/"),
+        path: entryRelativePath.split(path4.sep).join("/"),
         // Normalize to forward slashes
         content
       });
@@ -4507,22 +4301,22 @@ async function promptYesNo(message) {
   }
 }
 async function initCommand(options = {}) {
-  const targetPath = path6.resolve(options.targetPath ?? ".");
-  const agentvDir = path6.join(targetPath, ".agentv");
+  const targetPath = path5.resolve(options.targetPath ?? ".");
+  const agentvDir = path5.join(targetPath, ".agentv");
   const otherAgentvTemplates = getAgentvTemplates();
   const envTemplate = getEnvExampleTemplate();
   const existingFiles = [];
   if (envTemplate) {
-    const envFilePath = path6.join(targetPath, ".env.example");
+    const envFilePath = path5.join(targetPath, ".env.example");
     if (existsSync(envFilePath)) {
       existingFiles.push(".env.example");
     }
   }
   if (existsSync(agentvDir)) {
     for (const template of otherAgentvTemplates) {
-      const targetFilePath = path6.join(agentvDir, template.path);
+      const targetFilePath = path5.join(agentvDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path6.relative(targetPath, targetFilePath));
+        existingFiles.push(path5.relative(targetPath, targetFilePath));
       }
     }
   }
@@ -4544,18 +4338,18 @@ async function initCommand(options = {}) {
     mkdirSync(agentvDir, { recursive: true });
   }
   if (envTemplate) {
-    const envFilePath = path6.join(targetPath, ".env.example");
+    const envFilePath = path5.join(targetPath, ".env.example");
     writeFileSync2(envFilePath, envTemplate.content, "utf-8");
     console.log("Created .env.example");
   }
   for (const template of otherAgentvTemplates) {
-    const targetFilePath = path6.join(agentvDir, template.path);
-    const targetDirPath = path6.dirname(targetFilePath);
+    const targetFilePath = path5.join(agentvDir, template.path);
+    const targetDirPath = path5.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
     writeFileSync2(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
+    console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
   }
   console.log("\nAgentV initialized successfully!");
   console.log("\nFiles installed to root:");
@@ -4563,7 +4357,7 @@ async function initCommand(options = {}) {
     console.log("  - .env.example");
   }
   console.log(`
-Files installed to ${path6.relative(targetPath, agentvDir)}:`);
+Files installed to ${path5.relative(targetPath, agentvDir)}:`);
   for (const t of otherAgentvTemplates) {
     console.log(`  - ${t.path}`);
   }
@@ -4593,13 +4387,443 @@ var initCmdTsCommand = command({
   }
 });
+// src/commands/pipeline/bench.ts
+import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
+import { join } from "node:path";
+var evalBenchCommand = command({
+  name: "bench",
+  description: "Merge evaluator scores and produce benchmark artifacts",
+  args: {
+    exportDir: positional({
+      type: string,
+      displayName: "export-dir",
+      description: "Export directory from pipeline input/grade"
+    })
+  },
+  handler: async ({ exportDir }) => {
+    const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
+    const testIds = manifest.test_ids;
+    const targetName = manifest.target?.name ?? "unknown";
+    const stdinData = await readStdin();
+    const llmScores = stdinData ? JSON.parse(stdinData) : {};
+    const indexLines = [];
+    const allPassRates = [];
+    for (const testId of testIds) {
+      const testDir = join(exportDir, testId);
+      const evaluators = [];
+      const allAssertions = [];
+      const codeResultsDir = join(testDir, "code_grader_results");
+      try {
+        const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
+        for (const file of resultFiles) {
+          const result = JSON.parse(await readFile(join(codeResultsDir, file), "utf8"));
+          evaluators.push({
+            name: result.name,
+            type: "code-grader",
+            score: result.score,
+            weight: result.weight ?? 1,
+            assertions: result.assertions ?? []
+          });
+          for (const a of result.assertions ?? []) {
+            allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
+          }
+        }
+      } catch {
+      }
+      const testLlmScores = llmScores[testId] ?? {};
+      const llmGradersDir = join(testDir, "llm_graders");
+      try {
+        const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
+        for (const file of graderFiles) {
+          const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
+          const graderName = graderMeta.name;
+          const llmResult = testLlmScores[graderName];
+          if (llmResult) {
+            evaluators.push({
+              name: graderName,
+              type: "llm-grader",
+              score: llmResult.score,
+              weight: graderMeta.weight ?? 1,
+              assertions: llmResult.assertions ?? []
+            });
+            for (const a of llmResult.assertions ?? []) {
+              allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
+            }
+          }
+        }
+      } catch {
+      }
+      const totalWeight = evaluators.reduce((sum, e) => sum + e.weight, 0);
+      const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
+      const passed = allAssertions.filter((a) => a.passed).length;
+      const failed = allAssertions.filter((a) => !a.passed).length;
+      const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
+      allPassRates.push(passRate);
+      const grading = {
+        assertions: allAssertions,
+        summary: { passed, failed, total: allAssertions.length, pass_rate: passRate },
+        execution_metrics: { tool_calls: {}, total_tool_calls: 0, errors_encountered: 0 },
+        evaluators: evaluators.map((e) => ({
+          name: e.name,
+          type: e.type,
+          score: e.score,
+          reasoning: "",
+          weight: e.weight
+        }))
+      };
+      await writeFile2(
+        join(testDir, "grading.json"),
+        `${JSON.stringify(grading, null, 2)}
+`,
+        "utf8"
+      );
+      indexLines.push(
+        JSON.stringify({
+          timestamp: manifest.timestamp,
+          test_id: testId,
+          score: Math.round(weightedScore * 1e3) / 1e3,
+          target: targetName,
+          grading_path: `${testId}/grading.json`,
+          timing_path: `${testId}/timing.json`
+        })
+      );
+    }
+    await writeFile2(
+      join(exportDir, "index.jsonl"),
+      indexLines.length > 0 ? `${indexLines.join("\n")}
+` : "",
+      "utf8"
+    );
+    const passRateStats = computeStats(allPassRates);
+    const benchmark = {
+      metadata: {
+        eval_file: manifest.eval_file,
+        timestamp: manifest.timestamp,
+        targets: [targetName],
+        tests_run: testIds
+      },
+      run_summary: {
+        [targetName]: {
+          pass_rate: passRateStats,
+          time_seconds: { mean: 0, stddev: 0 },
+          tokens: { mean: 0, stddev: 0 }
+        }
+      },
+      notes: []
+    };
+    await writeFile2(
+      join(exportDir, "benchmark.json"),
+      `${JSON.stringify(benchmark, null, 2)}
+`,
+      "utf8"
+    );
+    console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
+  }
+});
+async function readStdin() {
+  const chunks = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(chunk);
+  }
+  return Buffer.concat(chunks).toString("utf8").trim();
+}
+function computeStats(values) {
+  if (values.length === 0) return { mean: 0, stddev: 0 };
+  const mean2 = values.reduce((sum, v) => sum + v, 0) / values.length;
+  const variance = values.reduce((sum, v) => sum + (v - mean2) ** 2, 0) / values.length;
+  return {
+    mean: Math.round(mean2 * 1e3) / 1e3,
+    stddev: Math.round(Math.sqrt(variance) * 1e3) / 1e3
+  };
+}
+// src/commands/pipeline/grade.ts
+import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
+import { join as join2 } from "node:path";
+/**
+ * `pipeline grade` command: runs the code graders found in an export directory
+ * against each test's recorded response.
+ *
+ * For every id in `<export-dir>/manifest.json` (`test_ids`) the handler reads
+ * `code_graders/*.json`, `response.md` and `input.json`, hands the grader command,
+ * a JSON payload and the configured cwd to `executeScript`, and writes one
+ * `code_grader_results/<grader-name>.json` file per grader. Tests whose
+ * `code_graders` directory is unreadable or holds no `.json` file are skipped.
+ * A grader whose script fails or prints invalid JSON is recorded with score 0
+ * instead of aborting the run. A grader counts as passed when its score is >= 0.5.
+ *
+ * Throws when the manifest is unreadable, is not valid JSON, or has no
+ * `test_ids` array.
+ */
+var evalGradeCommand = command({
+  name: "grade",
+  description: "Run code-grader assertions on responses in an export directory",
+  args: {
+    exportDir: positional({
+      type: string,
+      displayName: "export-dir",
+      description: "Export directory from pipeline input"
+    })
+  },
+  handler: async ({ exportDir }) => {
+    const manifestPath = join2(exportDir, "manifest.json");
+    const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
+    const testIds = manifest.test_ids;
+    if (!Array.isArray(testIds)) {
+      // Fail with a readable message instead of "testIds is not iterable".
+      throw new Error(`Invalid manifest ${manifestPath}: "test_ids" must be an array`);
+    }
+    let totalGraders = 0;
+    let totalPassed = 0;
+    // Persists one grader outcome; the counter only advances once the write succeeded.
+    const recordResult = async (resultsDir, result) => {
+      await writeFile3(
+        join2(resultsDir, `${result.name}.json`),
+        `${JSON.stringify(result, null, 2)}\n`,
+        "utf8"
+      );
+      totalGraders++;
+    };
+    for (const testId of testIds) {
+      const testDir = join2(exportDir, testId);
+      const codeGradersDir = join2(testDir, "code_graders");
+      const resultsDir = join2(testDir, "code_grader_results");
+      let graderFiles;
+      try {
+        graderFiles = (await readdir2(codeGradersDir)).filter((f) => f.endsWith(".json"));
+      } catch {
+        // No readable code_graders directory: this test has nothing to grade.
+        continue;
+      }
+      if (graderFiles.length === 0) continue;
+      await mkdir2(resultsDir, { recursive: true });
+      const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
+      const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
+      for (const graderFile of graderFiles) {
+        const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
+        // Fall back to the config file's stem so a config without `name` cannot
+        // produce (and repeatedly overwrite) `undefined.json`.
+        const graderName = typeof graderConfig.name === "string" && graderConfig.name !== "" ? graderConfig.name : graderFile.slice(0, -".json".length);
+        const weight = graderConfig.weight ?? 1;
+        const payload = JSON.stringify({
+          output: [{ role: "assistant", content: responseText }],
+          input: inputData.input_messages,
+          question: inputData.input_text,
+          criteria: "",
+          expected_output: [],
+          reference_answer: "",
+          input_files: [],
+          trace: null,
+          token_usage: null,
+          cost_usd: null,
+          duration_ms: null,
+          start_time: null,
+          end_time: null,
+          file_changes: null,
+          workspace_path: null,
+          config: graderConfig.config ?? null,
+          metadata: {},
+          input_text: inputData.input_text,
+          output_text: responseText,
+          expected_output_text: ""
+        });
+        try {
+          const stdout = await executeScript(
+            graderConfig.command,
+            payload,
+            void 0,
+            graderConfig.cwd
+          );
+          const parsed = JSON.parse(stdout);
+          // Missing or non-numeric scores are treated as 0; non-array assertions as none.
+          const score = typeof parsed.score === "number" ? parsed.score : 0;
+          const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
+          await recordResult(resultsDir, {
+            name: graderName,
+            type: "code-grader",
+            score,
+            weight,
+            assertions,
+            details: parsed.details ?? {}
+          });
+          if (score >= 0.5) totalPassed++;
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          console.error(`  ${testId}/${graderName}: ERROR \u2014 ${message}`);
+          await recordResult(resultsDir, {
+            name: graderName,
+            type: "code-grader",
+            score: 0,
+            weight,
+            assertions: [{ text: `Error: ${message}`, passed: false }],
+            details: { error: message }
+          });
+        }
+      }
+    }
+    console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
+  }
+});
+// src/commands/pipeline/input.ts
+import { readFile as readFile3 } from "node:fs/promises";
+import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
+import { dirname, join as join3, resolve } from "node:path";
+/**
+ * `pipeline input` command: expands an eval file into one directory per test
+ * under `--out`.
+ *
+ * Each test directory receives `input.json`, `invoke.json`, `criteria.md`,
+ * optionally `expected_output.json`, and the grader configs written by
+ * `writeGraderConfigs`. A `manifest.json` listing the test ids and the selected
+ * target is written last. Exits with status 1 when the eval file has no tests.
+ */
+var evalInputCommand = command({
+  name: "input",
+  description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
+  args: {
+    evalPath: positional({
+      type: string,
+      displayName: "eval-path",
+      description: "Path to eval YAML file"
+    }),
+    out: option({
+      type: string,
+      long: "out",
+      description: "Output directory for extracted inputs"
+    })
+  },
+  handler: async ({ evalPath, out }) => {
+    const resolvedEvalPath = resolve(evalPath);
+    const outDir = resolve(out);
+    const evalDir = dirname(resolvedEvalPath);
+    const repoRoot = await findRepoRoot(evalDir);
+    const { tests } = await loadTestSuite(resolvedEvalPath, repoRoot);
+    if (tests.length === 0) {
+      console.error("No tests found in eval file.");
+      process.exit(1);
+    }
+    // Defaults describe "the agent is the target"; they are replaced only when
+    // target selection succeeds.
+    let cliTarget = null;
+    let targetName = "agent";
+    let targetKind = "agent";
+    try {
+      const selection = await selectTarget({
+        testFilePath: resolvedEvalPath,
+        repoRoot,
+        cwd: evalDir,
+        dryRun: false,
+        dryRunDelay: 0,
+        dryRunDelayMin: 0,
+        dryRunDelayMax: 0,
+        env: process.env
+      });
+      targetName = selection.targetName;
+      const { resolvedTarget } = selection;
+      if (resolvedTarget.kind === "cli") {
+        targetKind = "cli";
+        const { command: cliCommand, cwd: cliCwd, timeoutMs } = resolvedTarget.config;
+        cliTarget = {
+          kind: "cli",
+          command: cliCommand,
+          cwd: cliCwd ?? evalDir,
+          timeoutMs: timeoutMs ?? 3e4
+        };
+      }
+    } catch {
+      // Best effort: if target selection fails, cliTarget stays null and every
+      // test is exported with an agent-mode invoke.json.
+    }
+    const testIds = [];
+    for (const test of tests) {
+      const testDir = join3(outDir, test.id);
+      await mkdir3(testDir, { recursive: true });
+      testIds.push(test.id);
+      const inputMessages = test.input.map(({ role, content }) => ({ role, content }));
+      await writeJson(join3(testDir, "input.json"), {
+        input_text: test.question,
+        input_messages: inputMessages,
+        file_paths: test.file_paths,
+        metadata: test.metadata ?? {}
+      });
+      const invocation = cliTarget ? {
+        kind: "cli",
+        command: cliTarget.command,
+        cwd: cliTarget.cwd,
+        timeout_ms: cliTarget.timeoutMs,
+        env: {}
+      } : {
+        kind: "agent",
+        instructions: "Execute this task in the current workspace. The agent IS the target."
+      };
+      await writeJson(join3(testDir, "invoke.json"), invocation);
+      await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
+      // expected_output.json is only emitted when there is something to compare against.
+      const hasExpectedOutput = test.expected_output.length > 0;
+      const hasReferenceAnswer = test.reference_answer !== void 0 && test.reference_answer !== "";
+      if (hasExpectedOutput || hasReferenceAnswer) {
+        await writeJson(join3(testDir, "expected_output.json"), {
+          expected_output: test.expected_output,
+          reference_answer: test.reference_answer ?? ""
+        });
+      }
+      await writeGraderConfigs(testDir, test.assertions ?? [], evalDir);
+    }
+    await writeJson(join3(outDir, "manifest.json"), {
+      eval_file: resolvedEvalPath,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+      target: {
+        name: targetName,
+        kind: targetKind
+      },
+      test_ids: testIds
+    });
+    console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
+  }
+});
+/**
+ * Writes one JSON config per grader assertion into `<testDir>/code_graders` or
+ * `<testDir>/llm_graders`. Each directory is created lazily, on the first
+ * assertion that needs it. Assertions of any other type are ignored.
+ *
+ * @param testDir    Directory of the test being exported.
+ * @param assertions Assertion configs of that test.
+ * @param evalDir    Fallback cwd for code graders that declare none.
+ */
+async function writeGraderConfigs(testDir, assertions, evalDir) {
+  const codeGradersDir = join3(testDir, "code_graders");
+  const llmGradersDir = join3(testDir, "llm_graders");
+  const createdDirs = /* @__PURE__ */ new Set();
+  const ensureDir = async (dir) => {
+    if (createdDirs.has(dir)) return;
+    await mkdir3(dir, { recursive: true });
+    createdDirs.add(dir);
+  };
+  for (const assertion of assertions) {
+    const { type } = assertion;
+    if (type === "code-grader" || type === "code-judge") {
+      await ensureDir(codeGradersDir);
+      await writeJson(join3(codeGradersDir, `${assertion.name}.json`), {
+        name: assertion.name,
+        command: assertion.command,
+        cwd: assertion.resolvedCwd ?? assertion.cwd ?? evalDir,
+        weight: assertion.weight ?? 1,
+        config: assertion.config ?? {}
+      });
+      continue;
+    }
+    if (type !== "llm-grader" && type !== "llm-judge") continue;
+    await ensureDir(llmGradersDir);
+    // The inline prompt (or "") is used unless a resolved prompt file can be read.
+    let promptContent = typeof assertion.prompt === "string" ? assertion.prompt : "";
+    if (assertion.resolvedPromptPath) {
+      try {
+        promptContent = await readFile3(assertion.resolvedPromptPath, "utf8");
+      } catch {
+        // Unreadable prompt file: keep the inline fallback chosen above.
+      }
+    }
+    await writeJson(join3(llmGradersDir, `${assertion.name}.json`), {
+      name: assertion.name,
+      prompt_content: promptContent,
+      weight: assertion.weight ?? 1,
+      threshold: 0.5,
+      config: {}
+    });
+  }
+}
+/**
+ * Serializes `data` as 2-space-indented JSON followed by a newline and writes
+ * it to `filePath` as UTF-8.
+ */
+async function writeJson(filePath, data) {
+  const serialized = JSON.stringify(data, null, 2);
+  await writeFile4(filePath, `${serialized}\n`, "utf8");
+}
+// src/commands/pipeline/index.ts
+// `pipeline` command group: exposes the `input`, `grade` and `bench` subcommands,
+// which the description presents as successive stages (input -> grade -> bench).
+var pipelineCommand = subcommands({
+  name: "pipeline",
+  description: "Agent-mode eval pipeline commands (input \u2192 grade \u2192 bench)",
+  cmds: {
+    input: evalInputCommand,
+    grade: evalGradeCommand,
+    bench: evalBenchCommand
+  }
+});
 // src/commands/results/export.ts
-import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
-import path8 from "node:path";
+import path7 from "node:path";
+// src/commands/results/shared.ts
+import { existsSync as existsSync2 } from "node:fs";
 // src/commands/trace/utils.ts
-import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
-import path7 from "node:path";
+import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
+import path6 from "node:path";
 var colors2 = {
   reset: "\x1B[0m",
   bold: "\x1B[1m",
@@ -4625,7 +4849,20 @@ function padLeft2(str, len) {
   return " ".repeat(Math.max(0, len - plainLen)) + str;
 }
 function loadResultFile(filePath) {
-  const content = readFileSync5(filePath, "utf8");
+  const resolvedFilePath = resolveTraceResultPath(filePath);
+  if (path6.extname(resolvedFilePath) === ".json") {
+    return loadOtlpTraceFile(resolvedFilePath);
+  }
+  if (path6.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
+    return loadManifestAsRawResults(resolvedFilePath);
+  }
+  return loadJsonlRecords(resolvedFilePath);
+}
+// Resolves the path handed to the trace commands by delegating to
+// resolveWorkspaceOrFilePath. The name suggests it accepts either a run
+// workspace directory or a result file; confirm against that helper.
+function resolveTraceResultPath(filePath) {
+  return resolveWorkspaceOrFilePath(filePath);
+}
+function loadJsonlRecords(filePath) {
+  const content = readFileSync4(filePath, "utf8");
   const lines = content.trim().split("\n").filter((line) => line.trim());
   return lines.map((line, i) => {
     const record = JSON.parse(line);
@@ -4635,25 +4872,280 @@ function loadResultFile(filePath) {
     return record;
   });
 }
+/**
+ * Loads the results referenced by a manifest file and converts each one with
+ * `toRawResult`.
+ */
+function loadManifestAsRawResults(filePath) {
+  const manifestResults = loadManifestResults(filePath);
+  return manifestResults.map(toRawResult);
+}
+/**
+ * Converts a result object (mostly camelCase fields) into the snake_case "raw"
+ * record shape used by the trace commands. Only the fields listed here are
+ * copied; assertions and token usage are reduced to their known keys.
+ *
+ * NOTE(review): `eval_set` is the only field read in snake_case from the input;
+ * confirm that matches the producer of `result`.
+ */
+function toRawResult(result) {
+  const copyAssertion = ({ text, passed, evidence }) => ({ text, passed, evidence });
+  const copyScore = (entry) => ({
+    name: entry.name,
+    type: entry.type,
+    score: entry.score,
+    assertions: entry.assertions?.map(copyAssertion),
+    weight: entry.weight
+  });
+  let tokenUsage = void 0;
+  if (result.tokenUsage) {
+    const { input, output, cached } = result.tokenUsage;
+    tokenUsage = { input, output, cached };
+  }
+  return {
+    timestamp: result.timestamp,
+    test_id: result.testId,
+    eval_set: result.eval_set,
+    conversation_id: result.conversationId,
+    score: result.score,
+    assertions: result.assertions?.map(copyAssertion),
+    target: result.target,
+    error: result.error,
+    scores: result.scores?.map(copyScore),
+    token_usage: tokenUsage,
+    cost_usd: result.costUsd,
+    duration_ms: result.durationMs,
+    start_time: result.startTime,
+    end_time: result.endTime,
+    input: result.input,
+    output: result.output,
+    file_changes: result.fileChanges
+  };
+}
+/**
+ * Reads an OTLP JSON trace file and turns each eval root span into a raw result record.
+ *
+ * Spans are flattened from resourceSpans -> scopeSpans -> spans. A root is a span with
+ * no parentSpanId, or whose parent is not present in the file. Roots recognised by
+ * isAgentvEvalRoot are preferred; if none match, every root is used.
+ *
+ * Returns an empty array when the file holds no spans.
+ * Throws when a selected root span has no numeric `agentv.score` attribute.
+ */
+function loadOtlpTraceFile(filePath) {
+  const parsed = JSON.parse(readFileSync4(filePath, "utf8"));
+  const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
+  if (!spans || spans.length === 0) {
+    return [];
+  }
+  // Index spans by id and group them under their parent id; spans without an id are ignored.
+  const spanMap = /* @__PURE__ */ new Map();
+  const childMap = /* @__PURE__ */ new Map();
+  for (const span of spans) {
+    if (!span.spanId) continue;
+    spanMap.set(span.spanId, span);
+    if (span.parentSpanId) {
+      const siblings = childMap.get(span.parentSpanId) ?? [];
+      siblings.push(span);
+      childMap.set(span.parentSpanId, siblings);
+    }
+  }
+  const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
+  const supportedRoots = roots.filter(isAgentvEvalRoot);
+  const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
+  return candidateRoots.map((root, index) => {
+    const descendants = collectChildSpans(root.spanId, childMap);
+    const rootAttrs = parseOtlpAttributes(root.attributes);
+    const parsedDescendants = descendants.map((span) => ({
+      ...span,
+      parsedAttributes: parseOtlpAttributes(span.attributes)
+    }));
+    // Tool spans carry gen_ai.tool.name; LLM spans are "chat" operations or spans named "chat ...".
+    const toolSpans = parsedDescendants.filter(
+      (span) => typeof span.parsedAttributes.gen_ai_tool_name === "string"
+    );
+    const llmSpans = parsedDescendants.filter(
+      (span) => span.parsedAttributes.gen_ai_operation_name === "chat" || typeof span.name === "string" && span.name.startsWith("chat ")
+    );
+    // Token usage is summed over descendant spans only; the root's own attributes are not added here.
+    const tokenUsage = descendants.reduce(
+      (acc, span) => {
+        const attrs = parseOtlpAttributes(span.attributes);
+        acc.input += numberAttr(attrs.gen_ai_usage_input_tokens) ?? 0;
+        acc.output += numberAttr(attrs.gen_ai_usage_output_tokens) ?? 0;
+        const cached = numberAttr(attrs.gen_ai_usage_cache_read_input_tokens);
+        if (cached !== void 0 && cached > 0) {
+          acc.cached = (acc.cached ?? 0) + cached;
+        }
+        return acc;
+      },
+      { input: 0, output: 0, cached: void 0 }
+    );
+    // Root-level agentv.trace.* attributes win for the counters and duration; values derived
+    // from the spans are the fallback. For token usage it is the other way round: the span
+    // sums are used unless they are 0, in which case the root attributes apply.
+    const traceSummary = buildDerivedTraceSummary({
+      trace: {
+        event_count: numberAttr(rootAttrs.agentv_trace_event_count) ?? (toolSpans.length > 0 ? toolSpans.length : void 0),
+        tool_calls: countRawSpanNames(
+          toolSpans.map((span) => ({
+            type: "tool",
+            name: String(span.parsedAttributes.gen_ai_tool_name)
+          }))
+        ),
+        // Status code 2 is STATUS_CODE_ERROR in the OTLP trace schema; a count of 0 is reported as undefined.
+        error_count: descendants.filter((span) => span.status?.code === 2).length || void 0,
+        llm_call_count: numberAttr(rootAttrs.agentv_trace_llm_call_count) ?? (llmSpans.length > 0 ? llmSpans.length : void 0)
+      },
+      spans: [
+        ...llmSpans.map((span) => ({
+          type: "llm",
+          name: span.name ?? "chat",
+          duration_ms: durationFromSpan(span)
+        })),
+        ...toolSpans.map((span) => ({
+          type: "tool",
+          name: String(span.parsedAttributes.gen_ai_tool_name),
+          duration_ms: durationFromSpan(span)
+        }))
+      ],
+      duration_ms: numberAttr(rootAttrs.agentv_trace_duration_ms) ?? durationFromSpan(root),
+      cost_usd: numberAttr(rootAttrs.agentv_trace_cost_usd),
+      token_usage: tokenUsage.input || tokenUsage.output || tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_input) || numberAttr(rootAttrs.agentv_trace_token_output) || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
+        input: tokenUsage.input || numberAttr(rootAttrs.agentv_trace_token_input) || 0,
+        output: tokenUsage.output || numberAttr(rootAttrs.agentv_trace_token_output) || 0,
+        ...tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) ? {
+          cached: tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) || 0
+        } : {}
+      } : void 0
+    });
+    const score = numberAttr(rootAttrs.agentv_score);
+    if (score === void 0) {
+      throw new Error(
+        `Unsupported OTLP trace root span at index ${index + 1}: missing agentv.score attribute`
+      );
+    }
+    return {
+      // Falls back to agentv.eval_id, then to a positional "trace-N" id.
+      test_id: stringAttr(rootAttrs.agentv_test_id) ?? stringAttr(rootAttrs.agentv_eval_id) ?? `trace-${index + 1}`,
+      eval_set: stringAttr(rootAttrs.agentv_eval_set),
+      target: stringAttr(rootAttrs.agentv_target),
+      score,
+      error: root.status?.code === 2 ? root.status.message : void 0,
+      cost_usd: traceSummary?.cost_usd,
+      duration_ms: traceSummary?.duration_ms,
+      token_usage: traceSummary?.token_usage,
+      trace: traceSummary ? {
+        event_count: traceSummary.event_count,
+        tool_calls: traceSummary.tool_calls,
+        error_count: traceSummary.error_count,
+        tool_durations: traceSummary.tool_durations,
+        llm_call_count: traceSummary.llm_call_count,
+        token_usage: traceSummary.token_usage,
+        cost_usd: traceSummary.cost_usd,
+        duration_ms: traceSummary.duration_ms
+      } : void 0,
+      spans: traceSummary?.spans,
+      output: stringAttr(rootAttrs.agentv_output_text),
+      // Per-evaluator scores are read from root span events named "agentv.evaluator.<name>".
+      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
+        const attrs = parseOtlpAttributes(event.attributes);
+        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
+        return {
+          name,
+          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
+          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
+        };
+      })
+    };
+  });
+}
+/**
+ * Reports whether a span looks like an agentv eval root: it is named
+ * "agentv.eval", or carries a numeric `agentv.score` or a string
+ * `agentv.test_id` attribute.
+ */
+function isAgentvEvalRoot(span) {
+  const attrs = parseOtlpAttributes(span.attributes);
+  if (span.name === "agentv.eval") return true;
+  if (numberAttr(attrs.agentv_score) !== void 0) return true;
+  return stringAttr(attrs.agentv_test_id) !== void 0;
+}
+/**
+ * Collects every descendant of `spanId` from a parent-id -> children map.
+ *
+ * Order is unchanged for well-formed trees: a span's direct children come
+ * first, followed by the descendants of each child in turn.
+ *
+ * @param spanId   Id of the span whose descendants are wanted; falsy yields [].
+ * @param childMap Map from parent span id to its array of child spans.
+ * @param visited  Ids already expanded. Internal to the recursion; it stops
+ *                 malformed traces (duplicated or cyclic span ids) from
+ *                 recursing until the stack overflows.
+ * @returns Flat array of descendant spans.
+ */
+function collectChildSpans(spanId, childMap, visited = /* @__PURE__ */ new Set()) {
+  if (!spanId || visited.has(spanId)) return [];
+  visited.add(spanId);
+  const direct = childMap.get(spanId) ?? [];
+  const all = [...direct];
+  for (const child of direct) {
+    // Push one by one rather than spreading, so a very large subtree cannot
+    // exceed the engine's argument-count limit.
+    for (const nested of collectChildSpans(child.spanId, childMap, visited)) {
+      all.push(nested);
+    }
+  }
+  return all;
+}
+/**
+ * Flattens an OTLP attribute list into a plain object. Dots in attribute keys
+ * become underscores (`agentv.score` -> `agentv_score`) and each value is
+ * decoded with `parseOtlpValue`. A missing list yields an empty object.
+ */
+function parseOtlpAttributes(attributes) {
+  const flattened = {};
+  for (const { key, value } of attributes ?? []) {
+    const normalizedKey = key.replace(/\./g, "_");
+    flattened[normalizedKey] = parseOtlpValue(value);
+  }
+  return flattened;
+}
+/**
+ * Decodes one OTLP JSON "AnyValue" wrapper into a plain JavaScript value.
+ *
+ * Handles stringValue, intValue (converted with Number), doubleValue,
+ * boolValue and arrayValue (decoded recursively). Anything else, including a
+ * missing wrapper, yields undefined.
+ */
+function parseOtlpValue(value) {
+  if (!value) return void 0;
+  // Checked in this order; the first field that is present and defined wins.
+  const scalarReaders = [
+    ["stringValue", (raw) => raw],
+    ["intValue", (raw) => Number(raw)],
+    ["doubleValue", (raw) => raw],
+    ["boolValue", (raw) => raw]
+  ];
+  for (const [field, read] of scalarReaders) {
+    if (field in value && value[field] !== void 0) return read(value[field]);
+  }
+  if ("arrayValue" in value) {
+    const entries = value.arrayValue?.values ?? [];
+    return entries.map((entry) => parseOtlpValue(entry));
+  }
+  return void 0;
+}
+/**
+ * Returns a span's duration in whole milliseconds, computed from its
+ * start/end Unix-nanosecond timestamps, or undefined when either timestamp
+ * does not convert to a finite number.
+ */
+function durationFromSpan(span) {
+  const startNs = Number(span.startTimeUnixNano);
+  const endNs = Number(span.endTimeUnixNano);
+  const bothFinite = Number.isFinite(startNs) && Number.isFinite(endNs);
+  return bothFinite ? Math.round((endNs - startNs) / 1e6) : void 0;
+}
+/** Returns `value` when it is a string, otherwise undefined. */
+function stringAttr(value) {
+  if (typeof value !== "string") return void 0;
+  return value;
+}
+/** Returns `value` when it is a finite number, otherwise undefined. */
+function numberAttr(value) {
+  if (typeof value !== "number") return void 0;
+  return Number.isFinite(value) ? value : void 0;
+}
+/**
+ * Builds a trace summary for a raw result, preferring values stored in
+ * `result.trace` and deriving the rest from `result.spans`.
+ *
+ * Tool call counts and durations are derived from spans of type "tool";
+ * event and LLM call counts fall back to span counts only when spans exist.
+ * Returns undefined when the result has no trace, no spans, and no
+ * token usage, cost or duration.
+ */
+function buildDerivedTraceSummary(result) {
+  const { trace, spans } = result;
+  const spanList = spans ?? [];
+  const ofType = (kind) => spanList.filter((span) => span.type === kind);
+  const toolSpans = ofType("tool");
+  const llmSpans = ofType("llm");
+  const toolCalls = trace?.tool_calls ?? countRawSpanNames(toolSpans);
+  const toolDurations = trace?.tool_durations ?? groupRawSpanDurations(toolSpans);
+  const hasSpanData = (spans?.length ?? 0) > 0;
+  const countOrUndefined = (matching) => hasSpanData ? matching.length : void 0;
+  const nothingToSummarize = !trace && !spans?.length && result.token_usage === void 0 && result.cost_usd === void 0 && result.duration_ms === void 0;
+  if (nothingToSummarize) {
+    return void 0;
+  }
+  return {
+    event_count: trace?.event_count ?? countOrUndefined(toolSpans),
+    tool_calls: toolCalls,
+    error_count: trace?.error_count,
+    tool_durations: toolDurations,
+    llm_call_count: trace?.llm_call_count ?? countOrUndefined(llmSpans),
+    token_usage: trace?.token_usage ?? result.token_usage,
+    cost_usd: trace?.cost_usd ?? result.cost_usd,
+    duration_ms: trace?.duration_ms ?? result.duration_ms,
+    spans
+  };
+}
+/**
+ * Counts spans per name.
+ *
+ * Counting happens in a Map so that names which collide with
+ * Object.prototype members ("constructor", "toString", "__proto__", ...) are
+ * counted like any other name instead of picking up the inherited value.
+ *
+ * @param spans Iterable of objects with a `name`.
+ * @returns Plain object mapping name -> occurrence count, or undefined when
+ *          `spans` is empty.
+ */
+function countRawSpanNames(spans) {
+  const counts = /* @__PURE__ */ new Map();
+  for (const span of spans) {
+    // String() mirrors the key coercion a plain-object dictionary would apply.
+    const key = String(span.name);
+    counts.set(key, (counts.get(key) ?? 0) + 1);
+  }
+  return counts.size > 0 ? Object.fromEntries(counts) : void 0;
+}
+/**
+ * Groups span durations by span name, skipping spans without `duration_ms`.
+ *
+ * Grouping happens in a Map so that a name colliding with an
+ * Object.prototype member (e.g. "toString") does not resolve to the inherited
+ * function, which previously made `.push` throw.
+ *
+ * @param spans Iterable of objects with `name` and optional `duration_ms`.
+ * @returns Plain object mapping name -> array of durations in input order, or
+ *          undefined when no span has a duration.
+ */
+function groupRawSpanDurations(spans) {
+  const grouped = /* @__PURE__ */ new Map();
+  for (const span of spans) {
+    if (span.duration_ms === void 0) continue;
+    const key = String(span.name);
+    const bucket = grouped.get(key);
+    if (bucket) {
+      bucket.push(span.duration_ms);
+    } else {
+      grouped.set(key, [span.duration_ms]);
+    }
+  }
+  return grouped.size > 0 ? Object.fromEntries(grouped) : void 0;
+}
+/**
+ * Returns the derived trace summary of a raw result without its `spans`
+ * entry, or undefined when no summary can be derived.
+ */
+function getTraceSummary(result) {
+  const derived = buildDerivedTraceSummary(result);
+  if (derived === void 0) return void 0;
+  const summary = { ...derived };
+  delete summary.spans;
+  return summary;
+}
+/** Returns the spans of a raw result's derived trace summary, or [] when there are none. */
+function getTraceSpans(result) {
+  const derived = buildDerivedTraceSummary(result);
+  return derived?.spans ?? [];
+}
+/**
+ * Returns the span-less trace summary of a raw result passed through
+ * `toCamelCaseDeep`, or undefined when no summary can be derived.
+ */
+function toTraceSummary(result) {
+  const rawTrace = getTraceSummary(result);
+  return rawTrace ? toCamelCaseDeep(rawTrace) : void 0;
+}
 function listResultFiles(cwd, limit) {
-  const baseDir = path7.join(cwd, ".agentv", "results");
-  const rawDir = path7.join(baseDir, "raw");
+  const baseDir = path6.join(cwd, ".agentv", "results");
+  const rawDir = path6.join(baseDir, "raw");
   const files = [];
   try {
     const entries2 = readdirSync2(rawDir, { withFileTypes: true });
     for (const entry of entries2) {
       if (entry.isDirectory()) {
-        const jsonlPath = path7.join(rawDir, entry.name, "results.jsonl");
-        try {
-          statSync2(jsonlPath);
-          files.push({ filePath: jsonlPath, displayName: entry.name });
-        } catch {
+        const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
+        if (primaryPath) {
+          files.push({ filePath: primaryPath, displayName: entry.name });
         }
       }
     }
     for (const entry of entries2) {
       if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
-        files.push({ filePath: path7.join(rawDir, entry.name), displayName: entry.name });
+        files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
       }
     }
   } catch {
@@ -4661,7 +5153,7 @@ function listResultFiles(cwd, limit) {
   try {
     const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
     for (const entry of entries2) {
-      files.push({ filePath: path7.join(baseDir, entry), displayName: entry });
+      files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
     }
   } catch {
   }
@@ -4729,84 +5221,65 @@ function formatScore(score) {
   return `${(score * 100).toFixed(0)}%`;
 }
-// src/commands/results/export.ts
-function exportResults(sourceFile, content, outputDir) {
-  const results = parseJsonlResults(content);
+// src/commands/results/shared.ts
+// Optional positional `source` argument for the results commands; when it is
+// omitted, the description promises the most recent run in .agentv/results/.
+var sourceArg = positional({
+  type: optional(string),
+  displayName: "source",
+  description: "Result file or workspace directory (defaults to most recent in .agentv/results/)"
+});
+/**
+ * Picks the result source to read.
+ *
+ * With an explicit `source`, it is resolved via `resolveResultSourcePath` and
+ * must exist. Otherwise the file recorded in the run cache is used when it
+ * exists, and failing that the first entry returned by `listResultFiles`.
+ * Prints an error and exits with status 1 when nothing usable is found.
+ *
+ * @param source Optional path given on the command line.
+ * @param cwd    Directory used for resolution and for the results lookup.
+ * @returns `{ sourceFile }` holding the chosen path.
+ */
+async function resolveSourceFile(source, cwd) {
+  if (source) {
+    const explicitPath = resolveResultSourcePath(source, cwd);
+    if (!existsSync2(explicitPath)) {
+      console.error(`Error: File not found: ${explicitPath}`);
+      process.exit(1);
+    }
+    return { sourceFile: explicitPath };
+  }
+  const cache = await loadRunCache(cwd);
+  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
+  if (cachedFile && existsSync2(cachedFile)) {
+    return { sourceFile: cachedFile };
+  }
+  const metas = listResultFiles(cwd, 1);
+  if (metas.length === 0) {
+    console.error("Error: No result files found in .agentv/results/");
+    console.error("Run an evaluation first: agentv eval <eval-file>");
+    process.exit(1);
+  }
+  return { sourceFile: metas[0].path };
+}
+async function loadResults(source, cwd) {
+  const { sourceFile } = await resolveSourceFile(source, cwd);
+  const results = loadManifestResults(sourceFile);
   if (results.length === 0) {
-    throw new Error(`No results found in ${sourceFile}`);
+    console.error(`No results found in ${sourceFile}`);
+    process.exit(1);
   }
-  const patched = results.map((r) => {
+  return { results: patchTestIds(results), sourceFile };
+}
+function patchTestIds(results) {
+  return results.map((r) => {
     if (!r.testId && r.evalId) {
       return { ...r, testId: String(r.evalId) };
     }
     return r;
   });
-  mkdirSync2(outputDir, { recursive: true });
-  const benchmark = buildBenchmarkArtifact(patched, sourceFile);
-  writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
-`);
-  const timing = buildTimingArtifact(patched);
-  writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
-`);
-  const aggregateGrading = buildAggregateGradingArtifact(patched);
-  writeFileSync3(
-    path8.join(outputDir, "grading.json"),
-    `${JSON.stringify(aggregateGrading, null, 2)}
-`
-  );
-  const gradingDir = path8.join(outputDir, "grading");
-  mkdirSync2(gradingDir, { recursive: true });
-  for (const result of patched) {
-    const id = safeTestId(result);
-    const grading = buildGradingArtifact(result);
-    writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
-`);
-  }
-  const outputsDir = path8.join(outputDir, "outputs");
-  mkdirSync2(outputsDir, { recursive: true });
-  for (const result of patched) {
-    if (result.output && result.output.length > 0) {
-      const id = safeTestId(result);
-      const md = formatOutputMarkdown(result.output);
-      writeFileSync3(path8.join(outputsDir, `${id}.md`), md);
-    }
-  }
-  const inputsDir = path8.join(outputDir, "inputs");
-  mkdirSync2(inputsDir, { recursive: true });
-  for (const result of patched) {
-    const id = safeTestId(result);
-    const input = extractInput(result);
-    if (input) {
-      writeFileSync3(path8.join(inputsDir, `${id}.md`), input);
-    }
-  }
-}
-function formatOutputMarkdown(output) {
-  return output.map((msg) => `@[${msg.role}]:
-${String(msg.content ?? "")}`).join("\n\n");
-}
-function extractInput(result) {
-  const input = result.input;
-  if (!input) return null;
-  if (typeof input === "string") return input;
-  if (Array.isArray(input) && input.length > 0) {
-    return formatOutputMarkdown(input);
-  }
-  return null;
-}
-function safeTestId(result) {
-  const raw = result.testId ?? result.evalId ?? "unknown";
-  return String(raw).replace(/[/\\:*?"<>|]/g, "_");
 }
+// src/commands/results/export.ts
 function deriveOutputDir(cwd, sourceFile) {
-  const parentDir = path8.basename(path8.dirname(sourceFile));
+  const parentDir = path7.basename(path7.dirname(sourceFile));
   if (parentDir.startsWith("eval_")) {
     const dirName2 = parentDir.slice(5);
-    return path8.join(cwd, ".agentv", "results", "export", dirName2);
+    return path7.join(cwd, ".agentv", "results", "export", dirName2);
   }
-  const basename = path8.basename(sourceFile, ".jsonl");
+  const basename = path7.basename(sourceFile, ".jsonl");
   const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
-  return path8.join(cwd, ".agentv", "results", "export", dirName);
+  return path7.join(cwd, ".agentv", "results", "export", dirName);
 }
 var resultsExportCommand = command({
   name: "export",
@@ -4833,28 +5306,12 @@ var resultsExportCommand = command({
   handler: async ({ source, out, dir }) => {
     const cwd = dir ?? process.cwd();
     try {
-      let sourceFile;
-      if (source) {
-        sourceFile = path8.isAbsolute(source) ? source : path8.resolve(cwd, source);
-      } else {
-        const cache = await loadRunCache(cwd);
-        const cachedFile = cache ? resolveRunCacheFile(cache) : "";
-        if (cachedFile && existsSync2(cachedFile)) {
-          sourceFile = cachedFile;
-        } else {
-          const metas = listResultFiles(cwd, 1);
-          if (metas.length === 0) {
-            console.error("Error: No result files found in .agentv/results/");
-            console.error("Run an evaluation first: agentv eval <eval-file>");
-            process.exit(1);
-          }
-          sourceFile = metas[0].path;
-        }
-      }
-      const content = readFileSync6(sourceFile, "utf8");
-      const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
-      exportResults(sourceFile, content, outputDir);
-      const results = parseJsonlResults(content);
+      const { sourceFile } = await resolveSourceFile(source, cwd);
+      const { results } = await loadResults(source, cwd);
+      const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
+      await writeArtifactsFromResults(results, outputDir, {
+        evalFile: sourceFile
+      });
       console.log(`Exported ${results.length} test(s) to ${outputDir}`);
       for (const result of results) {
         const id = result.testId ?? result.evalId ?? "unknown";
@@ -4867,58 +5324,6 @@ var resultsExportCommand = command({
   }
 });
-// src/commands/results/shared.ts
-import { existsSync as existsSync3, readFileSync as readFileSync7 } from "node:fs";
-import path9 from "node:path";
-var sourceArg = positional({
-  type: optional(string),
-  displayName: "source",
-  description: "JSONL result file (defaults to most recent in .agentv/results/)"
-});
-async function resolveSourceFile(source, cwd) {
-  let sourceFile;
-  if (source) {
-    sourceFile = path9.isAbsolute(source) ? source : path9.resolve(cwd, source);
-    if (!existsSync3(sourceFile)) {
-      console.error(`Error: File not found: ${sourceFile}`);
-      process.exit(1);
-    }
-  } else {
-    const cache = await loadRunCache(cwd);
-    const cachedFile = cache ? resolveRunCacheFile(cache) : "";
-    if (cachedFile && existsSync3(cachedFile)) {
-      sourceFile = cachedFile;
-    } else {
-      const metas = listResultFiles(cwd, 1);
-      if (metas.length === 0) {
-        console.error("Error: No result files found in .agentv/results/");
-        console.error("Run an evaluation first: agentv eval <eval-file>");
-        process.exit(1);
-      }
-      sourceFile = metas[0].path;
-    }
-  }
-  const content = readFileSync7(sourceFile, "utf8");
-  return { sourceFile, content };
-}
-async function loadResults(source, cwd) {
-  const { sourceFile, content } = await resolveSourceFile(source, cwd);
-  const results = parseJsonlResults(content);
-  if (results.length === 0) {
-    console.error(`No results found in ${sourceFile}`);
-    process.exit(1);
-  }
-  return { results: patchTestIds(results), sourceFile };
-}
-function patchTestIds(results) {
-  return results.map((r) => {
-    if (!r.testId && r.evalId) {
-      return { ...r, testId: String(r.evalId) };
-    }
-    return r;
-  });
-}
 // src/commands/results/failures.ts
 function formatFailures(results) {
   return results.filter((r) => r.score < 1).map((r) => {
@@ -5045,7 +5450,7 @@ var resultsShowCommand = command({
 });
 // src/commands/results/summary.ts
-import { existsSync as existsSync4, readFileSync as readFileSync8 } from "node:fs";
+import { existsSync as existsSync3, readFileSync as readFileSync5 } from "node:fs";
 function formatSummary(results, grading) {
   const total = results.length;
   let passed;
@@ -5096,9 +5501,9 @@ var resultsSummaryCommand = command({
       const { results, sourceFile } = await loadResults(source, cwd);
       let grading;
       const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
-      if (existsSync4(gradingPath)) {
+      if (existsSync3(gradingPath)) {
         try {
-          grading = JSON.parse(readFileSync8(gradingPath, "utf8"));
+          grading = JSON.parse(readFileSync5(gradingPath, "utf8"));
         } catch {
         }
       }
@@ -5123,68 +5528,26 @@ var resultsCommand = subcommands({
 });
 // src/commands/results/serve.ts
-import { existsSync as existsSync5, readFileSync as readFileSync9, writeFileSync as writeFileSync4 } from "node:fs";
-import path10 from "node:path";
+import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
+import path8 from "node:path";
 import { Hono } from "hono";
-async function resolveSourceFile2(source, cwd) {
-  if (source) {
-    const resolved = path10.isAbsolute(source) ? source : path10.resolve(cwd, source);
-    if (!existsSync5(resolved)) {
-      throw new Error(`Source file not found: ${resolved}`);
-    }
-    return resolved;
-  }
-  const cache = await loadRunCache(cwd);
-  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
-  if (cachedFile && existsSync5(cachedFile)) {
-    return cachedFile;
-  }
-  const metas = listResultFiles(cwd, 10);
-  if (metas.length === 0) {
-    throw new Error(
-      "No result files found in .agentv/results/\nRun an evaluation first: agentv eval <eval-file>"
-    );
-  }
-  if (metas.length > 1) {
-    console.log("Available result files:");
-    for (const m of metas) {
-      console.log(`  ${m.path}`);
-    }
-    console.log(`
-Serving most recent: ${metas[0].path}
-`);
-  }
-  return metas[0].path;
-}
-function loadResults2(content) {
-  const results = parseJsonlResults(content);
-  if (results.length === 0) {
-    throw new Error("No valid results found in JSONL content");
-  }
-  return results.map((r) => {
-    if (!r.testId && r.evalId) {
-      return { ...r, testId: String(r.evalId) };
-    }
-    return r;
-  });
-}
 function feedbackPath(cwd) {
-  return path10.join(cwd, "feedback.json");
+  return path8.join(cwd, "feedback.json");
 }
 function readFeedback(cwd) {
   const fp = feedbackPath(cwd);
-  if (!existsSync5(fp)) {
+  if (!existsSync4(fp)) {
     return { reviews: [] };
   }
   try {
-    return JSON.parse(readFileSync9(fp, "utf8"));
+    return JSON.parse(readFileSync6(fp, "utf8"));
   } catch (err2) {
     console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
     return { reviews: [] };
   }
 }
 function writeFeedback(cwd, data) {
-  writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
+  writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
 `, "utf8");
 }
 function createApp(results, cwd) {
@@ -5854,9 +6217,7 @@ var resultsServeCommand = command({
     const cwd = dir ?? process.cwd();
     const listenPort = port ?? 3117;
     try {
-      const sourceFile = await resolveSourceFile2(source, cwd);
-      const content = readFileSync9(sourceFile, "utf8");
-      const results = loadResults2(content);
+      const { results, sourceFile } = await loadResults(source, cwd);
       const app2 = createApp(results, cwd);
       console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
       console.log(`Dashboard: http://localhost:${listenPort}`);
@@ -5889,7 +6250,7 @@ function detectPackageManager() {
   return detectPackageManagerFromPath(process.argv[1] ?? "");
 }
 function runCommand(cmd, args) {
-  return new Promise((resolve, reject) => {
+  return new Promise((resolve2, reject) => {
     const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
     let stdout = "";
     child.stdout?.on("data", (data) => {
@@ -5897,7 +6258,7 @@ function runCommand(cmd, args) {
       stdout += data.toString();
     });
     child.on("error", reject);
-    child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
+    child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
   });
 }
 var updateCommand = command({
@@ -6109,10 +6470,6 @@ function parseAssertSpec(spec) {
       );
   }
 }
-function toTraceSummary(raw) {
-  if (!raw.trace) return void 0;
-  return toCamelCaseDeep(raw.trace);
-}
 function extractCandidate(raw) {
   if (raw.output !== void 0)
     return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
@@ -6224,8 +6581,8 @@ var traceScoreCommand = command({
   args: {
     file: positional({
       type: string,
-      displayName: "result-file",
-      description: "Path to JSONL result file"
+      displayName: "trace-source",
+      description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
     }),
     assert: option({
       type: string,
@@ -6271,11 +6628,11 @@ var traceScoreCommand = command({
     );
     if (traceRequired) {
       const hasTrace = results.some(
-        (r) => r.trace || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
+        (r) => toTraceSummary(r) || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
       );
       if (!hasTrace) {
         console.error(
-          `${c2.red}Error:${c2.reset} Result file lacks trace data. Re-run eval with ${c2.bold}--trace${c2.reset} to capture trace summaries.`
+          `${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
         );
         process.exit(1);
       }
@@ -6308,7 +6665,7 @@ var traceScoreCommand = command({
 // src/commands/trace/show.ts
 function renderFlatTrace(result) {
-  const trace = result.trace;
+  const trace = getTraceSummary(result);
   const parts = [];
   if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
     const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
@@ -6339,8 +6696,12 @@ function renderScores(scores) {
 }
 function renderTree(result) {
   const messages = result.output;
+  const spans = getTraceSpans(result);
   if (!messages || messages.length === 0) {
-    if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
+    if (spans.length > 0) {
+      return renderSpanTree(result, spans);
+    }
+    if (getTraceSummary(result) || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
       return renderFlatTrace(result);
     }
     return `${c2.dim}No trace data available${c2.reset}`;
@@ -6406,6 +6767,30 @@ function renderTree(result) {
   }
   return lines.join("\n");
 }
+function renderSpanTree(result, spans) {
+  const lines = [];
+  const testId = result.test_id ?? result.eval_id ?? "unknown";
+  const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
+  const rootParts = [testId];
+  if (result.duration_ms !== void 0) rootParts.push(formatDuration(result.duration_ms));
+  if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
+  if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
+  lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
+  spans.forEach((span, index) => {
+    const connector = index === spans.length - 1 ? "\u2514\u2500" : "\u251C\u2500";
+    const color = span.type === "llm" ? c2.cyan : c2.yellow;
+    const parts = [`${color}${span.name}${c2.reset}`];
+    if (span.duration_ms !== void 0) {
+      parts.push(formatDuration(span.duration_ms));
+    }
+    lines.push(`${connector} ${parts.join(", ")}`);
+  });
+  if (result.scores && result.scores.length > 0) {
+    lines.push("");
+    lines.push(`${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
+  }
+  return lines.join("\n");
+}
 function formatResultDetail(result, index, tree) {
   const lines = [];
   const testId = result.test_id ?? result.eval_id ?? `result-${index}`;
@@ -6489,8 +6874,8 @@ var traceShowCommand = command({
   args: {
     file: positional({
       type: string,
-      displayName: "result-file",
-      description: "Path to JSONL result file"
+      displayName: "trace-source",
+      description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
     }),
     testId: option({
       type: optional(string),
@@ -6499,7 +6884,7 @@ var traceShowCommand = command({
     }),
     tree: flag({
       long: "tree",
-      description: "Show hierarchical trace tree (requires results with --trace output)"
+      description: "Show hierarchical trace tree from output messages or exported trace spans"
     }),
     format: option({
       type: optional(oneOf(["table", "json"])),
@@ -6570,11 +6955,11 @@ function collectMetrics(results) {
       formatter: (n) => formatNumber(Math.round(n))
     });
   }
-  const toolCalls = results.map((r) => r.trace?.event_count).filter((v) => v !== void 0);
+  const toolCalls = results.map((r) => getTraceSummary(r)?.event_count).filter((v) => v !== void 0);
   if (toolCalls.length > 0) {
     rows.push({ name: "tool_calls", values: toolCalls, formatter: (n) => String(Math.round(n)) });
   }
-  const llmCalls = results.map((r) => r.trace?.llm_call_count).filter((v) => v !== void 0);
+  const llmCalls = results.map((r) => getTraceSummary(r)?.llm_call_count).filter((v) => v !== void 0);
   if (llmCalls.length > 0) {
     rows.push({ name: "llm_calls", values: llmCalls, formatter: (n) => String(Math.round(n)) });
   }
@@ -6668,8 +7053,8 @@ var traceStatsCommand = command({
   args: {
     file: positional({
       type: string,
-      displayName: "result-file",
-      description: "Path to JSONL result file"
+      displayName: "trace-source",
+      description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
     }),
     groupBy: option({
       type: optional(oneOf(["target", "eval-set", "test-id"])),
@@ -6719,8 +7104,8 @@ var traceCommand = subcommands({
 });
 // src/commands/transpile/index.ts
-import { writeFileSync as writeFileSync5 } from "node:fs";
-import path11 from "node:path";
+import { writeFileSync as writeFileSync4 } from "node:fs";
+import path9 from "node:path";
 var transpileCommand = command({
   name: "transpile",
   description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
@@ -6744,7 +7129,7 @@ var transpileCommand = command({
   handler: async ({ input, outDir, stdout }) => {
     let result;
     try {
-      result = transpileEvalYamlFile(path11.resolve(input));
+      result = transpileEvalYamlFile(path9.resolve(input));
     } catch (error) {
       console.error(`Error: ${error.message}`);
       process.exit(1);
@@ -6768,12 +7153,12 @@ var transpileCommand = command({
       process.stdout.write("\n");
       return;
     }
-    const outputDir = outDir ? path11.resolve(outDir) : path11.dirname(path11.resolve(input));
+    const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
     const fileNames = getOutputFilenames(result);
     for (const [skill, evalsJson] of result.files) {
       const fileName = fileNames.get(skill) ?? "evals.json";
-      const outputPath = path11.join(outputDir, fileName);
-      writeFileSync5(outputPath, `${JSON.stringify(evalsJson, null, 2)}
+      const outputPath = path9.join(outputDir, fileName);
+      writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
 `);
       console.log(`Transpiled to ${outputPath}`);
     }
@@ -6781,7 +7166,7 @@ var transpileCommand = command({
 });
 // src/commands/trim/index.ts
-import { readFileSync as readFileSync10, writeFileSync as writeFileSync6 } from "node:fs";
+import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
 var trimCommand = command({
   name: "trim",
   description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -6800,7 +7185,7 @@ var trimCommand = command({
   },
   handler: async ({ input, out }) => {
     try {
-      const content = readFileSync10(input, "utf8");
+      const content = readFileSync7(input, "utf8");
       const lines = content.trim().split("\n").filter((line) => line.trim());
       const trimmedLines = lines.map((line) => {
         const record = JSON.parse(line);
@@ -6812,7 +7197,7 @@ var trimCommand = command({
       const output = `${trimmedLines.join("\n")}
 `;
       if (out) {
-        writeFileSync6(out, output, "utf8");
+        writeFileSync5(out, output, "utf8");
         console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
       } else {
         process.stdout.write(output);
@@ -6906,8 +7291,8 @@ function isTTY() {
 // src/commands/validate/validate-files.ts
 import { constants } from "node:fs";
-import { access, readdir, stat } from "node:fs/promises";
-import path12 from "node:path";
+import { access, readdir as readdir3, stat } from "node:fs/promises";
+import path10 from "node:path";
 async function validateFiles(paths) {
   const filePaths = await expandPaths(paths);
   const results = [];
@@ -6925,7 +7310,7 @@ async function validateFiles(paths) {
   };
 }
 async function validateSingleFile(filePath) {
-  const absolutePath = path12.resolve(filePath);
+  const absolutePath = path10.resolve(filePath);
   const fileType = await detectFileType(absolutePath);
   let result;
   if (fileType === "eval") {
@@ -6950,7 +7335,7 @@ async function validateSingleFile(filePath) {
 async function expandPaths(paths) {
   const expanded = [];
   for (const inputPath of paths) {
-    const absolutePath = path12.resolve(inputPath);
+    const absolutePath = path10.resolve(inputPath);
     try {
       await access(absolutePath, constants.F_OK);
     } catch {
@@ -6972,9 +7357,9 @@ async function expandPaths(paths) {
 async function findYamlFiles(dirPath) {
   const results = [];
   try {
-    const entries2 = await readdir(dirPath, { withFileTypes: true });
+    const entries2 = await readdir3(dirPath, { withFileTypes: true });
     for (const entry of entries2) {
-      const fullPath = path12.join(dirPath, entry.name);
+      const fullPath = path10.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         if (entry.name === "node_modules" || entry.name.startsWith(".")) {
           continue;
@@ -6991,7 +7376,7 @@ async function findYamlFiles(dirPath) {
   return results;
 }
 function isYamlFile(filePath) {
-  const ext = path12.extname(filePath).toLowerCase();
+  const ext = path10.extname(filePath).toLowerCase();
   return ext === ".yaml" || ext === ".yml";
 }
@@ -7029,14 +7414,14 @@ var validateCommand = command({
 });
 // src/commands/workspace/clean.ts
-import { existsSync as existsSync6 } from "node:fs";
-import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
-import path13 from "node:path";
+import { existsSync as existsSync5 } from "node:fs";
+import { readFile as readFile4, readdir as readdir4, rm } from "node:fs/promises";
+import path11 from "node:path";
 async function confirm(message) {
   const readline2 = await import("node:readline");
   const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
-  const answer = await new Promise((resolve) => {
-    rl.question(`${message} [y/N] `, resolve);
+  const answer = await new Promise((resolve2) => {
+    rl.question(`${message} [y/N] `, resolve2);
   });
   rl.close();
   return answer.toLowerCase() === "y";
@@ -7058,19 +7443,19 @@ var cleanCommand = command({
   },
   handler: async ({ repo, force }) => {
     const poolRoot = getWorkspacePoolRoot();
-    if (!existsSync6(poolRoot)) {
+    if (!existsSync5(poolRoot)) {
       console.log("No workspace pool entries found.");
       return;
     }
     if (repo) {
-      const entries2 = await readdir2(poolRoot, { withFileTypes: true });
+      const entries2 = await readdir4(poolRoot, { withFileTypes: true });
       const poolDirs = entries2.filter((e) => e.isDirectory());
       const matchingDirs = [];
       for (const dir of poolDirs) {
-        const poolDir = path13.join(poolRoot, dir.name);
-        const metadataPath = path13.join(poolDir, "metadata.json");
+        const poolDir = path11.join(poolRoot, dir.name);
+        const metadataPath = path11.join(poolDir, "metadata.json");
         try {
-          const raw = await readFile2(metadataPath, "utf-8");
+          const raw = await readFile4(metadataPath, "utf-8");
           const metadata = JSON.parse(raw);
           const hasRepo = metadata.repos?.some((r) => {
             if (r.source.type === "git" && r.source.url) {
@@ -7099,7 +7484,7 @@ var cleanCommand = command({
       }
       for (const dir of matchingDirs) {
         await rm(dir, { recursive: true, force: true });
-        console.log(`Removed: ${path13.basename(dir).slice(0, 12)}...`);
+        console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
       }
       console.log("Done.");
     } else {
@@ -7117,15 +7502,15 @@ var cleanCommand = command({
 });
 // src/commands/workspace/list.ts
-import { existsSync as existsSync7 } from "node:fs";
-import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
-import path14 from "node:path";
+import { existsSync as existsSync6 } from "node:fs";
+import { readFile as readFile5, readdir as readdir5, stat as stat2 } from "node:fs/promises";
+import path12 from "node:path";
 async function getDirectorySize(dirPath) {
   let totalSize = 0;
   try {
-    const entries2 = await readdir3(dirPath, { withFileTypes: true });
+    const entries2 = await readdir5(dirPath, { withFileTypes: true });
     for (const entry of entries2) {
-      const fullPath = path14.join(dirPath, entry.name);
+      const fullPath = path12.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         totalSize += await getDirectorySize(fullPath);
       } else {
@@ -7149,25 +7534,25 @@ var listCommand = command({
   args: {},
   handler: async () => {
     const poolRoot = getWorkspacePoolRoot();
-    if (!existsSync7(poolRoot)) {
+    if (!existsSync6(poolRoot)) {
       console.log("No workspace pool entries found.");
       return;
     }
-    const entries2 = await readdir3(poolRoot, { withFileTypes: true });
+    const entries2 = await readdir5(poolRoot, { withFileTypes: true });
     const poolDirs = entries2.filter((e) => e.isDirectory());
     if (poolDirs.length === 0) {
       console.log("No workspace pool entries found.");
       return;
     }
     for (const dir of poolDirs) {
-      const poolDir = path14.join(poolRoot, dir.name);
+      const poolDir = path12.join(poolRoot, dir.name);
       const fingerprint = dir.name;
-      const poolEntries = await readdir3(poolDir, { withFileTypes: true });
+      const poolEntries = await readdir5(poolDir, { withFileTypes: true });
       const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
-      const metadataPath = path14.join(poolDir, "metadata.json");
+      const metadataPath = path12.join(poolDir, "metadata.json");
       let metadata = null;
       try {
-        const raw = await readFile3(metadataPath, "utf-8");
+        const raw = await readFile5(metadataPath, "utf-8");
         metadata = JSON.parse(raw);
       } catch {
       }
@@ -7204,16 +7589,16 @@ var workspaceCommand = subcommands({
 // src/update-check.ts
 import { spawn as spawn2 } from "node:child_process";
-import { readFile as readFile4 } from "node:fs/promises";
-import { join } from "node:path";
+import { readFile as readFile6 } from "node:fs/promises";
+import { join as join4 } from "node:path";
 var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
 var AGENTV_DIR = getAgentvHome();
 var CACHE_FILE = "version-check.json";
 var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
-async function getCachedUpdateInfo(path15) {
-  const filePath = path15 ?? join(AGENTV_DIR, CACHE_FILE);
+async function getCachedUpdateInfo(path13) {
+  const filePath = path13 ?? join4(AGENTV_DIR, CACHE_FILE);
   try {
-    const raw = await readFile4(filePath, "utf-8");
+    const raw = await readFile6(filePath, "utf-8");
     const data = JSON.parse(raw);
     if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
       return data;
@@ -7245,7 +7630,7 @@ function buildNotice(currentVersion, latestVersion) {
 }
 function backgroundUpdateCheck() {
   const dir = AGENTV_DIR;
-  const filePath = join(dir, CACHE_FILE);
+  const filePath = join4(dir, CACHE_FILE);
   const script = `
     const https = require('https');
     const fs = require('fs');
@@ -7299,8 +7684,8 @@ var app = subcommands({
     compare: compareCommand,
     convert: convertCommand,
     create: createCommand,
-    generate: generateCommand,
     init: initCmdTsCommand,
+    pipeline: pipelineCommand,
     results: resultsCommand,
     self: selfCommand,
     serve: resultsServeCommand,
@@ -7317,8 +7702,8 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
   "compare",
   "convert",
   "create",
-  "generate",
   "init",
+  "pipeline",
   "results",
   "self",
   "serve",
@@ -7368,4 +7753,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-V2S5CZU3.js.map
+//# sourceMappingURL=chunk-DJU4C6NS.js.map