npm - agentv - Versions diffs - 1.3.1 → 1.5.0 - Mend

agentv 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +439 -441
package/dist/{chunk-6R2YRXCQ.js → chunk-3RYQPI4H.js} +487 -329
package/dist/chunk-3RYQPI4H.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/dist/templates/.agentv/.env.template +23 -23
package/dist/templates/.agentv/config.yaml +15 -15
package/dist/templates/.agentv/targets.yaml +71 -73
package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +212 -211
package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +318 -288
package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -215
package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +216 -213
package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +340 -333
package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +139 -139
package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +198 -179
package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +77 -77
package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +4 -4
package/dist/templates/.github/prompts/agentv-optimize.prompt.md +3 -3
package/package.json +2 -5
package/dist/chunk-6R2YRXCQ.js.map +0 -1

package/dist/{chunk-6R2YRXCQ.js → chunk-3RYQPI4H.js} RENAMED

@@ -141,30 +141,14 @@ var require_dist = __commonJS({
 });
 // src/index.ts
-import { readFileSync as readFileSync2 } from "node:fs";
+import { readFileSync as readFileSync3 } from "node:fs";
 import { binary, run, subcommands as subcommands2 } from "cmd-ts";
-// src/commands/eval/index.ts
-import { stat as stat4 } from "node:fs/promises";
-import path20 from "node:path";
-import {
-  command,
-  flag,
-  number as number4,
-  option,
-  optional as optional2,
-  restPositionals,
-  string as string4
-} from "cmd-ts";
-import fg from "fast-glob";
-// src/commands/eval/run-eval.ts
-import { constants as constants6 } from "node:fs";
-import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
-import path19 from "node:path";
-import { pathToFileURL } from "node:url";
+// src/commands/convert/index.ts
+import { readFileSync, writeFileSync } from "node:fs";
+import path14 from "node:path";
-// ../../packages/core/dist/chunk-4A6L2F6L.js
+// ../../packages/core/dist/chunk-KPHTMTZ3.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -648,8 +632,8 @@ function getErrorMap() {
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
 var makeIssue = (params) => {
-  const { data, path: path27, errorMaps, issueData } = params;
-  const fullPath = [...path27, ...issueData.path || []];
+  const { data, path: path28, errorMaps, issueData } = params;
+  const fullPath = [...path28, ...issueData.path || []];
   const fullIssue = {
     ...issueData,
     path: fullPath
@@ -765,11 +749,11 @@ var errorUtil;
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
 var ParseInputLazyPath = class {
-  constructor(parent, value, path27, key2) {
+  constructor(parent, value, path28, key2) {
     this._cachedPath = [];
     this.parent = parent;
     this.data = value;
-    this._path = path27;
+    this._path = path28;
     this._key = key2;
   }
   get path() {
@@ -1049,8 +1033,8 @@ var ZodType = class {
   promise() {
     return ZodPromise.create(this, this._def);
   }
-  or(option4) {
-    return ZodUnion.create([this, option4], this._def);
+  or(option5) {
+    return ZodUnion.create([this, option5], this._def);
   }
   and(incoming) {
     return ZodIntersection.create(this, incoming, this._def);
@@ -2900,7 +2884,7 @@ var ZodUnion = class extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return Promise.all(options.map(async (option4) => {
+      return Promise.all(options.map(async (option5) => {
         const childCtx = {
           ...ctx,
           common: {
@@ -2910,7 +2894,7 @@ var ZodUnion = class extends ZodType {
           parent: null
         };
         return {
-          result: await option4._parseAsync({
+          result: await option5._parseAsync({
             data: ctx.data,
             path: ctx.path,
             parent: childCtx
@@ -2921,7 +2905,7 @@ var ZodUnion = class extends ZodType {
     } else {
       let dirty = void 0;
       const issues = [];
-      for (const option4 of options) {
+      for (const option5 of options) {
         const childCtx = {
           ...ctx,
           common: {
@@ -2930,7 +2914,7 @@ var ZodUnion = class extends ZodType {
           },
           parent: null
         };
-        const result = option4._parseSync({
+        const result = option5._parseSync({
           data: ctx.data,
           path: ctx.path,
           parent: childCtx
@@ -3011,8 +2995,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
     }
     const discriminator = this.discriminator;
     const discriminatorValue = ctx.data[discriminator];
-    const option4 = this.optionsMap.get(discriminatorValue);
-    if (!option4) {
+    const option5 = this.optionsMap.get(discriminatorValue);
+    if (!option5) {
       addIssueToContext(ctx, {
         code: ZodIssueCode.invalid_union_discriminator,
         options: Array.from(this.optionsMap.keys()),
@@ -3021,13 +3005,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return option4._parseAsync({
+      return option5._parseAsync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
       });
     } else {
-      return option4._parseSync({
+      return option5._parseSync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
@@ -4211,7 +4195,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-4A6L2F6L.js
+// ../../packages/core/dist/chunk-KPHTMTZ3.js
 async function fileExists(filePath) {
   try {
     await access(filePath, constants.F_OK);
@@ -4227,10 +4211,6 @@ async function readTextFile(filePath) {
   const content = await readFile(filePath, "utf8");
   return normalizeLineEndings(content);
 }
-async function readJsonFile(filePath) {
-  const content = await readFile(filePath, "utf8");
-  return JSON.parse(content);
-}
 async function findGitRoot(startPath) {
   let currentDir = path.dirname(path.resolve(startPath));
   const root2 = path.parse(currentDir).root;
@@ -4574,8 +4554,7 @@ function normalizeCodexLogFormat(value) {
 }
 function resolveMockConfig(target) {
   const response = typeof target.response === "string" ? target.response : void 0;
-  const trace2 = Array.isArray(target.trace) ? target.trace : void 0;
-  return { response, trace: trace2 };
+  return { response };
 }
 function resolveVSCodeConfig(target, env, insiders) {
   const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -4595,9 +4574,9 @@ function resolveVSCodeConfig(target, env, insiders) {
   const dryRunSource = target.dry_run ?? target.dryRun;
   const subagentRootSource = target.subagent_root ?? target.subagentRoot;
   const defaultCommand = insiders ? "code-insiders" : "code";
-  const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
+  const command6 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
   return {
-    command: command5,
+    command: command6,
     waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
     dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
     subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -4613,6 +4592,9 @@ function resolveCliConfig(target, env, evalFilePath) {
     target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
   );
   const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
+  const keepTempFiles = resolveOptionalBoolean(
+    target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
+  );
   let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
     allowLiteral: true,
     optionalEnv: true
@@ -4641,7 +4623,8 @@ function resolveCliConfig(target, env, evalFilePath) {
     cwd,
     timeoutMs,
     healthcheck,
-    verbose
+    verbose,
+    keepTempFiles
   };
 }
 function resolveTimeoutMs(source2, description) {
@@ -4891,6 +4874,21 @@ var PROVIDER_ALIASES = [
   "vertex"
   // legacy/future support
 ];
+function extractLastAssistantContent(messages) {
+  if (!messages || messages.length === 0) {
+    return "";
+  }
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.role === "assistant" && msg.content !== void 0) {
+      if (typeof msg.content === "string") {
+        return msg.content;
+      }
+      return JSON.stringify(msg.content);
+    }
+  }
+  return "";
+}
 function isAgentProvider(provider) {
   return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
 }
@@ -6001,10 +5999,10 @@ function assignProp(target, prop, value) {
     configurable: true
   });
 }
-function getElementAtPath(obj, path27) {
-  if (!path27)
+function getElementAtPath(obj, path28) {
+  if (!path28)
     return obj;
-  return path27.reduce((acc, key2) => acc?.[key2], obj);
+  return path28.reduce((acc, key2) => acc?.[key2], obj);
 }
 function promiseAllObject(promisesObj) {
   const keys = Object.keys(promisesObj);
@@ -6324,11 +6322,11 @@ function aborted(x, startIndex = 0) {
   }
   return false;
 }
-function prefixIssues(path27, issues) {
+function prefixIssues(path28, issues) {
   return issues.map((iss) => {
     var _a17;
     (_a17 = iss).path ?? (_a17.path = []);
-    iss.path.unshift(path27);
+    iss.path.unshift(path28);
     return iss;
   });
 }
@@ -6465,7 +6463,7 @@ function treeifyError(error40, _mapper) {
     return issue2.message;
   };
   const result = { errors: [] };
-  const processError = (error41, path27 = []) => {
+  const processError = (error41, path28 = []) => {
     var _a17, _b8;
     for (const issue2 of error41.issues) {
       if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6475,7 +6473,7 @@ function treeifyError(error40, _mapper) {
       } else if (issue2.code === "invalid_element") {
         processError({ issues: issue2.issues }, issue2.path);
       } else {
-        const fullpath = [...path27, ...issue2.path];
+        const fullpath = [...path28, ...issue2.path];
         if (fullpath.length === 0) {
           result.errors.push(mapper(issue2));
           continue;
@@ -6505,9 +6503,9 @@ function treeifyError(error40, _mapper) {
   processError(error40);
   return result;
 }
-function toDotPath(path27) {
+function toDotPath(path28) {
   const segs = [];
-  for (const seg of path27) {
+  for (const seg of path28) {
     if (typeof seg === "number")
       segs.push(`[${seg}]`);
     else if (typeof seg === "symbol")
@@ -8106,7 +8104,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
   defineLazy(inst._zod, "values", () => {
     if (def.options.every((o) => o._zod.values)) {
-      return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
+      return new Set(def.options.flatMap((option5) => Array.from(option5._zod.values)));
     }
     return void 0;
   });
@@ -8120,8 +8118,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   inst._zod.parse = (payload, ctx) => {
     let async = false;
     const results = [];
-    for (const option4 of def.options) {
-      const result = option4._zod.run({
+    for (const option5 of def.options) {
+      const result = option5._zod.run({
         value: payload.value,
         issues: []
       }, ctx);
@@ -8146,10 +8144,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
   const _super = inst._zod.parse;
   defineLazy(inst._zod, "propValues", () => {
     const propValues = {};
-    for (const option4 of def.options) {
-      const pv = option4._zod.propValues;
+    for (const option5 of def.options) {
+      const pv = option5._zod.propValues;
       if (!pv || Object.keys(pv).length === 0)
-        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
+        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option5)}"`);
       for (const [k, v] of Object.entries(pv)) {
         if (!propValues[k])
           propValues[k] = /* @__PURE__ */ new Set();
@@ -15353,8 +15351,8 @@ function isTransforming(_schema, _ctx) {
       return false;
     }
     case "union": {
-      for (const option4 of def.options) {
-        if (isTransforming(option4, ctx))
+      for (const option5 of def.options) {
+        if (isTransforming(option5, ctx))
           return true;
       }
       return false;
@@ -26060,14 +26058,14 @@ function createAzure(options = {}) {
     description: "Azure OpenAI resource name"
   });
   const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
-  const url2 = ({ path: path27, modelId }) => {
+  const url2 = ({ path: path28, modelId }) => {
     var _a24;
     const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
     let fullUrl;
     if (options.useDeploymentBasedUrls) {
-      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path27}`);
+      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path28}`);
     } else {
-      fullUrl = new URL(`${baseUrlPrefix}/v1${path27}`);
+      fullUrl = new URL(`${baseUrlPrefix}/v1${path28}`);
     }
     fullUrl.searchParams.set("api-version", apiVersion);
     return fullUrl.toString();
@@ -34595,33 +34593,22 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
 function isEvaluatorKind(value) {
   return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
 }
-function isTraceEventType(value) {
-  return typeof value === "string" && ["model_step", "tool_call", "tool_result", "message", "error"].includes(value);
-}
-function isTraceEvent(value) {
-  if (typeof value !== "object" || value === null) {
-    return false;
-  }
-  const candidate = value;
-  return isTraceEventType(candidate.type) && typeof candidate.timestamp === "string";
-}
-function computeTraceSummary(trace2) {
+function computeTraceSummary(messages) {
   const toolCallCounts = {};
-  let errorCount = 0;
-  for (const event of trace2) {
-    if (event.type === "tool_call" && event.name) {
-      toolCallCounts[event.name] = (toolCallCounts[event.name] ?? 0) + 1;
-    }
-    if (event.type === "error") {
-      errorCount++;
+  let totalToolCalls = 0;
+  for (const message of messages) {
+    if (!message.toolCalls) continue;
+    for (const toolCall of message.toolCalls) {
+      toolCallCounts[toolCall.tool] = (toolCallCounts[toolCall.tool] ?? 0) + 1;
+      totalToolCalls++;
     }
   }
   const toolNames = Object.keys(toolCallCounts).sort();
   return {
-    eventCount: trace2.length,
+    eventCount: totalToolCalls,
     toolNames,
     toolCallsByName: toolCallCounts,
-    errorCount
+    errorCount: 0
   };
 }
 function extractCodeBlocks(segments) {
@@ -34869,7 +34856,8 @@ var TEMPLATE_VARIABLES = {
   QUESTION: "question",
   EXPECTED_OUTCOME: "expected_outcome",
   REFERENCE_ANSWER: "reference_answer",
-  INPUT_MESSAGES: "input_messages"
+  INPUT_MESSAGES: "input_messages",
+  OUTPUT_MESSAGES: "output_messages"
 };
 var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
 var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
@@ -35738,16 +35726,16 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     }) : [];
     const codeSnippets = extractCodeBlocks(inputSegments);
     let referenceAnswer = "";
-    if (outputSegments.length > 1) {
-      referenceAnswer = JSON.stringify(outputSegments, null, 2);
-    } else if (outputSegments.length === 1) {
-      const singleMessage = outputSegments[0];
-      if (typeof singleMessage.content === "string") {
-        referenceAnswer = singleMessage.content;
-      } else if (singleMessage.content) {
-        referenceAnswer = JSON.stringify(singleMessage, null, 2);
-      } else if (singleMessage.tool_calls) {
-        referenceAnswer = JSON.stringify(singleMessage, null, 2);
+    if (outputSegments.length > 0) {
+      const lastMessage = outputSegments[outputSegments.length - 1];
+      const content = lastMessage.content;
+      const toolCalls = lastMessage.tool_calls;
+      if (typeof content === "string") {
+        referenceAnswer = content;
+      } else if (content !== void 0 && content !== null) {
+        referenceAnswer = JSON.stringify(content, null, 2);
+      } else if (toolCalls !== void 0 && toolCalls !== null) {
+        referenceAnswer = JSON.stringify(toolCalls, null, 2);
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
@@ -36069,11 +36057,11 @@ async function invokeModel(options) {
   return mapResponse(result);
 }
 function mapResponse(result) {
+  const content = result.text ?? "";
   return {
-    text: result.text ?? "",
-    reasoning: result.reasoningText ?? void 0,
     raw: result,
-    usage: toJsonObject(result.totalUsage ?? result.usage)
+    usage: toJsonObject(result.totalUsage ?? result.usage),
+    outputMessages: [{ role: "assistant", content }]
   };
 }
 function toJsonObject(value) {
@@ -36180,7 +36168,7 @@ async function withRetry(fn, retryConfig, signal) {
 }
 var execAsync2 = promisify2(execWithCallback);
 var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
-async function defaultCommandRunner(command5, options) {
+async function defaultCommandRunner(command6, options) {
   const execOptions = {
     cwd: options.cwd,
     env: options.env,
@@ -36190,7 +36178,7 @@ async function defaultCommandRunner(command5, options) {
     shell: process.platform === "win32" ? "powershell.exe" : void 0
   };
   try {
-    const { stdout, stderr } = await execAsync2(command5, execOptions);
+    const { stdout, stderr } = await execAsync2(command6, execOptions);
     return {
       stdout,
       stderr,
@@ -36219,6 +36207,7 @@ var CliProvider = class {
   config;
   runCommand;
   verbose;
+  keepTempFiles;
   healthcheckPromise;
   constructor(targetName, config2, runner = defaultCommandRunner) {
     this.targetName = targetName;
@@ -36226,6 +36215,7 @@ var CliProvider = class {
     this.config = config2;
     this.runCommand = runner;
     this.verbose = config2.verbose ?? false;
+    this.keepTempFiles = config2.keepTempFiles ?? false;
   }
   async invoke(request) {
     if (request.signal?.aborted) {
@@ -36263,8 +36253,7 @@ var CliProvider = class {
     const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
     const parsed = this.parseOutputContent(responseContent);
     return {
-      text: parsed.text,
-      trace: parsed.trace,
+      outputMessages: parsed.outputMessages,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
@@ -36343,7 +36332,7 @@ var CliProvider = class {
       const evalCaseId = request.evalCaseId;
       if (!evalCaseId) {
         return {
-          text: "",
+          outputMessages: [],
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -36356,7 +36345,7 @@ var CliProvider = class {
       const parsed = recordsById.get(evalCaseId);
       if (!parsed) {
         return {
-          text: "",
+          outputMessages: [],
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -36367,9 +36356,7 @@ var CliProvider = class {
         };
       }
       return {
-        text: parsed.text,
-        trace: parsed.trace,
-        traceRef: parsed.traceRef,
+        outputMessages: parsed.outputMessages,
         raw: {
           command: renderedCommand,
           stderr: result.stderr,
@@ -36384,28 +36371,81 @@ var CliProvider = class {
   }
   /**
    * Parse output content from CLI.
-   * If the content is valid JSON with a 'text' field, extract text and optional trace.
-   * Otherwise, treat the entire content as plain text.
+   * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
+   * If only 'text' is provided, wrap it in outputMessages.
+   * Otherwise, treat the entire content as plain text wrapped in outputMessages.
    */
   parseOutputContent(content) {
     try {
       const parsed = JSON.parse(content);
-      if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
+      if (typeof parsed === "object" && parsed !== null) {
         const obj = parsed;
-        const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
-        const trace2 = this.parseTrace(obj.trace);
-        return { text: text2, trace: trace2 };
+        const outputMessages = this.parseOutputMessages(obj.output_messages);
+        if (outputMessages && outputMessages.length > 0) {
+          return { outputMessages };
+        }
+        if ("text" in obj) {
+          const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
+          return { outputMessages: [{ role: "assistant", content: text2 }] };
+        }
       }
     } catch {
     }
-    return { text: content };
+    return { outputMessages: [{ role: "assistant", content }] };
   }
-  parseTrace(trace2) {
-    if (!Array.isArray(trace2)) {
+  /**
+   * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
+   */
+  parseOutputMessages(outputMessages) {
+    if (!Array.isArray(outputMessages)) {
       return void 0;
     }
-    const validEvents = trace2.filter(isTraceEvent);
-    return validEvents.length > 0 ? validEvents : void 0;
+    const messages = [];
+    for (const msg of outputMessages) {
+      if (typeof msg !== "object" || msg === null) {
+        continue;
+      }
+      const rawMsg = msg;
+      if (typeof rawMsg.role !== "string") {
+        continue;
+      }
+      const message = {
+        role: rawMsg.role,
+        name: typeof rawMsg.name === "string" ? rawMsg.name : void 0,
+        content: rawMsg.content,
+        toolCalls: this.parseToolCalls(rawMsg.tool_calls),
+        timestamp: typeof rawMsg.timestamp === "string" ? rawMsg.timestamp : void 0,
+        metadata: typeof rawMsg.metadata === "object" && rawMsg.metadata !== null ? rawMsg.metadata : void 0
+      };
+      messages.push(message);
+    }
+    return messages.length > 0 ? messages : void 0;
+  }
+  /**
+   * Parse tool_calls from JSONL (snake_case) and convert to ToolCall[] format.
+   */
+  parseToolCalls(toolCalls) {
+    if (!Array.isArray(toolCalls)) {
+      return void 0;
+    }
+    const calls = [];
+    for (const call of toolCalls) {
+      if (typeof call !== "object" || call === null) {
+        continue;
+      }
+      const rawCall = call;
+      if (typeof rawCall.tool !== "string") {
+        continue;
+      }
+      calls.push({
+        tool: rawCall.tool,
+        input: rawCall.input,
+        output: rawCall.output,
+        id: typeof rawCall.id === "string" ? rawCall.id : void 0,
+        timestamp: typeof rawCall.timestamp === "string" ? rawCall.timestamp : void 0
+      });
+    }
+    return calls.length > 0 ? calls : void 0;
   }
   parseJsonlBatchOutput(content) {
     const records = /* @__PURE__ */ new Map();
@@ -36429,12 +36469,16 @@ var CliProvider = class {
       if (records.has(id)) {
         throw new Error(`CLI batch output contains duplicate id: ${id}`);
       }
-      const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
-      const traceRef = typeof obj.traceRef === "string" ? obj.traceRef : typeof obj.trace_ref === "string" ? obj.trace_ref : void 0;
+      const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
+      let outputMessages;
+      if (parsedOutputMessages && parsedOutputMessages.length > 0) {
+        outputMessages = parsedOutputMessages;
+      } else {
+        const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
+        outputMessages = text2 ? [{ role: "assistant", content: text2 }] : [];
+      }
       records.set(id, {
-        text: text2,
-        trace: this.parseTrace(obj.trace),
-        traceRef
+        outputMessages
       });
     }
     return records;
@@ -36447,8 +36491,10 @@ var CliProvider = class {
       const errorMsg = error40 instanceof Error ? error40.message : String(error40);
       throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
     } finally {
-      await fs.unlink(filePath).catch(() => {
-      });
+      if (!this.keepTempFiles) {
+        await fs.unlink(filePath).catch(() => {
+        });
+      }
     }
   }
   async ensureHealthy(signal) {
@@ -36768,7 +36814,6 @@ var CodexProvider = class {
       const parsed = parseCodexJson(result.stdout);
       const assistantText = extractAssistantText(parsed);
       return {
-        text: assistantText,
         raw: {
           response: parsed,
           stdout: result.stdout,
@@ -36780,7 +36825,8 @@ var CodexProvider = class {
           workspace: workspaceRoot,
           inputFiles,
           logFile: logger?.filePath
-        }
+        },
+        outputMessages: [{ role: "assistant", content: assistantText }]
       };
     } finally {
       await logger?.close();
@@ -37400,7 +37446,6 @@ var MockProvider = class {
   delayMs;
   delayMinMs;
   delayMaxMs;
-  trace;
   constructor(targetName, config2) {
     this.id = `mock:${targetName}`;
     this.targetName = targetName;
@@ -37408,7 +37453,6 @@ var MockProvider = class {
     this.delayMs = config2.delayMs ?? 0;
     this.delayMinMs = config2.delayMinMs ?? 0;
     this.delayMaxMs = config2.delayMaxMs ?? 0;
-    this.trace = config2.trace;
   }
   async invoke(request) {
     const delay2 = this.calculateDelay();
@@ -37416,12 +37460,11 @@ var MockProvider = class {
       await new Promise((resolve2) => setTimeout(resolve2, delay2));
     }
     return {
-      text: this.cannedResponse,
+      outputMessages: [{ role: "assistant", content: this.cannedResponse }],
       raw: {
         question: request.question,
         guidelines: request.guidelines
-      },
-      trace: this.trace
+      }
     };
   }
   calculateDelay() {
@@ -37501,7 +37544,7 @@ var VSCodeProvider = class {
     }
     if (this.config.dryRun) {
       return {
-        text: "",
+        outputMessages: [],
         raw: {
           session,
           inputFiles
@@ -37510,7 +37553,7 @@ var VSCodeProvider = class {
     }
     const responseText = await readTextFile(session.responseFile);
     return {
-      text: responseText,
+      outputMessages: [{ role: "assistant", content: responseText }],
       raw: {
         session,
         inputFiles
@@ -37548,7 +37591,7 @@ var VSCodeProvider = class {
     }
     if (this.config.dryRun) {
       return normalizedRequests.map(({ inputFiles }) => ({
-        text: "",
+        outputMessages: [],
         raw: {
           session,
           inputFiles,
@@ -37565,7 +37608,7 @@ var VSCodeProvider = class {
     for (const [index, responseFile] of session.responseFiles.entries()) {
       const responseText = await readTextFile(responseFile);
       responses.push({
-        text: responseText,
+        outputMessages: [{ role: "assistant", content: responseText }],
         raw: {
           session,
           inputFiles: normalizedRequests[index]?.inputFiles,
@@ -37853,6 +37896,7 @@ var LlmJudgeEvaluator = class {
         null,
         2
       ),
+      [TEMPLATE_VARIABLES.OUTPUT_MESSAGES]: JSON.stringify(context.outputMessages ?? [], null, 2),
       [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
       [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
       [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
@@ -37877,7 +37921,7 @@ var LlmJudgeEvaluator = class {
       const score = clampScore(data.score);
       const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
       const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
-      const reasoning = data.reasoning ?? providerResponse?.reasoning;
+      const reasoning = data.reasoning;
       const expectedAspectCount = Math.max(hits.length + misses.length, 1);
       return {
         score,
@@ -37979,7 +38023,9 @@ var LlmJudgeEvaluator = class {
           maxOutputTokens: this.maxOutputTokens,
           temperature: this.temperature
         });
-        const data = schema.parse(parseJsonFromText(response.text ?? ""));
+        const data = schema.parse(
+          parseJsonFromText(extractLastAssistantContent(response.outputMessages))
+        );
         return { data, providerResponse: response };
       } catch (e) {
         lastError = e instanceof Error ? e : new Error(String(e));
@@ -38065,13 +38111,13 @@ var CodeEvaluator = class {
         expected_messages: context.evalCase.expected_messages,
         reference_answer: context.evalCase.reference_answer,
         candidate_answer: context.candidate,
+        output_messages: context.outputMessages ?? null,
         guideline_files: context.evalCase.guideline_paths,
         input_files: context.evalCase.file_paths.filter(
           (path132) => !context.evalCase.guideline_paths.includes(path132)
         ),
         input_messages: context.evalCase.input_messages,
-        candidate_trace_file: context.candidateTraceRef ?? null,
-        candidate_trace_summary: context.candidateTraceSummary ?? null
+        candidate_trace_summary: context.traceSummary ?? null
       },
       null,
       2
@@ -38198,8 +38244,19 @@ var ToolTrajectoryEvaluator = class {
     this.config = options.config;
   }
   evaluate(context) {
-    const { candidateTrace, candidateTraceSummary } = context;
-    if (!candidateTrace || !candidateTraceSummary) {
+    const { outputMessages, traceSummary } = context;
+    const toolCalls = this.extractToolCallsFromMessages(outputMessages);
+    if (toolCalls.length === 0 && !traceSummary) {
+      return {
+        score: 0,
+        verdict: "fail",
+        hits: [],
+        misses: ["No trace available for evaluation"],
+        expectedAspectCount: 1
+      };
+    }
+    const summary = toolCalls.length > 0 ? this.buildSummary(toolCalls) : traceSummary;
+    if (!summary) {
       return {
         score: 0,
         verdict: "fail",
@@ -38210,11 +38267,11 @@ var ToolTrajectoryEvaluator = class {
     }
     switch (this.config.mode) {
       case "any_order":
-        return this.evaluateAnyOrder(candidateTraceSummary);
+        return this.evaluateAnyOrder(summary);
       case "in_order":
-        return this.evaluateInOrder(candidateTrace);
+        return this.evaluateInOrder(toolCalls);
       case "exact":
-        return this.evaluateExact(candidateTrace);
+        return this.evaluateExact(toolCalls);
       default:
         return {
           score: 0,
@@ -38225,6 +38282,39 @@ var ToolTrajectoryEvaluator = class {
         };
     }
   }
+  /**
+   * Extract tool calls from output messages.
+   */
+  extractToolCallsFromMessages(messages) {
+    if (!messages) {
+      return [];
+    }
+    const toolCalls = [];
+    for (const message of messages) {
+      if (message.toolCalls) {
+        for (const call of message.toolCalls) {
+          toolCalls.push({ name: call.tool });
+        }
+      }
+    }
+    return toolCalls;
+  }
+  /**
+   * Build a summary from extracted tool calls.
+   */
+  buildSummary(toolCalls) {
+    const toolCallsByName = {};
+    for (const call of toolCalls) {
+      toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
+    }
+    const toolNames = Object.keys(toolCallsByName).sort();
+    return {
+      eventCount: toolCalls.length,
+      toolNames,
+      toolCallsByName,
+      errorCount: 0
+    };
+  }
   evaluateAnyOrder(summary) {
     const minimums = this.config.minimums ?? {};
     const toolNames = Object.keys(minimums);
@@ -38257,7 +38347,7 @@ var ToolTrajectoryEvaluator = class {
       expectedAspectCount: toolNames.length
     };
   }
-  evaluateInOrder(trace2) {
+  evaluateInOrder(toolCalls) {
     const expected = this.config.expected ?? [];
     if (expected.length === 0) {
       return {
@@ -38268,15 +38358,14 @@ var ToolTrajectoryEvaluator = class {
         expectedAspectCount: 0
       };
     }
-    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
     const hits = [];
     const misses = [];
     let actualIndex = 0;
     for (let i = 0; i < expected.length; i++) {
       const expectedTool = expected[i].tool;
       let found = false;
-      while (actualIndex < actualToolCalls.length) {
-        if (actualToolCalls[actualIndex].name === expectedTool) {
+      while (actualIndex < toolCalls.length) {
+        if (toolCalls[actualIndex].name === expectedTool) {
           hits.push(`Found ${expectedTool} at position ${actualIndex}`);
           actualIndex++;
           found = true;
@@ -38297,7 +38386,7 @@ var ToolTrajectoryEvaluator = class {
       expectedAspectCount: expected.length
     };
   }
-  evaluateExact(trace2) {
+  evaluateExact(toolCalls) {
     const expected = this.config.expected ?? [];
     if (expected.length === 0) {
       return {
@@ -38308,16 +38397,15 @@ var ToolTrajectoryEvaluator = class {
         expectedAspectCount: 0
       };
     }
-    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
     const hits = [];
     const misses = [];
-    if (actualToolCalls.length !== expected.length) {
-      misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
+    if (toolCalls.length !== expected.length) {
+      misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
     }
-    const checkLength = Math.min(expected.length, actualToolCalls.length);
+    const checkLength = Math.min(expected.length, toolCalls.length);
     for (let i = 0; i < checkLength; i++) {
       const expectedTool = expected[i].tool;
-      const actualTool = actualToolCalls[i].name;
+      const actualTool = toolCalls[i].name;
       if (actualTool === expectedTool) {
         hits.push(`Position ${i}: ${expectedTool} \u2713`);
       } else {
@@ -38531,11 +38619,13 @@ var CompositeEvaluator = class {
         evalCaseId: context.evalCase.id,
         attempt: context.attempt
       });
-      const data = freeformEvaluationSchema.parse(parseJsonFromText(response.text ?? ""));
+      const data = freeformEvaluationSchema.parse(
+        parseJsonFromText(extractLastAssistantContent(response.outputMessages))
+      );
       const score = clampScore(data.score);
       const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
       const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
-      const reasoning = data.reasoning ?? response.reasoning;
+      const reasoning = data.reasoning;
       return {
         score,
         verdict: scoreToVerdict(score),
@@ -38947,11 +39037,14 @@ async function runBatchEvaluation(options) {
     const evalCase = evalCases[i];
     const promptInputs = promptInputsList[i];
     const providerResponse = batchResponse[i];
+    const outputMessages = providerResponse.outputMessages;
+    const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const candidate = extractLastAssistantContent(outputMessages);
     let result;
     try {
       result = await evaluateCandidate({
         evalCase,
-        candidate: providerResponse.text ?? "",
+        candidate,
         target,
         provider,
         evaluators: evaluatorRegistry,
@@ -38959,7 +39052,9 @@ async function runBatchEvaluation(options) {
         nowFn,
         attempt: 0,
         judgeProvider: await resolveJudgeProvider(target),
-        agentTimeoutMs
+        agentTimeoutMs,
+        outputMessages,
+        traceSummary
       });
     } catch (error40) {
       const errorResult = buildErrorResult(
@@ -39063,21 +39158,13 @@ async function runEvalCase(options) {
   if (cacheKey && cache && !cachedResponse) {
     await cache.set(cacheKey, providerResponse);
   }
-  let candidateTrace = providerResponse.trace;
-  if (!candidateTrace && providerResponse.traceRef) {
-    try {
-      const rawTrace = await readJsonFile(providerResponse.traceRef);
-      if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
-        candidateTrace = rawTrace;
-      }
-    } catch {
-    }
-  }
-  const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
+  const outputMessages = providerResponse.outputMessages;
+  const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const candidate = extractLastAssistantContent(outputMessages);
   try {
     return await evaluateCandidate({
       evalCase,
-      candidate: providerResponse.text ?? "",
+      candidate,
       target,
       provider,
       evaluators,
@@ -39086,9 +39173,8 @@ async function runEvalCase(options) {
       attempt,
       judgeProvider,
       agentTimeoutMs,
-      candidateTrace,
-      candidateTraceRef: providerResponse.traceRef,
-      candidateTraceSummary
+      outputMessages,
+      traceSummary
     });
   } catch (error40) {
     return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
@@ -39106,9 +39192,8 @@ async function evaluateCandidate(options) {
     attempt,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   const gradeTimestamp = nowFn();
   const { score, evaluatorResults } = await runEvaluatorsForCase({
@@ -39122,9 +39207,8 @@ async function evaluateCandidate(options) {
     now: gradeTimestamp,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   });
   const completedAt = nowFn();
   let agentProviderRequest;
@@ -39162,7 +39246,7 @@ async function evaluateCandidate(options) {
     lm_provider_request: lmProviderRequest,
     evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
     evaluator_results: evaluatorResults,
-    trace_summary: candidateTraceSummary
+    trace_summary: traceSummary
   };
 }
 async function runEvaluatorsForCase(options) {
@@ -39177,9 +39261,8 @@ async function runEvaluatorsForCase(options) {
     now,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   if (evalCase.evaluators && evalCase.evaluators.length > 0) {
     return runEvaluatorList({
@@ -39194,9 +39277,8 @@ async function runEvaluatorsForCase(options) {
       now,
       judgeProvider,
       agentTimeoutMs,
-      candidateTrace,
-      candidateTraceRef,
-      candidateTraceSummary
+      outputMessages,
+      traceSummary
     });
   }
   const evaluatorKind = evalCase.evaluator ?? "llm_judge";
@@ -39213,9 +39295,8 @@ async function runEvaluatorsForCase(options) {
     promptInputs,
     now,
     judgeProvider,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   });
   return { score };
 }
@@ -39232,9 +39313,8 @@ async function runEvaluatorList(options) {
     now,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   const scored = [];
   const evaluatorResults = [];
@@ -39281,8 +39361,8 @@ async function runEvaluatorList(options) {
           attempt,
           promptInputs,
           now,
-          candidateTraceRef,
-          candidateTraceSummary
+          outputMessages,
+          traceSummary
         });
         const weight = evaluator.weight ?? 1;
         scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
@@ -39368,9 +39448,8 @@ async function runEvaluatorList(options) {
           attempt,
           promptInputs,
           now,
-          candidateTrace,
-          candidateTraceRef,
-          candidateTraceSummary
+          outputMessages,
+          traceSummary
         });
         const weight = evaluator.weight ?? 1;
         scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
@@ -39730,16 +39809,90 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
   return parts.join("\n");
 }
+// src/commands/convert/index.ts
+import { command, option, optional as optional2, positional, string as string4 } from "cmd-ts";
+import { stringify as stringifyYaml } from "yaml";
+function convertJsonlToYaml(inputPath, outputPath) {
+  const content = readFileSync(inputPath, "utf8");
+  const lines = content.trim().split("\n").filter((line2) => line2.trim());
+  let yamlOutput = "";
+  let isFirst = true;
+  for (const line2 of lines) {
+    const record2 = JSON.parse(line2);
+    const yamlDoc = stringifyYaml(record2, {
+      indent: 2,
+      lineWidth: 0
+    });
+    const normalizedYaml = normalizeLineEndings(yamlDoc);
+    const separator = isFirst ? "---\n" : "\n---\n";
+    isFirst = false;
+    yamlOutput += separator + normalizedYaml;
+  }
+  writeFileSync(outputPath, yamlOutput);
+  return lines.length;
+}
+var convertCommand = command({
+  name: "convert",
+  description: "Convert evaluation results from JSONL to YAML format",
+  args: {
+    input: positional({
+      type: string4,
+      displayName: "input",
+      description: "Path to input JSONL file"
+    }),
+    out: option({
+      type: optional2(string4),
+      long: "out",
+      short: "o",
+      description: "Output file path (defaults to input path with .yaml extension)"
+    })
+  },
+  handler: async ({ input, out }) => {
+    if (!input.endsWith(".jsonl")) {
+      console.error("Error: Input file must be a .jsonl file");
+      process.exit(1);
+    }
+    const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
+    try {
+      const count = convertJsonlToYaml(input, outputPath);
+      console.log(`Converted ${count} records to ${path14.resolve(outputPath)}`);
+    } catch (error40) {
+      console.error(`Error: ${error40.message}`);
+      process.exit(1);
+    }
+  }
+});
+// src/commands/eval/index.ts
+import { stat as stat4 } from "node:fs/promises";
+import path21 from "node:path";
+import {
+  command as command2,
+  flag,
+  number as number4,
+  option as option2,
+  optional as optional3,
+  restPositionals,
+  string as string5
+} from "cmd-ts";
+import fg from "fast-glob";
+// src/commands/eval/run-eval.ts
+import { constants as constants6 } from "node:fs";
+import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
+import path20 from "node:path";
+import { pathToFileURL } from "node:url";
 // src/commands/eval/env.ts
 import { constants as constants4 } from "node:fs";
 import { access as access4 } from "node:fs/promises";
-import path14 from "node:path";
+import path15 from "node:path";
 import { config as loadDotenv } from "dotenv";
 function uniqueDirs(directories) {
   const seen = /* @__PURE__ */ new Set();
   const result = [];
   for (const dir of directories) {
-    const absolute = path14.resolve(dir);
+    const absolute = path15.resolve(dir);
     if (seen.has(absolute)) {
       continue;
     }
@@ -39758,14 +39911,14 @@ async function fileExists4(filePath) {
 }
 function collectAncestorDirectories(start, boundary) {
   const directories = [];
-  const boundaryDir = path14.resolve(boundary);
-  let current = path14.resolve(start);
+  const boundaryDir = path15.resolve(boundary);
+  let current = path15.resolve(start);
   while (current !== void 0) {
     directories.push(current);
     if (current === boundaryDir) {
       break;
     }
-    const parent = path14.dirname(current);
+    const parent = path15.dirname(current);
     if (parent === current) {
       break;
     }
@@ -39775,12 +39928,12 @@ function collectAncestorDirectories(start, boundary) {
 }
 async function loadEnvFromHierarchy(options) {
   const { testFilePath, repoRoot, verbose } = options;
-  const testDir = path14.dirname(path14.resolve(testFilePath));
+  const testDir = path15.dirname(path15.resolve(testFilePath));
   const cwd = process.cwd();
   const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
   const envFiles = [];
   for (const dir of searchDirs) {
-    const candidate = path14.join(dir, ".env");
+    const candidate = path15.join(dir, ".env");
     if (await fileExists4(candidate)) {
       envFiles.push(candidate);
     }
@@ -39804,7 +39957,7 @@ async function loadEnvFromHierarchy(options) {
 // src/commands/eval/jsonl-writer.ts
 import { createWriteStream as createWriteStream2 } from "node:fs";
 import { mkdir as mkdir5 } from "node:fs/promises";
-import path15 from "node:path";
+import path16 from "node:path";
 import { finished } from "node:stream/promises";
 // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -40022,7 +40175,7 @@ var JsonlWriter = class _JsonlWriter {
     this.stream = stream;
   }
   static async open(filePath) {
-    await mkdir5(path15.dirname(filePath), { recursive: true });
+    await mkdir5(path16.dirname(filePath), { recursive: true });
     const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
     return new _JsonlWriter(stream);
   }
@@ -40054,9 +40207,9 @@ var JsonlWriter = class _JsonlWriter {
 // src/commands/eval/yaml-writer.ts
 import { createWriteStream as createWriteStream3 } from "node:fs";
 import { mkdir as mkdir6 } from "node:fs/promises";
-import path16 from "node:path";
+import path17 from "node:path";
 import { finished as finished2 } from "node:stream/promises";
-import { stringify as stringifyYaml } from "yaml";
+import { stringify as stringifyYaml2 } from "yaml";
 var YamlWriter = class _YamlWriter {
   stream;
   mutex = new Mutex();
@@ -40066,7 +40219,7 @@ var YamlWriter = class _YamlWriter {
     this.stream = stream;
   }
   static async open(filePath) {
-    await mkdir6(path16.dirname(filePath), { recursive: true });
+    await mkdir6(path17.dirname(filePath), { recursive: true });
     const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
     return new _YamlWriter(stream);
   }
@@ -40075,7 +40228,7 @@ var YamlWriter = class _YamlWriter {
       if (this.closed) {
         throw new Error("Cannot write to closed YAML writer");
       }
-      const yamlDoc = stringifyYaml(record2, {
+      const yamlDoc = stringifyYaml2(record2, {
         indent: 2,
         lineWidth: 0
         // Disable line wrapping
@@ -40185,12 +40338,12 @@ var ProgressDisplay = class {
   }
   addLogPaths(paths) {
     const newPaths = [];
-    for (const path27 of paths) {
-      if (this.logPathSet.has(path27)) {
+    for (const path28 of paths) {
+      if (this.logPathSet.has(path28)) {
         continue;
       }
-      this.logPathSet.add(path27);
-      newPaths.push(path27);
+      this.logPathSet.add(path28);
+      newPaths.push(path28);
     }
     if (newPaths.length === 0) {
       return;
@@ -40202,8 +40355,8 @@ var ProgressDisplay = class {
       this.hasPrintedLogHeader = true;
     }
     const startIndex = this.logPaths.length - newPaths.length;
-    newPaths.forEach((path27, offset) => {
-      console.log(`${startIndex + offset + 1}. ${path27}`);
+    newPaths.forEach((path28, offset) => {
+      console.log(`${startIndex + offset + 1}. ${path28}`);
     });
   }
   finish() {
@@ -40358,7 +40511,7 @@ function formatEvaluationSummary(summary) {
 // ../../packages/core/dist/evaluation/validation/index.js
 import { readFile as readFile7 } from "node:fs/promises";
-import path17 from "node:path";
+import path18 from "node:path";
 import { parse as parse6 } from "yaml";
 import { readFile as readFile23 } from "node:fs/promises";
 import path23 from "node:path";
@@ -40401,8 +40554,8 @@ async function detectFileType(filePath) {
   }
 }
 function inferFileTypeFromPath(filePath) {
-  const normalized = path17.normalize(filePath).replace(/\\/g, "/");
-  const basename = path17.basename(filePath);
+  const normalized = path18.normalize(filePath).replace(/\\/g, "/");
+  const basename = path18.basename(filePath);
   if (normalized.includes("/.agentv/")) {
     if (basename === "config.yaml" || basename === "config.yml") {
       return "config";
@@ -40725,7 +40878,11 @@ var CLI_SETTINGS = /* @__PURE__ */ new Set([
   "env",
   "timeout_seconds",
   "timeoutSeconds",
-  "healthcheck"
+  "healthcheck",
+  "keep_temp_files",
+  "keepTempFiles",
+  "keep_output_files",
+  "keepOutputFiles"
 ]);
 function getKnownSettings(provider) {
   const normalizedProvider = provider.toLowerCase();
@@ -41243,12 +41400,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
 // src/utils/targets.ts
 import { constants as constants5 } from "node:fs";
 import { access as access5 } from "node:fs/promises";
-import path18 from "node:path";
+import path19 from "node:path";
 var TARGET_FILE_CANDIDATES = [
   "targets.yaml",
   "targets.yml",
-  path18.join(".agentv", "targets.yaml"),
-  path18.join(".agentv", "targets.yml")
+  path19.join(".agentv", "targets.yaml"),
+  path19.join(".agentv", "targets.yml")
 ];
 async function fileExists5(filePath) {
   try {
@@ -41261,12 +41418,12 @@ async function fileExists5(filePath) {
 async function discoverTargetsFile(options) {
   const { explicitPath, testFilePath, repoRoot, cwd } = options;
   if (explicitPath) {
-    const resolvedExplicit = path18.resolve(explicitPath);
+    const resolvedExplicit = path19.resolve(explicitPath);
     if (await fileExists5(resolvedExplicit)) {
       return resolvedExplicit;
     }
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const nested = path18.join(resolvedExplicit, candidate);
+      const nested = path19.join(resolvedExplicit, candidate);
       if (await fileExists5(nested)) {
         return nested;
       }
@@ -41274,13 +41431,13 @@ async function discoverTargetsFile(options) {
     throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
   }
   const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
-  const resolvedCwd = path18.resolve(cwd);
+  const resolvedCwd = path19.resolve(cwd);
   if (!directories.includes(resolvedCwd)) {
     directories.push(resolvedCwd);
   }
   for (const directory of directories) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const fullPath = path18.join(directory, candidate);
+      const fullPath = path19.join(directory, candidate);
       if (await fileExists5(fullPath)) {
         return fullPath;
       }
@@ -41459,15 +41616,15 @@ async function ensureFileExists(filePath, description) {
   }
 }
 async function findRepoRoot(start) {
-  const fallback = path19.resolve(start);
+  const fallback = path20.resolve(start);
   let current = fallback;
   while (current !== void 0) {
-    const candidate = path19.join(current, ".git");
+    const candidate = path20.join(current, ".git");
     try {
       await access6(candidate, constants6.F_OK);
       return current;
     } catch {
-      const parent = path19.dirname(current);
+      const parent = path20.dirname(current);
       if (parent === current) {
         break;
       }
@@ -41480,16 +41637,16 @@ function buildDefaultOutputPath(cwd, format) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const baseName = "eval";
   const extension = getDefaultExtension(format);
-  return path19.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
+  return path20.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
 }
-function resolvePromptDirectory(option4, cwd) {
-  if (option4 === void 0) {
+function resolvePromptDirectory(option5, cwd) {
+  if (option5 === void 0) {
     return void 0;
   }
-  if (typeof option4 === "string" && option4.trim().length > 0) {
-    return path19.resolve(cwd, option4);
+  if (typeof option5 === "string" && option5.trim().length > 0) {
+    return path20.resolve(cwd, option5);
   }
-  return path19.join(cwd, ".agentv", "prompts");
+  return path20.join(cwd, ".agentv", "prompts");
 }
 function createEvaluationCache() {
   const store = /* @__PURE__ */ new Map();
@@ -41514,7 +41671,7 @@ function createProgressReporter(maxWorkers, options) {
   };
 }
 function makeEvalKey(testFilePath, evalId) {
-  return `${path19.resolve(testFilePath)}::${evalId}`;
+  return `${path20.resolve(testFilePath)}::${evalId}`;
 }
 function createDisplayIdTracker() {
   const map2 = /* @__PURE__ */ new Map();
@@ -41686,7 +41843,7 @@ async function runEvalCommand(input) {
   if (options.verbose) {
     console.log(`Repository root: ${repoRoot}`);
   }
-  const outputPath = options.outPath ? path19.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
+  const outputPath = options.outPath ? path20.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
   console.log(`Output path: ${outputPath}`);
   const outputWriter = await createOutputWriter(outputPath, options.format);
   const cache = options.cache ? createEvaluationCache() : void 0;
@@ -41694,7 +41851,7 @@ async function runEvalCommand(input) {
   const allResults = [];
   let lastPromptDumpDir;
   const seenEvalCases = /* @__PURE__ */ new Set();
-  const resolvedTestFiles = input.testFiles.map((file2) => path19.resolve(file2));
+  const resolvedTestFiles = input.testFiles.map((file2) => path20.resolve(file2));
   const displayIdTracker = createDisplayIdTracker();
   const totalWorkers = options.workers ?? DEFAULT_WORKERS;
   const fileConcurrency = Math.min(
@@ -41790,7 +41947,7 @@ async function resolveEvaluationRunner() {
   if (!overridePath) {
     return runEvaluation;
   }
-  const resolved = path19.isAbsolute(overridePath) ? overridePath : path19.resolve(process.cwd(), overridePath);
+  const resolved = path20.isAbsolute(overridePath) ? overridePath : path20.resolve(process.cwd(), overridePath);
   const moduleUrl = pathToFileURL(resolved).href;
   const mod = await import(moduleUrl);
   const candidate = mod.runEvaluation;
@@ -41803,44 +41960,44 @@ async function resolveEvaluationRunner() {
 }
 // src/commands/eval/index.ts
-var evalCommand = command({
+var evalCommand = command2({
   name: "eval",
   description: "Run eval suites and report results",
   args: {
     evalPaths: restPositionals({
-      type: string4,
+      type: string5,
       displayName: "eval-paths",
       description: "Path(s) or glob(s) to evaluation .yaml file(s)"
     }),
-    target: option({
-      type: string4,
+    target: option2({
+      type: string5,
       long: "target",
       description: "Override target name from targets.yaml",
       defaultValue: () => "default"
     }),
-    targets: option({
-      type: optional2(string4),
+    targets: option2({
+      type: optional3(string5),
       long: "targets",
       description: "Path to targets.yaml (overrides discovery)"
     }),
-    evalId: option({
-      type: optional2(string4),
+    evalId: option2({
+      type: optional3(string5),
       long: "eval-id",
       description: "Run only the eval case with this identifier"
     }),
-    workers: option({
+    workers: option2({
       type: number4,
       long: "workers",
       description: "Number of parallel workers (default: 3, max: 50). Can also be set per-target in targets.yaml",
       defaultValue: () => 3
     }),
-    out: option({
-      type: optional2(string4),
+    out: option2({
+      type: optional3(string5),
       long: "out",
       description: "Write results to the specified path"
     }),
-    outputFormat: option({
-      type: string4,
+    outputFormat: option2({
+      type: string5,
       long: "output-format",
       description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
       defaultValue: () => "jsonl"
@@ -41849,31 +42006,31 @@ var evalCommand = command({
       long: "dry-run",
       description: "Use mock provider responses instead of real LLM calls"
     }),
-    dryRunDelay: option({
+    dryRunDelay: option2({
       type: number4,
       long: "dry-run-delay",
       description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
       defaultValue: () => 0
     }),
-    dryRunDelayMin: option({
+    dryRunDelayMin: option2({
       type: number4,
       long: "dry-run-delay-min",
       description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
       defaultValue: () => 0
     }),
-    dryRunDelayMax: option({
+    dryRunDelayMax: option2({
       type: number4,
       long: "dry-run-delay-max",
       description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
       defaultValue: () => 0
     }),
-    agentTimeout: option({
+    agentTimeout: option2({
       type: number4,
       long: "agent-timeout",
       description: "Timeout in seconds for provider responses (default: 120)",
       defaultValue: () => 120
     }),
-    maxRetries: option({
+    maxRetries: option2({
       type: number4,
       long: "max-retries",
       description: "Retry count for timeout recoveries (default: 2)",
@@ -41887,8 +42044,8 @@ var evalCommand = command({
       long: "verbose",
       description: "Enable verbose logging"
     }),
-    dumpPrompts: option({
-      type: optional2(string4),
+    dumpPrompts: option2({
+      type: optional3(string5),
       long: "dump-prompts",
       description: "Directory path for persisting prompt payloads for debugging"
     }),
@@ -41934,7 +42091,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
   const unmatched = [];
   const results = /* @__PURE__ */ new Set();
   for (const pattern of normalizedInputs) {
-    const candidatePath = path20.isAbsolute(pattern) ? path20.normalize(pattern) : path20.resolve(cwd, pattern);
+    const candidatePath = path21.isAbsolute(pattern) ? path21.normalize(pattern) : path21.resolve(cwd, pattern);
     try {
       const stats = await stat4(candidatePath);
       if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -41958,7 +42115,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
       continue;
     }
     for (const filePath of yamlMatches) {
-      results.add(path20.normalize(filePath));
+      results.add(path21.normalize(filePath));
     }
   }
   if (unmatched.length > 0) {
@@ -41974,11 +42131,11 @@ async function resolveEvalPaths(evalPaths, cwd) {
 }
 // src/commands/generate/index.ts
-import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
+import { command as command3, flag as flag2, option as option3, optional as optional4, positional as positional3, string as string6, subcommands } from "cmd-ts";
 // src/commands/generate/rubrics.ts
 import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
-import path21 from "node:path";
+import path24 from "node:path";
 import { pathToFileURL as pathToFileURL2 } from "node:url";
 import { isMap, isSeq, parseDocument } from "yaml";
 function isJsonObject3(value) {
@@ -41990,7 +42147,7 @@ function asString6(value) {
 async function loadRubricGenerator() {
   const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
   if (customGenerator) {
-    const generatorPath = path21.resolve(customGenerator);
+    const generatorPath = path24.resolve(customGenerator);
     const generatorUrl = pathToFileURL2(generatorPath).href;
     const module = await import(generatorUrl);
     return module.generateRubrics;
@@ -42000,7 +42157,7 @@ async function loadRubricGenerator() {
 async function generateRubricsCommand(options) {
   const { file: file2, target: targetOverride, verbose } = options;
   console.log(`Generating rubrics for: ${file2}`);
-  const absolutePath = path21.resolve(file2);
+  const absolutePath = path24.resolve(file2);
   const content = await readFile8(absolutePath, "utf8");
   const doc = parseDocument(content);
   const parsed = doc.toJSON();
@@ -42117,17 +42274,17 @@ function extractQuestion(evalCase) {
 }
 // src/commands/generate/index.ts
-var rubricsCommand = command2({
+var rubricsCommand = command3({
   name: "rubrics",
   description: "Generate rubrics from expected_outcome in YAML eval file",
   args: {
-    file: positional2({
-      type: string5,
+    file: positional3({
+      type: string6,
       displayName: "file",
       description: "Path to YAML eval file"
     }),
-    target: option2({
-      type: optional3(string5),
+    target: option3({
+      type: optional4(string6),
       long: "target",
       short: "t",
       description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
@@ -42160,14 +42317,14 @@ var generateCommand = subcommands({
 });
 // src/commands/init/index.ts
-import { existsSync, mkdirSync, writeFileSync } from "node:fs";
-import path25 from "node:path";
+import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
+import path26 from "node:path";
 import * as readline from "node:readline/promises";
-import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
+import { command as command4, option as option4, optional as optional5, string as string7 } from "cmd-ts";
 // src/templates/index.ts
-import { readFileSync, readdirSync, statSync } from "node:fs";
-import path24 from "node:path";
+import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
+import path25 from "node:path";
 import { fileURLToPath } from "node:url";
 function getGithubTemplates() {
   return getTemplatesFromDir(".github");
@@ -42179,12 +42336,12 @@ function getClaudeTemplates() {
   return getTemplatesFromDir(".claude");
 }
 function getTemplatesFromDir(subdir) {
-  const currentDir = path24.dirname(fileURLToPath(import.meta.url));
+  const currentDir = path25.dirname(fileURLToPath(import.meta.url));
   let templatesDir;
-  if (currentDir.includes(`${path24.sep}dist`)) {
-    templatesDir = path24.join(currentDir, "templates", subdir);
+  if (currentDir.includes(`${path25.sep}dist`)) {
+    templatesDir = path25.join(currentDir, "templates", subdir);
   } else {
-    templatesDir = path24.join(currentDir, subdir);
+    templatesDir = path25.join(currentDir, subdir);
   }
   return readTemplatesRecursively(templatesDir, "");
 }
@@ -42192,15 +42349,15 @@ function readTemplatesRecursively(dir, relativePath) {
   const templates = [];
   const entries = readdirSync(dir);
   for (const entry of entries) {
-    const fullPath = path24.join(dir, entry);
+    const fullPath = path25.join(dir, entry);
     const stat6 = statSync(fullPath);
-    const entryRelativePath = relativePath ? path24.join(relativePath, entry) : entry;
+    const entryRelativePath = relativePath ? path25.join(relativePath, entry) : entry;
     if (stat6.isDirectory()) {
       templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
     } else {
-      const content = readFileSync(fullPath, "utf-8");
+      const content = readFileSync2(fullPath, "utf-8");
       templates.push({
-        path: entryRelativePath.split(path24.sep).join("/"),
+        path: entryRelativePath.split(path25.sep).join("/"),
         // Normalize to forward slashes
         content
       });
@@ -42223,10 +42380,10 @@ async function promptYesNo(message) {
   }
 }
 async function initCommand(options = {}) {
-  const targetPath = path25.resolve(options.targetPath ?? ".");
-  const githubDir = path25.join(targetPath, ".github");
-  const agentvDir = path25.join(targetPath, ".agentv");
-  const claudeDir = path25.join(targetPath, ".claude");
+  const targetPath = path26.resolve(options.targetPath ?? ".");
+  const githubDir = path26.join(targetPath, ".github");
+  const agentvDir = path26.join(targetPath, ".agentv");
+  const claudeDir = path26.join(targetPath, ".claude");
   const githubTemplates = getGithubTemplates();
   const agentvTemplates = getAgentvTemplates();
   const claudeTemplates = getClaudeTemplates();
@@ -42234,32 +42391,32 @@ async function initCommand(options = {}) {
   const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
   const existingFiles = [];
   if (envTemplate) {
-    const envFilePath = path25.join(targetPath, ".env.template");
+    const envFilePath = path26.join(targetPath, ".env.template");
     if (existsSync(envFilePath)) {
       existingFiles.push(".env.template");
     }
   }
   if (existsSync(githubDir)) {
     for (const template of githubTemplates) {
-      const targetFilePath = path25.join(githubDir, template.path);
+      const targetFilePath = path26.join(githubDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path25.relative(targetPath, targetFilePath));
+        existingFiles.push(path26.relative(targetPath, targetFilePath));
       }
     }
   }
   if (existsSync(agentvDir)) {
     for (const template of otherAgentvTemplates) {
-      const targetFilePath = path25.join(agentvDir, template.path);
+      const targetFilePath = path26.join(agentvDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path25.relative(targetPath, targetFilePath));
+        existingFiles.push(path26.relative(targetPath, targetFilePath));
       }
     }
   }
   if (existsSync(claudeDir)) {
     for (const template of claudeTemplates) {
-      const targetFilePath = path25.join(claudeDir, template.path);
+      const targetFilePath = path26.join(claudeDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path25.relative(targetPath, targetFilePath));
+        existingFiles.push(path26.relative(targetPath, targetFilePath));
       }
     }
   }
@@ -42286,36 +42443,36 @@ async function initCommand(options = {}) {
     mkdirSync(claudeDir, { recursive: true });
   }
   if (envTemplate) {
-    const envFilePath = path25.join(targetPath, ".env.template");
-    writeFileSync(envFilePath, envTemplate.content, "utf-8");
+    const envFilePath = path26.join(targetPath, ".env.template");
+    writeFileSync2(envFilePath, envTemplate.content, "utf-8");
     console.log("Created .env.template");
   }
   for (const template of githubTemplates) {
-    const targetFilePath = path25.join(githubDir, template.path);
-    const targetDirPath = path25.dirname(targetFilePath);
+    const targetFilePath = path26.join(githubDir, template.path);
+    const targetDirPath = path26.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
-    writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
+    writeFileSync2(targetFilePath, template.content, "utf-8");
+    console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
   }
   for (const template of otherAgentvTemplates) {
-    const targetFilePath = path25.join(agentvDir, template.path);
-    const targetDirPath = path25.dirname(targetFilePath);
+    const targetFilePath = path26.join(agentvDir, template.path);
+    const targetDirPath = path26.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
-    writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
+    writeFileSync2(targetFilePath, template.content, "utf-8");
+    console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
   }
   for (const template of claudeTemplates) {
-    const targetFilePath = path25.join(claudeDir, template.path);
-    const targetDirPath = path25.dirname(targetFilePath);
+    const targetFilePath = path26.join(claudeDir, template.path);
+    const targetDirPath = path26.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
-    writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
+    writeFileSync2(targetFilePath, template.content, "utf-8");
+    console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
   }
   console.log("\nAgentV initialized successfully!");
   console.log("\nFiles installed to root:");
@@ -42323,17 +42480,17 @@ async function initCommand(options = {}) {
     console.log("  - .env.template");
   }
   console.log(`
-Files installed to ${path25.relative(targetPath, githubDir)}:`);
+Files installed to ${path26.relative(targetPath, githubDir)}:`);
   for (const t of githubTemplates) {
     console.log(`  - ${t.path}`);
   }
   console.log(`
-Files installed to ${path25.relative(targetPath, agentvDir)}:`);
+Files installed to ${path26.relative(targetPath, agentvDir)}:`);
   for (const t of otherAgentvTemplates) {
     console.log(`  - ${t.path}`);
   }
   console.log(`
-Files installed to ${path25.relative(targetPath, claudeDir)}:`);
+Files installed to ${path26.relative(targetPath, claudeDir)}:`);
   for (const t of claudeTemplates) {
     console.log(`  - ${t.path}`);
   }
@@ -42342,12 +42499,12 @@ Files installed to ${path25.relative(targetPath, claudeDir)}:`);
   console.log("  2. Configure targets in .agentv/targets.yaml");
   console.log("  3. Create eval files using the schema and prompt templates");
 }
-var initCmdTsCommand = command3({
+var initCmdTsCommand = command4({
   name: "init",
   description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
   args: {
-    path: option3({
-      type: optional4(string6),
+    path: option4({
+      type: optional5(string7),
       long: "path",
       description: "Target directory for initialization (default: current directory)"
     })
@@ -42363,7 +42520,7 @@ var initCmdTsCommand = command3({
 });
 // src/commands/validate/index.ts
-import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
+import { command as command5, restPositionals as restPositionals2, string as string8 } from "cmd-ts";
 // src/commands/validate/format-output.ts
 var ANSI_RED3 = "\x1B[31m";
@@ -42448,7 +42605,7 @@ function isTTY2() {
 // src/commands/validate/validate-files.ts
 import { constants as constants7 } from "node:fs";
 import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
-import path26 from "node:path";
+import path27 from "node:path";
 async function validateFiles(paths) {
   const filePaths = await expandPaths(paths);
   const results = [];
@@ -42466,7 +42623,7 @@ async function validateFiles(paths) {
   };
 }
 async function validateSingleFile(filePath) {
-  const absolutePath = path26.resolve(filePath);
+  const absolutePath = path27.resolve(filePath);
   const fileType = await detectFileType(absolutePath);
   let result;
   if (fileType === "eval") {
@@ -42491,7 +42648,7 @@ async function validateSingleFile(filePath) {
 async function expandPaths(paths) {
   const expanded = [];
   for (const inputPath of paths) {
-    const absolutePath = path26.resolve(inputPath);
+    const absolutePath = path27.resolve(inputPath);
     try {
       await access7(absolutePath, constants7.F_OK);
     } catch {
@@ -42515,7 +42672,7 @@ async function findYamlFiles(dirPath) {
   try {
     const entries = await readdir3(dirPath, { withFileTypes: true });
     for (const entry of entries) {
-      const fullPath = path26.join(dirPath, entry.name);
+      const fullPath = path27.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         if (entry.name === "node_modules" || entry.name.startsWith(".")) {
           continue;
@@ -42532,7 +42689,7 @@ async function findYamlFiles(dirPath) {
   return results;
 }
 function isYamlFile(filePath) {
-  const ext = path26.extname(filePath).toLowerCase();
+  const ext = path27.extname(filePath).toLowerCase();
   return ext === ".yaml" || ext === ".yml";
 }
@@ -42549,12 +42706,12 @@ async function runValidateCommand(paths) {
     process.exit(1);
   }
 }
-var validateCommand = command4({
+var validateCommand = command5({
   name: "validate",
   description: "Validate AgentV eval and targets YAML files",
   args: {
     paths: restPositionals2({
-      type: string7,
+      type: string8,
       displayName: "paths",
       description: "Files or directories to validate"
     })
@@ -42570,16 +42727,17 @@ var validateCommand = command4({
 });
 // src/index.ts
-var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
+var packageJson = JSON.parse(readFileSync3(new URL("../package.json", import.meta.url), "utf8"));
 var app = subcommands2({
   name: "agentv",
   description: "AgentV CLI",
   version: packageJson.version,
   cmds: {
+    convert: convertCommand,
     eval: evalCommand,
-    validate: validateCommand,
     generate: generateCommand,
-    init: initCmdTsCommand
+    init: initCmdTsCommand,
+    validate: validateCommand
   }
 });
 async function runCli(argv = process.argv) {
@@ -42590,4 +42748,4 @@ export {
   app,
   runCli
 };
-//# sourceMappingURL=chunk-6R2YRXCQ.js.map
+//# sourceMappingURL=chunk-3RYQPI4H.js.map