npm - agentv - Versions diffs - 1.5.0 → 1.6.1 - Mend

agentv 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-3RYQPI4H.js → chunk-HU4B6ODF.js} RENAMED

@@ -141,14 +141,114 @@ var require_dist = __commonJS({
 });
 // src/index.ts
-import { readFileSync as readFileSync3 } from "node:fs";
+import { readFileSync as readFileSync4 } from "node:fs";
 import { binary, run, subcommands as subcommands2 } from "cmd-ts";
+// src/commands/compare/index.ts
+import { readFileSync } from "node:fs";
+import { command, number, option, optional, positional, string } from "cmd-ts";
+function loadJsonlResults(filePath) {
+  const content = readFileSync(filePath, "utf8");
+  const lines = content.trim().split("\n").filter((line2) => line2.trim());
+  return lines.map((line2) => {
+    const record2 = JSON.parse(line2);
+    if (typeof record2.evalId !== "string") {
+      throw new Error(`Missing evalId in result: ${line2}`);
+    }
+    if (typeof record2.score !== "number") {
+      throw new Error(`Missing or invalid score in result: ${line2}`);
+    }
+    return { evalId: record2.evalId, score: record2.score };
+  });
+}
+function classifyOutcome(delta, threshold) {
+  if (delta >= threshold) return "win";
+  if (delta <= -threshold) return "loss";
+  return "tie";
+}
+function compareResults(results1, results2, threshold) {
+  const map1 = new Map(results1.map((r) => [r.evalId, r.score]));
+  const map2 = new Map(results2.map((r) => [r.evalId, r.score]));
+  const matched = [];
+  const matchedIds = /* @__PURE__ */ new Set();
+  for (const [evalId, score1] of map1) {
+    const score2 = map2.get(evalId);
+    if (score2 !== void 0) {
+      const delta = score2 - score1;
+      matched.push({
+        evalId,
+        score1,
+        score2,
+        delta,
+        outcome: classifyOutcome(delta, threshold)
+      });
+      matchedIds.add(evalId);
+    }
+  }
+  const unmatchedFile1 = results1.filter((r) => !matchedIds.has(r.evalId)).length;
+  const unmatchedFile2 = results2.filter((r) => !map1.has(r.evalId)).length;
+  const wins = matched.filter((m) => m.outcome === "win").length;
+  const losses = matched.filter((m) => m.outcome === "loss").length;
+  const ties = matched.filter((m) => m.outcome === "tie").length;
+  const meanDelta = matched.length > 0 ? matched.reduce((sum, m) => sum + m.delta, 0) / matched.length : 0;
+  return {
+    matched,
+    unmatched: { file1: unmatchedFile1, file2: unmatchedFile2 },
+    summary: {
+      total: results1.length + results2.length,
+      matched: matched.length,
+      wins,
+      losses,
+      ties,
+      meanDelta: Math.round(meanDelta * 1e3) / 1e3
+    }
+  };
+}
+function determineExitCode(meanDelta) {
+  return meanDelta >= 0 ? 0 : 1;
+}
+var compareCommand = command({
+  name: "compare",
+  description: "Compare two evaluation result files and compute score differences",
+  args: {
+    result1: positional({
+      type: string,
+      displayName: "result1",
+      description: "Path to first JSONL result file (baseline)"
+    }),
+    result2: positional({
+      type: string,
+      displayName: "result2",
+      description: "Path to second JSONL result file (candidate)"
+    }),
+    threshold: option({
+      type: optional(number),
+      long: "threshold",
+      short: "t",
+      description: "Score delta threshold for win/loss classification (default: 0.1)"
+    })
+  },
+  handler: async ({ result1, result2, threshold }) => {
+    const effectiveThreshold = threshold ?? 0.1;
+    try {
+      const results1 = loadJsonlResults(result1);
+      const results2 = loadJsonlResults(result2);
+      const comparison = compareResults(results1, results2, effectiveThreshold);
+      console.log(JSON.stringify(comparison, null, 2));
+      const exitCode = determineExitCode(comparison.summary.meanDelta);
+      process.exit(exitCode);
+    } catch (error40) {
+      console.error(`Error: ${error40.message}`);
+      process.exit(1);
+    }
+  }
+});
 // src/commands/convert/index.ts
-import { readFileSync, writeFileSync } from "node:fs";
+import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
 import path14 from "node:path";
-// ../../packages/core/dist/chunk-KPHTMTZ3.js
+// ../../packages/core/dist/chunk-E2VSU4WZ.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -1033,8 +1133,8 @@ var ZodType = class {
   promise() {
     return ZodPromise.create(this, this._def);
   }
-  or(option5) {
-    return ZodUnion.create([this, option5], this._def);
+  or(option6) {
+    return ZodUnion.create([this, option6], this._def);
   }
   and(incoming) {
     return ZodIntersection.create(this, incoming, this._def);
@@ -2884,7 +2984,7 @@ var ZodUnion = class extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return Promise.all(options.map(async (option5) => {
+      return Promise.all(options.map(async (option6) => {
         const childCtx = {
           ...ctx,
           common: {
@@ -2894,7 +2994,7 @@ var ZodUnion = class extends ZodType {
           parent: null
         };
         return {
-          result: await option5._parseAsync({
+          result: await option6._parseAsync({
             data: ctx.data,
             path: ctx.path,
             parent: childCtx
@@ -2905,7 +3005,7 @@ var ZodUnion = class extends ZodType {
     } else {
       let dirty = void 0;
       const issues = [];
-      for (const option5 of options) {
+      for (const option6 of options) {
         const childCtx = {
           ...ctx,
           common: {
@@ -2914,7 +3014,7 @@ var ZodUnion = class extends ZodType {
           },
           parent: null
         };
-        const result = option5._parseSync({
+        const result = option6._parseSync({
           data: ctx.data,
           path: ctx.path,
           parent: childCtx
@@ -2995,8 +3095,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
     }
     const discriminator = this.discriminator;
     const discriminatorValue = ctx.data[discriminator];
-    const option5 = this.optionsMap.get(discriminatorValue);
-    if (!option5) {
+    const option6 = this.optionsMap.get(discriminatorValue);
+    if (!option6) {
       addIssueToContext(ctx, {
         code: ZodIssueCode.invalid_union_discriminator,
         options: Array.from(this.optionsMap.keys()),
@@ -3005,13 +3105,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return option5._parseAsync({
+      return option6._parseAsync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
       });
     } else {
-      return option5._parseSync({
+      return option6._parseSync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
@@ -4195,7 +4295,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-KPHTMTZ3.js
+// ../../packages/core/dist/chunk-E2VSU4WZ.js
 async function fileExists(filePath) {
   try {
     await access(filePath, constants.F_OK);
@@ -4302,6 +4402,161 @@ async function resolveFileReference(rawValue, searchRoots) {
   }
   return { displayPath, attempted };
 }
+var CliHealthcheckHttpInputSchema = external_exports.object({
+  type: external_exports.literal("http"),
+  url: external_exports.string().min(1, "healthcheck URL is required"),
+  timeout_seconds: external_exports.number().positive().optional(),
+  timeoutSeconds: external_exports.number().positive().optional()
+});
+var CliHealthcheckCommandInputSchema = external_exports.object({
+  type: external_exports.literal("command"),
+  command_template: external_exports.string().optional(),
+  commandTemplate: external_exports.string().optional(),
+  cwd: external_exports.string().optional(),
+  timeout_seconds: external_exports.number().positive().optional(),
+  timeoutSeconds: external_exports.number().positive().optional()
+});
+var CliHealthcheckInputSchema = external_exports.discriminatedUnion("type", [
+  CliHealthcheckHttpInputSchema,
+  CliHealthcheckCommandInputSchema
+]);
+var CliTargetInputSchema = external_exports.object({
+  name: external_exports.string().min(1, "target name is required"),
+  provider: external_exports.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
+  // Command template - required (accept both naming conventions)
+  command_template: external_exports.string().optional(),
+  commandTemplate: external_exports.string().optional(),
+  // Files format - optional
+  files_format: external_exports.string().optional(),
+  filesFormat: external_exports.string().optional(),
+  attachments_format: external_exports.string().optional(),
+  attachmentsFormat: external_exports.string().optional(),
+  // Working directory - optional
+  cwd: external_exports.string().optional(),
+  // Timeout in seconds - optional
+  timeout_seconds: external_exports.number().positive().optional(),
+  timeoutSeconds: external_exports.number().positive().optional(),
+  // Healthcheck configuration - optional
+  healthcheck: CliHealthcheckInputSchema.optional(),
+  // Verbose mode - optional
+  verbose: external_exports.boolean().optional(),
+  cli_verbose: external_exports.boolean().optional(),
+  cliVerbose: external_exports.boolean().optional(),
+  // Keep temp files - optional
+  keep_temp_files: external_exports.boolean().optional(),
+  keepTempFiles: external_exports.boolean().optional(),
+  keep_output_files: external_exports.boolean().optional(),
+  keepOutputFiles: external_exports.boolean().optional(),
+  // Common target fields
+  judge_target: external_exports.string().optional(),
+  workers: external_exports.number().int().min(1).optional(),
+  provider_batching: external_exports.boolean().optional(),
+  providerBatching: external_exports.boolean().optional()
+}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
+  message: "Either command_template or commandTemplate is required"
+});
+var CliHealthcheckHttpSchema = external_exports.object({
+  type: external_exports.literal("http"),
+  url: external_exports.string().min(1),
+  timeoutMs: external_exports.number().positive().optional()
+}).strict();
+var CliHealthcheckCommandSchema = external_exports.object({
+  type: external_exports.literal("command"),
+  commandTemplate: external_exports.string().min(1),
+  cwd: external_exports.string().optional(),
+  timeoutMs: external_exports.number().positive().optional()
+}).strict();
+var CliHealthcheckSchema = external_exports.discriminatedUnion("type", [
+  CliHealthcheckHttpSchema,
+  CliHealthcheckCommandSchema
+]);
+var CliTargetConfigSchema = external_exports.object({
+  commandTemplate: external_exports.string().min(1),
+  filesFormat: external_exports.string().optional(),
+  cwd: external_exports.string().optional(),
+  timeoutMs: external_exports.number().positive().optional(),
+  healthcheck: CliHealthcheckSchema.optional(),
+  verbose: external_exports.boolean().optional(),
+  keepTempFiles: external_exports.boolean().optional()
+}).strict();
+function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  if (input.type === "http") {
+    const url2 = resolveString(input.url, env, `${targetName} healthcheck URL`);
+    return {
+      type: "http",
+      url: url2,
+      timeoutMs
+    };
+  }
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(
+      `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
+    );
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} healthcheck command template`,
+    true
+  );
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
+    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  }
+  return {
+    type: "command",
+    commandTemplate,
+    cwd,
+    timeoutMs
+  };
+}
+function normalizeCliTargetInput(input, env, evalFilePath) {
+  const targetName = input.name;
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} CLI command template`,
+    true
+  );
+  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
+  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
+    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  }
+  if (!cwd && evalFilePath) {
+    cwd = path2.dirname(path2.resolve(evalFilePath));
+  }
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
+  const keepTempFiles = resolveOptionalBoolean(
+    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
+  );
+  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
+  return {
+    commandTemplate,
+    filesFormat,
+    cwd,
+    timeoutMs,
+    healthcheck,
+    verbose,
+    keepTempFiles
+  };
+}
 var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
   "PROMPT",
   "GUIDELINES",
@@ -4407,6 +4662,16 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
         providerBatching,
         config: resolveCodexConfig(parsed, env)
       };
+    case "pi":
+    case "pi-coding-agent":
+      return {
+        kind: "pi-coding-agent",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolvePiCodingAgentConfig(parsed, env)
+      };
     case "mock":
       return {
         kind: "mock",
@@ -4515,6 +4780,7 @@ function resolveCodexConfig(target, env) {
   const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
   const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
   const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -4530,13 +4796,15 @@ function resolveCodexConfig(target, env) {
     optionalEnv: true
   });
   const logFormat = normalizeCodexLogFormat(logFormatSource);
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     executable,
     args,
     cwd,
     timeoutMs,
     logDir,
-    logFormat
+    logFormat,
+    systemPrompt
   };
 }
 function normalizeCodexLogFormat(value) {
@@ -4552,6 +4820,70 @@ function normalizeCodexLogFormat(value) {
   }
   throw new Error("codex log format must be 'summary' or 'json'");
 }
+function resolvePiCodingAgentConfig(target, env) {
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const modelSource = target.model ?? target.pi_model ?? target.piModel;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
+  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
+  const logFormatSource = target.log_format ?? target.logFormat;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
+    allowLiteral: true,
+    optionalEnv: true
+  }) ?? "pi";
+  const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
+    allowLiteral: false,
+    optionalEnv: true
+  });
+  const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
+  const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
+  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
+  return {
+    executable,
+    provider,
+    model,
+    apiKey,
+    tools,
+    thinking,
+    args,
+    cwd,
+    timeoutMs,
+    logDir,
+    logFormat,
+    systemPrompt
+  };
+}
 function resolveMockConfig(target) {
   const response = typeof target.response === "string" ? target.response : void 0;
   return { response };
@@ -4574,9 +4906,9 @@ function resolveVSCodeConfig(target, env, insiders) {
   const dryRunSource = target.dry_run ?? target.dryRun;
   const subagentRootSource = target.subagent_root ?? target.subagentRoot;
   const defaultCommand = insiders ? "code-insiders" : "code";
-  const command6 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
+  const command7 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
   return {
-    command: command6,
+    command: command7,
     waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
     dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
     subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -4586,46 +4918,35 @@ function resolveVSCodeConfig(target, env, insiders) {
     workspaceTemplate
   };
 }
-function resolveCliConfig(target, env, evalFilePath) {
-  const commandTemplateSource = target.command_template ?? target.commandTemplate;
-  const filesFormat = resolveOptionalLiteralString(
-    target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
-  );
-  const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
-  const keepTempFiles = resolveOptionalBoolean(
-    target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
-  );
-  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
-    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+var cliErrorMap = (issue2, ctx) => {
+  if (issue2.code === external_exports.ZodIssueCode.unrecognized_keys) {
+    return { message: `Unknown CLI provider settings: ${issue2.keys.join(", ")}` };
   }
-  if (!cwd && evalFilePath) {
-    cwd = path2.dirname(path2.resolve(evalFilePath));
+  if (issue2.code === external_exports.ZodIssueCode.invalid_union_discriminator) {
+    return { message: "healthcheck type must be 'http' or 'command'" };
   }
-  const timeoutMs = resolveTimeoutMs(
-    target.timeout_seconds ?? target.timeoutSeconds,
-    `${target.name} timeout`
-  );
-  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name, evalFilePath);
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${target.name} CLI command template`,
-    true
-  );
-  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
-  return {
-    commandTemplate,
-    filesFormat,
-    cwd,
-    timeoutMs,
-    healthcheck,
-    verbose,
-    keepTempFiles
-  };
+  if (issue2.code === external_exports.ZodIssueCode.invalid_type && issue2.expected === "string") {
+    return { message: `${ctx.defaultError} (expected a string value)` };
+  }
+  return { message: ctx.defaultError };
+};
+function resolveCliConfig(target, env, evalFilePath) {
+  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
+  if (!parseResult.success) {
+    const firstError = parseResult.error.errors[0];
+    const path34 = firstError?.path.join(".") || "";
+    const prefix = path34 ? `${target.name} ${path34}: ` : `${target.name}: `;
+    throw new Error(`${prefix}${firstError?.message}`);
+  }
+  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
+  assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
+  if (normalized.healthcheck?.type === "command") {
+    assertSupportedCliPlaceholders(
+      normalized.healthcheck.commandTemplate,
+      `${target.name} healthcheck command template`
+    );
+  }
+  return normalized;
 }
 function resolveTimeoutMs(source2, description) {
   const seconds = resolveOptionalNumber(source2, `${description} (seconds)`);
@@ -4637,49 +4958,6 @@ function resolveTimeoutMs(source2, description) {
   }
   return Math.floor(seconds * 1e3);
 }
-function resolveCliHealthcheck(source2, env, targetName, evalFilePath) {
-  if (source2 === void 0 || source2 === null) {
-    return void 0;
-  }
-  if (typeof source2 !== "object" || Array.isArray(source2)) {
-    throw new Error(`${targetName} healthcheck must be an object`);
-  }
-  const candidate = source2;
-  const type = candidate.type;
-  const timeoutMs = resolveTimeoutMs(
-    candidate.timeout_seconds ?? candidate.timeoutSeconds,
-    `${targetName} healthcheck timeout`
-  );
-  if (type === "http") {
-    const url2 = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
-    return {
-      type: "http",
-      url: url2,
-      timeoutMs
-    };
-  }
-  if (type === "command") {
-    const commandTemplate = resolveString(
-      candidate.command_template ?? candidate.commandTemplate,
-      env,
-      `${targetName} healthcheck command template`,
-      true
-    );
-    assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
-    const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
-      allowLiteral: true,
-      optionalEnv: true
-    });
-    const resolvedCwd = cwd && evalFilePath && !path2.isAbsolute(cwd) ? path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd) : cwd;
-    return {
-      type: "command",
-      commandTemplate,
-      timeoutMs,
-      cwd: resolvedCwd
-    };
-  }
-  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
-}
 function assertSupportedCliPlaceholders(template, description) {
   const placeholders = extractCliPlaceholders(template);
   for (const placeholder of placeholders) {
@@ -4845,6 +5123,7 @@ function resolveOptionalNumberArray(source2, description) {
 }
 var AGENT_PROVIDER_KINDS = [
   "codex",
+  "pi-coding-agent",
   "vscode",
   "vscode-insiders"
 ];
@@ -4853,6 +5132,7 @@ var KNOWN_PROVIDERS = [
   "anthropic",
   "gemini",
   "codex",
+  "pi-coding-agent",
   "cli",
   "mock",
   "vscode",
@@ -4867,6 +5147,8 @@ var PROVIDER_ALIASES = [
   // alias for "gemini"
   "codex-cli",
   // alias for "codex"
+  "pi",
+  // alias for "pi-coding-agent"
   "openai",
   // legacy/future support
   "bedrock",
@@ -5502,9 +5784,9 @@ __export(external_exports2, {
   null: () => _null3,
   nullable: () => nullable,
   nullish: () => nullish2,
-  number: () => number2,
+  number: () => number3,
   object: () => object,
-  optional: () => optional,
+  optional: () => optional2,
   overwrite: () => _overwrite,
   parse: () => parse2,
   parseAsync: () => parseAsync2,
@@ -5529,7 +5811,7 @@ __export(external_exports2, {
   size: () => _size,
   startsWith: () => _startsWith,
   strictObject: () => strictObject,
-  string: () => string2,
+  string: () => string3,
   stringFormat: () => stringFormat,
   stringbool: () => stringbool,
   success: () => success,
@@ -6614,9 +6896,9 @@ __export(regexes_exports, {
   lowercase: () => lowercase,
   nanoid: () => nanoid,
   null: () => _null,
-  number: () => number,
+  number: () => number2,
   rfc5322Email: () => rfc5322Email,
-  string: () => string,
+  string: () => string2,
   time: () => time,
   ulid: () => ulid,
   undefined: () => _undefined,
@@ -6683,13 +6965,13 @@ function datetime(args) {
   const timeRegex2 = `${time3}(?:${opts.join("|")})`;
   return new RegExp(`^${dateSource}T(?:${timeRegex2})$`);
 }
-var string = (params) => {
+var string2 = (params) => {
   const regex = params ? `[\\s\\S]{${params?.minimum ?? 0},${params?.maximum ?? ""}}` : `[\\s\\S]*`;
   return new RegExp(`^${regex}$`);
 };
 var bigint = /^\d+n?$/;
 var integer = /^\d+$/;
-var number = /^-?\d+(?:\.\d+)?/i;
+var number2 = /^-?\d+(?:\.\d+)?/i;
 var boolean = /true|false/i;
 var _null = /null/i;
 var _undefined = /undefined/i;
@@ -7364,7 +7646,7 @@ var $ZodType = /* @__PURE__ */ $constructor("$ZodType", (inst, def) => {
 });
 var $ZodString = /* @__PURE__ */ $constructor("$ZodString", (inst, def) => {
   $ZodType.init(inst, def);
-  inst._zod.pattern = [...inst?._zod.bag?.patterns ?? []].pop() ?? string(inst._zod.bag);
+  inst._zod.pattern = [...inst?._zod.bag?.patterns ?? []].pop() ?? string2(inst._zod.bag);
   inst._zod.parse = (payload, _) => {
     if (def.coerce)
       try {
@@ -7677,7 +7959,7 @@ var $ZodCustomStringFormat = /* @__PURE__ */ $constructor("$ZodCustomStringForma
 });
 var $ZodNumber = /* @__PURE__ */ $constructor("$ZodNumber", (inst, def) => {
   $ZodType.init(inst, def);
-  inst._zod.pattern = inst._zod.bag.pattern ?? number;
+  inst._zod.pattern = inst._zod.bag.pattern ?? number2;
   inst._zod.parse = (payload, _ctx) => {
     if (def.coerce)
       try {
@@ -8104,7 +8386,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
   defineLazy(inst._zod, "values", () => {
     if (def.options.every((o) => o._zod.values)) {
-      return new Set(def.options.flatMap((option5) => Array.from(option5._zod.values)));
+      return new Set(def.options.flatMap((option6) => Array.from(option6._zod.values)));
     }
     return void 0;
   });
@@ -8118,8 +8400,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   inst._zod.parse = (payload, ctx) => {
     let async = false;
     const results = [];
-    for (const option5 of def.options) {
-      const result = option5._zod.run({
+    for (const option6 of def.options) {
+      const result = option6._zod.run({
         value: payload.value,
         issues: []
       }, ctx);
@@ -8144,10 +8426,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
   const _super = inst._zod.parse;
   defineLazy(inst._zod, "propValues", () => {
     const propValues = {};
-    for (const option5 of def.options) {
-      const pv = option5._zod.propValues;
+    for (const option6 of def.options) {
+      const pv = option6._zod.propValues;
       if (!pv || Object.keys(pv).length === 0)
-        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option5)}"`);
+        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option6)}"`);
       for (const [k, v] of Object.entries(pv)) {
         if (!propValues[k])
           propValues[k] = /* @__PURE__ */ new Set();
@@ -15351,8 +15633,8 @@ function isTransforming(_schema, _ctx) {
       return false;
     }
     case "union": {
-      for (const option5 of def.options) {
-        if (isTransforming(option5, ctx))
+      for (const option6 of def.options) {
+        if (isTransforming(option6, ctx))
           return true;
       }
       return false;
@@ -15529,9 +15811,9 @@ var ZodType2 = /* @__PURE__ */ $constructor("ZodType", (inst, def) => {
   inst.refine = (check2, params) => inst.check(refine(check2, params));
   inst.superRefine = (refinement) => inst.check(superRefine(refinement));
   inst.overwrite = (fn) => inst.check(_overwrite(fn));
-  inst.optional = () => optional(inst);
+  inst.optional = () => optional2(inst);
   inst.nullable = () => nullable(inst);
-  inst.nullish = () => optional(nullable(inst));
+  inst.nullish = () => optional2(nullable(inst));
   inst.nonoptional = (params) => nonoptional(inst, params);
   inst.array = () => array(inst);
   inst.or = (arg) => union([inst, arg]);
@@ -15618,7 +15900,7 @@ var ZodString2 = /* @__PURE__ */ $constructor("ZodString", (inst, def) => {
   inst.time = (params) => inst.check(time2(params));
   inst.duration = (params) => inst.check(duration2(params));
 });
-function string2(params) {
+function string3(params) {
   return _string(ZodString2, params);
 }
 var ZodStringFormat = /* @__PURE__ */ $constructor("ZodStringFormat", (inst, def) => {
@@ -15799,7 +16081,7 @@ var ZodNumber2 = /* @__PURE__ */ $constructor("ZodNumber", (inst, def) => {
   inst.isFinite = true;
   inst.format = bag.format ?? null;
 });
-function number2(params) {
+function number3(params) {
   return _number(ZodNumber2, params);
 }
 var ZodNumberFormat = /* @__PURE__ */ $constructor("ZodNumberFormat", (inst, def) => {
@@ -16219,7 +16501,7 @@ var ZodOptional2 = /* @__PURE__ */ $constructor("ZodOptional", (inst, def) => {
   ZodType2.init(inst, def);
   inst.unwrap = () => inst._zod.def.innerType;
 });
-function optional(innerType) {
+function optional2(innerType) {
   return new ZodOptional2({
     type: "optional",
     innerType
@@ -16237,7 +16519,7 @@ function nullable(innerType) {
   });
 }
 function nullish2(innerType) {
-  return optional(nullable(innerType));
+  return optional2(nullable(innerType));
 }
 var ZodDefault2 = /* @__PURE__ */ $constructor("ZodDefault", (inst, def) => {
   $ZodDefault.init(inst, def);
@@ -16427,7 +16709,7 @@ var stringbool = (...args) => _stringbool({
 }, ...args);
 function json(params) {
   const jsonSchema2 = lazy(() => {
-    return union([string2(params), number2(), boolean2(), _null3(), array(jsonSchema2), record(string2(), jsonSchema2)]);
+    return union([string3(params), number3(), boolean2(), _null3(), array(jsonSchema2), record(string3(), jsonSchema2)]);
   });
   return jsonSchema2;
 }
@@ -16464,13 +16746,13 @@ __export(coerce_exports, {
   bigint: () => bigint3,
   boolean: () => boolean3,
   date: () => date4,
-  number: () => number3,
-  string: () => string3
+  number: () => number4,
+  string: () => string4
 });
-function string3(params) {
+function string4(params) {
   return _coercedString(ZodString2, params);
 }
-function number3(params) {
+function number4(params) {
   return _coercedNumber(ZodNumber2, params);
 }
 function boolean3(params) {
@@ -32509,7 +32791,13 @@ import { tmpdir } from "node:os";
 import path92 from "node:path";
 import { promisify as promisify22 } from "node:util";
 import path82 from "node:path";
+import { spawn as spawn22 } from "node:child_process";
+import { randomUUID as randomUUID2 } from "node:crypto";
+import { createWriteStream as createWriteStream2 } from "node:fs";
+import { mkdir as mkdir22, mkdtemp as mkdtemp2, rm as rm22, writeFile as writeFile22 } from "node:fs/promises";
+import { tmpdir as tmpdir2 } from "node:os";
 import path102 from "node:path";
+import path112 from "node:path";
 // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
 import { stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
@@ -34532,11 +34820,11 @@ async function provisionSubagents(options) {
 // ../../packages/core/dist/index.js
 import { constants as constants32 } from "node:fs";
 import { access as access32, readFile as readFile6 } from "node:fs/promises";
-import path112 from "node:path";
-import { parse as parse32 } from "yaml";
-import { createHash, randomUUID as randomUUID2 } from "node:crypto";
-import { mkdir as mkdir22, writeFile as writeFile22 } from "node:fs/promises";
 import path122 from "node:path";
+import { parse as parse32 } from "yaml";
+import { createHash, randomUUID as randomUUID3 } from "node:crypto";
+import { mkdir as mkdir32, writeFile as writeFile32 } from "node:fs/promises";
+import path132 from "node:path";
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
 var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
 function isTestMessageRole(value) {
@@ -34611,6 +34899,15 @@ function computeTraceSummary(messages) {
     errorCount: 0
   };
 }
+/**
+ * Overlay provider-reported execution metrics onto a trace summary.
+ *
+ * @param summary - Trace summary to extend; it is copied, never mutated.
+ * @param metrics - Optional object carrying tokenUsage, costUsd and durationMs.
+ * @returns `summary` itself when `metrics` is falsy; otherwise a shallow copy of
+ *   `summary` whose three metric fields are taken from `metrics`.
+ */
+function mergeExecutionMetrics(summary, metrics) {
+  if (!metrics) {
+    return summary;
+  }
+  const { tokenUsage, costUsd, durationMs } = metrics;
+  // The three metric keys are always written, even when `metrics` leaves them undefined.
+  return { ...summary, tokenUsage, costUsd, durationMs };
+}
 function extractCodeBlocks(segments) {
   const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
   const codeBlocks = [];
@@ -35093,7 +35390,13 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
         expected = [];
         for (const item of rawExpected) {
           if (isJsonObject2(item) && typeof item.tool === "string") {
-            expected.push({ tool: item.tool });
+            let args;
+            if (item.args === "any") {
+              args = "any";
+            } else if (isJsonObject2(item.args)) {
+              args = item.args;
+            }
+            expected.push({ tool: item.tool, ...args !== void 0 ? { args } : {} });
           }
         }
       }
@@ -36168,7 +36471,7 @@ async function withRetry(fn, retryConfig, signal) {
 }
 var execAsync2 = promisify2(execWithCallback);
 var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
-async function defaultCommandRunner(command6, options) {
+async function defaultCommandRunner(command7, options) {
   const execOptions = {
     cwd: options.cwd,
     env: options.env,
@@ -36178,7 +36481,7 @@ async function defaultCommandRunner(command6, options) {
     shell: process.platform === "win32" ? "powershell.exe" : void 0
   };
   try {
-    const { stdout, stderr } = await execAsync2(command6, execOptions);
+    const { stdout, stderr } = await execAsync2(command7, execOptions);
     return {
       stdout,
       stderr,
@@ -36230,12 +36533,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: request.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (request.signal?.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -36254,6 +36559,9 @@ var CliProvider = class {
     const parsed = this.parseOutputContent(responseContent);
     return {
       outputMessages: parsed.outputMessages,
+      tokenUsage: parsed.tokenUsage,
+      costUsd: parsed.costUsd,
+      durationMs: parsed.durationMs ?? measuredDurationMs,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
@@ -36301,12 +36609,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: controller.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (controller.signal.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -36328,11 +36638,13 @@ var CliProvider = class {
     if (missingIds.length > 0) {
       throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
     }
+    const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
     const responses = requests.map((request) => {
       const evalCaseId = request.evalCaseId;
       if (!evalCaseId) {
         return {
           outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -36346,6 +36658,7 @@ var CliProvider = class {
       if (!parsed) {
         return {
           outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -36357,6 +36670,9 @@ var CliProvider = class {
       }
       return {
         outputMessages: parsed.outputMessages,
+        tokenUsage: parsed.tokenUsage,
+        costUsd: parsed.costUsd,
+        durationMs: parsed.durationMs ?? perRequestFallbackMs,
         raw: {
           command: renderedCommand,
           stderr: result.stderr,
@@ -36374,25 +36690,55 @@ var CliProvider = class {
    * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
    * If only 'text' is provided, wrap it in outputMessages.
    * Otherwise, treat the entire content as plain text wrapped in outputMessages.
+   *
+   * Also extracts optional execution metrics:
+   * - token_usage: { input, output, cached? }
+   * - cost_usd: number
+   * - duration_ms: number
    */
   parseOutputContent(content) {
     try {
       const parsed = JSON.parse(content);
       if (typeof parsed === "object" && parsed !== null) {
         const obj = parsed;
+        const tokenUsage = this.parseTokenUsage(obj.token_usage);
+        const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+        const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
         const outputMessages = this.parseOutputMessages(obj.output_messages);
         if (outputMessages && outputMessages.length > 0) {
-          return { outputMessages };
+          return { outputMessages, tokenUsage, costUsd, durationMs };
         }
         if ("text" in obj) {
           const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
-          return { outputMessages: [{ role: "assistant", content: text2 }] };
+          return {
+            outputMessages: [{ role: "assistant", content: text2 }],
+            tokenUsage,
+            costUsd,
+            durationMs
+          };
         }
       }
     } catch {
     }
     return { outputMessages: [{ role: "assistant", content }] };
   }
+  /**
+   * Validate and normalize the optional `token_usage` value found in CLI output.
+   *
+   * @param tokenUsage - Raw value; usable only when it is an object whose
+   *   `input` and `output` are both numbers.
+   * @returns `{ input, output, cached }` (`cached` is undefined unless numeric),
+   *   or undefined when the value is not an object or a count is not a number.
+   */
+  parseTokenUsage(tokenUsage) {
+    const isObject = typeof tokenUsage === "object" && tokenUsage !== null;
+    if (!isObject) {
+      return void 0;
+    }
+    const { input, output, cached } = tokenUsage;
+    const hasCounts = typeof input === "number" && typeof output === "number";
+    if (!hasCounts) {
+      return void 0;
+    }
+    return {
+      input,
+      output,
+      cached: typeof cached === "number" ? cached : void 0
+    };
+  }
   /**
    * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
    */
@@ -36469,6 +36815,9 @@ var CliProvider = class {
       if (records.has(id)) {
         throw new Error(`CLI batch output contains duplicate id: ${id}`);
       }
+      const tokenUsage = this.parseTokenUsage(obj.token_usage);
+      const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+      const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
       const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
       let outputMessages;
       if (parsedOutputMessages && parsedOutputMessages.length > 0) {
@@ -36478,7 +36827,10 @@ var CliProvider = class {
         outputMessages = text2 ? [{ role: "assistant", content: text2 }] : [];
       }
       records.set(id, {
-        outputMessages
+        outputMessages,
+        tokenUsage,
+        costUsd,
+        durationMs
       });
     }
     return records;
@@ -36771,6 +37123,11 @@ var execAsync22 = promisify22(execCallback);
 var WORKSPACE_PREFIX = "agentv-codex-";
 var PROMPT_FILENAME = "prompt.md";
 var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
+// Instruction block that CodexProvider places ahead of the prompt document when
+// the target config does not supply its own `systemPrompt`.
+var DEFAULT_SYSTEM_PROMPT2 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
 var CodexProvider = class {
   id;
   kind = "codex";
@@ -36795,7 +37152,11 @@ var CodexProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptContent = buildPromptDocument(request, inputFiles);
+      const basePrompt = buildPromptDocument(request, inputFiles);
+      const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT2;
+      const promptContent = `${systemPrompt}
+${basePrompt}`;
       const promptFile = path92.join(workspaceRoot, PROMPT_FILENAME);
       await writeFile5(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
@@ -37476,6 +37837,666 @@ var MockProvider = class {
     return this.delayMs;
   }
 };
+// Registry symbols (Symbol.for) used as globalThis keys for the Pi log entry
+// list and for the set of Pi log subscribers, respectively.
+var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.piLogs");
+var GLOBAL_SUBSCRIBERS_KEY2 = Symbol.for("agentv.piLogSubscribers");
+/**
+ * Return the shared array of recorded Pi log entries, creating it on first use.
+ * The array is stored on globalThis under GLOBAL_LOGS_KEY2.
+ */
+function getPiLogStore() {
+  const root = globalThis;
+  let entries = root[GLOBAL_LOGS_KEY2];
+  if (!entries) {
+    entries = [];
+    root[GLOBAL_LOGS_KEY2] = entries;
+  }
+  return entries;
+}
+/**
+ * Return the shared Set of Pi log listeners, creating it on first use.
+ * The set is stored on globalThis under GLOBAL_SUBSCRIBERS_KEY2.
+ */
+function getSubscriberStore2() {
+  const root = globalThis;
+  let listeners = root[GLOBAL_SUBSCRIBERS_KEY2];
+  if (!listeners) {
+    listeners = /* @__PURE__ */ new Set();
+    root[GLOBAL_SUBSCRIBERS_KEY2] = listeners;
+  }
+  return listeners;
+}
+/**
+ * Deliver a log entry to every registered Pi log listener.
+ * A listener that throws is reported with console.warn and does not stop the others.
+ */
+function notifySubscribers2(entry) {
+  // Work on a snapshot so the set may change while listeners are running.
+  const snapshot = [...getSubscriberStore2()];
+  snapshot.forEach((notify) => {
+    try {
+      notify(entry);
+    } catch (failure) {
+      const reason = failure instanceof Error ? failure.message : String(failure);
+      console.warn(`Pi log subscriber failed: ${reason}`);
+    }
+  });
+}
+/** Append a log entry to the shared Pi log store, then notify all subscribers. */
+function recordPiLogEntry(entry) {
+  const entries = getPiLogStore();
+  entries.push(entry);
+  notifySubscribers2(entry);
+}
+/**
+ * Register a listener for Pi log entries.
+ *
+ * @param listener - Callback invoked with each recorded entry.
+ * @returns A function that removes the listener again.
+ */
+function subscribeToPiLogEntries(listener) {
+  const subscribers = getSubscriberStore2();
+  subscribers.add(listener);
+  return () => {
+    subscribers.delete(listener);
+  };
+}
+// Prefix of the temporary workspace directory created for each Pi invocation.
+var WORKSPACE_PREFIX2 = "agentv-pi-";
+// Name of the file inside the workspace that receives the request question.
+var PROMPT_FILENAME2 = "prompt.md";
+// Instruction block that PiCodingAgentProvider prepends to the prompt when the
+// target config does not supply its own `systemPrompt`.
+var DEFAULT_SYSTEM_PROMPT3 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
+/**
+ * Provider that answers a request by running the Pi coding agent executable
+ * once per request and converting its JSONL stdout into output messages.
+ * Batch invocation is not supported (`supportsBatch = false`).
+ */
+var PiCodingAgentProvider = class {
+  id;
+  kind = "pi-coding-agent";
+  targetName;
+  supportsBatch = false;
+  config;
+  runPi;
+  /**
+   * @param targetName - Target name; used in the provider id and in log file names.
+   * @param config2 - Target settings read by this class (executable, provider, model,
+   *   apiKey, tools, thinking, args, cwd, timeoutMs, systemPrompt, logDir, logFormat).
+   * @param runner - Process runner; defaults to defaultPiRunner and can be replaced.
+   */
+  constructor(targetName, config2, runner = defaultPiRunner) {
+    this.id = `pi-coding-agent:${targetName}`;
+    this.targetName = targetName;
+    this.config = config2;
+    this.runPi = runner;
+  }
+  /**
+   * Run the agent for one request.
+   *
+   * Creates a temporary workspace and (best effort) a stream logger, writes the
+   * question to the prompt file, runs the executable, and parses its stdout.
+   * The logger is closed and the workspace removed in all cases.
+   *
+   * @returns `{ raw, outputMessages }`.
+   * @throws Error when the request is already aborted, the run times out, the
+   *   process exits non-zero, or stdout contains no parseable JSON line.
+   */
+  async invoke(request) {
+    if (request.signal?.aborted) {
+      throw new Error("Pi coding agent request was aborted before execution");
+    }
+    const inputFiles = normalizeInputFiles2(request.inputFiles);
+    const workspaceRoot = await this.createWorkspace();
+    // Logging is optional: a failure to set it up must not fail the request.
+    const logger = await this.createStreamLogger(request).catch(() => void 0);
+    try {
+      const promptFile = path102.join(workspaceRoot, PROMPT_FILENAME2);
+      // The file holds only the question; the system prompt is added in buildPiArgs.
+      await writeFile22(promptFile, request.question, "utf8");
+      const args = this.buildPiArgs(request.question, inputFiles);
+      const cwd = this.resolveCwd(workspaceRoot);
+      const result = await this.executePi(args, cwd, request.signal, logger);
+      if (result.timedOut) {
+        throw new Error(
+          `Pi coding agent timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
+        );
+      }
+      if (result.exitCode !== 0) {
+        const detail = pickDetail2(result.stderr, result.stdout);
+        const prefix = `Pi coding agent exited with code ${result.exitCode}`;
+        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
+      }
+      const parsed = parsePiJsonl(result.stdout);
+      const outputMessages = extractOutputMessages(parsed);
+      // NOTE(review): assistantText is computed but not used in the returned value.
+      const assistantText = extractAssistantText2(outputMessages);
+      return {
+        raw: {
+          response: parsed,
+          stdout: result.stdout,
+          stderr: result.stderr,
+          exitCode: result.exitCode,
+          // NOTE(review): args contains the --api-key value when config.apiKey is set.
+          args,
+          executable: this.config.executable,
+          promptFile,
+          workspace: workspaceRoot,
+          inputFiles,
+          logFile: logger?.filePath
+        },
+        outputMessages
+      };
+    } finally {
+      await logger?.close();
+      await this.cleanupWorkspace(workspaceRoot);
+    }
+  }
+  /** Working directory for the child: the resolved config.cwd if set, else the workspace. */
+  resolveCwd(workspaceRoot) {
+    if (!this.config.cwd) {
+      return workspaceRoot;
+    }
+    return path102.resolve(this.config.cwd);
+  }
+  /**
+   * Build the argument list for the Pi executable.
+   *
+   * Order: optional --provider/--model/--api-key, the fixed flags
+   * `--mode json --print --no-session`, optional --tools/--thinking, extra
+   * config.args, one `@<file>` per input file, and finally the prompt
+   * (system prompt, newline, question) passed through escapeAtSymbols.
+   */
+  buildPiArgs(prompt, inputFiles) {
+    const args = [];
+    if (this.config.provider) {
+      args.push("--provider", this.config.provider);
+    }
+    if (this.config.model) {
+      args.push("--model", this.config.model);
+    }
+    if (this.config.apiKey) {
+      args.push("--api-key", this.config.apiKey);
+    }
+    args.push("--mode", "json");
+    args.push("--print");
+    args.push("--no-session");
+    if (this.config.tools) {
+      args.push("--tools", this.config.tools);
+    }
+    if (this.config.thinking) {
+      args.push("--thinking", this.config.thinking);
+    }
+    if (this.config.args && this.config.args.length > 0) {
+      args.push(...this.config.args);
+    }
+    if (inputFiles && inputFiles.length > 0) {
+      for (const file2 of inputFiles) {
+        args.push(`@${file2}`);
+      }
+    }
+    const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT3;
+    const fullPrompt = `${systemPrompt}
+${prompt}`;
+    const escapedPrompt = escapeAtSymbols(fullPrompt);
+    args.push(escapedPrompt);
+    return args;
+  }
+  /**
+   * Invoke the runner with this provider's settings, forwarding output chunks
+   * to the logger when one exists. An ENOENT error is replaced by a message
+   * naming the missing executable; any other error is rethrown unchanged.
+   */
+  async executePi(args, cwd, signal, logger) {
+    try {
+      return await this.runPi({
+        executable: this.config.executable,
+        args,
+        cwd,
+        timeoutMs: this.config.timeoutMs,
+        env: this.buildEnv(),
+        signal,
+        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
+        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
+      });
+    } catch (error40) {
+      const err = error40;
+      if (err.code === "ENOENT") {
+        throw new Error(
+          `Pi coding agent executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
+        );
+      }
+      throw error40;
+    }
+  }
+  /**
+   * Copy process.env and, when config.apiKey is set, expose the key under the
+   * environment variable matching the lower-cased provider. The provider
+   * defaults to "google"; a provider not listed below sets no variable.
+   */
+  buildEnv() {
+    const env = { ...process.env };
+    if (this.config.apiKey) {
+      const provider = this.config.provider?.toLowerCase() ?? "google";
+      switch (provider) {
+        case "google":
+        case "gemini":
+          env.GEMINI_API_KEY = this.config.apiKey;
+          break;
+        case "anthropic":
+          env.ANTHROPIC_API_KEY = this.config.apiKey;
+          break;
+        case "openai":
+          env.OPENAI_API_KEY = this.config.apiKey;
+          break;
+        case "groq":
+          env.GROQ_API_KEY = this.config.apiKey;
+          break;
+        case "xai":
+          env.XAI_API_KEY = this.config.apiKey;
+          break;
+        case "openrouter":
+          env.OPENROUTER_API_KEY = this.config.apiKey;
+          break;
+      }
+    }
+    return env;
+  }
+  /** Create a fresh temporary directory named with WORKSPACE_PREFIX2 under the OS temp dir. */
+  async createWorkspace() {
+    return await mkdtemp2(path102.join(tmpdir2(), WORKSPACE_PREFIX2));
+  }
+  /** Remove the workspace recursively; removal failures are deliberately ignored. */
+  async cleanupWorkspace(workspaceRoot) {
+    try {
+      await rm22(workspaceRoot, { recursive: true, force: true });
+    } catch {
+    }
+  }
+  /** Log directory: resolved config.logDir if set, else `<cwd>/.agentv/logs/pi-coding-agent`. */
+  resolveLogDirectory() {
+    if (this.config.logDir) {
+      return path102.resolve(this.config.logDir);
+    }
+    return path102.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+  }
+  /**
+   * Create the per-request stream logger and record its location in the shared
+   * Pi log store. Returns undefined, after a console warning, when the log
+   * directory or the logger cannot be created.
+   */
+  async createStreamLogger(request) {
+    const logDir = this.resolveLogDirectory();
+    // NOTE(review): resolveLogDirectory appears to always return a path, so this
+    // branch looks unreachable — confirm before relying on it.
+    if (!logDir) {
+      return void 0;
+    }
+    try {
+      await mkdir22(logDir, { recursive: true });
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
+      return void 0;
+    }
+    const filePath = path102.join(logDir, buildLogFilename2(request, this.targetName));
+    try {
+      const logger = await PiStreamLogger.create({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt,
+        format: this.config.logFormat ?? "summary"
+      });
+      recordPiLogEntry({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt
+      });
+      return logger;
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
+      return void 0;
+    }
+  }
+};
+/**
+ * Line-oriented logger for the Pi child process output.
+ *
+ * Chunks from stdout and stderr are buffered separately, split into complete
+ * lines, formatted (elapsed time, source tag, summary or pretty JSON) and
+ * appended to the log file.
+ */
+var PiStreamLogger = class _PiStreamLogger {
+  filePath;
+  stream;
+  startedAt = Date.now();
+  stdoutBuffer = "";
+  stderrBuffer = "";
+  format;
+  constructor(filePath, format) {
+    this.filePath = filePath;
+    this.format = format;
+    // Append mode: an existing file with the same name is extended, not truncated.
+    this.stream = createWriteStream2(filePath, { flags: "a" });
+  }
+  /**
+   * Create a logger and write the header lines (title, target, optional eval
+   * id, optional 1-based attempt number, start time).
+   */
+  static async create(options) {
+    const logger = new _PiStreamLogger(options.filePath, options.format);
+    const header = ["# Pi Coding Agent stream log", `# target: ${options.targetName}`];
+    if (options.evalCaseId) {
+      header.push(`# eval: ${options.evalCaseId}`);
+    }
+    if (options.attempt !== void 0) {
+      header.push(`# attempt: ${options.attempt + 1}`);
+    }
+    header.push(`# started: ${(/* @__PURE__ */ new Date()).toISOString()}`);
+    logger.writeLines(header);
+    return logger;
+  }
+  /** Buffer a stdout chunk and write out any lines it completes. */
+  handleStdoutChunk(chunk) {
+    this.stdoutBuffer += chunk;
+    this.flushBuffer("stdout");
+  }
+  /** Buffer a stderr chunk and write out any lines it completes. */
+  handleStderrChunk(chunk) {
+    this.stderrBuffer += chunk;
+    this.flushBuffer("stderr");
+  }
+  /** Write everything still buffered, then end the stream and wait for it to finish. */
+  async close() {
+    for (const source of ["stdout", "stderr"]) {
+      this.flushBuffer(source);
+    }
+    this.flushRemainder();
+    await new Promise((resolve2, reject) => {
+      this.stream.once("error", reject);
+      this.stream.end(() => resolve2());
+    });
+  }
+  /** Write each given line followed by a newline. */
+  writeLines(lines) {
+    lines.forEach((line) => {
+      this.stream.write(`${line}\n`);
+    });
+  }
+  /** Write the complete lines buffered for `source2`; keep the unterminated tail buffered. */
+  flushBuffer(source2) {
+    const key = source2 === "stdout" ? "stdoutBuffer" : "stderrBuffer";
+    const segments = this[key].split(/\r?\n/);
+    // The last segment has no newline yet, so it stays in the buffer.
+    this[key] = segments.pop() ?? "";
+    for (const segment of segments) {
+      const formatted = this.formatLine(segment, source2);
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+  }
+  /**
+   * Format one raw line as `[+elapsed] [source] message`.
+   * Returns undefined for a line that is empty after trimming.
+   */
+  formatLine(rawLine, source2) {
+    const text = rawLine.trim();
+    if (text.length === 0) {
+      return void 0;
+    }
+    let message;
+    if (this.format === "json") {
+      message = formatPiJsonLog(text);
+    } else {
+      message = formatPiLogMessage(text, source2);
+    }
+    return `[+${formatElapsed2(this.startedAt)}] [${source2}] ${message}`;
+  }
+  /** Write the unterminated tail of each buffer (stdout first), then clear both buffers. */
+  flushRemainder() {
+    for (const source of ["stdout", "stderr"]) {
+      const pending = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
+      const leftover = pending.trim();
+      if (leftover.length === 0) {
+        continue;
+      }
+      const formatted = this.formatLine(leftover, source);
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+    this.stdoutBuffer = "";
+    this.stderrBuffer = "";
+  }
+};
+/**
+ * Build a log file name of the form
+ * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 uuid chars>.log`.
+ * The eval id falls back to "pi"; the attempt number is 1-based.
+ */
+function buildLogFilename2(request, targetName) {
+  // ':' and '.' in the ISO timestamp are replaced by '-'.
+  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+  const evalPart = sanitizeForFilename2(request.evalCaseId ?? "pi");
+  const attemptPart = request.attempt === void 0 ? "" : `_attempt-${request.attempt + 1}`;
+  const targetPart = sanitizeForFilename2(targetName);
+  const uniquePart = randomUUID2().slice(0, 8);
+  return `${stamp}_${targetPart}_${evalPart}${attemptPart}_${uniquePart}.log`;
+}
+/**
+ * Replace every run of characters outside `A-Za-z0-9._-` with one underscore.
+ * Returns "pi" when the result is empty.
+ */
+function sanitizeForFilename2(value) {
+  const cleaned = value.replace(/[^A-Za-z0-9._-]+/g, "_");
+  if (cleaned.length === 0) {
+    return "pi";
+  }
+  return cleaned;
+}
+/**
+ * Format the whole seconds elapsed since `startedAt` (epoch milliseconds) as
+ * `MM:SS`, or as `HH:MM:SS` once at least one hour has passed.
+ */
+function formatElapsed2(startedAt) {
+  const pad = (part) => part.toString().padStart(2, "0");
+  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  const parts = hours > 0 ? [hours, minutes, seconds] : [minutes, seconds];
+  return parts.map((part) => pad(part)).join(":");
+}
+/**
+ * Produce the "summary" log text for one raw output line: the event summary
+ * when the line is JSON that summarizePiEvent can describe, otherwise the raw
+ * line (prefixed with "stderr: " for stderr).
+ */
+function formatPiLogMessage(rawLine, source2) {
+  const event = tryParseJsonValue2(rawLine);
+  const summary = event ? summarizePiEvent(event) : void 0;
+  if (summary) {
+    return summary;
+  }
+  return source2 === "stderr" ? `stderr: ${rawLine}` : rawLine;
+}
+/**
+ * Produce the "json" log text for one raw output line: the parsed value
+ * pretty-printed with two-space indentation, or the raw line when it does not
+ * parse to a truthy value or cannot be re-serialized.
+ */
+function formatPiJsonLog(rawLine) {
+  const value = tryParseJsonValue2(rawLine);
+  if (value) {
+    try {
+      return JSON.stringify(value, null, 2);
+    } catch {
+      // Fall through to the raw line below.
+    }
+  }
+  return rawLine;
+}
+/**
+ * Describe a parsed Pi event in one short line.
+ *
+ * @param event - Parsed JSON value from the agent's output.
+ * @returns undefined when `event` is not an object or has no non-empty string
+ *   `type`; `"<type>: <role>"` for message_start/message_end; a truncated
+ *   `"text_delta: ..."` preview or `"message_update: <eventType>"` for
+ *   message_update; otherwise the type itself.
+ */
+function summarizePiEvent(event) {
+  if (!event || typeof event !== "object") {
+    return void 0;
+  }
+  const { type } = event;
+  if (typeof type !== "string" || type.length === 0) {
+    return void 0;
+  }
+  if (type === "message_start" || type === "message_end") {
+    return `${type}: ${event.message?.role}`;
+  }
+  if (type === "message_update") {
+    const update = event.assistantMessageEvent;
+    const updateType = update?.type;
+    if (updateType === "text_delta") {
+      const delta = update?.delta;
+      if (typeof delta === "string") {
+        // Previews are capped at 50 characters.
+        const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
+        return `text_delta: ${preview}`;
+      }
+    }
+    return `message_update: ${updateType}`;
+  }
+  // Every other event type is summarized by its name.
+  return type;
+}
+/** Parse a line as JSON; returns the parsed value, or undefined when parsing fails. */
+function tryParseJsonValue2(rawLine) {
+  let value;
+  try {
+    value = JSON.parse(rawLine);
+  } catch {
+    value = void 0;
+  }
+  return value;
+}
+/**
+ * Parse the agent's stdout as JSON Lines.
+ *
+ * Blank lines and lines that are not valid JSON are skipped.
+ *
+ * @returns The parsed values in input order.
+ * @throws Error when the output is empty or contains no valid JSON line.
+ */
+function parsePiJsonl(output) {
+  const body = output.trim();
+  if (body.length === 0) {
+    throw new Error("Pi coding agent produced no output");
+  }
+  const events = [];
+  for (const rawLine of body.split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (line.length === 0) {
+      continue;
+    }
+    try {
+      events.push(JSON.parse(line));
+    } catch {
+      // Non-JSON lines are tolerated and ignored.
+    }
+  }
+  if (events.length === 0) {
+    throw new Error("Pi coding agent produced no valid JSON output");
+  }
+  return events;
+}
+/**
+ * Derive the output messages from the parsed Pi event list.
+ *
+ * The last `agent_end` event carrying a `messages` array wins; when there is
+ * none, the `message` of every `turn_end` event is collected in order.
+ * Messages that convertPiMessage cannot convert are dropped.
+ */
+function extractOutputMessages(events) {
+  const isRecord = (value) => Boolean(value) && typeof value === "object";
+  for (let idx = events.length - 1; idx >= 0; idx -= 1) {
+    const candidate = events[idx];
+    const isFinalEvent = isRecord(candidate) && candidate.type === "agent_end" && Array.isArray(candidate.messages);
+    if (isFinalEvent) {
+      return candidate.messages.map(convertPiMessage).filter((m) => m !== void 0);
+    }
+  }
+  // Fallback: rebuild the conversation from the individual turn_end events.
+  return events
+    .filter((event) => isRecord(event) && event.type === "turn_end")
+    .map((event) => convertPiMessage(event.message))
+    .filter((converted) => Boolean(converted));
+}
+/**
+ * Convert one Pi message into an output message.
+ *
+ * @returns undefined when `message` is not an object or its `role` is not a
+ *   string; otherwise `{ role, content, toolCalls, timestamp, metadata }`, where
+ *   toolCalls and metadata are undefined when empty and a numeric timestamp is
+ *   converted to an ISO string.
+ */
+function convertPiMessage(message) {
+  if (!message || typeof message !== "object") {
+    return void 0;
+  }
+  const { role } = message;
+  if (typeof role !== "string") {
+    return void 0;
+  }
+  const content = extractTextContent2(message.content);
+  const toolCalls = extractToolCalls(message.content);
+  let timestamp;
+  if (typeof message.timestamp === "number") {
+    timestamp = new Date(message.timestamp).toISOString();
+  } else if (typeof message.timestamp === "string") {
+    timestamp = message.timestamp;
+  }
+  // Only truthy provenance fields are carried over, in this fixed order.
+  const metadata = {};
+  for (const key of ["api", "provider", "model", "usage", "stopReason"]) {
+    const value = message[key];
+    if (value) {
+      metadata[key] = value;
+    }
+  }
+  const hasMetadata = Object.keys(metadata).length > 0;
+  return {
+    role,
+    content,
+    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
+    timestamp,
+    metadata: hasMetadata ? metadata : void 0
+  };
+}
+/**
+ * Extract the text of a message's content.
+ *
+ * @returns The content itself when it is a string; the `text` of every
+ *   `{ type: "text" }` part joined with newlines when it is an array; undefined
+ *   when it is neither or when no text part is present.
+ */
+function extractTextContent2(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return void 0;
+  }
+  const texts = content
+    .filter((part) => part && typeof part === "object" && part.type === "text" && typeof part.text === "string")
+    .map((part) => part.text);
+  return texts.length > 0 ? texts.join("\n") : void 0;
+}
+/**
+ * Collect tool calls from a message's content parts.
+ *
+ * Each `tool_use` part with a string `name` becomes `{ tool, input, id }`.
+ * A later `tool_result` part whose `tool_use_id` matches a collected call's
+ * id attaches its `content` as that call's `output`.
+ *
+ * @returns The collected calls; an empty array when `content` is not an array.
+ */
+function extractToolCalls(content) {
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  const calls = [];
+  for (const part of content) {
+    if (!part || typeof part !== "object") {
+      continue;
+    }
+    if (part.type === "tool_use") {
+      if (typeof part.name === "string") {
+        calls.push({
+          tool: part.name,
+          input: part.input,
+          id: typeof part.id === "string" ? part.id : void 0
+        });
+      }
+    } else if (part.type === "tool_result" && typeof part.tool_use_id === "string") {
+      const idx = calls.findIndex((call) => call.id === part.tool_use_id);
+      if (idx !== -1) {
+        calls[idx] = { ...calls[idx], output: part.content };
+      }
+    }
+  }
+  return calls;
+}
+/**
+ * Return the content of the last assistant message that has truthy content:
+ * as-is when it is a string, JSON-serialized otherwise. Returns "" when there
+ * is no such message.
+ */
+function extractAssistantText2(messages) {
+  let idx = messages.length;
+  while (idx-- > 0) {
+    const { role, content } = messages[idx];
+    if (role !== "assistant" || !content) {
+      continue;
+    }
+    return typeof content === "string" ? content : JSON.stringify(content);
+  }
+  return "";
+}
+/**
+ * Rewrite every `@[label]:` marker in the prompt as `[[label]]:`.
+ * Other uses of `@` are left untouched.
+ */
+function escapeAtSymbols(prompt) {
+  const markerPattern = /@\[([^\]]+)\]:/g;
+  return prompt.replace(markerPattern, (_match, label) => `[[${label}]]:`);
+}
+/**
+ * Choose the text used as error detail: trimmed stderr when non-empty, else
+ * trimmed stdout when non-empty, else undefined.
+ */
+function pickDetail2(stderr, stdout) {
+  // stdout is only inspected when stderr is blank.
+  return stderr.trim() || stdout.trim() || void 0;
+}
+/**
+ * Build the " after Ns" suffix for a timeout message, rounding the timeout up
+ * to whole seconds. Returns "" when the timeout is falsy or not positive.
+ */
+function formatTimeoutSuffix3(timeoutMs) {
+  const hasTimeout = Boolean(timeoutMs) && !(timeoutMs <= 0);
+  return hasTimeout ? ` after ${Math.ceil(timeoutMs / 1e3)}s` : "";
+}
+/**
+ * Default process runner: spawn the Pi executable and collect its output.
+ *
+ * @param options - `{ executable, args, cwd, env, timeoutMs, signal,
+ *   onStdoutChunk, onStderrChunk }` as read below.
+ * @returns A promise resolving on process close with
+ *   `{ stdout, stderr, exitCode, timedOut }`; it rejects only on the child's
+ *   "error" event (for example when the executable cannot be spawned).
+ */
+async function defaultPiRunner(options) {
+  return await new Promise((resolve2, reject) => {
+    // `executable` may carry leading arguments separated by whitespace.
+    // NOTE(review): because of this split, a path containing spaces is not
+    // passed through as one executable — confirm that is intended.
+    const parts = options.executable.split(/\s+/);
+    const executable = parts[0];
+    const executableArgs = parts.slice(1);
+    const allArgs = [...executableArgs, ...options.args];
+    const child = spawn22(executable, allArgs, {
+      cwd: options.cwd,
+      env: options.env,
+      stdio: ["pipe", "pipe", "pipe"],
+      shell: false
+    });
+    let stdout = "";
+    let stderr = "";
+    let timedOut = false;
+    const onAbort = () => {
+      child.kill("SIGTERM");
+    };
+    if (options.signal) {
+      if (options.signal.aborted) {
+        // Already aborted before the listener could be attached: stop the child now.
+        onAbort();
+      } else {
+        options.signal.addEventListener("abort", onAbort, { once: true });
+      }
+    }
+    let timeoutHandle;
+    if (options.timeoutMs && options.timeoutMs > 0) {
+      timeoutHandle = setTimeout(() => {
+        // Record the reason before terminating so the result reports a timeout.
+        timedOut = true;
+        child.kill("SIGTERM");
+      }, options.timeoutMs);
+      timeoutHandle.unref?.();
+    }
+    child.stdout.setEncoding("utf8");
+    child.stdout.on("data", (chunk) => {
+      stdout += chunk;
+      options.onStdoutChunk?.(chunk);
+    });
+    child.stderr.setEncoding("utf8");
+    child.stderr.on("data", (chunk) => {
+      stderr += chunk;
+      options.onStderrChunk?.(chunk);
+    });
+    // Nothing is sent on stdin; close it immediately.
+    child.stdin.end();
+    // Release the timer and the abort listener once the child has finished or failed.
+    const cleanup = () => {
+      if (timeoutHandle) {
+        clearTimeout(timeoutHandle);
+      }
+      if (options.signal) {
+        options.signal.removeEventListener("abort", onAbort);
+      }
+    };
+    child.on("error", (error40) => {
+      cleanup();
+      reject(error40);
+    });
+    child.on("close", (code) => {
+      cleanup();
+      resolve2({
+        stdout,
+        stderr,
+        // A non-numeric close code is reported as -1.
+        exitCode: typeof code === "number" ? code : -1,
+        timedOut
+      });
+    });
+  });
+}
 var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
 {{userQuery}}
@@ -37640,7 +38661,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = path102.basename(absolutePath);
+    const fileName = path112.basename(absolutePath);
     const fileUri = pathToFileUri22(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -37665,8 +38686,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = path102.resolve(attachment);
-    const normalized = absolutePath.split(path102.sep).join("/");
+    const absolutePath = path112.resolve(attachment);
+    const normalized = absolutePath.split(path112.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -37681,7 +38702,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = path102.resolve(attachment);
+    const absolutePath = path112.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -37689,7 +38710,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri22(filePath) {
-  const absolutePath = path102.isAbsolute(filePath) ? filePath : path102.resolve(filePath);
+  const absolutePath = path112.isAbsolute(filePath) ? filePath : path112.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -37702,7 +38723,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(path102.resolve(attachment));
+    deduped.add(path112.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -37711,7 +38732,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(path102.resolve(inputFile));
+      deduped.add(path112.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -37791,7 +38812,7 @@ async function fileExists3(filePath) {
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = path112.resolve(filePath);
+  const absolutePath = path122.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
@@ -37821,6 +38842,8 @@ function createProvider(target) {
       return new CliProvider(target.name, target.config);
     case "codex":
       return new CodexProvider(target.name, target.config);
+    case "pi-coding-agent":
+      return new PiCodingAgentProvider(target.name, target.config);
     case "mock":
       return new MockProvider(target.name, target.config);
     case "vscode":
@@ -37832,6 +38855,70 @@ function createProvider(target) {
     }
   }
 }
+/**
+ * Looks up `Bun.spawn` on the global object.
+ * @returns {Function | undefined} `globalThis.Bun.spawn` when it is a function, otherwise undefined.
+ */
+function getBunSpawn() {
+  const candidate = globalThis.Bun?.spawn;
+  if (typeof candidate !== "function") {
+    return void 0;
+  }
+  return candidate;
+}
+/**
+ * Runs `command7` through a shell, feeds `stdinPayload` to its stdin and collects its output.
+ * Uses `Bun.spawn` when `getBunSpawn()` returns a function and `node:child_process` otherwise.
+ *
+ * @param {string} command7 - Shell command line to execute.
+ * @param {string} stdinPayload - Text delivered on the child's stdin.
+ * @param {{cwd?: string, timeoutMs?: number}} [options] - A falsy `timeoutMs` disables the timeout.
+ * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>} `exitCode` is -1 when
+ *   the child ended without a numeric exit code (for example when terminated by a signal).
+ * @throws {Error} "Process timed out after <n>ms" when the timeout fires, on either runtime.
+ */
+async function execShellWithStdin(command7, stdinPayload, options = {}) {
+  const { cwd, timeoutMs } = options;
+  const bunSpawn = getBunSpawn();
+  if (bunSpawn) {
+    const encoder = new TextEncoder();
+    const proc = bunSpawn({
+      cmd: ["sh", "-c", command7],
+      cwd,
+      stdin: encoder.encode(stdinPayload),
+      stdout: "pipe",
+      stderr: "pipe"
+    });
+    let timedOut = false;
+    const timeout = timeoutMs ? setTimeout(() => {
+      timedOut = true;
+      proc.kill();
+    }, timeoutMs) : void 0;
+    try {
+      // Drain both pipes concurrently: reading them one after the other can stall a child
+      // that fills one pipe while we are still waiting on the other.
+      const [stdout, stderr, exitCode] = await Promise.all([
+        new Response(proc.stdout).text(),
+        new Response(proc.stderr).text(),
+        proc.exited
+      ]);
+      if (timedOut) {
+        // Match the Node path below, which rejects on timeout instead of returning output.
+        throw new Error(`Process timed out after ${timeoutMs}ms`);
+      }
+      return { stdout, stderr, exitCode: typeof exitCode === "number" ? exitCode : -1 };
+    } finally {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+    }
+  }
+  const { spawn: spawn3 } = await import("node:child_process");
+  return await new Promise((resolve2, reject) => {
+    const child = spawn3(command7, {
+      shell: true,
+      cwd,
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    let stdout = "";
+    let stderr = "";
+    const timeout = timeoutMs ? setTimeout(() => {
+      child.kill();
+      reject(new Error(`Process timed out after ${timeoutMs}ms`));
+    }, timeoutMs) : void 0;
+    const clearTimer = () => {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+    };
+    child.stdout?.on("data", (data) => {
+      stdout += data.toString();
+    });
+    child.stderr?.on("data", (data) => {
+      stderr += data.toString();
+    });
+    // A child that exits without reading stdin makes the write below fail (EPIPE). Without
+    // a listener that stream error becomes an uncaught exception; the exit code already
+    // tells the caller how the command ended, so the write failure is deliberately ignored.
+    child.stdin?.on("error", () => {
+    });
+    child.on("error", (error40) => {
+      clearTimer();
+      reject(error40);
+    });
+    // "close" fires once the stdio streams have ended, so buffered output is complete;
+    // "exit" may fire while stdout/stderr are still open.
+    child.on("close", (code) => {
+      clearTimer();
+      // `code` is null when the child was terminated by a signal: report failure, not 0.
+      resolve2({ stdout, stderr, exitCode: typeof code === "number" ? code : -1 });
+    });
+    child.stdin?.write(stdinPayload);
+    child.stdin?.end();
+  });
+}
 var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
 Use the reference_answer as a gold standard for a high-quality response (if provided). The reference_answer may be a simple text response, or it may contain a sequence of expected agent messages including tool calls. When it contains multiple messages, the last message represents the final expected answer. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -38107,17 +39194,17 @@ var CodeEvaluator = class {
     const inputPayload = JSON.stringify(
       {
         question: context.evalCase.question,
-        expected_outcome: context.evalCase.expected_outcome,
-        expected_messages: context.evalCase.expected_messages,
-        reference_answer: context.evalCase.reference_answer,
-        candidate_answer: context.candidate,
-        output_messages: context.outputMessages ?? null,
-        guideline_files: context.evalCase.guideline_paths,
-        input_files: context.evalCase.file_paths.filter(
-          (path132) => !context.evalCase.guideline_paths.includes(path132)
+        expectedOutcome: context.evalCase.expected_outcome,
+        expectedMessages: context.evalCase.expected_messages,
+        referenceAnswer: context.evalCase.reference_answer,
+        candidateAnswer: context.candidate,
+        outputMessages: context.outputMessages ?? null,
+        guidelineFiles: context.evalCase.guideline_paths,
+        inputFiles: context.evalCase.file_paths.filter(
+          (path142) => !context.evalCase.guideline_paths.includes(path142)
         ),
-        input_messages: context.evalCase.input_messages,
-        candidate_trace_summary: context.traceSummary ?? null
+        inputMessages: context.evalCase.input_messages,
+        traceSummary: context.traceSummary ?? null
       },
       null,
       2
@@ -38187,43 +39274,17 @@ function calculateRubricScore(result, rubrics) {
   return { score, verdict, hits, misses };
 }
 async function executeScript(scriptPath, input, agentTimeoutMs, cwd) {
-  const { spawn: spawn22 } = await import("node:child_process");
-  return await new Promise((resolve2, reject) => {
-    const child = spawn22(scriptPath, {
-      shell: true,
-      cwd
-    });
-    let stdout = "";
-    let stderr = "";
-    const timeout = agentTimeoutMs ? setTimeout(() => {
-      child.kill();
-      reject(new Error(`Code evaluator timed out after ${agentTimeoutMs}ms`));
-    }, agentTimeoutMs) : void 0;
-    child.stdout?.on("data", (data) => {
-      stdout += data.toString();
-    });
-    child.stderr?.on("data", (data) => {
-      stderr += data.toString();
-    });
-    child.on("error", (error40) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      reject(error40);
-    });
-    child.on("exit", (code) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      if (code && code !== 0 && stderr.length > 0) {
-        reject(new Error(`Code evaluator exited with code ${code}: ${stderr.trim()}`));
-        return;
-      }
-      resolve2(stdout.trim());
-    });
-    child.stdin?.write(input);
-    child.stdin?.end();
+  const { stdout, stderr, exitCode } = await execShellWithStdin(scriptPath, input, {
+    cwd,
+    timeoutMs: agentTimeoutMs
   });
+  if (exitCode !== 0) {
+    const trimmedErr = stderr.trim();
+    throw new Error(
+      trimmedErr.length > 0 ? `Code evaluator exited with code ${exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${exitCode}`
+    );
+  }
+  return stdout.trim();
 }
 function parseJsonSafe(payload) {
   try {
@@ -38237,6 +39298,33 @@ function substituteVariables(template, variables) {
     return variables[varName] ?? match;
   });
 }
+/**
+ * Structural equality for plain values: primitives compare with `===`, arrays element by
+ * element, and other objects by their own enumerable string keys.
+ * @param {*} a
+ * @param {*} b
+ * @returns {boolean} True when both values have the same structure and leaf values.
+ */
+function deepEqual(a, b) {
+  if (a === b) {
+    return true;
+  }
+  // The values are not identical, so only two non-null objects can still be equal.
+  const bothObjects = a !== null && b !== null && typeof a === "object" && typeof b === "object";
+  if (!bothObjects) {
+    return false;
+  }
+  const leftIsArray = Array.isArray(a);
+  if (leftIsArray !== Array.isArray(b)) {
+    return false;
+  }
+  if (leftIsArray) {
+    return a.length === b.length && a.every((item, index) => deepEqual(item, b[index]));
+  }
+  const leftKeys = Object.keys(a);
+  if (leftKeys.length !== Object.keys(b).length) {
+    return false;
+  }
+  for (const key2 of leftKeys) {
+    if (!Object.hasOwn(b, key2) || !deepEqual(a[key2], b[key2])) {
+      return false;
+    }
+  }
+  return true;
+}
+/**
+ * Checks whether a tool call's actual arguments satisfy the expected ones. Matching is
+ * partial: every expected key must be an own property of `actual` with a deeply equal
+ * value, while extra keys in `actual` are ignored.
+ * @param {object | "any" | null | undefined} expected - `undefined`, `null`, or "any" place
+ *   no constraint on the arguments.
+ * @param {*} actual - Arguments recorded for the call; anything that is not a non-null
+ *   object cannot satisfy a constraint.
+ * @returns {boolean} True when the actual arguments satisfy the expectation.
+ */
+function argsMatch(expected, actual) {
+  if (expected === void 0 || expected === null || expected === "any") return true;
+  // Object.hasOwn throws on null/undefined, so reject non-object arguments up front.
+  if (actual === null || typeof actual !== "object") return false;
+  return Object.keys(expected).every(
+    (key2) => Object.hasOwn(actual, key2) && deepEqual(expected[key2], actual[key2])
+  );
+}
 var ToolTrajectoryEvaluator = class {
   kind = "tool_trajectory";
   config;
@@ -38293,7 +39381,10 @@ var ToolTrajectoryEvaluator = class {
     for (const message of messages) {
       if (message.toolCalls) {
         for (const call of message.toolCalls) {
-          toolCalls.push({ name: call.tool });
+          toolCalls.push({
+            name: call.tool,
+            args: call.input
+          });
         }
       }
     }
@@ -38362,18 +39453,29 @@ var ToolTrajectoryEvaluator = class {
     const misses = [];
     let actualIndex = 0;
     for (let i = 0; i < expected.length; i++) {
-      const expectedTool = expected[i].tool;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
       let found = false;
+      let argsMismatch = false;
       while (actualIndex < toolCalls.length) {
-        if (toolCalls[actualIndex].name === expectedTool) {
-          hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+        const actualCall = toolCalls[actualIndex];
+        if (actualCall.name === expectedTool) {
+          if (argsMatch(expectedItem.args, actualCall.args)) {
+            hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+            actualIndex++;
+            found = true;
+            break;
+          }
+          misses.push(
+            `Expected ${expectedTool} at position ${i}: tool found at ${actualIndex} but args mismatch`
+          );
           actualIndex++;
-          found = true;
+          argsMismatch = true;
           break;
         }
         actualIndex++;
       }
-      if (!found) {
+      if (!found && !argsMismatch) {
         misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
       }
     }
@@ -38404,10 +39506,16 @@ var ToolTrajectoryEvaluator = class {
     }
     const checkLength = Math.min(expected.length, toolCalls.length);
     for (let i = 0; i < checkLength; i++) {
-      const expectedTool = expected[i].tool;
-      const actualTool = toolCalls[i].name;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
+      const actualCall = toolCalls[i];
+      const actualTool = actualCall.name;
       if (actualTool === expectedTool) {
-        hits.push(`Position ${i}: ${expectedTool} \u2713`);
+        if (argsMatch(expectedItem.args, actualCall.args)) {
+          hits.push(`Position ${i}: ${expectedTool}`);
+        } else {
+          misses.push(`Position ${i}: ${expectedTool} args mismatch`);
+        }
       } else {
         misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
       }
@@ -39038,7 +40146,12 @@ async function runBatchEvaluation(options) {
     const promptInputs = promptInputsList[i];
     const providerResponse = batchResponse[i];
     const outputMessages = providerResponse.outputMessages;
-    const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+      tokenUsage: providerResponse.tokenUsage,
+      costUsd: providerResponse.costUsd,
+      durationMs: providerResponse.durationMs
+    }) : void 0;
     const candidate = extractLastAssistantContent(outputMessages);
     let result;
     try {
@@ -39159,7 +40272,12 @@ async function runEvalCase(options) {
     await cache.set(cacheKey, providerResponse);
   }
   const outputMessages = providerResponse.outputMessages;
-  const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+    tokenUsage: providerResponse.tokenUsage,
+    costUsd: providerResponse.costUsd,
+    durationMs: providerResponse.durationMs
+  }) : void 0;
   const candidate = extractLastAssistantContent(outputMessages);
   try {
     return await evaluateCandidate({
@@ -39232,21 +40350,21 @@ async function evaluateCandidate(options) {
   }
   return {
     timestamp: completedAt.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
     misses: score.misses,
-    candidate_answer: candidate,
+    candidateAnswer: candidate,
     target: target.name,
     reasoning: score.reasoning,
-    raw_aspects: score.rawAspects,
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
-    evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
-    evaluator_results: evaluatorResults,
-    trace_summary: traceSummary
+    rawAspects: score.rawAspects,
+    agentProviderRequest,
+    lmProviderRequest,
+    evaluatorProviderRequest: evaluatorResults ? void 0 : score.evaluatorRawRequest,
+    evaluatorResults,
+    traceSummary
   };
 }
 async function runEvaluatorsForCase(options) {
@@ -39344,7 +40462,7 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "code") {
@@ -39375,11 +40493,11 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "composite") {
-        const evalFileDir = evalCase.guideline_paths[0] ? path122.dirname(evalCase.guideline_paths[0]) : process.cwd();
+        const evalFileDir = evalCase.guideline_paths[0] ? path132.dirname(evalCase.guideline_paths[0]) : process.cwd();
         const createEvaluator = (memberConfig) => {
           switch (memberConfig.type) {
             case "llm_judge":
@@ -39432,8 +40550,8 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest,
-          evaluator_results: mapChildResults(score2.evaluatorResults)
+          evaluatorProviderRequest: score2.evaluatorRawRequest,
+          evaluatorResults: mapChildResults(score2.evaluatorResults)
         });
       }
       if (evaluator.type === "tool_trajectory") {
@@ -39591,22 +40709,22 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
 async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
-  const filePath = path122.resolve(directory, filename);
-  await mkdir22(path122.dirname(filePath), { recursive: true });
+  const filePath = path132.resolve(directory, filename);
+  await mkdir32(path132.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await writeFile22(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await writeFile32(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
     return "prompt";
   }
   const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
-  return sanitized.length > 0 ? sanitized : randomUUID2();
+  return sanitized.length > 0 ? sanitized : randomUUID3();
 }
 async function invokeProvider(provider, options) {
   const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -39663,17 +40781,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error40, promptInputs
   }
   return {
     timestamp: timestamp.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
-    candidate_answer: `Error occurred: ${message}`,
+    candidateAnswer: `Error occurred: ${message}`,
     target: targetName,
-    raw_aspects: [],
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
+    rawAspects: [],
+    agentProviderRequest,
+    lmProviderRequest,
     error: message
   };
 }
@@ -39718,8 +40836,8 @@ function mapChildResults(children) {
     hits: child.hits,
     misses: child.misses,
     reasoning: child.reasoning,
-    evaluator_provider_request: child.evaluatorRawRequest,
-    evaluator_results: mapChildResults(child.evaluatorResults)
+    evaluatorProviderRequest: child.evaluatorRawRequest,
+    evaluatorResults: mapChildResults(child.evaluatorResults)
   }));
 }
 function computeWeightedMean(entries) {
@@ -39810,10 +40928,10 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
 }
 // src/commands/convert/index.ts
-import { command, option, optional as optional2, positional, string as string4 } from "cmd-ts";
+import { command as command2, option as option2, optional as optional3, positional as positional2, string as string5 } from "cmd-ts";
 import { stringify as stringifyYaml } from "yaml";
 function convertJsonlToYaml(inputPath, outputPath) {
-  const content = readFileSync(inputPath, "utf8");
+  const content = readFileSync2(inputPath, "utf8");
   const lines = content.trim().split("\n").filter((line2) => line2.trim());
   let yamlOutput = "";
   let isFirst = true;
@@ -39831,17 +40949,17 @@ function convertJsonlToYaml(inputPath, outputPath) {
   writeFileSync(outputPath, yamlOutput);
   return lines.length;
 }
-var convertCommand = command({
+var convertCommand = command2({
   name: "convert",
   description: "Convert evaluation results from JSONL to YAML format",
   args: {
-    input: positional({
-      type: string4,
+    input: positional2({
+      type: string5,
       displayName: "input",
       description: "Path to input JSONL file"
     }),
-    out: option({
-      type: optional2(string4),
+    out: option2({
+      type: optional3(string5),
       long: "out",
       short: "o",
       description: "Output file path (defaults to input path with .yaml extension)"
@@ -39867,13 +40985,13 @@ var convertCommand = command({
 import { stat as stat4 } from "node:fs/promises";
 import path21 from "node:path";
 import {
-  command as command2,
+  command as command3,
   flag,
-  number as number4,
-  option as option2,
-  optional as optional3,
+  number as number5,
+  option as option3,
+  optional as optional4,
   restPositionals,
-  string as string5
+  string as string6
 } from "cmd-ts";
 import fg from "fast-glob";
@@ -39955,7 +41073,7 @@ async function loadEnvFromHierarchy(options) {
 }
 // src/commands/eval/jsonl-writer.ts
-import { createWriteStream as createWriteStream2 } from "node:fs";
+import { createWriteStream as createWriteStream3 } from "node:fs";
 import { mkdir as mkdir5 } from "node:fs/promises";
 import path16 from "node:path";
 import { finished } from "node:stream/promises";
@@ -40176,7 +41294,7 @@ var JsonlWriter = class _JsonlWriter {
   }
   static async open(filePath) {
     await mkdir5(path16.dirname(filePath), { recursive: true });
-    const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
+    const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
     return new _JsonlWriter(stream);
   }
   async append(record2) {
@@ -40205,7 +41323,7 @@ var JsonlWriter = class _JsonlWriter {
 };
 // src/commands/eval/yaml-writer.ts
-import { createWriteStream as createWriteStream3 } from "node:fs";
+import { createWriteStream as createWriteStream4 } from "node:fs";
 import { mkdir as mkdir6 } from "node:fs/promises";
 import path17 from "node:path";
 import { finished as finished2 } from "node:stream/promises";
@@ -40220,7 +41338,7 @@ var YamlWriter = class _YamlWriter {
   }
   static async open(filePath) {
     await mkdir6(path17.dirname(filePath), { recursive: true });
-    const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
+    const stream = createWriteStream4(filePath, { flags: "w", encoding: "utf8" });
     return new _YamlWriter(stream);
   }
   async append(record2) {
@@ -40336,7 +41454,7 @@ var ProgressDisplay = class {
         break;
     }
   }
-  addLogPaths(paths) {
+  addLogPaths(paths, provider) {
     const newPaths = [];
     for (const path28 of paths) {
       if (this.logPathSet.has(path28)) {
@@ -40351,7 +41469,8 @@ var ProgressDisplay = class {
     this.logPaths.push(...newPaths);
     if (!this.hasPrintedLogHeader) {
       console.log("");
-      console.log("Codex CLI logs:");
+      const label = provider === "pi" ? "Pi Coding Agent" : "Codex CLI";
+      console.log(`${label} logs:`);
       this.hasPrintedLogHeader = true;
     }
     const startIndex = this.logPaths.length - newPaths.length;
@@ -40419,7 +41538,7 @@ function buildHistogram(values) {
 function calculateEvaluationSummary(results) {
   const scores = results.map((result) => result.score);
   const total = results.length;
-  const errors = results.filter((result) => result.error !== void 0).map((result) => ({ evalId: result.eval_id, error: result.error }));
+  const errors = results.filter((result) => result.error !== void 0).map((result) => ({ evalId: result.evalId, error: result.error }));
   const errorCount = errors.length;
   if (total === 0) {
     return {
@@ -40500,11 +41619,11 @@ function formatEvaluationSummary(summary) {
   }
   lines.push("\nTop performing eval cases:");
   summary.topResults.forEach((result, index) => {
-    lines.push(`  ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
+    lines.push(`  ${index + 1}. ${result.evalId}: ${formatScore(result.score)}`);
   });
   lines.push("\nLowest performing eval cases:");
   summary.bottomResults.forEach((result, index) => {
-    lines.push(`  ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
+    lines.push(`  ${index + 1}. ${result.evalId}: ${formatScore(result.score)}`);
   });
   return lines.join("\n");
 }
@@ -40863,27 +41982,6 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
   "trace"
   // For testing tool_trajectory evaluator
 ]);
-var CLI_SETTINGS = /* @__PURE__ */ new Set([
-  ...COMMON_SETTINGS,
-  "command_template",
-  "commandTemplate",
-  "verbose",
-  "cli_verbose",
-  "cliVerbose",
-  "files_format",
-  "filesFormat",
-  "attachments_format",
-  "attachmentsFormat",
-  "cwd",
-  "env",
-  "timeout_seconds",
-  "timeoutSeconds",
-  "healthcheck",
-  "keep_temp_files",
-  "keepTempFiles",
-  "keep_output_files",
-  "keepOutputFiles"
-]);
 function getKnownSettings(provider) {
   const normalizedProvider = provider.toLowerCase();
   switch (normalizedProvider) {
@@ -40905,7 +42003,7 @@ function getKnownSettings(provider) {
     case "mock":
       return MOCK_SETTINGS;
     case "cli":
-      return CLI_SETTINGS;
+      return null;
     default:
       return null;
   }
@@ -40954,7 +42052,7 @@ async function validateTargetsFile(filePath) {
         severity: "error",
         filePath: absolutePath2,
         location: `${location}.commandTemplate`,
-        message: "CLI provider requires 'commandTemplate' as a non-empty string"
+        message: "CLI provider requires 'command_template' or 'commandTemplate' as a non-empty string"
       });
     } else {
       recordUnknownPlaceholders(
@@ -40964,58 +42062,10 @@ async function validateTargetsFile(filePath) {
         errors2
       );
     }
-    const attachmentsFormat = target.attachments_format ?? target.attachmentsFormat;
-    if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
-      errors2.push({
-        severity: "error",
-        filePath: absolutePath2,
-        location: `${location}.attachmentsFormat`,
-        message: "'attachmentsFormat' must be a string when provided"
-      });
-    }
-    const filesFormat = target.files_format ?? target.filesFormat;
-    if (filesFormat !== void 0 && typeof filesFormat !== "string") {
-      errors2.push({
-        severity: "error",
-        filePath: absolutePath2,
-        location: `${location}.filesFormat`,
-        message: "'filesFormat' must be a string when provided"
-      });
-    }
-    const cwd = target.cwd;
-    if (cwd !== void 0 && typeof cwd !== "string") {
-      errors2.push({
-        severity: "error",
-        filePath: absolutePath2,
-        location: `${location}.cwd`,
-        message: "'cwd' must be a string when provided"
-      });
-    }
-    const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
-    if (timeoutSeconds !== void 0) {
-      const numericTimeout = Number(timeoutSeconds);
-      if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
-        errors2.push({
-          severity: "error",
-          filePath: absolutePath2,
-          location: `${location}.timeoutSeconds`,
-          message: "'timeoutSeconds' must be a positive number when provided"
-        });
-      }
-    }
     const healthcheck = target.healthcheck;
     if (healthcheck !== void 0) {
       validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
     }
-    const verbose = target.verbose ?? target.cli_verbose ?? target.cliVerbose;
-    if (verbose !== void 0 && typeof verbose !== "boolean") {
-      errors2.push({
-        severity: "error",
-        filePath: absolutePath2,
-        location: `${location}.verbose`,
-        message: "'verbose' must be a boolean when provided"
-      });
-    }
   }
   function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
     if (!isObject22(healthcheck)) {
@@ -41639,12 +42689,12 @@ function buildDefaultOutputPath(cwd, format) {
   const extension = getDefaultExtension(format);
   return path20.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
 }
-function resolvePromptDirectory(option5, cwd) {
-  if (option5 === void 0) {
+function resolvePromptDirectory(option6, cwd) {
+  if (option6 === void 0) {
     return void 0;
   }
-  if (typeof option5 === "string" && option5.trim().length > 0) {
-    return path20.resolve(cwd, option5);
+  if (typeof option6 === "string" && option6.trim().length > 0) {
+    return path20.resolve(cwd, option6);
   }
   return path20.join(cwd, ".agentv", "prompts");
 }
@@ -41667,7 +42717,7 @@ function createProgressReporter(maxWorkers, options) {
     setTotal: (total) => display.setTotalTests(total),
     update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
     finish: () => display.finish(),
-    addLogPaths: (paths) => display.addLogPaths(paths)
+    addLogPaths: (paths, provider) => display.addLogPaths(paths, provider)
   };
 }
 function makeEvalKey(testFilePath, evalId) {
@@ -41885,7 +42935,15 @@ async function runEvalCommand(input) {
       return;
     }
     seenCodexLogPaths.add(entry.filePath);
-    progressReporter.addLogPaths([entry.filePath]);
+    progressReporter.addLogPaths([entry.filePath], "codex");
+  });
+  const seenPiLogPaths = /* @__PURE__ */ new Set();
+  const unsubscribePiLogs = subscribeToPiLogEntries((entry) => {
+    if (!entry.filePath || seenPiLogPaths.has(entry.filePath)) {
+      return;
+    }
+    seenPiLogPaths.add(entry.filePath);
+    progressReporter.addLogPaths([entry.filePath], "pi");
   });
   for (const [testFilePath, meta] of fileMetadata.entries()) {
     for (const evalId of meta.evalIds) {
@@ -41939,6 +42997,7 @@ Results written to: ${outputPath}`);
     }
   } finally {
     unsubscribeCodexLogs();
+    unsubscribePiLogs();
     await outputWriter.close().catch(() => void 0);
   }
 }
@@ -41960,44 +43019,44 @@ async function resolveEvaluationRunner() {
 }
 // src/commands/eval/index.ts
-var evalCommand = command2({
+var evalCommand = command3({
   name: "eval",
   description: "Run eval suites and report results",
   args: {
     evalPaths: restPositionals({
-      type: string5,
+      type: string6,
       displayName: "eval-paths",
       description: "Path(s) or glob(s) to evaluation .yaml file(s)"
     }),
-    target: option2({
-      type: string5,
+    target: option3({
+      type: string6,
       long: "target",
       description: "Override target name from targets.yaml",
       defaultValue: () => "default"
     }),
-    targets: option2({
-      type: optional3(string5),
+    targets: option3({
+      type: optional4(string6),
       long: "targets",
       description: "Path to targets.yaml (overrides discovery)"
     }),
-    evalId: option2({
-      type: optional3(string5),
+    evalId: option3({
+      type: optional4(string6),
       long: "eval-id",
       description: "Run only the eval case with this identifier"
     }),
-    workers: option2({
-      type: number4,
+    workers: option3({
+      type: number5,
       long: "workers",
       description: "Number of parallel workers (default: 3, max: 50). Can also be set per-target in targets.yaml",
       defaultValue: () => 3
     }),
-    out: option2({
-      type: optional3(string5),
+    out: option3({
+      type: optional4(string6),
       long: "out",
       description: "Write results to the specified path"
     }),
-    outputFormat: option2({
-      type: string5,
+    outputFormat: option3({
+      type: string6,
       long: "output-format",
       description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
       defaultValue: () => "jsonl"
@@ -42006,32 +43065,32 @@ var evalCommand = command2({
       long: "dry-run",
       description: "Use mock provider responses instead of real LLM calls"
     }),
-    dryRunDelay: option2({
-      type: number4,
+    dryRunDelay: option3({
+      type: number5,
       long: "dry-run-delay",
       description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
       defaultValue: () => 0
     }),
-    dryRunDelayMin: option2({
-      type: number4,
+    dryRunDelayMin: option3({
+      type: number5,
       long: "dry-run-delay-min",
       description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
       defaultValue: () => 0
     }),
-    dryRunDelayMax: option2({
-      type: number4,
+    dryRunDelayMax: option3({
+      type: number5,
       long: "dry-run-delay-max",
       description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
       defaultValue: () => 0
     }),
-    agentTimeout: option2({
-      type: number4,
+    agentTimeout: option3({
+      type: number5,
       long: "agent-timeout",
       description: "Timeout in seconds for provider responses (default: 120)",
       defaultValue: () => 120
     }),
-    maxRetries: option2({
-      type: number4,
+    maxRetries: option3({
+      type: number5,
       long: "max-retries",
       description: "Retry count for timeout recoveries (default: 2)",
       defaultValue: () => 2
@@ -42044,8 +43103,8 @@ var evalCommand = command2({
       long: "verbose",
       description: "Enable verbose logging"
     }),
-    dumpPrompts: option2({
-      type: optional3(string5),
+    dumpPrompts: option3({
+      type: optional4(string6),
       long: "dump-prompts",
       description: "Directory path for persisting prompt payloads for debugging"
     }),
@@ -42131,7 +43190,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
 }
 // src/commands/generate/index.ts
-import { command as command3, flag as flag2, option as option3, optional as optional4, positional as positional3, string as string6, subcommands } from "cmd-ts";
+import { command as command4, flag as flag2, option as option4, optional as optional5, positional as positional4, string as string7, subcommands } from "cmd-ts";
 // src/commands/generate/rubrics.ts
 import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
@@ -42274,17 +43333,17 @@ function extractQuestion(evalCase) {
 }
 // src/commands/generate/index.ts
-var rubricsCommand = command3({
+var rubricsCommand = command4({
   name: "rubrics",
   description: "Generate rubrics from expected_outcome in YAML eval file",
   args: {
-    file: positional3({
-      type: string6,
+    file: positional4({
+      type: string7,
       displayName: "file",
       description: "Path to YAML eval file"
     }),
-    target: option3({
-      type: optional4(string6),
+    target: option4({
+      type: optional5(string7),
       long: "target",
       short: "t",
       description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
@@ -42320,10 +43379,10 @@ var generateCommand = subcommands({
 import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
 import path26 from "node:path";
 import * as readline from "node:readline/promises";
-import { command as command4, option as option4, optional as optional5, string as string7 } from "cmd-ts";
+import { command as command5, option as option5, optional as optional6, string as string8 } from "cmd-ts";
 // src/templates/index.ts
-import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
+import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
 import path25 from "node:path";
 import { fileURLToPath } from "node:url";
 function getGithubTemplates() {
@@ -42355,7 +43414,7 @@ function readTemplatesRecursively(dir, relativePath) {
     if (stat6.isDirectory()) {
       templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
     } else {
-      const content = readFileSync2(fullPath, "utf-8");
+      const content = readFileSync3(fullPath, "utf-8");
       templates.push({
         path: entryRelativePath.split(path25.sep).join("/"),
         // Normalize to forward slashes
@@ -42499,12 +43558,12 @@ Files installed to ${path26.relative(targetPath, claudeDir)}:`);
   console.log("  2. Configure targets in .agentv/targets.yaml");
   console.log("  3. Create eval files using the schema and prompt templates");
 }
-var initCmdTsCommand = command4({
+var initCmdTsCommand = command5({
   name: "init",
   description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
   args: {
-    path: option4({
-      type: optional5(string7),
+    path: option5({
+      type: optional6(string8),
       long: "path",
       description: "Target directory for initialization (default: current directory)"
     })
@@ -42520,7 +43579,7 @@ var initCmdTsCommand = command4({
 });
 // src/commands/validate/index.ts
-import { command as command5, restPositionals as restPositionals2, string as string8 } from "cmd-ts";
+import { command as command6, restPositionals as restPositionals2, string as string9 } from "cmd-ts";
 // src/commands/validate/format-output.ts
 var ANSI_RED3 = "\x1B[31m";
@@ -42706,12 +43765,12 @@ async function runValidateCommand(paths) {
     process.exit(1);
   }
 }
-var validateCommand = command5({
+var validateCommand = command6({
   name: "validate",
   description: "Validate AgentV eval and targets YAML files",
   args: {
     paths: restPositionals2({
-      type: string8,
+      type: string9,
       displayName: "paths",
       description: "Files or directories to validate"
     })
@@ -42727,12 +43786,13 @@ var validateCommand = command5({
 });
 // src/index.ts
-var packageJson = JSON.parse(readFileSync3(new URL("../package.json", import.meta.url), "utf8"));
+var packageJson = JSON.parse(readFileSync4(new URL("../package.json", import.meta.url), "utf8"));
 var app = subcommands2({
   name: "agentv",
   description: "AgentV CLI",
   version: packageJson.version,
   cmds: {
+    compare: compareCommand,
     convert: convertCommand,
     eval: evalCommand,
     generate: generateCommand,
@@ -42748,4 +43808,4 @@ export {
   app,
   runCli
 };
-//# sourceMappingURL=chunk-3RYQPI4H.js.map
+//# sourceMappingURL=chunk-HU4B6ODF.js.map