npm - @agentv/core - Versions diffs - 1.4.0 → 1.5.0 - Mend

@agentv/core 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/{chunk-KPHTMTZ3.js → chunk-E2VSU4WZ.js} +265 -83
package/dist/chunk-E2VSU4WZ.js.map +1 -0
package/dist/evaluation/validation/index.cjs +82 -71
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +3 -72
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +1475 -393
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +227 -33
package/dist/index.d.ts +227 -33
package/dist/index.js +1142 -244
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-KPHTMTZ3.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -32,17 +32,21 @@ var index_exports = {};
 __export(index_exports, {
   CodeEvaluator: () => CodeEvaluator,
   CompositeEvaluator: () => CompositeEvaluator,
+  DEFAULT_EXPLORATION_TOOLS: () => DEFAULT_EXPLORATION_TOOLS,
   LlmJudgeEvaluator: () => LlmJudgeEvaluator,
   TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
   ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
+  avgToolDurationMs: () => avgToolDurationMs,
   buildDirectoryChain: () => buildDirectoryChain2,
   buildPromptInputs: () => buildPromptInputs,
   buildSearchRoots: () => buildSearchRoots2,
   computeTraceSummary: () => computeTraceSummary,
   consumeCodexLogEntries: () => consumeCodexLogEntries,
+  consumePiLogEntries: () => consumePiLogEntries,
   createAgentKernel: () => createAgentKernel,
   createProvider: () => createProvider,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
+  explorationRatio: () => explorationRatio,
   extractCodeBlocks: () => extractCodeBlocks,
   fileExists: () => fileExists2,
   findGitRoot: () => findGitRoot,
@@ -56,6 +60,7 @@ __export(index_exports, {
   isTestMessageRole: () => isTestMessageRole,
   listTargetNames: () => listTargetNames,
   loadEvalCases: () => loadEvalCases,
+  mergeExecutionMetrics: () => mergeExecutionMetrics,
   normalizeLineEndings: () => normalizeLineEndings,
   readJsonFile: () => readJsonFile,
   readTargetDefinitions: () => readTargetDefinitions,
@@ -66,7 +71,9 @@ __export(index_exports, {
   resolveTargetDefinition: () => resolveTargetDefinition,
   runEvalCase: () => runEvalCase,
   runEvaluation: () => runEvaluation,
-  subscribeToCodexLogEntries: () => subscribeToCodexLogEntries
+  subscribeToCodexLogEntries: () => subscribeToCodexLogEntries,
+  subscribeToPiLogEntries: () => subscribeToPiLogEntries,
+  tokensPerTool: () => tokensPerTool
 });
 module.exports = __toCommonJS(index_exports);
@@ -151,6 +158,53 @@ function computeTraceSummary(messages) {
     errorCount: 0
   };
 }
+var DEFAULT_EXPLORATION_TOOLS = [
+  "read",
+  "grep",
+  "glob",
+  "search",
+  "list",
+  "Read",
+  "Grep",
+  "Glob",
+  "WebSearch",
+  "WebFetch"
+];
+function explorationRatio(summary, explorationTools = DEFAULT_EXPLORATION_TOOLS) {
+  if (summary.eventCount === 0) return void 0;
+  const explorationCalls = explorationTools.reduce(
+    (sum, tool) => sum + (summary.toolCallsByName[tool] ?? 0),
+    0
+  );
+  return explorationCalls / summary.eventCount;
+}
+function tokensPerTool(summary) {
+  if (!summary.tokenUsage || summary.eventCount === 0) return void 0;
+  const totalTokens = summary.tokenUsage.input + summary.tokenUsage.output;
+  return totalTokens / summary.eventCount;
+}
+function avgToolDurationMs(summary) {
+  if (!summary.toolDurations) return void 0;
+  let totalDuration = 0;
+  let totalCalls = 0;
+  for (const durations of Object.values(summary.toolDurations)) {
+    for (const duration of durations) {
+      totalDuration += duration;
+      totalCalls++;
+    }
+  }
+  if (totalCalls === 0) return void 0;
+  return totalDuration / totalCalls;
+}
+function mergeExecutionMetrics(summary, metrics) {
+  if (!metrics) return summary;
+  return {
+    ...summary,
+    tokenUsage: metrics.tokenUsage,
+    costUsd: metrics.costUsd,
+    durationMs: metrics.durationMs
+  };
+}
 // src/evaluation/yaml-parser.ts
 var import_promises6 = require("fs/promises");
@@ -665,7 +719,13 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
         expected = [];
         for (const item of rawExpected) {
           if (isJsonObject2(item) && typeof item.tool === "string") {
-            expected.push({ tool: item.tool });
+            let args;
+            if (item.args === "any") {
+              args = "any";
+            } else if (isJsonObject2(item.args)) {
+              args = item.args;
+            }
+            expected.push({ tool: item.tool, ...args !== void 0 ? { args } : {} });
           }
         }
       }
@@ -1940,12 +2000,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: request.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (request.signal?.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -1964,6 +2026,9 @@ var CliProvider = class {
     const parsed = this.parseOutputContent(responseContent);
     return {
       outputMessages: parsed.outputMessages,
+      tokenUsage: parsed.tokenUsage,
+      costUsd: parsed.costUsd,
+      durationMs: parsed.durationMs ?? measuredDurationMs,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
@@ -2011,12 +2076,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: controller.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (controller.signal.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -2038,11 +2105,13 @@ var CliProvider = class {
     if (missingIds.length > 0) {
       throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
     }
+    const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
     const responses = requests.map((request) => {
       const evalCaseId = request.evalCaseId;
       if (!evalCaseId) {
         return {
           outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -2056,6 +2125,7 @@ var CliProvider = class {
       if (!parsed) {
         return {
           outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -2067,6 +2137,9 @@ var CliProvider = class {
       }
       return {
         outputMessages: parsed.outputMessages,
+        tokenUsage: parsed.tokenUsage,
+        costUsd: parsed.costUsd,
+        durationMs: parsed.durationMs ?? perRequestFallbackMs,
         raw: {
           command: renderedCommand,
           stderr: result.stderr,
@@ -2084,25 +2157,55 @@ var CliProvider = class {
    * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
    * If only 'text' is provided, wrap it in outputMessages.
    * Otherwise, treat the entire content as plain text wrapped in outputMessages.
+   *
+   * Also extracts optional execution metrics:
+   * - token_usage: { input, output, cached? }
+   * - cost_usd: number
+   * - duration_ms: number
    */
   parseOutputContent(content) {
     try {
       const parsed = JSON.parse(content);
       if (typeof parsed === "object" && parsed !== null) {
         const obj = parsed;
+        const tokenUsage = this.parseTokenUsage(obj.token_usage);
+        const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+        const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
         const outputMessages = this.parseOutputMessages(obj.output_messages);
         if (outputMessages && outputMessages.length > 0) {
-          return { outputMessages };
+          return { outputMessages, tokenUsage, costUsd, durationMs };
         }
         if ("text" in obj) {
           const text = typeof obj.text === "string" ? obj.text : String(obj.text);
-          return { outputMessages: [{ role: "assistant", content: text }] };
+          return {
+            outputMessages: [{ role: "assistant", content: text }],
+            tokenUsage,
+            costUsd,
+            durationMs
+          };
         }
       }
     } catch {
     }
     return { outputMessages: [{ role: "assistant", content }] };
   }
+  /**
+   * Parse token_usage from CLI output.
+   */
+  parseTokenUsage(tokenUsage) {
+    if (typeof tokenUsage !== "object" || tokenUsage === null) {
+      return void 0;
+    }
+    const obj = tokenUsage;
+    if (typeof obj.input !== "number" || typeof obj.output !== "number") {
+      return void 0;
+    }
+    return {
+      input: obj.input,
+      output: obj.output,
+      cached: typeof obj.cached === "number" ? obj.cached : void 0
+    };
+  }
   /**
    * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
    */
@@ -2179,6 +2282,9 @@ var CliProvider = class {
       if (records.has(id)) {
         throw new Error(`CLI batch output contains duplicate id: ${id}`);
       }
+      const tokenUsage = this.parseTokenUsage(obj.token_usage);
+      const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+      const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
       const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
       let outputMessages;
       if (parsedOutputMessages && parsedOutputMessages.length > 0) {
@@ -2188,7 +2294,10 @@ var CliProvider = class {
         outputMessages = text ? [{ role: "assistant", content: text }] : [];
       }
       records.set(id, {
-        outputMessages
+        outputMessages,
+        tokenUsage,
+        costUsd,
+        durationMs
       });
     }
     return records;
@@ -2504,6 +2613,11 @@ var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exe
 var WORKSPACE_PREFIX = "agentv-codex-";
 var PROMPT_FILENAME = "prompt.md";
 var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
+var DEFAULT_SYSTEM_PROMPT2 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
 var CodexProvider = class {
   id;
   kind = "codex";
@@ -2528,7 +2642,11 @@ var CodexProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptContent = buildPromptDocument(request, inputFiles);
+      const basePrompt = buildPromptDocument(request, inputFiles);
+      const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT2;
+      const promptContent = `${systemPrompt}
+${basePrompt}`;
       const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
       await (0, import_promises9.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
@@ -3212,222 +3330,1067 @@ var MockProvider = class {
   }
 };
-// src/evaluation/providers/targets.ts
+// src/evaluation/providers/pi-coding-agent.ts
+var import_node_child_process3 = require("child_process");
+var import_node_crypto2 = require("crypto");
+var import_node_fs4 = require("fs");
+var import_promises10 = require("fs/promises");
+var import_node_os3 = require("os");
 var import_node_path11 = __toESM(require("path"), 1);
-var import_zod = require("zod");
-var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
-  "PROMPT",
-  "GUIDELINES",
-  "EVAL_ID",
-  "ATTEMPT",
-  "FILES",
-  "OUTPUT_FILE"
-]);
-var BASE_TARGET_SCHEMA = import_zod.z.object({
-  name: import_zod.z.string().min(1, "target name is required"),
-  provider: import_zod.z.string().min(1, "provider is required"),
-  judge_target: import_zod.z.string().optional(),
-  workers: import_zod.z.number().int().min(1).optional()
-}).passthrough();
-var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
-function normalizeAzureApiVersion(value) {
-  if (!value) {
-    return DEFAULT_AZURE_API_VERSION;
-  }
-  const trimmed = value.trim();
-  if (trimmed.length === 0) {
-    return DEFAULT_AZURE_API_VERSION;
+// src/evaluation/providers/pi-log-tracker.ts
+var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.piLogs");
+var GLOBAL_SUBSCRIBERS_KEY2 = Symbol.for("agentv.piLogSubscribers");
+function getPiLogStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY2];
+  if (existing) {
+    return existing;
   }
-  const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
-  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY2] = created;
+  return created;
 }
-function resolveRetryConfig(target) {
-  const maxRetries = resolveOptionalNumber(
-    target.max_retries ?? target.maxRetries,
-    `${target.name} max retries`
-  );
-  const initialDelayMs = resolveOptionalNumber(
-    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
-    `${target.name} retry initial delay`
-  );
-  const maxDelayMs = resolveOptionalNumber(
-    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
-    `${target.name} retry max delay`
-  );
-  const backoffFactor = resolveOptionalNumber(
-    target.retry_backoff_factor ?? target.retryBackoffFactor,
-    `${target.name} retry backoff factor`
-  );
-  const retryableStatusCodes = resolveOptionalNumberArray(
-    target.retry_status_codes ?? target.retryStatusCodes,
-    `${target.name} retry status codes`
-  );
-  if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
-    return void 0;
+function getSubscriberStore2() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY2];
+  if (existing) {
+    return existing;
   }
-  return {
-    maxRetries,
-    initialDelayMs,
-    maxDelayMs,
-    backoffFactor,
-    retryableStatusCodes
-  };
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY2] = created;
+  return created;
 }
-function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
-  const parsed = BASE_TARGET_SCHEMA.parse(definition);
-  const provider = parsed.provider.toLowerCase();
-  const providerBatching = resolveOptionalBoolean(
-    parsed.provider_batching ?? parsed.providerBatching
-  );
-  switch (provider) {
-    case "azure":
-    case "azure-openai":
-      return {
-        kind: "azure",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAzureConfig(parsed, env)
-      };
-    case "anthropic":
-      return {
-        kind: "anthropic",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAnthropicConfig(parsed, env)
-      };
-    case "gemini":
-    case "google":
-    case "google-gemini":
-      return {
-        kind: "gemini",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveGeminiConfig(parsed, env)
-      };
-    case "codex":
-    case "codex-cli":
-      return {
-        kind: "codex",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCodexConfig(parsed, env)
-      };
-    case "mock":
-      return {
-        kind: "mock",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveMockConfig(parsed)
-      };
-    case "vscode":
-    case "vscode-insiders":
-      return {
-        kind: provider,
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
-      };
-    case "cli":
-      return {
-        kind: "cli",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCliConfig(parsed, env, evalFilePath)
-      };
-    default:
-      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
+function notifySubscribers2(entry) {
+  const subscribers = Array.from(getSubscriberStore2());
+  for (const listener of subscribers) {
+    try {
+      listener(entry);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Pi log subscriber failed: ${message}`);
+    }
   }
 }
-function resolveAzureConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
-  const versionSource = target.version ?? target.api_version;
-  const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
-  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
-  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
-  const version = normalizeAzureApiVersion(
-    resolveOptionalString(versionSource, env, `${target.name} api version`, {
-      allowLiteral: true,
-      optionalEnv: true
-    })
-  );
-  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
-  const maxOutputTokens = resolveOptionalNumber(
-    maxTokensSource,
-    `${target.name} max output tokens`
-  );
-  const retry = resolveRetryConfig(target);
-  return {
-    resourceName,
-    deploymentName,
-    apiKey,
-    version,
-    temperature,
-    maxOutputTokens,
-    retry
-  };
+function recordPiLogEntry(entry) {
+  getPiLogStore().push(entry);
+  notifySubscribers2(entry);
 }
-function resolveAnthropicConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const modelSource = target.model ?? target.deployment ?? target.variant;
-  const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
-  const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
-  const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
-  const retry = resolveRetryConfig(target);
-  return {
-    apiKey,
-    model,
-    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
-    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
-    thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
-    retry
-  };
+function consumePiLogEntries() {
+  const store = getPiLogStore();
+  if (store.length === 0) {
+    return [];
+  }
+  return store.splice(0, store.length);
 }
-function resolveGeminiConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const modelSource = target.model ?? target.deployment ?? target.variant;
-  const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
-  const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
-    allowLiteral: true,
-    optionalEnv: true
-  }) ?? "gemini-2.5-flash";
-  const retry = resolveRetryConfig(target);
-  return {
-    apiKey,
-    model,
-    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
-    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
-    retry
+function subscribeToPiLogEntries(listener) {
+  const store = getSubscriberStore2();
+  store.add(listener);
+  return () => {
+    store.delete(listener);
   };
 }
-function resolveCodexConfig(target, env) {
-  const executableSource = target.executable ?? target.command ?? target.binary;
-  const argsSource = target.args ?? target.arguments;
-  const cwdSource = target.cwd;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+// src/evaluation/providers/pi-coding-agent.ts
+var WORKSPACE_PREFIX2 = "agentv-pi-";
+var PROMPT_FILENAME2 = "prompt.md";
+var DEFAULT_SYSTEM_PROMPT3 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
+var PiCodingAgentProvider = class {
+  id;
+  kind = "pi-coding-agent";
+  targetName;
+  supportsBatch = false;
+  config;
+  runPi;
+  constructor(targetName, config, runner = defaultPiRunner) {
+    this.id = `pi-coding-agent:${targetName}`;
+    this.targetName = targetName;
+    this.config = config;
+    this.runPi = runner;
+  }
+  async invoke(request) {
+    if (request.signal?.aborted) {
+      throw new Error("Pi coding agent request was aborted before execution");
+    }
+    const inputFiles = normalizeInputFiles2(request.inputFiles);
+    const workspaceRoot = await this.createWorkspace();
+    const logger = await this.createStreamLogger(request).catch(() => void 0);
+    try {
+      const promptFile = import_node_path11.default.join(workspaceRoot, PROMPT_FILENAME2);
+      await (0, import_promises10.writeFile)(promptFile, request.question, "utf8");
+      const args = this.buildPiArgs(request.question, inputFiles);
+      const cwd = this.resolveCwd(workspaceRoot);
+      const result = await this.executePi(args, cwd, request.signal, logger);
+      if (result.timedOut) {
+        throw new Error(
+          `Pi coding agent timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
+        );
+      }
+      if (result.exitCode !== 0) {
+        const detail = pickDetail2(result.stderr, result.stdout);
+        const prefix = `Pi coding agent exited with code ${result.exitCode}`;
+        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
+      }
+      const parsed = parsePiJsonl(result.stdout);
+      const outputMessages = extractOutputMessages(parsed);
+      const assistantText = extractAssistantText2(outputMessages);
+      return {
+        raw: {
+          response: parsed,
+          stdout: result.stdout,
+          stderr: result.stderr,
+          exitCode: result.exitCode,
+          args,
+          executable: this.config.executable,
+          promptFile,
+          workspace: workspaceRoot,
+          inputFiles,
+          logFile: logger?.filePath
+        },
+        outputMessages
+      };
+    } finally {
+      await logger?.close();
+      await this.cleanupWorkspace(workspaceRoot);
+    }
+  }
+  resolveCwd(workspaceRoot) {
+    if (!this.config.cwd) {
+      return workspaceRoot;
+    }
+    return import_node_path11.default.resolve(this.config.cwd);
+  }
+  buildPiArgs(prompt, inputFiles) {
+    const args = [];
+    if (this.config.provider) {
+      args.push("--provider", this.config.provider);
+    }
+    if (this.config.model) {
+      args.push("--model", this.config.model);
+    }
+    if (this.config.apiKey) {
+      args.push("--api-key", this.config.apiKey);
+    }
+    args.push("--mode", "json");
+    args.push("--print");
+    args.push("--no-session");
+    if (this.config.tools) {
+      args.push("--tools", this.config.tools);
+    }
+    if (this.config.thinking) {
+      args.push("--thinking", this.config.thinking);
+    }
+    if (this.config.args && this.config.args.length > 0) {
+      args.push(...this.config.args);
+    }
+    if (inputFiles && inputFiles.length > 0) {
+      for (const file of inputFiles) {
+        args.push(`@${file}`);
+      }
+    }
+    const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT3;
+    const fullPrompt = `${systemPrompt}
+${prompt}`;
+    const escapedPrompt = escapeAtSymbols(fullPrompt);
+    args.push(escapedPrompt);
+    return args;
+  }
+  async executePi(args, cwd, signal, logger) {
+    try {
+      return await this.runPi({
+        executable: this.config.executable,
+        args,
+        cwd,
+        timeoutMs: this.config.timeoutMs,
+        env: this.buildEnv(),
+        signal,
+        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
+        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
+      });
+    } catch (error) {
+      const err = error;
+      if (err.code === "ENOENT") {
+        throw new Error(
+          `Pi coding agent executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
+        );
+      }
+      throw error;
+    }
+  }
+  buildEnv() {
+    const env = { ...process.env };
+    if (this.config.apiKey) {
+      const provider = this.config.provider?.toLowerCase() ?? "google";
+      switch (provider) {
+        case "google":
+        case "gemini":
+          env.GEMINI_API_KEY = this.config.apiKey;
+          break;
+        case "anthropic":
+          env.ANTHROPIC_API_KEY = this.config.apiKey;
+          break;
+        case "openai":
+          env.OPENAI_API_KEY = this.config.apiKey;
+          break;
+        case "groq":
+          env.GROQ_API_KEY = this.config.apiKey;
+          break;
+        case "xai":
+          env.XAI_API_KEY = this.config.apiKey;
+          break;
+        case "openrouter":
+          env.OPENROUTER_API_KEY = this.config.apiKey;
+          break;
+      }
+    }
+    return env;
+  }
+  async createWorkspace() {
+    return await (0, import_promises10.mkdtemp)(import_node_path11.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
+  }
+  async cleanupWorkspace(workspaceRoot) {
+    try {
+      await (0, import_promises10.rm)(workspaceRoot, { recursive: true, force: true });
+    } catch {
+    }
+  }
+  resolveLogDirectory() {
+    if (this.config.logDir) {
+      return import_node_path11.default.resolve(this.config.logDir);
+    }
+    return import_node_path11.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+  }
+  async createStreamLogger(request) {
+    const logDir = this.resolveLogDirectory();
+    if (!logDir) {
+      return void 0;
+    }
+    try {
+      await (0, import_promises10.mkdir)(logDir, { recursive: true });
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
+      return void 0;
+    }
+    const filePath = import_node_path11.default.join(logDir, buildLogFilename2(request, this.targetName));
+    try {
+      const logger = await PiStreamLogger.create({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt,
+        format: this.config.logFormat ?? "summary"
+      });
+      recordPiLogEntry({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt
+      });
+      return logger;
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
+      return void 0;
+    }
+  }
+};
+var PiStreamLogger = class _PiStreamLogger {
+  filePath;
+  stream;
+  startedAt = Date.now();
+  stdoutBuffer = "";
+  stderrBuffer = "";
+  format;
+  constructor(filePath, format) {
+    this.filePath = filePath;
+    this.format = format;
+    this.stream = (0, import_node_fs4.createWriteStream)(filePath, { flags: "a" });
+  }
+  static async create(options) {
+    const logger = new _PiStreamLogger(options.filePath, options.format);
+    const header = [
+      "# Pi Coding Agent stream log",
+      `# target: ${options.targetName}`,
+      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
+      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
+      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
+      ""
+    ].filter((line) => Boolean(line));
+    logger.writeLines(header);
+    return logger;
+  }
+  handleStdoutChunk(chunk) {
+    this.stdoutBuffer += chunk;
+    this.flushBuffer("stdout");
+  }
+  handleStderrChunk(chunk) {
+    this.stderrBuffer += chunk;
+    this.flushBuffer("stderr");
+  }
+  async close() {
+    this.flushBuffer("stdout");
+    this.flushBuffer("stderr");
+    this.flushRemainder();
+    await new Promise((resolve, reject) => {
+      this.stream.once("error", reject);
+      this.stream.end(() => resolve());
+    });
+  }
+  writeLines(lines) {
+    for (const line of lines) {
+      this.stream.write(`${line}
+`);
+    }
+  }
+  flushBuffer(source) {
+    const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
+    const lines = buffer.split(/\r?\n/);
+    const remainder = lines.pop() ?? "";
+    if (source === "stdout") {
+      this.stdoutBuffer = remainder;
+    } else {
+      this.stderrBuffer = remainder;
+    }
+    for (const line of lines) {
+      const formatted = this.formatLine(line, source);
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+  }
+  formatLine(rawLine, source) {
+    const trimmed = rawLine.trim();
+    if (trimmed.length === 0) {
+      return void 0;
+    }
+    const message = this.format === "json" ? formatPiJsonLog(trimmed) : formatPiLogMessage(trimmed, source);
+    return `[+${formatElapsed2(this.startedAt)}] [${source}] ${message}`;
+  }
+  flushRemainder() {
+    const stdoutRemainder = this.stdoutBuffer.trim();
+    if (stdoutRemainder.length > 0) {
+      const formatted = this.formatLine(stdoutRemainder, "stdout");
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+    const stderrRemainder = this.stderrBuffer.trim();
+    if (stderrRemainder.length > 0) {
+      const formatted = this.formatLine(stderrRemainder, "stderr");
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+    this.stdoutBuffer = "";
+    this.stderrBuffer = "";
+  }
+};
+function buildLogFilename2(request, targetName) {
+  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+  const evalId = sanitizeForFilename2(request.evalCaseId ?? "pi");
+  const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
+  const target = sanitizeForFilename2(targetName);
+  return `${timestamp}_${target}_${evalId}${attemptSuffix}_${(0, import_node_crypto2.randomUUID)().slice(0, 8)}.log`;
+}
+function sanitizeForFilename2(value) {
+  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
+  return sanitized.length > 0 ? sanitized : "pi";
+}
+function formatElapsed2(startedAt) {
+  const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
+  const hours = Math.floor(elapsedSeconds / 3600);
+  const minutes = Math.floor(elapsedSeconds % 3600 / 60);
+  const seconds = elapsedSeconds % 60;
+  if (hours > 0) {
+    return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+}
+function formatPiLogMessage(rawLine, source) {
+  const parsed = tryParseJsonValue2(rawLine);
+  if (parsed) {
+    const summary = summarizePiEvent(parsed);
+    if (summary) {
+      return summary;
+    }
+  }
+  if (source === "stderr") {
+    return `stderr: ${rawLine}`;
+  }
+  return rawLine;
+}
+function formatPiJsonLog(rawLine) {
+  const parsed = tryParseJsonValue2(rawLine);
+  if (!parsed) {
+    return rawLine;
+  }
+  try {
+    return JSON.stringify(parsed, null, 2);
+  } catch {
+    return rawLine;
+  }
+}
+function summarizePiEvent(event) {
+  if (!event || typeof event !== "object") {
+    return void 0;
+  }
+  const record = event;
+  const type = typeof record.type === "string" ? record.type : void 0;
+  if (!type) {
+    return void 0;
+  }
+  switch (type) {
+    case "agent_start":
+      return "agent_start";
+    case "agent_end":
+      return "agent_end";
+    case "turn_start":
+      return "turn_start";
+    case "turn_end":
+      return "turn_end";
+    case "message_start":
+    case "message_end": {
+      const message = record.message;
+      const role = message?.role;
+      return `${type}: ${role}`;
+    }
+    case "message_update": {
+      const event2 = record.assistantMessageEvent;
+      const eventType = event2?.type;
+      if (eventType === "text_delta") {
+        const delta = event2?.delta;
+        if (typeof delta === "string") {
+          const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
+          return `text_delta: ${preview}`;
+        }
+      }
+      return `message_update: ${eventType}`;
+    }
+    default:
+      return type;
+  }
+}
+function tryParseJsonValue2(rawLine) {
+  try {
+    return JSON.parse(rawLine);
+  } catch {
+    return void 0;
+  }
+}
+function parsePiJsonl(output) {
+  const trimmed = output.trim();
+  if (trimmed.length === 0) {
+    throw new Error("Pi coding agent produced no output");
+  }
+  const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
+  const parsed = [];
+  for (const line of lines) {
+    try {
+      parsed.push(JSON.parse(line));
+    } catch {
+    }
+  }
+  if (parsed.length === 0) {
+    throw new Error("Pi coding agent produced no valid JSON output");
+  }
+  return parsed;
+}
+function extractOutputMessages(events) {
+  for (let i = events.length - 1; i >= 0; i--) {
+    const event = events[i];
+    if (!event || typeof event !== "object") {
+      continue;
+    }
+    const record = event;
+    if (record.type !== "agent_end") {
+      continue;
+    }
+    const messages = record.messages;
+    if (!Array.isArray(messages)) {
+      continue;
+    }
+    return messages.map(convertPiMessage).filter((m) => m !== void 0);
+  }
+  const outputMessages = [];
+  for (const event of events) {
+    if (!event || typeof event !== "object") {
+      continue;
+    }
+    const record = event;
+    if (record.type === "turn_end") {
+      const message = record.message;
+      const converted = convertPiMessage(message);
+      if (converted) {
+        outputMessages.push(converted);
+      }
+    }
+  }
+  return outputMessages;
+}
+function convertPiMessage(message) {
+  if (!message || typeof message !== "object") {
+    return void 0;
+  }
+  const msg = message;
+  const role = msg.role;
+  if (typeof role !== "string") {
+    return void 0;
+  }
+  const content = extractTextContent(msg.content);
+  const toolCalls = extractToolCalls(msg.content);
+  const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
+  const metadata = {};
+  if (msg.api) metadata.api = msg.api;
+  if (msg.provider) metadata.provider = msg.provider;
+  if (msg.model) metadata.model = msg.model;
+  if (msg.usage) metadata.usage = msg.usage;
+  if (msg.stopReason) metadata.stopReason = msg.stopReason;
+  return {
+    role,
+    content,
+    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
+    timestamp,
+    metadata: Object.keys(metadata).length > 0 ? metadata : void 0
+  };
+}
+function extractTextContent(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return void 0;
+  }
+  const textParts = [];
+  for (const part of content) {
+    if (!part || typeof part !== "object") {
+      continue;
+    }
+    const p = part;
+    if (p.type === "text" && typeof p.text === "string") {
+      textParts.push(p.text);
+    }
+  }
+  return textParts.length > 0 ? textParts.join("\n") : void 0;
+}
+function extractToolCalls(content) {
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  const toolCalls = [];
+  for (const part of content) {
+    if (!part || typeof part !== "object") {
+      continue;
+    }
+    const p = part;
+    if (p.type === "tool_use" && typeof p.name === "string") {
+      toolCalls.push({
+        tool: p.name,
+        input: p.input,
+        id: typeof p.id === "string" ? p.id : void 0
+      });
+    }
+    if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
+      const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
+      if (existing) {
+        const idx = toolCalls.indexOf(existing);
+        toolCalls[idx] = {
+          ...existing,
+          output: p.content
+        };
+      }
+    }
+  }
+  return toolCalls;
+}
+function extractAssistantText2(messages) {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.role === "assistant" && msg.content) {
+      if (typeof msg.content === "string") {
+        return msg.content;
+      }
+      return JSON.stringify(msg.content);
+    }
+  }
+  return "";
+}
+function escapeAtSymbols(prompt) {
+  return prompt.replace(/@\[([^\]]+)\]:/g, "[[$1]]:");
+}
+function pickDetail2(stderr, stdout) {
+  const errorText = stderr.trim();
+  if (errorText.length > 0) {
+    return errorText;
+  }
+  const stdoutText = stdout.trim();
+  return stdoutText.length > 0 ? stdoutText : void 0;
+}
+function formatTimeoutSuffix3(timeoutMs) {
+  if (!timeoutMs || timeoutMs <= 0) {
+    return "";
+  }
+  const seconds = Math.ceil(timeoutMs / 1e3);
+  return ` after ${seconds}s`;
+}
+async function defaultPiRunner(options) {
+  return await new Promise((resolve, reject) => {
+    const parts = options.executable.split(/\s+/);
+    const executable = parts[0];
+    const executableArgs = parts.slice(1);
+    const allArgs = [...executableArgs, ...options.args];
+    const child = (0, import_node_child_process3.spawn)(executable, allArgs, {
+      cwd: options.cwd,
+      env: options.env,
+      stdio: ["pipe", "pipe", "pipe"],
+      shell: false
+    });
+    let stdout = "";
+    let stderr = "";
+    let timedOut = false;
+    const onAbort = () => {
+      child.kill("SIGTERM");
+    };
+    if (options.signal) {
+      if (options.signal.aborted) {
+        onAbort();
+      } else {
+        options.signal.addEventListener("abort", onAbort, { once: true });
+      }
+    }
+    let timeoutHandle;
+    if (options.timeoutMs && options.timeoutMs > 0) {
+      timeoutHandle = setTimeout(() => {
+        timedOut = true;
+        child.kill("SIGTERM");
+      }, options.timeoutMs);
+      timeoutHandle.unref?.();
+    }
+    child.stdout.setEncoding("utf8");
+    child.stdout.on("data", (chunk) => {
+      stdout += chunk;
+      options.onStdoutChunk?.(chunk);
+    });
+    child.stderr.setEncoding("utf8");
+    child.stderr.on("data", (chunk) => {
+      stderr += chunk;
+      options.onStderrChunk?.(chunk);
+    });
+    child.stdin.end();
+    const cleanup = () => {
+      if (timeoutHandle) {
+        clearTimeout(timeoutHandle);
+      }
+      if (options.signal) {
+        options.signal.removeEventListener("abort", onAbort);
+      }
+    };
+    child.on("error", (error) => {
+      cleanup();
+      reject(error);
+    });
+    child.on("close", (code) => {
+      cleanup();
+      resolve({
+        stdout,
+        stderr,
+        exitCode: typeof code === "number" ? code : -1,
+        timedOut
+      });
+    });
+  });
+}
+// src/evaluation/providers/targets.ts
+var import_node_path12 = __toESM(require("path"), 1);
+var import_zod = require("zod");
+var CliHealthcheckHttpInputSchema = import_zod.z.object({
+  type: import_zod.z.literal("http"),
+  url: import_zod.z.string().min(1, "healthcheck URL is required"),
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional()
+});
+var CliHealthcheckCommandInputSchema = import_zod.z.object({
+  type: import_zod.z.literal("command"),
+  command_template: import_zod.z.string().optional(),
+  commandTemplate: import_zod.z.string().optional(),
+  cwd: import_zod.z.string().optional(),
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional()
+});
+var CliHealthcheckInputSchema = import_zod.z.discriminatedUnion("type", [
+  CliHealthcheckHttpInputSchema,
+  CliHealthcheckCommandInputSchema
+]);
+var CliTargetInputSchema = import_zod.z.object({
+  name: import_zod.z.string().min(1, "target name is required"),
+  provider: import_zod.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
+  // Command template - required (accept both naming conventions)
+  command_template: import_zod.z.string().optional(),
+  commandTemplate: import_zod.z.string().optional(),
+  // Files format - optional
+  files_format: import_zod.z.string().optional(),
+  filesFormat: import_zod.z.string().optional(),
+  attachments_format: import_zod.z.string().optional(),
+  attachmentsFormat: import_zod.z.string().optional(),
+  // Working directory - optional
+  cwd: import_zod.z.string().optional(),
+  // Timeout in seconds - optional
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional(),
+  // Healthcheck configuration - optional
+  healthcheck: CliHealthcheckInputSchema.optional(),
+  // Verbose mode - optional
+  verbose: import_zod.z.boolean().optional(),
+  cli_verbose: import_zod.z.boolean().optional(),
+  cliVerbose: import_zod.z.boolean().optional(),
+  // Keep temp files - optional
+  keep_temp_files: import_zod.z.boolean().optional(),
+  keepTempFiles: import_zod.z.boolean().optional(),
+  keep_output_files: import_zod.z.boolean().optional(),
+  keepOutputFiles: import_zod.z.boolean().optional(),
+  // Common target fields
+  judge_target: import_zod.z.string().optional(),
+  workers: import_zod.z.number().int().min(1).optional(),
+  provider_batching: import_zod.z.boolean().optional(),
+  providerBatching: import_zod.z.boolean().optional()
+}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
+  message: "Either command_template or commandTemplate is required"
+});
+var CliHealthcheckHttpSchema = import_zod.z.object({
+  type: import_zod.z.literal("http"),
+  url: import_zod.z.string().min(1),
+  timeoutMs: import_zod.z.number().positive().optional()
+}).strict();
+var CliHealthcheckCommandSchema = import_zod.z.object({
+  type: import_zod.z.literal("command"),
+  commandTemplate: import_zod.z.string().min(1),
+  cwd: import_zod.z.string().optional(),
+  timeoutMs: import_zod.z.number().positive().optional()
+}).strict();
+var CliHealthcheckSchema = import_zod.z.discriminatedUnion("type", [
+  CliHealthcheckHttpSchema,
+  CliHealthcheckCommandSchema
+]);
+var CliTargetConfigSchema = import_zod.z.object({
+  commandTemplate: import_zod.z.string().min(1),
+  filesFormat: import_zod.z.string().optional(),
+  cwd: import_zod.z.string().optional(),
+  timeoutMs: import_zod.z.number().positive().optional(),
+  healthcheck: CliHealthcheckSchema.optional(),
+  verbose: import_zod.z.boolean().optional(),
+  keepTempFiles: import_zod.z.boolean().optional()
+}).strict();
+function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  if (input.type === "http") {
+    const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
+    return {
+      type: "http",
+      url,
+      timeoutMs
+    };
+  }
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(
+      `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
+    );
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} healthcheck command template`,
+    true
+  );
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !import_node_path12.default.isAbsolute(cwd)) {
+    cwd = import_node_path12.default.resolve(import_node_path12.default.dirname(import_node_path12.default.resolve(evalFilePath)), cwd);
+  }
+  return {
+    type: "command",
+    commandTemplate,
+    cwd,
+    timeoutMs
+  };
+}
+function normalizeCliTargetInput(input, env, evalFilePath) {
+  const targetName = input.name;
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} CLI command template`,
+    true
+  );
+  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
+  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !import_node_path12.default.isAbsolute(cwd)) {
+    cwd = import_node_path12.default.resolve(import_node_path12.default.dirname(import_node_path12.default.resolve(evalFilePath)), cwd);
+  }
+  if (!cwd && evalFilePath) {
+    cwd = import_node_path12.default.dirname(import_node_path12.default.resolve(evalFilePath));
+  }
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
+  const keepTempFiles = resolveOptionalBoolean(
+    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
+  );
+  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
+  return {
+    commandTemplate,
+    filesFormat,
+    cwd,
+    timeoutMs,
+    healthcheck,
+    verbose,
+    keepTempFiles
+  };
+}
+var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
+  "PROMPT",
+  "GUIDELINES",
+  "EVAL_ID",
+  "ATTEMPT",
+  "FILES",
+  "OUTPUT_FILE"
+]);
+var BASE_TARGET_SCHEMA = import_zod.z.object({
+  name: import_zod.z.string().min(1, "target name is required"),
+  provider: import_zod.z.string().min(1, "provider is required"),
+  judge_target: import_zod.z.string().optional(),
+  workers: import_zod.z.number().int().min(1).optional()
+}).passthrough();
+var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
+function normalizeAzureApiVersion(value) {
+  if (!value) {
+    return DEFAULT_AZURE_API_VERSION;
+  }
+  const trimmed = value.trim();
+  if (trimmed.length === 0) {
+    return DEFAULT_AZURE_API_VERSION;
+  }
+  const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
+  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
+}
+function resolveRetryConfig(target) {
+  const maxRetries = resolveOptionalNumber(
+    target.max_retries ?? target.maxRetries,
+    `${target.name} max retries`
+  );
+  const initialDelayMs = resolveOptionalNumber(
+    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
+    `${target.name} retry initial delay`
+  );
+  const maxDelayMs = resolveOptionalNumber(
+    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
+    `${target.name} retry max delay`
+  );
+  const backoffFactor = resolveOptionalNumber(
+    target.retry_backoff_factor ?? target.retryBackoffFactor,
+    `${target.name} retry backoff factor`
+  );
+  const retryableStatusCodes = resolveOptionalNumberArray(
+    target.retry_status_codes ?? target.retryStatusCodes,
+    `${target.name} retry status codes`
+  );
+  if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
+    return void 0;
+  }
+  return {
+    maxRetries,
+    initialDelayMs,
+    maxDelayMs,
+    backoffFactor,
+    retryableStatusCodes
+  };
+}
+function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
+  const parsed = BASE_TARGET_SCHEMA.parse(definition);
+  const provider = parsed.provider.toLowerCase();
+  const providerBatching = resolveOptionalBoolean(
+    parsed.provider_batching ?? parsed.providerBatching
+  );
+  switch (provider) {
+    case "azure":
+    case "azure-openai":
+      return {
+        kind: "azure",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveAzureConfig(parsed, env)
+      };
+    case "anthropic":
+      return {
+        kind: "anthropic",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveAnthropicConfig(parsed, env)
+      };
+    case "gemini":
+    case "google":
+    case "google-gemini":
+      return {
+        kind: "gemini",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveGeminiConfig(parsed, env)
+      };
+    case "codex":
+    case "codex-cli":
+      return {
+        kind: "codex",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveCodexConfig(parsed, env)
+      };
+    case "pi":
+    case "pi-coding-agent":
+      return {
+        kind: "pi-coding-agent",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolvePiCodingAgentConfig(parsed, env)
+      };
+    case "mock":
+      return {
+        kind: "mock",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveMockConfig(parsed)
+      };
+    case "vscode":
+    case "vscode-insiders":
+      return {
+        kind: provider,
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
+      };
+    case "cli":
+      return {
+        kind: "cli",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveCliConfig(parsed, env, evalFilePath)
+      };
+    default:
+      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
+  }
+}
+function resolveAzureConfig(target, env) {
+  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
+  const versionSource = target.version ?? target.api_version;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
+  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
+  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
+  const version = normalizeAzureApiVersion(
+    resolveOptionalString(versionSource, env, `${target.name} api version`, {
+      allowLiteral: true,
+      optionalEnv: true
+    })
+  );
+  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
+  const maxOutputTokens = resolveOptionalNumber(
+    maxTokensSource,
+    `${target.name} max output tokens`
+  );
+  const retry = resolveRetryConfig(target);
+  return {
+    resourceName,
+    deploymentName,
+    apiKey,
+    version,
+    temperature,
+    maxOutputTokens,
+    retry
+  };
+}
+function resolveAnthropicConfig(target, env) {
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const modelSource = target.model ?? target.deployment ?? target.variant;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
+  const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
+  const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
+  const retry = resolveRetryConfig(target);
+  return {
+    apiKey,
+    model,
+    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
+    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
+    thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
+    retry
+  };
+}
+function resolveGeminiConfig(target, env) {
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const modelSource = target.model ?? target.deployment ?? target.variant;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
+  const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
+    allowLiteral: true,
+    optionalEnv: true
+  }) ?? "gemini-2.5-flash";
+  const retry = resolveRetryConfig(target);
+  return {
+    apiKey,
+    model,
+    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
+    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
+    retry
+  };
+}
+function resolveCodexConfig(target, env) {
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
   const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
   const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -3443,13 +4406,15 @@ function resolveCodexConfig(target, env) {
     optionalEnv: true
   });
   const logFormat = normalizeCodexLogFormat(logFormatSource);
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     executable,
     args,
     cwd,
     timeoutMs,
     logDir,
-    logFormat
+    logFormat,
+    systemPrompt
   };
 }
 function normalizeCodexLogFormat(value) {
@@ -3465,6 +4430,70 @@ function normalizeCodexLogFormat(value) {
   }
   throw new Error("codex log format must be 'summary' or 'json'");
 }
+function resolvePiCodingAgentConfig(target, env) {
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const modelSource = target.model ?? target.pi_model ?? target.piModel;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
+  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
+  const logFormatSource = target.log_format ?? target.logFormat;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
+    allowLiteral: true,
+    optionalEnv: true
+  }) ?? "pi";
+  const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
+    allowLiteral: false,
+    optionalEnv: true
+  });
+  const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
+  const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
+  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
+  return {
+    executable,
+    provider,
+    model,
+    apiKey,
+    tools,
+    thinking,
+    args,
+    cwd,
+    timeoutMs,
+    logDir,
+    logFormat,
+    systemPrompt
+  };
+}
 function resolveMockConfig(target) {
   const response = typeof target.response === "string" ? target.response : void 0;
   return { response };
@@ -3499,46 +4528,35 @@ function resolveVSCodeConfig(target, env, insiders) {
     workspaceTemplate
   };
 }
-function resolveCliConfig(target, env, evalFilePath) {
-  const commandTemplateSource = target.command_template ?? target.commandTemplate;
-  const filesFormat = resolveOptionalLiteralString(
-    target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
-  );
-  const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
-  const keepTempFiles = resolveOptionalBoolean(
-    target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
-  );
-  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  if (cwd && evalFilePath && !import_node_path11.default.isAbsolute(cwd)) {
-    cwd = import_node_path11.default.resolve(import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath)), cwd);
+var cliErrorMap = (issue, ctx) => {
+  if (issue.code === import_zod.z.ZodIssueCode.unrecognized_keys) {
+    return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
   }
-  if (!cwd && evalFilePath) {
-    cwd = import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath));
+  if (issue.code === import_zod.z.ZodIssueCode.invalid_union_discriminator) {
+    return { message: "healthcheck type must be 'http' or 'command'" };
   }
-  const timeoutMs = resolveTimeoutMs(
-    target.timeout_seconds ?? target.timeoutSeconds,
-    `${target.name} timeout`
-  );
-  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name, evalFilePath);
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${target.name} CLI command template`,
-    true
-  );
-  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
-  return {
-    commandTemplate,
-    filesFormat,
-    cwd,
-    timeoutMs,
-    healthcheck,
-    verbose,
-    keepTempFiles
-  };
+  if (issue.code === import_zod.z.ZodIssueCode.invalid_type && issue.expected === "string") {
+    return { message: `${ctx.defaultError} (expected a string value)` };
+  }
+  return { message: ctx.defaultError };
+};
+function resolveCliConfig(target, env, evalFilePath) {
+  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
+  if (!parseResult.success) {
+    const firstError = parseResult.error.errors[0];
+    const path16 = firstError?.path.join(".") || "";
+    const prefix = path16 ? `${target.name} ${path16}: ` : `${target.name}: `;
+    throw new Error(`${prefix}${firstError?.message}`);
+  }
+  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
+  assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
+  if (normalized.healthcheck?.type === "command") {
+    assertSupportedCliPlaceholders(
+      normalized.healthcheck.commandTemplate,
+      `${target.name} healthcheck command template`
+    );
+  }
+  return normalized;
 }
 function resolveTimeoutMs(source, description) {
   const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
@@ -3550,49 +4568,6 @@ function resolveTimeoutMs(source, description) {
   }
   return Math.floor(seconds * 1e3);
 }
-function resolveCliHealthcheck(source, env, targetName, evalFilePath) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} healthcheck must be an object`);
-  }
-  const candidate = source;
-  const type = candidate.type;
-  const timeoutMs = resolveTimeoutMs(
-    candidate.timeout_seconds ?? candidate.timeoutSeconds,
-    `${targetName} healthcheck timeout`
-  );
-  if (type === "http") {
-    const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
-    return {
-      type: "http",
-      url,
-      timeoutMs
-    };
-  }
-  if (type === "command") {
-    const commandTemplate = resolveString(
-      candidate.command_template ?? candidate.commandTemplate,
-      env,
-      `${targetName} healthcheck command template`,
-      true
-    );
-    assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
-    const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
-      allowLiteral: true,
-      optionalEnv: true
-    });
-    const resolvedCwd = cwd && evalFilePath && !import_node_path11.default.isAbsolute(cwd) ? import_node_path11.default.resolve(import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath)), cwd) : cwd;
-    return {
-      type: "command",
-      commandTemplate,
-      timeoutMs,
-      cwd: resolvedCwd
-    };
-  }
-  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
-}
 function assertSupportedCliPlaceholders(template, description) {
   const placeholders = extractCliPlaceholders(template);
   for (const placeholder of placeholders) {
@@ -3758,7 +4733,7 @@ function resolveOptionalNumberArray(source, description) {
 }
 // src/evaluation/providers/vscode.ts
-var import_node_path12 = __toESM(require("path"), 1);
+var import_node_path13 = __toESM(require("path"), 1);
 var import_subagent = require("subagent");
 // src/evaluation/providers/vscode-templates.ts
@@ -3928,7 +4903,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path12.default.basename(absolutePath);
+    const fileName = import_node_path13.default.basename(absolutePath);
     const fileUri = pathToFileUri2(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -3953,8 +4928,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path12.default.resolve(attachment);
-    const normalized = absolutePath.split(import_node_path12.default.sep).join("/");
+    const absolutePath = import_node_path13.default.resolve(attachment);
+    const normalized = absolutePath.split(import_node_path13.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -3969,7 +4944,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path12.default.resolve(attachment);
+    const absolutePath = import_node_path13.default.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -3977,7 +4952,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = import_node_path12.default.isAbsolute(filePath) ? filePath : import_node_path12.default.resolve(filePath);
+  const absolutePath = import_node_path13.default.isAbsolute(filePath) ? filePath : import_node_path13.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -3990,7 +4965,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(import_node_path12.default.resolve(attachment));
+    deduped.add(import_node_path13.default.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -3999,7 +4974,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(import_node_path12.default.resolve(inputFile));
+      deduped.add(import_node_path13.default.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -4046,9 +5021,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 }
 // src/evaluation/providers/targets-file.ts
-var import_node_fs4 = require("fs");
-var import_promises10 = require("fs/promises");
-var import_node_path13 = __toESM(require("path"), 1);
+var import_node_fs5 = require("fs");
+var import_promises11 = require("fs/promises");
+var import_node_path14 = __toESM(require("path"), 1);
 var import_yaml3 = require("yaml");
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -4078,18 +5053,18 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises10.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises11.access)(filePath, import_node_fs5.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = import_node_path13.default.resolve(filePath);
+  const absolutePath = import_node_path14.default.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises10.readFile)(absolutePath, "utf8");
+  const raw = await (0, import_promises11.readFile)(absolutePath, "utf8");
   const parsed = (0, import_yaml3.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -4117,6 +5092,8 @@ function createProvider(target) {
       return new CliProvider(target.name, target.config);
     case "codex":
       return new CodexProvider(target.name, target.config);
+    case "pi-coding-agent":
+      return new PiCodingAgentProvider(target.name, target.config);
     case "mock":
       return new MockProvider(target.name, target.config);
     case "vscode":
@@ -4137,9 +5114,76 @@ function resolveAndCreateProvider(definition, env = process.env) {
 var import_ai2 = require("ai");
 var import_zod2 = require("zod");
+// src/runtime/exec.ts
+function getBunSpawn() {
+  const bunSpawn = globalThis.Bun?.spawn;
+  return typeof bunSpawn === "function" ? bunSpawn : void 0;
+}
+async function execShellWithStdin(command, stdinPayload, options = {}) {
+  const bunSpawn = getBunSpawn();
+  if (bunSpawn) {
+    const encoder = new TextEncoder();
+    const proc = bunSpawn({
+      cmd: ["sh", "-c", command],
+      cwd: options.cwd,
+      stdin: encoder.encode(stdinPayload),
+      stdout: "pipe",
+      stderr: "pipe"
+    });
+    const timeout = options.timeoutMs ? setTimeout(() => {
+      proc.kill();
+    }, options.timeoutMs) : void 0;
+    try {
+      const stdout = await new Response(proc.stdout).text();
+      const stderr = await new Response(proc.stderr).text();
+      const exitCode = await proc.exited;
+      return { stdout, stderr, exitCode };
+    } finally {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+    }
+  }
+  const { spawn: spawn3 } = await import("child_process");
+  return await new Promise((resolve, reject) => {
+    const child = spawn3(command, {
+      shell: true,
+      cwd: options.cwd,
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    let stdout = "";
+    let stderr = "";
+    const timeout = options.timeoutMs ? setTimeout(() => {
+      child.kill();
+      reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
+    }, options.timeoutMs) : void 0;
+    child.stdout?.on("data", (data) => {
+      stdout += data.toString();
+    });
+    child.stderr?.on("data", (data) => {
+      stderr += data.toString();
+    });
+    child.on("error", (error) => {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+      reject(error);
+    });
+    child.on("exit", (code) => {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+      resolve({ stdout, stderr, exitCode: code ?? 0 });
+    });
+    child.stdin?.write(stdinPayload);
+    child.stdin?.end();
+  });
+}
 // src/evaluation/providers/types.ts
 var AGENT_PROVIDER_KINDS = [
   "codex",
+  "pi-coding-agent",
   "vscode",
   "vscode-insiders"
 ];
@@ -4438,17 +5482,17 @@ var CodeEvaluator = class {
     const inputPayload = JSON.stringify(
       {
         question: context.evalCase.question,
-        expected_outcome: context.evalCase.expected_outcome,
-        expected_messages: context.evalCase.expected_messages,
-        reference_answer: context.evalCase.reference_answer,
-        candidate_answer: context.candidate,
-        output_messages: context.outputMessages ?? null,
-        guideline_files: context.evalCase.guideline_paths,
-        input_files: context.evalCase.file_paths.filter(
-          (path15) => !context.evalCase.guideline_paths.includes(path15)
+        expectedOutcome: context.evalCase.expected_outcome,
+        expectedMessages: context.evalCase.expected_messages,
+        referenceAnswer: context.evalCase.reference_answer,
+        candidateAnswer: context.candidate,
+        outputMessages: context.outputMessages ?? null,
+        guidelineFiles: context.evalCase.guideline_paths,
+        inputFiles: context.evalCase.file_paths.filter(
+          (path16) => !context.evalCase.guideline_paths.includes(path16)
         ),
-        input_messages: context.evalCase.input_messages,
-        candidate_trace_summary: context.traceSummary ?? null
+        inputMessages: context.evalCase.input_messages,
+        traceSummary: context.traceSummary ?? null
       },
       null,
       2
@@ -4518,43 +5562,17 @@ function calculateRubricScore(result, rubrics) {
   return { score, verdict, hits, misses };
 }
 async function executeScript(scriptPath, input, agentTimeoutMs, cwd) {
-  const { spawn: spawn2 } = await import("child_process");
-  return await new Promise((resolve, reject) => {
-    const child = spawn2(scriptPath, {
-      shell: true,
-      cwd
-    });
-    let stdout = "";
-    let stderr = "";
-    const timeout = agentTimeoutMs ? setTimeout(() => {
-      child.kill();
-      reject(new Error(`Code evaluator timed out after ${agentTimeoutMs}ms`));
-    }, agentTimeoutMs) : void 0;
-    child.stdout?.on("data", (data) => {
-      stdout += data.toString();
-    });
-    child.stderr?.on("data", (data) => {
-      stderr += data.toString();
-    });
-    child.on("error", (error) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      reject(error);
-    });
-    child.on("exit", (code) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      if (code && code !== 0 && stderr.length > 0) {
-        reject(new Error(`Code evaluator exited with code ${code}: ${stderr.trim()}`));
-        return;
-      }
-      resolve(stdout.trim());
-    });
-    child.stdin?.write(input);
-    child.stdin?.end();
+  const { stdout, stderr, exitCode } = await execShellWithStdin(scriptPath, input, {
+    cwd,
+    timeoutMs: agentTimeoutMs
   });
+  if (exitCode !== 0) {
+    const trimmedErr = stderr.trim();
+    throw new Error(
+      trimmedErr.length > 0 ? `Code evaluator exited with code ${exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${exitCode}`
+    );
+  }
+  return stdout.trim();
 }
 function parseJsonSafe(payload) {
   try {
@@ -4568,6 +5586,33 @@ function substituteVariables(template, variables) {
     return variables[varName] ?? match;
   });
 }
+function deepEqual(a, b) {
+  if (a === b) return true;
+  if (a === null || b === null) return a === b;
+  if (typeof a !== typeof b) return false;
+  if (typeof a !== "object") return a === b;
+  if (Array.isArray(a) !== Array.isArray(b)) return false;
+  if (Array.isArray(a) && Array.isArray(b)) {
+    if (a.length !== b.length) return false;
+    return a.every((val, i) => deepEqual(val, b[i]));
+  }
+  const aObj = a;
+  const bObj = b;
+  const aKeys = Object.keys(aObj);
+  const bKeys = Object.keys(bObj);
+  if (aKeys.length !== bKeys.length) return false;
+  return aKeys.every((key) => Object.hasOwn(bObj, key) && deepEqual(aObj[key], bObj[key]));
+}
+function argsMatch(expected, actual) {
+  if (expected === void 0) return true;
+  if (expected === "any") return true;
+  if (actual === void 0) return false;
+  for (const key of Object.keys(expected)) {
+    if (!Object.hasOwn(actual, key)) return false;
+    if (!deepEqual(expected[key], actual[key])) return false;
+  }
+  return true;
+}
 var ToolTrajectoryEvaluator = class {
   kind = "tool_trajectory";
   config;
@@ -4624,7 +5669,10 @@ var ToolTrajectoryEvaluator = class {
     for (const message of messages) {
       if (message.toolCalls) {
         for (const call of message.toolCalls) {
-          toolCalls.push({ name: call.tool });
+          toolCalls.push({
+            name: call.tool,
+            args: call.input
+          });
         }
       }
     }
@@ -4693,18 +5741,29 @@ var ToolTrajectoryEvaluator = class {
     const misses = [];
     let actualIndex = 0;
     for (let i = 0; i < expected.length; i++) {
-      const expectedTool = expected[i].tool;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
       let found = false;
+      let argsMismatch = false;
       while (actualIndex < toolCalls.length) {
-        if (toolCalls[actualIndex].name === expectedTool) {
-          hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+        const actualCall = toolCalls[actualIndex];
+        if (actualCall.name === expectedTool) {
+          if (argsMatch(expectedItem.args, actualCall.args)) {
+            hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+            actualIndex++;
+            found = true;
+            break;
+          }
+          misses.push(
+            `Expected ${expectedTool} at position ${i}: tool found at ${actualIndex} but args mismatch`
+          );
           actualIndex++;
-          found = true;
+          argsMismatch = true;
           break;
         }
         actualIndex++;
       }
-      if (!found) {
+      if (!found && !argsMismatch) {
         misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
       }
     }
@@ -4735,10 +5794,16 @@ var ToolTrajectoryEvaluator = class {
     }
     const checkLength = Math.min(expected.length, toolCalls.length);
     for (let i = 0; i < checkLength; i++) {
-      const expectedTool = expected[i].tool;
-      const actualTool = toolCalls[i].name;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
+      const actualCall = toolCalls[i];
+      const actualTool = actualCall.name;
       if (actualTool === expectedTool) {
-        hits.push(`Position ${i}: ${expectedTool} \u2713`);
+        if (argsMatch(expectedItem.args, actualCall.args)) {
+          hits.push(`Position ${i}: ${expectedTool}`);
+        } else {
+          misses.push(`Position ${i}: ${expectedTool} args mismatch`);
+        }
       } else {
         misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
       }
@@ -4982,9 +6047,9 @@ var CompositeEvaluator = class {
 };
 // src/evaluation/orchestrator.ts
-var import_node_crypto2 = require("crypto");
-var import_promises11 = require("fs/promises");
-var import_node_path14 = __toESM(require("path"), 1);
+var import_node_crypto3 = require("crypto");
+var import_promises12 = require("fs/promises");
+var import_node_path15 = __toESM(require("path"), 1);
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
@@ -5380,7 +6445,12 @@ async function runBatchEvaluation(options) {
     const promptInputs = promptInputsList[i];
     const providerResponse = batchResponse[i];
     const outputMessages = providerResponse.outputMessages;
-    const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+      tokenUsage: providerResponse.tokenUsage,
+      costUsd: providerResponse.costUsd,
+      durationMs: providerResponse.durationMs
+    }) : void 0;
     const candidate = extractLastAssistantContent(outputMessages);
     let result;
     try {
@@ -5501,7 +6571,12 @@ async function runEvalCase(options) {
     await cache.set(cacheKey, providerResponse);
   }
   const outputMessages = providerResponse.outputMessages;
-  const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+    tokenUsage: providerResponse.tokenUsage,
+    costUsd: providerResponse.costUsd,
+    durationMs: providerResponse.durationMs
+  }) : void 0;
   const candidate = extractLastAssistantContent(outputMessages);
   try {
     return await evaluateCandidate({
@@ -5574,21 +6649,21 @@ async function evaluateCandidate(options) {
   }
   return {
     timestamp: completedAt.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
     misses: score.misses,
-    candidate_answer: candidate,
+    candidateAnswer: candidate,
     target: target.name,
     reasoning: score.reasoning,
-    raw_aspects: score.rawAspects,
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
-    evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
-    evaluator_results: evaluatorResults,
-    trace_summary: traceSummary
+    rawAspects: score.rawAspects,
+    agentProviderRequest,
+    lmProviderRequest,
+    evaluatorProviderRequest: evaluatorResults ? void 0 : score.evaluatorRawRequest,
+    evaluatorResults,
+    traceSummary
   };
 }
 async function runEvaluatorsForCase(options) {
@@ -5686,7 +6761,7 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "code") {
@@ -5717,11 +6792,11 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "composite") {
-        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path14.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
+        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path15.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
         const createEvaluator = (memberConfig) => {
           switch (memberConfig.type) {
             case "llm_judge":
@@ -5774,8 +6849,8 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest,
-          evaluator_results: mapChildResults(score2.evaluatorResults)
+          evaluatorProviderRequest: score2.evaluatorRawRequest,
+          evaluatorResults: mapChildResults(score2.evaluatorResults)
         });
       }
       if (evaluator.type === "tool_trajectory") {
@@ -5933,22 +7008,22 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
 async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
-  const filePath = import_node_path14.default.resolve(directory, filename);
-  await (0, import_promises11.mkdir)(import_node_path14.default.dirname(filePath), { recursive: true });
+  const filePath = import_node_path15.default.resolve(directory, filename);
+  await (0, import_promises12.mkdir)(import_node_path15.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises11.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises12.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
     return "prompt";
   }
   const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
-  return sanitized.length > 0 ? sanitized : (0, import_node_crypto2.randomUUID)();
+  return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
 }
 async function invokeProvider(provider, options) {
   const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -6005,22 +7080,22 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
   }
   return {
     timestamp: timestamp.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
-    candidate_answer: `Error occurred: ${message}`,
+    candidateAnswer: `Error occurred: ${message}`,
     target: targetName,
-    raw_aspects: [],
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
+    rawAspects: [],
+    agentProviderRequest,
+    lmProviderRequest,
     error: message
   };
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
-  const hash = (0, import_node_crypto2.createHash)("sha256");
+  const hash = (0, import_node_crypto3.createHash)("sha256");
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
@@ -6060,8 +7135,8 @@ function mapChildResults(children) {
     hits: child.hits,
     misses: child.misses,
     reasoning: child.reasoning,
-    evaluator_provider_request: child.evaluatorRawRequest,
-    evaluator_results: mapChildResults(child.evaluatorResults)
+    evaluatorProviderRequest: child.evaluatorRawRequest,
+    evaluatorResults: mapChildResults(child.evaluatorResults)
   }));
 }
 function computeWeightedMean(entries) {
@@ -6163,17 +7238,21 @@ function createAgentKernel() {
 0 && (module.exports = {
   CodeEvaluator,
   CompositeEvaluator,
+  DEFAULT_EXPLORATION_TOOLS,
   LlmJudgeEvaluator,
   TEST_MESSAGE_ROLES,
   ToolTrajectoryEvaluator,
+  avgToolDurationMs,
   buildDirectoryChain,
   buildPromptInputs,
   buildSearchRoots,
   computeTraceSummary,
   consumeCodexLogEntries,
+  consumePiLogEntries,
   createAgentKernel,
   createProvider,
   ensureVSCodeSubagents,
+  explorationRatio,
   extractCodeBlocks,
   fileExists,
   findGitRoot,
@@ -6187,6 +7266,7 @@ function createAgentKernel() {
   isTestMessageRole,
   listTargetNames,
   loadEvalCases,
+  mergeExecutionMetrics,
   normalizeLineEndings,
   readJsonFile,
   readTargetDefinitions,
@@ -6197,6 +7277,8 @@ function createAgentKernel() {
   resolveTargetDefinition,
   runEvalCase,
   runEvaluation,
-  subscribeToCodexLogEntries
+  subscribeToCodexLogEntries,
+  subscribeToPiLogEntries,
+  tokensPerTool
 });
 //# sourceMappingURL=index.cjs.map