npm - @agentv/core - Versions diffs - 1.3.1 → 1.5.0 - Mend

@agentv/core 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +77 -77
package/dist/{chunk-4A6L2F6L.js → chunk-E2VSU4WZ.js} +282 -81
package/dist/chunk-E2VSU4WZ.js.map +1 -0
package/dist/evaluation/validation/index.cjs +82 -67
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +3 -68
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +1668 -489
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +279 -77
package/dist/index.d.ts +279 -77
package/dist/index.js +1334 -356
package/dist/index.js.map +1 -1
package/package.json +2 -5
package/dist/chunk-4A6L2F6L.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -32,17 +32,21 @@ var index_exports = {};
 __export(index_exports, {
   CodeEvaluator: () => CodeEvaluator,
   CompositeEvaluator: () => CompositeEvaluator,
+  DEFAULT_EXPLORATION_TOOLS: () => DEFAULT_EXPLORATION_TOOLS,
   LlmJudgeEvaluator: () => LlmJudgeEvaluator,
   TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
   ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
+  avgToolDurationMs: () => avgToolDurationMs,
   buildDirectoryChain: () => buildDirectoryChain2,
   buildPromptInputs: () => buildPromptInputs,
   buildSearchRoots: () => buildSearchRoots2,
   computeTraceSummary: () => computeTraceSummary,
   consumeCodexLogEntries: () => consumeCodexLogEntries,
+  consumePiLogEntries: () => consumePiLogEntries,
   createAgentKernel: () => createAgentKernel,
   createProvider: () => createProvider,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
+  explorationRatio: () => explorationRatio,
   extractCodeBlocks: () => extractCodeBlocks,
   fileExists: () => fileExists2,
   findGitRoot: () => findGitRoot,
@@ -54,10 +58,9 @@ __export(index_exports, {
   isJsonValue: () => isJsonValue,
   isTestMessage: () => isTestMessage,
   isTestMessageRole: () => isTestMessageRole,
-  isTraceEvent: () => isTraceEvent,
-  isTraceEventType: () => isTraceEventType,
   listTargetNames: () => listTargetNames,
   loadEvalCases: () => loadEvalCases,
+  mergeExecutionMetrics: () => mergeExecutionMetrics,
   normalizeLineEndings: () => normalizeLineEndings,
   readJsonFile: () => readJsonFile,
   readTargetDefinitions: () => readTargetDefinitions,
@@ -68,7 +71,9 @@ __export(index_exports, {
   resolveTargetDefinition: () => resolveTargetDefinition,
   runEvalCase: () => runEvalCase,
   runEvaluation: () => runEvaluation,
-  subscribeToCodexLogEntries: () => subscribeToCodexLogEntries
+  subscribeToCodexLogEntries: () => subscribeToCodexLogEntries,
+  subscribeToPiLogEntries: () => subscribeToPiLogEntries,
+  tokensPerTool: () => tokensPerTool
 });
 module.exports = __toCommonJS(index_exports);
@@ -135,33 +140,69 @@ function getHitCount(result) {
 }
 // src/evaluation/trace.ts
-function isTraceEventType(value) {
-  return typeof value === "string" && ["model_step", "tool_call", "tool_result", "message", "error"].includes(value);
-}
-function isTraceEvent(value) {
-  if (typeof value !== "object" || value === null) {
-    return false;
-  }
-  const candidate = value;
-  return isTraceEventType(candidate.type) && typeof candidate.timestamp === "string";
-}
-function computeTraceSummary(trace) {
+function computeTraceSummary(messages) {
   const toolCallCounts = {};
-  let errorCount = 0;
-  for (const event of trace) {
-    if (event.type === "tool_call" && event.name) {
-      toolCallCounts[event.name] = (toolCallCounts[event.name] ?? 0) + 1;
-    }
-    if (event.type === "error") {
-      errorCount++;
+  let totalToolCalls = 0;
+  for (const message of messages) {
+    if (!message.toolCalls) continue;
+    for (const toolCall of message.toolCalls) {
+      toolCallCounts[toolCall.tool] = (toolCallCounts[toolCall.tool] ?? 0) + 1;
+      totalToolCalls++;
     }
   }
   const toolNames = Object.keys(toolCallCounts).sort();
   return {
-    eventCount: trace.length,
+    eventCount: totalToolCalls,
     toolNames,
     toolCallsByName: toolCallCounts,
-    errorCount
+    errorCount: 0
+  };
+}
+var DEFAULT_EXPLORATION_TOOLS = [
+  "read",
+  "grep",
+  "glob",
+  "search",
+  "list",
+  "Read",
+  "Grep",
+  "Glob",
+  "WebSearch",
+  "WebFetch"
+];
+function explorationRatio(summary, explorationTools = DEFAULT_EXPLORATION_TOOLS) {
+  if (summary.eventCount === 0) return void 0;
+  const explorationCalls = explorationTools.reduce(
+    (sum, tool) => sum + (summary.toolCallsByName[tool] ?? 0),
+    0
+  );
+  return explorationCalls / summary.eventCount;
+}
+function tokensPerTool(summary) {
+  if (!summary.tokenUsage || summary.eventCount === 0) return void 0;
+  const totalTokens = summary.tokenUsage.input + summary.tokenUsage.output;
+  return totalTokens / summary.eventCount;
+}
+function avgToolDurationMs(summary) {
+  if (!summary.toolDurations) return void 0;
+  let totalDuration = 0;
+  let totalCalls = 0;
+  for (const durations of Object.values(summary.toolDurations)) {
+    for (const duration of durations) {
+      totalDuration += duration;
+      totalCalls++;
+    }
+  }
+  if (totalCalls === 0) return void 0;
+  return totalDuration / totalCalls;
+}
+function mergeExecutionMetrics(summary, metrics) {
+  if (!metrics) return summary;
+  return {
+    ...summary,
+    tokenUsage: metrics.tokenUsage,
+    costUsd: metrics.costUsd,
+    durationMs: metrics.durationMs
   };
 }
@@ -437,7 +478,8 @@ var TEMPLATE_VARIABLES = {
   QUESTION: "question",
   EXPECTED_OUTCOME: "expected_outcome",
   REFERENCE_ANSWER: "reference_answer",
-  INPUT_MESSAGES: "input_messages"
+  INPUT_MESSAGES: "input_messages",
+  OUTPUT_MESSAGES: "output_messages"
 };
 var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
 var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
@@ -677,7 +719,13 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
         expected = [];
         for (const item of rawExpected) {
           if (isJsonObject2(item) && typeof item.tool === "string") {
-            expected.push({ tool: item.tool });
+            let args;
+            if (item.args === "any") {
+              args = "any";
+            } else if (isJsonObject2(item.args)) {
+              args = item.args;
+            }
+            expected.push({ tool: item.tool, ...args !== void 0 ? { args } : {} });
           }
         }
       }
@@ -1320,16 +1368,16 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     }) : [];
     const codeSnippets = extractCodeBlocks(inputSegments);
     let referenceAnswer = "";
-    if (outputSegments.length > 1) {
-      referenceAnswer = JSON.stringify(outputSegments, null, 2);
-    } else if (outputSegments.length === 1) {
-      const singleMessage = outputSegments[0];
-      if (typeof singleMessage.content === "string") {
-        referenceAnswer = singleMessage.content;
-      } else if (singleMessage.content) {
-        referenceAnswer = JSON.stringify(singleMessage, null, 2);
-      } else if (singleMessage.tool_calls) {
-        referenceAnswer = JSON.stringify(singleMessage, null, 2);
+    if (outputSegments.length > 0) {
+      const lastMessage = outputSegments[outputSegments.length - 1];
+      const content = lastMessage.content;
+      const toolCalls = lastMessage.tool_calls;
+      if (typeof content === "string") {
+        referenceAnswer = content;
+      } else if (content !== void 0 && content !== null) {
+        referenceAnswer = JSON.stringify(content, null, 2);
+      } else if (toolCalls !== void 0 && toolCalls !== null) {
+        referenceAnswer = JSON.stringify(toolCalls, null, 2);
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
@@ -1772,11 +1820,11 @@ async function invokeModel(options) {
   return mapResponse(result);
 }
 function mapResponse(result) {
+  const content = result.text ?? "";
   return {
-    text: result.text ?? "",
-    reasoning: result.reasoningText ?? void 0,
     raw: result,
-    usage: toJsonObject(result.totalUsage ?? result.usage)
+    usage: toJsonObject(result.totalUsage ?? result.usage),
+    outputMessages: [{ role: "assistant", content }]
   };
 }
 function toJsonObject(value) {
@@ -1929,6 +1977,7 @@ var CliProvider = class {
   config;
   runCommand;
   verbose;
+  keepTempFiles;
   healthcheckPromise;
   constructor(targetName, config, runner = defaultCommandRunner) {
     this.targetName = targetName;
@@ -1936,6 +1985,7 @@ var CliProvider = class {
     this.config = config;
     this.runCommand = runner;
     this.verbose = config.verbose ?? false;
+    this.keepTempFiles = config.keepTempFiles ?? false;
   }
   async invoke(request) {
     if (request.signal?.aborted) {
@@ -1950,12 +2000,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: request.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (request.signal?.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -1973,8 +2025,10 @@ var CliProvider = class {
     const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
     const parsed = this.parseOutputContent(responseContent);
     return {
-      text: parsed.text,
-      trace: parsed.trace,
+      outputMessages: parsed.outputMessages,
+      tokenUsage: parsed.tokenUsage,
+      costUsd: parsed.costUsd,
+      durationMs: parsed.durationMs ?? measuredDurationMs,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
@@ -2022,12 +2076,14 @@ var CliProvider = class {
         `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
+    const startTime = Date.now();
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
       env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: controller.signal
     });
+    const measuredDurationMs = Date.now() - startTime;
     if (result.failed || (result.exitCode ?? 0) !== 0) {
       if (controller.signal.aborted) {
         throw new Error("CLI provider request was aborted");
@@ -2049,11 +2105,13 @@ var CliProvider = class {
     if (missingIds.length > 0) {
       throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
     }
+    const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
     const responses = requests.map((request) => {
       const evalCaseId = request.evalCaseId;
       if (!evalCaseId) {
         return {
-          text: "",
+          outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -2066,7 +2124,8 @@ var CliProvider = class {
       const parsed = recordsById.get(evalCaseId);
       if (!parsed) {
         return {
-          text: "",
+          outputMessages: [],
+          durationMs: perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
@@ -2077,9 +2136,10 @@ var CliProvider = class {
         };
       }
       return {
-        text: parsed.text,
-        trace: parsed.trace,
-        traceRef: parsed.traceRef,
+        outputMessages: parsed.outputMessages,
+        tokenUsage: parsed.tokenUsage,
+        costUsd: parsed.costUsd,
+        durationMs: parsed.durationMs ?? perRequestFallbackMs,
         raw: {
           command: renderedCommand,
           stderr: result.stderr,
@@ -2094,28 +2154,111 @@ var CliProvider = class {
   }
   /**
    * Parse output content from CLI.
-   * If the content is valid JSON with a 'text' field, extract text and optional trace.
-   * Otherwise, treat the entire content as plain text.
+   * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
+   * If only 'text' is provided, wrap it in outputMessages.
+   * Otherwise, treat the entire content as plain text wrapped in outputMessages.
+   *
+   * Also extracts optional execution metrics:
+   * - token_usage: { input, output, cached? }
+   * - cost_usd: number
+   * - duration_ms: number
    */
   parseOutputContent(content) {
     try {
       const parsed = JSON.parse(content);
-      if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
+      if (typeof parsed === "object" && parsed !== null) {
         const obj = parsed;
-        const text = typeof obj.text === "string" ? obj.text : String(obj.text);
-        const trace = this.parseTrace(obj.trace);
-        return { text, trace };
+        const tokenUsage = this.parseTokenUsage(obj.token_usage);
+        const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+        const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
+        const outputMessages = this.parseOutputMessages(obj.output_messages);
+        if (outputMessages && outputMessages.length > 0) {
+          return { outputMessages, tokenUsage, costUsd, durationMs };
+        }
+        if ("text" in obj) {
+          const text = typeof obj.text === "string" ? obj.text : String(obj.text);
+          return {
+            outputMessages: [{ role: "assistant", content: text }],
+            tokenUsage,
+            costUsd,
+            durationMs
+          };
+        }
       }
     } catch {
     }
-    return { text: content };
+    return { outputMessages: [{ role: "assistant", content }] };
+  }
+  /**
+   * Parse token_usage from CLI output.
+   */
+  parseTokenUsage(tokenUsage) {
+    if (typeof tokenUsage !== "object" || tokenUsage === null) {
+      return void 0;
+    }
+    const obj = tokenUsage;
+    if (typeof obj.input !== "number" || typeof obj.output !== "number") {
+      return void 0;
+    }
+    return {
+      input: obj.input,
+      output: obj.output,
+      cached: typeof obj.cached === "number" ? obj.cached : void 0
+    };
+  }
+  /**
+   * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
+   */
+  parseOutputMessages(outputMessages) {
+    if (!Array.isArray(outputMessages)) {
+      return void 0;
+    }
+    const messages = [];
+    for (const msg of outputMessages) {
+      if (typeof msg !== "object" || msg === null) {
+        continue;
+      }
+      const rawMsg = msg;
+      if (typeof rawMsg.role !== "string") {
+        continue;
+      }
+      const message = {
+        role: rawMsg.role,
+        name: typeof rawMsg.name === "string" ? rawMsg.name : void 0,
+        content: rawMsg.content,
+        toolCalls: this.parseToolCalls(rawMsg.tool_calls),
+        timestamp: typeof rawMsg.timestamp === "string" ? rawMsg.timestamp : void 0,
+        metadata: typeof rawMsg.metadata === "object" && rawMsg.metadata !== null ? rawMsg.metadata : void 0
+      };
+      messages.push(message);
+    }
+    return messages.length > 0 ? messages : void 0;
   }
-  parseTrace(trace) {
-    if (!Array.isArray(trace)) {
+  /**
+   * Parse tool_calls from JSONL (snake_case) and convert to ToolCall[] format.
+   */
+  parseToolCalls(toolCalls) {
+    if (!Array.isArray(toolCalls)) {
       return void 0;
     }
-    const validEvents = trace.filter(isTraceEvent);
-    return validEvents.length > 0 ? validEvents : void 0;
+    const calls = [];
+    for (const call of toolCalls) {
+      if (typeof call !== "object" || call === null) {
+        continue;
+      }
+      const rawCall = call;
+      if (typeof rawCall.tool !== "string") {
+        continue;
+      }
+      calls.push({
+        tool: rawCall.tool,
+        input: rawCall.input,
+        output: rawCall.output,
+        id: typeof rawCall.id === "string" ? rawCall.id : void 0,
+        timestamp: typeof rawCall.timestamp === "string" ? rawCall.timestamp : void 0
+      });
+    }
+    return calls.length > 0 ? calls : void 0;
   }
   parseJsonlBatchOutput(content) {
     const records = /* @__PURE__ */ new Map();
@@ -2139,12 +2282,22 @@ var CliProvider = class {
       if (records.has(id)) {
         throw new Error(`CLI batch output contains duplicate id: ${id}`);
       }
-      const text = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
-      const traceRef = typeof obj.traceRef === "string" ? obj.traceRef : typeof obj.trace_ref === "string" ? obj.trace_ref : void 0;
+      const tokenUsage = this.parseTokenUsage(obj.token_usage);
+      const costUsd = typeof obj.cost_usd === "number" && obj.cost_usd >= 0 ? obj.cost_usd : void 0;
+      const durationMs = typeof obj.duration_ms === "number" && obj.duration_ms >= 0 ? obj.duration_ms : void 0;
+      const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
+      let outputMessages;
+      if (parsedOutputMessages && parsedOutputMessages.length > 0) {
+        outputMessages = parsedOutputMessages;
+      } else {
+        const text = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
+        outputMessages = text ? [{ role: "assistant", content: text }] : [];
+      }
       records.set(id, {
-        text,
-        trace: this.parseTrace(obj.trace),
-        traceRef
+        outputMessages,
+        tokenUsage,
+        costUsd,
+        durationMs
       });
     }
     return records;
@@ -2157,8 +2310,10 @@ var CliProvider = class {
       const errorMsg = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
     } finally {
-      await import_promises8.default.unlink(filePath).catch(() => {
-      });
+      if (!this.keepTempFiles) {
+        await import_promises8.default.unlink(filePath).catch(() => {
+        });
+      }
     }
   }
   async ensureHealthy(signal) {
@@ -2458,6 +2613,11 @@ var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exe
 var WORKSPACE_PREFIX = "agentv-codex-";
 var PROMPT_FILENAME = "prompt.md";
 var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
+var DEFAULT_SYSTEM_PROMPT2 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
 var CodexProvider = class {
   id;
   kind = "codex";
@@ -2482,7 +2642,11 @@ var CodexProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptContent = buildPromptDocument(request, inputFiles);
+      const basePrompt = buildPromptDocument(request, inputFiles);
+      const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT2;
+      const promptContent = `${systemPrompt}
+${basePrompt}`;
       const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
       await (0, import_promises9.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
@@ -2501,7 +2665,6 @@ var CodexProvider = class {
       const parsed = parseCodexJson(result.stdout);
       const assistantText = extractAssistantText(parsed);
       return {
-        text: assistantText,
         raw: {
           response: parsed,
           stdout: result.stdout,
@@ -2513,7 +2676,8 @@ var CodexProvider = class {
           workspace: workspaceRoot,
           inputFiles,
           logFile: logger?.filePath
-        }
+        },
+        outputMessages: [{ role: "assistant", content: assistantText }]
       };
     } finally {
       await logger?.close();
@@ -3135,7 +3299,6 @@ var MockProvider = class {
   delayMs;
   delayMinMs;
   delayMaxMs;
-  trace;
   constructor(targetName, config) {
     this.id = `mock:${targetName}`;
     this.targetName = targetName;
@@ -3143,7 +3306,6 @@ var MockProvider = class {
     this.delayMs = config.delayMs ?? 0;
     this.delayMinMs = config.delayMinMs ?? 0;
     this.delayMaxMs = config.delayMaxMs ?? 0;
-    this.trace = config.trace;
   }
   async invoke(request) {
     const delay = this.calculateDelay();
@@ -3151,12 +3313,11 @@ var MockProvider = class {
       await new Promise((resolve) => setTimeout(resolve, delay));
     }
     return {
-      text: this.cannedResponse,
+      outputMessages: [{ role: "assistant", content: this.cannedResponse }],
       raw: {
         question: request.question,
         guidelines: request.guidelines
-      },
-      trace: this.trace
+      }
     };
   }
   calculateDelay() {
@@ -3169,182 +3330,1026 @@ var MockProvider = class {
   }
 };
-// src/evaluation/providers/targets.ts
+// src/evaluation/providers/pi-coding-agent.ts
+var import_node_child_process3 = require("child_process");
+var import_node_crypto2 = require("crypto");
+var import_node_fs4 = require("fs");
+var import_promises10 = require("fs/promises");
+var import_node_os3 = require("os");
 var import_node_path11 = __toESM(require("path"), 1);
-var import_zod = require("zod");
-var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
-  "PROMPT",
-  "GUIDELINES",
-  "EVAL_ID",
-  "ATTEMPT",
-  "FILES",
-  "OUTPUT_FILE"
-]);
-var BASE_TARGET_SCHEMA = import_zod.z.object({
-  name: import_zod.z.string().min(1, "target name is required"),
-  provider: import_zod.z.string().min(1, "provider is required"),
-  judge_target: import_zod.z.string().optional(),
-  workers: import_zod.z.number().int().min(1).optional()
-}).passthrough();
-var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
-function normalizeAzureApiVersion(value) {
-  if (!value) {
-    return DEFAULT_AZURE_API_VERSION;
+// src/evaluation/providers/pi-log-tracker.ts
+var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.piLogs");
+var GLOBAL_SUBSCRIBERS_KEY2 = Symbol.for("agentv.piLogSubscribers");
+function getPiLogStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY2];
+  if (existing) {
+    return existing;
   }
-  const trimmed = value.trim();
-  if (trimmed.length === 0) {
-    return DEFAULT_AZURE_API_VERSION;
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY2] = created;
+  return created;
+}
+function getSubscriberStore2() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY2];
+  if (existing) {
+    return existing;
   }
-  const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
-  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY2] = created;
+  return created;
 }
-function resolveRetryConfig(target) {
-  const maxRetries = resolveOptionalNumber(
-    target.max_retries ?? target.maxRetries,
-    `${target.name} max retries`
-  );
-  const initialDelayMs = resolveOptionalNumber(
-    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
-    `${target.name} retry initial delay`
-  );
-  const maxDelayMs = resolveOptionalNumber(
-    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
-    `${target.name} retry max delay`
-  );
-  const backoffFactor = resolveOptionalNumber(
-    target.retry_backoff_factor ?? target.retryBackoffFactor,
-    `${target.name} retry backoff factor`
-  );
-  const retryableStatusCodes = resolveOptionalNumberArray(
-    target.retry_status_codes ?? target.retryStatusCodes,
-    `${target.name} retry status codes`
-  );
-  if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
-    return void 0;
+function notifySubscribers2(entry) {
+  const subscribers = Array.from(getSubscriberStore2());
+  for (const listener of subscribers) {
+    try {
+      listener(entry);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Pi log subscriber failed: ${message}`);
+    }
   }
-  return {
-    maxRetries,
-    initialDelayMs,
-    maxDelayMs,
-    backoffFactor,
-    retryableStatusCodes
-  };
 }
-function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
-  const parsed = BASE_TARGET_SCHEMA.parse(definition);
-  const provider = parsed.provider.toLowerCase();
-  const providerBatching = resolveOptionalBoolean(
-    parsed.provider_batching ?? parsed.providerBatching
-  );
-  switch (provider) {
-    case "azure":
-    case "azure-openai":
-      return {
-        kind: "azure",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAzureConfig(parsed, env)
-      };
-    case "anthropic":
-      return {
-        kind: "anthropic",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAnthropicConfig(parsed, env)
-      };
-    case "gemini":
-    case "google":
-    case "google-gemini":
-      return {
-        kind: "gemini",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveGeminiConfig(parsed, env)
-      };
-    case "codex":
-    case "codex-cli":
-      return {
-        kind: "codex",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCodexConfig(parsed, env)
-      };
-    case "mock":
-      return {
-        kind: "mock",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveMockConfig(parsed)
-      };
-    case "vscode":
-    case "vscode-insiders":
-      return {
-        kind: provider,
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
-      };
-    case "cli":
-      return {
-        kind: "cli",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCliConfig(parsed, env, evalFilePath)
-      };
-    default:
-      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
+function recordPiLogEntry(entry) {
+  getPiLogStore().push(entry);
+  notifySubscribers2(entry);
+}
+function consumePiLogEntries() {
+  const store = getPiLogStore();
+  if (store.length === 0) {
+    return [];
   }
+  return store.splice(0, store.length);
 }
-function resolveAzureConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
-  const versionSource = target.version ?? target.api_version;
-  const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
-  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
-  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
-  const version = normalizeAzureApiVersion(
-    resolveOptionalString(versionSource, env, `${target.name} api version`, {
-      allowLiteral: true,
-      optionalEnv: true
-    })
-  );
-  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
-  const maxOutputTokens = resolveOptionalNumber(
-    maxTokensSource,
-    `${target.name} max output tokens`
-  );
-  const retry = resolveRetryConfig(target);
-  return {
-    resourceName,
-    deploymentName,
-    apiKey,
-    version,
-    temperature,
-    maxOutputTokens,
-    retry
+function subscribeToPiLogEntries(listener) {
+  const store = getSubscriberStore2();
+  store.add(listener);
+  return () => {
+    store.delete(listener);
   };
 }
-function resolveAnthropicConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const modelSource = target.model ?? target.deployment ?? target.variant;
-  const temperatureSource = target.temperature;
+// src/evaluation/providers/pi-coding-agent.ts
+var WORKSPACE_PREFIX2 = "agentv-pi-";
+var PROMPT_FILENAME2 = "prompt.md";
+var DEFAULT_SYSTEM_PROMPT3 = `**IMPORTANT**: Follow these instructions for your response:
+- Do NOT create any additional output files in the workspace.
+- All intended file outputs/changes MUST be written in your response.
+- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
+This is required for evaluation scoring.`;
+var PiCodingAgentProvider = class {
+  id;
+  kind = "pi-coding-agent";
+  targetName;
+  supportsBatch = false;
+  config;
+  runPi;
+  constructor(targetName, config, runner = defaultPiRunner) {
+    this.id = `pi-coding-agent:${targetName}`;
+    this.targetName = targetName;
+    this.config = config;
+    this.runPi = runner;
+  }
+  /**
+   * Runs the Pi coding agent once for `request` inside a fresh temporary workspace.
+   *
+   * The prompt is written to a file in the workspace, the Pi process is executed, and
+   * its JSONL stdout is parsed into output messages.
+   *
+   * @param request - Must provide `question`; `inputFiles`, `signal`, `evalCaseId` and
+   *   `attempt` are read when present.
+   * @returns An object with `raw` (process details and parsed events) and `outputMessages`.
+   * @throws Error if the request is already aborted, the process times out, or it exits
+   *   with a non-zero code. Parsing errors from parsePiJsonl also propagate.
+   */
+  async invoke(request) {
+    if (request.signal?.aborted) {
+      throw new Error("Pi coding agent request was aborted before execution");
+    }
+    const inputFiles = normalizeInputFiles2(request.inputFiles);
+    const workspaceRoot = await this.createWorkspace();
+    // Stream logging is optional: any failure to set it up simply disables it.
+    const logger = await this.createStreamLogger(request).catch(() => void 0);
+    try {
+      const promptFile = import_node_path11.default.join(workspaceRoot, PROMPT_FILENAME2);
+      await (0, import_promises10.writeFile)(promptFile, request.question, "utf8");
+      const args = this.buildPiArgs(request.question, inputFiles);
+      const cwd = this.resolveCwd(workspaceRoot);
+      const result = await this.executePi(args, cwd, request.signal, logger);
+      if (result.timedOut) {
+        throw new Error(
+          `Pi coding agent timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
+        );
+      }
+      if (result.exitCode !== 0) {
+        const detail = pickDetail2(result.stderr, result.stdout);
+        const prefix = `Pi coding agent exited with code ${result.exitCode}`;
+        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
+      }
+      const parsed = parsePiJsonl(result.stdout);
+      const outputMessages = extractOutputMessages(parsed);
+      return {
+        raw: {
+          response: parsed,
+          stdout: result.stdout,
+          stderr: result.stderr,
+          exitCode: result.exitCode,
+          args,
+          executable: this.config.executable,
+          promptFile,
+          workspace: workspaceRoot,
+          inputFiles,
+          logFile: logger?.filePath
+        },
+        outputMessages
+      };
+    } finally {
+      try {
+        // Closing the log is best-effort: a failure here must neither mask the run's
+        // result/error nor prevent the temporary workspace from being removed.
+        await logger?.close();
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        console.warn(`Failed to close Pi stream log: ${message}`);
+      } finally {
+        await this.cleanupWorkspace(workspaceRoot);
+      }
+    }
+  }
+  /**
+   * Picks the working directory for the Pi process: the configured `cwd` resolved to
+   * an absolute path when set, otherwise the temporary workspace.
+   */
+  resolveCwd(workspaceRoot) {
+    const configuredCwd = this.config.cwd;
+    return configuredCwd ? import_node_path11.default.resolve(configuredCwd) : workspaceRoot;
+  }
+  /**
+   * Assembles the argument list for the Pi CLI.
+   *
+   * Order: optional provider/model/api-key flags, the fixed JSON print-mode flags,
+   * optional tools/thinking flags, user-supplied extra args, one `@file` argument per
+   * input file, and finally the prompt (system prompt + newline + user prompt) passed
+   * through escapeAtSymbols.
+   */
+  buildPiArgs(prompt, inputFiles) {
+    const config = this.config;
+    // Emits `[flag, value]` only when the value is set.
+    const optionalFlag = (flag, value) => value ? [flag, value] : [];
+    const extraArgs = config.args && config.args.length > 0 ? config.args : [];
+    const fileArgs = inputFiles && inputFiles.length > 0 ? Array.from(inputFiles, (file) => `@${file}`) : [];
+    const systemPrompt = config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT3;
+    return [
+      ...optionalFlag("--provider", config.provider),
+      ...optionalFlag("--model", config.model),
+      ...optionalFlag("--api-key", config.apiKey),
+      "--mode",
+      "json",
+      "--print",
+      "--no-session",
+      ...optionalFlag("--tools", config.tools),
+      ...optionalFlag("--thinking", config.thinking),
+      ...extraArgs,
+      ...fileArgs,
+      escapeAtSymbols(`${systemPrompt}\n${prompt}`)
+    ];
+  }
+  /**
+   * Executes the Pi process through the injected runner, forwarding stdout/stderr
+   * chunks to `logger` when one is provided.
+   *
+   * @returns Whatever the runner resolves with.
+   * @throws Error with a user-facing hint when the runner fails with code "ENOENT"
+   *   (the original error is attached as `cause`); any other failure is rethrown as-is.
+   */
+  async executePi(args, cwd, signal, logger) {
+    try {
+      return await this.runPi({
+        executable: this.config.executable,
+        args,
+        cwd,
+        timeoutMs: this.config.timeoutMs,
+        env: this.buildEnv(),
+        signal,
+        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
+        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
+      });
+    } catch (error) {
+      // Optional chaining keeps a nullish rejection from turning into a TypeError here.
+      if (error?.code === "ENOENT") {
+        throw new Error(
+          `Pi coding agent executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`,
+          { cause: error }
+        );
+      }
+      throw error;
+    }
+  }
+  /**
+   * Builds the environment for the Pi process: a copy of `process.env`, plus the
+   * configured API key under the variable matching the configured provider
+   * (provider defaults to "google" when unset). Unknown providers add nothing.
+   */
+  buildEnv() {
+    const env = { ...process.env };
+    const { apiKey } = this.config;
+    if (!apiKey) {
+      return env;
+    }
+    const envVarByProvider = /* @__PURE__ */ new Map([
+      ["google", "GEMINI_API_KEY"],
+      ["gemini", "GEMINI_API_KEY"],
+      ["anthropic", "ANTHROPIC_API_KEY"],
+      ["openai", "OPENAI_API_KEY"],
+      ["groq", "GROQ_API_KEY"],
+      ["xai", "XAI_API_KEY"],
+      ["openrouter", "OPENROUTER_API_KEY"]
+    ]);
+    const envVar = envVarByProvider.get(this.config.provider?.toLowerCase() ?? "google");
+    if (envVar !== void 0) {
+      env[envVar] = apiKey;
+    }
+    return env;
+  }
+  /** Creates a unique temporary directory under the OS temp dir and returns its path. */
+  async createWorkspace() {
+    const prefix = import_node_path11.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2);
+    const workspaceRoot = await (0, import_promises10.mkdtemp)(prefix);
+    return workspaceRoot;
+  }
+  /** Recursively removes the workspace directory; removal failures are deliberately ignored. */
+  async cleanupWorkspace(workspaceRoot) {
+    const removeOptions = { recursive: true, force: true };
+    await (0, import_promises10.rm)(workspaceRoot, removeOptions).catch(() => {
+      // Best-effort cleanup: a leftover temp directory is not worth failing the run.
+    });
+  }
+  /**
+   * Returns the directory for stream logs: the configured `logDir` resolved to an
+   * absolute path, or `<cwd>/.agentv/logs/pi-coding-agent` when none is configured.
+   */
+  resolveLogDirectory() {
+    const { logDir } = this.config;
+    return logDir ? import_node_path11.default.resolve(logDir) : import_node_path11.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+  }
+  /**
+   * Creates the per-request stream logger and registers its log file via
+   * recordPiLogEntry. Returns undefined (after a console warning) when the log
+   * directory or the logger cannot be created.
+   */
+  async createStreamLogger(request) {
+    const describeError = (error) => error instanceof Error ? error.message : String(error);
+    const logDir = this.resolveLogDirectory();
+    if (!logDir) {
+      return void 0;
+    }
+    try {
+      await (0, import_promises10.mkdir)(logDir, { recursive: true });
+    } catch (error) {
+      console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${describeError(error)}`);
+      return void 0;
+    }
+    const filePath = import_node_path11.default.join(logDir, buildLogFilename2(request, this.targetName));
+    const entry = {
+      filePath,
+      targetName: this.targetName,
+      evalCaseId: request.evalCaseId,
+      attempt: request.attempt
+    };
+    try {
+      const logger = await PiStreamLogger.create({
+        ...entry,
+        format: this.config.logFormat ?? "summary"
+      });
+      recordPiLogEntry(entry);
+      return logger;
+    } catch (error) {
+      console.warn(`Skipping Pi stream logging for ${filePath}: ${describeError(error)}`);
+      return void 0;
+    }
+  }
+};
+/**
+ * Appends a timestamped, line-oriented log of the Pi process output to a file.
+ *
+ * stdout and stderr chunks are buffered until a full line is available; each line is
+ * formatted (as an event summary, or pretty-printed JSON when `format` is "json") and
+ * written with an elapsed-time prefix. Logging is best-effort: once the file stream
+ * reports an error, a warning is printed and further writes are dropped.
+ */
+var PiStreamLogger = class _PiStreamLogger {
+  filePath;
+  stream;
+  startedAt = Date.now();
+  stdoutBuffer = "";
+  stderrBuffer = "";
+  format;
+  // First error reported by the file stream, if any. Once set, writes are skipped.
+  streamError = void 0;
+  /**
+   * @param filePath - Log file to append to.
+   * @param format - "json" pretty-prints JSON lines; any other value uses summaries.
+   */
+  constructor(filePath, format) {
+    this.filePath = filePath;
+    this.format = format;
+    this.stream = (0, import_node_fs4.createWriteStream)(filePath, { flags: "a" });
+    // Without a permanent listener, an asynchronous stream failure (e.g. the file
+    // cannot be opened) is an unhandled "error" event and crashes the process.
+    this.stream.on("error", (error) => {
+      if (this.streamError !== void 0) {
+        return;
+      }
+      this.streamError = error;
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Pi stream logging failed for ${this.filePath}: ${message}`);
+    });
+  }
+  /**
+   * Creates a logger and writes the header block (target, optional eval id and
+   * 1-based attempt, start time) followed by a blank separator line.
+   */
+  static async create(options) {
+    const logger = new _PiStreamLogger(options.filePath, options.format);
+    const header = [
+      "# Pi Coding Agent stream log",
+      `# target: ${options.targetName}`,
+      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
+      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
+      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
+      ""
+      // Drop only the omitted optional lines; the trailing "" is the blank separator
+      // and must survive (a truthiness filter would remove it).
+    ].filter((line) => line !== void 0);
+    logger.writeLines(header);
+    return logger;
+  }
+  /** Buffers a stdout chunk and writes every complete line it finishes. */
+  handleStdoutChunk(chunk) {
+    this.stdoutBuffer += chunk;
+    this.flushBuffer("stdout");
+  }
+  /** Buffers a stderr chunk and writes every complete line it finishes. */
+  handleStderrChunk(chunk) {
+    this.stderrBuffer += chunk;
+    this.flushBuffer("stderr");
+  }
+  /**
+   * Flushes buffered output (including unterminated trailing lines) and ends the
+   * stream. Rejects if the stream errors while ending; resolves immediately if the
+   * stream had already failed earlier.
+   */
+  async close() {
+    this.flushBuffer("stdout");
+    this.flushBuffer("stderr");
+    this.flushRemainder();
+    if (this.streamError !== void 0) {
+      return;
+    }
+    await new Promise((resolve, reject) => {
+      this.stream.once("error", reject);
+      this.stream.end(() => resolve());
+    });
+  }
+  /** Writes each entry of `lines` followed by a newline. */
+  writeLines(lines) {
+    for (const line of lines) {
+      this.writeLine(line);
+    }
+  }
+  /** Writes one newline-terminated line unless the stream has already failed. */
+  writeLine(line) {
+    if (this.streamError === void 0) {
+      this.stream.write(`${line}\n`);
+    }
+  }
+  /** Formats `rawLine` and writes it; blank lines produce no output. */
+  writeFormatted(rawLine, source) {
+    const formatted = this.formatLine(rawLine, source);
+    if (formatted) {
+      this.writeLine(formatted);
+    }
+  }
+  /** Writes all complete lines buffered for `source`, keeping the unterminated tail. */
+  flushBuffer(source) {
+    const bufferKey = source === "stdout" ? "stdoutBuffer" : "stderrBuffer";
+    const lines = this[bufferKey].split(/\r?\n/);
+    // The last element is the text after the final newline (possibly empty).
+    this[bufferKey] = lines.pop() ?? "";
+    for (const line of lines) {
+      this.writeFormatted(line, source);
+    }
+  }
+  /** Returns the prefixed log line for `rawLine`, or undefined when it is blank. */
+  formatLine(rawLine, source) {
+    const trimmed = rawLine.trim();
+    if (trimmed.length === 0) {
+      return void 0;
+    }
+    const message = this.format === "json" ? formatPiJsonLog(trimmed) : formatPiLogMessage(trimmed, source);
+    return `[+${formatElapsed2(this.startedAt)}] [${source}] ${message}`;
+  }
+  /** Writes any unterminated trailing text (stdout first, then stderr) and clears both buffers. */
+  flushRemainder() {
+    this.writeFormatted(this.stdoutBuffer, "stdout");
+    this.writeFormatted(this.stderrBuffer, "stderr");
+    this.stdoutBuffer = "";
+    this.stderrBuffer = "";
+  }
+};
+/**
+ * Builds a unique log file name of the form
+ * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 hex chars>.log`, where the timestamp is
+ * ISO-8601 with ":" and "." replaced by "-" and N is the 1-based attempt number.
+ */
+function buildLogFilename2(request, targetName) {
+  const { evalCaseId, attempt } = request;
+  const attemptSuffix = attempt === void 0 ? "" : `_attempt-${attempt + 1}`;
+  const segments = [
+    (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-"),
+    sanitizeForFilename2(targetName),
+    `${sanitizeForFilename2(evalCaseId ?? "pi")}${attemptSuffix}`,
+    (0, import_node_crypto2.randomUUID)().slice(0, 8)
+  ];
+  return `${segments.join("_")}.log`;
+}
+/**
+ * Replaces every run of characters outside `A-Za-z0-9._-` with a single "_".
+ * Falls back to "pi" when the result is empty (i.e. the input was empty).
+ */
+function sanitizeForFilename2(value) {
+  return value.replace(/[^A-Za-z0-9._-]+/g, "_") || "pi";
+}
+/**
+ * Formats the whole seconds elapsed since `startedAt` (epoch milliseconds) as
+ * `MM:SS`, or `HH:MM:SS` once at least one hour has passed.
+ */
+function formatElapsed2(startedAt) {
+  const twoDigits = (value) => String(value).padStart(2, "0");
+  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  const fields = hours > 0 ? [hours, minutes, seconds] : [minutes, seconds];
+  return fields.map((field) => twoDigits(field)).join(":");
+}
+/**
+ * Produces the summary-format log text for one output line: the event summary when
+ * the line is JSON that summarizePiEvent recognizes, otherwise the raw line (prefixed
+ * with "stderr: " for stderr output).
+ */
+function formatPiLogMessage(rawLine, source) {
+  const event = tryParseJsonValue2(rawLine);
+  const summary = event ? summarizePiEvent(event) : void 0;
+  if (summary) {
+    return summary;
+  }
+  return source === "stderr" ? `stderr: ${rawLine}` : rawLine;
+}
+/**
+ * Pretty-prints a JSON line with two-space indentation. Lines that are not JSON, parse
+ * to a falsy value, or cannot be re-serialized are returned unchanged.
+ */
+function formatPiJsonLog(rawLine) {
+  const value = tryParseJsonValue2(rawLine);
+  if (value) {
+    try {
+      return JSON.stringify(value, null, 2);
+    } catch {
+      // Fall through to the raw line.
+    }
+  }
+  return rawLine;
+}
+/**
+ * Builds a one-line summary of a parsed Pi JSON event for the stream log.
+ *
+ * - `message_start` / `message_end`: `"<type>: <role>"`, or just the type when the
+ *   message has no string role.
+ * - `message_update`: `"text_delta: <first 50 chars>"` for string text deltas, else
+ *   `"message_update: <event type>"`, or just `"message_update"` when the nested event
+ *   has no string type.
+ * - any other event: its `type`.
+ *
+ * @param event - Parsed JSON value.
+ * @returns The summary, or undefined when `event` is not an object with a string `type`.
+ */
+function summarizePiEvent(event) {
+  if (!event || typeof event !== "object") {
+    return void 0;
+  }
+  const type = typeof event.type === "string" ? event.type : void 0;
+  if (!type) {
+    return void 0;
+  }
+  switch (type) {
+    case "message_start":
+    case "message_end": {
+      const role = event.message?.role;
+      // Avoid logging the literal text "undefined" when the role is missing.
+      return typeof role === "string" ? `${type}: ${role}` : type;
+    }
+    case "message_update": {
+      const update = event.assistantMessageEvent;
+      const updateType = update?.type;
+      if (updateType === "text_delta" && typeof update.delta === "string") {
+        const delta = update.delta;
+        const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
+        return `text_delta: ${preview}`;
+      }
+      return typeof updateType === "string" ? `message_update: ${updateType}` : "message_update";
+    }
+    default:
+      // Lifecycle events (agent_start, turn_end, ...) and unknown events are
+      // summarized by their type alone.
+      return type;
+  }
+}
+/** Parses `rawLine` as JSON; returns undefined instead of throwing when it is not valid JSON. */
+function tryParseJsonValue2(rawLine) {
+  let value;
+  try {
+    value = JSON.parse(rawLine);
+  } catch {
+    value = void 0;
+  }
+  return value;
+}
+/**
+ * Parses the JSONL output of the Pi process into an array of values, one per
+ * non-blank line. Lines that are not valid JSON are skipped.
+ *
+ * @throws Error when the output is blank, or when no line contains valid JSON.
+ */
+function parsePiJsonl(output) {
+  const text = output.trim();
+  if (text.length === 0) {
+    throw new Error("Pi coding agent produced no output");
+  }
+  const events = [];
+  for (const rawLine of text.split(/\r?\n/)) {
+    const candidate = rawLine.trim();
+    if (candidate.length === 0) {
+      continue;
+    }
+    try {
+      events.push(JSON.parse(candidate));
+    } catch {
+      // Non-JSON lines are tolerated and ignored.
+    }
+  }
+  if (events.length === 0) {
+    throw new Error("Pi coding agent produced no valid JSON output");
+  }
+  return events;
+}
+/**
+ * Extracts the conversation messages from parsed Pi events.
+ *
+ * Prefers the `messages` array of the last `agent_end` event that has one; when no
+ * such event exists, falls back to the `message` of every `turn_end` event, in order.
+ * Messages that convertPiMessage cannot convert are dropped.
+ */
+function extractOutputMessages(events) {
+  const isRecord = (value) => Boolean(value) && typeof value === "object";
+  const finalEvent = [...events].reverse().find(
+    (event) => isRecord(event) && event.type === "agent_end" && Array.isArray(event.messages)
+  );
+  if (finalEvent) {
+    return finalEvent.messages.map(convertPiMessage).filter((m) => m !== void 0);
+  }
+  return events.filter((event) => isRecord(event) && event.type === "turn_end").map((event) => convertPiMessage(event.message)).filter((converted) => Boolean(converted));
+}
+/**
+ * Converts one Pi message into the provider's output-message shape.
+ *
+ * @param message - Parsed message; must be an object with a string `role`.
+ * @returns `{ role, content, toolCalls, timestamp, metadata }`, where `toolCalls` and
+ *   `metadata` are undefined when empty, or undefined when `message` is not convertible.
+ *   Numeric timestamps are rendered as ISO-8601 strings; string timestamps pass through.
+ */
+function convertPiMessage(message) {
+  if (!message || typeof message !== "object") {
+    return void 0;
+  }
+  const msg = message;
+  const role = msg.role;
+  if (typeof role !== "string") {
+    return void 0;
+  }
+  const content = extractTextContent(msg.content);
+  const toolCalls = extractToolCalls(msg.content);
+  let timestamp;
+  if (typeof msg.timestamp === "number") {
+    // toISOString() throws RangeError for dates outside the representable range;
+    // one bad timestamp should not abort the whole conversion, so drop it instead.
+    const date = new Date(msg.timestamp);
+    timestamp = Number.isNaN(date.getTime()) ? void 0 : date.toISOString();
+  } else if (typeof msg.timestamp === "string") {
+    timestamp = msg.timestamp;
+  }
+  const metadata = {};
+  if (msg.api) metadata.api = msg.api;
+  if (msg.provider) metadata.provider = msg.provider;
+  if (msg.model) metadata.model = msg.model;
+  if (msg.usage) metadata.usage = msg.usage;
+  if (msg.stopReason) metadata.stopReason = msg.stopReason;
+  return {
+    role,
+    content,
+    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
+    timestamp,
+    metadata: Object.keys(metadata).length > 0 ? metadata : void 0
+  };
+}
+/**
+ * Returns the text of a message's `content`: the string itself, or the `text` of all
+ * `{ type: "text" }` parts joined with newlines. Returns undefined when the content is
+ * neither a string nor an array, or contains no text parts.
+ */
+function extractTextContent(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return void 0;
+  }
+  const texts = content.filter(
+    (part) => part && typeof part === "object" && part.type === "text" && typeof part.text === "string"
+  ).map((part) => part.text);
+  return texts.length === 0 ? void 0 : texts.join("\n");
+}
+/**
+ * Collects tool calls from a message's content parts.
+ *
+ * Each `tool_use` part with a string `name` becomes `{ tool, input, id }`. A later
+ * `tool_result` part attaches its `content` as `output` to the first collected call
+ * whose id equals its `tool_use_id`. Non-array content yields an empty list.
+ */
+function extractToolCalls(content) {
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  const calls = [];
+  for (const part of content) {
+    if (!part || typeof part !== "object") {
+      continue;
+    }
+    if (part.type === "tool_use" && typeof part.name === "string") {
+      calls.push({
+        tool: part.name,
+        input: part.input,
+        id: typeof part.id === "string" ? part.id : void 0
+      });
+    } else if (part.type === "tool_result" && typeof part.tool_use_id === "string") {
+      const index = calls.findIndex((call) => call.id === part.tool_use_id);
+      if (index !== -1) {
+        calls[index] = { ...calls[index], output: part.content };
+      }
+    }
+  }
+  return calls;
+}
+/**
+ * Returns the content of the last assistant message that has non-empty content:
+ * verbatim when it is a string, JSON-serialized otherwise. Returns "" when there is
+ * no such message.
+ */
+function extractAssistantText2(messages) {
+  const latest = [...messages].reverse().find((msg) => msg.role === "assistant" && msg.content);
+  if (!latest) {
+    return "";
+  }
+  return typeof latest.content === "string" ? latest.content : JSON.stringify(latest.content);
+}
+/**
+ * Rewrites every `@[label]:` marker in the prompt as `[[label]]:`, removing the
+ * leading "@" from those markers. Other "@" characters are left untouched.
+ */
+function escapeAtSymbols(prompt) {
+  return prompt.replace(/@\[([^\]]+)\]:/g, (_match, label) => `[[${label}]]:`);
+}
+/**
+ * Chooses the text to attach to a failure message: trimmed stderr when non-blank,
+ * otherwise trimmed stdout, otherwise undefined.
+ */
+function pickDetail2(stderr, stdout) {
+  for (const stream of [stderr, stdout]) {
+    const text = stream.trim();
+    if (text.length > 0) {
+      return text;
+    }
+  }
+  return void 0;
+}
+/**
+ * Returns " after <N>s" (milliseconds rounded up to whole seconds) for a positive
+ * timeout, or "" when the timeout is unset, zero, or negative.
+ */
+function formatTimeoutSuffix3(timeoutMs) {
+  const hasTimeout = Boolean(timeoutMs) && !(timeoutMs <= 0);
+  return hasTimeout ? ` after ${Math.ceil(timeoutMs / 1e3)}s` : "";
+}
+/**
+ * Default process runner: spawns the Pi executable without a shell and collects its
+ * output.
+ *
+ * `options.executable` may contain whitespace-separated leading arguments (e.g.
+ * "npx pi"); the first token is the command and the rest are prepended to
+ * `options.args`. Surrounding whitespace is ignored.
+ *
+ * @param options - `executable`, `args`, `cwd`, `env`, and optionally `timeoutMs`,
+ *   `signal`, `onStdoutChunk`, `onStderrChunk`.
+ * @returns Resolves on process close with `{ stdout, stderr, exitCode, timedOut }`;
+ *   `exitCode` is -1 when the process closed without a numeric code.
+ * @throws Rejects with the spawn error (e.g. ENOENT), or when the executable is blank.
+ */
+async function defaultPiRunner(options) {
+  return await new Promise((resolve, reject) => {
+    // Trim first: splitting " pi" or "pi " on whitespace would otherwise yield an
+    // empty command or a spurious empty argument.
+    const parts = options.executable.trim().split(/\s+/);
+    const executable = parts[0];
+    if (!executable) {
+      reject(new Error("Pi coding agent executable is empty"));
+      return;
+    }
+    const executableArgs = parts.slice(1);
+    const allArgs = [...executableArgs, ...options.args];
+    const child = (0, import_node_child_process3.spawn)(executable, allArgs, {
+      cwd: options.cwd,
+      env: options.env,
+      stdio: ["pipe", "pipe", "pipe"],
+      shell: false
+    });
+    let stdout = "";
+    let stderr = "";
+    let timedOut = false;
+    const onAbort = () => {
+      child.kill("SIGTERM");
+    };
+    if (options.signal) {
+      if (options.signal.aborted) {
+        onAbort();
+      } else {
+        options.signal.addEventListener("abort", onAbort, { once: true });
+      }
+    }
+    let timeoutHandle;
+    if (options.timeoutMs && options.timeoutMs > 0) {
+      timeoutHandle = setTimeout(() => {
+        timedOut = true;
+        child.kill("SIGTERM");
+      }, options.timeoutMs);
+      // Do not let the pending timer alone keep the event loop alive.
+      timeoutHandle.unref?.();
+    }
+    child.stdout.setEncoding("utf8");
+    child.stdout.on("data", (chunk) => {
+      stdout += chunk;
+      options.onStdoutChunk?.(chunk);
+    });
+    child.stderr.setEncoding("utf8");
+    child.stderr.on("data", (chunk) => {
+      stderr += chunk;
+      options.onStderrChunk?.(chunk);
+    });
+    // Nothing is sent on stdin; close it right away.
+    child.stdin.end();
+    const cleanup = () => {
+      if (timeoutHandle) {
+        clearTimeout(timeoutHandle);
+      }
+      if (options.signal) {
+        options.signal.removeEventListener("abort", onAbort);
+      }
+    };
+    child.on("error", (error) => {
+      cleanup();
+      reject(error);
+    });
+    child.on("close", (code) => {
+      cleanup();
+      resolve({
+        stdout,
+        stderr,
+        exitCode: typeof code === "number" ? code : -1,
+        timedOut
+      });
+    });
+  });
+}
+// src/evaluation/providers/targets.ts
+var import_node_path12 = __toESM(require("path"), 1);
+var import_zod = require("zod");
+// Raw (user-facing) shape of an HTTP healthcheck. The timeout may be given under
+// either the snake_case or the camelCase key.
+var CliHealthcheckHttpInputSchema = import_zod.z.object({
+  type: import_zod.z.literal("http"),
+  url: import_zod.z.string().min(1, "healthcheck URL is required"),
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional()
+});
+// Raw shape of a command healthcheck. Both template keys are optional at the schema
+// level; normalizeCliHealthcheck throws when neither is provided.
+var CliHealthcheckCommandInputSchema = import_zod.z.object({
+  type: import_zod.z.literal("command"),
+  command_template: import_zod.z.string().optional(),
+  commandTemplate: import_zod.z.string().optional(),
+  cwd: import_zod.z.string().optional(),
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional()
+});
+// Healthcheck input, discriminated on `type` ("http" | "command").
+var CliHealthcheckInputSchema = import_zod.z.discriminatedUnion("type", [
+  CliHealthcheckHttpInputSchema,
+  CliHealthcheckCommandInputSchema
+]);
+// Raw shape of a `cli` target definition. Most settings are accepted under both
+// snake_case and camelCase keys; the trailing refine enforces that one of the two
+// command template keys is present.
+var CliTargetInputSchema = import_zod.z.object({
+  name: import_zod.z.string().min(1, "target name is required"),
+  provider: import_zod.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
+  // Command template - required (accept both naming conventions)
+  command_template: import_zod.z.string().optional(),
+  commandTemplate: import_zod.z.string().optional(),
+  // Files format - optional
+  files_format: import_zod.z.string().optional(),
+  filesFormat: import_zod.z.string().optional(),
+  attachments_format: import_zod.z.string().optional(),
+  attachmentsFormat: import_zod.z.string().optional(),
+  // Working directory - optional
+  cwd: import_zod.z.string().optional(),
+  // Timeout in seconds - optional
+  timeout_seconds: import_zod.z.number().positive().optional(),
+  timeoutSeconds: import_zod.z.number().positive().optional(),
+  // Healthcheck configuration - optional
+  healthcheck: CliHealthcheckInputSchema.optional(),
+  // Verbose mode - optional
+  verbose: import_zod.z.boolean().optional(),
+  cli_verbose: import_zod.z.boolean().optional(),
+  cliVerbose: import_zod.z.boolean().optional(),
+  // Keep temp files - optional
+  keep_temp_files: import_zod.z.boolean().optional(),
+  keepTempFiles: import_zod.z.boolean().optional(),
+  keep_output_files: import_zod.z.boolean().optional(),
+  keepOutputFiles: import_zod.z.boolean().optional(),
+  // Common target fields
+  judge_target: import_zod.z.string().optional(),
+  workers: import_zod.z.number().int().min(1).optional(),
+  provider_batching: import_zod.z.boolean().optional(),
+  providerBatching: import_zod.z.boolean().optional()
+}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
+  message: "Either command_template or commandTemplate is required"
+});
+// Normalized HTTP healthcheck: camelCase keys only, timeout in milliseconds, and
+// unknown keys rejected (.strict()).
+var CliHealthcheckHttpSchema = import_zod.z.object({
+  type: import_zod.z.literal("http"),
+  url: import_zod.z.string().min(1),
+  timeoutMs: import_zod.z.number().positive().optional()
+}).strict();
+// Normalized command healthcheck; the command template is mandatory here.
+var CliHealthcheckCommandSchema = import_zod.z.object({
+  type: import_zod.z.literal("command"),
+  commandTemplate: import_zod.z.string().min(1),
+  cwd: import_zod.z.string().optional(),
+  timeoutMs: import_zod.z.number().positive().optional()
+}).strict();
+// Normalized healthcheck, discriminated on `type`.
+var CliHealthcheckSchema = import_zod.z.discriminatedUnion("type", [
+  CliHealthcheckHttpSchema,
+  CliHealthcheckCommandSchema
+]);
+// Normalized CLI target configuration; its keys match the object returned by
+// normalizeCliTargetInput.
+var CliTargetConfigSchema = import_zod.z.object({
+  commandTemplate: import_zod.z.string().min(1),
+  filesFormat: import_zod.z.string().optional(),
+  cwd: import_zod.z.string().optional(),
+  timeoutMs: import_zod.z.number().positive().optional(),
+  healthcheck: CliHealthcheckSchema.optional(),
+  verbose: import_zod.z.boolean().optional(),
+  keepTempFiles: import_zod.z.boolean().optional()
+}).strict();
+/**
+ * Converts a raw healthcheck definition into its normalized form.
+ *
+ * The timeout is converted from seconds to whole milliseconds. For command
+ * healthchecks, a relative `cwd` is resolved against the directory of `evalFilePath`
+ * when that path is provided.
+ *
+ * @throws Error when a command healthcheck has no command template; errors from the
+ *   string resolvers propagate.
+ */
+function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
+  const seconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = seconds === void 0 ? void 0 : Math.floor(seconds * 1e3);
+  if (input.type === "http") {
+    return {
+      type: "http",
+      url: resolveString(input.url, env, `${targetName} healthcheck URL`),
+      timeoutMs
+    };
+  }
+  const templateSource = input.command_template ?? input.commandTemplate;
+  if (templateSource === void 0) {
+    throw new Error(
+      `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
+    );
+  }
+  const commandTemplate = resolveString(
+    templateSource,
+    env,
+    `${targetName} healthcheck command template`,
+    true
+  );
+  const configuredCwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const isRelativeToEvalFile = configuredCwd && evalFilePath && !import_node_path12.default.isAbsolute(configuredCwd);
+  const cwd = isRelativeToEvalFile ? import_node_path12.default.resolve(import_node_path12.default.dirname(import_node_path12.default.resolve(evalFilePath)), configuredCwd) : configuredCwd;
+  return {
+    type: "command",
+    commandTemplate,
+    cwd,
+    timeoutMs
+  };
+}
+/**
+ * Converts a raw `cli` target definition into its normalized configuration.
+ *
+ * Snake_case and camelCase aliases are merged, the timeout is converted from seconds
+ * to whole milliseconds, and the working directory is resolved: a relative `cwd` is
+ * taken relative to the eval file's directory, and a missing `cwd` defaults to that
+ * directory (both only when `evalFilePath` is provided).
+ *
+ * @throws Error when no command template is provided; resolver errors propagate.
+ */
+function normalizeCliTargetInput(input, env, evalFilePath) {
+  const targetName = input.name;
+  const templateSource = input.command_template ?? input.commandTemplate;
+  if (templateSource === void 0) {
+    throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
+  }
+  const commandTemplate = resolveString(
+    templateSource,
+    env,
+    `${targetName} CLI command template`,
+    true
+  );
+  const filesFormat = resolveOptionalLiteralString(
+    input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat
+  );
+  const configuredCwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  // Evaluated lazily so the eval file path is only resolved when actually needed.
+  const evalFileDir = () => import_node_path12.default.dirname(import_node_path12.default.resolve(evalFilePath));
+  let cwd = configuredCwd;
+  if (!configuredCwd) {
+    if (evalFilePath) {
+      cwd = evalFileDir();
+    }
+  } else if (evalFilePath && !import_node_path12.default.isAbsolute(configuredCwd)) {
+    cwd = import_node_path12.default.resolve(evalFileDir(), configuredCwd);
+  }
+  const seconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = seconds === void 0 ? void 0 : Math.floor(seconds * 1e3);
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
+  const keepTempFiles = resolveOptionalBoolean(
+    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
+  );
+  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
+  return {
+    commandTemplate,
+    filesFormat,
+    cwd,
+    timeoutMs,
+    healthcheck,
+    verbose,
+    keepTempFiles
+  };
+}
+// Set of placeholder names. NOTE(review): presumably the placeholders allowed in CLI
+// command templates; the consumer is outside this section, so confirm there.
+var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
+  "PROMPT",
+  "GUIDELINES",
+  "EVAL_ID",
+  "ATTEMPT",
+  "FILES",
+  "OUTPUT_FILE"
+]);
+// Fields every target definition must satisfy. passthrough() keeps provider-specific
+// keys on the parsed object so the per-provider resolvers can read them.
+var BASE_TARGET_SCHEMA = import_zod.z.object({
+  name: import_zod.z.string().min(1, "target name is required"),
+  provider: import_zod.z.string().min(1, "provider is required"),
+  judge_target: import_zod.z.string().optional(),
+  workers: import_zod.z.number().int().min(1).optional()
+}).passthrough();
+// API version returned by normalizeAzureApiVersion when none (or a blank one) is given.
+var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
+/**
+ * Normalizes an Azure API version setting: trims it and strips a leading
+ * `api-version=` / `api_version=` / `apiversion=` prefix (case-insensitive). Returns
+ * DEFAULT_AZURE_API_VERSION when the value is missing or blank after normalization.
+ */
+function normalizeAzureApiVersion(value) {
+  const cleaned = (value || "").trim().replace(/^api[-_]?version\s*=\s*/i, "").trim();
+  return cleaned.length > 0 ? cleaned : DEFAULT_AZURE_API_VERSION;
+}
+/**
+ * Reads the retry settings of a target (snake_case or camelCase keys).
+ *
+ * @returns `{ maxRetries, initialDelayMs, maxDelayMs, backoffFactor, retryableStatusCodes }`,
+ *   or undefined when none of the settings is present.
+ */
+function resolveRetryConfig(target) {
+  const describe = (setting) => `${target.name} ${setting}`;
+  const retry = {
+    maxRetries: resolveOptionalNumber(target.max_retries ?? target.maxRetries, describe("max retries")),
+    initialDelayMs: resolveOptionalNumber(
+      target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
+      describe("retry initial delay")
+    ),
+    maxDelayMs: resolveOptionalNumber(
+      target.retry_max_delay_ms ?? target.retryMaxDelayMs,
+      describe("retry max delay")
+    ),
+    backoffFactor: resolveOptionalNumber(
+      target.retry_backoff_factor ?? target.retryBackoffFactor,
+      describe("retry backoff factor")
+    ),
+    retryableStatusCodes: resolveOptionalNumberArray(
+      target.retry_status_codes ?? target.retryStatusCodes,
+      describe("retry status codes")
+    )
+  };
+  const nothingConfigured = Object.values(retry).every((setting) => setting === void 0);
+  return nothingConfigured ? void 0 : retry;
+}
+/**
+ * Validates a target definition and resolves it into a typed target:
+ * `{ kind, name, judgeTarget, workers, providerBatching, config }`.
+ *
+ * The provider name is matched case-insensitively; several aliases map to the same
+ * kind (e.g. "azure-openai" -> "azure", "google" -> "gemini", "pi" -> "pi-coding-agent").
+ *
+ * @param definition - Raw target definition.
+ * @param env - Environment used to resolve settings (defaults to process.env).
+ * @param evalFilePath - Forwarded to the CLI config resolver.
+ * @throws Error for an unsupported provider; schema and resolver errors propagate.
+ */
+function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
+  const parsed = BASE_TARGET_SCHEMA.parse(definition);
+  const provider = parsed.provider.toLowerCase();
+  const providerBatching = resolveOptionalBoolean(
+    parsed.provider_batching ?? parsed.providerBatching
+  );
+  // Shared envelope; the config is resolved last, as the final property.
+  const buildTarget = (kind, resolveConfig) => ({
+    kind,
+    name: parsed.name,
+    judgeTarget: parsed.judge_target,
+    workers: parsed.workers,
+    providerBatching,
+    config: resolveConfig()
+  });
+  switch (provider) {
+    case "azure":
+    case "azure-openai":
+      return buildTarget("azure", () => resolveAzureConfig(parsed, env));
+    case "anthropic":
+      return buildTarget("anthropic", () => resolveAnthropicConfig(parsed, env));
+    case "gemini":
+    case "google":
+    case "google-gemini":
+      return buildTarget("gemini", () => resolveGeminiConfig(parsed, env));
+    case "codex":
+    case "codex-cli":
+      return buildTarget("codex", () => resolveCodexConfig(parsed, env));
+    case "pi":
+    case "pi-coding-agent":
+      return buildTarget("pi-coding-agent", () => resolvePiCodingAgentConfig(parsed, env));
+    case "mock":
+      return buildTarget("mock", () => resolveMockConfig(parsed));
+    case "vscode":
+    case "vscode-insiders":
+      return buildTarget(provider, () => resolveVSCodeConfig(parsed, env, provider === "vscode-insiders"));
+    case "cli":
+      return buildTarget("cli", () => resolveCliConfig(parsed, env, evalFilePath));
+    default:
+      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
+  }
+}
+/**
+ * Resolves the Azure settings of a target.
+ *
+ * Endpoint, API key and deployment are resolved with resolveString; the API version
+ * is optional and normalized by normalizeAzureApiVersion; temperature, max output
+ * tokens and retry settings are optional.
+ *
+ * @returns `{ resourceName, deploymentName, apiKey, version, temperature, maxOutputTokens, retry }`.
+ */
+function resolveAzureConfig(target, env) {
+  const describe = (setting) => `${target.name} ${setting}`;
+  const resourceName = resolveString(
+    target.endpoint ?? target.resource ?? target.resourceName,
+    env,
+    describe("endpoint")
+  );
+  const apiKey = resolveString(target.api_key ?? target.apiKey, env, describe("api key"));
+  const deploymentName = resolveString(
+    target.deployment ?? target.deploymentName ?? target.model,
+    env,
+    describe("deployment")
+  );
+  const rawVersion = resolveOptionalString(target.version ?? target.api_version, env, describe("api version"), {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const version = normalizeAzureApiVersion(rawVersion);
+  const temperature = resolveOptionalNumber(target.temperature, describe("temperature"));
+  const maxOutputTokens = resolveOptionalNumber(
+    target.max_output_tokens ?? target.maxTokens,
+    describe("max output tokens")
+  );
+  const retry = resolveRetryConfig(target);
+  return {
+    resourceName,
+    deploymentName,
+    apiKey,
+    version,
+    temperature,
+    maxOutputTokens,
+    retry
+  };
+}
+function resolveAnthropicConfig(target, env) {
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const modelSource = target.model ?? target.deployment ?? target.variant;
+  const temperatureSource = target.temperature;
   const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
   const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
@@ -3385,6 +4390,7 @@ function resolveCodexConfig(target, env) {
   const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
   const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
   const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -3400,13 +4406,15 @@ function resolveCodexConfig(target, env) {
     optionalEnv: true
   });
   const logFormat = normalizeCodexLogFormat(logFormatSource);
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     executable,
     args,
     cwd,
     timeoutMs,
     logDir,
-    logFormat
+    logFormat,
+    systemPrompt
   };
 }
 function normalizeCodexLogFormat(value) {
@@ -3422,10 +4430,73 @@ function normalizeCodexLogFormat(value) {
   }
   throw new Error("codex log format must be 'summary' or 'json'");
 }
+function resolvePiCodingAgentConfig(target, env) {
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const modelSource = target.model ?? target.pi_model ?? target.piModel;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
+  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
+  const logFormatSource = target.log_format ?? target.logFormat;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
+    allowLiteral: true,
+    optionalEnv: true
+  }) ?? "pi";
+  const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
+    allowLiteral: false,
+    optionalEnv: true
+  });
+  const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
+  const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
+  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
+  return {
+    executable,
+    provider,
+    model,
+    apiKey,
+    tools,
+    thinking,
+    args,
+    cwd,
+    timeoutMs,
+    logDir,
+    logFormat,
+    systemPrompt
+  };
+}
 function resolveMockConfig(target) {
   const response = typeof target.response === "string" ? target.response : void 0;
-  const trace = Array.isArray(target.trace) ? target.trace : void 0;
-  return { response, trace };
+  return { response };
 }
 function resolveVSCodeConfig(target, env, insiders) {
   const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -3457,42 +4528,35 @@ function resolveVSCodeConfig(target, env, insiders) {
     workspaceTemplate
   };
 }
-function resolveCliConfig(target, env, evalFilePath) {
-  const commandTemplateSource = target.command_template ?? target.commandTemplate;
-  const filesFormat = resolveOptionalLiteralString(
-    target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
-  );
-  const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
-  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  if (cwd && evalFilePath && !import_node_path11.default.isAbsolute(cwd)) {
-    cwd = import_node_path11.default.resolve(import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath)), cwd);
+var cliErrorMap = (issue, ctx) => {
+  if (issue.code === import_zod.z.ZodIssueCode.unrecognized_keys) {
+    return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
   }
-  if (!cwd && evalFilePath) {
-    cwd = import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath));
+  if (issue.code === import_zod.z.ZodIssueCode.invalid_union_discriminator) {
+    return { message: "healthcheck type must be 'http' or 'command'" };
   }
-  const timeoutMs = resolveTimeoutMs(
-    target.timeout_seconds ?? target.timeoutSeconds,
-    `${target.name} timeout`
-  );
-  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name, evalFilePath);
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${target.name} CLI command template`,
-    true
-  );
-  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
-  return {
-    commandTemplate,
-    filesFormat,
-    cwd,
-    timeoutMs,
-    healthcheck,
-    verbose
-  };
+  if (issue.code === import_zod.z.ZodIssueCode.invalid_type && issue.expected === "string") {
+    return { message: `${ctx.defaultError} (expected a string value)` };
+  }
+  return { message: ctx.defaultError };
+};
+function resolveCliConfig(target, env, evalFilePath) {
+  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
+  if (!parseResult.success) {
+    const firstError = parseResult.error.errors[0];
+    const path16 = firstError?.path.join(".") || "";
+    const prefix = path16 ? `${target.name} ${path16}: ` : `${target.name}: `;
+    throw new Error(`${prefix}${firstError?.message}`);
+  }
+  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
+  assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
+  if (normalized.healthcheck?.type === "command") {
+    assertSupportedCliPlaceholders(
+      normalized.healthcheck.commandTemplate,
+      `${target.name} healthcheck command template`
+    );
+  }
+  return normalized;
 }
 function resolveTimeoutMs(source, description) {
   const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
@@ -3504,49 +4568,6 @@ function resolveTimeoutMs(source, description) {
   }
   return Math.floor(seconds * 1e3);
 }
-function resolveCliHealthcheck(source, env, targetName, evalFilePath) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} healthcheck must be an object`);
-  }
-  const candidate = source;
-  const type = candidate.type;
-  const timeoutMs = resolveTimeoutMs(
-    candidate.timeout_seconds ?? candidate.timeoutSeconds,
-    `${targetName} healthcheck timeout`
-  );
-  if (type === "http") {
-    const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
-    return {
-      type: "http",
-      url,
-      timeoutMs
-    };
-  }
-  if (type === "command") {
-    const commandTemplate = resolveString(
-      candidate.command_template ?? candidate.commandTemplate,
-      env,
-      `${targetName} healthcheck command template`,
-      true
-    );
-    assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
-    const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
-      allowLiteral: true,
-      optionalEnv: true
-    });
-    const resolvedCwd = cwd && evalFilePath && !import_node_path11.default.isAbsolute(cwd) ? import_node_path11.default.resolve(import_node_path11.default.dirname(import_node_path11.default.resolve(evalFilePath)), cwd) : cwd;
-    return {
-      type: "command",
-      commandTemplate,
-      timeoutMs,
-      cwd: resolvedCwd
-    };
-  }
-  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
-}
 function assertSupportedCliPlaceholders(template, description) {
   const placeholders = extractCliPlaceholders(template);
   for (const placeholder of placeholders) {
@@ -3712,7 +4733,7 @@ function resolveOptionalNumberArray(source, description) {
 }
 // src/evaluation/providers/vscode.ts
-var import_node_path12 = __toESM(require("path"), 1);
+var import_node_path13 = __toESM(require("path"), 1);
 var import_subagent = require("subagent");
 // src/evaluation/providers/vscode-templates.ts
@@ -3786,7 +4807,7 @@ var VSCodeProvider = class {
     }
     if (this.config.dryRun) {
       return {
-        text: "",
+        outputMessages: [],
         raw: {
           session,
           inputFiles
@@ -3795,7 +4816,7 @@ var VSCodeProvider = class {
     }
     const responseText = await readTextFile(session.responseFile);
     return {
-      text: responseText,
+      outputMessages: [{ role: "assistant", content: responseText }],
       raw: {
         session,
         inputFiles
@@ -3833,7 +4854,7 @@ var VSCodeProvider = class {
     }
     if (this.config.dryRun) {
       return normalizedRequests.map(({ inputFiles }) => ({
-        text: "",
+        outputMessages: [],
         raw: {
           session,
           inputFiles,
@@ -3850,7 +4871,7 @@ var VSCodeProvider = class {
     for (const [index, responseFile] of session.responseFiles.entries()) {
       const responseText = await readTextFile(responseFile);
       responses.push({
-        text: responseText,
+        outputMessages: [{ role: "assistant", content: responseText }],
         raw: {
           session,
           inputFiles: normalizedRequests[index]?.inputFiles,
@@ -3882,7 +4903,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path12.default.basename(absolutePath);
+    const fileName = import_node_path13.default.basename(absolutePath);
     const fileUri = pathToFileUri2(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -3907,8 +4928,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path12.default.resolve(attachment);
-    const normalized = absolutePath.split(import_node_path12.default.sep).join("/");
+    const absolutePath = import_node_path13.default.resolve(attachment);
+    const normalized = absolutePath.split(import_node_path13.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -3923,7 +4944,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path12.default.resolve(attachment);
+    const absolutePath = import_node_path13.default.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -3931,7 +4952,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = import_node_path12.default.isAbsolute(filePath) ? filePath : import_node_path12.default.resolve(filePath);
+  const absolutePath = import_node_path13.default.isAbsolute(filePath) ? filePath : import_node_path13.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -3944,7 +4965,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(import_node_path12.default.resolve(attachment));
+    deduped.add(import_node_path13.default.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -3953,7 +4974,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(import_node_path12.default.resolve(inputFile));
+      deduped.add(import_node_path13.default.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -4000,9 +5021,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 }
 // src/evaluation/providers/targets-file.ts
-var import_node_fs4 = require("fs");
-var import_promises10 = require("fs/promises");
-var import_node_path13 = __toESM(require("path"), 1);
+var import_node_fs5 = require("fs");
+var import_promises11 = require("fs/promises");
+var import_node_path14 = __toESM(require("path"), 1);
 var import_yaml3 = require("yaml");
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -4032,18 +5053,18 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises10.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises11.access)(filePath, import_node_fs5.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = import_node_path13.default.resolve(filePath);
+  const absolutePath = import_node_path14.default.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises10.readFile)(absolutePath, "utf8");
+  const raw = await (0, import_promises11.readFile)(absolutePath, "utf8");
   const parsed = (0, import_yaml3.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -4071,6 +5092,8 @@ function createProvider(target) {
       return new CliProvider(target.name, target.config);
     case "codex":
       return new CodexProvider(target.name, target.config);
+    case "pi-coding-agent":
+      return new PiCodingAgentProvider(target.name, target.config);
     case "mock":
       return new MockProvider(target.name, target.config);
     case "vscode":
@@ -4090,6 +5113,100 @@ function resolveAndCreateProvider(definition, env = process.env) {
 // src/evaluation/evaluators.ts
 var import_ai2 = require("ai");
 var import_zod2 = require("zod");
+// src/runtime/exec.ts
+function getBunSpawn() {
+  const bunSpawn = globalThis.Bun?.spawn;
+  return typeof bunSpawn === "function" ? bunSpawn : void 0;
+}
+async function execShellWithStdin(command, stdinPayload, options = {}) {
+  const bunSpawn = getBunSpawn();
+  if (bunSpawn) {
+    const encoder = new TextEncoder();
+    const proc = bunSpawn({
+      cmd: ["sh", "-c", command],
+      cwd: options.cwd,
+      stdin: encoder.encode(stdinPayload),
+      stdout: "pipe",
+      stderr: "pipe"
+    });
+    const timeout = options.timeoutMs ? setTimeout(() => {
+      proc.kill();
+    }, options.timeoutMs) : void 0;
+    try {
+      const stdout = await new Response(proc.stdout).text();
+      const stderr = await new Response(proc.stderr).text();
+      const exitCode = await proc.exited;
+      return { stdout, stderr, exitCode };
+    } finally {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+    }
+  }
+  const { spawn: spawn3 } = await import("child_process");
+  return await new Promise((resolve, reject) => {
+    const child = spawn3(command, {
+      shell: true,
+      cwd: options.cwd,
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    let stdout = "";
+    let stderr = "";
+    const timeout = options.timeoutMs ? setTimeout(() => {
+      child.kill();
+      reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
+    }, options.timeoutMs) : void 0;
+    child.stdout?.on("data", (data) => {
+      stdout += data.toString();
+    });
+    child.stderr?.on("data", (data) => {
+      stderr += data.toString();
+    });
+    child.on("error", (error) => {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+      reject(error);
+    });
+    child.on("exit", (code) => {
+      if (timeout !== void 0) {
+        clearTimeout(timeout);
+      }
+      resolve({ stdout, stderr, exitCode: code ?? 0 });
+    });
+    child.stdin?.write(stdinPayload);
+    child.stdin?.end();
+  });
+}
+// src/evaluation/providers/types.ts
+var AGENT_PROVIDER_KINDS = [
+  "codex",
+  "pi-coding-agent",
+  "vscode",
+  "vscode-insiders"
+];
+function extractLastAssistantContent(messages) {
+  if (!messages || messages.length === 0) {
+    return "";
+  }
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.role === "assistant" && msg.content !== void 0) {
+      if (typeof msg.content === "string") {
+        return msg.content;
+      }
+      return JSON.stringify(msg.content);
+    }
+  }
+  return "";
+}
+function isAgentProvider(provider) {
+  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
+}
+// src/evaluation/evaluators.ts
 var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
 Use the reference_answer as a gold standard for a high-quality response (if provided). The reference_answer may be a simple text response, or it may contain a sequence of expected agent messages including tool calls. When it contains multiple messages, the last message represents the final expected answer. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -4154,6 +5271,7 @@ var LlmJudgeEvaluator = class {
         null,
         2
       ),
+      [TEMPLATE_VARIABLES.OUTPUT_MESSAGES]: JSON.stringify(context.outputMessages ?? [], null, 2),
       [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
       [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
       [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
@@ -4178,7 +5296,7 @@ var LlmJudgeEvaluator = class {
       const score = clampScore(data.score);
       const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
       const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
-      const reasoning = data.reasoning ?? providerResponse?.reasoning;
+      const reasoning = data.reasoning;
       const expectedAspectCount = Math.max(hits.length + misses.length, 1);
       return {
         score,
@@ -4280,7 +5398,9 @@ var LlmJudgeEvaluator = class {
           maxOutputTokens: this.maxOutputTokens,
           temperature: this.temperature
         });
-        const data = schema.parse(parseJsonFromText(response.text ?? ""));
+        const data = schema.parse(
+          parseJsonFromText(extractLastAssistantContent(response.outputMessages))
+        );
         return { data, providerResponse: response };
       } catch (e) {
         lastError = e instanceof Error ? e : new Error(String(e));
@@ -4362,17 +5482,17 @@ var CodeEvaluator = class {
     const inputPayload = JSON.stringify(
       {
         question: context.evalCase.question,
-        expected_outcome: context.evalCase.expected_outcome,
-        expected_messages: context.evalCase.expected_messages,
-        reference_answer: context.evalCase.reference_answer,
-        candidate_answer: context.candidate,
-        guideline_files: context.evalCase.guideline_paths,
-        input_files: context.evalCase.file_paths.filter(
-          (path15) => !context.evalCase.guideline_paths.includes(path15)
+        expectedOutcome: context.evalCase.expected_outcome,
+        expectedMessages: context.evalCase.expected_messages,
+        referenceAnswer: context.evalCase.reference_answer,
+        candidateAnswer: context.candidate,
+        outputMessages: context.outputMessages ?? null,
+        guidelineFiles: context.evalCase.guideline_paths,
+        inputFiles: context.evalCase.file_paths.filter(
+          (path16) => !context.evalCase.guideline_paths.includes(path16)
         ),
-        input_messages: context.evalCase.input_messages,
-        candidate_trace_file: context.candidateTraceRef ?? null,
-        candidate_trace_summary: context.candidateTraceSummary ?? null
+        inputMessages: context.evalCase.input_messages,
+        traceSummary: context.traceSummary ?? null
       },
       null,
       2
@@ -4442,43 +5562,17 @@ function calculateRubricScore(result, rubrics) {
   return { score, verdict, hits, misses };
 }
 async function executeScript(scriptPath, input, agentTimeoutMs, cwd) {
-  const { spawn: spawn2 } = await import("child_process");
-  return await new Promise((resolve, reject) => {
-    const child = spawn2(scriptPath, {
-      shell: true,
-      cwd
-    });
-    let stdout = "";
-    let stderr = "";
-    const timeout = agentTimeoutMs ? setTimeout(() => {
-      child.kill();
-      reject(new Error(`Code evaluator timed out after ${agentTimeoutMs}ms`));
-    }, agentTimeoutMs) : void 0;
-    child.stdout?.on("data", (data) => {
-      stdout += data.toString();
-    });
-    child.stderr?.on("data", (data) => {
-      stderr += data.toString();
-    });
-    child.on("error", (error) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      reject(error);
-    });
-    child.on("exit", (code) => {
-      if (timeout !== void 0) {
-        clearTimeout(timeout);
-      }
-      if (code && code !== 0 && stderr.length > 0) {
-        reject(new Error(`Code evaluator exited with code ${code}: ${stderr.trim()}`));
-        return;
-      }
-      resolve(stdout.trim());
-    });
-    child.stdin?.write(input);
-    child.stdin?.end();
+  const { stdout, stderr, exitCode } = await execShellWithStdin(scriptPath, input, {
+    cwd,
+    timeoutMs: agentTimeoutMs
   });
+  if (exitCode !== 0) {
+    const trimmedErr = stderr.trim();
+    throw new Error(
+      trimmedErr.length > 0 ? `Code evaluator exited with code ${exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${exitCode}`
+    );
+  }
+  return stdout.trim();
 }
 function parseJsonSafe(payload) {
   try {
@@ -4492,6 +5586,33 @@ function substituteVariables(template, variables) {
     return variables[varName] ?? match;
   });
 }
+function deepEqual(a, b) {
+  if (a === b) return true;
+  if (a === null || b === null) return a === b;
+  if (typeof a !== typeof b) return false;
+  if (typeof a !== "object") return a === b;
+  if (Array.isArray(a) !== Array.isArray(b)) return false;
+  if (Array.isArray(a) && Array.isArray(b)) {
+    if (a.length !== b.length) return false;
+    return a.every((val, i) => deepEqual(val, b[i]));
+  }
+  const aObj = a;
+  const bObj = b;
+  const aKeys = Object.keys(aObj);
+  const bKeys = Object.keys(bObj);
+  if (aKeys.length !== bKeys.length) return false;
+  return aKeys.every((key) => Object.hasOwn(bObj, key) && deepEqual(aObj[key], bObj[key]));
+}
+function argsMatch(expected, actual) {
+  if (expected === void 0) return true;
+  if (expected === "any") return true;
+  if (actual === void 0) return false;
+  for (const key of Object.keys(expected)) {
+    if (!Object.hasOwn(actual, key)) return false;
+    if (!deepEqual(expected[key], actual[key])) return false;
+  }
+  return true;
+}
 var ToolTrajectoryEvaluator = class {
   kind = "tool_trajectory";
   config;
@@ -4499,8 +5620,19 @@ var ToolTrajectoryEvaluator = class {
     this.config = options.config;
   }
   evaluate(context) {
-    const { candidateTrace, candidateTraceSummary } = context;
-    if (!candidateTrace || !candidateTraceSummary) {
+    const { outputMessages, traceSummary } = context;
+    const toolCalls = this.extractToolCallsFromMessages(outputMessages);
+    if (toolCalls.length === 0 && !traceSummary) {
+      return {
+        score: 0,
+        verdict: "fail",
+        hits: [],
+        misses: ["No trace available for evaluation"],
+        expectedAspectCount: 1
+      };
+    }
+    const summary = toolCalls.length > 0 ? this.buildSummary(toolCalls) : traceSummary;
+    if (!summary) {
       return {
         score: 0,
         verdict: "fail",
@@ -4511,11 +5643,11 @@ var ToolTrajectoryEvaluator = class {
     }
     switch (this.config.mode) {
       case "any_order":
-        return this.evaluateAnyOrder(candidateTraceSummary);
+        return this.evaluateAnyOrder(summary);
       case "in_order":
-        return this.evaluateInOrder(candidateTrace);
+        return this.evaluateInOrder(toolCalls);
       case "exact":
-        return this.evaluateExact(candidateTrace);
+        return this.evaluateExact(toolCalls);
       default:
         return {
           score: 0,
@@ -4526,6 +5658,42 @@ var ToolTrajectoryEvaluator = class {
         };
     }
   }
+  /**
+   * Extract tool calls from output messages.
+   */
+  extractToolCallsFromMessages(messages) {
+    if (!messages) {
+      return [];
+    }
+    const toolCalls = [];
+    for (const message of messages) {
+      if (message.toolCalls) {
+        for (const call of message.toolCalls) {
+          toolCalls.push({
+            name: call.tool,
+            args: call.input
+          });
+        }
+      }
+    }
+    return toolCalls;
+  }
+  /**
+   * Build a summary from extracted tool calls.
+   */
+  buildSummary(toolCalls) {
+    const toolCallsByName = {};
+    for (const call of toolCalls) {
+      toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
+    }
+    const toolNames = Object.keys(toolCallsByName).sort();
+    return {
+      eventCount: toolCalls.length,
+      toolNames,
+      toolCallsByName,
+      errorCount: 0
+    };
+  }
   evaluateAnyOrder(summary) {
     const minimums = this.config.minimums ?? {};
     const toolNames = Object.keys(minimums);
@@ -4558,7 +5726,7 @@ var ToolTrajectoryEvaluator = class {
       expectedAspectCount: toolNames.length
     };
   }
-  evaluateInOrder(trace) {
+  evaluateInOrder(toolCalls) {
     const expected = this.config.expected ?? [];
     if (expected.length === 0) {
       return {
@@ -4569,23 +5737,33 @@ var ToolTrajectoryEvaluator = class {
         expectedAspectCount: 0
       };
     }
-    const actualToolCalls = trace.filter((e) => e.type === "tool_call" && e.name);
     const hits = [];
     const misses = [];
     let actualIndex = 0;
     for (let i = 0; i < expected.length; i++) {
-      const expectedTool = expected[i].tool;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
       let found = false;
-      while (actualIndex < actualToolCalls.length) {
-        if (actualToolCalls[actualIndex].name === expectedTool) {
-          hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+      let argsMismatch = false;
+      while (actualIndex < toolCalls.length) {
+        const actualCall = toolCalls[actualIndex];
+        if (actualCall.name === expectedTool) {
+          if (argsMatch(expectedItem.args, actualCall.args)) {
+            hits.push(`Found ${expectedTool} at position ${actualIndex}`);
+            actualIndex++;
+            found = true;
+            break;
+          }
+          misses.push(
+            `Expected ${expectedTool} at position ${i}: tool found at ${actualIndex} but args mismatch`
+          );
           actualIndex++;
-          found = true;
+          argsMismatch = true;
           break;
         }
         actualIndex++;
       }
-      if (!found) {
+      if (!found && !argsMismatch) {
         misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
       }
     }
@@ -4598,7 +5776,7 @@ var ToolTrajectoryEvaluator = class {
       expectedAspectCount: expected.length
     };
   }
-  evaluateExact(trace) {
+  evaluateExact(toolCalls) {
     const expected = this.config.expected ?? [];
     if (expected.length === 0) {
       return {
@@ -4609,18 +5787,23 @@ var ToolTrajectoryEvaluator = class {
         expectedAspectCount: 0
       };
     }
-    const actualToolCalls = trace.filter((e) => e.type === "tool_call" && e.name);
     const hits = [];
     const misses = [];
-    if (actualToolCalls.length !== expected.length) {
-      misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
+    if (toolCalls.length !== expected.length) {
+      misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
     }
-    const checkLength = Math.min(expected.length, actualToolCalls.length);
+    const checkLength = Math.min(expected.length, toolCalls.length);
     for (let i = 0; i < checkLength; i++) {
-      const expectedTool = expected[i].tool;
-      const actualTool = actualToolCalls[i].name;
+      const expectedItem = expected[i];
+      const expectedTool = expectedItem.tool;
+      const actualCall = toolCalls[i];
+      const actualTool = actualCall.name;
       if (actualTool === expectedTool) {
-        hits.push(`Position ${i}: ${expectedTool} \u2713`);
+        if (argsMatch(expectedItem.args, actualCall.args)) {
+          hits.push(`Position ${i}: ${expectedTool}`);
+        } else {
+          misses.push(`Position ${i}: ${expectedTool} args mismatch`);
+        }
       } else {
         misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
       }
@@ -4832,11 +6015,13 @@ var CompositeEvaluator = class {
         evalCaseId: context.evalCase.id,
         attempt: context.attempt
       });
-      const data = freeformEvaluationSchema.parse(parseJsonFromText(response.text ?? ""));
+      const data = freeformEvaluationSchema.parse(
+        parseJsonFromText(extractLastAssistantContent(response.outputMessages))
+      );
       const score = clampScore(data.score);
       const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
       const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
-      const reasoning = data.reasoning ?? response.reasoning;
+      const reasoning = data.reasoning;
       return {
         score,
         verdict: scoreToVerdict(score),
@@ -4862,9 +6047,9 @@ var CompositeEvaluator = class {
 };
 // src/evaluation/orchestrator.ts
-var import_node_crypto2 = require("crypto");
-var import_promises11 = require("fs/promises");
-var import_node_path14 = __toESM(require("path"), 1);
+var import_node_crypto3 = require("crypto");
+var import_promises12 = require("fs/promises");
+var import_node_path15 = __toESM(require("path"), 1);
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
@@ -5005,16 +6190,6 @@ function validateConcurrency(concurrency) {
   }
 }
-// src/evaluation/providers/types.ts
-var AGENT_PROVIDER_KINDS = [
-  "codex",
-  "vscode",
-  "vscode-insiders"
-];
-function isAgentProvider(provider) {
-  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
-}
 // src/evaluation/orchestrator.ts
 async function runEvaluation(options) {
   const {
@@ -5269,11 +6444,19 @@ async function runBatchEvaluation(options) {
     const evalCase = evalCases[i];
     const promptInputs = promptInputsList[i];
     const providerResponse = batchResponse[i];
+    const outputMessages = providerResponse.outputMessages;
+    const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+    const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+      tokenUsage: providerResponse.tokenUsage,
+      costUsd: providerResponse.costUsd,
+      durationMs: providerResponse.durationMs
+    }) : void 0;
+    const candidate = extractLastAssistantContent(outputMessages);
     let result;
     try {
       result = await evaluateCandidate({
         evalCase,
-        candidate: providerResponse.text ?? "",
+        candidate,
         target,
         provider,
         evaluators: evaluatorRegistry,
@@ -5281,7 +6464,9 @@ async function runBatchEvaluation(options) {
         nowFn,
         attempt: 0,
         judgeProvider: await resolveJudgeProvider(target),
-        agentTimeoutMs
+        agentTimeoutMs,
+        outputMessages,
+        traceSummary
       });
     } catch (error) {
       const errorResult = buildErrorResult(
@@ -5385,21 +6570,18 @@ async function runEvalCase(options) {
   if (cacheKey && cache && !cachedResponse) {
     await cache.set(cacheKey, providerResponse);
   }
-  let candidateTrace = providerResponse.trace;
-  if (!candidateTrace && providerResponse.traceRef) {
-    try {
-      const rawTrace = await readJsonFile(providerResponse.traceRef);
-      if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
-        candidateTrace = rawTrace;
-      }
-    } catch {
-    }
-  }
-  const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
+  const outputMessages = providerResponse.outputMessages;
+  const baseSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
+  const traceSummary = baseSummary ? mergeExecutionMetrics(baseSummary, {
+    tokenUsage: providerResponse.tokenUsage,
+    costUsd: providerResponse.costUsd,
+    durationMs: providerResponse.durationMs
+  }) : void 0;
+  const candidate = extractLastAssistantContent(outputMessages);
   try {
     return await evaluateCandidate({
       evalCase,
-      candidate: providerResponse.text ?? "",
+      candidate,
       target,
       provider,
       evaluators,
@@ -5408,9 +6590,8 @@ async function runEvalCase(options) {
       attempt,
       judgeProvider,
       agentTimeoutMs,
-      candidateTrace,
-      candidateTraceRef: providerResponse.traceRef,
-      candidateTraceSummary
+      outputMessages,
+      traceSummary
     });
   } catch (error) {
     return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
@@ -5428,9 +6609,8 @@ async function evaluateCandidate(options) {
     attempt,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   const gradeTimestamp = nowFn();
   const { score, evaluatorResults } = await runEvaluatorsForCase({
@@ -5444,9 +6624,8 @@ async function evaluateCandidate(options) {
     now: gradeTimestamp,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   });
   const completedAt = nowFn();
   let agentProviderRequest;
@@ -5470,21 +6649,21 @@ async function evaluateCandidate(options) {
   }
   return {
     timestamp: completedAt.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
     misses: score.misses,
-    candidate_answer: candidate,
+    candidateAnswer: candidate,
     target: target.name,
     reasoning: score.reasoning,
-    raw_aspects: score.rawAspects,
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
-    evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
-    evaluator_results: evaluatorResults,
-    trace_summary: candidateTraceSummary
+    rawAspects: score.rawAspects,
+    agentProviderRequest,
+    lmProviderRequest,
+    evaluatorProviderRequest: evaluatorResults ? void 0 : score.evaluatorRawRequest,
+    evaluatorResults,
+    traceSummary
   };
 }
 async function runEvaluatorsForCase(options) {
@@ -5499,9 +6678,8 @@ async function runEvaluatorsForCase(options) {
     now,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   if (evalCase.evaluators && evalCase.evaluators.length > 0) {
     return runEvaluatorList({
@@ -5516,9 +6694,8 @@ async function runEvaluatorsForCase(options) {
       now,
       judgeProvider,
       agentTimeoutMs,
-      candidateTrace,
-      candidateTraceRef,
-      candidateTraceSummary
+      outputMessages,
+      traceSummary
     });
   }
   const evaluatorKind = evalCase.evaluator ?? "llm_judge";
@@ -5535,9 +6712,8 @@ async function runEvaluatorsForCase(options) {
     promptInputs,
     now,
     judgeProvider,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   });
   return { score };
 }
@@ -5554,9 +6730,8 @@ async function runEvaluatorList(options) {
     now,
     judgeProvider,
     agentTimeoutMs,
-    candidateTrace,
-    candidateTraceRef,
-    candidateTraceSummary
+    outputMessages,
+    traceSummary
   } = options;
   const scored = [];
   const evaluatorResults = [];
@@ -5586,7 +6761,7 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "code") {
@@ -5603,8 +6778,8 @@ async function runEvaluatorList(options) {
           attempt,
           promptInputs,
           now,
-          candidateTraceRef,
-          candidateTraceSummary
+          outputMessages,
+          traceSummary
         });
         const weight = evaluator.weight ?? 1;
         scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
@@ -5617,11 +6792,11 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
+          evaluatorProviderRequest: score2.evaluatorRawRequest
         });
       }
       if (evaluator.type === "composite") {
-        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path14.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
+        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path15.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
         const createEvaluator = (memberConfig) => {
           switch (memberConfig.type) {
             case "llm_judge":
@@ -5674,8 +6849,8 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest,
-          evaluator_results: mapChildResults(score2.evaluatorResults)
+          evaluatorProviderRequest: score2.evaluatorRawRequest,
+          evaluatorResults: mapChildResults(score2.evaluatorResults)
         });
       }
       if (evaluator.type === "tool_trajectory") {
@@ -5690,9 +6865,8 @@ async function runEvaluatorList(options) {
           attempt,
           promptInputs,
           now,
-          candidateTrace,
-          candidateTraceRef,
-          candidateTraceSummary
+          outputMessages,
+          traceSummary
         });
         const weight = evaluator.weight ?? 1;
         scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
@@ -5834,22 +7008,22 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
 async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
-  const filePath = import_node_path14.default.resolve(directory, filename);
-  await (0, import_promises11.mkdir)(import_node_path14.default.dirname(filePath), { recursive: true });
+  const filePath = import_node_path15.default.resolve(directory, filename);
+  await (0, import_promises12.mkdir)(import_node_path15.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises11.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises12.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
     return "prompt";
   }
   const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
-  return sanitized.length > 0 ? sanitized : (0, import_node_crypto2.randomUUID)();
+  return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
 }
 async function invokeProvider(provider, options) {
   const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -5906,22 +7080,22 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
   }
   return {
     timestamp: timestamp.toISOString(),
-    eval_id: evalCase.id,
+    evalId: evalCase.id,
     dataset: evalCase.dataset,
-    conversation_id: evalCase.conversation_id,
+    conversationId: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
-    candidate_answer: `Error occurred: ${message}`,
+    candidateAnswer: `Error occurred: ${message}`,
     target: targetName,
-    raw_aspects: [],
-    agent_provider_request: agentProviderRequest,
-    lm_provider_request: lmProviderRequest,
+    rawAspects: [],
+    agentProviderRequest,
+    lmProviderRequest,
     error: message
   };
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
-  const hash = (0, import_node_crypto2.createHash)("sha256");
+  const hash = (0, import_node_crypto3.createHash)("sha256");
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
@@ -5961,8 +7135,8 @@ function mapChildResults(children) {
     hits: child.hits,
     misses: child.misses,
     reasoning: child.reasoning,
-    evaluator_provider_request: child.evaluatorRawRequest,
-    evaluator_results: mapChildResults(child.evaluatorResults)
+    evaluatorProviderRequest: child.evaluatorRawRequest,
+    evaluatorResults: mapChildResults(child.evaluatorResults)
   }));
 }
 function computeWeightedMean(entries) {
@@ -6064,17 +7238,21 @@ function createAgentKernel() {
 0 && (module.exports = {
   CodeEvaluator,
   CompositeEvaluator,
+  DEFAULT_EXPLORATION_TOOLS,
   LlmJudgeEvaluator,
   TEST_MESSAGE_ROLES,
   ToolTrajectoryEvaluator,
+  avgToolDurationMs,
   buildDirectoryChain,
   buildPromptInputs,
   buildSearchRoots,
   computeTraceSummary,
   consumeCodexLogEntries,
+  consumePiLogEntries,
   createAgentKernel,
   createProvider,
   ensureVSCodeSubagents,
+  explorationRatio,
   extractCodeBlocks,
   fileExists,
   findGitRoot,
@@ -6086,10 +7264,9 @@ function createAgentKernel() {
   isJsonValue,
   isTestMessage,
   isTestMessageRole,
-  isTraceEvent,
-  isTraceEventType,
   listTargetNames,
   loadEvalCases,
+  mergeExecutionMetrics,
   normalizeLineEndings,
   readJsonFile,
   readTargetDefinitions,
@@ -6100,6 +7277,8 @@ function createAgentKernel() {
   resolveTargetDefinition,
   runEvalCase,
   runEvaluation,
-  subscribeToCodexLogEntries
+  subscribeToCodexLogEntries,
+  subscribeToPiLogEntries,
+  tokensPerTool
 });
 //# sourceMappingURL=index.cjs.map