npm - @agentv/core - Versions diffs - 2.7.1-next.5 → 2.8.0-next.1 - Mend

@agentv/core 2.7.1-next.5 → 2.8.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/{chunk-6W5E3VR6.js → chunk-P2465XAH.js} +24 -49
package/dist/chunk-P2465XAH.js.map +1 -0
package/dist/evaluation/validation/index.cjs +28 -58
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +21 -44
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +295 -220
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +44 -42
package/dist/index.d.ts +44 -42
package/dist/index.js +273 -173
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-6W5E3VR6.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -2141,6 +2141,24 @@ function extractCacheConfig(suite) {
   const resolvedCachePath = typeof cachePath === "string" && cachePath.trim().length > 0 ? cachePath.trim() : void 0;
   return { enabled: cache, cachePath: resolvedCachePath };
 }
+function extractTotalBudgetUsd(suite) {
+  const execution = suite.execution;
+  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
+    return void 0;
+  }
+  const executionObj = execution;
+  const rawBudget = executionObj.total_budget_usd ?? executionObj.totalBudgetUsd;
+  if (rawBudget === void 0 || rawBudget === null) {
+    return void 0;
+  }
+  if (typeof rawBudget === "number" && rawBudget > 0) {
+    return rawBudget;
+  }
+  logWarning(
+    `Invalid execution.total_budget_usd: ${rawBudget}. Must be a positive number. Ignoring.`
+  );
+  return void 0;
+}
 function logWarning(message) {
   console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
 }
@@ -2273,24 +2291,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       continue;
     }
     if (typeValue === "code_judge") {
-      let script;
-      const rawScript = rawEvaluator.script;
-      if (typeof rawScript === "string") {
-        const trimmed = rawScript.trim();
+      let command;
+      const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
+      if (typeof rawCommand === "string") {
+        const trimmed = rawCommand.trim();
         if (trimmed.length === 0) {
           throw new Error(
-            `Invalid code_judge script for evaluator '${name}' in '${evalId}': script cannot be empty`
+            `Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
           );
         }
-        script = parseCommandToArgv(trimmed);
+        command = parseCommandToArgv(trimmed);
       } else {
-        script = asStringArray(
-          rawScript,
-          `code_judge script for evaluator '${name}' in '${evalId}'`
+        command = asStringArray(
+          rawCommand,
+          `code_judge command for evaluator '${name}' in '${evalId}'`
         );
       }
-      if (!script) {
-        logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing script`);
+      if (!command) {
+        logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
@@ -2335,6 +2353,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       const knownProps2 = /* @__PURE__ */ new Set([
         "name",
         "type",
+        "command",
         "script",
         "cwd",
         "weight",
@@ -2351,7 +2370,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       evaluators.push({
         name,
         type: "code",
-        script,
+        command,
         cwd,
         resolvedCwd,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
@@ -2953,20 +2972,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
     let resolvedPromptScript;
     let promptScriptConfig;
     if (isJsonObject2(rawPrompt)) {
-      const scriptArray = asStringArray(
-        rawPrompt.script,
-        `prompt.script for evaluator '${name}' in '${evalId}'`
+      const commandArray = asStringArray(
+        rawPrompt.command ?? rawPrompt.script,
+        `prompt.command for evaluator '${name}' in '${evalId}'`
       );
-      if (!scriptArray) {
-        throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires script array`);
+      if (!commandArray) {
+        throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
       }
-      const scriptPath = scriptArray[scriptArray.length - 1];
-      const resolved = await resolveFileReference2(scriptPath, searchRoots);
+      const commandPath = commandArray[commandArray.length - 1];
+      const resolved = await resolveFileReference2(commandPath, searchRoots);
       if (resolved.resolvedPath) {
-        resolvedPromptScript = [...scriptArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
+        resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
       } else {
         throw new Error(
-          `Evaluator '${name}' in '${evalId}': prompt script file not found: ${resolved.displayPath}`
+          `Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
         );
       }
       if (isJsonObject2(rawPrompt.config)) {
@@ -4197,6 +4216,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
     trials: extractTrialsConfig(parsed),
     targets: extractTargetsFromSuite(parsed),
     cacheConfig: extractCacheConfig(parsed),
+    totalBudgetUsd: extractTotalBudgetUsd(parsed),
     ...metadata !== void 0 && { metadata }
   };
 }
@@ -4387,16 +4407,16 @@ var loadEvalCaseById = loadTestById;
 function parseWorkspaceScriptConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
   const obj = raw;
-  const script = obj.script;
-  if (!Array.isArray(script) || script.length === 0) return void 0;
-  const scriptArr = script.filter((s) => typeof s === "string");
-  if (scriptArr.length === 0) return void 0;
+  const commandSource = obj.command ?? obj.script;
+  if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
+  const commandArr = commandSource.filter((s) => typeof s === "string");
+  if (commandArr.length === 0) return void 0;
   const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
   let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
   if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
     cwd = import_node_path8.default.resolve(evalFileDir, cwd);
   }
-  const config = { script: scriptArr };
+  const config = { command: commandArr };
   if (timeoutMs !== void 0) {
     return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
   }
@@ -5589,50 +5609,58 @@ var CliProvider = class {
     await this.ensureHealthy(request.signal);
     const effectiveCwd = request.cwd ?? this.config.cwd;
     const outputFilePath = generateOutputFilePath(request.evalCaseId);
-    const templateValues = buildTemplateValues(request, this.config, outputFilePath);
-    const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
+    const { values: templateValues, promptFilePath } = await buildTemplateValues(
+      request,
+      this.config,
+      outputFilePath
+    );
+    const renderedCommand = renderTemplate(this.config.command, templateValues);
     if (this.verbose) {
       console.log(
         `[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
       );
     }
-    const startTime = Date.now();
-    const result = await this.runCommand(renderedCommand, {
-      cwd: effectiveCwd,
-      env: process.env,
-      timeoutMs: this.config.timeoutMs,
-      signal: request.signal
-    });
-    const measuredDurationMs = Date.now() - startTime;
-    if (result.failed || (result.exitCode ?? 0) !== 0) {
-      if (request.signal?.aborted) {
-        throw new Error("CLI provider request was aborted");
-      }
-      if (result.timedOut) {
-        throw new Error(
-          `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
-        );
-      }
-      const codeText = result.exitCode !== null ? result.exitCode : "unknown";
-      const detail = result.stderr.trim() || result.stdout.trim();
-      const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
-      throw new Error(message);
-    }
-    const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
-    const parsed = this.parseOutputContent(responseContent);
-    return {
-      output: parsed.output,
-      tokenUsage: parsed.tokenUsage,
-      costUsd: parsed.costUsd,
-      durationMs: parsed.durationMs ?? measuredDurationMs,
-      raw: {
-        command: renderedCommand,
-        stderr: result.stderr,
-        exitCode: result.exitCode ?? 0,
+    try {
+      const startTime = Date.now();
+      const result = await this.runCommand(renderedCommand, {
         cwd: effectiveCwd,
-        outputFile: outputFilePath
+        env: process.env,
+        timeoutMs: this.config.timeoutMs,
+        signal: request.signal
+      });
+      const measuredDurationMs = Date.now() - startTime;
+      if (result.failed || (result.exitCode ?? 0) !== 0) {
+        if (request.signal?.aborted) {
+          throw new Error("CLI provider request was aborted");
+        }
+        if (result.timedOut) {
+          throw new Error(
+            `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
+          );
+        }
+        const codeText = result.exitCode !== null ? result.exitCode : "unknown";
+        const detail = result.stderr.trim() || result.stdout.trim();
+        const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
+        throw new Error(message);
       }
-    };
+      const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
+      const parsed = this.parseOutputContent(responseContent);
+      return {
+        output: parsed.output,
+        tokenUsage: parsed.tokenUsage,
+        costUsd: parsed.costUsd,
+        durationMs: parsed.durationMs ?? measuredDurationMs,
+        raw: {
+          command: renderedCommand,
+          stderr: result.stderr,
+          exitCode: result.exitCode ?? 0,
+          cwd: effectiveCwd,
+          outputFile: outputFilePath
+        }
+      };
+    } finally {
+      await cleanupTempFile(promptFilePath, this.keepTempFiles);
+    }
   }
   async invokeBatch(requests) {
     if (requests.length === 0) {
@@ -5655,7 +5683,7 @@ var CliProvider = class {
         batchInputFiles.push(...request.inputFiles);
       }
     }
-    const templateValues = buildTemplateValues(
+    const { values: templateValues, promptFilePath } = await buildTemplateValues(
       {
         question: "",
         guidelines: "",
@@ -5666,87 +5694,91 @@ var CliProvider = class {
       this.config,
       outputFilePath
     );
-    const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
+    const renderedCommand = renderTemplate(this.config.command, templateValues);
     if (this.verbose) {
       console.log(
         `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
-    const startTime = Date.now();
-    const result = await this.runCommand(renderedCommand, {
-      cwd: this.config.cwd,
-      env: process.env,
-      timeoutMs: this.config.timeoutMs,
-      signal: controller.signal
-    });
-    const measuredDurationMs = Date.now() - startTime;
-    if (result.failed || (result.exitCode ?? 0) !== 0) {
-      if (controller.signal.aborted) {
-        throw new Error("CLI provider request was aborted");
-      }
-      if (result.timedOut) {
-        throw new Error(
-          `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
-        );
-      }
-      const codeText = result.exitCode !== null ? result.exitCode : "unknown";
-      const detail = result.stderr.trim() || result.stdout.trim();
-      const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
-      throw new Error(message);
-    }
-    const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
-    const recordsById = this.parseJsonlBatchOutput(responseContent);
-    const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
-    const responses = requests.map((request) => {
-      const evalCaseId = request.evalCaseId;
-      if (!evalCaseId) {
-        return {
-          output: [],
-          durationMs: perRequestFallbackMs,
-          raw: {
-            command: renderedCommand,
-            stderr: result.stderr,
-            exitCode: result.exitCode ?? 0,
-            cwd: this.config.cwd,
-            outputFile: outputFilePath
+    try {
+      const startTime = Date.now();
+      const result = await this.runCommand(renderedCommand, {
+        cwd: this.config.cwd,
+        env: process.env,
+        timeoutMs: this.config.timeoutMs,
+        signal: controller.signal
+      });
+      const measuredDurationMs = Date.now() - startTime;
+      if (result.failed || (result.exitCode ?? 0) !== 0) {
+        if (controller.signal.aborted) {
+          throw new Error("CLI provider request was aborted");
+        }
+        if (result.timedOut) {
+          throw new Error(
+            `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
+          );
+        }
+        const codeText = result.exitCode !== null ? result.exitCode : "unknown";
+        const detail = result.stderr.trim() || result.stdout.trim();
+        const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
+        throw new Error(message);
+      }
+      const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
+      const recordsById = this.parseJsonlBatchOutput(responseContent);
+      const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
+      const responses = requests.map((request) => {
+        const evalCaseId = request.evalCaseId;
+        if (!evalCaseId) {
+          return {
+            output: [],
+            durationMs: perRequestFallbackMs,
+            raw: {
+              command: renderedCommand,
+              stderr: result.stderr,
+              exitCode: result.exitCode ?? 0,
+              cwd: this.config.cwd,
+              outputFile: outputFilePath
+            }
+          };
+        }
+        const parsed = recordsById.get(evalCaseId);
+        if (!parsed) {
+          const errorMessage = `Batch output missing id '${evalCaseId}'`;
+          if (this.verbose) {
+            console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
           }
-        };
-      }
-      const parsed = recordsById.get(evalCaseId);
-      if (!parsed) {
-        const errorMessage = `Batch output missing id '${evalCaseId}'`;
-        if (this.verbose) {
-          console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
+          return {
+            output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
+            durationMs: perRequestFallbackMs,
+            raw: {
+              command: renderedCommand,
+              stderr: result.stderr,
+              exitCode: result.exitCode ?? 0,
+              cwd: this.config.cwd,
+              outputFile: outputFilePath,
+              error: errorMessage
+            }
+          };
         }
         return {
-          output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
-          durationMs: perRequestFallbackMs,
+          output: parsed.output,
+          tokenUsage: parsed.tokenUsage,
+          costUsd: parsed.costUsd,
+          durationMs: parsed.durationMs ?? perRequestFallbackMs,
           raw: {
             command: renderedCommand,
             stderr: result.stderr,
             exitCode: result.exitCode ?? 0,
             cwd: this.config.cwd,
             outputFile: outputFilePath,
-            error: errorMessage
+            recordId: evalCaseId
           }
         };
-      }
-      return {
-        output: parsed.output,
-        tokenUsage: parsed.tokenUsage,
-        costUsd: parsed.costUsd,
-        durationMs: parsed.durationMs ?? perRequestFallbackMs,
-        raw: {
-          command: renderedCommand,
-          stderr: result.stderr,
-          exitCode: result.exitCode ?? 0,
-          cwd: this.config.cwd,
-          outputFile: outputFilePath,
-          recordId: evalCaseId
-        }
-      };
-    });
-    return responses;
+      });
+      return responses;
+    } finally {
+      await cleanupTempFile(promptFilePath, this.keepTempFiles);
+    }
   }
   /**
    * Parse output content from CLI.
@@ -5861,7 +5893,7 @@ var CliProvider = class {
       return;
     }
     const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
-    if (healthcheck.type === "http") {
+    if ("url" in healthcheck && healthcheck.url) {
       const controller = new AbortController();
       const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
       signal?.addEventListener("abort", () => controller.abort(), { once: true });
@@ -5880,50 +5912,70 @@ var CliProvider = class {
       }
       return;
     }
-    const renderedCommand = renderTemplate(
-      healthcheck.commandTemplate,
-      buildTemplateValues(
-        {
-          question: "",
-          guidelines: "",
-          inputFiles: [],
-          evalCaseId: "healthcheck",
-          attempt: 0
-        },
-        this.config,
-        generateOutputFilePath("healthcheck")
-      )
+    const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
+    if (!hcCommand) {
+      throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
+    }
+    const { values: templateValues, promptFilePath } = await buildTemplateValues(
+      {
+        question: "",
+        guidelines: "",
+        inputFiles: [],
+        evalCaseId: "healthcheck",
+        attempt: 0
+      },
+      this.config,
+      generateOutputFilePath("healthcheck")
     );
+    const renderedCommand = renderTemplate(hcCommand, templateValues);
+    const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
     if (this.verbose) {
       console.log(
-        `[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
+        `[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
       );
     }
-    const result = await this.runCommand(renderedCommand, {
-      cwd: healthcheck.cwd ?? this.config.cwd,
-      env: process.env,
-      timeoutMs,
-      signal
-    });
-    if (result.failed || (result.exitCode ?? 0) !== 0) {
-      const codeText = result.exitCode !== null ? result.exitCode : "unknown";
-      const detail = result.stderr.trim() || result.stdout.trim();
-      const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
-      throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
+    try {
+      const result = await this.runCommand(renderedCommand, {
+        cwd: hcCwd ?? this.config.cwd,
+        env: process.env,
+        timeoutMs,
+        signal
+      });
+      if (result.failed || (result.exitCode ?? 0) !== 0) {
+        const codeText = result.exitCode !== null ? result.exitCode : "unknown";
+        const detail = result.stderr.trim() || result.stdout.trim();
+        const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
+        throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
+      }
+    } finally {
+      await cleanupTempFile(promptFilePath, this.keepTempFiles);
     }
   }
 };
-function buildTemplateValues(request, config, outputFilePath) {
+async function buildTemplateValues(request, config, outputFilePath) {
   const inputFiles = normalizeInputFiles2(request.inputFiles);
+  const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
+  await import_promises11.default.writeFile(promptFilePath, request.question ?? "", "utf8");
   return {
-    PROMPT: shellEscape(request.question ?? ""),
-    GUIDELINES: shellEscape(request.guidelines ?? ""),
-    EVAL_ID: shellEscape(request.evalCaseId ?? ""),
-    ATTEMPT: shellEscape(String(request.attempt ?? 0)),
-    FILES: formatFileList(inputFiles, config.filesFormat),
-    OUTPUT_FILE: shellEscape(outputFilePath)
+    values: {
+      PROMPT: shellEscape(request.question ?? ""),
+      PROMPT_FILE: shellEscape(promptFilePath),
+      GUIDELINES: shellEscape(request.guidelines ?? ""),
+      EVAL_ID: shellEscape(request.evalCaseId ?? ""),
+      ATTEMPT: shellEscape(String(request.attempt ?? 0)),
+      FILES: formatFileList(inputFiles, config.filesFormat),
+      OUTPUT_FILE: shellEscape(outputFilePath)
+    },
+    promptFilePath
   };
 }
+async function cleanupTempFile(filePath, keepTempFiles) {
+  if (!filePath || keepTempFiles) {
+    return;
+  }
+  await import_promises11.default.unlink(filePath).catch(() => {
+  });
+}
 function normalizeInputFiles2(inputFiles) {
   if (!inputFiles || inputFiles.length === 0) {
     return void 0;
@@ -8285,29 +8337,25 @@ var ProviderRegistry = class {
 var import_node_path18 = __toESM(require("path"), 1);
 var import_zod3 = require("zod");
 var CliHealthcheckHttpInputSchema = import_zod3.z.object({
-  type: import_zod3.z.literal("http"),
   url: import_zod3.z.string().min(1, "healthcheck URL is required"),
   timeout_seconds: import_zod3.z.number().positive().optional(),
   timeoutSeconds: import_zod3.z.number().positive().optional()
 });
 var CliHealthcheckCommandInputSchema = import_zod3.z.object({
-  type: import_zod3.z.literal("command"),
-  command_template: import_zod3.z.string().optional(),
-  commandTemplate: import_zod3.z.string().optional(),
+  command: import_zod3.z.string().min(1, "healthcheck command is required"),
   cwd: import_zod3.z.string().optional(),
   timeout_seconds: import_zod3.z.number().positive().optional(),
   timeoutSeconds: import_zod3.z.number().positive().optional()
 });
-var CliHealthcheckInputSchema = import_zod3.z.discriminatedUnion("type", [
+var CliHealthcheckInputSchema = import_zod3.z.union([
   CliHealthcheckHttpInputSchema,
   CliHealthcheckCommandInputSchema
 ]);
 var CliTargetInputSchema = import_zod3.z.object({
   name: import_zod3.z.string().min(1, "target name is required"),
   provider: import_zod3.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
-  // Command template - required (accept both naming conventions)
-  command_template: import_zod3.z.string().optional(),
-  commandTemplate: import_zod3.z.string().optional(),
+  // Command - required
+  command: import_zod3.z.string(),
   // Files format - optional
   files_format: import_zod3.z.string().optional(),
   filesFormat: import_zod3.z.string().optional(),
@@ -8337,26 +8385,22 @@ var CliTargetInputSchema = import_zod3.z.object({
   workers: import_zod3.z.number().int().min(1).optional(),
   provider_batching: import_zod3.z.boolean().optional(),
   providerBatching: import_zod3.z.boolean().optional()
-}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
-  message: "Either command_template or commandTemplate is required"
 });
 var CliHealthcheckHttpSchema = import_zod3.z.object({
-  type: import_zod3.z.literal("http"),
   url: import_zod3.z.string().min(1),
   timeoutMs: import_zod3.z.number().positive().optional()
 }).strict();
 var CliHealthcheckCommandSchema = import_zod3.z.object({
-  type: import_zod3.z.literal("command"),
-  commandTemplate: import_zod3.z.string().min(1),
+  command: import_zod3.z.string().min(1),
   cwd: import_zod3.z.string().optional(),
   timeoutMs: import_zod3.z.number().positive().optional()
 }).strict();
-var CliHealthcheckSchema = import_zod3.z.discriminatedUnion("type", [
+var CliHealthcheckSchema = import_zod3.z.union([
   CliHealthcheckHttpSchema,
   CliHealthcheckCommandSchema
 ]);
 var CliTargetConfigSchema = import_zod3.z.object({
-  commandTemplate: import_zod3.z.string().min(1),
+  command: import_zod3.z.string().min(1),
   filesFormat: import_zod3.z.string().optional(),
   cwd: import_zod3.z.string().optional(),
   workspaceTemplate: import_zod3.z.string().optional(),
@@ -8368,26 +8412,19 @@ var CliTargetConfigSchema = import_zod3.z.object({
 function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
   const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
-  if (input.type === "http") {
+  if ("url" in input && input.url) {
     const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
     return {
-      type: "http",
       url,
       timeoutMs
     };
   }
-  const commandTemplateSource = input.command_template ?? input.commandTemplate;
-  if (commandTemplateSource === void 0) {
+  if (!("command" in input) || !input.command) {
     throw new Error(
-      `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
+      `${targetName} healthcheck: Either 'command' or 'url' is required for healthcheck`
     );
   }
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${targetName} healthcheck command template`,
-    true
-  );
+  const command = resolveString(input.command, env, `${targetName} healthcheck command`, true);
   let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
     allowLiteral: true,
     optionalEnv: true
@@ -8399,24 +8436,14 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
     cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
   }
   return {
-    type: "command",
-    commandTemplate,
+    command,
     cwd,
     timeoutMs
   };
 }
 function normalizeCliTargetInput(input, env, evalFilePath) {
   const targetName = input.name;
-  const commandTemplateSource = input.command_template ?? input.commandTemplate;
-  if (commandTemplateSource === void 0) {
-    throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
-  }
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${targetName} CLI command template`,
-    true
-  );
+  const command = resolveString(input.command, env, `${targetName} CLI command`, true);
   const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
   const filesFormat = resolveOptionalLiteralString(filesFormatSource);
   const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
@@ -8455,7 +8482,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
   );
   const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
   return {
-    commandTemplate,
+    command,
     filesFormat,
     cwd,
     workspaceTemplate,
@@ -8467,6 +8494,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
 }
 var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
   "PROMPT",
+  "PROMPT_FILE",
   "GUIDELINES",
   "EVAL_ID",
   "ATTEMPT",
@@ -9181,8 +9209,8 @@ var cliErrorMap = (issue, ctx) => {
   if (issue.code === import_zod3.z.ZodIssueCode.unrecognized_keys) {
     return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
   }
-  if (issue.code === import_zod3.z.ZodIssueCode.invalid_union_discriminator) {
-    return { message: "healthcheck type must be 'http' or 'command'" };
+  if (issue.code === import_zod3.z.ZodIssueCode.invalid_union) {
+    return { message: "healthcheck must have either 'url' (HTTP) or 'command' (command)" };
   }
   if (issue.code === import_zod3.z.ZodIssueCode.invalid_type && issue.expected === "string") {
     return { message: `${ctx.defaultError} (expected a string value)` };
@@ -9198,18 +9226,17 @@ function resolveCliConfig(target, env, evalFilePath) {
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
-  assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
-  if (normalized.healthcheck?.type === "command") {
+  assertSupportedCliPlaceholders(normalized.command, `${target.name} CLI command`);
+  if ("command" in (normalized.healthcheck ?? {}) && normalized.healthcheck.command) {
     assertSupportedCliPlaceholders(
-      normalized.healthcheck.commandTemplate,
-      `${target.name} healthcheck command template`
+      normalized.healthcheck.command,
+      `${target.name} healthcheck command`
     );
   }
   return normalized;
 }
 function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
-  const commandTemplateSource = target.command_template ?? target.commandTemplate;
-  const commandTemplate = commandTemplateSource ? resolveString(commandTemplateSource, env, `${target.name} command template`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
+  const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
   const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
   const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
   let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
@@ -9223,7 +9250,7 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
     cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
   }
   return {
-    commandTemplate,
+    command,
     cwd,
     timeoutMs
   };
@@ -10926,7 +10953,7 @@ async function discoverProviders(registry, baseDir) {
     }
     registry.register(kindName, (target) => {
       return new CliProvider(target.name, {
-        commandTemplate: `bun run ${filePath} {PROMPT}`
+        command: `bun run ${filePath} {PROMPT}`
       });
     });
     discoveredKinds.push(kindName);
@@ -11439,13 +11466,13 @@ function toCamelCaseDeep(obj) {
 var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
 var CodeEvaluator = class {
   kind = "code";
-  script;
+  command;
   cwd;
   agentTimeoutMs;
   config;
   target;
   constructor(options) {
-    this.script = options.script;
+    this.command = options.command ?? options.script ?? [];
     this.cwd = options.cwd;
     this.agentTimeoutMs = options.agentTimeoutMs;
     this.config = options.config;
@@ -11504,7 +11531,7 @@ var CodeEvaluator = class {
     const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
     try {
       const stdout = await executeScript(
-        this.script,
+        this.command,
         inputPayload,
         this.agentTimeoutMs,
         this.cwd,
@@ -11518,7 +11545,7 @@ var CodeEvaluator = class {
       const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
       const proxyUsage = getProxyUsage?.();
       const evaluatorRawRequest = {
-        script: this.script,
+        command: this.command,
         ...this.cwd ? { cwd: this.cwd } : {},
         ...proxyUsage ? {
           target_proxy: {
@@ -11548,7 +11575,7 @@ var CodeEvaluator = class {
         expectedAspectCount: 1,
         reasoning: message,
         evaluatorRawRequest: {
-          script: this.script,
+          command: this.command,
           ...this.cwd ? { cwd: this.cwd } : {},
           ...proxyUsage ? {
             target_proxy: {
@@ -14507,7 +14534,7 @@ var llmJudgeFactory = (config, context2) => {
 var codeFactory = (config, context2) => {
   const c = config;
   return new CodeEvaluator({
-    script: c.script,
+    command: c.command ?? c.script ?? [],
     cwd: c.resolvedCwd ?? c.cwd,
     agentTimeoutMs: context2.agentTimeoutMs,
     config: c.config,
@@ -14689,7 +14716,7 @@ async function discoverAssertions(registry, baseDir) {
     }
     const factory = (_config, context2) => {
       return new CodeEvaluator({
-        script: ["bun", "run", filePath],
+        command: ["bun", "run", filePath],
         agentTimeoutMs: context2.agentTimeoutMs
       });
     };
@@ -15043,7 +15070,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
   });
   const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
   const cwd = config.cwd;
-  const result = await execFileWithStdin(config.script, stdin, {
+  const commandArray = config.command ?? config.script ?? [];
+  const result = await execFileWithStdin(commandArray, stdin, {
     timeoutMs,
     cwd
   });
@@ -15090,7 +15118,8 @@ async function runEvaluation(options) {
     keepWorkspaces,
     cleanupWorkspaces,
     trials,
-    streamCallbacks
+    streamCallbacks,
+    totalBudgetUsd
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -15263,10 +15292,39 @@ async function runEvaluation(options) {
   let nextWorkerId = 1;
   const workerIdByEvalId = /* @__PURE__ */ new Map();
   let beforeAllOutputAttached = false;
+  let cumulativeBudgetCost = 0;
+  let budgetExhausted = false;
   const promises = filteredEvalCases.map(
     (evalCase) => limit(async () => {
       const workerId = nextWorkerId++;
       workerIdByEvalId.set(evalCase.id, workerId);
+      if (totalBudgetUsd !== void 0 && budgetExhausted) {
+        const budgetResult = {
+          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+          testId: evalCase.id,
+          dataset: evalCase.dataset,
+          score: 0,
+          hits: [],
+          misses: [],
+          answer: "",
+          target: target.name,
+          error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+          budgetExceeded: true
+        };
+        if (onProgress) {
+          await onProgress({
+            workerId,
+            testId: evalCase.id,
+            status: "failed",
+            completedAt: Date.now(),
+            error: budgetResult.error
+          });
+        }
+        if (onResult) {
+          await onResult(budgetResult);
+        }
+        return budgetResult;
+      }
       if (onProgress) {
         await onProgress({
           workerId,
@@ -15300,6 +15358,23 @@ async function runEvaluation(options) {
           typeRegistry
         };
         let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
+        if (totalBudgetUsd !== void 0) {
+          let caseCost;
+          if (result.trials && result.trials.length > 0) {
+            const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
+            if (trialCostSum > 0) {
+              caseCost = trialCostSum;
+            }
+          } else {
+            caseCost = result.trace?.costUsd;
+          }
+          if (caseCost !== void 0) {
+            cumulativeBudgetCost += caseCost;
+            if (cumulativeBudgetCost >= totalBudgetUsd) {
+              budgetExhausted = true;
+            }
+          }
+        }
         if (beforeAllOutput && !beforeAllOutputAttached) {
           result = { ...result, beforeAllOutput };
           beforeAllOutputAttached = true;