npm - agentv - Versions diffs - 4.19.0 → 4.20.0-next.1 - Mend

agentv 4.19.0 → 4.20.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/{chunk-IWI4AJRS.js → chunk-LMQFWJJL.js} RENAMED

@@ -15,9 +15,10 @@ import {
   resolveWorkspaceOrFilePath,
   toSnakeCaseDeep,
   writeArtifactsFromResults
-} from "./chunk-NL6P5MUH.js";
+} from "./chunk-ZNS74WKH.js";
 import {
   ResponseCache,
+  RunBudgetTracker,
   commitAndPushResultsBranch,
   createDraftResultsPr,
   deriveCategory,
@@ -30,7 +31,7 @@ import {
   shouldSkipCacheForTemperature,
   stageResultsArtifacts,
   syncResultsRepo
-} from "./chunk-R2QDYORI.js";
+} from "./chunk-36HXBYUY.js";
 import {
   CLI_PLACEHOLDERS,
   COMMON_TARGET_SETTINGS,
@@ -60,12 +61,12 @@ import {
   subscribeToCopilotSdkLogEntries,
   subscribeToPiLogEntries,
   toCamelCaseDeep
-} from "./chunk-PTYQS37Y.js";
+} from "./chunk-LP4Y5D2Z.js";
 // package.json
 var package_default = {
   name: "agentv",
-  version: "4.19.0",
+  version: "4.20.0-next.1",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -4764,7 +4765,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
     tags: normalizeStringArray(rawOptions.tag),
     excludeTags: normalizeStringArray(rawOptions.excludeTag),
     transcript: normalizeString(rawOptions.transcript),
-    experiment: normalizeString(rawOptions.experiment)
+    experiment: normalizeString(rawOptions.experiment),
+    budgetUsd: normalizeOptionalNumber(rawOptions.budgetUsd)
   };
 }
 async function ensureFileExists(filePath, description) {
@@ -5022,6 +5024,7 @@ async function runSingleEvalFile(params) {
     trialsConfig,
     matrixMode,
     budgetUsd,
+    runBudgetTracker,
     failOnError,
     providerFactory
   } = params;
@@ -5088,6 +5091,7 @@ async function runSingleEvalFile(params) {
     keepWorkspaces: options.keepWorkspaces,
     trials: trialsConfig,
     budgetUsd,
+    runBudgetTracker,
     failOnError,
     graderTarget: options.graderTarget,
     model: options.model,
@@ -5277,7 +5281,7 @@ async function runEvalCommand(input) {
   const useFileExport = !!options.otelFile;
   if (options.exportOtel || useFileExport) {
     try {
-      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-RTIUSC6L.js");
+      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-GURCO6IS.js");
       let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
       let headers = {};
       if (options.otelBackend) {
@@ -5335,6 +5339,10 @@ async function runEvalCommand(input) {
   const remoteEvalSummaries = [];
   const seenTestCases = /* @__PURE__ */ new Set();
   const displayIdTracker = createDisplayIdTracker();
+  const runBudgetTracker = options.budgetUsd ? new RunBudgetTracker(options.budgetUsd) : void 0;
+  if (runBudgetTracker) {
+    console.log(`Run budget cap: $${runBudgetTracker.budgetCapUsd.toFixed(2)}`);
+  }
   const perFileWorkers = options.workers;
   const fileMetadata = /* @__PURE__ */ new Map();
   for (const testFilePath of resolvedTestFiles) {
@@ -5472,7 +5480,7 @@ async function runEvalCommand(input) {
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
   let transcriptProviderFactory;
   if (options.transcript) {
-    const { TranscriptProvider } = await import("./dist-RTIUSC6L.js");
+    const { TranscriptProvider } = await import("./dist-GURCO6IS.js");
     const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
     const totalTests = [...fileMetadata.values()].reduce(
       (sum, meta) => sum + meta.testCases.length,
@@ -5490,6 +5498,34 @@ async function runEvalCommand(input) {
   }
   try {
     for (const testFilePath of activeTestFiles) {
+      if (runBudgetTracker?.isExceeded()) {
+        const targetPrep2 = fileMetadata.get(testFilePath);
+        if (!targetPrep2) continue;
+        const budgetMsg = `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`;
+        console.log(`
+\u26A0 ${budgetMsg} \u2014 skipping ${path15.basename(testFilePath)}`);
+        for (const { selection } of targetPrep2.selections) {
+          const skippedResults = targetPrep2.testCases.map((testCase) => ({
+            timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+            testId: testCase.id,
+            score: 0,
+            assertions: [],
+            output: [],
+            error: budgetMsg,
+            budgetExceeded: true,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "budget_exceeded",
+            executionError: { message: budgetMsg, stage: "setup" },
+            target: selection.targetName
+          }));
+          for (const r of skippedResults) {
+            await outputWriter.append(r);
+          }
+          allResults.push(...skippedResults);
+        }
+        continue;
+      }
       const targetPrep = fileMetadata.get(testFilePath);
       if (!targetPrep) {
         throw new Error(`Missing metadata for ${testFilePath}`);
@@ -5530,6 +5566,7 @@ async function runEvalCommand(input) {
               trialsConfig: options.transcript ? void 0 : targetPrep.trialsConfig,
               matrixMode: targetPrep.selections.length > 1,
               budgetUsd: targetPrep.budgetUsd,
+              runBudgetTracker,
               failOnError: targetPrep.failOnError,
               threshold: resolvedThreshold,
               providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory
@@ -5612,7 +5649,7 @@ async function runEvalCommand(input) {
     if (usesDefaultArtifactWorkspace && allResults.length > 0) {
       const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
       if (isResumeAppend) {
-        const { writePerTestArtifacts } = await import("./artifact-writer-YATMDPWI.js");
+        const { writePerTestArtifacts } = await import("./artifact-writer-RFXWXUOV.js");
         await writePerTestArtifacts(allResults, runDir, {
           experiment: normalizeExperimentName(options.experiment)
         });
@@ -5702,13 +5739,21 @@ Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed t
   agentv eval run ${evalFileArgs}${targetFlag} --output ${relativeRunDir} --rerun-failed`
       );
     }
+    const runBudgetExceeded = runBudgetTracker?.isExceeded() ?? false;
+    if (runBudgetExceeded) {
+      console.log(
+        `
+\u26A0 Run budget exceeded: $${runBudgetTracker?.currentCostUsd.toFixed(4)} spent of $${runBudgetTracker?.budgetCapUsd.toFixed(4)} cap`
+      );
+    }
     return {
       executionErrorCount: summary.executionErrorCount,
       outputPath,
       testFiles: activeTestFiles,
       target: options.target,
       thresholdFailed,
-      allExecutionErrors
+      allExecutionErrors,
+      budgetExceeded: runBudgetExceeded || void 0
     };
   } finally {
     unsubscribeCodexLogs();
@@ -5822,4 +5867,4 @@ export {
   getCategories,
   filterByCategory
 };
-//# sourceMappingURL=chunk-IWI4AJRS.js.map
+//# sourceMappingURL=chunk-LMQFWJJL.js.map