npm - agentv - Versions diffs - 2.12.0 → 2.13.0 - Mend

agentv 2.12.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{chunk-LUHCYBMD.js → chunk-FSBZM3HT.js} +66 -5
package/dist/chunk-FSBZM3HT.js.map +1 -0
package/dist/{chunk-6KU2ZUFJ.js → chunk-M6JYP6A6.js} +17 -55
package/dist/chunk-M6JYP6A6.js.map +1 -0
package/dist/{chunk-YBJX5CP6.js → chunk-UWDI4UVN.js} +202 -19
package/dist/chunk-UWDI4UVN.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-OPPA4P5R.js → dist-CCUHG3SN.js} +4 -2
package/dist/index.js +3 -3
package/dist/{interactive-TOUKPSHP.js → interactive-P3D5O673.js} +3 -3
package/package.json +4 -2
package/dist/chunk-6KU2ZUFJ.js.map +0 -1
package/dist/chunk-LUHCYBMD.js.map +0 -1
package/dist/chunk-YBJX5CP6.js.map +0 -1
package/dist/{dist-OPPA4P5R.js.map → dist-CCUHG3SN.js.map} +0 -0
package/dist/{interactive-TOUKPSHP.js.map → interactive-P3D5O673.js.map} +0 -0

package/dist/{chunk-LUHCYBMD.js → chunk-FSBZM3HT.js} RENAMED Viewed

@@ -148,7 +148,7 @@ var require_dist = __commonJS({
   }
 });
-// ../../packages/core/dist/chunk-7HPKTRFZ.js
+// ../../packages/core/dist/chunk-JHER2LQ5.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -4195,7 +4195,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-7HPKTRFZ.js
+// ../../packages/core/dist/chunk-JHER2LQ5.js
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
 var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
 var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -34331,6 +34331,11 @@ async function loadConfig(evalFilePath, repoRoot) {
         continue;
       }
       const config2 = parsed;
+      const requiredVersion = parsed.required_version;
+      if (requiredVersion !== void 0 && typeof requiredVersion !== "string") {
+        logWarning(`Invalid required_version in ${configPath}, expected string`);
+        continue;
+      }
       const guidelinePatterns = config2.guideline_patterns;
       if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
         logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -34354,6 +34359,7 @@ async function loadConfig(evalFilePath, repoRoot) {
         configPath
       );
       return {
+        required_version: requiredVersion,
         guideline_patterns: guidelinePatterns,
         eval_patterns: evalPatterns,
         execution: executionDefaults
@@ -34497,6 +34503,22 @@ function extractTotalBudgetUsd(suite) {
   );
   return void 0;
 }
+function extractFailOnError(suite) {
+  const execution = suite.execution;
+  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
+    return void 0;
+  }
+  const executionObj = execution;
+  const raw = executionObj.fail_on_error ?? executionObj.failOnError;
+  if (raw === void 0 || raw === null) {
+    return void 0;
+  }
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  logWarning(`Invalid execution.fail_on_error: ${raw}. Must be true or false. Ignoring.`);
+  return void 0;
+}
 function parseExecutionDefaults(raw, configPath) {
   if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
     return void 0;
@@ -36653,13 +36675,15 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
   }
   const { tests, parsed } = await loadTestsFromYaml(evalFilePath, repoRoot, options);
   const metadata = parseMetadata(parsed);
+  const failOnError = extractFailOnError(parsed);
   return {
     tests,
     trials: extractTrialsConfig(parsed),
     targets: extractTargetsFromSuite(parsed),
     cacheConfig: extractCacheConfig(parsed),
     totalBudgetUsd: extractTotalBudgetUsd(parsed),
-    ...metadata !== void 0 && { metadata }
+    ...metadata !== void 0 && { metadata },
+    ...failOnError !== void 0 && { failOnError }
   };
 }
 var loadEvalSuite = loadTestSuite;
@@ -46553,7 +46577,8 @@ async function runEvaluation(options) {
     cleanupWorkspaces,
     trials,
     streamCallbacks,
-    totalBudgetUsd
+    totalBudgetUsd,
+    failOnError
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -46749,6 +46774,7 @@ async function runEvaluation(options) {
   let beforeAllOutputAttached = false;
   let cumulativeBudgetCost = 0;
   let budgetExhausted = false;
+  let failOnErrorTriggered = false;
   const promises = filteredEvalCases.map(
     (evalCase) => limit(async () => {
       const workerId = nextWorkerId++;
@@ -46787,6 +46813,37 @@ async function runEvaluation(options) {
         }
         return budgetResult;
       }
+      if (failOnError === true && failOnErrorTriggered) {
+        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+        const haltResult = {
+          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+          testId: evalCase.id,
+          dataset: evalCase.dataset,
+          score: 0,
+          hits: [],
+          misses: [],
+          answer: "",
+          target: target.name,
+          error: errorMsg,
+          executionStatus: "execution_error",
+          failureStage: "setup",
+          failureReasonCode: "error_threshold_exceeded",
+          executionError: { message: errorMsg, stage: "setup" }
+        };
+        if (onProgress) {
+          await onProgress({
+            workerId,
+            testId: evalCase.id,
+            status: "failed",
+            completedAt: Date.now(),
+            error: haltResult.error
+          });
+        }
+        if (onResult) {
+          await onResult(haltResult);
+        }
+        return haltResult;
+      }
       if (onProgress) {
         await onProgress({
           workerId,
@@ -46839,6 +46896,9 @@ async function runEvaluation(options) {
             }
           }
         }
+        if (failOnError === true && result.executionStatus === "execution_error") {
+          failOnErrorTriggered = true;
+        }
         if (beforeAllOutput && !beforeAllOutputAttached) {
           result = { ...result, beforeAllOutput };
           beforeAllOutputAttached = true;
@@ -48851,6 +48911,7 @@ export {
   extractTargetsFromTestCase,
   extractTrialsConfig,
   extractCacheConfig,
+  extractFailOnError,
   detectFormat,
   buildPromptInputs,
   readTestSuiteMetadata,
@@ -48950,4 +49011,4 @@ export {
   OtelStreamingObserver,
   createAgentKernel
 };
-//# sourceMappingURL=chunk-LUHCYBMD.js.map
+//# sourceMappingURL=chunk-FSBZM3HT.js.map