npm - @agentv/core - Versions diffs - 2.12.0 → 2.13.0 - Mend

@agentv/core 2.12.0 → 2.13.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-7HPKTRFZ.js → chunk-JHER2LQ5.js} +1 -1
package/dist/chunk-JHER2LQ5.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +64 -2
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +18 -1
package/dist/index.d.ts +18 -1
package/dist/index.js +64 -3
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-7HPKTRFZ.js.map +0 -1

package/dist/evaluation/validation/index.js CHANGED Viewed

@@ -6,7 +6,7 @@ import {
   findGitRoot,
   isEvaluatorKind,
   resolveFileReference
-} from "../../chunk-7HPKTRFZ.js";
+} from "../../chunk-JHER2LQ5.js";
 // src/evaluation/validation/file-type.ts
 import { readFile } from "node:fs/promises";

package/dist/index.cjs CHANGED Viewed

@@ -1492,6 +1492,7 @@ __export(index_exports, {
   executeWorkspaceScript: () => executeWorkspaceScript,
   explorationRatio: () => explorationRatio,
   extractCacheConfig: () => extractCacheConfig,
+  extractFailOnError: () => extractFailOnError,
   extractJsonBlob: () => extractJsonBlob,
   extractTargetFromSuite: () => extractTargetFromSuite,
   extractTargetsFromSuite: () => extractTargetsFromSuite,
@@ -2014,6 +2015,11 @@ async function loadConfig(evalFilePath, repoRoot) {
         continue;
       }
       const config = parsed;
+      const requiredVersion = parsed.required_version;
+      if (requiredVersion !== void 0 && typeof requiredVersion !== "string") {
+        logWarning(`Invalid required_version in ${configPath}, expected string`);
+        continue;
+      }
       const guidelinePatterns = config.guideline_patterns;
       if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
         logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -2037,6 +2043,7 @@ async function loadConfig(evalFilePath, repoRoot) {
         configPath
       );
       return {
+        required_version: requiredVersion,
         guideline_patterns: guidelinePatterns,
         eval_patterns: evalPatterns,
         execution: executionDefaults
@@ -2180,6 +2187,22 @@ function extractTotalBudgetUsd(suite) {
   );
   return void 0;
 }
+function extractFailOnError(suite) {
+  const execution = suite.execution;
+  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
+    return void 0;
+  }
+  const executionObj = execution;
+  const raw = executionObj.fail_on_error ?? executionObj.failOnError;
+  if (raw === void 0 || raw === null) {
+    return void 0;
+  }
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  logWarning(`Invalid execution.fail_on_error: ${raw}. Must be true or false. Ignoring.`);
+  return void 0;
+}
 function parseExecutionDefaults(raw, configPath) {
   if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
     return void 0;
@@ -4375,13 +4398,15 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
   }
   const { tests, parsed } = await loadTestsFromYaml(evalFilePath, repoRoot, options);
   const metadata = parseMetadata(parsed);
+  const failOnError = extractFailOnError(parsed);
   return {
     tests,
     trials: extractTrialsConfig(parsed),
     targets: extractTargetsFromSuite(parsed),
     cacheConfig: extractCacheConfig(parsed),
     totalBudgetUsd: extractTotalBudgetUsd(parsed),
-    ...metadata !== void 0 && { metadata }
+    ...metadata !== void 0 && { metadata },
+    ...failOnError !== void 0 && { failOnError }
   };
 }
 var loadEvalSuite = loadTestSuite;
@@ -15780,7 +15805,8 @@ async function runEvaluation(options) {
     cleanupWorkspaces,
     trials,
     streamCallbacks,
-    totalBudgetUsd
+    totalBudgetUsd,
+    failOnError
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -15976,6 +16002,7 @@ async function runEvaluation(options) {
   let beforeAllOutputAttached = false;
   let cumulativeBudgetCost = 0;
   let budgetExhausted = false;
+  let failOnErrorTriggered = false;
   const promises = filteredEvalCases.map(
     (evalCase) => limit(async () => {
       const workerId = nextWorkerId++;
@@ -16014,6 +16041,37 @@ async function runEvaluation(options) {
         }
         return budgetResult;
       }
+      if (failOnError === true && failOnErrorTriggered) {
+        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+        const haltResult = {
+          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+          testId: evalCase.id,
+          dataset: evalCase.dataset,
+          score: 0,
+          hits: [],
+          misses: [],
+          answer: "",
+          target: target.name,
+          error: errorMsg,
+          executionStatus: "execution_error",
+          failureStage: "setup",
+          failureReasonCode: "error_threshold_exceeded",
+          executionError: { message: errorMsg, stage: "setup" }
+        };
+        if (onProgress) {
+          await onProgress({
+            workerId,
+            testId: evalCase.id,
+            status: "failed",
+            completedAt: Date.now(),
+            error: haltResult.error
+          });
+        }
+        if (onResult) {
+          await onResult(haltResult);
+        }
+        return haltResult;
+      }
       if (onProgress) {
         await onProgress({
           workerId,
@@ -16066,6 +16124,9 @@ async function runEvaluation(options) {
             }
           }
         }
+        if (failOnError === true && result.executionStatus === "execution_error") {
+          failOnErrorTriggered = true;
+        }
         if (beforeAllOutput && !beforeAllOutputAttached) {
           result = { ...result, beforeAllOutput };
           beforeAllOutputAttached = true;
@@ -18132,6 +18193,7 @@ function createAgentKernel() {
   executeWorkspaceScript,
   explorationRatio,
   extractCacheConfig,
+  extractFailOnError,
   extractJsonBlob,
   extractTargetFromSuite,
   extractTargetsFromSuite,