agentv 2.12.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
148
148
  }
149
149
  });
150
150
 
151
- // ../../packages/core/dist/chunk-7HPKTRFZ.js
151
+ // ../../packages/core/dist/chunk-JHER2LQ5.js
152
152
  import { constants } from "node:fs";
153
153
  import { access, readFile } from "node:fs/promises";
154
154
  import path from "node:path";
@@ -4195,7 +4195,7 @@ var coerce = {
4195
4195
  };
4196
4196
  var NEVER = INVALID;
4197
4197
 
4198
- // ../../packages/core/dist/chunk-7HPKTRFZ.js
4198
+ // ../../packages/core/dist/chunk-JHER2LQ5.js
4199
4199
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
4200
4200
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
4201
4201
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -34331,6 +34331,11 @@ async function loadConfig(evalFilePath, repoRoot) {
34331
34331
  continue;
34332
34332
  }
34333
34333
  const config2 = parsed;
34334
+ const requiredVersion = parsed.required_version;
34335
+ if (requiredVersion !== void 0 && typeof requiredVersion !== "string") {
34336
+ logWarning(`Invalid required_version in ${configPath}, expected string`);
34337
+ continue;
34338
+ }
34334
34339
  const guidelinePatterns = config2.guideline_patterns;
34335
34340
  if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
34336
34341
  logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -34354,6 +34359,7 @@ async function loadConfig(evalFilePath, repoRoot) {
34354
34359
  configPath
34355
34360
  );
34356
34361
  return {
34362
+ required_version: requiredVersion,
34357
34363
  guideline_patterns: guidelinePatterns,
34358
34364
  eval_patterns: evalPatterns,
34359
34365
  execution: executionDefaults
@@ -34497,6 +34503,22 @@ function extractTotalBudgetUsd(suite) {
34497
34503
  );
34498
34504
  return void 0;
34499
34505
  }
34506
/**
 * Reads the fail-on-error flag from a suite's `execution` section.
 *
 * Only `suite.execution` is inspected. The flag is looked up under the
 * snake_case key `fail_on_error` first and the camelCase key `failOnError`
 * second.
 *
 * @param {object} suite - Object whose `execution` property is read
 *   (presumably the parsed eval/suite file; confirm against callers).
 * @returns {boolean|undefined} The flag when it is a real boolean; `undefined`
 *   when `execution` is missing or not a plain object, when the flag is absent
 *   or null, or when the flag has a non-boolean value (a warning is logged via
 *   `logWarning` in that last case).
 */
+ function extractFailOnError(suite) {
34507
+ const execution = suite.execution;
34508
// Arrays also report typeof "object", so they are rejected explicitly.
+ if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
34509
+ return void 0;
34510
+ }
34511
+ const executionObj = execution;
34512
// `??` falls through to the camelCase key only when the snake_case key is
// null or undefined; an explicit `false` under `fail_on_error` is kept.
+ const raw = executionObj.fail_on_error ?? executionObj.failOnError;
34513
// Absent (or null under both keys): not configured, so no warning is emitted.
+ if (raw === void 0 || raw === null) {
34514
+ return void 0;
34515
+ }
34516
+ if (typeof raw === "boolean") {
34517
+ return raw;
34518
+ }
34519
// Any other type (for example the string "true") is not coerced: it is
// reported and then ignored.
+ logWarning(`Invalid execution.fail_on_error: ${raw}. Must be true or false. Ignoring.`);
34520
+ return void 0;
34521
+ }
34500
34522
  function parseExecutionDefaults(raw, configPath) {
34501
34523
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
34502
34524
  return void 0;
@@ -36653,13 +36675,15 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
36653
36675
  }
36654
36676
  const { tests, parsed } = await loadTestsFromYaml(evalFilePath, repoRoot, options);
36655
36677
  const metadata = parseMetadata(parsed);
36678
+ const failOnError = extractFailOnError(parsed);
36656
36679
  return {
36657
36680
  tests,
36658
36681
  trials: extractTrialsConfig(parsed),
36659
36682
  targets: extractTargetsFromSuite(parsed),
36660
36683
  cacheConfig: extractCacheConfig(parsed),
36661
36684
  totalBudgetUsd: extractTotalBudgetUsd(parsed),
36662
- ...metadata !== void 0 && { metadata }
36685
+ ...metadata !== void 0 && { metadata },
36686
+ ...failOnError !== void 0 && { failOnError }
36663
36687
  };
36664
36688
  }
36665
36689
  var loadEvalSuite = loadTestSuite;
@@ -46553,7 +46577,8 @@ async function runEvaluation(options) {
46553
46577
  cleanupWorkspaces,
46554
46578
  trials,
46555
46579
  streamCallbacks,
46556
- totalBudgetUsd
46580
+ totalBudgetUsd,
46581
+ failOnError
46557
46582
  } = options;
46558
46583
  let useCache = options.useCache;
46559
46584
  if (trials && trials.count > 1 && useCache) {
@@ -46749,6 +46774,7 @@ async function runEvaluation(options) {
46749
46774
  let beforeAllOutputAttached = false;
46750
46775
  let cumulativeBudgetCost = 0;
46751
46776
  let budgetExhausted = false;
46777
+ let failOnErrorTriggered = false;
46752
46778
  const promises = filteredEvalCases.map(
46753
46779
  (evalCase) => limit(async () => {
46754
46780
  const workerId = nextWorkerId++;
@@ -46787,6 +46813,37 @@ async function runEvaluation(options) {
46787
46813
  }
46788
46814
  return budgetResult;
46789
46815
  }
46816
+ if (failOnError === true && failOnErrorTriggered) {
46817
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
46818
+ const haltResult = {
46819
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
46820
+ testId: evalCase.id,
46821
+ dataset: evalCase.dataset,
46822
+ score: 0,
46823
+ hits: [],
46824
+ misses: [],
46825
+ answer: "",
46826
+ target: target.name,
46827
+ error: errorMsg,
46828
+ executionStatus: "execution_error",
46829
+ failureStage: "setup",
46830
+ failureReasonCode: "error_threshold_exceeded",
46831
+ executionError: { message: errorMsg, stage: "setup" }
46832
+ };
46833
+ if (onProgress) {
46834
+ await onProgress({
46835
+ workerId,
46836
+ testId: evalCase.id,
46837
+ status: "failed",
46838
+ completedAt: Date.now(),
46839
+ error: haltResult.error
46840
+ });
46841
+ }
46842
+ if (onResult) {
46843
+ await onResult(haltResult);
46844
+ }
46845
+ return haltResult;
46846
+ }
46790
46847
  if (onProgress) {
46791
46848
  await onProgress({
46792
46849
  workerId,
@@ -46839,6 +46896,9 @@ async function runEvaluation(options) {
46839
46896
  }
46840
46897
  }
46841
46898
  }
46899
+ if (failOnError === true && result.executionStatus === "execution_error") {
46900
+ failOnErrorTriggered = true;
46901
+ }
46842
46902
  if (beforeAllOutput && !beforeAllOutputAttached) {
46843
46903
  result = { ...result, beforeAllOutput };
46844
46904
  beforeAllOutputAttached = true;
@@ -48851,6 +48911,7 @@ export {
48851
48911
  extractTargetsFromTestCase,
48852
48912
  extractTrialsConfig,
48853
48913
  extractCacheConfig,
48914
+ extractFailOnError,
48854
48915
  detectFormat,
48855
48916
  buildPromptInputs,
48856
48917
  readTestSuiteMetadata,
@@ -48950,4 +49011,4 @@ export {
48950
49011
  OtelStreamingObserver,
48951
49012
  createAgentKernel
48952
49013
  };
48953
- //# sourceMappingURL=chunk-LUHCYBMD.js.map
49014
+ //# sourceMappingURL=chunk-FSBZM3HT.js.map