agentv 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-PXYYRDHH.js
304
+ // ../../packages/core/dist/chunk-V6QVGHVD.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-PXYYRDHH.js
422
+ // ../../packages/core/dist/chunk-V6QVGHVD.js
423
423
  import { readFile as readFile2 } from "node:fs/promises";
424
424
  import path3 from "node:path";
425
425
  import fg from "fast-glob";
@@ -24187,14 +24187,9 @@ function resolveAndCreateProvider(definition, env = process.env) {
24187
24187
  const resolved = resolveTargetDefinition(definition, env);
24188
24188
  return createProvider(resolved);
24189
24189
  }
24190
+ var PASS_THRESHOLD = 0.8;
24190
24191
  function scoreToVerdict(score) {
24191
- if (score >= 0.8) {
24192
- return "pass";
24193
- }
24194
- if (score >= 0.6) {
24195
- return "borderline";
24196
- }
24197
- return "fail";
24192
+ return score >= PASS_THRESHOLD ? "pass" : "fail";
24198
24193
  }
24199
24194
  function clampScore(value) {
24200
24195
  if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -24260,13 +24255,16 @@ function deepEqual(a, b) {
24260
24255
  if (aKeys.length !== bKeys.length) return false;
24261
24256
  return aKeys.every((key) => Object.hasOwn(bObj, key) && deepEqual(aObj[key], bObj[key]));
24262
24257
  }
24258
+ var NEGATED_VERDICT = {
24259
+ pass: "fail",
24260
+ fail: "pass",
24261
+ skip: "skip"
24262
+ };
24263
24263
  function negateScore(score) {
24264
- const negatedScore = clampScore(1 - score.score);
24265
- const negatedVerdict = score.verdict === "pass" ? "fail" : score.verdict === "fail" ? "pass" : "borderline";
24266
24264
  return {
24267
24265
  ...score,
24268
- score: negatedScore,
24269
- verdict: negatedVerdict,
24266
+ score: clampScore(1 - score.score),
24267
+ verdict: NEGATED_VERDICT[score.verdict],
24270
24268
  assertions: score.assertions.map((a) => ({
24271
24269
  ...a,
24272
24270
  passed: !a.passed,
@@ -25957,7 +25955,7 @@ var DEFAULT_COMPOSITE_AGGREGATOR_PROMPT = `Review the following evaluation resul
25957
25955
  {{EVALUATOR_RESULTS_JSON}}
25958
25956
 
25959
25957
  Decide the final score and verdict based on all evaluator results.
25960
- Return a JSON object with: score (0.0-1.0), verdict (pass/fail/borderline), and reasoning.`;
25958
+ Return a JSON object with: score (0.0-1.0), verdict (pass/fail), and reasoning.`;
25961
25959
  var CompositeEvaluator = class {
25962
25960
  kind = "composite";
25963
25961
  config;
@@ -26071,7 +26069,7 @@ var CompositeEvaluator = class {
26071
26069
  continue;
26072
26070
  }
26073
26071
  evaluatedCount++;
26074
- const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
26072
+ const isPassing = member.result.verdict === "pass";
26075
26073
  if (isPassing) {
26076
26074
  passingCount++;
26077
26075
  }
@@ -26136,7 +26134,7 @@ var CompositeEvaluator = class {
26136
26134
  passed: Boolean(a.passed),
26137
26135
  ...typeof a.evidence === "string" ? { evidence: a.evidence } : {}
26138
26136
  })) : [];
26139
- const verdict = typeof parsed?.verdict === "string" && (parsed.verdict === "pass" || parsed.verdict === "fail" || parsed.verdict === "borderline") ? parsed.verdict : scoreToVerdict(score);
26137
+ const verdict = typeof parsed?.verdict === "string" && (parsed.verdict === "pass" || parsed.verdict === "fail") ? parsed.verdict : scoreToVerdict(score);
26140
26138
  return {
26141
26139
  score,
26142
26140
  verdict,
@@ -29273,9 +29271,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
29273
29271
  }
29274
29272
  return result.stdout;
29275
29273
  }
29276
- var QUALITY_PASS_THRESHOLD = 0.8;
29277
29274
  function classifyQualityStatus(score) {
29278
- return score >= QUALITY_PASS_THRESHOLD ? "ok" : "quality_failure";
29275
+ return score >= PASS_THRESHOLD ? "ok" : "quality_failure";
29279
29276
  }
29280
29277
  function buildSkippedEvaluatorError(scores) {
29281
29278
  const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
@@ -31023,7 +31020,6 @@ async function runEvaluatorList(options) {
31023
31020
  }
31024
31021
  }
31025
31022
  }
31026
- const PASS_THRESHOLD = 0.8;
31027
31023
  const hasRequiredFailure = scored.some((entry) => {
31028
31024
  if (!entry.required) return false;
31029
31025
  const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
@@ -31392,24 +31388,17 @@ function mapAssertionType(type) {
31392
31388
  function computeSummary(results, durationMs) {
31393
31389
  const total = results.length;
31394
31390
  let passed = 0;
31395
- let failed = 0;
31396
- let borderline = 0;
31397
31391
  let scoreSum = 0;
31398
31392
  for (const r of results) {
31399
31393
  scoreSum += r.score;
31400
- if (r.score >= 0.8) {
31394
+ if (r.score >= PASS_THRESHOLD) {
31401
31395
  passed++;
31402
- } else if (r.score < 0.5) {
31403
- failed++;
31404
- } else {
31405
- borderline++;
31406
31396
  }
31407
31397
  }
31408
31398
  return {
31409
31399
  total,
31410
31400
  passed,
31411
- failed,
31412
- borderline,
31401
+ failed: total - passed,
31413
31402
  durationMs,
31414
31403
  meanScore: total > 0 ? scoreSum / total : 0
31415
31404
  };
@@ -32276,6 +32265,7 @@ export {
32276
32265
  createBuiltinProviderRegistry,
32277
32266
  createProvider,
32278
32267
  resolveAndCreateProvider,
32268
+ PASS_THRESHOLD,
32279
32269
  scoreToVerdict,
32280
32270
  clampScore,
32281
32271
  extractJsonBlob,
@@ -32354,4 +32344,4 @@ export {
32354
32344
  OtelStreamingObserver,
32355
32345
  createAgentKernel
32356
32346
  };
32357
- //# sourceMappingURL=chunk-XEAW7OQT.js.map
32347
+ //# sourceMappingURL=chunk-XLM3RNN7.js.map