agentv 4.1.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QCKPJPYC.js → chunk-HAZJO7OY.js} +4 -4
- package/dist/{chunk-QCKPJPYC.js.map → chunk-HAZJO7OY.js.map} +1 -1
- package/dist/{chunk-TDY2FQN5.js → chunk-UXSQQHCI.js} +124 -153
- package/dist/chunk-UXSQQHCI.js.map +1 -0
- package/dist/{chunk-XEAW7OQT.js → chunk-XLM3RNN7.js} +19 -29
- package/dist/chunk-XLM3RNN7.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-2JUUJ6PT.js → dist-VVXR6TYM.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-ASB4FU3J.js → interactive-NVNOLL2H.js} +3 -3
- package/dist/studio/assets/{index-DofvSOmX.js → index-Cir5Hc8S.js} +1 -1
- package/dist/studio/assets/{index-CDGReinH.js → index-D8LVkz9x.js} +1 -1
- package/dist/studio/index.html +1 -1
- package/package.json +1 -1
- package/dist/chunk-TDY2FQN5.js.map +0 -1
- package/dist/chunk-XEAW7OQT.js.map +0 -1
- /package/dist/{dist-2JUUJ6PT.js.map → dist-VVXR6TYM.js.map} +0 -0
- /package/dist/{interactive-ASB4FU3J.js.map → interactive-NVNOLL2H.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-V6QVGHVD.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-V6QVGHVD.js
|
|
423
423
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
424
424
|
import path3 from "node:path";
|
|
425
425
|
import fg from "fast-glob";
|
|
@@ -24187,14 +24187,9 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
24187
24187
|
const resolved = resolveTargetDefinition(definition, env);
|
|
24188
24188
|
return createProvider(resolved);
|
|
24189
24189
|
}
|
|
24190
|
+
var PASS_THRESHOLD = 0.8;
|
|
24190
24191
|
function scoreToVerdict(score) {
|
|
24191
|
-
|
|
24192
|
-
return "pass";
|
|
24193
|
-
}
|
|
24194
|
-
if (score >= 0.6) {
|
|
24195
|
-
return "borderline";
|
|
24196
|
-
}
|
|
24197
|
-
return "fail";
|
|
24192
|
+
return score >= PASS_THRESHOLD ? "pass" : "fail";
|
|
24198
24193
|
}
|
|
24199
24194
|
function clampScore(value) {
|
|
24200
24195
|
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
|
@@ -24260,13 +24255,16 @@ function deepEqual(a, b) {
|
|
|
24260
24255
|
if (aKeys.length !== bKeys.length) return false;
|
|
24261
24256
|
return aKeys.every((key) => Object.hasOwn(bObj, key) && deepEqual(aObj[key], bObj[key]));
|
|
24262
24257
|
}
|
|
24258
|
+
var NEGATED_VERDICT = {
|
|
24259
|
+
pass: "fail",
|
|
24260
|
+
fail: "pass",
|
|
24261
|
+
skip: "skip"
|
|
24262
|
+
};
|
|
24263
24263
|
function negateScore(score) {
|
|
24264
|
-
const negatedScore = clampScore(1 - score.score);
|
|
24265
|
-
const negatedVerdict = score.verdict === "pass" ? "fail" : score.verdict === "fail" ? "pass" : "borderline";
|
|
24266
24264
|
return {
|
|
24267
24265
|
...score,
|
|
24268
|
-
score:
|
|
24269
|
-
verdict:
|
|
24266
|
+
score: clampScore(1 - score.score),
|
|
24267
|
+
verdict: NEGATED_VERDICT[score.verdict],
|
|
24270
24268
|
assertions: score.assertions.map((a) => ({
|
|
24271
24269
|
...a,
|
|
24272
24270
|
passed: !a.passed,
|
|
@@ -25957,7 +25955,7 @@ var DEFAULT_COMPOSITE_AGGREGATOR_PROMPT = `Review the following evaluation resul
|
|
|
25957
25955
|
{{EVALUATOR_RESULTS_JSON}}
|
|
25958
25956
|
|
|
25959
25957
|
Decide the final score and verdict based on all evaluator results.
|
|
25960
|
-
Return a JSON object with: score (0.0-1.0), verdict (pass/fail
|
|
25958
|
+
Return a JSON object with: score (0.0-1.0), verdict (pass/fail), and reasoning.`;
|
|
25961
25959
|
var CompositeEvaluator = class {
|
|
25962
25960
|
kind = "composite";
|
|
25963
25961
|
config;
|
|
@@ -26071,7 +26069,7 @@ var CompositeEvaluator = class {
|
|
|
26071
26069
|
continue;
|
|
26072
26070
|
}
|
|
26073
26071
|
evaluatedCount++;
|
|
26074
|
-
const isPassing = member.result.verdict === "pass"
|
|
26072
|
+
const isPassing = member.result.verdict === "pass";
|
|
26075
26073
|
if (isPassing) {
|
|
26076
26074
|
passingCount++;
|
|
26077
26075
|
}
|
|
@@ -26136,7 +26134,7 @@ var CompositeEvaluator = class {
|
|
|
26136
26134
|
passed: Boolean(a.passed),
|
|
26137
26135
|
...typeof a.evidence === "string" ? { evidence: a.evidence } : {}
|
|
26138
26136
|
})) : [];
|
|
26139
|
-
const verdict = typeof parsed?.verdict === "string" && (parsed.verdict === "pass" || parsed.verdict === "fail"
|
|
26137
|
+
const verdict = typeof parsed?.verdict === "string" && (parsed.verdict === "pass" || parsed.verdict === "fail") ? parsed.verdict : scoreToVerdict(score);
|
|
26140
26138
|
return {
|
|
26141
26139
|
score,
|
|
26142
26140
|
verdict,
|
|
@@ -29273,9 +29271,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
29273
29271
|
}
|
|
29274
29272
|
return result.stdout;
|
|
29275
29273
|
}
|
|
29276
|
-
var QUALITY_PASS_THRESHOLD = 0.8;
|
|
29277
29274
|
function classifyQualityStatus(score) {
|
|
29278
|
-
return score >=
|
|
29275
|
+
return score >= PASS_THRESHOLD ? "ok" : "quality_failure";
|
|
29279
29276
|
}
|
|
29280
29277
|
function buildSkippedEvaluatorError(scores) {
|
|
29281
29278
|
const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
|
|
@@ -31023,7 +31020,6 @@ async function runEvaluatorList(options) {
|
|
|
31023
31020
|
}
|
|
31024
31021
|
}
|
|
31025
31022
|
}
|
|
31026
|
-
const PASS_THRESHOLD = 0.8;
|
|
31027
31023
|
const hasRequiredFailure = scored.some((entry) => {
|
|
31028
31024
|
if (!entry.required) return false;
|
|
31029
31025
|
const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
|
|
@@ -31392,24 +31388,17 @@ function mapAssertionType(type) {
|
|
|
31392
31388
|
function computeSummary(results, durationMs) {
|
|
31393
31389
|
const total = results.length;
|
|
31394
31390
|
let passed = 0;
|
|
31395
|
-
let failed = 0;
|
|
31396
|
-
let borderline = 0;
|
|
31397
31391
|
let scoreSum = 0;
|
|
31398
31392
|
for (const r of results) {
|
|
31399
31393
|
scoreSum += r.score;
|
|
31400
|
-
if (r.score >=
|
|
31394
|
+
if (r.score >= PASS_THRESHOLD) {
|
|
31401
31395
|
passed++;
|
|
31402
|
-
} else if (r.score < 0.5) {
|
|
31403
|
-
failed++;
|
|
31404
|
-
} else {
|
|
31405
|
-
borderline++;
|
|
31406
31396
|
}
|
|
31407
31397
|
}
|
|
31408
31398
|
return {
|
|
31409
31399
|
total,
|
|
31410
31400
|
passed,
|
|
31411
|
-
failed,
|
|
31412
|
-
borderline,
|
|
31401
|
+
failed: total - passed,
|
|
31413
31402
|
durationMs,
|
|
31414
31403
|
meanScore: total > 0 ? scoreSum / total : 0
|
|
31415
31404
|
};
|
|
@@ -32276,6 +32265,7 @@ export {
|
|
|
32276
32265
|
createBuiltinProviderRegistry,
|
|
32277
32266
|
createProvider,
|
|
32278
32267
|
resolveAndCreateProvider,
|
|
32268
|
+
PASS_THRESHOLD,
|
|
32279
32269
|
scoreToVerdict,
|
|
32280
32270
|
clampScore,
|
|
32281
32271
|
extractJsonBlob,
|
|
@@ -32354,4 +32344,4 @@ export {
|
|
|
32354
32344
|
OtelStreamingObserver,
|
|
32355
32345
|
createAgentKernel
|
|
32356
32346
|
};
|
|
32357
|
-
//# sourceMappingURL=chunk-
|
|
32347
|
+
//# sourceMappingURL=chunk-XLM3RNN7.js.map
|