agentv 4.15.7-next.1 → 4.15.8-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-OX2MQVVU.js → chunk-DM5WAANZ.js} +3 -3
- package/dist/{chunk-47FH4AUZ.js → chunk-TXVRJ4YM.js} +5 -5
- package/dist/{chunk-47FH4AUZ.js.map → chunk-TXVRJ4YM.js.map} +1 -1
- package/dist/cli.js +2 -2
- package/dist/index.js +2 -2
- package/dist/{interactive-7Q4TXHS3.js → interactive-LTAYSEVU.js} +2 -2
- package/package.json +1 -1
- /package/dist/{chunk-OX2MQVVU.js.map → chunk-DM5WAANZ.js.map} +0 -0
- /package/dist/{interactive-7Q4TXHS3.js.map → interactive-LTAYSEVU.js.map} +0 -0
|
@@ -42,7 +42,7 @@ import {
|
|
|
42
42
|
validateFileReferences,
|
|
43
43
|
validateTargetsFile,
|
|
44
44
|
writeArtifactsFromResults
|
|
45
|
-
} from "./chunk-47FH4AUZ.js";
|
|
45
|
+
} from "./chunk-TXVRJ4YM.js";
|
|
46
46
|
import {
|
|
47
47
|
DEFAULT_CATEGORY,
|
|
48
48
|
DEFAULT_THRESHOLD,
|
|
@@ -3916,7 +3916,7 @@ var evalRunCommand = command({
|
|
|
3916
3916
|
},
|
|
3917
3917
|
handler: async (args) => {
|
|
3918
3918
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
3919
|
-
const { launchInteractiveWizard } = await import("./interactive-7Q4TXHS3.js");
|
|
3919
|
+
const { launchInteractiveWizard } = await import("./interactive-LTAYSEVU.js");
|
|
3920
3920
|
await launchInteractiveWizard();
|
|
3921
3921
|
return;
|
|
3922
3922
|
}
|
|
@@ -10066,4 +10066,4 @@ export {
|
|
|
10066
10066
|
preprocessArgv,
|
|
10067
10067
|
runCli
|
|
10068
10068
|
};
|
|
10069
|
-
//# sourceMappingURL=chunk-OX2MQVVU.js.map
|
|
10069
|
+
//# sourceMappingURL=chunk-DM5WAANZ.js.map
|
|
@@ -47,7 +47,7 @@ import {
|
|
|
47
47
|
// package.json
|
|
48
48
|
var package_default = {
|
|
49
49
|
name: "agentv",
|
|
50
|
-
version: "4.15.7-next.1",
|
|
50
|
+
version: "4.15.8-next.1",
|
|
51
51
|
description: "CLI entry point for AgentV",
|
|
52
52
|
type: "module",
|
|
53
53
|
repository: {
|
|
@@ -2930,7 +2930,7 @@ function calculateEvaluationSummary(results, options) {
|
|
|
2930
2930
|
};
|
|
2931
2931
|
}
|
|
2932
2932
|
function formatScore2(value) {
|
|
2933
|
-
return
|
|
2933
|
+
return `${Math.round(value * 100)}%`;
|
|
2934
2934
|
}
|
|
2935
2935
|
function formatEvaluationSummary(summary, options) {
|
|
2936
2936
|
if (summary.total === 0) {
|
|
@@ -2963,7 +2963,7 @@ function formatEvaluationSummary(summary, options) {
|
|
|
2963
2963
|
} else {
|
|
2964
2964
|
overallVerdict = overallPassed ? "PASS" : "FAIL";
|
|
2965
2965
|
verdictColor = overallPassed ? "\x1B[32m" : "\x1B[31m";
|
|
2966
|
-
verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${threshold}
|
|
2966
|
+
verdictText = `RESULT: ${overallVerdict} (${summary.passedCount}/${summary.total} scored >= ${Math.round(threshold * 100)}%, mean: ${formatScore2(summary.mean)})`;
|
|
2967
2967
|
}
|
|
2968
2968
|
lines.push("\n==================================================");
|
|
2969
2969
|
if (useColor) {
|
|
@@ -3000,7 +3000,7 @@ function formatEvaluationSummary(summary, options) {
|
|
|
3000
3000
|
lines.push("\nScore distribution:");
|
|
3001
3001
|
for (const bin of summary.histogram) {
|
|
3002
3002
|
const [start, end] = bin.range;
|
|
3003
|
-
lines.push(` ${
|
|
3003
|
+
lines.push(` ${Math.round(start * 100)}%-${Math.round(end * 100)}%: ${bin.count}`);
|
|
3004
3004
|
}
|
|
3005
3005
|
lines.push("\nTop performing tests:");
|
|
3006
3006
|
summary.topResults.forEach((result, index) => {
|
|
@@ -5883,4 +5883,4 @@ export {
|
|
|
5883
5883
|
getCategories,
|
|
5884
5884
|
filterByCategory
|
|
5885
5885
|
};
|
|
5886
|
-
//# sourceMappingURL=chunk-47FH4AUZ.js.map
|
|
5886
|
+
//# sourceMappingURL=chunk-TXVRJ4YM.js.map
|