agentv 4.5.2 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-OIBYQMCK.js → chunk-MHWYA4CS.js} +155 -59
- package/dist/chunk-MHWYA4CS.js.map +1 -0
- package/dist/{chunk-HF5UGZSZ.js → chunk-NSVFUL27.js} +17 -5
- package/dist/chunk-NSVFUL27.js.map +1 -0
- package/dist/{chunk-7DRAXDVC.js → chunk-YXXD27OK.js} +329 -104
- package/dist/chunk-YXXD27OK.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VWMHFUXR.js → dist-BN5NUVAB.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-BOJUYBJS.js → interactive-DMSVE6CS.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-7DRAXDVC.js.map +0 -1
- package/dist/chunk-HF5UGZSZ.js.map +0 -1
- package/dist/chunk-OIBYQMCK.js.map +0 -1
- package/dist/{dist-VWMHFUXR.js.map → dist-BN5NUVAB.js.map} +0 -0
- package/dist/{interactive-BOJUYBJS.js.map → interactive-DMSVE6CS.js.map} +0 -0
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
validateFileReferences,
|
|
25
25
|
validateTargetsFile,
|
|
26
26
|
writeArtifactsFromResults
|
|
27
|
-
} from "./chunk-OIBYQMCK.js";
|
|
27
|
+
} from "./chunk-MHWYA4CS.js";
|
|
28
28
|
import {
|
|
29
29
|
DEFAULT_CATEGORY,
|
|
30
30
|
PASS_THRESHOLD,
|
|
@@ -50,7 +50,7 @@ import {
|
|
|
50
50
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
51
51
|
transpileEvalYamlFile,
|
|
52
52
|
trimBaselineResult
|
|
53
|
-
} from "./chunk-7DRAXDVC.js";
|
|
53
|
+
} from "./chunk-YXXD27OK.js";
|
|
54
54
|
import {
|
|
55
55
|
__commonJS,
|
|
56
56
|
__esm,
|
|
@@ -3998,11 +3998,21 @@ var evalRunCommand = command({
|
|
|
3998
3998
|
type: optional(number),
|
|
3999
3999
|
long: "threshold",
|
|
4000
4000
|
description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
|
|
4001
|
+
}),
|
|
4002
|
+
tag: multioption({
|
|
4003
|
+
type: array(string),
|
|
4004
|
+
long: "tag",
|
|
4005
|
+
description: "Only run eval files that have this tag (repeatable, AND logic)"
|
|
4006
|
+
}),
|
|
4007
|
+
excludeTag: multioption({
|
|
4008
|
+
type: array(string),
|
|
4009
|
+
long: "exclude-tag",
|
|
4010
|
+
description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
|
|
4001
4011
|
})
|
|
4002
4012
|
},
|
|
4003
4013
|
handler: async (args) => {
|
|
4004
4014
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4005
|
-
const { launchInteractiveWizard } = await import("./interactive-BOJUYBJS.js");
|
|
4015
|
+
const { launchInteractiveWizard } = await import("./interactive-DMSVE6CS.js");
|
|
4006
4016
|
await launchInteractiveWizard();
|
|
4007
4017
|
return;
|
|
4008
4018
|
}
|
|
@@ -4039,7 +4049,9 @@ var evalRunCommand = command({
|
|
|
4039
4049
|
graderTarget: args.graderTarget,
|
|
4040
4050
|
model: args.model,
|
|
4041
4051
|
outputMessages: args.outputMessages,
|
|
4042
|
-
threshold: args.threshold
|
|
4052
|
+
threshold: args.threshold,
|
|
4053
|
+
tag: args.tag,
|
|
4054
|
+
excludeTag: args.excludeTag
|
|
4043
4055
|
};
|
|
4044
4056
|
const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4045
4057
|
if (result?.thresholdFailed) {
|
|
@@ -8569,4 +8581,4 @@ export {
|
|
|
8569
8581
|
preprocessArgv,
|
|
8570
8582
|
runCli
|
|
8571
8583
|
};
|
|
8572
|
-
//# sourceMappingURL=chunk-HF5UGZSZ.js.map
|
|
8584
|
+
//# sourceMappingURL=chunk-NSVFUL27.js.map
|