agentv 4.5.1 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5DEZ72J3.js → chunk-5GZJIXTY.js} +155 -59
- package/dist/chunk-5GZJIXTY.js.map +1 -0
- package/dist/{chunk-7DRAXDVC.js → chunk-KQQTEWZF.js} +111 -47
- package/dist/chunk-KQQTEWZF.js.map +1 -0
- package/dist/{chunk-BQC2CDLN.js → chunk-U2LSJ6Y4.js} +19 -5
- package/dist/chunk-U2LSJ6Y4.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VWMHFUXR.js → dist-FBPCDLOY.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-OG7ZJIHG.js → interactive-6D3ULOMN.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-5DEZ72J3.js.map +0 -1
- package/dist/chunk-7DRAXDVC.js.map +0 -1
- package/dist/chunk-BQC2CDLN.js.map +0 -1
- /package/dist/{dist-VWMHFUXR.js.map → dist-FBPCDLOY.js.map} +0 -0
- /package/dist/{interactive-OG7ZJIHG.js.map → interactive-6D3ULOMN.js.map} +0 -0
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
validateFileReferences,
|
|
25
25
|
validateTargetsFile,
|
|
26
26
|
writeArtifactsFromResults
|
|
27
|
-
} from "./chunk-5DEZ72J3.js";
|
|
27
|
+
} from "./chunk-5GZJIXTY.js";
|
|
28
28
|
import {
|
|
29
29
|
DEFAULT_CATEGORY,
|
|
30
30
|
PASS_THRESHOLD,
|
|
@@ -50,7 +50,7 @@ import {
|
|
|
50
50
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
51
51
|
transpileEvalYamlFile,
|
|
52
52
|
trimBaselineResult
|
|
53
|
-
} from "./chunk-7DRAXDVC.js";
|
|
53
|
+
} from "./chunk-KQQTEWZF.js";
|
|
54
54
|
import {
|
|
55
55
|
__commonJS,
|
|
56
56
|
__require,
|
|
@@ -3960,11 +3960,21 @@ var evalRunCommand = command({
|
|
|
3960
3960
|
type: optional(number),
|
|
3961
3961
|
long: "threshold",
|
|
3962
3962
|
description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
|
|
3963
|
+
}),
|
|
3964
|
+
tag: multioption({
|
|
3965
|
+
type: array(string),
|
|
3966
|
+
long: "tag",
|
|
3967
|
+
description: "Only run eval files that have this tag (repeatable, AND logic)"
|
|
3968
|
+
}),
|
|
3969
|
+
excludeTag: multioption({
|
|
3970
|
+
type: array(string),
|
|
3971
|
+
long: "exclude-tag",
|
|
3972
|
+
description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
|
|
3963
3973
|
})
|
|
3964
3974
|
},
|
|
3965
3975
|
handler: async (args) => {
|
|
3966
3976
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
3967
|
-
const { launchInteractiveWizard } = await import("./interactive-OG7ZJIHG.js");
|
|
3977
|
+
const { launchInteractiveWizard } = await import("./interactive-6D3ULOMN.js");
|
|
3968
3978
|
await launchInteractiveWizard();
|
|
3969
3979
|
return;
|
|
3970
3980
|
}
|
|
@@ -4001,7 +4011,9 @@ var evalRunCommand = command({
|
|
|
4001
4011
|
graderTarget: args.graderTarget,
|
|
4002
4012
|
model: args.model,
|
|
4003
4013
|
outputMessages: args.outputMessages,
|
|
4004
|
-
threshold: args.threshold
|
|
4014
|
+
threshold: args.threshold,
|
|
4015
|
+
tag: args.tag,
|
|
4016
|
+
excludeTag: args.excludeTag
|
|
4005
4017
|
};
|
|
4006
4018
|
const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4007
4019
|
if (result?.thresholdFailed) {
|
|
@@ -8464,6 +8476,7 @@ var app = subcommands({
|
|
|
8464
8476
|
pipeline: pipelineCommand,
|
|
8465
8477
|
results: resultsCommand,
|
|
8466
8478
|
self: selfCommand,
|
|
8479
|
+
serve: resultsServeCommand,
|
|
8467
8480
|
studio: resultsServeCommand,
|
|
8468
8481
|
trace: traceCommand,
|
|
8469
8482
|
transpile: transpileCommand,
|
|
@@ -8482,6 +8495,7 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
8482
8495
|
"pipeline",
|
|
8483
8496
|
"results",
|
|
8484
8497
|
"self",
|
|
8498
|
+
"serve",
|
|
8485
8499
|
"studio",
|
|
8486
8500
|
"trace",
|
|
8487
8501
|
"transpile",
|
|
@@ -8529,4 +8543,4 @@ export {
|
|
|
8529
8543
|
preprocessArgv,
|
|
8530
8544
|
runCli
|
|
8531
8545
|
};
|
|
8532
|
-
//# sourceMappingURL=chunk-BQC2CDLN.js.map
|
|
8546
|
+
//# sourceMappingURL=chunk-U2LSJ6Y4.js.map
|