agentv 4.5.2 → 4.6.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-OIBYQMCK.js";
27
+ } from "./chunk-MHWYA4CS.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  PASS_THRESHOLD,
@@ -50,7 +50,7 @@ import {
50
50
  toSnakeCaseDeep as toSnakeCaseDeep2,
51
51
  transpileEvalYamlFile,
52
52
  trimBaselineResult
53
- } from "./chunk-7DRAXDVC.js";
53
+ } from "./chunk-YXXD27OK.js";
54
54
  import {
55
55
  __commonJS,
56
56
  __esm,
@@ -3998,11 +3998,21 @@ var evalRunCommand = command({
3998
3998
  type: optional(number),
3999
3999
  long: "threshold",
4000
4000
  description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
4001
+ }),
4002
+ tag: multioption({
4003
+ type: array(string),
4004
+ long: "tag",
4005
+ description: "Only run eval files that have this tag (repeatable, AND logic)"
4006
+ }),
4007
+ excludeTag: multioption({
4008
+ type: array(string),
4009
+ long: "exclude-tag",
4010
+ description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
4001
4011
  })
4002
4012
  },
4003
4013
  handler: async (args) => {
4004
4014
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4005
- const { launchInteractiveWizard } = await import("./interactive-BOJUYBJS.js");
4015
+ const { launchInteractiveWizard } = await import("./interactive-DMSVE6CS.js");
4006
4016
  await launchInteractiveWizard();
4007
4017
  return;
4008
4018
  }
@@ -4039,7 +4049,9 @@ var evalRunCommand = command({
4039
4049
  graderTarget: args.graderTarget,
4040
4050
  model: args.model,
4041
4051
  outputMessages: args.outputMessages,
4042
- threshold: args.threshold
4052
+ threshold: args.threshold,
4053
+ tag: args.tag,
4054
+ excludeTag: args.excludeTag
4043
4055
  };
4044
4056
  const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4045
4057
  if (result?.thresholdFailed) {
@@ -8569,4 +8581,4 @@ export {
8569
8581
  preprocessArgv,
8570
8582
  runCli
8571
8583
  };
8572
- //# sourceMappingURL=chunk-HF5UGZSZ.js.map
8584
+ //# sourceMappingURL=chunk-NSVFUL27.js.map