agentv 4.5.1 → 4.6.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -24,7 +24,7 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-5DEZ72J3.js";
27
+ } from "./chunk-5GZJIXTY.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  PASS_THRESHOLD,
@@ -50,7 +50,7 @@ import {
50
50
  toSnakeCaseDeep as toSnakeCaseDeep2,
51
51
  transpileEvalYamlFile,
52
52
  trimBaselineResult
53
- } from "./chunk-7DRAXDVC.js";
53
+ } from "./chunk-KQQTEWZF.js";
54
54
  import {
55
55
  __commonJS,
56
56
  __require,
@@ -3960,11 +3960,21 @@ var evalRunCommand = command({
3960
3960
  type: optional(number),
3961
3961
  long: "threshold",
3962
3962
  description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
3963
+ }),
3964
+ tag: multioption({
3965
+ type: array(string),
3966
+ long: "tag",
3967
+ description: "Only run eval files that have this tag (repeatable, AND logic)"
3968
+ }),
3969
+ excludeTag: multioption({
3970
+ type: array(string),
3971
+ long: "exclude-tag",
3972
+ description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
3963
3973
  })
3964
3974
  },
3965
3975
  handler: async (args) => {
3966
3976
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3967
- const { launchInteractiveWizard } = await import("./interactive-OG7ZJIHG.js");
3977
+ const { launchInteractiveWizard } = await import("./interactive-6D3ULOMN.js");
3968
3978
  await launchInteractiveWizard();
3969
3979
  return;
3970
3980
  }
@@ -4001,7 +4011,9 @@ var evalRunCommand = command({
4001
4011
  graderTarget: args.graderTarget,
4002
4012
  model: args.model,
4003
4013
  outputMessages: args.outputMessages,
4004
- threshold: args.threshold
4014
+ threshold: args.threshold,
4015
+ tag: args.tag,
4016
+ excludeTag: args.excludeTag
4005
4017
  };
4006
4018
  const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4007
4019
  if (result?.thresholdFailed) {
@@ -8464,6 +8476,7 @@ var app = subcommands({
8464
8476
  pipeline: pipelineCommand,
8465
8477
  results: resultsCommand,
8466
8478
  self: selfCommand,
8479
+ serve: resultsServeCommand,
8467
8480
  studio: resultsServeCommand,
8468
8481
  trace: traceCommand,
8469
8482
  transpile: transpileCommand,
@@ -8482,6 +8495,7 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
8482
8495
  "pipeline",
8483
8496
  "results",
8484
8497
  "self",
8498
+ "serve",
8485
8499
  "studio",
8486
8500
  "trace",
8487
8501
  "transpile",
@@ -8529,4 +8543,4 @@ export {
8529
8543
  preprocessArgv,
8530
8544
  runCli
8531
8545
  };
8532
- //# sourceMappingURL=chunk-BQC2CDLN.js.map
8546
+ //# sourceMappingURL=chunk-U2LSJ6Y4.js.map