agentv 4.5.2 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-OIBYQMCK.js";
27
+ } from "./chunk-5GZJIXTY.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  PASS_THRESHOLD,
@@ -50,13 +50,10 @@ import {
50
50
  toSnakeCaseDeep as toSnakeCaseDeep2,
51
51
  transpileEvalYamlFile,
52
52
  trimBaselineResult
53
- } from "./chunk-7DRAXDVC.js";
53
+ } from "./chunk-KQQTEWZF.js";
54
54
  import {
55
55
  __commonJS,
56
- __esm,
57
- __export,
58
56
  __require,
59
- __toCommonJS,
60
57
  __toESM
61
58
  } from "./chunk-5H446C7X.js";
62
59
 
@@ -281,7 +278,7 @@ var require_ms = __commonJS({
281
278
  var require_common = __commonJS({
282
279
  "../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/common.js"(exports, module) {
283
280
  "use strict";
284
- function setup(env3) {
281
+ function setup(env2) {
285
282
  createDebug.debug = createDebug;
286
283
  createDebug.default = createDebug;
287
284
  createDebug.coerce = coerce;
@@ -290,8 +287,8 @@ var require_common = __commonJS({
290
287
  createDebug.enabled = enabled;
291
288
  createDebug.humanize = require_ms();
292
289
  createDebug.destroy = destroy;
293
- Object.keys(env3).forEach((key) => {
294
- createDebug[key] = env3[key];
290
+ Object.keys(env2).forEach((key) => {
291
+ createDebug[key] = env2[key];
295
292
  });
296
293
  createDebug.names = [];
297
294
  createDebug.skips = [];
@@ -626,153 +623,118 @@ var require_browser = __commonJS({
626
623
  }
627
624
  });
628
625
 
629
- // ../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js
630
- var supports_color_exports = {};
631
- __export(supports_color_exports, {
632
- createSupportsColor: () => createSupportsColor2,
633
- default: () => supports_color_default2
634
- });
635
- import process3 from "node:process";
636
- import os2 from "node:os";
637
- import tty2 from "node:tty";
638
- function hasFlag2(flag2, argv = globalThis.Deno ? globalThis.Deno.args : process3.argv) {
639
- const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
640
- const position = argv.indexOf(prefix + flag2);
641
- const terminatorPosition = argv.indexOf("--");
642
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
643
- }
644
- function envForceColor2() {
645
- if (!("FORCE_COLOR" in env2)) {
646
- return;
647
- }
648
- if (env2.FORCE_COLOR === "true") {
649
- return 1;
650
- }
651
- if (env2.FORCE_COLOR === "false") {
652
- return 0;
653
- }
654
- if (env2.FORCE_COLOR.length === 0) {
655
- return 1;
656
- }
657
- const level = Math.min(Number.parseInt(env2.FORCE_COLOR, 10), 3);
658
- if (![0, 1, 2, 3].includes(level)) {
659
- return;
660
- }
661
- return level;
662
- }
663
- function translateLevel2(level) {
664
- if (level === 0) {
665
- return false;
666
- }
667
- return {
668
- level,
669
- hasBasic: true,
670
- has256: level >= 2,
671
- has16m: level >= 3
672
- };
673
- }
674
- function _supportsColor2(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
675
- const noFlagForceColor = envForceColor2();
676
- if (noFlagForceColor !== void 0) {
677
- flagForceColor2 = noFlagForceColor;
678
- }
679
- const forceColor = sniffFlags ? flagForceColor2 : noFlagForceColor;
680
- if (forceColor === 0) {
681
- return 0;
682
- }
683
- if (sniffFlags) {
684
- if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
685
- return 3;
686
- }
687
- if (hasFlag2("color=256")) {
688
- return 2;
689
- }
690
- }
691
- if ("TF_BUILD" in env2 && "AGENT_NAME" in env2) {
692
- return 1;
693
- }
694
- if (haveStream && !streamIsTTY && forceColor === void 0) {
695
- return 0;
696
- }
697
- const min = forceColor || 0;
698
- if (env2.TERM === "dumb") {
699
- return min;
626
+ // ../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js
627
+ var require_has_flag = __commonJS({
628
+ "../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js"(exports, module) {
629
+ "use strict";
630
+ module.exports = (flag2, argv = process.argv) => {
631
+ const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
632
+ const position = argv.indexOf(prefix + flag2);
633
+ const terminatorPosition = argv.indexOf("--");
634
+ return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
635
+ };
700
636
  }
701
- if (process3.platform === "win32") {
702
- const osRelease = os2.release().split(".");
703
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
704
- return Number(osRelease[2]) >= 14931 ? 3 : 2;
637
+ });
638
+
639
+ // ../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js
640
+ var require_supports_color = __commonJS({
641
+ "../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js"(exports, module) {
642
+ "use strict";
643
+ var os2 = __require("os");
644
+ var tty2 = __require("tty");
645
+ var hasFlag2 = require_has_flag();
646
+ var { env: env2 } = process;
647
+ var forceColor;
648
+ if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
649
+ forceColor = 0;
650
+ } else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
651
+ forceColor = 1;
705
652
  }
706
- return 1;
707
- }
708
- if ("CI" in env2) {
709
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => key in env2)) {
710
- return 3;
653
+ if ("FORCE_COLOR" in env2) {
654
+ if (env2.FORCE_COLOR === "true") {
655
+ forceColor = 1;
656
+ } else if (env2.FORCE_COLOR === "false") {
657
+ forceColor = 0;
658
+ } else {
659
+ forceColor = env2.FORCE_COLOR.length === 0 ? 1 : Math.min(parseInt(env2.FORCE_COLOR, 10), 3);
660
+ }
711
661
  }
712
- if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => sign in env2) || env2.CI_NAME === "codeship") {
713
- return 1;
662
+ function translateLevel2(level) {
663
+ if (level === 0) {
664
+ return false;
665
+ }
666
+ return {
667
+ level,
668
+ hasBasic: true,
669
+ has256: level >= 2,
670
+ has16m: level >= 3
671
+ };
714
672
  }
715
- return min;
716
- }
717
- if ("TEAMCITY_VERSION" in env2) {
718
- return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
719
- }
720
- if (env2.COLORTERM === "truecolor") {
721
- return 3;
722
- }
723
- if (env2.TERM === "xterm-kitty") {
724
- return 3;
725
- }
726
- if (env2.TERM === "xterm-ghostty") {
727
- return 3;
728
- }
729
- if (env2.TERM === "wezterm") {
730
- return 3;
731
- }
732
- if ("TERM_PROGRAM" in env2) {
733
- const version = Number.parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
734
- switch (env2.TERM_PROGRAM) {
735
- case "iTerm.app": {
736
- return version >= 3 ? 3 : 2;
673
+ function supportsColor2(haveStream, streamIsTTY) {
674
+ if (forceColor === 0) {
675
+ return 0;
737
676
  }
738
- case "Apple_Terminal": {
677
+ if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
678
+ return 3;
679
+ }
680
+ if (hasFlag2("color=256")) {
681
+ return 2;
682
+ }
683
+ if (haveStream && !streamIsTTY && forceColor === void 0) {
684
+ return 0;
685
+ }
686
+ const min = forceColor || 0;
687
+ if (env2.TERM === "dumb") {
688
+ return min;
689
+ }
690
+ if (process.platform === "win32") {
691
+ const osRelease = os2.release().split(".");
692
+ if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
693
+ return Number(osRelease[2]) >= 14931 ? 3 : 2;
694
+ }
695
+ return 1;
696
+ }
697
+ if ("CI" in env2) {
698
+ if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI", "GITHUB_ACTIONS", "BUILDKITE"].some((sign) => sign in env2) || env2.CI_NAME === "codeship") {
699
+ return 1;
700
+ }
701
+ return min;
702
+ }
703
+ if ("TEAMCITY_VERSION" in env2) {
704
+ return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
705
+ }
706
+ if (env2.COLORTERM === "truecolor") {
707
+ return 3;
708
+ }
709
+ if ("TERM_PROGRAM" in env2) {
710
+ const version = parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
711
+ switch (env2.TERM_PROGRAM) {
712
+ case "iTerm.app":
713
+ return version >= 3 ? 3 : 2;
714
+ case "Apple_Terminal":
715
+ return 2;
716
+ }
717
+ }
718
+ if (/-256(color)?$/i.test(env2.TERM)) {
739
719
  return 2;
740
720
  }
721
+ if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
722
+ return 1;
723
+ }
724
+ if ("COLORTERM" in env2) {
725
+ return 1;
726
+ }
727
+ return min;
741
728
  }
742
- }
743
- if (/-256(color)?$/i.test(env2.TERM)) {
744
- return 2;
745
- }
746
- if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
747
- return 1;
748
- }
749
- if ("COLORTERM" in env2) {
750
- return 1;
751
- }
752
- return min;
753
- }
754
- function createSupportsColor2(stream, options = {}) {
755
- const level = _supportsColor2(stream, {
756
- streamIsTTY: stream && stream.isTTY,
757
- ...options
758
- });
759
- return translateLevel2(level);
760
- }
761
- var env2, flagForceColor2, supportsColor2, supports_color_default2;
762
- var init_supports_color = __esm({
763
- "../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js"() {
764
- "use strict";
765
- ({ env: env2 } = process3);
766
- if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
767
- flagForceColor2 = 0;
768
- } else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
769
- flagForceColor2 = 1;
729
+ function getSupportLevel(stream) {
730
+ const level = supportsColor2(stream, stream && stream.isTTY);
731
+ return translateLevel2(level);
770
732
  }
771
- supportsColor2 = {
772
- stdout: createSupportsColor2({ isTTY: tty2.isatty(1) }),
773
- stderr: createSupportsColor2({ isTTY: tty2.isatty(2) })
733
+ module.exports = {
734
+ supportsColor: getSupportLevel,
735
+ stdout: translateLevel2(supportsColor2(true, tty2.isatty(1))),
736
+ stderr: translateLevel2(supportsColor2(true, tty2.isatty(2)))
774
737
  };
775
- supports_color_default2 = supportsColor2;
776
738
  }
777
739
  });
778
740
 
@@ -780,7 +742,7 @@ var init_supports_color = __esm({
780
742
  var require_node = __commonJS({
781
743
  "../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/node.js"(exports, module) {
782
744
  "use strict";
783
- var tty3 = __require("tty");
745
+ var tty2 = __require("tty");
784
746
  var util = __require("util");
785
747
  exports.init = init;
786
748
  exports.log = log;
@@ -795,8 +757,8 @@ var require_node = __commonJS({
795
757
  );
796
758
  exports.colors = [6, 2, 3, 4, 5, 1];
797
759
  try {
798
- const supportsColor3 = (init_supports_color(), __toCommonJS(supports_color_exports));
799
- if (supportsColor3 && (supportsColor3.stderr || supportsColor3).level >= 2) {
760
+ const supportsColor2 = require_supports_color();
761
+ if (supportsColor2 && (supportsColor2.stderr || supportsColor2).level >= 2) {
800
762
  exports.colors = [
801
763
  20,
802
764
  21,
@@ -898,7 +860,7 @@ var require_node = __commonJS({
898
860
  return obj;
899
861
  }, {});
900
862
  function useColors() {
901
- return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) : tty3.isatty(process.stderr.fd);
863
+ return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) : tty2.isatty(process.stderr.fd);
902
864
  }
903
865
  function formatArgs(args) {
904
866
  const { namespace: name, useColors: useColors2 } = this;
@@ -1630,8 +1592,8 @@ function fullFlag(config) {
1630
1592
  }
1631
1593
  const defaults = [];
1632
1594
  if (config.env) {
1633
- const env3 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
1634
- defaults.push(`env: ${config.env}${env3}`);
1595
+ const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
1596
+ defaults.push(`env: ${config.env}${env2}`);
1635
1597
  }
1636
1598
  if (config.defaultValue) {
1637
1599
  try {
@@ -2248,8 +2210,8 @@ function fullOption(config) {
2248
2210
  usage += ` <${displayName}>`;
2249
2211
  const defaults = [];
2250
2212
  if (config.env) {
2251
- const env3 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
2252
- defaults.push(`env: ${config.env}${env3}`);
2213
+ const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
2214
+ defaults.push(`env: ${config.env}${env2}`);
2253
2215
  }
2254
2216
  if (config.defaultValue) {
2255
2217
  try {
@@ -3998,11 +3960,21 @@ var evalRunCommand = command({
3998
3960
  type: optional(number),
3999
3961
  long: "threshold",
4000
3962
  description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
3963
+ }),
3964
+ tag: multioption({
3965
+ type: array(string),
3966
+ long: "tag",
3967
+ description: "Only run eval files that have this tag (repeatable, AND logic)"
3968
+ }),
3969
+ excludeTag: multioption({
3970
+ type: array(string),
3971
+ long: "exclude-tag",
3972
+ description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
4001
3973
  })
4002
3974
  },
4003
3975
  handler: async (args) => {
4004
3976
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4005
- const { launchInteractiveWizard } = await import("./interactive-BOJUYBJS.js");
3977
+ const { launchInteractiveWizard } = await import("./interactive-6D3ULOMN.js");
4006
3978
  await launchInteractiveWizard();
4007
3979
  return;
4008
3980
  }
@@ -4039,7 +4011,9 @@ var evalRunCommand = command({
4039
4011
  graderTarget: args.graderTarget,
4040
4012
  model: args.model,
4041
4013
  outputMessages: args.outputMessages,
4042
- threshold: args.threshold
4014
+ threshold: args.threshold,
4015
+ tag: args.tag,
4016
+ excludeTag: args.excludeTag
4043
4017
  };
4044
4018
  const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4045
4019
  if (result?.thresholdFailed) {
@@ -4886,15 +4860,15 @@ function loadEnvFile(dir) {
4886
4860
  while (true) {
4887
4861
  const candidate = join4(current, ".env");
4888
4862
  if (existsSync3(candidate)) {
4889
- const env3 = {};
4863
+ const env2 = {};
4890
4864
  for (const line of readFileSync4(candidate, "utf8").split("\n")) {
4891
4865
  const trimmed = line.trim();
4892
4866
  if (!trimmed || trimmed.startsWith("#")) continue;
4893
4867
  const eqIdx = trimmed.indexOf("=");
4894
4868
  if (eqIdx === -1) continue;
4895
- env3[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
4869
+ env2[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
4896
4870
  }
4897
- return env3;
4871
+ return env2;
4898
4872
  }
4899
4873
  const parent = dirname2(current);
4900
4874
  if (parent === current) break;
@@ -8569,4 +8543,4 @@ export {
8569
8543
  preprocessArgv,
8570
8544
  runCli
8571
8545
  };
8572
- //# sourceMappingURL=chunk-HF5UGZSZ.js.map
8546
+ //# sourceMappingURL=chunk-U2LSJ6Y4.js.map