agentv 4.5.2 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-OIBYQMCK.js → chunk-5GZJIXTY.js} +155 -59
- package/dist/chunk-5GZJIXTY.js.map +1 -0
- package/dist/{chunk-7DRAXDVC.js → chunk-KQQTEWZF.js} +111 -47
- package/dist/chunk-KQQTEWZF.js.map +1 -0
- package/dist/{chunk-HF5UGZSZ.js → chunk-U2LSJ6Y4.js} +133 -159
- package/dist/chunk-U2LSJ6Y4.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VWMHFUXR.js → dist-FBPCDLOY.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-BOJUYBJS.js → interactive-6D3ULOMN.js} +3 -3
- package/dist/studio/assets/{index-vn54AYtS.js → index-D-gfAa3s.js} +1 -1
- package/dist/studio/assets/{index-C7TnyYee.js → index-zWHsVvgi.js} +1 -1
- package/dist/studio/index.html +1 -1
- package/package.json +1 -1
- package/dist/chunk-7DRAXDVC.js.map +0 -1
- package/dist/chunk-HF5UGZSZ.js.map +0 -1
- package/dist/chunk-OIBYQMCK.js.map +0 -1
- /package/dist/{dist-VWMHFUXR.js.map → dist-FBPCDLOY.js.map} +0 -0
- /package/dist/{interactive-BOJUYBJS.js.map → interactive-6D3ULOMN.js.map} +0 -0
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
validateFileReferences,
|
|
25
25
|
validateTargetsFile,
|
|
26
26
|
writeArtifactsFromResults
|
|
27
|
-
} from "./chunk-OIBYQMCK.js";
|
|
27
|
+
} from "./chunk-5GZJIXTY.js";
|
|
28
28
|
import {
|
|
29
29
|
DEFAULT_CATEGORY,
|
|
30
30
|
PASS_THRESHOLD,
|
|
@@ -50,13 +50,10 @@ import {
|
|
|
50
50
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
51
51
|
transpileEvalYamlFile,
|
|
52
52
|
trimBaselineResult
|
|
53
|
-
} from "./chunk-7DRAXDVC.js";
|
|
53
|
+
} from "./chunk-KQQTEWZF.js";
|
|
54
54
|
import {
|
|
55
55
|
__commonJS,
|
|
56
|
-
__esm,
|
|
57
|
-
__export,
|
|
58
56
|
__require,
|
|
59
|
-
__toCommonJS,
|
|
60
57
|
__toESM
|
|
61
58
|
} from "./chunk-5H446C7X.js";
|
|
62
59
|
|
|
@@ -281,7 +278,7 @@ var require_ms = __commonJS({
|
|
|
281
278
|
var require_common = __commonJS({
|
|
282
279
|
"../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/common.js"(exports, module) {
|
|
283
280
|
"use strict";
|
|
284
|
-
function setup(
|
|
281
|
+
function setup(env2) {
|
|
285
282
|
createDebug.debug = createDebug;
|
|
286
283
|
createDebug.default = createDebug;
|
|
287
284
|
createDebug.coerce = coerce;
|
|
@@ -290,8 +287,8 @@ var require_common = __commonJS({
|
|
|
290
287
|
createDebug.enabled = enabled;
|
|
291
288
|
createDebug.humanize = require_ms();
|
|
292
289
|
createDebug.destroy = destroy;
|
|
293
|
-
Object.keys(
|
|
294
|
-
createDebug[key] =
|
|
290
|
+
Object.keys(env2).forEach((key) => {
|
|
291
|
+
createDebug[key] = env2[key];
|
|
295
292
|
});
|
|
296
293
|
createDebug.names = [];
|
|
297
294
|
createDebug.skips = [];
|
|
@@ -626,153 +623,118 @@ var require_browser = __commonJS({
|
|
|
626
623
|
}
|
|
627
624
|
});
|
|
628
625
|
|
|
629
|
-
// ../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js
|
|
630
|
-
var
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
|
|
640
|
-
const position = argv.indexOf(prefix + flag2);
|
|
641
|
-
const terminatorPosition = argv.indexOf("--");
|
|
642
|
-
return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
|
|
643
|
-
}
|
|
644
|
-
function envForceColor2() {
|
|
645
|
-
if (!("FORCE_COLOR" in env2)) {
|
|
646
|
-
return;
|
|
647
|
-
}
|
|
648
|
-
if (env2.FORCE_COLOR === "true") {
|
|
649
|
-
return 1;
|
|
650
|
-
}
|
|
651
|
-
if (env2.FORCE_COLOR === "false") {
|
|
652
|
-
return 0;
|
|
653
|
-
}
|
|
654
|
-
if (env2.FORCE_COLOR.length === 0) {
|
|
655
|
-
return 1;
|
|
656
|
-
}
|
|
657
|
-
const level = Math.min(Number.parseInt(env2.FORCE_COLOR, 10), 3);
|
|
658
|
-
if (![0, 1, 2, 3].includes(level)) {
|
|
659
|
-
return;
|
|
660
|
-
}
|
|
661
|
-
return level;
|
|
662
|
-
}
|
|
663
|
-
function translateLevel2(level) {
|
|
664
|
-
if (level === 0) {
|
|
665
|
-
return false;
|
|
666
|
-
}
|
|
667
|
-
return {
|
|
668
|
-
level,
|
|
669
|
-
hasBasic: true,
|
|
670
|
-
has256: level >= 2,
|
|
671
|
-
has16m: level >= 3
|
|
672
|
-
};
|
|
673
|
-
}
|
|
674
|
-
function _supportsColor2(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
|
|
675
|
-
const noFlagForceColor = envForceColor2();
|
|
676
|
-
if (noFlagForceColor !== void 0) {
|
|
677
|
-
flagForceColor2 = noFlagForceColor;
|
|
678
|
-
}
|
|
679
|
-
const forceColor = sniffFlags ? flagForceColor2 : noFlagForceColor;
|
|
680
|
-
if (forceColor === 0) {
|
|
681
|
-
return 0;
|
|
682
|
-
}
|
|
683
|
-
if (sniffFlags) {
|
|
684
|
-
if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
|
|
685
|
-
return 3;
|
|
686
|
-
}
|
|
687
|
-
if (hasFlag2("color=256")) {
|
|
688
|
-
return 2;
|
|
689
|
-
}
|
|
690
|
-
}
|
|
691
|
-
if ("TF_BUILD" in env2 && "AGENT_NAME" in env2) {
|
|
692
|
-
return 1;
|
|
693
|
-
}
|
|
694
|
-
if (haveStream && !streamIsTTY && forceColor === void 0) {
|
|
695
|
-
return 0;
|
|
696
|
-
}
|
|
697
|
-
const min = forceColor || 0;
|
|
698
|
-
if (env2.TERM === "dumb") {
|
|
699
|
-
return min;
|
|
626
|
+
// ../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js
|
|
627
|
+
var require_has_flag = __commonJS({
|
|
628
|
+
"../../node_modules/.bun/has-flag@4.0.0/node_modules/has-flag/index.js"(exports, module) {
|
|
629
|
+
"use strict";
|
|
630
|
+
module.exports = (flag2, argv = process.argv) => {
|
|
631
|
+
const prefix = flag2.startsWith("-") ? "" : flag2.length === 1 ? "-" : "--";
|
|
632
|
+
const position = argv.indexOf(prefix + flag2);
|
|
633
|
+
const terminatorPosition = argv.indexOf("--");
|
|
634
|
+
return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
|
|
635
|
+
};
|
|
700
636
|
}
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
637
|
+
});
|
|
638
|
+
|
|
639
|
+
// ../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js
|
|
640
|
+
var require_supports_color = __commonJS({
|
|
641
|
+
"../../node_modules/.bun/supports-color@7.2.0/node_modules/supports-color/index.js"(exports, module) {
|
|
642
|
+
"use strict";
|
|
643
|
+
var os2 = __require("os");
|
|
644
|
+
var tty2 = __require("tty");
|
|
645
|
+
var hasFlag2 = require_has_flag();
|
|
646
|
+
var { env: env2 } = process;
|
|
647
|
+
var forceColor;
|
|
648
|
+
if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
|
|
649
|
+
forceColor = 0;
|
|
650
|
+
} else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
|
|
651
|
+
forceColor = 1;
|
|
705
652
|
}
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
653
|
+
if ("FORCE_COLOR" in env2) {
|
|
654
|
+
if (env2.FORCE_COLOR === "true") {
|
|
655
|
+
forceColor = 1;
|
|
656
|
+
} else if (env2.FORCE_COLOR === "false") {
|
|
657
|
+
forceColor = 0;
|
|
658
|
+
} else {
|
|
659
|
+
forceColor = env2.FORCE_COLOR.length === 0 ? 1 : Math.min(parseInt(env2.FORCE_COLOR, 10), 3);
|
|
660
|
+
}
|
|
711
661
|
}
|
|
712
|
-
|
|
713
|
-
|
|
662
|
+
function translateLevel2(level) {
|
|
663
|
+
if (level === 0) {
|
|
664
|
+
return false;
|
|
665
|
+
}
|
|
666
|
+
return {
|
|
667
|
+
level,
|
|
668
|
+
hasBasic: true,
|
|
669
|
+
has256: level >= 2,
|
|
670
|
+
has16m: level >= 3
|
|
671
|
+
};
|
|
714
672
|
}
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
|
|
719
|
-
}
|
|
720
|
-
if (env2.COLORTERM === "truecolor") {
|
|
721
|
-
return 3;
|
|
722
|
-
}
|
|
723
|
-
if (env2.TERM === "xterm-kitty") {
|
|
724
|
-
return 3;
|
|
725
|
-
}
|
|
726
|
-
if (env2.TERM === "xterm-ghostty") {
|
|
727
|
-
return 3;
|
|
728
|
-
}
|
|
729
|
-
if (env2.TERM === "wezterm") {
|
|
730
|
-
return 3;
|
|
731
|
-
}
|
|
732
|
-
if ("TERM_PROGRAM" in env2) {
|
|
733
|
-
const version = Number.parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
|
|
734
|
-
switch (env2.TERM_PROGRAM) {
|
|
735
|
-
case "iTerm.app": {
|
|
736
|
-
return version >= 3 ? 3 : 2;
|
|
673
|
+
function supportsColor2(haveStream, streamIsTTY) {
|
|
674
|
+
if (forceColor === 0) {
|
|
675
|
+
return 0;
|
|
737
676
|
}
|
|
738
|
-
|
|
677
|
+
if (hasFlag2("color=16m") || hasFlag2("color=full") || hasFlag2("color=truecolor")) {
|
|
678
|
+
return 3;
|
|
679
|
+
}
|
|
680
|
+
if (hasFlag2("color=256")) {
|
|
681
|
+
return 2;
|
|
682
|
+
}
|
|
683
|
+
if (haveStream && !streamIsTTY && forceColor === void 0) {
|
|
684
|
+
return 0;
|
|
685
|
+
}
|
|
686
|
+
const min = forceColor || 0;
|
|
687
|
+
if (env2.TERM === "dumb") {
|
|
688
|
+
return min;
|
|
689
|
+
}
|
|
690
|
+
if (process.platform === "win32") {
|
|
691
|
+
const osRelease = os2.release().split(".");
|
|
692
|
+
if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
|
|
693
|
+
return Number(osRelease[2]) >= 14931 ? 3 : 2;
|
|
694
|
+
}
|
|
695
|
+
return 1;
|
|
696
|
+
}
|
|
697
|
+
if ("CI" in env2) {
|
|
698
|
+
if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI", "GITHUB_ACTIONS", "BUILDKITE"].some((sign) => sign in env2) || env2.CI_NAME === "codeship") {
|
|
699
|
+
return 1;
|
|
700
|
+
}
|
|
701
|
+
return min;
|
|
702
|
+
}
|
|
703
|
+
if ("TEAMCITY_VERSION" in env2) {
|
|
704
|
+
return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env2.TEAMCITY_VERSION) ? 1 : 0;
|
|
705
|
+
}
|
|
706
|
+
if (env2.COLORTERM === "truecolor") {
|
|
707
|
+
return 3;
|
|
708
|
+
}
|
|
709
|
+
if ("TERM_PROGRAM" in env2) {
|
|
710
|
+
const version = parseInt((env2.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
|
|
711
|
+
switch (env2.TERM_PROGRAM) {
|
|
712
|
+
case "iTerm.app":
|
|
713
|
+
return version >= 3 ? 3 : 2;
|
|
714
|
+
case "Apple_Terminal":
|
|
715
|
+
return 2;
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
if (/-256(color)?$/i.test(env2.TERM)) {
|
|
739
719
|
return 2;
|
|
740
720
|
}
|
|
721
|
+
if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
|
|
722
|
+
return 1;
|
|
723
|
+
}
|
|
724
|
+
if ("COLORTERM" in env2) {
|
|
725
|
+
return 1;
|
|
726
|
+
}
|
|
727
|
+
return min;
|
|
741
728
|
}
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
}
|
|
746
|
-
if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env2.TERM)) {
|
|
747
|
-
return 1;
|
|
748
|
-
}
|
|
749
|
-
if ("COLORTERM" in env2) {
|
|
750
|
-
return 1;
|
|
751
|
-
}
|
|
752
|
-
return min;
|
|
753
|
-
}
|
|
754
|
-
function createSupportsColor2(stream, options = {}) {
|
|
755
|
-
const level = _supportsColor2(stream, {
|
|
756
|
-
streamIsTTY: stream && stream.isTTY,
|
|
757
|
-
...options
|
|
758
|
-
});
|
|
759
|
-
return translateLevel2(level);
|
|
760
|
-
}
|
|
761
|
-
var env2, flagForceColor2, supportsColor2, supports_color_default2;
|
|
762
|
-
var init_supports_color = __esm({
|
|
763
|
-
"../../node_modules/.bun/supports-color@10.2.2/node_modules/supports-color/index.js"() {
|
|
764
|
-
"use strict";
|
|
765
|
-
({ env: env2 } = process3);
|
|
766
|
-
if (hasFlag2("no-color") || hasFlag2("no-colors") || hasFlag2("color=false") || hasFlag2("color=never")) {
|
|
767
|
-
flagForceColor2 = 0;
|
|
768
|
-
} else if (hasFlag2("color") || hasFlag2("colors") || hasFlag2("color=true") || hasFlag2("color=always")) {
|
|
769
|
-
flagForceColor2 = 1;
|
|
729
|
+
function getSupportLevel(stream) {
|
|
730
|
+
const level = supportsColor2(stream, stream && stream.isTTY);
|
|
731
|
+
return translateLevel2(level);
|
|
770
732
|
}
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
733
|
+
module.exports = {
|
|
734
|
+
supportsColor: getSupportLevel,
|
|
735
|
+
stdout: translateLevel2(supportsColor2(true, tty2.isatty(1))),
|
|
736
|
+
stderr: translateLevel2(supportsColor2(true, tty2.isatty(2)))
|
|
774
737
|
};
|
|
775
|
-
supports_color_default2 = supportsColor2;
|
|
776
738
|
}
|
|
777
739
|
});
|
|
778
740
|
|
|
@@ -780,7 +742,7 @@ var init_supports_color = __esm({
|
|
|
780
742
|
var require_node = __commonJS({
|
|
781
743
|
"../../node_modules/.bun/debug@4.4.3/node_modules/debug/src/node.js"(exports, module) {
|
|
782
744
|
"use strict";
|
|
783
|
-
var
|
|
745
|
+
var tty2 = __require("tty");
|
|
784
746
|
var util = __require("util");
|
|
785
747
|
exports.init = init;
|
|
786
748
|
exports.log = log;
|
|
@@ -795,8 +757,8 @@ var require_node = __commonJS({
|
|
|
795
757
|
);
|
|
796
758
|
exports.colors = [6, 2, 3, 4, 5, 1];
|
|
797
759
|
try {
|
|
798
|
-
const
|
|
799
|
-
if (
|
|
760
|
+
const supportsColor2 = require_supports_color();
|
|
761
|
+
if (supportsColor2 && (supportsColor2.stderr || supportsColor2).level >= 2) {
|
|
800
762
|
exports.colors = [
|
|
801
763
|
20,
|
|
802
764
|
21,
|
|
@@ -898,7 +860,7 @@ var require_node = __commonJS({
|
|
|
898
860
|
return obj;
|
|
899
861
|
}, {});
|
|
900
862
|
function useColors() {
|
|
901
|
-
return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) :
|
|
863
|
+
return "colors" in exports.inspectOpts ? Boolean(exports.inspectOpts.colors) : tty2.isatty(process.stderr.fd);
|
|
902
864
|
}
|
|
903
865
|
function formatArgs(args) {
|
|
904
866
|
const { namespace: name, useColors: useColors2 } = this;
|
|
@@ -1630,8 +1592,8 @@ function fullFlag(config) {
|
|
|
1630
1592
|
}
|
|
1631
1593
|
const defaults = [];
|
|
1632
1594
|
if (config.env) {
|
|
1633
|
-
const
|
|
1634
|
-
defaults.push(`env: ${config.env}${
|
|
1595
|
+
const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
|
|
1596
|
+
defaults.push(`env: ${config.env}${env2}`);
|
|
1635
1597
|
}
|
|
1636
1598
|
if (config.defaultValue) {
|
|
1637
1599
|
try {
|
|
@@ -2248,8 +2210,8 @@ function fullOption(config) {
|
|
|
2248
2210
|
usage += ` <${displayName}>`;
|
|
2249
2211
|
const defaults = [];
|
|
2250
2212
|
if (config.env) {
|
|
2251
|
-
const
|
|
2252
|
-
defaults.push(`env: ${config.env}${
|
|
2213
|
+
const env2 = process.env[config.env] === void 0 ? "" : `=${source_default.italic(process.env[config.env])}`;
|
|
2214
|
+
defaults.push(`env: ${config.env}${env2}`);
|
|
2253
2215
|
}
|
|
2254
2216
|
if (config.defaultValue) {
|
|
2255
2217
|
try {
|
|
@@ -3998,11 +3960,21 @@ var evalRunCommand = command({
|
|
|
3998
3960
|
type: optional(number),
|
|
3999
3961
|
long: "threshold",
|
|
4000
3962
|
description: "Per-test score threshold (0-1, default 0.8). Exit 1 if any test scores below this value"
|
|
3963
|
+
}),
|
|
3964
|
+
tag: multioption({
|
|
3965
|
+
type: array(string),
|
|
3966
|
+
long: "tag",
|
|
3967
|
+
description: "Only run eval files that have this tag (repeatable, AND logic)"
|
|
3968
|
+
}),
|
|
3969
|
+
excludeTag: multioption({
|
|
3970
|
+
type: array(string),
|
|
3971
|
+
long: "exclude-tag",
|
|
3972
|
+
description: "Skip eval files that have this tag (repeatable, file skipped if any match)"
|
|
4001
3973
|
})
|
|
4002
3974
|
},
|
|
4003
3975
|
handler: async (args) => {
|
|
4004
3976
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4005
|
-
const { launchInteractiveWizard } = await import("./interactive-BOJUYBJS.js");
|
|
3977
|
+
const { launchInteractiveWizard } = await import("./interactive-6D3ULOMN.js");
|
|
4006
3978
|
await launchInteractiveWizard();
|
|
4007
3979
|
return;
|
|
4008
3980
|
}
|
|
@@ -4039,7 +4011,9 @@ var evalRunCommand = command({
|
|
|
4039
4011
|
graderTarget: args.graderTarget,
|
|
4040
4012
|
model: args.model,
|
|
4041
4013
|
outputMessages: args.outputMessages,
|
|
4042
|
-
threshold: args.threshold
|
|
4014
|
+
threshold: args.threshold,
|
|
4015
|
+
tag: args.tag,
|
|
4016
|
+
excludeTag: args.excludeTag
|
|
4043
4017
|
};
|
|
4044
4018
|
const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4045
4019
|
if (result?.thresholdFailed) {
|
|
@@ -4886,15 +4860,15 @@ function loadEnvFile(dir) {
|
|
|
4886
4860
|
while (true) {
|
|
4887
4861
|
const candidate = join4(current, ".env");
|
|
4888
4862
|
if (existsSync3(candidate)) {
|
|
4889
|
-
const
|
|
4863
|
+
const env2 = {};
|
|
4890
4864
|
for (const line of readFileSync4(candidate, "utf8").split("\n")) {
|
|
4891
4865
|
const trimmed = line.trim();
|
|
4892
4866
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
4893
4867
|
const eqIdx = trimmed.indexOf("=");
|
|
4894
4868
|
if (eqIdx === -1) continue;
|
|
4895
|
-
|
|
4869
|
+
env2[trimmed.slice(0, eqIdx).trim()] = trimmed.slice(eqIdx + 1).trim();
|
|
4896
4870
|
}
|
|
4897
|
-
return
|
|
4871
|
+
return env2;
|
|
4898
4872
|
}
|
|
4899
4873
|
const parent = dirname2(current);
|
|
4900
4874
|
if (parent === current) break;
|
|
@@ -8569,4 +8543,4 @@ export {
|
|
|
8569
8543
|
preprocessArgv,
|
|
8570
8544
|
runCli
|
|
8571
8545
|
};
|
|
8572
|
-
//# sourceMappingURL=chunk-HF5UGZSZ.js.map
|
|
8546
|
+
//# sourceMappingURL=chunk-U2LSJ6Y4.js.map
|