agentv 3.8.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-ASYRKFAI.js → chunk-GC5P5HHZ.js} +6 -37
- package/dist/chunk-GC5P5HHZ.js.map +1 -0
- package/dist/{chunk-YZRGQ6ZS.js → chunk-Q2YWV4QM.js} +12 -16
- package/dist/chunk-Q2YWV4QM.js.map +1 -0
- package/dist/{chunk-F4UDJ7LG.js → chunk-TXDPYXHY.js} +356 -616
- package/dist/chunk-TXDPYXHY.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-4AQUJJAP.js → dist-PIOSPBKX.js} +2 -4
- package/dist/index.js +3 -3
- package/dist/{interactive-OPQGDF77.js → interactive-3VTDK5NX.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-ASYRKFAI.js.map +0 -1
- package/dist/chunk-F4UDJ7LG.js.map +0 -1
- package/dist/chunk-YZRGQ6ZS.js.map +0 -1
- /package/dist/{dist-4AQUJJAP.js.map → dist-PIOSPBKX.js.map} +0 -0
- /package/dist/{interactive-OPQGDF77.js.map → interactive-3VTDK5NX.js.map} +0 -0
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
validateEvalFile,
|
|
17
17
|
validateFileReferences,
|
|
18
18
|
validateTargetsFile
|
|
19
|
-
} from "./chunk-ASYRKFAI.js";
|
|
19
|
+
} from "./chunk-GC5P5HHZ.js";
|
|
20
20
|
import {
|
|
21
21
|
createBuiltinRegistry,
|
|
22
22
|
createProvider,
|
|
@@ -34,7 +34,7 @@ import {
|
|
|
34
34
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
35
35
|
transpileEvalYamlFile,
|
|
36
36
|
trimBaselineResult
|
|
37
|
-
} from "./chunk-F4UDJ7LG.js";
|
|
37
|
+
} from "./chunk-TXDPYXHY.js";
|
|
38
38
|
import {
|
|
39
39
|
__commonJS,
|
|
40
40
|
__esm,
|
|
@@ -3714,7 +3714,6 @@ async function getPromptEvalInput(evalPath, testId) {
|
|
|
3714
3714
|
return {
|
|
3715
3715
|
test_id: evalCase.id,
|
|
3716
3716
|
input: resolveMessages(evalCase.input, fileMap),
|
|
3717
|
-
guideline_paths: evalCase.guideline_paths,
|
|
3718
3717
|
criteria: evalCase.criteria
|
|
3719
3718
|
};
|
|
3720
3719
|
}
|
|
@@ -3739,9 +3738,8 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
|
|
|
3739
3738
|
if (inputText) {
|
|
3740
3739
|
lines.push(`Input: "${inputText}"`);
|
|
3741
3740
|
}
|
|
3742
|
-
|
|
3743
|
-
|
|
3744
|
-
lines.push(`Files: ${filePaths.join(", ")}`);
|
|
3741
|
+
if (evalCase.file_paths.length > 0) {
|
|
3742
|
+
lines.push(`Files: ${evalCase.file_paths.join(", ")}`);
|
|
3745
3743
|
}
|
|
3746
3744
|
if (evalCase.reference_answer) {
|
|
3747
3745
|
lines.push(`Expected: "${evalCase.reference_answer}"`);
|
|
@@ -3973,7 +3971,6 @@ var evalAssertCommand = command({
|
|
|
3973
3971
|
criteria: "",
|
|
3974
3972
|
expected_output: [],
|
|
3975
3973
|
reference_answer: "",
|
|
3976
|
-
guideline_files: [],
|
|
3977
3974
|
input_files: [],
|
|
3978
3975
|
trace: null,
|
|
3979
3976
|
token_usage: null,
|
|
@@ -4189,7 +4186,7 @@ var evalRunCommand = command({
|
|
|
4189
4186
|
},
|
|
4190
4187
|
handler: async (args) => {
|
|
4191
4188
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4192
|
-
const { launchInteractiveWizard } = await import("./interactive-OPQGDF77.js");
|
|
4189
|
+
const { launchInteractiveWizard } = await import("./interactive-3VTDK5NX.js");
|
|
4193
4190
|
await launchInteractiveWizard();
|
|
4194
4191
|
return;
|
|
4195
4192
|
}
|
|
@@ -5037,7 +5034,6 @@ function buildEvalTest(raw) {
|
|
|
5037
5034
|
input: [],
|
|
5038
5035
|
input_segments: [],
|
|
5039
5036
|
expected_output: [],
|
|
5040
|
-
guideline_paths: [],
|
|
5041
5037
|
file_paths: [],
|
|
5042
5038
|
criteria: ""
|
|
5043
5039
|
};
|
|
@@ -5075,7 +5071,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
|
|
|
5075
5071
|
target: { kind: "custom", name: raw.target ?? "unknown", config: {} },
|
|
5076
5072
|
provider: stubProvider,
|
|
5077
5073
|
attempt: 1,
|
|
5078
|
-
promptInputs: { question: ""
|
|
5074
|
+
promptInputs: { question: "" },
|
|
5079
5075
|
now: /* @__PURE__ */ new Date(),
|
|
5080
5076
|
output: Array.isArray(output) ? output : void 0,
|
|
5081
5077
|
trace,
|
|
@@ -5329,7 +5325,7 @@ function formatResultDetail(result, index, tree) {
|
|
|
5329
5325
|
}
|
|
5330
5326
|
const scoreColor = result.score >= 0.9 ? c2.green : result.score >= 0.5 ? c2.yellow : c2.red;
|
|
5331
5327
|
lines.push(
|
|
5332
|
-
`${c2.bold}${testId}${c2.reset} ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? ` ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.
|
|
5328
|
+
`${c2.bold}${testId}${c2.reset} ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? ` ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.eval_set ? ` ${c2.dim}eval-set: ${result.eval_set}${c2.reset}` : ""}`
|
|
5333
5329
|
);
|
|
5334
5330
|
if (result.error) {
|
|
5335
5331
|
lines.push(` ${c2.red}Error: ${result.error}${c2.reset}`);
|
|
@@ -5503,8 +5499,8 @@ function groupResults(results, groupBy2) {
|
|
|
5503
5499
|
case "target":
|
|
5504
5500
|
key = result.target ?? "unknown";
|
|
5505
5501
|
break;
|
|
5506
|
-
case "
|
|
5507
|
-
key = result.
|
|
5502
|
+
case "eval-set":
|
|
5503
|
+
key = result.eval_set ?? "unknown";
|
|
5508
5504
|
break;
|
|
5509
5505
|
case "test-id":
|
|
5510
5506
|
key = result.test_id ?? result.eval_id ?? "unknown";
|
|
@@ -5586,10 +5582,10 @@ var traceStatsCommand = command({
|
|
|
5586
5582
|
description: "Path to JSONL result file"
|
|
5587
5583
|
}),
|
|
5588
5584
|
groupBy: option({
|
|
5589
|
-
type: optional(oneOf(["target", "
|
|
5585
|
+
type: optional(oneOf(["target", "eval-set", "test-id"])),
|
|
5590
5586
|
long: "group-by",
|
|
5591
5587
|
short: "g",
|
|
5592
|
-
description: "Group statistics by: target,
|
|
5588
|
+
description: "Group statistics by: target, eval-set, or test-id"
|
|
5593
5589
|
}),
|
|
5594
5590
|
format: option({
|
|
5595
5591
|
type: optional(oneOf(["table", "json"])),
|
|
@@ -6280,4 +6276,4 @@ export {
|
|
|
6280
6276
|
preprocessArgv,
|
|
6281
6277
|
runCli
|
|
6282
6278
|
};
|
|
6283
|
-
//# sourceMappingURL=chunk-YZRGQ6ZS.js.map
|
|
6279
|
+
//# sourceMappingURL=chunk-Q2YWV4QM.js.map
|