agentv 3.13.0 → 3.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{chunk-6H4IAXQH.js → chunk-4Z5E5CYT.js} +54 -22
- package/dist/chunk-4Z5E5CYT.js.map +1 -0
- package/dist/{chunk-7OHZAFND.js → chunk-D3LNJUUB.js} +67 -35
- package/dist/chunk-D3LNJUUB.js.map +1 -0
- package/dist/{chunk-DJU4C6NS.js → chunk-X2343WOK.js} +31 -19
- package/dist/chunk-X2343WOK.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-SMKOBBFB.js → dist-KPMR7RBT.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-RV664PCR.js → interactive-HVKLYGRX.js} +3 -3
- package/dist/templates/.agentv/.env.example +23 -0
- package/dist/templates/.agentv/config.yaml +13 -4
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-6H4IAXQH.js.map +0 -1
- package/dist/chunk-7OHZAFND.js.map +0 -1
- package/dist/chunk-DJU4C6NS.js.map +0 -1
- package/dist/{dist-SMKOBBFB.js.map → dist-KPMR7RBT.js.map} +0 -0
- package/dist/{interactive-RV664PCR.js.map → interactive-HVKLYGRX.js.map} +0 -0
|
@@ -2,6 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
4
|
RESULT_INDEX_FILENAME,
|
|
5
|
+
RESULT_RUNS_DIRNAME,
|
|
5
6
|
detectFileType,
|
|
6
7
|
findRepoRoot,
|
|
7
8
|
loadLightweightResults,
|
|
@@ -21,7 +22,7 @@ import {
|
|
|
21
22
|
validateFileReferences,
|
|
22
23
|
validateTargetsFile,
|
|
23
24
|
writeArtifactsFromResults
|
|
24
|
-
} from "./chunk-6H4IAXQH.js";
|
|
25
|
+
} from "./chunk-4Z5E5CYT.js";
|
|
25
26
|
import {
|
|
26
27
|
createBuiltinRegistry,
|
|
27
28
|
executeScript,
|
|
@@ -38,7 +39,7 @@ import {
|
|
|
38
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
39
40
|
transpileEvalYamlFile,
|
|
40
41
|
trimBaselineResult
|
|
41
|
-
} from "./chunk-7OHZAFND.js";
|
|
42
|
+
} from "./chunk-D3LNJUUB.js";
|
|
42
43
|
import {
|
|
43
44
|
__commonJS,
|
|
44
45
|
__esm,
|
|
@@ -3388,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
|
|
|
3388
3389
|
for (const assertion of test.assertions) {
|
|
3389
3390
|
lines.push(` - name: ${assertion.name}`);
|
|
3390
3391
|
lines.push(` type: ${assertion.type}`);
|
|
3391
|
-
if (
|
|
3392
|
+
if (assertion.type === "llm-grader" && "prompt" in assertion) {
|
|
3392
3393
|
const prompt = assertion.prompt;
|
|
3393
3394
|
lines.push(` prompt: "${prompt.replace(/"/g, '\\"')}"`);
|
|
3394
3395
|
}
|
|
@@ -3745,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
|
|
|
3745
3746
|
if (item.outcome) criteria.push(item.outcome);
|
|
3746
3747
|
}
|
|
3747
3748
|
}
|
|
3748
|
-
} else if (type === "llm-grader" || type === "llm_grader"
|
|
3749
|
+
} else if (type === "llm-grader" || type === "llm_grader") {
|
|
3749
3750
|
const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
|
|
3750
3751
|
criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
|
|
3751
|
-
} else if (type === "code-grader" || type === "code_grader"
|
|
3752
|
+
} else if (type === "code-grader" || type === "code_grader") {
|
|
3752
3753
|
const name = entry.name ?? type;
|
|
3753
3754
|
const desc = bag.description ?? entry.description;
|
|
3754
3755
|
criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
|
|
@@ -4175,11 +4176,16 @@ var evalRunCommand = command({
|
|
|
4175
4176
|
type: optional(string),
|
|
4176
4177
|
long: "output-messages",
|
|
4177
4178
|
description: 'Number of trailing messages to include in results output (default: 1, or "all")'
|
|
4179
|
+
}),
|
|
4180
|
+
threshold: option({
|
|
4181
|
+
type: optional(number),
|
|
4182
|
+
long: "threshold",
|
|
4183
|
+
description: "Suite-level quality gate: exit 1 if mean score falls below this value (0-1)"
|
|
4178
4184
|
})
|
|
4179
4185
|
},
|
|
4180
4186
|
handler: async (args) => {
|
|
4181
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4182
|
-
const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-HVKLYGRX.js");
|
|
4183
4189
|
await launchInteractiveWizard();
|
|
4184
4190
|
return;
|
|
4185
4191
|
}
|
|
@@ -4215,9 +4221,13 @@ var evalRunCommand = command({
|
|
|
4215
4221
|
artifacts: args.artifacts,
|
|
4216
4222
|
graderTarget: args.graderTarget,
|
|
4217
4223
|
model: args.model,
|
|
4218
|
-
outputMessages: args.outputMessages
|
|
4224
|
+
outputMessages: args.outputMessages,
|
|
4225
|
+
threshold: args.threshold
|
|
4219
4226
|
};
|
|
4220
|
-
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4227
|
+
const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4228
|
+
if (result?.thresholdFailed) {
|
|
4229
|
+
process.exit(1);
|
|
4230
|
+
}
|
|
4221
4231
|
}
|
|
4222
4232
|
});
|
|
4223
4233
|
|
|
@@ -4760,7 +4770,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4760
4770
|
let hasCodeGraders = false;
|
|
4761
4771
|
let hasLlmGraders = false;
|
|
4762
4772
|
for (const assertion of assertions) {
|
|
4763
|
-
if (assertion.type === "code-grader"
|
|
4773
|
+
if (assertion.type === "code-grader") {
|
|
4764
4774
|
if (!hasCodeGraders) {
|
|
4765
4775
|
await mkdir3(codeGradersDir, { recursive: true });
|
|
4766
4776
|
hasCodeGraders = true;
|
|
@@ -4773,7 +4783,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4773
4783
|
weight: config.weight ?? 1,
|
|
4774
4784
|
config: config.config ?? {}
|
|
4775
4785
|
});
|
|
4776
|
-
} else if (assertion.type === "llm-grader"
|
|
4786
|
+
} else if (assertion.type === "llm-grader") {
|
|
4777
4787
|
if (!hasLlmGraders) {
|
|
4778
4788
|
await mkdir3(llmGradersDir, { recursive: true });
|
|
4779
4789
|
hasLlmGraders = true;
|
|
@@ -5021,13 +5031,15 @@ function loadOtlpTraceFile(filePath) {
|
|
|
5021
5031
|
} : void 0,
|
|
5022
5032
|
spans: traceSummary?.spans,
|
|
5023
5033
|
output: stringAttr(rootAttrs.agentv_output_text),
|
|
5024
|
-
scores: root.events?.filter(
|
|
5034
|
+
scores: root.events?.filter(
|
|
5035
|
+
(event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
|
|
5036
|
+
).map((event) => {
|
|
5025
5037
|
const attrs = parseOtlpAttributes(event.attributes);
|
|
5026
|
-
const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5038
|
+
const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5027
5039
|
return {
|
|
5028
5040
|
name,
|
|
5029
|
-
type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5030
|
-
score: numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5041
|
+
type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5042
|
+
score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5031
5043
|
};
|
|
5032
5044
|
})
|
|
5033
5045
|
};
|
|
@@ -5131,13 +5143,13 @@ function toTraceSummary(result) {
|
|
|
5131
5143
|
}
|
|
5132
5144
|
function listResultFiles(cwd, limit) {
|
|
5133
5145
|
const baseDir = path6.join(cwd, ".agentv", "results");
|
|
5134
|
-
const
|
|
5146
|
+
const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
|
|
5135
5147
|
const files = [];
|
|
5136
5148
|
try {
|
|
5137
|
-
const entries2 = readdirSync2(
|
|
5149
|
+
const entries2 = readdirSync2(runsDir, { withFileTypes: true });
|
|
5138
5150
|
for (const entry of entries2) {
|
|
5139
5151
|
if (entry.isDirectory()) {
|
|
5140
|
-
const primaryPath = resolveExistingRunPrimaryPath(path6.join(
|
|
5152
|
+
const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
|
|
5141
5153
|
if (primaryPath) {
|
|
5142
5154
|
files.push({ filePath: primaryPath, displayName: entry.name });
|
|
5143
5155
|
}
|
|
@@ -5145,7 +5157,7 @@ function listResultFiles(cwd, limit) {
|
|
|
5145
5157
|
}
|
|
5146
5158
|
for (const entry of entries2) {
|
|
5147
5159
|
if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
|
|
5148
|
-
files.push({ filePath: path6.join(
|
|
5160
|
+
files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
|
|
5149
5161
|
}
|
|
5150
5162
|
}
|
|
5151
5163
|
} catch {
|
|
@@ -7753,4 +7765,4 @@ export {
|
|
|
7753
7765
|
preprocessArgv,
|
|
7754
7766
|
runCli
|
|
7755
7767
|
};
|
|
7756
|
-
//# sourceMappingURL=chunk-DJU4C6NS.js.map
|
|
7768
|
+
//# sourceMappingURL=chunk-X2343WOK.js.map
|