agentv 3.13.0 → 3.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{chunk-7OHZAFND.js → chunk-K747KGDP.js} +47 -34
- package/dist/chunk-K747KGDP.js.map +1 -0
- package/dist/{chunk-6H4IAXQH.js → chunk-LSXO22CF.js} +8 -6
- package/dist/chunk-LSXO22CF.js.map +1 -0
- package/dist/{chunk-DJU4C6NS.js → chunk-UK7UMQOX.js} +20 -17
- package/dist/chunk-UK7UMQOX.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-SMKOBBFB.js → dist-LCZDS36N.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-RV664PCR.js → interactive-76ZJVPI7.js} +3 -3
- package/dist/templates/.agentv/.env.example +23 -0
- package/dist/templates/.agentv/config.yaml +13 -4
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-6H4IAXQH.js.map +0 -1
- package/dist/chunk-7OHZAFND.js.map +0 -1
- package/dist/chunk-DJU4C6NS.js.map +0 -1
- /package/dist/{dist-SMKOBBFB.js.map → dist-LCZDS36N.js.map} +0 -0
- /package/dist/{interactive-RV664PCR.js.map → interactive-76ZJVPI7.js.map} +0 -0
|
@@ -2,6 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
4
|
RESULT_INDEX_FILENAME,
|
|
5
|
+
RESULT_RUNS_DIRNAME,
|
|
5
6
|
detectFileType,
|
|
6
7
|
findRepoRoot,
|
|
7
8
|
loadLightweightResults,
|
|
@@ -21,7 +22,7 @@ import {
|
|
|
21
22
|
validateFileReferences,
|
|
22
23
|
validateTargetsFile,
|
|
23
24
|
writeArtifactsFromResults
|
|
24
|
-
} from "./chunk-
|
|
25
|
+
} from "./chunk-LSXO22CF.js";
|
|
25
26
|
import {
|
|
26
27
|
createBuiltinRegistry,
|
|
27
28
|
executeScript,
|
|
@@ -38,7 +39,7 @@ import {
|
|
|
38
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
39
40
|
transpileEvalYamlFile,
|
|
40
41
|
trimBaselineResult
|
|
41
|
-
} from "./chunk-
|
|
42
|
+
} from "./chunk-K747KGDP.js";
|
|
42
43
|
import {
|
|
43
44
|
__commonJS,
|
|
44
45
|
__esm,
|
|
@@ -3388,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
|
|
|
3388
3389
|
for (const assertion of test.assertions) {
|
|
3389
3390
|
lines.push(` - name: ${assertion.name}`);
|
|
3390
3391
|
lines.push(` type: ${assertion.type}`);
|
|
3391
|
-
if (
|
|
3392
|
+
if (assertion.type === "llm-grader" && "prompt" in assertion) {
|
|
3392
3393
|
const prompt = assertion.prompt;
|
|
3393
3394
|
lines.push(` prompt: "${prompt.replace(/"/g, '\\"')}"`);
|
|
3394
3395
|
}
|
|
@@ -3745,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
|
|
|
3745
3746
|
if (item.outcome) criteria.push(item.outcome);
|
|
3746
3747
|
}
|
|
3747
3748
|
}
|
|
3748
|
-
} else if (type === "llm-grader" || type === "llm_grader"
|
|
3749
|
+
} else if (type === "llm-grader" || type === "llm_grader") {
|
|
3749
3750
|
const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
|
|
3750
3751
|
criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
|
|
3751
|
-
} else if (type === "code-grader" || type === "code_grader"
|
|
3752
|
+
} else if (type === "code-grader" || type === "code_grader") {
|
|
3752
3753
|
const name = entry.name ?? type;
|
|
3753
3754
|
const desc = bag.description ?? entry.description;
|
|
3754
3755
|
criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
|
|
@@ -4179,7 +4180,7 @@ var evalRunCommand = command({
|
|
|
4179
4180
|
},
|
|
4180
4181
|
handler: async (args) => {
|
|
4181
4182
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4182
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4183
|
+
const { launchInteractiveWizard } = await import("./interactive-76ZJVPI7.js");
|
|
4183
4184
|
await launchInteractiveWizard();
|
|
4184
4185
|
return;
|
|
4185
4186
|
}
|
|
@@ -4760,7 +4761,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4760
4761
|
let hasCodeGraders = false;
|
|
4761
4762
|
let hasLlmGraders = false;
|
|
4762
4763
|
for (const assertion of assertions) {
|
|
4763
|
-
if (assertion.type === "code-grader"
|
|
4764
|
+
if (assertion.type === "code-grader") {
|
|
4764
4765
|
if (!hasCodeGraders) {
|
|
4765
4766
|
await mkdir3(codeGradersDir, { recursive: true });
|
|
4766
4767
|
hasCodeGraders = true;
|
|
@@ -4773,7 +4774,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4773
4774
|
weight: config.weight ?? 1,
|
|
4774
4775
|
config: config.config ?? {}
|
|
4775
4776
|
});
|
|
4776
|
-
} else if (assertion.type === "llm-grader"
|
|
4777
|
+
} else if (assertion.type === "llm-grader") {
|
|
4777
4778
|
if (!hasLlmGraders) {
|
|
4778
4779
|
await mkdir3(llmGradersDir, { recursive: true });
|
|
4779
4780
|
hasLlmGraders = true;
|
|
@@ -5021,13 +5022,15 @@ function loadOtlpTraceFile(filePath) {
|
|
|
5021
5022
|
} : void 0,
|
|
5022
5023
|
spans: traceSummary?.spans,
|
|
5023
5024
|
output: stringAttr(rootAttrs.agentv_output_text),
|
|
5024
|
-
scores: root.events?.filter(
|
|
5025
|
+
scores: root.events?.filter(
|
|
5026
|
+
(event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
|
|
5027
|
+
).map((event) => {
|
|
5025
5028
|
const attrs = parseOtlpAttributes(event.attributes);
|
|
5026
|
-
const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5029
|
+
const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5027
5030
|
return {
|
|
5028
5031
|
name,
|
|
5029
|
-
type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5030
|
-
score: numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5032
|
+
type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5033
|
+
score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5031
5034
|
};
|
|
5032
5035
|
})
|
|
5033
5036
|
};
|
|
@@ -5131,13 +5134,13 @@ function toTraceSummary(result) {
|
|
|
5131
5134
|
}
|
|
5132
5135
|
function listResultFiles(cwd, limit) {
|
|
5133
5136
|
const baseDir = path6.join(cwd, ".agentv", "results");
|
|
5134
|
-
const
|
|
5137
|
+
const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
|
|
5135
5138
|
const files = [];
|
|
5136
5139
|
try {
|
|
5137
|
-
const entries2 = readdirSync2(
|
|
5140
|
+
const entries2 = readdirSync2(runsDir, { withFileTypes: true });
|
|
5138
5141
|
for (const entry of entries2) {
|
|
5139
5142
|
if (entry.isDirectory()) {
|
|
5140
|
-
const primaryPath = resolveExistingRunPrimaryPath(path6.join(
|
|
5143
|
+
const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
|
|
5141
5144
|
if (primaryPath) {
|
|
5142
5145
|
files.push({ filePath: primaryPath, displayName: entry.name });
|
|
5143
5146
|
}
|
|
@@ -5145,7 +5148,7 @@ function listResultFiles(cwd, limit) {
|
|
|
5145
5148
|
}
|
|
5146
5149
|
for (const entry of entries2) {
|
|
5147
5150
|
if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
|
|
5148
|
-
files.push({ filePath: path6.join(
|
|
5151
|
+
files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
|
|
5149
5152
|
}
|
|
5150
5153
|
}
|
|
5151
5154
|
} catch {
|
|
@@ -7753,4 +7756,4 @@ export {
|
|
|
7753
7756
|
preprocessArgv,
|
|
7754
7757
|
runCli
|
|
7755
7758
|
};
|
|
7756
|
-
//# sourceMappingURL=chunk-
|
|
7759
|
+
//# sourceMappingURL=chunk-UK7UMQOX.js.map
|