agentv 3.12.0 → 3.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -10
- package/dist/{chunk-UYBLUYHN.js → chunk-K747KGDP.js} +64 -49
- package/dist/chunk-K747KGDP.js.map +1 -0
- package/dist/{chunk-VLOFRXH4.js → chunk-LSXO22CF.js} +17 -43
- package/dist/chunk-LSXO22CF.js.map +1 -0
- package/dist/{chunk-2ELQ6F3C.js → chunk-UK7UMQOX.js} +29 -34
- package/dist/chunk-UK7UMQOX.js.map +1 -0
- package/dist/cli.js +3 -4
- package/dist/cli.js.map +1 -1
- package/dist/{dist-L6R5HJ72.js → dist-LCZDS36N.js} +2 -6
- package/dist/index.js +3 -4
- package/dist/{interactive-5X62YEEX.js → interactive-76ZJVPI7.js} +3 -4
- package/dist/{interactive-5X62YEEX.js.map → interactive-76ZJVPI7.js.map} +1 -1
- package/package.json +1 -1
- package/dist/chunk-2ELQ6F3C.js.map +0 -1
- package/dist/chunk-NR7QVL75.js +0 -122
- package/dist/chunk-NR7QVL75.js.map +0 -1
- package/dist/chunk-UYBLUYHN.js.map +0 -1
- package/dist/chunk-VLOFRXH4.js.map +0 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js +0 -9
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map +0 -1
- package/dist/{dist-L6R5HJ72.js.map → dist-LCZDS36N.js.map} +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
|
-
LEGACY_RESULTS_FILENAME,
|
|
5
4
|
RESULT_INDEX_FILENAME,
|
|
5
|
+
RESULT_RUNS_DIRNAME,
|
|
6
6
|
detectFileType,
|
|
7
7
|
findRepoRoot,
|
|
8
8
|
loadLightweightResults,
|
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
validateFileReferences,
|
|
23
23
|
validateTargetsFile,
|
|
24
24
|
writeArtifactsFromResults
|
|
25
|
-
} from "./chunk-VLOFRXH4.js";
|
|
25
|
+
} from "./chunk-LSXO22CF.js";
|
|
26
26
|
import {
|
|
27
27
|
createBuiltinRegistry,
|
|
28
28
|
executeScript,
|
|
@@ -39,7 +39,7 @@ import {
|
|
|
39
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
40
40
|
transpileEvalYamlFile,
|
|
41
41
|
trimBaselineResult
|
|
42
|
-
} from "./chunk-UYBLUYHN.js";
|
|
42
|
+
} from "./chunk-K747KGDP.js";
|
|
43
43
|
import {
|
|
44
44
|
__commonJS,
|
|
45
45
|
__esm,
|
|
@@ -3389,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
|
|
|
3389
3389
|
for (const assertion of test.assertions) {
|
|
3390
3390
|
lines.push(` - name: ${assertion.name}`);
|
|
3391
3391
|
lines.push(` type: ${assertion.type}`);
|
|
3392
|
-
if (
|
|
3392
|
+
if (assertion.type === "llm-grader" && "prompt" in assertion) {
|
|
3393
3393
|
const prompt = assertion.prompt;
|
|
3394
3394
|
lines.push(` prompt: "${prompt.replace(/"/g, '\\"')}"`);
|
|
3395
3395
|
}
|
|
@@ -3746,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
|
|
|
3746
3746
|
if (item.outcome) criteria.push(item.outcome);
|
|
3747
3747
|
}
|
|
3748
3748
|
}
|
|
3749
|
-
} else if (type === "llm-grader" || type === "llm_grader"
|
|
3749
|
+
} else if (type === "llm-grader" || type === "llm_grader") {
|
|
3750
3750
|
const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
|
|
3751
3751
|
criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
|
|
3752
|
-
} else if (type === "code-grader" || type === "code_grader"
|
|
3752
|
+
} else if (type === "code-grader" || type === "code_grader") {
|
|
3753
3753
|
const name = entry.name ?? type;
|
|
3754
3754
|
const desc = bag.description ?? entry.description;
|
|
3755
3755
|
criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
|
|
@@ -4126,11 +4126,6 @@ var evalRunCommand = command({
|
|
|
4126
4126
|
long: "otel-file",
|
|
4127
4127
|
description: "Write OTLP JSON trace to file (importable by OTel backends)"
|
|
4128
4128
|
}),
|
|
4129
|
-
traceFile: option({
|
|
4130
|
-
type: optional(string),
|
|
4131
|
-
long: "trace-file",
|
|
4132
|
-
description: "Write human-readable trace JSONL to file"
|
|
4133
|
-
}),
|
|
4134
4129
|
exportOtel: flag({
|
|
4135
4130
|
long: "export-otel",
|
|
4136
4131
|
description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
|
|
@@ -4185,7 +4180,7 @@ var evalRunCommand = command({
|
|
|
4185
4180
|
},
|
|
4186
4181
|
handler: async (args) => {
|
|
4187
4182
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-5X62YEEX.js");
|
|
4183
|
+
const { launchInteractiveWizard } = await import("./interactive-76ZJVPI7.js");
|
|
4189
4184
|
await launchInteractiveWizard();
|
|
4190
4185
|
return;
|
|
4191
4186
|
}
|
|
@@ -4211,7 +4206,6 @@ var evalRunCommand = command({
|
|
|
4211
4206
|
workspacePath: args.workspacePath,
|
|
4212
4207
|
trace: false,
|
|
4213
4208
|
otelFile: args.otelFile,
|
|
4214
|
-
traceFile: args.traceFile,
|
|
4215
4209
|
exportOtel: args.exportOtel,
|
|
4216
4210
|
otelBackend: args.otelBackend,
|
|
4217
4211
|
otelCaptureContent: args.otelCaptureContent,
|
|
@@ -4767,7 +4761,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4767
4761
|
let hasCodeGraders = false;
|
|
4768
4762
|
let hasLlmGraders = false;
|
|
4769
4763
|
for (const assertion of assertions) {
|
|
4770
|
-
if (assertion.type === "code-grader"
|
|
4764
|
+
if (assertion.type === "code-grader") {
|
|
4771
4765
|
if (!hasCodeGraders) {
|
|
4772
4766
|
await mkdir3(codeGradersDir, { recursive: true });
|
|
4773
4767
|
hasCodeGraders = true;
|
|
@@ -4780,7 +4774,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
|
|
|
4780
4774
|
weight: config.weight ?? 1,
|
|
4781
4775
|
config: config.config ?? {}
|
|
4782
4776
|
});
|
|
4783
|
-
} else if (assertion.type === "llm-grader"
|
|
4777
|
+
} else if (assertion.type === "llm-grader") {
|
|
4784
4778
|
if (!hasLlmGraders) {
|
|
4785
4779
|
await mkdir3(llmGradersDir, { recursive: true });
|
|
4786
4780
|
hasLlmGraders = true;
|
|
@@ -4866,12 +4860,6 @@ function loadResultFile(filePath) {
|
|
|
4866
4860
|
return loadJsonlRecords(resolvedFilePath);
|
|
4867
4861
|
}
|
|
4868
4862
|
function resolveTraceResultPath(filePath) {
|
|
4869
|
-
if (path6.basename(filePath) === LEGACY_RESULTS_FILENAME) {
|
|
4870
|
-
return filePath;
|
|
4871
|
-
}
|
|
4872
|
-
if (!filePath.endsWith(".jsonl") && !filePath.endsWith(".json")) {
|
|
4873
|
-
return resolveWorkspaceOrFilePath(filePath);
|
|
4874
|
-
}
|
|
4875
4863
|
return resolveWorkspaceOrFilePath(filePath);
|
|
4876
4864
|
}
|
|
4877
4865
|
function loadJsonlRecords(filePath) {
|
|
@@ -4945,7 +4933,9 @@ function loadOtlpTraceFile(filePath) {
|
|
|
4945
4933
|
}
|
|
4946
4934
|
}
|
|
4947
4935
|
const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
|
|
4948
|
-
|
|
4936
|
+
const supportedRoots = roots.filter(isAgentvEvalRoot);
|
|
4937
|
+
const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
|
|
4938
|
+
return candidateRoots.map((root, index) => {
|
|
4949
4939
|
const descendants = collectChildSpans(root.spanId, childMap);
|
|
4950
4940
|
const rootAttrs = parseOtlpAttributes(root.attributes);
|
|
4951
4941
|
const parsedDescendants = descendants.map((span) => ({
|
|
@@ -5032,18 +5022,24 @@ function loadOtlpTraceFile(filePath) {
|
|
|
5032
5022
|
} : void 0,
|
|
5033
5023
|
spans: traceSummary?.spans,
|
|
5034
5024
|
output: stringAttr(rootAttrs.agentv_output_text),
|
|
5035
|
-
scores: root.events?.filter(
|
|
5025
|
+
scores: root.events?.filter(
|
|
5026
|
+
(event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
|
|
5027
|
+
).map((event) => {
|
|
5036
5028
|
const attrs = parseOtlpAttributes(event.attributes);
|
|
5037
|
-
const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5029
|
+
const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
|
|
5038
5030
|
return {
|
|
5039
5031
|
name,
|
|
5040
|
-
type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5041
|
-
score: numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5032
|
+
type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
|
|
5033
|
+
score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
|
|
5042
5034
|
};
|
|
5043
5035
|
})
|
|
5044
5036
|
};
|
|
5045
5037
|
});
|
|
5046
5038
|
}
|
|
5039
|
+
function isAgentvEvalRoot(span) {
|
|
5040
|
+
const attrs = parseOtlpAttributes(span.attributes);
|
|
5041
|
+
return span.name === "agentv.eval" || numberAttr(attrs.agentv_score) !== void 0 || typeof stringAttr(attrs.agentv_test_id) === "string";
|
|
5042
|
+
}
|
|
5047
5043
|
function collectChildSpans(spanId, childMap) {
|
|
5048
5044
|
if (!spanId) return [];
|
|
5049
5045
|
const direct = childMap.get(spanId) ?? [];
|
|
@@ -5138,13 +5134,13 @@ function toTraceSummary(result) {
|
|
|
5138
5134
|
}
|
|
5139
5135
|
function listResultFiles(cwd, limit) {
|
|
5140
5136
|
const baseDir = path6.join(cwd, ".agentv", "results");
|
|
5141
|
-
const
|
|
5137
|
+
const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
|
|
5142
5138
|
const files = [];
|
|
5143
5139
|
try {
|
|
5144
|
-
const entries2 = readdirSync2(
|
|
5140
|
+
const entries2 = readdirSync2(runsDir, { withFileTypes: true });
|
|
5145
5141
|
for (const entry of entries2) {
|
|
5146
5142
|
if (entry.isDirectory()) {
|
|
5147
|
-
const primaryPath = resolveExistingRunPrimaryPath(path6.join(
|
|
5143
|
+
const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
|
|
5148
5144
|
if (primaryPath) {
|
|
5149
5145
|
files.push({ filePath: primaryPath, displayName: entry.name });
|
|
5150
5146
|
}
|
|
@@ -5152,7 +5148,7 @@ function listResultFiles(cwd, limit) {
|
|
|
5152
5148
|
}
|
|
5153
5149
|
for (const entry of entries2) {
|
|
5154
5150
|
if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
|
|
5155
|
-
files.push({ filePath: path6.join(
|
|
5151
|
+
files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
|
|
5156
5152
|
}
|
|
5157
5153
|
}
|
|
5158
5154
|
} catch {
|
|
@@ -5317,8 +5313,7 @@ var resultsExportCommand = command({
|
|
|
5317
5313
|
const { results } = await loadResults(source, cwd);
|
|
5318
5314
|
const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
5319
5315
|
await writeArtifactsFromResults(results, outputDir, {
|
|
5320
|
-
evalFile: sourceFile
|
|
5321
|
-
writeLegacyResults: false
|
|
5316
|
+
evalFile: sourceFile
|
|
5322
5317
|
});
|
|
5323
5318
|
console.log(`Exported ${results.length} test(s) to ${outputDir}`);
|
|
5324
5319
|
for (const result of results) {
|
|
@@ -6640,7 +6635,7 @@ var traceScoreCommand = command({
|
|
|
6640
6635
|
);
|
|
6641
6636
|
if (!hasTrace) {
|
|
6642
6637
|
console.error(
|
|
6643
|
-
`${c2.red}Error:${c2.reset} Source lacks trace metrics.
|
|
6638
|
+
`${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
|
|
6644
6639
|
);
|
|
6645
6640
|
process.exit(1);
|
|
6646
6641
|
}
|
|
@@ -7761,4 +7756,4 @@ export {
|
|
|
7761
7756
|
preprocessArgv,
|
|
7762
7757
|
runCli
|
|
7763
7758
|
};
|
|
7764
|
-
//# sourceMappingURL=chunk-2ELQ6F3C.js.map
|
|
7759
|
+
//# sourceMappingURL=chunk-UK7UMQOX.js.map
|