agentv 3.11.1 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -12
- package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
- package/dist/{chunk-CKMAM2GD.js → chunk-6H4IAXQH.js} +435 -198
- package/dist/chunk-6H4IAXQH.js.map +1 -0
- package/dist/{chunk-OYD2NB55.js → chunk-7OHZAFND.js} +120 -29
- package/dist/chunk-7OHZAFND.js.map +1 -0
- package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} +914 -529
- package/dist/chunk-DJU4C6NS.js.map +1 -0
- package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
- package/dist/cli.js +4 -6
- package/dist/cli.js.map +1 -1
- package/dist/{dist-VUPMLHIV.js → dist-SMKOBBFB.js} +3 -8
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
- package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
- package/dist/index.js +4 -6
- package/dist/{interactive-FZJANO4A.js → interactive-RV664PCR.js} +4 -6
- package/dist/{interactive-FZJANO4A.js.map → interactive-RV664PCR.js.map} +1 -1
- package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
- package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-2QFWRIYL.js +0 -186
- package/dist/chunk-2QFWRIYL.js.map +0 -1
- package/dist/chunk-2RMPO6LY.js +0 -747
- package/dist/chunk-2RMPO6LY.js.map +0 -1
- package/dist/chunk-3Q7WIXT4.js +0 -4846
- package/dist/chunk-3Q7WIXT4.js.map +0 -1
- package/dist/chunk-73O2DCJP.js +0 -1274
- package/dist/chunk-73O2DCJP.js.map +0 -1
- package/dist/chunk-AUKF3Y3W.js +0 -212
- package/dist/chunk-AUKF3Y3W.js.map +0 -1
- package/dist/chunk-BRH7SIDP.js +0 -133
- package/dist/chunk-BRH7SIDP.js.map +0 -1
- package/dist/chunk-BXM4I3BM.js +0 -526
- package/dist/chunk-BXM4I3BM.js.map +0 -1
- package/dist/chunk-CKMAM2GD.js.map +0 -1
- package/dist/chunk-FHTURHTY.js +0 -546
- package/dist/chunk-FHTURHTY.js.map +0 -1
- package/dist/chunk-GJFXQQWG.js +0 -21
- package/dist/chunk-GJFXQQWG.js.map +0 -1
- package/dist/chunk-HKMLG4KF.js +0 -38
- package/dist/chunk-HKMLG4KF.js.map +0 -1
- package/dist/chunk-JGU3PVA4.js +0 -133
- package/dist/chunk-JGU3PVA4.js.map +0 -1
- package/dist/chunk-JK6V4KVD.js +0 -114
- package/dist/chunk-JK6V4KVD.js.map +0 -1
- package/dist/chunk-LHU5FGVZ.js +0 -4804
- package/dist/chunk-LHU5FGVZ.js.map +0 -1
- package/dist/chunk-OL2WGI6E.js +0 -149
- package/dist/chunk-OL2WGI6E.js.map +0 -1
- package/dist/chunk-ONETZL6N.js +0 -15
- package/dist/chunk-ONETZL6N.js.map +0 -1
- package/dist/chunk-OYD2NB55.js.map +0 -1
- package/dist/chunk-QV4UGEN6.js +0 -320
- package/dist/chunk-QV4UGEN6.js.map +0 -1
- package/dist/chunk-QXLDKGF3.js +0 -46
- package/dist/chunk-QXLDKGF3.js.map +0 -1
- package/dist/chunk-U6VEM66A.js +0 -63
- package/dist/chunk-U6VEM66A.js.map +0 -1
- package/dist/chunk-UALXHIMX.js +0 -48
- package/dist/chunk-UALXHIMX.js.map +0 -1
- package/dist/chunk-UGXG73VF.js +0 -55
- package/dist/chunk-UGXG73VF.js.map +0 -1
- package/dist/chunk-UHP5KEDL.js +0 -38
- package/dist/chunk-UHP5KEDL.js.map +0 -1
- package/dist/chunk-V2S5CZU3.js.map +0 -1
- package/dist/chunk-WVSXFZWP.js +0 -204
- package/dist/chunk-WVSXFZWP.js.map +0 -1
- package/dist/chunk-XSUMCWKO.js +0 -30
- package/dist/chunk-XSUMCWKO.js.map +0 -1
- package/dist/chunk-XUO7ZEHU.js +0 -181
- package/dist/chunk-XUO7ZEHU.js.map +0 -1
- package/dist/chunk-YSGUX5JT.js +0 -1002
- package/dist/chunk-YSGUX5JT.js.map +0 -1
- package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
- package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
- package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
- package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
- package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
- package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
- package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
- package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
- package/dist/esm-UYZ3HJBU.js.map +0 -1
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
- package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
- package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
- package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js +0 -10
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
- package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
- package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
- package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
- package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
- package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
- package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
- /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
- /package/dist/{dist-VUPMLHIV.js.map → dist-SMKOBBFB.js.map} +0 -0
- /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
- /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
- /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
|
@@ -1,35 +1,36 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
|
-
|
|
5
|
-
buildBenchmarkArtifact,
|
|
6
|
-
buildGradingArtifact,
|
|
7
|
-
buildTimingArtifact,
|
|
4
|
+
RESULT_INDEX_FILENAME,
|
|
8
5
|
detectFileType,
|
|
9
6
|
findRepoRoot,
|
|
7
|
+
loadLightweightResults,
|
|
8
|
+
loadManifestResults,
|
|
10
9
|
loadRunCache,
|
|
11
10
|
package_default,
|
|
12
|
-
parseJsonlResults,
|
|
13
11
|
resolveEvalPaths,
|
|
12
|
+
resolveExistingRunPrimaryPath,
|
|
13
|
+
resolveResultSourcePath,
|
|
14
14
|
resolveRunCacheFile,
|
|
15
|
+
resolveWorkspaceOrFilePath,
|
|
15
16
|
runEvalCommand,
|
|
16
17
|
selectTarget,
|
|
17
18
|
toSnakeCaseDeep,
|
|
18
19
|
validateConfigFile,
|
|
19
20
|
validateEvalFile,
|
|
20
21
|
validateFileReferences,
|
|
21
|
-
validateTargetsFile
|
|
22
|
-
|
|
22
|
+
validateTargetsFile,
|
|
23
|
+
writeArtifactsFromResults
|
|
24
|
+
} from "./chunk-6H4IAXQH.js";
|
|
23
25
|
import {
|
|
24
26
|
createBuiltinRegistry,
|
|
25
|
-
createProvider,
|
|
26
27
|
executeScript,
|
|
27
|
-
generateRubrics,
|
|
28
28
|
getAgentvHome,
|
|
29
29
|
getOutputFilenames,
|
|
30
30
|
getWorkspacePoolRoot,
|
|
31
31
|
isAgentSkillsFormat,
|
|
32
32
|
loadTestById,
|
|
33
|
+
loadTestSuite,
|
|
33
34
|
loadTests,
|
|
34
35
|
normalizeLineEndings,
|
|
35
36
|
parseAgentSkillsEvals,
|
|
@@ -37,7 +38,7 @@ import {
|
|
|
37
38
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
38
39
|
transpileEvalYamlFile,
|
|
39
40
|
trimBaselineResult
|
|
40
|
-
} from "./chunk-
|
|
41
|
+
} from "./chunk-7OHZAFND.js";
|
|
41
42
|
import {
|
|
42
43
|
__commonJS,
|
|
43
44
|
__esm,
|
|
@@ -2888,7 +2889,6 @@ function oneOf(literals) {
|
|
|
2888
2889
|
}
|
|
2889
2890
|
|
|
2890
2891
|
// src/commands/compare/index.ts
|
|
2891
|
-
import { readFileSync } from "node:fs";
|
|
2892
2892
|
var colors = {
|
|
2893
2893
|
reset: "\x1B[0m",
|
|
2894
2894
|
bold: "\x1B[1m",
|
|
@@ -2902,41 +2902,22 @@ var colors = {
|
|
|
2902
2902
|
var noColor = process.env.NO_COLOR !== void 0 || !process.stdout.isTTY;
|
|
2903
2903
|
var c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ""])) : colors;
|
|
2904
2904
|
function loadJsonlResults(filePath) {
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
const testId = record.test_id ?? record.eval_id;
|
|
2910
|
-
if (typeof testId !== "string") {
|
|
2911
|
-
throw new Error(`Missing test_id in result: ${line}`);
|
|
2912
|
-
}
|
|
2913
|
-
if (typeof record.score !== "number") {
|
|
2914
|
-
throw new Error(`Missing or invalid score in result: ${line}`);
|
|
2915
|
-
}
|
|
2916
|
-
return { testId, score: record.score };
|
|
2917
|
-
});
|
|
2905
|
+
return loadLightweightResults(resolveResultSourcePath(filePath)).map((record) => ({
|
|
2906
|
+
testId: record.testId,
|
|
2907
|
+
score: record.score
|
|
2908
|
+
}));
|
|
2918
2909
|
}
|
|
2919
2910
|
function loadCombinedResults(filePath) {
|
|
2920
|
-
const content = readFileSync(filePath, "utf8");
|
|
2921
|
-
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
2922
2911
|
const groups = /* @__PURE__ */ new Map();
|
|
2923
|
-
for (const
|
|
2924
|
-
const record = JSON.parse(line);
|
|
2925
|
-
const testId = record.test_id ?? record.eval_id;
|
|
2926
|
-
if (typeof testId !== "string") {
|
|
2927
|
-
throw new Error(`Missing test_id in result: ${line}`);
|
|
2928
|
-
}
|
|
2929
|
-
if (typeof record.score !== "number") {
|
|
2930
|
-
throw new Error(`Missing or invalid score in result: ${line}`);
|
|
2931
|
-
}
|
|
2912
|
+
for (const record of loadLightweightResults(resolveResultSourcePath(filePath))) {
|
|
2932
2913
|
if (typeof record.target !== "string") {
|
|
2933
|
-
throw new Error(`Missing target field in combined result: ${
|
|
2914
|
+
throw new Error(`Missing target field in combined result source: ${filePath}`);
|
|
2934
2915
|
}
|
|
2935
2916
|
const target = record.target;
|
|
2936
2917
|
if (!groups.has(target)) {
|
|
2937
2918
|
groups.set(target, []);
|
|
2938
2919
|
}
|
|
2939
|
-
groups.get(target)?.push({ testId, score: record.score });
|
|
2920
|
+
groups.get(target)?.push({ testId: record.testId, score: record.score });
|
|
2940
2921
|
}
|
|
2941
2922
|
return groups;
|
|
2942
2923
|
}
|
|
@@ -3303,11 +3284,11 @@ var compareCommand = command({
|
|
|
3303
3284
|
});
|
|
3304
3285
|
|
|
3305
3286
|
// src/commands/convert/index.ts
|
|
3306
|
-
import { readFileSync
|
|
3287
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
3307
3288
|
import path from "node:path";
|
|
3308
3289
|
import { stringify as stringifyYaml } from "yaml";
|
|
3309
3290
|
async function convertJsonlToHtml(inputPath, outputPath) {
|
|
3310
|
-
const content =
|
|
3291
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3311
3292
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
3312
3293
|
const writer = await HtmlWriter.open(outputPath);
|
|
3313
3294
|
for (const line of lines) {
|
|
@@ -3317,7 +3298,7 @@ async function convertJsonlToHtml(inputPath, outputPath) {
|
|
|
3317
3298
|
return lines.length;
|
|
3318
3299
|
}
|
|
3319
3300
|
function convertJsonlToYaml(inputPath, outputPath) {
|
|
3320
|
-
const content =
|
|
3301
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3321
3302
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
3322
3303
|
let yamlOutput = "";
|
|
3323
3304
|
let isFirst = true;
|
|
@@ -3336,7 +3317,7 @@ function convertJsonlToYaml(inputPath, outputPath) {
|
|
|
3336
3317
|
return lines.length;
|
|
3337
3318
|
}
|
|
3338
3319
|
function convertEvalsJsonToYaml(inputPath) {
|
|
3339
|
-
const content =
|
|
3320
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3340
3321
|
const parsed = JSON.parse(content);
|
|
3341
3322
|
if (!isAgentSkillsFormat(parsed)) {
|
|
3342
3323
|
throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
|
|
@@ -3924,7 +3905,7 @@ var evalPromptCommand = subcommands({
|
|
|
3924
3905
|
});
|
|
3925
3906
|
|
|
3926
3907
|
// src/commands/eval/commands/assert.ts
|
|
3927
|
-
import { readFileSync as
|
|
3908
|
+
import { readFileSync as readFileSync2 } from "node:fs";
|
|
3928
3909
|
import path3 from "node:path";
|
|
3929
3910
|
import fg from "fast-glob";
|
|
3930
3911
|
var evalAssertCommand = command({
|
|
@@ -3956,7 +3937,7 @@ var evalAssertCommand = command({
|
|
|
3956
3937
|
let resolvedOutput;
|
|
3957
3938
|
let resolvedInput;
|
|
3958
3939
|
if (file) {
|
|
3959
|
-
const content = JSON.parse(
|
|
3940
|
+
const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
|
|
3960
3941
|
resolvedOutput = content.output ?? "";
|
|
3961
3942
|
resolvedInput = content.input ?? "";
|
|
3962
3943
|
} else {
|
|
@@ -4144,11 +4125,6 @@ var evalRunCommand = command({
|
|
|
4144
4125
|
long: "otel-file",
|
|
4145
4126
|
description: "Write OTLP JSON trace to file (importable by OTel backends)"
|
|
4146
4127
|
}),
|
|
4147
|
-
traceFile: option({
|
|
4148
|
-
type: optional(string),
|
|
4149
|
-
long: "trace-file",
|
|
4150
|
-
description: "Write human-readable trace JSONL to file"
|
|
4151
|
-
}),
|
|
4152
4128
|
exportOtel: flag({
|
|
4153
4129
|
long: "export-otel",
|
|
4154
4130
|
description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
|
|
@@ -4183,7 +4159,7 @@ var evalRunCommand = command({
|
|
|
4183
4159
|
artifacts: option({
|
|
4184
4160
|
type: optional(string),
|
|
4185
4161
|
long: "artifacts",
|
|
4186
|
-
description: "Write companion artifacts (grading
|
|
4162
|
+
description: "Write companion artifacts (index.jsonl, <test>/grading.json, <test>/timing.json, timing.json, benchmark.json) to the specified directory"
|
|
4187
4163
|
}),
|
|
4188
4164
|
graderTarget: option({
|
|
4189
4165
|
type: optional(string),
|
|
@@ -4203,7 +4179,7 @@ var evalRunCommand = command({
|
|
|
4203
4179
|
},
|
|
4204
4180
|
handler: async (args) => {
|
|
4205
4181
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4206
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4182
|
+
const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
|
|
4207
4183
|
await launchInteractiveWizard();
|
|
4208
4184
|
return;
|
|
4209
4185
|
}
|
|
@@ -4229,7 +4205,6 @@ var evalRunCommand = command({
|
|
|
4229
4205
|
workspacePath: args.workspacePath,
|
|
4230
4206
|
trace: false,
|
|
4231
4207
|
otelFile: args.otelFile,
|
|
4232
|
-
traceFile: args.traceFile,
|
|
4233
4208
|
exportOtel: args.exportOtel,
|
|
4234
4209
|
otelBackend: args.otelBackend,
|
|
4235
4210
|
otelCaptureContent: args.otelCaptureContent,
|
|
@@ -4257,212 +4232,31 @@ var evalCommand = subcommands({
|
|
|
4257
4232
|
}
|
|
4258
4233
|
});
|
|
4259
4234
|
|
|
4260
|
-
// src/commands/generate/rubrics.ts
|
|
4261
|
-
import { readFile, writeFile as writeFile2 } from "node:fs/promises";
|
|
4262
|
-
import path4 from "node:path";
|
|
4263
|
-
import { pathToFileURL } from "node:url";
|
|
4264
|
-
import { isMap, isSeq, parseDocument } from "yaml";
|
|
4265
|
-
function isJsonObject(value) {
|
|
4266
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
4267
|
-
}
|
|
4268
|
-
function asString(value) {
|
|
4269
|
-
return typeof value === "string" ? value : void 0;
|
|
4270
|
-
}
|
|
4271
|
-
async function loadRubricGenerator() {
|
|
4272
|
-
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
4273
|
-
if (customGenerator) {
|
|
4274
|
-
const generatorPath = path4.resolve(customGenerator);
|
|
4275
|
-
const generatorUrl = pathToFileURL(generatorPath).href;
|
|
4276
|
-
const module = await import(generatorUrl);
|
|
4277
|
-
return module.generateRubrics;
|
|
4278
|
-
}
|
|
4279
|
-
return generateRubrics;
|
|
4280
|
-
}
|
|
4281
|
-
async function generateRubricsCommand(options) {
|
|
4282
|
-
const { file, target: targetOverride, verbose } = options;
|
|
4283
|
-
console.log(`Generating rubrics for: ${file}`);
|
|
4284
|
-
const absolutePath = path4.resolve(file);
|
|
4285
|
-
const content = await readFile(absolutePath, "utf8");
|
|
4286
|
-
const doc = parseDocument(content);
|
|
4287
|
-
const parsed = doc.toJSON();
|
|
4288
|
-
if (!isJsonObject(parsed)) {
|
|
4289
|
-
throw new Error(`Invalid YAML file format: ${file}`);
|
|
4290
|
-
}
|
|
4291
|
-
const suite = parsed;
|
|
4292
|
-
const evalcases = suite.tests;
|
|
4293
|
-
if (!Array.isArray(evalcases)) {
|
|
4294
|
-
throw new Error(`No tests found in ${file}`);
|
|
4295
|
-
}
|
|
4296
|
-
const targetSelection = await selectTarget({
|
|
4297
|
-
testFilePath: absolutePath,
|
|
4298
|
-
repoRoot: process.cwd(),
|
|
4299
|
-
cwd: process.cwd(),
|
|
4300
|
-
cliTargetName: targetOverride,
|
|
4301
|
-
dryRun: false,
|
|
4302
|
-
dryRunDelay: 0,
|
|
4303
|
-
dryRunDelayMin: 0,
|
|
4304
|
-
dryRunDelayMax: 0,
|
|
4305
|
-
env: process.env
|
|
4306
|
-
});
|
|
4307
|
-
if (verbose) {
|
|
4308
|
-
console.log(`Using target: ${targetSelection.targetName}`);
|
|
4309
|
-
}
|
|
4310
|
-
const provider = createProvider(targetSelection.resolvedTarget);
|
|
4311
|
-
const generateRubricsFunc = await loadRubricGenerator();
|
|
4312
|
-
let updatedCount = 0;
|
|
4313
|
-
let skippedCount = 0;
|
|
4314
|
-
const evalcasesNode = doc.getIn(["tests"]);
|
|
4315
|
-
if (!evalcasesNode || !isSeq(evalcasesNode)) {
|
|
4316
|
-
throw new Error("tests must be a sequence");
|
|
4317
|
-
}
|
|
4318
|
-
for (let i = 0; i < evalcases.length; i++) {
|
|
4319
|
-
const rawCase = evalcases[i];
|
|
4320
|
-
if (!isJsonObject(rawCase)) {
|
|
4321
|
-
continue;
|
|
4322
|
-
}
|
|
4323
|
-
const evalCase = rawCase;
|
|
4324
|
-
const id = asString(evalCase.id) ?? "unknown";
|
|
4325
|
-
const expectedOutcome = asString(evalCase.criteria) ?? asString(evalCase.outcome);
|
|
4326
|
-
if (!expectedOutcome) {
|
|
4327
|
-
if (verbose) {
|
|
4328
|
-
console.log(` Skipping ${id}: no criteria`);
|
|
4329
|
-
}
|
|
4330
|
-
skippedCount++;
|
|
4331
|
-
continue;
|
|
4332
|
-
}
|
|
4333
|
-
if (evalCase.rubrics !== void 0) {
|
|
4334
|
-
if (verbose) {
|
|
4335
|
-
console.log(` Skipping ${id}: rubrics already defined`);
|
|
4336
|
-
}
|
|
4337
|
-
skippedCount++;
|
|
4338
|
-
continue;
|
|
4339
|
-
}
|
|
4340
|
-
console.log(` Generating rubrics for: ${id}`);
|
|
4341
|
-
const question = extractQuestion(evalCase);
|
|
4342
|
-
const referenceAnswer = asString(evalCase.reference_answer);
|
|
4343
|
-
const rubrics = await generateRubricsFunc({
|
|
4344
|
-
criteria: expectedOutcome,
|
|
4345
|
-
question,
|
|
4346
|
-
referenceAnswer,
|
|
4347
|
-
provider
|
|
4348
|
-
});
|
|
4349
|
-
const caseNode = evalcasesNode.items[i];
|
|
4350
|
-
if (caseNode && isMap(caseNode)) {
|
|
4351
|
-
caseNode.set(
|
|
4352
|
-
"rubrics",
|
|
4353
|
-
rubrics.filter((r) => r.outcome !== void 0).map((r) => ({
|
|
4354
|
-
id: r.id,
|
|
4355
|
-
outcome: r.outcome,
|
|
4356
|
-
weight: r.weight,
|
|
4357
|
-
required: r.required ?? true
|
|
4358
|
-
}))
|
|
4359
|
-
);
|
|
4360
|
-
}
|
|
4361
|
-
updatedCount++;
|
|
4362
|
-
if (verbose) {
|
|
4363
|
-
console.log(` Generated ${rubrics.length} rubric(s)`);
|
|
4364
|
-
}
|
|
4365
|
-
}
|
|
4366
|
-
if (updatedCount > 0) {
|
|
4367
|
-
const output = doc.toString();
|
|
4368
|
-
await writeFile2(absolutePath, output, "utf8");
|
|
4369
|
-
console.log(`
|
|
4370
|
-
Updated ${updatedCount} test(s) with generated rubrics`);
|
|
4371
|
-
if (skippedCount > 0) {
|
|
4372
|
-
console.log(`Skipped ${skippedCount} test(s)`);
|
|
4373
|
-
}
|
|
4374
|
-
} else {
|
|
4375
|
-
console.log("\nNo tests updated (all already have rubrics or missing criteria)");
|
|
4376
|
-
}
|
|
4377
|
-
}
|
|
4378
|
-
function extractQuestion(evalCase) {
|
|
4379
|
-
const explicitQuestion = asString(evalCase.question);
|
|
4380
|
-
if (explicitQuestion) {
|
|
4381
|
-
return explicitQuestion;
|
|
4382
|
-
}
|
|
4383
|
-
const inputMessages = evalCase.input;
|
|
4384
|
-
if (!Array.isArray(inputMessages)) {
|
|
4385
|
-
return void 0;
|
|
4386
|
-
}
|
|
4387
|
-
for (const msg of inputMessages) {
|
|
4388
|
-
if (!isJsonObject(msg)) {
|
|
4389
|
-
continue;
|
|
4390
|
-
}
|
|
4391
|
-
if (msg.role === "user" && typeof msg.content === "string") {
|
|
4392
|
-
return msg.content;
|
|
4393
|
-
}
|
|
4394
|
-
}
|
|
4395
|
-
return void 0;
|
|
4396
|
-
}
|
|
4397
|
-
|
|
4398
|
-
// src/commands/generate/index.ts
|
|
4399
|
-
var rubricsCommand = command({
|
|
4400
|
-
name: "rubrics",
|
|
4401
|
-
description: "Generate rubrics from criteria in YAML eval file",
|
|
4402
|
-
args: {
|
|
4403
|
-
file: positional({
|
|
4404
|
-
type: string,
|
|
4405
|
-
displayName: "file",
|
|
4406
|
-
description: "Path to YAML eval file"
|
|
4407
|
-
}),
|
|
4408
|
-
target: option({
|
|
4409
|
-
type: optional(string),
|
|
4410
|
-
long: "target",
|
|
4411
|
-
short: "t",
|
|
4412
|
-
description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
|
|
4413
|
-
}),
|
|
4414
|
-
verbose: flag({
|
|
4415
|
-
long: "verbose",
|
|
4416
|
-
short: "v",
|
|
4417
|
-
description: "Show detailed progress"
|
|
4418
|
-
})
|
|
4419
|
-
},
|
|
4420
|
-
handler: async ({ file, target, verbose }) => {
|
|
4421
|
-
try {
|
|
4422
|
-
await generateRubricsCommand({
|
|
4423
|
-
file,
|
|
4424
|
-
target,
|
|
4425
|
-
verbose
|
|
4426
|
-
});
|
|
4427
|
-
} catch (error) {
|
|
4428
|
-
console.error(`Error: ${error.message}`);
|
|
4429
|
-
process.exit(1);
|
|
4430
|
-
}
|
|
4431
|
-
}
|
|
4432
|
-
});
|
|
4433
|
-
var generateCommand = subcommands({
|
|
4434
|
-
name: "generate",
|
|
4435
|
-
description: "Generate evaluation artifacts",
|
|
4436
|
-
cmds: {
|
|
4437
|
-
rubrics: rubricsCommand
|
|
4438
|
-
}
|
|
4439
|
-
});
|
|
4440
|
-
|
|
4441
4235
|
// src/commands/init/index.ts
|
|
4442
4236
|
import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
4443
|
-
import
|
|
4237
|
+
import path5 from "node:path";
|
|
4444
4238
|
import * as readline from "node:readline/promises";
|
|
4445
4239
|
|
|
4446
4240
|
// src/templates/index.ts
|
|
4447
|
-
import { readFileSync as
|
|
4448
|
-
import
|
|
4241
|
+
import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
|
|
4242
|
+
import path4 from "node:path";
|
|
4449
4243
|
import { fileURLToPath } from "node:url";
|
|
4450
4244
|
function getAgentvTemplates() {
|
|
4451
4245
|
return getTemplatesFromDir(".agentv");
|
|
4452
4246
|
}
|
|
4453
4247
|
function getEnvExampleTemplate() {
|
|
4454
|
-
const currentDir =
|
|
4455
|
-
const templatesBase = currentDir.includes(`${
|
|
4456
|
-
const content =
|
|
4248
|
+
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4249
|
+
const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
|
|
4250
|
+
const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
|
|
4457
4251
|
return { path: ".env.example", content };
|
|
4458
4252
|
}
|
|
4459
4253
|
function getTemplatesFromDir(subdir) {
|
|
4460
|
-
const currentDir =
|
|
4254
|
+
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4461
4255
|
let templatesDir;
|
|
4462
|
-
if (currentDir.includes(`${
|
|
4463
|
-
templatesDir =
|
|
4256
|
+
if (currentDir.includes(`${path4.sep}dist`)) {
|
|
4257
|
+
templatesDir = path4.join(currentDir, "templates", subdir);
|
|
4464
4258
|
} else {
|
|
4465
|
-
templatesDir =
|
|
4259
|
+
templatesDir = path4.join(currentDir, subdir);
|
|
4466
4260
|
}
|
|
4467
4261
|
return readTemplatesRecursively(templatesDir, "");
|
|
4468
4262
|
}
|
|
@@ -4470,15 +4264,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
4470
4264
|
const templates = [];
|
|
4471
4265
|
const entries2 = readdirSync(dir);
|
|
4472
4266
|
for (const entry of entries2) {
|
|
4473
|
-
const fullPath =
|
|
4267
|
+
const fullPath = path4.join(dir, entry);
|
|
4474
4268
|
const stat3 = statSync(fullPath);
|
|
4475
|
-
const entryRelativePath = relativePath ?
|
|
4269
|
+
const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
|
|
4476
4270
|
if (stat3.isDirectory()) {
|
|
4477
4271
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
4478
4272
|
} else {
|
|
4479
|
-
const content =
|
|
4273
|
+
const content = readFileSync3(fullPath, "utf-8");
|
|
4480
4274
|
templates.push({
|
|
4481
|
-
path: entryRelativePath.split(
|
|
4275
|
+
path: entryRelativePath.split(path4.sep).join("/"),
|
|
4482
4276
|
// Normalize to forward slashes
|
|
4483
4277
|
content
|
|
4484
4278
|
});
|
|
@@ -4507,22 +4301,22 @@ async function promptYesNo(message) {
|
|
|
4507
4301
|
}
|
|
4508
4302
|
}
|
|
4509
4303
|
async function initCommand(options = {}) {
|
|
4510
|
-
const targetPath =
|
|
4511
|
-
const agentvDir =
|
|
4304
|
+
const targetPath = path5.resolve(options.targetPath ?? ".");
|
|
4305
|
+
const agentvDir = path5.join(targetPath, ".agentv");
|
|
4512
4306
|
const otherAgentvTemplates = getAgentvTemplates();
|
|
4513
4307
|
const envTemplate = getEnvExampleTemplate();
|
|
4514
4308
|
const existingFiles = [];
|
|
4515
4309
|
if (envTemplate) {
|
|
4516
|
-
const envFilePath =
|
|
4310
|
+
const envFilePath = path5.join(targetPath, ".env.example");
|
|
4517
4311
|
if (existsSync(envFilePath)) {
|
|
4518
4312
|
existingFiles.push(".env.example");
|
|
4519
4313
|
}
|
|
4520
4314
|
}
|
|
4521
4315
|
if (existsSync(agentvDir)) {
|
|
4522
4316
|
for (const template of otherAgentvTemplates) {
|
|
4523
|
-
const targetFilePath =
|
|
4317
|
+
const targetFilePath = path5.join(agentvDir, template.path);
|
|
4524
4318
|
if (existsSync(targetFilePath)) {
|
|
4525
|
-
existingFiles.push(
|
|
4319
|
+
existingFiles.push(path5.relative(targetPath, targetFilePath));
|
|
4526
4320
|
}
|
|
4527
4321
|
}
|
|
4528
4322
|
}
|
|
@@ -4544,18 +4338,18 @@ async function initCommand(options = {}) {
|
|
|
4544
4338
|
mkdirSync(agentvDir, { recursive: true });
|
|
4545
4339
|
}
|
|
4546
4340
|
if (envTemplate) {
|
|
4547
|
-
const envFilePath =
|
|
4341
|
+
const envFilePath = path5.join(targetPath, ".env.example");
|
|
4548
4342
|
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
4549
4343
|
console.log("Created .env.example");
|
|
4550
4344
|
}
|
|
4551
4345
|
for (const template of otherAgentvTemplates) {
|
|
4552
|
-
const targetFilePath =
|
|
4553
|
-
const targetDirPath =
|
|
4346
|
+
const targetFilePath = path5.join(agentvDir, template.path);
|
|
4347
|
+
const targetDirPath = path5.dirname(targetFilePath);
|
|
4554
4348
|
if (!existsSync(targetDirPath)) {
|
|
4555
4349
|
mkdirSync(targetDirPath, { recursive: true });
|
|
4556
4350
|
}
|
|
4557
4351
|
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
4558
|
-
console.log(`Created ${
|
|
4352
|
+
console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
|
|
4559
4353
|
}
|
|
4560
4354
|
console.log("\nAgentV initialized successfully!");
|
|
4561
4355
|
console.log("\nFiles installed to root:");
|
|
@@ -4563,7 +4357,7 @@ async function initCommand(options = {}) {
|
|
|
4563
4357
|
console.log(" - .env.example");
|
|
4564
4358
|
}
|
|
4565
4359
|
console.log(`
|
|
4566
|
-
Files installed to ${
|
|
4360
|
+
Files installed to ${path5.relative(targetPath, agentvDir)}:`);
|
|
4567
4361
|
for (const t of otherAgentvTemplates) {
|
|
4568
4362
|
console.log(` - ${t.path}`);
|
|
4569
4363
|
}
|
|
@@ -4593,13 +4387,443 @@ var initCmdTsCommand = command({
|
|
|
4593
4387
|
}
|
|
4594
4388
|
});
|
|
4595
4389
|
|
|
4390
|
+
// src/commands/pipeline/bench.ts
|
|
4391
|
+
import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
|
|
4392
|
+
import { join } from "node:path";
|
|
4393
|
+
var evalBenchCommand = command({
|
|
4394
|
+
name: "bench",
|
|
4395
|
+
description: "Merge evaluator scores and produce benchmark artifacts",
|
|
4396
|
+
args: {
|
|
4397
|
+
exportDir: positional({
|
|
4398
|
+
type: string,
|
|
4399
|
+
displayName: "export-dir",
|
|
4400
|
+
description: "Export directory from pipeline input/grade"
|
|
4401
|
+
})
|
|
4402
|
+
},
|
|
4403
|
+
handler: async ({ exportDir }) => {
|
|
4404
|
+
const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
|
|
4405
|
+
const testIds = manifest.test_ids;
|
|
4406
|
+
const targetName = manifest.target?.name ?? "unknown";
|
|
4407
|
+
const stdinData = await readStdin();
|
|
4408
|
+
const llmScores = stdinData ? JSON.parse(stdinData) : {};
|
|
4409
|
+
const indexLines = [];
|
|
4410
|
+
const allPassRates = [];
|
|
4411
|
+
for (const testId of testIds) {
|
|
4412
|
+
const testDir = join(exportDir, testId);
|
|
4413
|
+
const evaluators = [];
|
|
4414
|
+
const allAssertions = [];
|
|
4415
|
+
const codeResultsDir = join(testDir, "code_grader_results");
|
|
4416
|
+
try {
|
|
4417
|
+
const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
|
|
4418
|
+
for (const file of resultFiles) {
|
|
4419
|
+
const result = JSON.parse(await readFile(join(codeResultsDir, file), "utf8"));
|
|
4420
|
+
evaluators.push({
|
|
4421
|
+
name: result.name,
|
|
4422
|
+
type: "code-grader",
|
|
4423
|
+
score: result.score,
|
|
4424
|
+
weight: result.weight ?? 1,
|
|
4425
|
+
assertions: result.assertions ?? []
|
|
4426
|
+
});
|
|
4427
|
+
for (const a of result.assertions ?? []) {
|
|
4428
|
+
allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
|
|
4429
|
+
}
|
|
4430
|
+
}
|
|
4431
|
+
} catch {
|
|
4432
|
+
}
|
|
4433
|
+
const testLlmScores = llmScores[testId] ?? {};
|
|
4434
|
+
const llmGradersDir = join(testDir, "llm_graders");
|
|
4435
|
+
try {
|
|
4436
|
+
const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
|
|
4437
|
+
for (const file of graderFiles) {
|
|
4438
|
+
const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
|
|
4439
|
+
const graderName = graderMeta.name;
|
|
4440
|
+
const llmResult = testLlmScores[graderName];
|
|
4441
|
+
if (llmResult) {
|
|
4442
|
+
evaluators.push({
|
|
4443
|
+
name: graderName,
|
|
4444
|
+
type: "llm-grader",
|
|
4445
|
+
score: llmResult.score,
|
|
4446
|
+
weight: graderMeta.weight ?? 1,
|
|
4447
|
+
assertions: llmResult.assertions ?? []
|
|
4448
|
+
});
|
|
4449
|
+
for (const a of llmResult.assertions ?? []) {
|
|
4450
|
+
allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
|
|
4451
|
+
}
|
|
4452
|
+
}
|
|
4453
|
+
}
|
|
4454
|
+
} catch {
|
|
4455
|
+
}
|
|
4456
|
+
const totalWeight = evaluators.reduce((sum, e) => sum + e.weight, 0);
|
|
4457
|
+
const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
|
|
4458
|
+
const passed = allAssertions.filter((a) => a.passed).length;
|
|
4459
|
+
const failed = allAssertions.filter((a) => !a.passed).length;
|
|
4460
|
+
const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
|
|
4461
|
+
allPassRates.push(passRate);
|
|
4462
|
+
const grading = {
|
|
4463
|
+
assertions: allAssertions,
|
|
4464
|
+
summary: { passed, failed, total: allAssertions.length, pass_rate: passRate },
|
|
4465
|
+
execution_metrics: { tool_calls: {}, total_tool_calls: 0, errors_encountered: 0 },
|
|
4466
|
+
evaluators: evaluators.map((e) => ({
|
|
4467
|
+
name: e.name,
|
|
4468
|
+
type: e.type,
|
|
4469
|
+
score: e.score,
|
|
4470
|
+
reasoning: "",
|
|
4471
|
+
weight: e.weight
|
|
4472
|
+
}))
|
|
4473
|
+
};
|
|
4474
|
+
await writeFile2(
|
|
4475
|
+
join(testDir, "grading.json"),
|
|
4476
|
+
`${JSON.stringify(grading, null, 2)}
|
|
4477
|
+
`,
|
|
4478
|
+
"utf8"
|
|
4479
|
+
);
|
|
4480
|
+
indexLines.push(
|
|
4481
|
+
JSON.stringify({
|
|
4482
|
+
timestamp: manifest.timestamp,
|
|
4483
|
+
test_id: testId,
|
|
4484
|
+
score: Math.round(weightedScore * 1e3) / 1e3,
|
|
4485
|
+
target: targetName,
|
|
4486
|
+
grading_path: `${testId}/grading.json`,
|
|
4487
|
+
timing_path: `${testId}/timing.json`
|
|
4488
|
+
})
|
|
4489
|
+
);
|
|
4490
|
+
}
|
|
4491
|
+
await writeFile2(
|
|
4492
|
+
join(exportDir, "index.jsonl"),
|
|
4493
|
+
indexLines.length > 0 ? `${indexLines.join("\n")}
|
|
4494
|
+
` : "",
|
|
4495
|
+
"utf8"
|
|
4496
|
+
);
|
|
4497
|
+
const passRateStats = computeStats(allPassRates);
|
|
4498
|
+
const benchmark = {
|
|
4499
|
+
metadata: {
|
|
4500
|
+
eval_file: manifest.eval_file,
|
|
4501
|
+
timestamp: manifest.timestamp,
|
|
4502
|
+
targets: [targetName],
|
|
4503
|
+
tests_run: testIds
|
|
4504
|
+
},
|
|
4505
|
+
run_summary: {
|
|
4506
|
+
[targetName]: {
|
|
4507
|
+
pass_rate: passRateStats,
|
|
4508
|
+
time_seconds: { mean: 0, stddev: 0 },
|
|
4509
|
+
tokens: { mean: 0, stddev: 0 }
|
|
4510
|
+
}
|
|
4511
|
+
},
|
|
4512
|
+
notes: []
|
|
4513
|
+
};
|
|
4514
|
+
await writeFile2(
|
|
4515
|
+
join(exportDir, "benchmark.json"),
|
|
4516
|
+
`${JSON.stringify(benchmark, null, 2)}
|
|
4517
|
+
`,
|
|
4518
|
+
"utf8"
|
|
4519
|
+
);
|
|
4520
|
+
console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
|
|
4521
|
+
}
|
|
4522
|
+
});
|
|
4523
|
+
/**
 * Drain standard input and return its contents as text.
 *
 * @returns {Promise<string>} Everything read from `process.stdin`, decoded as
 *   UTF-8 with leading and trailing whitespace removed.
 */
async function readStdin() {
  const pieces = [];
  for await (const piece of process.stdin) {
    pieces.push(piece);
  }
  const combined = Buffer.concat(pieces);
  return combined.toString("utf8").trim();
}
|
|
4530
|
+
/**
 * Summarise a list of numbers as mean and population standard deviation.
 *
 * @param {number[]} values - Samples to summarise.
 * @returns {{ mean: number, stddev: number }} Both figures rounded to three
 *   decimal places; `{ mean: 0, stddev: 0 }` for an empty list.
 */
function computeStats(values) {
  const count = values.length;
  if (count === 0) {
    return { mean: 0, stddev: 0 };
  }
  const roundTo3 = (x) => Math.round(x * 1e3) / 1e3;
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  const average = total / count;
  let squaredDeviations = 0;
  values.forEach((value) => {
    squaredDeviations += (value - average) ** 2;
  });
  // Divides by the sample count (not count - 1): population variance.
  const spread = Math.sqrt(squaredDeviations / count);
  return { mean: roundTo3(average), stddev: roundTo3(spread) };
}
|
|
4539
|
+
|
|
4540
|
+
// src/commands/pipeline/grade.ts
|
|
4541
|
+
import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
|
|
4542
|
+
import { join as join2 } from "node:path";
|
|
4543
|
+
var evalGradeCommand = command({
|
|
4544
|
+
name: "grade",
|
|
4545
|
+
description: "Run code-grader assertions on responses in an export directory",
|
|
4546
|
+
args: {
|
|
4547
|
+
exportDir: positional({
|
|
4548
|
+
type: string,
|
|
4549
|
+
displayName: "export-dir",
|
|
4550
|
+
description: "Export directory from pipeline input"
|
|
4551
|
+
})
|
|
4552
|
+
},
|
|
4553
|
+
handler: async ({ exportDir }) => {
|
|
4554
|
+
const manifestPath = join2(exportDir, "manifest.json");
|
|
4555
|
+
const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
|
|
4556
|
+
const testIds = manifest.test_ids;
|
|
4557
|
+
let totalGraders = 0;
|
|
4558
|
+
let totalPassed = 0;
|
|
4559
|
+
for (const testId of testIds) {
|
|
4560
|
+
const testDir = join2(exportDir, testId);
|
|
4561
|
+
const codeGradersDir = join2(testDir, "code_graders");
|
|
4562
|
+
const resultsDir = join2(testDir, "code_grader_results");
|
|
4563
|
+
let graderFiles;
|
|
4564
|
+
try {
|
|
4565
|
+
graderFiles = (await readdir2(codeGradersDir)).filter((f) => f.endsWith(".json"));
|
|
4566
|
+
} catch {
|
|
4567
|
+
continue;
|
|
4568
|
+
}
|
|
4569
|
+
if (graderFiles.length === 0) continue;
|
|
4570
|
+
await mkdir2(resultsDir, { recursive: true });
|
|
4571
|
+
const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
|
|
4572
|
+
const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
|
|
4573
|
+
for (const graderFile of graderFiles) {
|
|
4574
|
+
const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
|
|
4575
|
+
const graderName = graderConfig.name;
|
|
4576
|
+
const payload = JSON.stringify({
|
|
4577
|
+
output: [{ role: "assistant", content: responseText }],
|
|
4578
|
+
input: inputData.input_messages,
|
|
4579
|
+
question: inputData.input_text,
|
|
4580
|
+
criteria: "",
|
|
4581
|
+
expected_output: [],
|
|
4582
|
+
reference_answer: "",
|
|
4583
|
+
input_files: [],
|
|
4584
|
+
trace: null,
|
|
4585
|
+
token_usage: null,
|
|
4586
|
+
cost_usd: null,
|
|
4587
|
+
duration_ms: null,
|
|
4588
|
+
start_time: null,
|
|
4589
|
+
end_time: null,
|
|
4590
|
+
file_changes: null,
|
|
4591
|
+
workspace_path: null,
|
|
4592
|
+
config: graderConfig.config ?? null,
|
|
4593
|
+
metadata: {},
|
|
4594
|
+
input_text: inputData.input_text,
|
|
4595
|
+
output_text: responseText,
|
|
4596
|
+
expected_output_text: ""
|
|
4597
|
+
});
|
|
4598
|
+
try {
|
|
4599
|
+
const stdout = await executeScript(
|
|
4600
|
+
graderConfig.command,
|
|
4601
|
+
payload,
|
|
4602
|
+
void 0,
|
|
4603
|
+
graderConfig.cwd
|
|
4604
|
+
);
|
|
4605
|
+
const parsed = JSON.parse(stdout);
|
|
4606
|
+
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
4607
|
+
const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
|
|
4608
|
+
const result = {
|
|
4609
|
+
name: graderName,
|
|
4610
|
+
type: "code-grader",
|
|
4611
|
+
score,
|
|
4612
|
+
weight: graderConfig.weight ?? 1,
|
|
4613
|
+
assertions,
|
|
4614
|
+
details: parsed.details ?? {}
|
|
4615
|
+
};
|
|
4616
|
+
await writeFile3(
|
|
4617
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4618
|
+
`${JSON.stringify(result, null, 2)}
|
|
4619
|
+
`,
|
|
4620
|
+
"utf8"
|
|
4621
|
+
);
|
|
4622
|
+
totalGraders++;
|
|
4623
|
+
if (score >= 0.5) totalPassed++;
|
|
4624
|
+
} catch (error) {
|
|
4625
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4626
|
+
console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
|
|
4627
|
+
const errorResult = {
|
|
4628
|
+
name: graderName,
|
|
4629
|
+
type: "code-grader",
|
|
4630
|
+
score: 0,
|
|
4631
|
+
weight: graderConfig.weight ?? 1,
|
|
4632
|
+
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
4633
|
+
details: { error: message }
|
|
4634
|
+
};
|
|
4635
|
+
await writeFile3(
|
|
4636
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4637
|
+
`${JSON.stringify(errorResult, null, 2)}
|
|
4638
|
+
`,
|
|
4639
|
+
"utf8"
|
|
4640
|
+
);
|
|
4641
|
+
totalGraders++;
|
|
4642
|
+
}
|
|
4643
|
+
}
|
|
4644
|
+
}
|
|
4645
|
+
console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
|
|
4646
|
+
}
|
|
4647
|
+
});
|
|
4648
|
+
|
|
4649
|
+
// src/commands/pipeline/input.ts
|
|
4650
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
4651
|
+
import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
|
|
4652
|
+
import { dirname, join as join3, resolve } from "node:path";
|
|
4653
|
+
/**
 * `input` subcommand: extracts everything an agent-mode run needs from an eval
 * file into an output directory.
 *
 * Per test it writes `input.json`, `invoke.json`, `criteria.md`, optionally
 * `expected_output.json`, and the grader configs (via `writeGraderConfigs`).
 * A top-level `manifest.json` records the eval file, a timestamp, the target
 * name/kind and the list of test ids. Exits with status 1 when the eval file
 * contains no tests.
 */
var evalInputCommand = command({
  name: "input",
  description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
  args: {
    evalPath: positional({
      type: string,
      displayName: "eval-path",
      description: "Path to eval YAML file"
    }),
    out: option({
      type: string,
      long: "out",
      description: "Output directory for extracted inputs"
    })
  },
  handler: async ({ evalPath, out }) => {
    const resolvedEvalPath = resolve(evalPath);
    const outDir = resolve(out);
    const evalDir = dirname(resolvedEvalPath);
    const repoRoot = await findRepoRoot(evalDir);
    const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
    const tests = suite.tests;
    if (tests.length === 0) {
      console.error("No tests found in eval file.");
      process.exit(1);
    }
    let targetInfo = null;
    let targetName = "agent";
    let targetKind = "agent";
    try {
      const selection = await selectTarget({
        testFilePath: resolvedEvalPath,
        repoRoot,
        cwd: evalDir,
        dryRun: false,
        dryRunDelay: 0,
        dryRunDelayMin: 0,
        dryRunDelayMax: 0,
        env: process.env
      });
      targetName = selection.targetName;
      if (selection.resolvedTarget.kind === "cli") {
        targetKind = "cli";
        const config = selection.resolvedTarget.config;
        targetInfo = {
          kind: "cli",
          command: config.command,
          cwd: config.cwd ?? evalDir,
          timeoutMs: config.timeoutMs ?? 3e4
        };
      }
    } catch {
      // Best effort: when target selection fails, keep whatever was set so far
      // (by default the "agent" target) and emit agent-style invoke files.
    }
    const testIds = [];
    for (const test of tests) {
      const testDir = join3(outDir, test.id);
      await mkdir3(testDir, { recursive: true });
      testIds.push(test.id);
      const inputText = test.question;
      // Message content is passed through unchanged, string or not.
      const inputMessages = test.input.map((m) => ({
        role: m.role,
        content: m.content
      }));
      await writeJson(join3(testDir, "input.json"), {
        input_text: inputText,
        input_messages: inputMessages,
        file_paths: test.file_paths,
        metadata: test.metadata ?? {}
      });
      if (targetInfo) {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "cli",
          command: targetInfo.command,
          cwd: targetInfo.cwd,
          timeout_ms: targetInfo.timeoutMs,
          env: {}
        });
      } else {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "agent",
          instructions: "Execute this task in the current workspace. The agent IS the target."
        });
      }
      await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
      // Only emit expected_output.json when there is something to compare against.
      const hasReferenceAnswer = test.reference_answer !== void 0 && test.reference_answer !== "";
      if (test.expected_output.length > 0 || hasReferenceAnswer) {
        await writeJson(join3(testDir, "expected_output.json"), {
          expected_output: test.expected_output,
          reference_answer: test.reference_answer ?? ""
        });
      }
      await writeGraderConfigs(testDir, test.assertions ?? [], evalDir);
    }
    await writeJson(join3(outDir, "manifest.json"), {
      eval_file: resolvedEvalPath,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      target: {
        name: targetName,
        kind: targetKind
      },
      test_ids: testIds
    });
    console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
  }
});
|
|
4757
|
+
/**
 * Write one JSON config per code/LLM grader assertion of a test.
 *
 * Code graders (`code-grader` / `code-judge`) go to `<testDir>/code_graders`,
 * LLM graders (`llm-grader` / `llm-judge`) to `<testDir>/llm_graders`. Each
 * directory is created only when the first matching assertion is seen.
 * Assertions of any other type are ignored.
 *
 * @param {string} testDir - Directory of the test being exported.
 * @param {object[]} assertions - Assertion configs of the test.
 * @param {string} evalDir - Fallback working directory for code graders.
 * @returns {Promise<void>}
 */
async function writeGraderConfigs(testDir, assertions, evalDir) {
  const codeGradersDir = join3(testDir, "code_graders");
  const llmGradersDir = join3(testDir, "llm_graders");
  const createdDirs = new Set();
  const ensureDir = async (dir) => {
    if (createdDirs.has(dir)) return;
    await mkdir3(dir, { recursive: true });
    createdDirs.add(dir);
  };
  for (const assertion of assertions) {
    switch (assertion.type) {
      case "code-grader":
      case "code-judge": {
        await ensureDir(codeGradersDir);
        await writeJson(join3(codeGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          command: assertion.command,
          cwd: assertion.resolvedCwd ?? assertion.cwd ?? evalDir,
          weight: assertion.weight ?? 1,
          config: assertion.config ?? {}
        });
        break;
      }
      case "llm-grader":
      case "llm-judge": {
        await ensureDir(llmGradersDir);
        // An inline string prompt is the default and the fallback when the
        // resolved prompt file cannot be read.
        const inlinePrompt = typeof assertion.prompt === "string" ? assertion.prompt : "";
        let promptContent = inlinePrompt;
        if (assertion.resolvedPromptPath) {
          try {
            promptContent = await readFile3(assertion.resolvedPromptPath, "utf8");
          } catch {
            promptContent = inlinePrompt;
          }
        }
        await writeJson(join3(llmGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          prompt_content: promptContent,
          weight: assertion.weight ?? 1,
          threshold: 0.5,
          config: {}
        });
        break;
      }
      default:
        break;
    }
  }
}
|
|
4802
|
+
/**
 * Serialise `data` as 2-space-indented JSON followed by a newline and write it
 * to `filePath` as UTF-8.
 *
 * @param {string} filePath - Destination file.
 * @param {unknown} data - Value to serialise.
 * @returns {Promise<void>}
 */
async function writeJson(filePath, data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile4(filePath, `${serialized}\n`, "utf8");
}
|
|
4806
|
+
|
|
4807
|
+
// src/commands/pipeline/index.ts
|
|
4808
|
+
/**
 * `pipeline` command group exposing the three agent-mode subcommands
 * `input`, `grade` and `bench`.
 */
var pipelineCommand = subcommands({
  name: "pipeline",
  description: "Agent-mode eval pipeline commands (input \u2192 grade \u2192 bench)",
  cmds: {
    input: evalInputCommand,
    grade: evalGradeCommand,
    bench: evalBenchCommand,
  },
});
|
|
4817
|
+
|
|
4596
4818
|
// src/commands/results/export.ts
|
|
4597
|
-
import
|
|
4598
|
-
|
|
4819
|
+
import path7 from "node:path";
|
|
4820
|
+
|
|
4821
|
+
// src/commands/results/shared.ts
|
|
4822
|
+
import { existsSync as existsSync2 } from "node:fs";
|
|
4599
4823
|
|
|
4600
4824
|
// src/commands/trace/utils.ts
|
|
4601
|
-
import { readFileSync as
|
|
4602
|
-
import
|
|
4825
|
+
import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
4826
|
+
import path6 from "node:path";
|
|
4603
4827
|
var colors2 = {
|
|
4604
4828
|
reset: "\x1B[0m",
|
|
4605
4829
|
bold: "\x1B[1m",
|
|
@@ -4625,7 +4849,20 @@ function padLeft2(str, len) {
|
|
|
4625
4849
|
return " ".repeat(Math.max(0, len - plainLen)) + str;
|
|
4626
4850
|
}
|
|
4627
4851
|
/**
 * Load raw result records from a result source, choosing the loader from the
 * resolved path: a `.json` extension is read as an OTLP trace file, a file
 * whose basename equals `RESULT_INDEX_FILENAME` is read as a run manifest, and
 * anything else is read as JSONL records.
 *
 * @param {string} filePath - Result file or workspace path.
 * @returns {object[]} Raw result records.
 */
function loadResultFile(filePath) {
  const resolved = resolveTraceResultPath(filePath);
  const isOtlpJson = path6.extname(resolved) === ".json";
  if (isOtlpJson) {
    return loadOtlpTraceFile(resolved);
  }
  const isRunIndex = path6.basename(resolved) === RESULT_INDEX_FILENAME;
  return isRunIndex ? loadManifestAsRawResults(resolved) : loadJsonlRecords(resolved);
}
|
|
4861
|
+
/**
 * Resolve a trace result source to a concrete path by delegating to
 * `resolveWorkspaceOrFilePath`.
 *
 * @param {string} filePath - Result file or workspace path.
 * @returns {string} Whatever `resolveWorkspaceOrFilePath` returns for it.
 */
function resolveTraceResultPath(filePath) {
  const resolvedPath = resolveWorkspaceOrFilePath(filePath);
  return resolvedPath;
}
|
|
4864
|
+
function loadJsonlRecords(filePath) {
|
|
4865
|
+
const content = readFileSync4(filePath, "utf8");
|
|
4629
4866
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
4630
4867
|
return lines.map((line, i) => {
|
|
4631
4868
|
const record = JSON.parse(line);
|
|
@@ -4635,25 +4872,280 @@ function loadResultFile(filePath) {
|
|
|
4635
4872
|
return record;
|
|
4636
4873
|
});
|
|
4637
4874
|
}
|
|
4875
|
+
/**
 * Load the results referenced by a run manifest and convert each one to the
 * snake_case raw record shape produced by `toRawResult`.
 *
 * @param {string} filePath - Path of the manifest file.
 * @returns {object[]} One raw record per manifest result.
 */
function loadManifestAsRawResults(filePath) {
  const manifestResults = loadManifestResults(filePath);
  return manifestResults.map((result) => toRawResult(result));
}
|
|
4878
|
+
/**
 * Convert a camelCase evaluation result into the snake_case raw record shape.
 *
 * Assertions (top-level and per score) are copied down to `text`, `passed`
 * and `evidence`; token usage is copied down to `input`, `output` and
 * `cached`. Missing optional parts stay `undefined`.
 *
 * @param {object} result - Evaluation result to convert.
 * @returns {object} The raw record.
 */
function toRawResult(result) {
  const copyAssertions = (assertions) => assertions?.map(({ text, passed, evidence }) => ({ text, passed, evidence }));
  const { tokenUsage } = result;
  const rawTokenUsage = tokenUsage ? { input: tokenUsage.input, output: tokenUsage.output, cached: tokenUsage.cached } : void 0;
  const rawScores = result.scores?.map((entry) => ({
    name: entry.name,
    type: entry.type,
    score: entry.score,
    assertions: copyAssertions(entry.assertions),
    weight: entry.weight
  }));
  return {
    timestamp: result.timestamp,
    test_id: result.testId,
    eval_set: result.eval_set,
    conversation_id: result.conversationId,
    score: result.score,
    assertions: copyAssertions(result.assertions),
    target: result.target,
    error: result.error,
    scores: rawScores,
    token_usage: rawTokenUsage,
    cost_usd: result.costUsd,
    duration_ms: result.durationMs,
    start_time: result.startTime,
    end_time: result.endTime,
    input: result.input,
    output: result.output,
    file_changes: result.fileChanges
  };
}
|
|
4917
|
+
/**
 * Read an OTLP JSON trace file and turn each root span into a raw result
 * record.
 *
 * Spans are gathered from `resourceSpans[].scopeSpans[].spans[]`. A root is a
 * span with no parent, or whose parent id is not present in the file. When at
 * least one root passes `isAgentvEvalRoot` only those roots are used;
 * otherwise every root is used.
 *
 * @param {string} filePath - Path of the OTLP JSON file.
 * @returns {object[]} One raw record per selected root span; `[]` when the
 *   file contains no spans.
 * @throws {Error} When a selected root span has no numeric `agentv.score`
 *   attribute.
 */
function loadOtlpTraceFile(filePath) {
  const parsed = JSON.parse(readFileSync4(filePath, "utf8"));
  const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
  if (!spans || spans.length === 0) {
    return [];
  }
  // Index spans by id and group them under their parent's id.
  const spanMap = /* @__PURE__ */ new Map();
  const childMap = /* @__PURE__ */ new Map();
  for (const span of spans) {
    if (!span.spanId) continue;
    spanMap.set(span.spanId, span);
    if (span.parentSpanId) {
      const siblings = childMap.get(span.parentSpanId) ?? [];
      siblings.push(span);
      childMap.set(span.parentSpanId, siblings);
    }
  }
  const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
  const supportedRoots = roots.filter(isAgentvEvalRoot);
  const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
  return candidateRoots.map((root, index) => {
    const descendants = collectChildSpans(root.spanId, childMap);
    const rootAttrs = parseOtlpAttributes(root.attributes);
    // Parse each descendant's attributes once and reuse them below.
    const parsedDescendants = descendants.map((span) => ({
      ...span,
      parsedAttributes: parseOtlpAttributes(span.attributes)
    }));
    const toolSpans = parsedDescendants.filter(
      (span) => typeof span.parsedAttributes.gen_ai_tool_name === "string"
    );
    const llmSpans = parsedDescendants.filter(
      (span) => span.parsedAttributes.gen_ai_operation_name === "chat" || typeof span.name === "string" && span.name.startsWith("chat ")
    );
    // Sum token usage over all descendant spans (the root itself is not included).
    const tokenUsage = parsedDescendants.reduce(
      (acc, span) => {
        const attrs = span.parsedAttributes;
        acc.input += numberAttr(attrs.gen_ai_usage_input_tokens) ?? 0;
        acc.output += numberAttr(attrs.gen_ai_usage_output_tokens) ?? 0;
        const cached = numberAttr(attrs.gen_ai_usage_cache_read_input_tokens);
        if (cached !== void 0 && cached > 0) {
          acc.cached = (acc.cached ?? 0) + cached;
        }
        return acc;
      },
      { input: 0, output: 0, cached: void 0 }
    );
    // Prefer the summed span usage; fall back to the root's agentv.trace.* totals.
    const inputTokens = tokenUsage.input || numberAttr(rootAttrs.agentv_trace_token_input) || 0;
    const outputTokens = tokenUsage.output || numberAttr(rootAttrs.agentv_trace_token_output) || 0;
    const cachedTokens = tokenUsage.cached || numberAttr(rootAttrs.agentv_trace_token_cached) || 0;
    const hasTokenUsage = Boolean(inputTokens || outputTokens || cachedTokens);
    const toolNameOf = (span) => String(span.parsedAttributes.gen_ai_tool_name);
    const traceSummary = buildDerivedTraceSummary({
      trace: {
        event_count: numberAttr(rootAttrs.agentv_trace_event_count) ?? (toolSpans.length > 0 ? toolSpans.length : void 0),
        tool_calls: countRawSpanNames(
          toolSpans.map((span) => ({
            type: "tool",
            name: toolNameOf(span)
          }))
        ),
        // Status code 2 is what this loader treats as an error.
        error_count: descendants.filter((span) => span.status?.code === 2).length || void 0,
        llm_call_count: numberAttr(rootAttrs.agentv_trace_llm_call_count) ?? (llmSpans.length > 0 ? llmSpans.length : void 0)
      },
      spans: [
        ...llmSpans.map((span) => ({
          type: "llm",
          name: span.name ?? "chat",
          duration_ms: durationFromSpan(span)
        })),
        ...toolSpans.map((span) => ({
          type: "tool",
          name: toolNameOf(span),
          duration_ms: durationFromSpan(span)
        }))
      ],
      duration_ms: numberAttr(rootAttrs.agentv_trace_duration_ms) ?? durationFromSpan(root),
      cost_usd: numberAttr(rootAttrs.agentv_trace_cost_usd),
      token_usage: hasTokenUsage ? {
        input: inputTokens,
        output: outputTokens,
        ...cachedTokens ? { cached: cachedTokens } : {}
      } : void 0
    });
    const score = numberAttr(rootAttrs.agentv_score);
    if (score === void 0) {
      throw new Error(
        `Unsupported OTLP trace root span at index ${index + 1}: missing agentv.score attribute`
      );
    }
    return {
      test_id: stringAttr(rootAttrs.agentv_test_id) ?? stringAttr(rootAttrs.agentv_eval_id) ?? `trace-${index + 1}`,
      eval_set: stringAttr(rootAttrs.agentv_eval_set),
      target: stringAttr(rootAttrs.agentv_target),
      score,
      error: root.status?.code === 2 ? root.status.message : void 0,
      cost_usd: traceSummary?.cost_usd,
      duration_ms: traceSummary?.duration_ms,
      token_usage: traceSummary?.token_usage,
      trace: traceSummary ? {
        event_count: traceSummary.event_count,
        tool_calls: traceSummary.tool_calls,
        error_count: traceSummary.error_count,
        tool_durations: traceSummary.tool_durations,
        llm_call_count: traceSummary.llm_call_count,
        token_usage: traceSummary.token_usage,
        cost_usd: traceSummary.cost_usd,
        duration_ms: traceSummary.duration_ms
      } : void 0,
      spans: traceSummary?.spans,
      output: stringAttr(rootAttrs.agentv_output_text),
      // Evaluator scores are carried as "agentv.evaluator.<name>" events on the root span.
      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
        const attrs = parseOtlpAttributes(event.attributes);
        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
        return {
          name,
          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
        };
      })
    };
  });
}
|
|
5036
|
+
/**
 * Decide whether a span looks like an agentv evaluation root: it is named
 * `agentv.eval`, or carries a numeric `agentv.score` attribute, or carries a
 * string `agentv.test_id` attribute.
 *
 * @param {object} span - OTLP span.
 * @returns {boolean}
 */
function isAgentvEvalRoot(span) {
  const attrs = parseOtlpAttributes(span.attributes);
  if (span.name === "agentv.eval") {
    return true;
  }
  if (numberAttr(attrs.agentv_score) !== void 0) {
    return true;
  }
  return stringAttr(attrs.agentv_test_id) !== void 0;
}
|
|
5040
|
+
/**
 * Collect every descendant of a span from a parent-id → children map.
 *
 * The direct children come first, followed by each child's own descendants in
 * child order.
 *
 * @param {string | undefined} spanId - Id of the span whose descendants are wanted.
 * @param {Map<string, object[]>} childMap - Children grouped by parent span id.
 * @returns {object[]} A new array of descendants; empty when `spanId` is falsy
 *   or has no children.
 */
function collectChildSpans(spanId, childMap) {
  if (!spanId) {
    return [];
  }
  const children = childMap.get(spanId) ?? [];
  const deeper = children.flatMap((child) => collectChildSpans(child.spanId, childMap));
  return [...children, ...deeper];
}
|
|
5049
|
+
/**
 * Flatten an OTLP attribute list into a plain object.
 *
 * Dots in attribute keys are replaced by underscores (`agentv.score` becomes
 * `agentv_score`) and each value is decoded with `parseOtlpValue`. When a key
 * occurs more than once the last occurrence wins.
 *
 * @param {Array<{ key: string, value: object }> | null | undefined} attributes
 * @returns {Record<string, unknown>} Decoded attributes; `{}` when none are given.
 */
function parseOtlpAttributes(attributes) {
  const decoded = {};
  for (const { key, value } of attributes ?? []) {
    const normalizedKey = key.replace(/\./g, "_");
    decoded[normalizedKey] = parseOtlpValue(value);
  }
  return decoded;
}
|
|
5056
|
+
/**
 * Decode a single OTLP `AnyValue`-style object into a JavaScript value.
 *
 * Fields are checked in the order `stringValue`, `intValue` (converted with
 * `Number`), `doubleValue`, `boolValue`, then `arrayValue` (decoded
 * recursively). Anything else yields `undefined`.
 *
 * @param {object | null | undefined} value - Encoded value.
 * @returns {unknown} The decoded value, or `undefined` when nothing matches.
 */
function parseOtlpValue(value) {
  if (!value) {
    return void 0;
  }
  const keep = (raw) => raw;
  const scalarDecoders = [
    ["stringValue", keep],
    ["intValue", (raw) => Number(raw)],
    ["doubleValue", keep],
    ["boolValue", keep]
  ];
  for (const [field, decode] of scalarDecoders) {
    if (field in value && value[field] !== void 0) {
      return decode(value[field]);
    }
  }
  if ("arrayValue" in value) {
    const entries = value.arrayValue?.values ?? [];
    return entries.map((entry) => parseOtlpValue(entry));
  }
  return void 0;
}
|
|
5066
|
+
/**
 * Compute a span's duration in whole milliseconds from its nanosecond
 * start/end timestamps.
 *
 * @param {{ startTimeUnixNano?: string | number, endTimeUnixNano?: string | number }} span
 * @returns {number | undefined} Rounded duration in ms, or `undefined` when
 *   either timestamp does not convert to a finite number.
 */
function durationFromSpan(span) {
  const startNanos = Number(span.startTimeUnixNano);
  const endNanos = Number(span.endTimeUnixNano);
  const bothFinite = Number.isFinite(startNanos) && Number.isFinite(endNanos);
  // 1e6 nanoseconds per millisecond.
  return bothFinite ? Math.round((endNanos - startNanos) / 1e6) : void 0;
}
|
|
5072
|
+
/**
 * Narrow a decoded attribute to a string.
 *
 * @param {unknown} value - Decoded attribute value.
 * @returns {string | undefined} `value` when it is a string, otherwise `undefined`.
 */
function stringAttr(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value;
}
|
|
5075
|
+
/**
 * Narrow a decoded attribute to a finite number.
 *
 * @param {unknown} value - Decoded attribute value.
 * @returns {number | undefined} `value` when it is a finite number; `undefined`
 *   for non-numbers, `NaN` and ±`Infinity`.
 */
function numberAttr(value) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  if (!isFiniteNumber) {
    return void 0;
  }
  return value;
}
|
|
5078
|
+
/**
 * Build a trace summary for a raw result, filling gaps in `result.trace` with
 * values derived from `result.spans` and the result's top-level fields.
 *
 * Values already present on `result.trace` win. Tool call counts and tool
 * durations are otherwise derived from spans of type `tool`; event and LLM
 * call counts are derived only when the result has at least one span.
 *
 * @param {object} result - Raw result (snake_case fields).
 * @returns {object | undefined} The summary including `spans`, or `undefined`
 *   when the result has no trace, no spans, and no token usage, cost or
 *   duration.
 */
function buildDerivedTraceSummary(result) {
  const { trace, spans } = result;
  const spanList = spans ?? [];
  const spansOfType = (kind) => spanList.filter((span) => span.type === kind);
  const toolSpans = spansOfType("tool");
  const llmSpans = spansOfType("llm");
  const toolCalls = trace?.tool_calls ?? countRawSpanNames(toolSpans);
  const toolDurations = trace?.tool_durations ?? groupRawSpanDurations(toolSpans);
  const hasSpanData = (spans?.length ?? 0) > 0;
  const whenSpansExist = (count) => hasSpanData ? count : void 0;
  const eventCount = trace?.event_count ?? whenSpansExist(toolSpans.length);
  const llmCallCount = trace?.llm_call_count ?? whenSpansExist(llmSpans.length);
  const hasTopLevelMetrics = result.token_usage !== void 0 || result.cost_usd !== void 0 || result.duration_ms !== void 0;
  if (!trace && !spans?.length && !hasTopLevelMetrics) {
    return void 0;
  }
  return {
    event_count: eventCount,
    tool_calls: toolCalls,
    error_count: trace?.error_count,
    tool_durations: toolDurations,
    llm_call_count: llmCallCount,
    token_usage: trace?.token_usage ?? result.token_usage,
    cost_usd: trace?.cost_usd ?? result.cost_usd,
    duration_ms: trace?.duration_ms ?? result.duration_ms,
    spans
  };
}
|
|
5101
|
+
/**
 * Count how many spans carry each name.
 *
 * Counting goes through a `Map` so that names which collide with inherited
 * `Object.prototype` members (`constructor`, `toString`, ...) are counted like
 * any other name instead of picking up the inherited value.
 *
 * @param {Array<{ name: string }>} spans - Spans to count.
 * @returns {Record<string, number> | undefined} Name → occurrence count, or
 *   `undefined` when `spans` is empty.
 */
function countRawSpanNames(spans) {
  const tally = new Map();
  for (const span of spans) {
    // Normalise to the string an object key would have used.
    const key = String(span.name);
    tally.set(key, (tally.get(key) ?? 0) + 1);
  }
  return tally.size > 0 ? Object.fromEntries(tally) : void 0;
}
|
|
5108
|
+
/**
 * Group span durations by span name, skipping spans without a duration.
 *
 * Grouping goes through a `Map` so that names which collide with inherited
 * `Object.prototype` members (`constructor`, `toString`, ...) get their own
 * list instead of throwing on the inherited value.
 *
 * @param {Array<{ name: string, duration_ms?: number }>} spans - Spans to group.
 * @returns {Record<string, number[]> | undefined} Name → durations in input
 *   order, or `undefined` when no span has a duration.
 */
function groupRawSpanDurations(spans) {
  const byName = new Map();
  for (const span of spans) {
    if (span.duration_ms === void 0) continue;
    // Normalise to the string an object key would have used.
    const key = String(span.name);
    const bucket = byName.get(key);
    if (bucket) {
      bucket.push(span.duration_ms);
    } else {
      byName.set(key, [span.duration_ms]);
    }
  }
  return byName.size > 0 ? Object.fromEntries(byName) : void 0;
}
|
|
5118
|
+
/**
 * Return the derived trace summary of a raw result without its `spans` field.
 *
 * @param {object} result - Raw result.
 * @returns {object | undefined} The summary, or `undefined` when
 *   `buildDerivedTraceSummary` has nothing to report.
 */
function getTraceSummary(result) {
  const derived = buildDerivedTraceSummary(result);
  if (!derived) {
    return void 0;
  }
  const summary = { ...derived };
  delete summary.spans;
  return summary;
}
|
|
5124
|
+
/**
 * Return the spans attached to a raw result's derived trace summary.
 *
 * @param {object} result - Raw result.
 * @returns {object[]} The spans, or `[]` when there is no summary or no spans.
 */
function getTraceSpans(result) {
  const derived = buildDerivedTraceSummary(result);
  return derived?.spans ?? [];
}
|
|
5127
|
+
/**
 * Return a raw result's trace summary passed through `toCamelCaseDeep`.
 *
 * @param {object} result - Raw result.
 * @returns {object | undefined} The converted summary, or `undefined` when the
 *   result has no trace summary.
 */
function toTraceSummary(result) {
  const rawTrace = getTraceSummary(result);
  return rawTrace ? toCamelCaseDeep(rawTrace) : void 0;
}
|
|
4638
5132
|
function listResultFiles(cwd, limit) {
|
|
4639
|
-
const baseDir =
|
|
4640
|
-
const rawDir =
|
|
5133
|
+
const baseDir = path6.join(cwd, ".agentv", "results");
|
|
5134
|
+
const rawDir = path6.join(baseDir, "raw");
|
|
4641
5135
|
const files = [];
|
|
4642
5136
|
try {
|
|
4643
5137
|
const entries2 = readdirSync2(rawDir, { withFileTypes: true });
|
|
4644
5138
|
for (const entry of entries2) {
|
|
4645
5139
|
if (entry.isDirectory()) {
|
|
4646
|
-
const
|
|
4647
|
-
|
|
4648
|
-
|
|
4649
|
-
files.push({ filePath: jsonlPath, displayName: entry.name });
|
|
4650
|
-
} catch {
|
|
5140
|
+
const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
|
|
5141
|
+
if (primaryPath) {
|
|
5142
|
+
files.push({ filePath: primaryPath, displayName: entry.name });
|
|
4651
5143
|
}
|
|
4652
5144
|
}
|
|
4653
5145
|
}
|
|
4654
5146
|
for (const entry of entries2) {
|
|
4655
5147
|
if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
|
|
4656
|
-
files.push({ filePath:
|
|
5148
|
+
files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
|
|
4657
5149
|
}
|
|
4658
5150
|
}
|
|
4659
5151
|
} catch {
|
|
@@ -4661,7 +5153,7 @@ function listResultFiles(cwd, limit) {
|
|
|
4661
5153
|
try {
|
|
4662
5154
|
const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
|
|
4663
5155
|
for (const entry of entries2) {
|
|
4664
|
-
files.push({ filePath:
|
|
5156
|
+
files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
|
|
4665
5157
|
}
|
|
4666
5158
|
} catch {
|
|
4667
5159
|
}
|
|
@@ -4729,84 +5221,65 @@ function formatScore(score) {
|
|
|
4729
5221
|
return `${(score * 100).toFixed(0)}%`;
|
|
4730
5222
|
}
|
|
4731
5223
|
|
|
4732
|
-
// src/commands/results/
|
|
4733
|
-
|
|
4734
|
-
|
|
5224
|
+
// src/commands/results/shared.ts
|
|
5225
|
+
/**
 * Optional positional `source` argument: a result file or workspace directory.
 */
var sourceArg = positional({
  type: optional(string),
  displayName: "source",
  description: "Result file or workspace directory (defaults to most recent in .agentv/results/)",
});
|
|
5230
|
+
/**
 * Work out which result file a command should read.
 *
 * An explicit `source` is resolved with `resolveResultSourcePath` and must
 * exist. Without one, the file recorded in the run cache is used when it
 * exists, otherwise the first entry returned by `listResultFiles(cwd, 1)`.
 * Prints an error and exits with status 1 when nothing usable is found.
 *
 * @param {string | undefined} source - Result file or workspace directory.
 * @param {string} cwd - Directory used for resolution and lookup.
 * @returns {Promise<{ sourceFile: string }>}
 */
async function resolveSourceFile(source, cwd) {
  if (source) {
    const explicitPath = resolveResultSourcePath(source, cwd);
    if (!existsSync2(explicitPath)) {
      console.error(`Error: File not found: ${explicitPath}`);
      process.exit(1);
    }
    return { sourceFile: explicitPath };
  }
  const cache = await loadRunCache(cwd);
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
  if (cachedFile && existsSync2(cachedFile)) {
    return { sourceFile: cachedFile };
  }
  const metas = listResultFiles(cwd, 1);
  if (metas.length === 0) {
    console.error("Error: No result files found in .agentv/results/");
    console.error("Run an evaluation first: agentv eval <eval-file>");
    process.exit(1);
  }
  return { sourceFile: metas[0].path };
}
|
|
5255
|
+
async function loadResults(source, cwd) {
|
|
5256
|
+
const { sourceFile } = await resolveSourceFile(source, cwd);
|
|
5257
|
+
const results = loadManifestResults(sourceFile);
|
|
4735
5258
|
if (results.length === 0) {
|
|
4736
|
-
|
|
5259
|
+
console.error(`No results found in ${sourceFile}`);
|
|
5260
|
+
process.exit(1);
|
|
4737
5261
|
}
|
|
4738
|
-
|
|
5262
|
+
return { results: patchTestIds(results), sourceFile };
|
|
5263
|
+
}
|
|
5264
|
+
function patchTestIds(results) {
|
|
5265
|
+
return results.map((r) => {
|
|
4739
5266
|
if (!r.testId && r.evalId) {
|
|
4740
5267
|
return { ...r, testId: String(r.evalId) };
|
|
4741
5268
|
}
|
|
4742
5269
|
return r;
|
|
4743
5270
|
});
|
|
4744
|
-
mkdirSync2(outputDir, { recursive: true });
|
|
4745
|
-
const benchmark = buildBenchmarkArtifact(patched, sourceFile);
|
|
4746
|
-
writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
|
|
4747
|
-
`);
|
|
4748
|
-
const timing = buildTimingArtifact(patched);
|
|
4749
|
-
writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
|
|
4750
|
-
`);
|
|
4751
|
-
const aggregateGrading = buildAggregateGradingArtifact(patched);
|
|
4752
|
-
writeFileSync3(
|
|
4753
|
-
path8.join(outputDir, "grading.json"),
|
|
4754
|
-
`${JSON.stringify(aggregateGrading, null, 2)}
|
|
4755
|
-
`
|
|
4756
|
-
);
|
|
4757
|
-
const gradingDir = path8.join(outputDir, "grading");
|
|
4758
|
-
mkdirSync2(gradingDir, { recursive: true });
|
|
4759
|
-
for (const result of patched) {
|
|
4760
|
-
const id = safeTestId(result);
|
|
4761
|
-
const grading = buildGradingArtifact(result);
|
|
4762
|
-
writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
|
|
4763
|
-
`);
|
|
4764
|
-
}
|
|
4765
|
-
const outputsDir = path8.join(outputDir, "outputs");
|
|
4766
|
-
mkdirSync2(outputsDir, { recursive: true });
|
|
4767
|
-
for (const result of patched) {
|
|
4768
|
-
if (result.output && result.output.length > 0) {
|
|
4769
|
-
const id = safeTestId(result);
|
|
4770
|
-
const md = formatOutputMarkdown(result.output);
|
|
4771
|
-
writeFileSync3(path8.join(outputsDir, `${id}.md`), md);
|
|
4772
|
-
}
|
|
4773
|
-
}
|
|
4774
|
-
const inputsDir = path8.join(outputDir, "inputs");
|
|
4775
|
-
mkdirSync2(inputsDir, { recursive: true });
|
|
4776
|
-
for (const result of patched) {
|
|
4777
|
-
const id = safeTestId(result);
|
|
4778
|
-
const input = extractInput(result);
|
|
4779
|
-
if (input) {
|
|
4780
|
-
writeFileSync3(path8.join(inputsDir, `${id}.md`), input);
|
|
4781
|
-
}
|
|
4782
|
-
}
|
|
4783
|
-
}
|
|
4784
|
-
function formatOutputMarkdown(output) {
|
|
4785
|
-
return output.map((msg) => `@[${msg.role}]:
|
|
4786
|
-
${String(msg.content ?? "")}`).join("\n\n");
|
|
4787
|
-
}
|
|
4788
|
-
function extractInput(result) {
|
|
4789
|
-
const input = result.input;
|
|
4790
|
-
if (!input) return null;
|
|
4791
|
-
if (typeof input === "string") return input;
|
|
4792
|
-
if (Array.isArray(input) && input.length > 0) {
|
|
4793
|
-
return formatOutputMarkdown(input);
|
|
4794
|
-
}
|
|
4795
|
-
return null;
|
|
4796
|
-
}
|
|
4797
|
-
function safeTestId(result) {
|
|
4798
|
-
const raw = result.testId ?? result.evalId ?? "unknown";
|
|
4799
|
-
return String(raw).replace(/[/\\:*?"<>|]/g, "_");
|
|
4800
5271
|
}
|
|
5272
|
+
|
|
5273
|
+
// src/commands/results/export.ts
|
|
4801
5274
|
function deriveOutputDir(cwd, sourceFile) {
|
|
4802
|
-
const parentDir =
|
|
5275
|
+
const parentDir = path7.basename(path7.dirname(sourceFile));
|
|
4803
5276
|
if (parentDir.startsWith("eval_")) {
|
|
4804
5277
|
const dirName2 = parentDir.slice(5);
|
|
4805
|
-
return
|
|
5278
|
+
return path7.join(cwd, ".agentv", "results", "export", dirName2);
|
|
4806
5279
|
}
|
|
4807
|
-
const basename =
|
|
5280
|
+
const basename = path7.basename(sourceFile, ".jsonl");
|
|
4808
5281
|
const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
|
|
4809
|
-
return
|
|
5282
|
+
return path7.join(cwd, ".agentv", "results", "export", dirName);
|
|
4810
5283
|
}
|
|
4811
5284
|
var resultsExportCommand = command({
|
|
4812
5285
|
name: "export",
|
|
@@ -4833,28 +5306,12 @@ var resultsExportCommand = command({
|
|
|
4833
5306
|
handler: async ({ source, out, dir }) => {
|
|
4834
5307
|
const cwd = dir ?? process.cwd();
|
|
4835
5308
|
try {
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
|
|
4840
|
-
|
|
4841
|
-
|
|
4842
|
-
if (cachedFile && existsSync2(cachedFile)) {
|
|
4843
|
-
sourceFile = cachedFile;
|
|
4844
|
-
} else {
|
|
4845
|
-
const metas = listResultFiles(cwd, 1);
|
|
4846
|
-
if (metas.length === 0) {
|
|
4847
|
-
console.error("Error: No result files found in .agentv/results/");
|
|
4848
|
-
console.error("Run an evaluation first: agentv eval <eval-file>");
|
|
4849
|
-
process.exit(1);
|
|
4850
|
-
}
|
|
4851
|
-
sourceFile = metas[0].path;
|
|
4852
|
-
}
|
|
4853
|
-
}
|
|
4854
|
-
const content = readFileSync6(sourceFile, "utf8");
|
|
4855
|
-
const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
4856
|
-
exportResults(sourceFile, content, outputDir);
|
|
4857
|
-
const results = parseJsonlResults(content);
|
|
5309
|
+
const { sourceFile } = await resolveSourceFile(source, cwd);
|
|
5310
|
+
const { results } = await loadResults(source, cwd);
|
|
5311
|
+
const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
5312
|
+
await writeArtifactsFromResults(results, outputDir, {
|
|
5313
|
+
evalFile: sourceFile
|
|
5314
|
+
});
|
|
4858
5315
|
console.log(`Exported ${results.length} test(s) to ${outputDir}`);
|
|
4859
5316
|
for (const result of results) {
|
|
4860
5317
|
const id = result.testId ?? result.evalId ?? "unknown";
|
|
@@ -4867,58 +5324,6 @@ var resultsExportCommand = command({
|
|
|
4867
5324
|
}
|
|
4868
5325
|
});
|
|
4869
5326
|
|
|
4870
|
-
// src/commands/results/shared.ts
|
|
4871
|
-
import { existsSync as existsSync3, readFileSync as readFileSync7 } from "node:fs";
|
|
4872
|
-
import path9 from "node:path";
|
|
4873
|
-
var sourceArg = positional({
|
|
4874
|
-
type: optional(string),
|
|
4875
|
-
displayName: "source",
|
|
4876
|
-
description: "JSONL result file (defaults to most recent in .agentv/results/)"
|
|
4877
|
-
});
|
|
4878
|
-
async function resolveSourceFile(source, cwd) {
|
|
4879
|
-
let sourceFile;
|
|
4880
|
-
if (source) {
|
|
4881
|
-
sourceFile = path9.isAbsolute(source) ? source : path9.resolve(cwd, source);
|
|
4882
|
-
if (!existsSync3(sourceFile)) {
|
|
4883
|
-
console.error(`Error: File not found: ${sourceFile}`);
|
|
4884
|
-
process.exit(1);
|
|
4885
|
-
}
|
|
4886
|
-
} else {
|
|
4887
|
-
const cache = await loadRunCache(cwd);
|
|
4888
|
-
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
4889
|
-
if (cachedFile && existsSync3(cachedFile)) {
|
|
4890
|
-
sourceFile = cachedFile;
|
|
4891
|
-
} else {
|
|
4892
|
-
const metas = listResultFiles(cwd, 1);
|
|
4893
|
-
if (metas.length === 0) {
|
|
4894
|
-
console.error("Error: No result files found in .agentv/results/");
|
|
4895
|
-
console.error("Run an evaluation first: agentv eval <eval-file>");
|
|
4896
|
-
process.exit(1);
|
|
4897
|
-
}
|
|
4898
|
-
sourceFile = metas[0].path;
|
|
4899
|
-
}
|
|
4900
|
-
}
|
|
4901
|
-
const content = readFileSync7(sourceFile, "utf8");
|
|
4902
|
-
return { sourceFile, content };
|
|
4903
|
-
}
|
|
4904
|
-
async function loadResults(source, cwd) {
|
|
4905
|
-
const { sourceFile, content } = await resolveSourceFile(source, cwd);
|
|
4906
|
-
const results = parseJsonlResults(content);
|
|
4907
|
-
if (results.length === 0) {
|
|
4908
|
-
console.error(`No results found in ${sourceFile}`);
|
|
4909
|
-
process.exit(1);
|
|
4910
|
-
}
|
|
4911
|
-
return { results: patchTestIds(results), sourceFile };
|
|
4912
|
-
}
|
|
4913
|
-
function patchTestIds(results) {
|
|
4914
|
-
return results.map((r) => {
|
|
4915
|
-
if (!r.testId && r.evalId) {
|
|
4916
|
-
return { ...r, testId: String(r.evalId) };
|
|
4917
|
-
}
|
|
4918
|
-
return r;
|
|
4919
|
-
});
|
|
4920
|
-
}
|
|
4921
|
-
|
|
4922
5327
|
// src/commands/results/failures.ts
|
|
4923
5328
|
function formatFailures(results) {
|
|
4924
5329
|
return results.filter((r) => r.score < 1).map((r) => {
|
|
@@ -5045,7 +5450,7 @@ var resultsShowCommand = command({
|
|
|
5045
5450
|
});
|
|
5046
5451
|
|
|
5047
5452
|
// src/commands/results/summary.ts
|
|
5048
|
-
import { existsSync as
|
|
5453
|
+
import { existsSync as existsSync3, readFileSync as readFileSync5 } from "node:fs";
|
|
5049
5454
|
function formatSummary(results, grading) {
|
|
5050
5455
|
const total = results.length;
|
|
5051
5456
|
let passed;
|
|
@@ -5096,9 +5501,9 @@ var resultsSummaryCommand = command({
|
|
|
5096
5501
|
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5097
5502
|
let grading;
|
|
5098
5503
|
const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
|
|
5099
|
-
if (
|
|
5504
|
+
if (existsSync3(gradingPath)) {
|
|
5100
5505
|
try {
|
|
5101
|
-
grading = JSON.parse(
|
|
5506
|
+
grading = JSON.parse(readFileSync5(gradingPath, "utf8"));
|
|
5102
5507
|
} catch {
|
|
5103
5508
|
}
|
|
5104
5509
|
}
|
|
@@ -5123,68 +5528,26 @@ var resultsCommand = subcommands({
|
|
|
5123
5528
|
});
|
|
5124
5529
|
|
|
5125
5530
|
// src/commands/results/serve.ts
|
|
5126
|
-
import { existsSync as
|
|
5127
|
-
import
|
|
5531
|
+
import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
|
|
5532
|
+
import path8 from "node:path";
|
|
5128
5533
|
import { Hono } from "hono";
|
|
5129
|
-
async function resolveSourceFile2(source, cwd) {
|
|
5130
|
-
if (source) {
|
|
5131
|
-
const resolved = path10.isAbsolute(source) ? source : path10.resolve(cwd, source);
|
|
5132
|
-
if (!existsSync5(resolved)) {
|
|
5133
|
-
throw new Error(`Source file not found: ${resolved}`);
|
|
5134
|
-
}
|
|
5135
|
-
return resolved;
|
|
5136
|
-
}
|
|
5137
|
-
const cache = await loadRunCache(cwd);
|
|
5138
|
-
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
5139
|
-
if (cachedFile && existsSync5(cachedFile)) {
|
|
5140
|
-
return cachedFile;
|
|
5141
|
-
}
|
|
5142
|
-
const metas = listResultFiles(cwd, 10);
|
|
5143
|
-
if (metas.length === 0) {
|
|
5144
|
-
throw new Error(
|
|
5145
|
-
"No result files found in .agentv/results/\nRun an evaluation first: agentv eval <eval-file>"
|
|
5146
|
-
);
|
|
5147
|
-
}
|
|
5148
|
-
if (metas.length > 1) {
|
|
5149
|
-
console.log("Available result files:");
|
|
5150
|
-
for (const m of metas) {
|
|
5151
|
-
console.log(` ${m.path}`);
|
|
5152
|
-
}
|
|
5153
|
-
console.log(`
|
|
5154
|
-
Serving most recent: ${metas[0].path}
|
|
5155
|
-
`);
|
|
5156
|
-
}
|
|
5157
|
-
return metas[0].path;
|
|
5158
|
-
}
|
|
5159
|
-
function loadResults2(content) {
|
|
5160
|
-
const results = parseJsonlResults(content);
|
|
5161
|
-
if (results.length === 0) {
|
|
5162
|
-
throw new Error("No valid results found in JSONL content");
|
|
5163
|
-
}
|
|
5164
|
-
return results.map((r) => {
|
|
5165
|
-
if (!r.testId && r.evalId) {
|
|
5166
|
-
return { ...r, testId: String(r.evalId) };
|
|
5167
|
-
}
|
|
5168
|
-
return r;
|
|
5169
|
-
});
|
|
5170
|
-
}
|
|
5171
5534
|
function feedbackPath(cwd) {
|
|
5172
|
-
return
|
|
5535
|
+
return path8.join(cwd, "feedback.json");
|
|
5173
5536
|
}
|
|
5174
5537
|
function readFeedback(cwd) {
|
|
5175
5538
|
const fp = feedbackPath(cwd);
|
|
5176
|
-
if (!
|
|
5539
|
+
if (!existsSync4(fp)) {
|
|
5177
5540
|
return { reviews: [] };
|
|
5178
5541
|
}
|
|
5179
5542
|
try {
|
|
5180
|
-
return JSON.parse(
|
|
5543
|
+
return JSON.parse(readFileSync6(fp, "utf8"));
|
|
5181
5544
|
} catch (err2) {
|
|
5182
5545
|
console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
|
|
5183
5546
|
return { reviews: [] };
|
|
5184
5547
|
}
|
|
5185
5548
|
}
|
|
5186
5549
|
function writeFeedback(cwd, data) {
|
|
5187
|
-
|
|
5550
|
+
writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
5188
5551
|
`, "utf8");
|
|
5189
5552
|
}
|
|
5190
5553
|
function createApp(results, cwd) {
|
|
@@ -5854,9 +6217,7 @@ var resultsServeCommand = command({
|
|
|
5854
6217
|
const cwd = dir ?? process.cwd();
|
|
5855
6218
|
const listenPort = port ?? 3117;
|
|
5856
6219
|
try {
|
|
5857
|
-
const sourceFile = await
|
|
5858
|
-
const content = readFileSync9(sourceFile, "utf8");
|
|
5859
|
-
const results = loadResults2(content);
|
|
6220
|
+
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5860
6221
|
const app2 = createApp(results, cwd);
|
|
5861
6222
|
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
5862
6223
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
@@ -5889,7 +6250,7 @@ function detectPackageManager() {
|
|
|
5889
6250
|
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
5890
6251
|
}
|
|
5891
6252
|
function runCommand(cmd, args) {
|
|
5892
|
-
return new Promise((
|
|
6253
|
+
return new Promise((resolve2, reject) => {
|
|
5893
6254
|
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
5894
6255
|
let stdout = "";
|
|
5895
6256
|
child.stdout?.on("data", (data) => {
|
|
@@ -5897,7 +6258,7 @@ function runCommand(cmd, args) {
|
|
|
5897
6258
|
stdout += data.toString();
|
|
5898
6259
|
});
|
|
5899
6260
|
child.on("error", reject);
|
|
5900
|
-
child.on("close", (code) =>
|
|
6261
|
+
child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
|
|
5901
6262
|
});
|
|
5902
6263
|
}
|
|
5903
6264
|
var updateCommand = command({
|
|
@@ -6109,10 +6470,6 @@ function parseAssertSpec(spec) {
|
|
|
6109
6470
|
);
|
|
6110
6471
|
}
|
|
6111
6472
|
}
|
|
6112
|
-
function toTraceSummary(raw) {
|
|
6113
|
-
if (!raw.trace) return void 0;
|
|
6114
|
-
return toCamelCaseDeep(raw.trace);
|
|
6115
|
-
}
|
|
6116
6473
|
function extractCandidate(raw) {
|
|
6117
6474
|
if (raw.output !== void 0)
|
|
6118
6475
|
return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
|
|
@@ -6224,8 +6581,8 @@ var traceScoreCommand = command({
|
|
|
6224
6581
|
args: {
|
|
6225
6582
|
file: positional({
|
|
6226
6583
|
type: string,
|
|
6227
|
-
displayName: "
|
|
6228
|
-
description: "Path to
|
|
6584
|
+
displayName: "trace-source",
|
|
6585
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6229
6586
|
}),
|
|
6230
6587
|
assert: option({
|
|
6231
6588
|
type: string,
|
|
@@ -6271,11 +6628,11 @@ var traceScoreCommand = command({
|
|
|
6271
6628
|
);
|
|
6272
6629
|
if (traceRequired) {
|
|
6273
6630
|
const hasTrace = results.some(
|
|
6274
|
-
(r) => r
|
|
6631
|
+
(r) => toTraceSummary(r) || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
|
|
6275
6632
|
);
|
|
6276
6633
|
if (!hasTrace) {
|
|
6277
6634
|
console.error(
|
|
6278
|
-
`${c2.red}Error:${c2.reset}
|
|
6635
|
+
`${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
|
|
6279
6636
|
);
|
|
6280
6637
|
process.exit(1);
|
|
6281
6638
|
}
|
|
@@ -6308,7 +6665,7 @@ var traceScoreCommand = command({
|
|
|
6308
6665
|
|
|
6309
6666
|
// src/commands/trace/show.ts
|
|
6310
6667
|
function renderFlatTrace(result) {
|
|
6311
|
-
const trace = result
|
|
6668
|
+
const trace = getTraceSummary(result);
|
|
6312
6669
|
const parts = [];
|
|
6313
6670
|
if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
|
|
6314
6671
|
const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
|
|
@@ -6339,8 +6696,12 @@ function renderScores(scores) {
|
|
|
6339
6696
|
}
|
|
6340
6697
|
function renderTree(result) {
|
|
6341
6698
|
const messages = result.output;
|
|
6699
|
+
const spans = getTraceSpans(result);
|
|
6342
6700
|
if (!messages || messages.length === 0) {
|
|
6343
|
-
if (
|
|
6701
|
+
if (spans.length > 0) {
|
|
6702
|
+
return renderSpanTree(result, spans);
|
|
6703
|
+
}
|
|
6704
|
+
if (getTraceSummary(result) || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
|
|
6344
6705
|
return renderFlatTrace(result);
|
|
6345
6706
|
}
|
|
6346
6707
|
return `${c2.dim}No trace data available${c2.reset}`;
|
|
@@ -6406,6 +6767,30 @@ function renderTree(result) {
|
|
|
6406
6767
|
}
|
|
6407
6768
|
return lines.join("\n");
|
|
6408
6769
|
}
|
|
6770
|
+
function renderSpanTree(result, spans) {
|
|
6771
|
+
const lines = [];
|
|
6772
|
+
const testId = result.test_id ?? result.eval_id ?? "unknown";
|
|
6773
|
+
const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
|
|
6774
|
+
const rootParts = [testId];
|
|
6775
|
+
if (result.duration_ms !== void 0) rootParts.push(formatDuration(result.duration_ms));
|
|
6776
|
+
if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
|
|
6777
|
+
if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
|
|
6778
|
+
lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
|
|
6779
|
+
spans.forEach((span, index) => {
|
|
6780
|
+
const connector = index === spans.length - 1 ? "\u2514\u2500" : "\u251C\u2500";
|
|
6781
|
+
const color = span.type === "llm" ? c2.cyan : c2.yellow;
|
|
6782
|
+
const parts = [`${color}${span.name}${c2.reset}`];
|
|
6783
|
+
if (span.duration_ms !== void 0) {
|
|
6784
|
+
parts.push(formatDuration(span.duration_ms));
|
|
6785
|
+
}
|
|
6786
|
+
lines.push(`${connector} ${parts.join(", ")}`);
|
|
6787
|
+
});
|
|
6788
|
+
if (result.scores && result.scores.length > 0) {
|
|
6789
|
+
lines.push("");
|
|
6790
|
+
lines.push(`${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
|
|
6791
|
+
}
|
|
6792
|
+
return lines.join("\n");
|
|
6793
|
+
}
|
|
6409
6794
|
function formatResultDetail(result, index, tree) {
|
|
6410
6795
|
const lines = [];
|
|
6411
6796
|
const testId = result.test_id ?? result.eval_id ?? `result-${index}`;
|
|
@@ -6489,8 +6874,8 @@ var traceShowCommand = command({
|
|
|
6489
6874
|
args: {
|
|
6490
6875
|
file: positional({
|
|
6491
6876
|
type: string,
|
|
6492
|
-
displayName: "
|
|
6493
|
-
description: "Path to
|
|
6877
|
+
displayName: "trace-source",
|
|
6878
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6494
6879
|
}),
|
|
6495
6880
|
testId: option({
|
|
6496
6881
|
type: optional(string),
|
|
@@ -6499,7 +6884,7 @@ var traceShowCommand = command({
|
|
|
6499
6884
|
}),
|
|
6500
6885
|
tree: flag({
|
|
6501
6886
|
long: "tree",
|
|
6502
|
-
description: "Show hierarchical trace tree
|
|
6887
|
+
description: "Show hierarchical trace tree from output messages or exported trace spans"
|
|
6503
6888
|
}),
|
|
6504
6889
|
format: option({
|
|
6505
6890
|
type: optional(oneOf(["table", "json"])),
|
|
@@ -6570,11 +6955,11 @@ function collectMetrics(results) {
|
|
|
6570
6955
|
formatter: (n) => formatNumber(Math.round(n))
|
|
6571
6956
|
});
|
|
6572
6957
|
}
|
|
6573
|
-
const toolCalls = results.map((r) => r
|
|
6958
|
+
const toolCalls = results.map((r) => getTraceSummary(r)?.event_count).filter((v) => v !== void 0);
|
|
6574
6959
|
if (toolCalls.length > 0) {
|
|
6575
6960
|
rows.push({ name: "tool_calls", values: toolCalls, formatter: (n) => String(Math.round(n)) });
|
|
6576
6961
|
}
|
|
6577
|
-
const llmCalls = results.map((r) => r
|
|
6962
|
+
const llmCalls = results.map((r) => getTraceSummary(r)?.llm_call_count).filter((v) => v !== void 0);
|
|
6578
6963
|
if (llmCalls.length > 0) {
|
|
6579
6964
|
rows.push({ name: "llm_calls", values: llmCalls, formatter: (n) => String(Math.round(n)) });
|
|
6580
6965
|
}
|
|
@@ -6668,8 +7053,8 @@ var traceStatsCommand = command({
|
|
|
6668
7053
|
args: {
|
|
6669
7054
|
file: positional({
|
|
6670
7055
|
type: string,
|
|
6671
|
-
displayName: "
|
|
6672
|
-
description: "Path to
|
|
7056
|
+
displayName: "trace-source",
|
|
7057
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6673
7058
|
}),
|
|
6674
7059
|
groupBy: option({
|
|
6675
7060
|
type: optional(oneOf(["target", "eval-set", "test-id"])),
|
|
@@ -6719,8 +7104,8 @@ var traceCommand = subcommands({
|
|
|
6719
7104
|
});
|
|
6720
7105
|
|
|
6721
7106
|
// src/commands/transpile/index.ts
|
|
6722
|
-
import { writeFileSync as
|
|
6723
|
-
import
|
|
7107
|
+
import { writeFileSync as writeFileSync4 } from "node:fs";
|
|
7108
|
+
import path9 from "node:path";
|
|
6724
7109
|
var transpileCommand = command({
|
|
6725
7110
|
name: "transpile",
|
|
6726
7111
|
description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
|
|
@@ -6744,7 +7129,7 @@ var transpileCommand = command({
|
|
|
6744
7129
|
handler: async ({ input, outDir, stdout }) => {
|
|
6745
7130
|
let result;
|
|
6746
7131
|
try {
|
|
6747
|
-
result = transpileEvalYamlFile(
|
|
7132
|
+
result = transpileEvalYamlFile(path9.resolve(input));
|
|
6748
7133
|
} catch (error) {
|
|
6749
7134
|
console.error(`Error: ${error.message}`);
|
|
6750
7135
|
process.exit(1);
|
|
@@ -6768,12 +7153,12 @@ var transpileCommand = command({
|
|
|
6768
7153
|
process.stdout.write("\n");
|
|
6769
7154
|
return;
|
|
6770
7155
|
}
|
|
6771
|
-
const outputDir = outDir ?
|
|
7156
|
+
const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
|
|
6772
7157
|
const fileNames = getOutputFilenames(result);
|
|
6773
7158
|
for (const [skill, evalsJson] of result.files) {
|
|
6774
7159
|
const fileName = fileNames.get(skill) ?? "evals.json";
|
|
6775
|
-
const outputPath =
|
|
6776
|
-
|
|
7160
|
+
const outputPath = path9.join(outputDir, fileName);
|
|
7161
|
+
writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
|
|
6777
7162
|
`);
|
|
6778
7163
|
console.log(`Transpiled to ${outputPath}`);
|
|
6779
7164
|
}
|
|
@@ -6781,7 +7166,7 @@ var transpileCommand = command({
|
|
|
6781
7166
|
});
|
|
6782
7167
|
|
|
6783
7168
|
// src/commands/trim/index.ts
|
|
6784
|
-
import { readFileSync as
|
|
7169
|
+
import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
|
|
6785
7170
|
var trimCommand = command({
|
|
6786
7171
|
name: "trim",
|
|
6787
7172
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -6800,7 +7185,7 @@ var trimCommand = command({
|
|
|
6800
7185
|
},
|
|
6801
7186
|
handler: async ({ input, out }) => {
|
|
6802
7187
|
try {
|
|
6803
|
-
const content =
|
|
7188
|
+
const content = readFileSync7(input, "utf8");
|
|
6804
7189
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
6805
7190
|
const trimmedLines = lines.map((line) => {
|
|
6806
7191
|
const record = JSON.parse(line);
|
|
@@ -6812,7 +7197,7 @@ var trimCommand = command({
|
|
|
6812
7197
|
const output = `${trimmedLines.join("\n")}
|
|
6813
7198
|
`;
|
|
6814
7199
|
if (out) {
|
|
6815
|
-
|
|
7200
|
+
writeFileSync5(out, output, "utf8");
|
|
6816
7201
|
console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
|
|
6817
7202
|
} else {
|
|
6818
7203
|
process.stdout.write(output);
|
|
@@ -6906,8 +7291,8 @@ function isTTY() {
|
|
|
6906
7291
|
|
|
6907
7292
|
// src/commands/validate/validate-files.ts
|
|
6908
7293
|
import { constants } from "node:fs";
|
|
6909
|
-
import { access, readdir, stat } from "node:fs/promises";
|
|
6910
|
-
import
|
|
7294
|
+
import { access, readdir as readdir3, stat } from "node:fs/promises";
|
|
7295
|
+
import path10 from "node:path";
|
|
6911
7296
|
async function validateFiles(paths) {
|
|
6912
7297
|
const filePaths = await expandPaths(paths);
|
|
6913
7298
|
const results = [];
|
|
@@ -6925,7 +7310,7 @@ async function validateFiles(paths) {
|
|
|
6925
7310
|
};
|
|
6926
7311
|
}
|
|
6927
7312
|
async function validateSingleFile(filePath) {
|
|
6928
|
-
const absolutePath =
|
|
7313
|
+
const absolutePath = path10.resolve(filePath);
|
|
6929
7314
|
const fileType = await detectFileType(absolutePath);
|
|
6930
7315
|
let result;
|
|
6931
7316
|
if (fileType === "eval") {
|
|
@@ -6950,7 +7335,7 @@ async function validateSingleFile(filePath) {
|
|
|
6950
7335
|
async function expandPaths(paths) {
|
|
6951
7336
|
const expanded = [];
|
|
6952
7337
|
for (const inputPath of paths) {
|
|
6953
|
-
const absolutePath =
|
|
7338
|
+
const absolutePath = path10.resolve(inputPath);
|
|
6954
7339
|
try {
|
|
6955
7340
|
await access(absolutePath, constants.F_OK);
|
|
6956
7341
|
} catch {
|
|
@@ -6972,9 +7357,9 @@ async function expandPaths(paths) {
|
|
|
6972
7357
|
async function findYamlFiles(dirPath) {
|
|
6973
7358
|
const results = [];
|
|
6974
7359
|
try {
|
|
6975
|
-
const entries2 = await
|
|
7360
|
+
const entries2 = await readdir3(dirPath, { withFileTypes: true });
|
|
6976
7361
|
for (const entry of entries2) {
|
|
6977
|
-
const fullPath =
|
|
7362
|
+
const fullPath = path10.join(dirPath, entry.name);
|
|
6978
7363
|
if (entry.isDirectory()) {
|
|
6979
7364
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
6980
7365
|
continue;
|
|
@@ -6991,7 +7376,7 @@ async function findYamlFiles(dirPath) {
|
|
|
6991
7376
|
return results;
|
|
6992
7377
|
}
|
|
6993
7378
|
function isYamlFile(filePath) {
|
|
6994
|
-
const ext =
|
|
7379
|
+
const ext = path10.extname(filePath).toLowerCase();
|
|
6995
7380
|
return ext === ".yaml" || ext === ".yml";
|
|
6996
7381
|
}
|
|
6997
7382
|
|
|
@@ -7029,14 +7414,14 @@ var validateCommand = command({
|
|
|
7029
7414
|
});
|
|
7030
7415
|
|
|
7031
7416
|
// src/commands/workspace/clean.ts
|
|
7032
|
-
import { existsSync as
|
|
7033
|
-
import { readFile as
|
|
7034
|
-
import
|
|
7417
|
+
import { existsSync as existsSync5 } from "node:fs";
|
|
7418
|
+
import { readFile as readFile4, readdir as readdir4, rm } from "node:fs/promises";
|
|
7419
|
+
import path11 from "node:path";
|
|
7035
7420
|
async function confirm(message) {
|
|
7036
7421
|
const readline2 = await import("node:readline");
|
|
7037
7422
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
7038
|
-
const answer = await new Promise((
|
|
7039
|
-
rl.question(`${message} [y/N] `,
|
|
7423
|
+
const answer = await new Promise((resolve2) => {
|
|
7424
|
+
rl.question(`${message} [y/N] `, resolve2);
|
|
7040
7425
|
});
|
|
7041
7426
|
rl.close();
|
|
7042
7427
|
return answer.toLowerCase() === "y";
|
|
@@ -7058,19 +7443,19 @@ var cleanCommand = command({
|
|
|
7058
7443
|
},
|
|
7059
7444
|
handler: async ({ repo, force }) => {
|
|
7060
7445
|
const poolRoot = getWorkspacePoolRoot();
|
|
7061
|
-
if (!
|
|
7446
|
+
if (!existsSync5(poolRoot)) {
|
|
7062
7447
|
console.log("No workspace pool entries found.");
|
|
7063
7448
|
return;
|
|
7064
7449
|
}
|
|
7065
7450
|
if (repo) {
|
|
7066
|
-
const entries2 = await
|
|
7451
|
+
const entries2 = await readdir4(poolRoot, { withFileTypes: true });
|
|
7067
7452
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7068
7453
|
const matchingDirs = [];
|
|
7069
7454
|
for (const dir of poolDirs) {
|
|
7070
|
-
const poolDir =
|
|
7071
|
-
const metadataPath =
|
|
7455
|
+
const poolDir = path11.join(poolRoot, dir.name);
|
|
7456
|
+
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
7072
7457
|
try {
|
|
7073
|
-
const raw = await
|
|
7458
|
+
const raw = await readFile4(metadataPath, "utf-8");
|
|
7074
7459
|
const metadata = JSON.parse(raw);
|
|
7075
7460
|
const hasRepo = metadata.repos?.some((r) => {
|
|
7076
7461
|
if (r.source.type === "git" && r.source.url) {
|
|
@@ -7099,7 +7484,7 @@ var cleanCommand = command({
|
|
|
7099
7484
|
}
|
|
7100
7485
|
for (const dir of matchingDirs) {
|
|
7101
7486
|
await rm(dir, { recursive: true, force: true });
|
|
7102
|
-
console.log(`Removed: ${
|
|
7487
|
+
console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
|
|
7103
7488
|
}
|
|
7104
7489
|
console.log("Done.");
|
|
7105
7490
|
} else {
|
|
@@ -7117,15 +7502,15 @@ var cleanCommand = command({
|
|
|
7117
7502
|
});
|
|
7118
7503
|
|
|
7119
7504
|
// src/commands/workspace/list.ts
|
|
7120
|
-
import { existsSync as
|
|
7121
|
-
import { readFile as
|
|
7122
|
-
import
|
|
7505
|
+
import { existsSync as existsSync6 } from "node:fs";
|
|
7506
|
+
import { readFile as readFile5, readdir as readdir5, stat as stat2 } from "node:fs/promises";
|
|
7507
|
+
import path12 from "node:path";
|
|
7123
7508
|
async function getDirectorySize(dirPath) {
|
|
7124
7509
|
let totalSize = 0;
|
|
7125
7510
|
try {
|
|
7126
|
-
const entries2 = await
|
|
7511
|
+
const entries2 = await readdir5(dirPath, { withFileTypes: true });
|
|
7127
7512
|
for (const entry of entries2) {
|
|
7128
|
-
const fullPath =
|
|
7513
|
+
const fullPath = path12.join(dirPath, entry.name);
|
|
7129
7514
|
if (entry.isDirectory()) {
|
|
7130
7515
|
totalSize += await getDirectorySize(fullPath);
|
|
7131
7516
|
} else {
|
|
@@ -7149,25 +7534,25 @@ var listCommand = command({
|
|
|
7149
7534
|
args: {},
|
|
7150
7535
|
handler: async () => {
|
|
7151
7536
|
const poolRoot = getWorkspacePoolRoot();
|
|
7152
|
-
if (!
|
|
7537
|
+
if (!existsSync6(poolRoot)) {
|
|
7153
7538
|
console.log("No workspace pool entries found.");
|
|
7154
7539
|
return;
|
|
7155
7540
|
}
|
|
7156
|
-
const entries2 = await
|
|
7541
|
+
const entries2 = await readdir5(poolRoot, { withFileTypes: true });
|
|
7157
7542
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7158
7543
|
if (poolDirs.length === 0) {
|
|
7159
7544
|
console.log("No workspace pool entries found.");
|
|
7160
7545
|
return;
|
|
7161
7546
|
}
|
|
7162
7547
|
for (const dir of poolDirs) {
|
|
7163
|
-
const poolDir =
|
|
7548
|
+
const poolDir = path12.join(poolRoot, dir.name);
|
|
7164
7549
|
const fingerprint = dir.name;
|
|
7165
|
-
const poolEntries = await
|
|
7550
|
+
const poolEntries = await readdir5(poolDir, { withFileTypes: true });
|
|
7166
7551
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
7167
|
-
const metadataPath =
|
|
7552
|
+
const metadataPath = path12.join(poolDir, "metadata.json");
|
|
7168
7553
|
let metadata = null;
|
|
7169
7554
|
try {
|
|
7170
|
-
const raw = await
|
|
7555
|
+
const raw = await readFile5(metadataPath, "utf-8");
|
|
7171
7556
|
metadata = JSON.parse(raw);
|
|
7172
7557
|
} catch {
|
|
7173
7558
|
}
|
|
@@ -7204,16 +7589,16 @@ var workspaceCommand = subcommands({
|
|
|
7204
7589
|
|
|
7205
7590
|
// src/update-check.ts
|
|
7206
7591
|
import { spawn as spawn2 } from "node:child_process";
|
|
7207
|
-
import { readFile as
|
|
7208
|
-
import { join } from "node:path";
|
|
7592
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
7593
|
+
import { join as join4 } from "node:path";
|
|
7209
7594
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
7210
7595
|
var AGENTV_DIR = getAgentvHome();
|
|
7211
7596
|
var CACHE_FILE = "version-check.json";
|
|
7212
7597
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
7213
|
-
async function getCachedUpdateInfo(
|
|
7214
|
-
const filePath =
|
|
7598
|
+
async function getCachedUpdateInfo(path13) {
|
|
7599
|
+
const filePath = path13 ?? join4(AGENTV_DIR, CACHE_FILE);
|
|
7215
7600
|
try {
|
|
7216
|
-
const raw = await
|
|
7601
|
+
const raw = await readFile6(filePath, "utf-8");
|
|
7217
7602
|
const data = JSON.parse(raw);
|
|
7218
7603
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
7219
7604
|
return data;
|
|
@@ -7245,7 +7630,7 @@ function buildNotice(currentVersion, latestVersion) {
|
|
|
7245
7630
|
}
|
|
7246
7631
|
function backgroundUpdateCheck() {
|
|
7247
7632
|
const dir = AGENTV_DIR;
|
|
7248
|
-
const filePath =
|
|
7633
|
+
const filePath = join4(dir, CACHE_FILE);
|
|
7249
7634
|
const script = `
|
|
7250
7635
|
const https = require('https');
|
|
7251
7636
|
const fs = require('fs');
|
|
@@ -7299,8 +7684,8 @@ var app = subcommands({
|
|
|
7299
7684
|
compare: compareCommand,
|
|
7300
7685
|
convert: convertCommand,
|
|
7301
7686
|
create: createCommand,
|
|
7302
|
-
generate: generateCommand,
|
|
7303
7687
|
init: initCmdTsCommand,
|
|
7688
|
+
pipeline: pipelineCommand,
|
|
7304
7689
|
results: resultsCommand,
|
|
7305
7690
|
self: selfCommand,
|
|
7306
7691
|
serve: resultsServeCommand,
|
|
@@ -7317,8 +7702,8 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
7317
7702
|
"compare",
|
|
7318
7703
|
"convert",
|
|
7319
7704
|
"create",
|
|
7320
|
-
"generate",
|
|
7321
7705
|
"init",
|
|
7706
|
+
"pipeline",
|
|
7322
7707
|
"results",
|
|
7323
7708
|
"self",
|
|
7324
7709
|
"serve",
|
|
@@ -7368,4 +7753,4 @@ export {
|
|
|
7368
7753
|
preprocessArgv,
|
|
7369
7754
|
runCli
|
|
7370
7755
|
};
|
|
7371
|
-
//# sourceMappingURL=chunk-
|
|
7756
|
+
//# sourceMappingURL=chunk-DJU4C6NS.js.map
|