agentv 3.11.1 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -12
- package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
- package/dist/{chunk-V2S5CZU3.js → chunk-2ELQ6F3C.js} +916 -523
- package/dist/chunk-2ELQ6F3C.js.map +1 -0
- package/dist/{chunk-JK6V4KVD.js → chunk-NR7QVL75.js} +32 -24
- package/dist/chunk-NR7QVL75.js.map +1 -0
- package/dist/{chunk-OYD2NB55.js → chunk-UYBLUYHN.js} +104 -15
- package/dist/chunk-UYBLUYHN.js.map +1 -0
- package/dist/{chunk-CKMAM2GD.js → chunk-VLOFRXH4.js} +461 -196
- package/dist/chunk-VLOFRXH4.js.map +1 -0
- package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
- package/dist/cli.js +5 -6
- package/dist/cli.js.map +1 -1
- package/dist/{dist-VUPMLHIV.js → dist-L6R5HJ72.js} +4 -5
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
- package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
- package/dist/index.js +5 -6
- package/dist/{interactive-FZJANO4A.js → interactive-5X62YEEX.js} +5 -6
- package/dist/{interactive-FZJANO4A.js.map → interactive-5X62YEEX.js.map} +1 -1
- package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
- package/dist/{simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js} +2 -3
- package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
- package/dist/templates/.agentv/.env.example +23 -0
- package/dist/templates/.agentv/config.yaml +13 -4
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-2QFWRIYL.js +0 -186
- package/dist/chunk-2QFWRIYL.js.map +0 -1
- package/dist/chunk-2RMPO6LY.js +0 -747
- package/dist/chunk-2RMPO6LY.js.map +0 -1
- package/dist/chunk-3Q7WIXT4.js +0 -4846
- package/dist/chunk-3Q7WIXT4.js.map +0 -1
- package/dist/chunk-73O2DCJP.js +0 -1274
- package/dist/chunk-73O2DCJP.js.map +0 -1
- package/dist/chunk-AUKF3Y3W.js +0 -212
- package/dist/chunk-AUKF3Y3W.js.map +0 -1
- package/dist/chunk-BRH7SIDP.js +0 -133
- package/dist/chunk-BRH7SIDP.js.map +0 -1
- package/dist/chunk-BXM4I3BM.js +0 -526
- package/dist/chunk-BXM4I3BM.js.map +0 -1
- package/dist/chunk-CKMAM2GD.js.map +0 -1
- package/dist/chunk-FHTURHTY.js +0 -546
- package/dist/chunk-FHTURHTY.js.map +0 -1
- package/dist/chunk-GJFXQQWG.js +0 -21
- package/dist/chunk-GJFXQQWG.js.map +0 -1
- package/dist/chunk-HKMLG4KF.js +0 -38
- package/dist/chunk-HKMLG4KF.js.map +0 -1
- package/dist/chunk-JGU3PVA4.js +0 -133
- package/dist/chunk-JGU3PVA4.js.map +0 -1
- package/dist/chunk-JK6V4KVD.js.map +0 -1
- package/dist/chunk-LHU5FGVZ.js +0 -4804
- package/dist/chunk-LHU5FGVZ.js.map +0 -1
- package/dist/chunk-OL2WGI6E.js +0 -149
- package/dist/chunk-OL2WGI6E.js.map +0 -1
- package/dist/chunk-ONETZL6N.js +0 -15
- package/dist/chunk-ONETZL6N.js.map +0 -1
- package/dist/chunk-OYD2NB55.js.map +0 -1
- package/dist/chunk-QV4UGEN6.js +0 -320
- package/dist/chunk-QV4UGEN6.js.map +0 -1
- package/dist/chunk-QXLDKGF3.js +0 -46
- package/dist/chunk-QXLDKGF3.js.map +0 -1
- package/dist/chunk-U6VEM66A.js +0 -63
- package/dist/chunk-U6VEM66A.js.map +0 -1
- package/dist/chunk-UALXHIMX.js +0 -48
- package/dist/chunk-UALXHIMX.js.map +0 -1
- package/dist/chunk-UGXG73VF.js +0 -55
- package/dist/chunk-UGXG73VF.js.map +0 -1
- package/dist/chunk-UHP5KEDL.js +0 -38
- package/dist/chunk-UHP5KEDL.js.map +0 -1
- package/dist/chunk-V2S5CZU3.js.map +0 -1
- package/dist/chunk-WVSXFZWP.js +0 -204
- package/dist/chunk-WVSXFZWP.js.map +0 -1
- package/dist/chunk-XSUMCWKO.js +0 -30
- package/dist/chunk-XSUMCWKO.js.map +0 -1
- package/dist/chunk-XUO7ZEHU.js +0 -181
- package/dist/chunk-XUO7ZEHU.js.map +0 -1
- package/dist/chunk-YSGUX5JT.js +0 -1002
- package/dist/chunk-YSGUX5JT.js.map +0 -1
- package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
- package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
- package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
- package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
- package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
- package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
- package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
- package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
- package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
- package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
- package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
- package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
- package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
- package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
- package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
- package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
- package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
- /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
- /package/dist/{dist-VUPMLHIV.js.map → dist-L6R5HJ72.js.map} +0 -0
- /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
- /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
- /package/dist/{esm-UYZ3HJBU.js.map → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map} +0 -0
- /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
|
@@ -1,35 +1,37 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
buildGradingArtifact,
|
|
7
|
-
buildTimingArtifact,
|
|
4
|
+
LEGACY_RESULTS_FILENAME,
|
|
5
|
+
RESULT_INDEX_FILENAME,
|
|
8
6
|
detectFileType,
|
|
9
7
|
findRepoRoot,
|
|
8
|
+
loadLightweightResults,
|
|
9
|
+
loadManifestResults,
|
|
10
10
|
loadRunCache,
|
|
11
11
|
package_default,
|
|
12
|
-
parseJsonlResults,
|
|
13
12
|
resolveEvalPaths,
|
|
13
|
+
resolveExistingRunPrimaryPath,
|
|
14
|
+
resolveResultSourcePath,
|
|
14
15
|
resolveRunCacheFile,
|
|
16
|
+
resolveWorkspaceOrFilePath,
|
|
15
17
|
runEvalCommand,
|
|
16
18
|
selectTarget,
|
|
17
19
|
toSnakeCaseDeep,
|
|
18
20
|
validateConfigFile,
|
|
19
21
|
validateEvalFile,
|
|
20
22
|
validateFileReferences,
|
|
21
|
-
validateTargetsFile
|
|
22
|
-
|
|
23
|
+
validateTargetsFile,
|
|
24
|
+
writeArtifactsFromResults
|
|
25
|
+
} from "./chunk-VLOFRXH4.js";
|
|
23
26
|
import {
|
|
24
27
|
createBuiltinRegistry,
|
|
25
|
-
createProvider,
|
|
26
28
|
executeScript,
|
|
27
|
-
generateRubrics,
|
|
28
29
|
getAgentvHome,
|
|
29
30
|
getOutputFilenames,
|
|
30
31
|
getWorkspacePoolRoot,
|
|
31
32
|
isAgentSkillsFormat,
|
|
32
33
|
loadTestById,
|
|
34
|
+
loadTestSuite,
|
|
33
35
|
loadTests,
|
|
34
36
|
normalizeLineEndings,
|
|
35
37
|
parseAgentSkillsEvals,
|
|
@@ -37,7 +39,7 @@ import {
|
|
|
37
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
38
40
|
transpileEvalYamlFile,
|
|
39
41
|
trimBaselineResult
|
|
40
|
-
} from "./chunk-
|
|
42
|
+
} from "./chunk-UYBLUYHN.js";
|
|
41
43
|
import {
|
|
42
44
|
__commonJS,
|
|
43
45
|
__esm,
|
|
@@ -2888,7 +2890,6 @@ function oneOf(literals) {
|
|
|
2888
2890
|
}
|
|
2889
2891
|
|
|
2890
2892
|
// src/commands/compare/index.ts
|
|
2891
|
-
import { readFileSync } from "node:fs";
|
|
2892
2893
|
var colors = {
|
|
2893
2894
|
reset: "\x1B[0m",
|
|
2894
2895
|
bold: "\x1B[1m",
|
|
@@ -2902,41 +2903,22 @@ var colors = {
|
|
|
2902
2903
|
var noColor = process.env.NO_COLOR !== void 0 || !process.stdout.isTTY;
|
|
2903
2904
|
var c = noColor ? Object.fromEntries(Object.keys(colors).map((k) => [k, ""])) : colors;
|
|
2904
2905
|
function loadJsonlResults(filePath) {
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
const testId = record.test_id ?? record.eval_id;
|
|
2910
|
-
if (typeof testId !== "string") {
|
|
2911
|
-
throw new Error(`Missing test_id in result: ${line}`);
|
|
2912
|
-
}
|
|
2913
|
-
if (typeof record.score !== "number") {
|
|
2914
|
-
throw new Error(`Missing or invalid score in result: ${line}`);
|
|
2915
|
-
}
|
|
2916
|
-
return { testId, score: record.score };
|
|
2917
|
-
});
|
|
2906
|
+
return loadLightweightResults(resolveResultSourcePath(filePath)).map((record) => ({
|
|
2907
|
+
testId: record.testId,
|
|
2908
|
+
score: record.score
|
|
2909
|
+
}));
|
|
2918
2910
|
}
|
|
2919
2911
|
function loadCombinedResults(filePath) {
|
|
2920
|
-
const content = readFileSync(filePath, "utf8");
|
|
2921
|
-
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
2922
2912
|
const groups = /* @__PURE__ */ new Map();
|
|
2923
|
-
for (const
|
|
2924
|
-
const record = JSON.parse(line);
|
|
2925
|
-
const testId = record.test_id ?? record.eval_id;
|
|
2926
|
-
if (typeof testId !== "string") {
|
|
2927
|
-
throw new Error(`Missing test_id in result: ${line}`);
|
|
2928
|
-
}
|
|
2929
|
-
if (typeof record.score !== "number") {
|
|
2930
|
-
throw new Error(`Missing or invalid score in result: ${line}`);
|
|
2931
|
-
}
|
|
2913
|
+
for (const record of loadLightweightResults(resolveResultSourcePath(filePath))) {
|
|
2932
2914
|
if (typeof record.target !== "string") {
|
|
2933
|
-
throw new Error(`Missing target field in combined result: ${
|
|
2915
|
+
throw new Error(`Missing target field in combined result source: ${filePath}`);
|
|
2934
2916
|
}
|
|
2935
2917
|
const target = record.target;
|
|
2936
2918
|
if (!groups.has(target)) {
|
|
2937
2919
|
groups.set(target, []);
|
|
2938
2920
|
}
|
|
2939
|
-
groups.get(target)?.push({ testId, score: record.score });
|
|
2921
|
+
groups.get(target)?.push({ testId: record.testId, score: record.score });
|
|
2940
2922
|
}
|
|
2941
2923
|
return groups;
|
|
2942
2924
|
}
|
|
@@ -3303,11 +3285,11 @@ var compareCommand = command({
|
|
|
3303
3285
|
});
|
|
3304
3286
|
|
|
3305
3287
|
// src/commands/convert/index.ts
|
|
3306
|
-
import { readFileSync
|
|
3288
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
3307
3289
|
import path from "node:path";
|
|
3308
3290
|
import { stringify as stringifyYaml } from "yaml";
|
|
3309
3291
|
async function convertJsonlToHtml(inputPath, outputPath) {
|
|
3310
|
-
const content =
|
|
3292
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3311
3293
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
3312
3294
|
const writer = await HtmlWriter.open(outputPath);
|
|
3313
3295
|
for (const line of lines) {
|
|
@@ -3317,7 +3299,7 @@ async function convertJsonlToHtml(inputPath, outputPath) {
|
|
|
3317
3299
|
return lines.length;
|
|
3318
3300
|
}
|
|
3319
3301
|
function convertJsonlToYaml(inputPath, outputPath) {
|
|
3320
|
-
const content =
|
|
3302
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3321
3303
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
3322
3304
|
let yamlOutput = "";
|
|
3323
3305
|
let isFirst = true;
|
|
@@ -3336,7 +3318,7 @@ function convertJsonlToYaml(inputPath, outputPath) {
|
|
|
3336
3318
|
return lines.length;
|
|
3337
3319
|
}
|
|
3338
3320
|
function convertEvalsJsonToYaml(inputPath) {
|
|
3339
|
-
const content =
|
|
3321
|
+
const content = readFileSync(inputPath, "utf8");
|
|
3340
3322
|
const parsed = JSON.parse(content);
|
|
3341
3323
|
if (!isAgentSkillsFormat(parsed)) {
|
|
3342
3324
|
throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
|
|
@@ -3924,7 +3906,7 @@ var evalPromptCommand = subcommands({
|
|
|
3924
3906
|
});
|
|
3925
3907
|
|
|
3926
3908
|
// src/commands/eval/commands/assert.ts
|
|
3927
|
-
import { readFileSync as
|
|
3909
|
+
import { readFileSync as readFileSync2 } from "node:fs";
|
|
3928
3910
|
import path3 from "node:path";
|
|
3929
3911
|
import fg from "fast-glob";
|
|
3930
3912
|
var evalAssertCommand = command({
|
|
@@ -3956,7 +3938,7 @@ var evalAssertCommand = command({
|
|
|
3956
3938
|
let resolvedOutput;
|
|
3957
3939
|
let resolvedInput;
|
|
3958
3940
|
if (file) {
|
|
3959
|
-
const content = JSON.parse(
|
|
3941
|
+
const content = JSON.parse(readFileSync2(path3.resolve(file), "utf8"));
|
|
3960
3942
|
resolvedOutput = content.output ?? "";
|
|
3961
3943
|
resolvedInput = content.input ?? "";
|
|
3962
3944
|
} else {
|
|
@@ -4183,7 +4165,7 @@ var evalRunCommand = command({
|
|
|
4183
4165
|
artifacts: option({
|
|
4184
4166
|
type: optional(string),
|
|
4185
4167
|
long: "artifacts",
|
|
4186
|
-
description: "Write companion artifacts (grading
|
|
4168
|
+
description: "Write companion artifacts (index.jsonl, <test>/grading.json, <test>/timing.json, timing.json, benchmark.json) to the specified directory"
|
|
4187
4169
|
}),
|
|
4188
4170
|
graderTarget: option({
|
|
4189
4171
|
type: optional(string),
|
|
@@ -4203,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4203
4185
|
},
|
|
4204
4186
|
handler: async (args) => {
|
|
4205
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4206
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-5X62YEEX.js");
|
|
4207
4189
|
await launchInteractiveWizard();
|
|
4208
4190
|
return;
|
|
4209
4191
|
}
|
|
@@ -4257,212 +4239,31 @@ var evalCommand = subcommands({
|
|
|
4257
4239
|
}
|
|
4258
4240
|
});
|
|
4259
4241
|
|
|
4260
|
-
// src/commands/generate/rubrics.ts
|
|
4261
|
-
import { readFile, writeFile as writeFile2 } from "node:fs/promises";
|
|
4262
|
-
import path4 from "node:path";
|
|
4263
|
-
import { pathToFileURL } from "node:url";
|
|
4264
|
-
import { isMap, isSeq, parseDocument } from "yaml";
|
|
4265
|
-
function isJsonObject(value) {
|
|
4266
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
4267
|
-
}
|
|
4268
|
-
function asString(value) {
|
|
4269
|
-
return typeof value === "string" ? value : void 0;
|
|
4270
|
-
}
|
|
4271
|
-
async function loadRubricGenerator() {
|
|
4272
|
-
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
4273
|
-
if (customGenerator) {
|
|
4274
|
-
const generatorPath = path4.resolve(customGenerator);
|
|
4275
|
-
const generatorUrl = pathToFileURL(generatorPath).href;
|
|
4276
|
-
const module = await import(generatorUrl);
|
|
4277
|
-
return module.generateRubrics;
|
|
4278
|
-
}
|
|
4279
|
-
return generateRubrics;
|
|
4280
|
-
}
|
|
4281
|
-
async function generateRubricsCommand(options) {
|
|
4282
|
-
const { file, target: targetOverride, verbose } = options;
|
|
4283
|
-
console.log(`Generating rubrics for: ${file}`);
|
|
4284
|
-
const absolutePath = path4.resolve(file);
|
|
4285
|
-
const content = await readFile(absolutePath, "utf8");
|
|
4286
|
-
const doc = parseDocument(content);
|
|
4287
|
-
const parsed = doc.toJSON();
|
|
4288
|
-
if (!isJsonObject(parsed)) {
|
|
4289
|
-
throw new Error(`Invalid YAML file format: ${file}`);
|
|
4290
|
-
}
|
|
4291
|
-
const suite = parsed;
|
|
4292
|
-
const evalcases = suite.tests;
|
|
4293
|
-
if (!Array.isArray(evalcases)) {
|
|
4294
|
-
throw new Error(`No tests found in ${file}`);
|
|
4295
|
-
}
|
|
4296
|
-
const targetSelection = await selectTarget({
|
|
4297
|
-
testFilePath: absolutePath,
|
|
4298
|
-
repoRoot: process.cwd(),
|
|
4299
|
-
cwd: process.cwd(),
|
|
4300
|
-
cliTargetName: targetOverride,
|
|
4301
|
-
dryRun: false,
|
|
4302
|
-
dryRunDelay: 0,
|
|
4303
|
-
dryRunDelayMin: 0,
|
|
4304
|
-
dryRunDelayMax: 0,
|
|
4305
|
-
env: process.env
|
|
4306
|
-
});
|
|
4307
|
-
if (verbose) {
|
|
4308
|
-
console.log(`Using target: ${targetSelection.targetName}`);
|
|
4309
|
-
}
|
|
4310
|
-
const provider = createProvider(targetSelection.resolvedTarget);
|
|
4311
|
-
const generateRubricsFunc = await loadRubricGenerator();
|
|
4312
|
-
let updatedCount = 0;
|
|
4313
|
-
let skippedCount = 0;
|
|
4314
|
-
const evalcasesNode = doc.getIn(["tests"]);
|
|
4315
|
-
if (!evalcasesNode || !isSeq(evalcasesNode)) {
|
|
4316
|
-
throw new Error("tests must be a sequence");
|
|
4317
|
-
}
|
|
4318
|
-
for (let i = 0; i < evalcases.length; i++) {
|
|
4319
|
-
const rawCase = evalcases[i];
|
|
4320
|
-
if (!isJsonObject(rawCase)) {
|
|
4321
|
-
continue;
|
|
4322
|
-
}
|
|
4323
|
-
const evalCase = rawCase;
|
|
4324
|
-
const id = asString(evalCase.id) ?? "unknown";
|
|
4325
|
-
const expectedOutcome = asString(evalCase.criteria) ?? asString(evalCase.outcome);
|
|
4326
|
-
if (!expectedOutcome) {
|
|
4327
|
-
if (verbose) {
|
|
4328
|
-
console.log(` Skipping ${id}: no criteria`);
|
|
4329
|
-
}
|
|
4330
|
-
skippedCount++;
|
|
4331
|
-
continue;
|
|
4332
|
-
}
|
|
4333
|
-
if (evalCase.rubrics !== void 0) {
|
|
4334
|
-
if (verbose) {
|
|
4335
|
-
console.log(` Skipping ${id}: rubrics already defined`);
|
|
4336
|
-
}
|
|
4337
|
-
skippedCount++;
|
|
4338
|
-
continue;
|
|
4339
|
-
}
|
|
4340
|
-
console.log(` Generating rubrics for: ${id}`);
|
|
4341
|
-
const question = extractQuestion(evalCase);
|
|
4342
|
-
const referenceAnswer = asString(evalCase.reference_answer);
|
|
4343
|
-
const rubrics = await generateRubricsFunc({
|
|
4344
|
-
criteria: expectedOutcome,
|
|
4345
|
-
question,
|
|
4346
|
-
referenceAnswer,
|
|
4347
|
-
provider
|
|
4348
|
-
});
|
|
4349
|
-
const caseNode = evalcasesNode.items[i];
|
|
4350
|
-
if (caseNode && isMap(caseNode)) {
|
|
4351
|
-
caseNode.set(
|
|
4352
|
-
"rubrics",
|
|
4353
|
-
rubrics.filter((r) => r.outcome !== void 0).map((r) => ({
|
|
4354
|
-
id: r.id,
|
|
4355
|
-
outcome: r.outcome,
|
|
4356
|
-
weight: r.weight,
|
|
4357
|
-
required: r.required ?? true
|
|
4358
|
-
}))
|
|
4359
|
-
);
|
|
4360
|
-
}
|
|
4361
|
-
updatedCount++;
|
|
4362
|
-
if (verbose) {
|
|
4363
|
-
console.log(` Generated ${rubrics.length} rubric(s)`);
|
|
4364
|
-
}
|
|
4365
|
-
}
|
|
4366
|
-
if (updatedCount > 0) {
|
|
4367
|
-
const output = doc.toString();
|
|
4368
|
-
await writeFile2(absolutePath, output, "utf8");
|
|
4369
|
-
console.log(`
|
|
4370
|
-
Updated ${updatedCount} test(s) with generated rubrics`);
|
|
4371
|
-
if (skippedCount > 0) {
|
|
4372
|
-
console.log(`Skipped ${skippedCount} test(s)`);
|
|
4373
|
-
}
|
|
4374
|
-
} else {
|
|
4375
|
-
console.log("\nNo tests updated (all already have rubrics or missing criteria)");
|
|
4376
|
-
}
|
|
4377
|
-
}
|
|
4378
|
-
function extractQuestion(evalCase) {
|
|
4379
|
-
const explicitQuestion = asString(evalCase.question);
|
|
4380
|
-
if (explicitQuestion) {
|
|
4381
|
-
return explicitQuestion;
|
|
4382
|
-
}
|
|
4383
|
-
const inputMessages = evalCase.input;
|
|
4384
|
-
if (!Array.isArray(inputMessages)) {
|
|
4385
|
-
return void 0;
|
|
4386
|
-
}
|
|
4387
|
-
for (const msg of inputMessages) {
|
|
4388
|
-
if (!isJsonObject(msg)) {
|
|
4389
|
-
continue;
|
|
4390
|
-
}
|
|
4391
|
-
if (msg.role === "user" && typeof msg.content === "string") {
|
|
4392
|
-
return msg.content;
|
|
4393
|
-
}
|
|
4394
|
-
}
|
|
4395
|
-
return void 0;
|
|
4396
|
-
}
|
|
4397
|
-
|
|
4398
|
-
// src/commands/generate/index.ts
|
|
4399
|
-
var rubricsCommand = command({
|
|
4400
|
-
name: "rubrics",
|
|
4401
|
-
description: "Generate rubrics from criteria in YAML eval file",
|
|
4402
|
-
args: {
|
|
4403
|
-
file: positional({
|
|
4404
|
-
type: string,
|
|
4405
|
-
displayName: "file",
|
|
4406
|
-
description: "Path to YAML eval file"
|
|
4407
|
-
}),
|
|
4408
|
-
target: option({
|
|
4409
|
-
type: optional(string),
|
|
4410
|
-
long: "target",
|
|
4411
|
-
short: "t",
|
|
4412
|
-
description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
|
|
4413
|
-
}),
|
|
4414
|
-
verbose: flag({
|
|
4415
|
-
long: "verbose",
|
|
4416
|
-
short: "v",
|
|
4417
|
-
description: "Show detailed progress"
|
|
4418
|
-
})
|
|
4419
|
-
},
|
|
4420
|
-
handler: async ({ file, target, verbose }) => {
|
|
4421
|
-
try {
|
|
4422
|
-
await generateRubricsCommand({
|
|
4423
|
-
file,
|
|
4424
|
-
target,
|
|
4425
|
-
verbose
|
|
4426
|
-
});
|
|
4427
|
-
} catch (error) {
|
|
4428
|
-
console.error(`Error: ${error.message}`);
|
|
4429
|
-
process.exit(1);
|
|
4430
|
-
}
|
|
4431
|
-
}
|
|
4432
|
-
});
|
|
4433
|
-
var generateCommand = subcommands({
|
|
4434
|
-
name: "generate",
|
|
4435
|
-
description: "Generate evaluation artifacts",
|
|
4436
|
-
cmds: {
|
|
4437
|
-
rubrics: rubricsCommand
|
|
4438
|
-
}
|
|
4439
|
-
});
|
|
4440
|
-
|
|
4441
4242
|
// src/commands/init/index.ts
|
|
4442
4243
|
import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
4443
|
-
import
|
|
4244
|
+
import path5 from "node:path";
|
|
4444
4245
|
import * as readline from "node:readline/promises";
|
|
4445
4246
|
|
|
4446
4247
|
// src/templates/index.ts
|
|
4447
|
-
import { readFileSync as
|
|
4448
|
-
import
|
|
4248
|
+
import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
|
|
4249
|
+
import path4 from "node:path";
|
|
4449
4250
|
import { fileURLToPath } from "node:url";
|
|
4450
4251
|
function getAgentvTemplates() {
|
|
4451
4252
|
return getTemplatesFromDir(".agentv");
|
|
4452
4253
|
}
|
|
4453
4254
|
function getEnvExampleTemplate() {
|
|
4454
|
-
const currentDir =
|
|
4455
|
-
const templatesBase = currentDir.includes(`${
|
|
4456
|
-
const content =
|
|
4255
|
+
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4256
|
+
const templatesBase = currentDir.includes(`${path4.sep}dist`) ? path4.join(currentDir, "templates") : currentDir;
|
|
4257
|
+
const content = readFileSync3(path4.join(templatesBase, ".env.example"), "utf-8");
|
|
4457
4258
|
return { path: ".env.example", content };
|
|
4458
4259
|
}
|
|
4459
4260
|
function getTemplatesFromDir(subdir) {
|
|
4460
|
-
const currentDir =
|
|
4261
|
+
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4461
4262
|
let templatesDir;
|
|
4462
|
-
if (currentDir.includes(`${
|
|
4463
|
-
templatesDir =
|
|
4263
|
+
if (currentDir.includes(`${path4.sep}dist`)) {
|
|
4264
|
+
templatesDir = path4.join(currentDir, "templates", subdir);
|
|
4464
4265
|
} else {
|
|
4465
|
-
templatesDir =
|
|
4266
|
+
templatesDir = path4.join(currentDir, subdir);
|
|
4466
4267
|
}
|
|
4467
4268
|
return readTemplatesRecursively(templatesDir, "");
|
|
4468
4269
|
}
|
|
@@ -4470,15 +4271,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
4470
4271
|
const templates = [];
|
|
4471
4272
|
const entries2 = readdirSync(dir);
|
|
4472
4273
|
for (const entry of entries2) {
|
|
4473
|
-
const fullPath =
|
|
4274
|
+
const fullPath = path4.join(dir, entry);
|
|
4474
4275
|
const stat3 = statSync(fullPath);
|
|
4475
|
-
const entryRelativePath = relativePath ?
|
|
4276
|
+
const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
|
|
4476
4277
|
if (stat3.isDirectory()) {
|
|
4477
4278
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
4478
4279
|
} else {
|
|
4479
|
-
const content =
|
|
4280
|
+
const content = readFileSync3(fullPath, "utf-8");
|
|
4480
4281
|
templates.push({
|
|
4481
|
-
path: entryRelativePath.split(
|
|
4282
|
+
path: entryRelativePath.split(path4.sep).join("/"),
|
|
4482
4283
|
// Normalize to forward slashes
|
|
4483
4284
|
content
|
|
4484
4285
|
});
|
|
@@ -4507,22 +4308,22 @@ async function promptYesNo(message) {
|
|
|
4507
4308
|
}
|
|
4508
4309
|
}
|
|
4509
4310
|
async function initCommand(options = {}) {
|
|
4510
|
-
const targetPath =
|
|
4511
|
-
const agentvDir =
|
|
4311
|
+
const targetPath = path5.resolve(options.targetPath ?? ".");
|
|
4312
|
+
const agentvDir = path5.join(targetPath, ".agentv");
|
|
4512
4313
|
const otherAgentvTemplates = getAgentvTemplates();
|
|
4513
4314
|
const envTemplate = getEnvExampleTemplate();
|
|
4514
4315
|
const existingFiles = [];
|
|
4515
4316
|
if (envTemplate) {
|
|
4516
|
-
const envFilePath =
|
|
4317
|
+
const envFilePath = path5.join(targetPath, ".env.example");
|
|
4517
4318
|
if (existsSync(envFilePath)) {
|
|
4518
4319
|
existingFiles.push(".env.example");
|
|
4519
4320
|
}
|
|
4520
4321
|
}
|
|
4521
4322
|
if (existsSync(agentvDir)) {
|
|
4522
4323
|
for (const template of otherAgentvTemplates) {
|
|
4523
|
-
const targetFilePath =
|
|
4324
|
+
const targetFilePath = path5.join(agentvDir, template.path);
|
|
4524
4325
|
if (existsSync(targetFilePath)) {
|
|
4525
|
-
existingFiles.push(
|
|
4326
|
+
existingFiles.push(path5.relative(targetPath, targetFilePath));
|
|
4526
4327
|
}
|
|
4527
4328
|
}
|
|
4528
4329
|
}
|
|
@@ -4544,18 +4345,18 @@ async function initCommand(options = {}) {
|
|
|
4544
4345
|
mkdirSync(agentvDir, { recursive: true });
|
|
4545
4346
|
}
|
|
4546
4347
|
if (envTemplate) {
|
|
4547
|
-
const envFilePath =
|
|
4348
|
+
const envFilePath = path5.join(targetPath, ".env.example");
|
|
4548
4349
|
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
4549
4350
|
console.log("Created .env.example");
|
|
4550
4351
|
}
|
|
4551
4352
|
for (const template of otherAgentvTemplates) {
|
|
4552
|
-
const targetFilePath =
|
|
4553
|
-
const targetDirPath =
|
|
4353
|
+
const targetFilePath = path5.join(agentvDir, template.path);
|
|
4354
|
+
const targetDirPath = path5.dirname(targetFilePath);
|
|
4554
4355
|
if (!existsSync(targetDirPath)) {
|
|
4555
4356
|
mkdirSync(targetDirPath, { recursive: true });
|
|
4556
4357
|
}
|
|
4557
4358
|
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
4558
|
-
console.log(`Created ${
|
|
4359
|
+
console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
|
|
4559
4360
|
}
|
|
4560
4361
|
console.log("\nAgentV initialized successfully!");
|
|
4561
4362
|
console.log("\nFiles installed to root:");
|
|
@@ -4563,7 +4364,7 @@ async function initCommand(options = {}) {
|
|
|
4563
4364
|
console.log(" - .env.example");
|
|
4564
4365
|
}
|
|
4565
4366
|
console.log(`
|
|
4566
|
-
Files installed to ${
|
|
4367
|
+
Files installed to ${path5.relative(targetPath, agentvDir)}:`);
|
|
4567
4368
|
for (const t of otherAgentvTemplates) {
|
|
4568
4369
|
console.log(` - ${t.path}`);
|
|
4569
4370
|
}
|
|
@@ -4593,13 +4394,443 @@ var initCmdTsCommand = command({
|
|
|
4593
4394
|
}
|
|
4594
4395
|
});
|
|
4595
4396
|
|
|
4397
|
+
// src/commands/pipeline/bench.ts
|
|
4398
|
+
import { readFile, readdir, writeFile as writeFile2 } from "node:fs/promises";
|
|
4399
|
+
import { join } from "node:path";
|
|
4400
|
+
var evalBenchCommand = command({
|
|
4401
|
+
name: "bench",
|
|
4402
|
+
description: "Merge evaluator scores and produce benchmark artifacts",
|
|
4403
|
+
args: {
|
|
4404
|
+
exportDir: positional({
|
|
4405
|
+
type: string,
|
|
4406
|
+
displayName: "export-dir",
|
|
4407
|
+
description: "Export directory from pipeline input/grade"
|
|
4408
|
+
})
|
|
4409
|
+
},
|
|
4410
|
+
handler: async ({ exportDir }) => {
|
|
4411
|
+
const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
|
|
4412
|
+
const testIds = manifest.test_ids;
|
|
4413
|
+
const targetName = manifest.target?.name ?? "unknown";
|
|
4414
|
+
const stdinData = await readStdin();
|
|
4415
|
+
const llmScores = stdinData ? JSON.parse(stdinData) : {};
|
|
4416
|
+
const indexLines = [];
|
|
4417
|
+
const allPassRates = [];
|
|
4418
|
+
for (const testId of testIds) {
|
|
4419
|
+
const testDir = join(exportDir, testId);
|
|
4420
|
+
const evaluators = [];
|
|
4421
|
+
const allAssertions = [];
|
|
4422
|
+
const codeResultsDir = join(testDir, "code_grader_results");
|
|
4423
|
+
try {
|
|
4424
|
+
const resultFiles = (await readdir(codeResultsDir)).filter((f) => f.endsWith(".json"));
|
|
4425
|
+
for (const file of resultFiles) {
|
|
4426
|
+
const result = JSON.parse(await readFile(join(codeResultsDir, file), "utf8"));
|
|
4427
|
+
evaluators.push({
|
|
4428
|
+
name: result.name,
|
|
4429
|
+
type: "code-grader",
|
|
4430
|
+
score: result.score,
|
|
4431
|
+
weight: result.weight ?? 1,
|
|
4432
|
+
assertions: result.assertions ?? []
|
|
4433
|
+
});
|
|
4434
|
+
for (const a of result.assertions ?? []) {
|
|
4435
|
+
allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
|
|
4436
|
+
}
|
|
4437
|
+
}
|
|
4438
|
+
} catch {
|
|
4439
|
+
}
|
|
4440
|
+
const testLlmScores = llmScores[testId] ?? {};
|
|
4441
|
+
const llmGradersDir = join(testDir, "llm_graders");
|
|
4442
|
+
try {
|
|
4443
|
+
const graderFiles = (await readdir(llmGradersDir)).filter((f) => f.endsWith(".json"));
|
|
4444
|
+
for (const file of graderFiles) {
|
|
4445
|
+
const graderMeta = JSON.parse(await readFile(join(llmGradersDir, file), "utf8"));
|
|
4446
|
+
const graderName = graderMeta.name;
|
|
4447
|
+
const llmResult = testLlmScores[graderName];
|
|
4448
|
+
if (llmResult) {
|
|
4449
|
+
evaluators.push({
|
|
4450
|
+
name: graderName,
|
|
4451
|
+
type: "llm-grader",
|
|
4452
|
+
score: llmResult.score,
|
|
4453
|
+
weight: graderMeta.weight ?? 1,
|
|
4454
|
+
assertions: llmResult.assertions ?? []
|
|
4455
|
+
});
|
|
4456
|
+
for (const a of llmResult.assertions ?? []) {
|
|
4457
|
+
allAssertions.push({ text: a.text, passed: a.passed, evidence: a.evidence ?? "" });
|
|
4458
|
+
}
|
|
4459
|
+
}
|
|
4460
|
+
}
|
|
4461
|
+
} catch {
|
|
4462
|
+
}
|
|
4463
|
+
const totalWeight = evaluators.reduce((sum, e) => sum + e.weight, 0);
|
|
4464
|
+
const weightedScore = totalWeight > 0 ? evaluators.reduce((sum, e) => sum + e.score * e.weight, 0) / totalWeight : 0;
|
|
4465
|
+
const passed = allAssertions.filter((a) => a.passed).length;
|
|
4466
|
+
const failed = allAssertions.filter((a) => !a.passed).length;
|
|
4467
|
+
const passRate = allAssertions.length > 0 ? Math.round(passed / allAssertions.length * 1e3) / 1e3 : 0;
|
|
4468
|
+
allPassRates.push(passRate);
|
|
4469
|
+
const grading = {
|
|
4470
|
+
assertions: allAssertions,
|
|
4471
|
+
summary: { passed, failed, total: allAssertions.length, pass_rate: passRate },
|
|
4472
|
+
execution_metrics: { tool_calls: {}, total_tool_calls: 0, errors_encountered: 0 },
|
|
4473
|
+
evaluators: evaluators.map((e) => ({
|
|
4474
|
+
name: e.name,
|
|
4475
|
+
type: e.type,
|
|
4476
|
+
score: e.score,
|
|
4477
|
+
reasoning: "",
|
|
4478
|
+
weight: e.weight
|
|
4479
|
+
}))
|
|
4480
|
+
};
|
|
4481
|
+
await writeFile2(
|
|
4482
|
+
join(testDir, "grading.json"),
|
|
4483
|
+
`${JSON.stringify(grading, null, 2)}
|
|
4484
|
+
`,
|
|
4485
|
+
"utf8"
|
|
4486
|
+
);
|
|
4487
|
+
indexLines.push(
|
|
4488
|
+
JSON.stringify({
|
|
4489
|
+
timestamp: manifest.timestamp,
|
|
4490
|
+
test_id: testId,
|
|
4491
|
+
score: Math.round(weightedScore * 1e3) / 1e3,
|
|
4492
|
+
target: targetName,
|
|
4493
|
+
grading_path: `${testId}/grading.json`,
|
|
4494
|
+
timing_path: `${testId}/timing.json`
|
|
4495
|
+
})
|
|
4496
|
+
);
|
|
4497
|
+
}
|
|
4498
|
+
await writeFile2(
|
|
4499
|
+
join(exportDir, "index.jsonl"),
|
|
4500
|
+
indexLines.length > 0 ? `${indexLines.join("\n")}
|
|
4501
|
+
` : "",
|
|
4502
|
+
"utf8"
|
|
4503
|
+
);
|
|
4504
|
+
const passRateStats = computeStats(allPassRates);
|
|
4505
|
+
const benchmark = {
|
|
4506
|
+
metadata: {
|
|
4507
|
+
eval_file: manifest.eval_file,
|
|
4508
|
+
timestamp: manifest.timestamp,
|
|
4509
|
+
targets: [targetName],
|
|
4510
|
+
tests_run: testIds
|
|
4511
|
+
},
|
|
4512
|
+
run_summary: {
|
|
4513
|
+
[targetName]: {
|
|
4514
|
+
pass_rate: passRateStats,
|
|
4515
|
+
time_seconds: { mean: 0, stddev: 0 },
|
|
4516
|
+
tokens: { mean: 0, stddev: 0 }
|
|
4517
|
+
}
|
|
4518
|
+
},
|
|
4519
|
+
notes: []
|
|
4520
|
+
};
|
|
4521
|
+
await writeFile2(
|
|
4522
|
+
join(exportDir, "benchmark.json"),
|
|
4523
|
+
`${JSON.stringify(benchmark, null, 2)}
|
|
4524
|
+
`,
|
|
4525
|
+
"utf8"
|
|
4526
|
+
);
|
|
4527
|
+
console.log(`Benchmark: ${testIds.length} test(s), pass_rate=${passRateStats.mean}`);
|
|
4528
|
+
}
|
|
4529
|
+
});
|
|
4530
|
+
/**
 * Drains `process.stdin` and returns everything that was received.
 *
 * @returns {Promise<string>} The concatenated input decoded as UTF-8, with
 *   leading and trailing whitespace removed.
 */
async function readStdin() {
  const pieces = [];
  for await (const piece of process.stdin) {
    pieces.push(piece);
  }
  const combined = Buffer.concat(pieces);
  return combined.toString("utf8").trim();
}
|
|
4537
|
+
/**
 * Summarises a list of numbers as mean and population standard deviation
 * (variance is divided by the number of values), each rounded to three
 * decimal places.
 *
 * @param {number[]} values - Samples to summarise.
 * @returns {{ mean: number, stddev: number }} Both fields are 0 for an empty list.
 */
function computeStats(values) {
  const count = values.length;
  if (count === 0) {
    return { mean: 0, stddev: 0 };
  }
  const sumOf = (numbers) => numbers.reduce((acc, n) => acc + n, 0);
  const roundTo3 = (x) => Math.round(x * 1e3) / 1e3;
  const average = sumOf(values) / count;
  const variance = sumOf(values.map((v) => (v - average) ** 2)) / count;
  return {
    mean: roundTo3(average),
    stddev: roundTo3(Math.sqrt(variance))
  };
}
|
|
4546
|
+
|
|
4547
|
+
// src/commands/pipeline/grade.ts
|
|
4548
|
+
import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
|
|
4549
|
+
import { join as join2 } from "node:path";
|
|
4550
|
+
var evalGradeCommand = command({
|
|
4551
|
+
name: "grade",
|
|
4552
|
+
description: "Run code-grader assertions on responses in an export directory",
|
|
4553
|
+
args: {
|
|
4554
|
+
exportDir: positional({
|
|
4555
|
+
type: string,
|
|
4556
|
+
displayName: "export-dir",
|
|
4557
|
+
description: "Export directory from pipeline input"
|
|
4558
|
+
})
|
|
4559
|
+
},
|
|
4560
|
+
handler: async ({ exportDir }) => {
|
|
4561
|
+
const manifestPath = join2(exportDir, "manifest.json");
|
|
4562
|
+
const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
|
|
4563
|
+
const testIds = manifest.test_ids;
|
|
4564
|
+
let totalGraders = 0;
|
|
4565
|
+
let totalPassed = 0;
|
|
4566
|
+
for (const testId of testIds) {
|
|
4567
|
+
const testDir = join2(exportDir, testId);
|
|
4568
|
+
const codeGradersDir = join2(testDir, "code_graders");
|
|
4569
|
+
const resultsDir = join2(testDir, "code_grader_results");
|
|
4570
|
+
let graderFiles;
|
|
4571
|
+
try {
|
|
4572
|
+
graderFiles = (await readdir2(codeGradersDir)).filter((f) => f.endsWith(".json"));
|
|
4573
|
+
} catch {
|
|
4574
|
+
continue;
|
|
4575
|
+
}
|
|
4576
|
+
if (graderFiles.length === 0) continue;
|
|
4577
|
+
await mkdir2(resultsDir, { recursive: true });
|
|
4578
|
+
const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
|
|
4579
|
+
const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
|
|
4580
|
+
for (const graderFile of graderFiles) {
|
|
4581
|
+
const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
|
|
4582
|
+
const graderName = graderConfig.name;
|
|
4583
|
+
const payload = JSON.stringify({
|
|
4584
|
+
output: [{ role: "assistant", content: responseText }],
|
|
4585
|
+
input: inputData.input_messages,
|
|
4586
|
+
question: inputData.input_text,
|
|
4587
|
+
criteria: "",
|
|
4588
|
+
expected_output: [],
|
|
4589
|
+
reference_answer: "",
|
|
4590
|
+
input_files: [],
|
|
4591
|
+
trace: null,
|
|
4592
|
+
token_usage: null,
|
|
4593
|
+
cost_usd: null,
|
|
4594
|
+
duration_ms: null,
|
|
4595
|
+
start_time: null,
|
|
4596
|
+
end_time: null,
|
|
4597
|
+
file_changes: null,
|
|
4598
|
+
workspace_path: null,
|
|
4599
|
+
config: graderConfig.config ?? null,
|
|
4600
|
+
metadata: {},
|
|
4601
|
+
input_text: inputData.input_text,
|
|
4602
|
+
output_text: responseText,
|
|
4603
|
+
expected_output_text: ""
|
|
4604
|
+
});
|
|
4605
|
+
try {
|
|
4606
|
+
const stdout = await executeScript(
|
|
4607
|
+
graderConfig.command,
|
|
4608
|
+
payload,
|
|
4609
|
+
void 0,
|
|
4610
|
+
graderConfig.cwd
|
|
4611
|
+
);
|
|
4612
|
+
const parsed = JSON.parse(stdout);
|
|
4613
|
+
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
4614
|
+
const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
|
|
4615
|
+
const result = {
|
|
4616
|
+
name: graderName,
|
|
4617
|
+
type: "code-grader",
|
|
4618
|
+
score,
|
|
4619
|
+
weight: graderConfig.weight ?? 1,
|
|
4620
|
+
assertions,
|
|
4621
|
+
details: parsed.details ?? {}
|
|
4622
|
+
};
|
|
4623
|
+
await writeFile3(
|
|
4624
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4625
|
+
`${JSON.stringify(result, null, 2)}
|
|
4626
|
+
`,
|
|
4627
|
+
"utf8"
|
|
4628
|
+
);
|
|
4629
|
+
totalGraders++;
|
|
4630
|
+
if (score >= 0.5) totalPassed++;
|
|
4631
|
+
} catch (error) {
|
|
4632
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
4633
|
+
console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
|
|
4634
|
+
const errorResult = {
|
|
4635
|
+
name: graderName,
|
|
4636
|
+
type: "code-grader",
|
|
4637
|
+
score: 0,
|
|
4638
|
+
weight: graderConfig.weight ?? 1,
|
|
4639
|
+
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
4640
|
+
details: { error: message }
|
|
4641
|
+
};
|
|
4642
|
+
await writeFile3(
|
|
4643
|
+
join2(resultsDir, `${graderName}.json`),
|
|
4644
|
+
`${JSON.stringify(errorResult, null, 2)}
|
|
4645
|
+
`,
|
|
4646
|
+
"utf8"
|
|
4647
|
+
);
|
|
4648
|
+
totalGraders++;
|
|
4649
|
+
}
|
|
4650
|
+
}
|
|
4651
|
+
}
|
|
4652
|
+
console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
|
|
4653
|
+
}
|
|
4654
|
+
});
|
|
4655
|
+
|
|
4656
|
+
// src/commands/pipeline/input.ts
|
|
4657
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
4658
|
+
import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
|
|
4659
|
+
import { dirname, join as join3, resolve } from "node:path";
|
|
4660
|
+
/**
 * `input` subcommand: extracts everything an agent-mode run needs from an
 * eval file into an output directory.
 *
 * Written per test (`<out>/<test id>/`):
 *   - `input.json`           question text, input messages, file paths, metadata
 *   - `invoke.json`          how to invoke the target (`cli` details or `agent` instructions)
 *   - `criteria.md`          the test criteria (empty when none)
 *   - `expected_output.json` only when expected output or a reference answer exists
 *   - grader configs via `writeGraderConfigs`
 * Written once: `<out>/manifest.json` with the eval file path, a timestamp,
 * the target name/kind and the list of test ids.
 *
 * Exits the process with code 1 when the eval file contains no tests.
 */
var evalInputCommand = command({
  name: "input",
  description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
  args: {
    evalPath: positional({
      type: string,
      displayName: "eval-path",
      description: "Path to eval YAML file"
    }),
    out: option({
      type: string,
      long: "out",
      description: "Output directory for extracted inputs"
    })
  },
  handler: async ({ evalPath, out }) => {
    const resolvedEvalPath = resolve(evalPath);
    const outDir = resolve(out);
    const evalDir = dirname(resolvedEvalPath);
    const repoRoot = await findRepoRoot(evalDir);
    const suite = await loadTestSuite(resolvedEvalPath, repoRoot);
    const tests = suite.tests;
    if (tests.length === 0) {
      console.error("No tests found in eval file.");
      process.exit(1);
    }
    // Defaults describe "the agent itself is the target"; they are only
    // replaced when a target can be selected for this eval file.
    let targetInfo = null;
    let targetName = "agent";
    let targetKind = "agent";
    try {
      const selection = await selectTarget({
        testFilePath: resolvedEvalPath,
        repoRoot,
        cwd: evalDir,
        dryRun: false,
        dryRunDelay: 0,
        dryRunDelayMin: 0,
        dryRunDelayMax: 0,
        env: process.env
      });
      targetName = selection.targetName;
      if (selection.resolvedTarget.kind === "cli") {
        targetKind = "cli";
        const config = selection.resolvedTarget.config;
        targetInfo = {
          kind: "cli",
          command: config.command,
          cwd: config.cwd ?? evalDir,
          timeoutMs: config.timeoutMs ?? 3e4
        };
      }
    } catch {
      // Deliberate best effort: when target selection fails the run falls
      // back to the agent-mode defaults set above.
    }
    const testIds = [];
    for (const test of tests) {
      const testDir = join3(outDir, test.id);
      await mkdir3(testDir, { recursive: true });
      testIds.push(test.id);
      // Content is copied through unchanged whatever its type.
      const inputMessages = test.input.map((m) => ({
        role: m.role,
        content: m.content
      }));
      await writeJson(join3(testDir, "input.json"), {
        input_text: test.question,
        input_messages: inputMessages,
        file_paths: test.file_paths,
        metadata: test.metadata ?? {}
      });
      if (targetInfo) {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "cli",
          command: targetInfo.command,
          cwd: targetInfo.cwd,
          timeout_ms: targetInfo.timeoutMs,
          env: {}
        });
      } else {
        await writeJson(join3(testDir, "invoke.json"), {
          kind: "agent",
          instructions: "Execute this task in the current workspace. The agent IS the target."
        });
      }
      await writeFile4(join3(testDir, "criteria.md"), test.criteria ?? "", "utf8");
      const hasReferenceAnswer = test.reference_answer !== void 0 && test.reference_answer !== "";
      if (test.expected_output.length > 0 || hasReferenceAnswer) {
        await writeJson(join3(testDir, "expected_output.json"), {
          expected_output: test.expected_output,
          reference_answer: test.reference_answer ?? ""
        });
      }
      await writeGraderConfigs(testDir, test.assertions ?? [], evalDir);
    }
    await writeJson(join3(outDir, "manifest.json"), {
      eval_file: resolvedEvalPath,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      target: {
        name: targetName,
        kind: targetKind
      },
      test_ids: testIds
    });
    console.log(`Extracted ${testIds.length} test(s) to ${outDir}`);
  }
});
|
|
4764
|
+
/**
 * Writes one JSON config file per grader assertion of a test.
 *
 * `code-grader` / `code-judge` assertions go to `<testDir>/code_graders/`,
 * `llm-grader` / `llm-judge` assertions go to `<testDir>/llm_graders/`; any
 * other assertion type is ignored. Each directory is created lazily, the
 * first time an assertion of its kind is seen.
 *
 * @param {string} testDir - Directory of the test being exported.
 * @param {object[]} assertions - Assertion configs of the test.
 * @param {string} evalDir - Fallback working directory for code graders that
 *   define neither `resolvedCwd` nor `cwd`.
 * @returns {Promise<void>}
 */
async function writeGraderConfigs(testDir, assertions, evalDir) {
  const codeGradersDir = join3(testDir, "code_graders");
  const llmGradersDir = join3(testDir, "llm_graders");
  const createdDirs = new Set();
  // Creates a directory at most once per call of writeGraderConfigs.
  const ensureDir = async (dir) => {
    if (createdDirs.has(dir)) return;
    await mkdir3(dir, { recursive: true });
    createdDirs.add(dir);
  };
  for (const assertion of assertions) {
    switch (assertion.type) {
      case "code-grader":
      case "code-judge": {
        await ensureDir(codeGradersDir);
        await writeJson(join3(codeGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          command: assertion.command,
          cwd: assertion.resolvedCwd ?? assertion.cwd ?? evalDir,
          weight: assertion.weight ?? 1,
          config: assertion.config ?? {}
        });
        break;
      }
      case "llm-grader":
      case "llm-judge": {
        await ensureDir(llmGradersDir);
        // The inline prompt is used directly, or as the fallback when the
        // resolved prompt file cannot be read.
        const inlinePrompt = typeof assertion.prompt === "string" ? assertion.prompt : "";
        let promptContent = inlinePrompt;
        if (assertion.resolvedPromptPath) {
          try {
            promptContent = await readFile3(assertion.resolvedPromptPath, "utf8");
          } catch {
            promptContent = inlinePrompt;
          }
        }
        await writeJson(join3(llmGradersDir, `${assertion.name}.json`), {
          name: assertion.name,
          prompt_content: promptContent,
          weight: assertion.weight ?? 1,
          threshold: 0.5,
          config: {}
        });
        break;
      }
      default:
        break;
    }
  }
}
|
|
4809
|
+
/**
 * Serialises `data` as 2-space-indented JSON followed by a newline and writes
 * it to `filePath` as UTF-8.
 *
 * @param {string} filePath - Destination file.
 * @param {unknown} data - Value to serialise.
 * @returns {Promise<void>}
 */
async function writeJson(filePath, data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile4(filePath, `${serialized}\n`, "utf8");
}
|
|
4813
|
+
|
|
4814
|
+
// src/commands/pipeline/index.ts
|
|
4815
|
+
// Groups the input, grade and bench commands under one `pipeline` subcommand set.
var pipelineCommand = subcommands({
  name: "pipeline",
  description: "Agent-mode eval pipeline commands (input \u2192 grade \u2192 bench)",
  cmds: {
    input: evalInputCommand,
    grade: evalGradeCommand,
    bench: evalBenchCommand
  }
});
|
|
4824
|
+
|
|
4596
4825
|
// src/commands/results/export.ts
|
|
4597
|
-
import
|
|
4598
|
-
|
|
4826
|
+
import path7 from "node:path";
|
|
4827
|
+
|
|
4828
|
+
// src/commands/results/shared.ts
|
|
4829
|
+
import { existsSync as existsSync2 } from "node:fs";
|
|
4599
4830
|
|
|
4600
4831
|
// src/commands/trace/utils.ts
|
|
4601
|
-
import { readFileSync as
|
|
4602
|
-
import
|
|
4832
|
+
import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
4833
|
+
import path6 from "node:path";
|
|
4603
4834
|
var colors2 = {
|
|
4604
4835
|
reset: "\x1B[0m",
|
|
4605
4836
|
bold: "\x1B[1m",
|
|
@@ -4625,7 +4856,26 @@ function padLeft2(str, len) {
|
|
|
4625
4856
|
return " ".repeat(Math.max(0, len - plainLen)) + str;
|
|
4626
4857
|
}
|
|
4627
4858
|
function loadResultFile(filePath) {
|
|
4628
|
-
const
|
|
4859
|
+
const resolvedFilePath = resolveTraceResultPath(filePath);
|
|
4860
|
+
if (path6.extname(resolvedFilePath) === ".json") {
|
|
4861
|
+
return loadOtlpTraceFile(resolvedFilePath);
|
|
4862
|
+
}
|
|
4863
|
+
if (path6.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
|
|
4864
|
+
return loadManifestAsRawResults(resolvedFilePath);
|
|
4865
|
+
}
|
|
4866
|
+
return loadJsonlRecords(resolvedFilePath);
|
|
4867
|
+
}
|
|
4868
|
+
/**
 * Maps a user-supplied trace/result path to the file that should be loaded.
 *
 * A path whose basename equals `LEGACY_RESULTS_FILENAME` is returned as-is;
 * every other path is passed through `resolveWorkspaceOrFilePath`.
 *
 * @param {string} filePath - Path given by the caller.
 * @returns {string} The path to load.
 */
function resolveTraceResultPath(filePath) {
  if (path6.basename(filePath) === LEGACY_RESULTS_FILENAME) {
    return filePath;
  }
  // The former extension check returned exactly this in both of its branches.
  return resolveWorkspaceOrFilePath(filePath);
}
|
|
4877
|
+
function loadJsonlRecords(filePath) {
|
|
4878
|
+
const content = readFileSync4(filePath, "utf8");
|
|
4629
4879
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
4630
4880
|
return lines.map((line, i) => {
|
|
4631
4881
|
const record = JSON.parse(line);
|
|
@@ -4635,25 +4885,274 @@ function loadResultFile(filePath) {
|
|
|
4635
4885
|
return record;
|
|
4636
4886
|
});
|
|
4637
4887
|
}
|
|
4888
|
+
/**
 * Loads the results referenced by a manifest file and converts each of them
 * to the raw snake_case record shape produced by `toRawResult`.
 *
 * @param {string} filePath - Path handed to `loadManifestResults`.
 * @returns {object[]} One raw record per loaded result.
 */
function loadManifestAsRawResults(filePath) {
  const results = loadManifestResults(filePath);
  return results.map((entry) => toRawResult(entry));
}
|
|
4891
|
+
/**
 * Converts a camelCase evaluation result into the raw snake_case record shape.
 *
 * Only the listed fields are copied. Assertions are narrowed to
 * `{ text, passed, evidence }`, scores to
 * `{ name, type, score, assertions, weight }` and token usage to
 * `{ input, output, cached }`. Absent optional fields stay `undefined`.
 *
 * @param {object} result - Result with camelCase keys (`testId`, `tokenUsage`, …).
 * @returns {object} The same data with snake_case keys.
 */
function toRawResult(result) {
  const pickAssertion = ({ text, passed, evidence }) => ({ text, passed, evidence });
  const pickScore = (entry) => ({
    name: entry.name,
    type: entry.type,
    score: entry.score,
    assertions: entry.assertions?.map((assertion) => pickAssertion(assertion)),
    weight: entry.weight
  });
  const usage = result.tokenUsage;
  const tokenUsage = usage ? { input: usage.input, output: usage.output, cached: usage.cached } : void 0;
  return {
    timestamp: result.timestamp,
    test_id: result.testId,
    eval_set: result.eval_set,
    conversation_id: result.conversationId,
    score: result.score,
    assertions: result.assertions?.map((assertion) => pickAssertion(assertion)),
    target: result.target,
    error: result.error,
    scores: result.scores?.map((entry) => pickScore(entry)),
    token_usage: tokenUsage,
    cost_usd: result.costUsd,
    duration_ms: result.durationMs,
    start_time: result.startTime,
    end_time: result.endTime,
    input: result.input,
    output: result.output,
    file_changes: result.fileChanges
  };
}
|
|
4930
|
+
/**
 * Loads an OTLP JSON trace file and converts every root span into one raw
 * result record.
 *
 * A root span is a span without `parentSpanId`, or whose parent id is not
 * present in the file. For each root the function gathers its descendants,
 * derives tool/LLM span lists, token usage, duration and cost, and reads the
 * `agentv.*` attributes of the root for score, ids and output text.
 * Attribute keys are looked up with dots replaced by underscores, as
 * produced by `parseOtlpAttributes`.
 *
 * @param {string} filePath - Path of the OTLP JSON file.
 * @returns {object[]} One record per root span; `[]` when the file has no spans.
 * @throws {Error} When a root span has no numeric `agentv.score` attribute.
 */
function loadOtlpTraceFile(filePath) {
  const parsed = JSON.parse(readFileSync4(filePath, "utf8"));
  const spans = parsed.resourceSpans?.flatMap((resource) => resource.scopeSpans ?? []).flatMap((scope) => scope.spans ?? []);
  if (!spans || spans.length === 0) {
    return [];
  }
  // spanMap: id -> span; childMap: parent id -> direct children.
  const spanMap = /* @__PURE__ */ new Map();
  const childMap = /* @__PURE__ */ new Map();
  for (const span of spans) {
    if (!span.spanId) continue;
    spanMap.set(span.spanId, span);
    if (span.parentSpanId) {
      const siblings = childMap.get(span.parentSpanId) ?? [];
      siblings.push(span);
      childMap.set(span.parentSpanId, siblings);
    }
  }
  const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
  return roots.map((root, index) => {
    const rootAttrs = parseOtlpAttributes(root.attributes);
    const rootNumber = (key) => numberAttr(rootAttrs[key]);
    // Attributes of every descendant are parsed exactly once and reused below.
    const descendants = collectChildSpans(root.spanId, childMap).map((span) => ({
      ...span,
      parsedAttributes: parseOtlpAttributes(span.attributes)
    }));
    const toolSpans = descendants.filter(
      (span) => typeof span.parsedAttributes.gen_ai_tool_name === "string"
    );
    const llmSpans = descendants.filter(
      (span) => span.parsedAttributes.gen_ai_operation_name === "chat" || typeof span.name === "string" && span.name.startsWith("chat ")
    );
    // Token usage summed over descendants; `cached` stays undefined unless
    // at least one span reports a positive cached-token count.
    const spanUsage = descendants.reduce(
      (acc, { parsedAttributes: attrs }) => {
        acc.input += numberAttr(attrs.gen_ai_usage_input_tokens) ?? 0;
        acc.output += numberAttr(attrs.gen_ai_usage_output_tokens) ?? 0;
        const cached = numberAttr(attrs.gen_ai_usage_cache_read_input_tokens);
        if (cached !== void 0 && cached > 0) {
          acc.cached = (acc.cached ?? 0) + cached;
        }
        return acc;
      },
      { input: 0, output: 0, cached: void 0 }
    );
    // Span-derived totals win; root attributes are the fallback.
    const inputTokens = spanUsage.input || rootNumber("agentv_trace_token_input") || 0;
    const outputTokens = spanUsage.output || rootNumber("agentv_trace_token_output") || 0;
    const cachedTokens = spanUsage.cached || rootNumber("agentv_trace_token_cached") || 0;
    const tokenUsage = inputTokens || outputTokens || cachedTokens ? {
      input: inputTokens,
      output: outputTokens,
      ...cachedTokens ? { cached: cachedTokens } : {}
    } : void 0;
    const traceSummary = buildDerivedTraceSummary({
      trace: {
        event_count: rootNumber("agentv_trace_event_count") ?? (toolSpans.length > 0 ? toolSpans.length : void 0),
        tool_calls: countRawSpanNames(
          toolSpans.map((span) => ({
            type: "tool",
            name: String(span.parsedAttributes.gen_ai_tool_name)
          }))
        ),
        // Status code 2 is counted as an error; zero errors is reported as undefined.
        error_count: descendants.filter((span) => span.status?.code === 2).length || void 0,
        llm_call_count: rootNumber("agentv_trace_llm_call_count") ?? (llmSpans.length > 0 ? llmSpans.length : void 0)
      },
      spans: [
        ...llmSpans.map((span) => ({
          type: "llm",
          name: span.name ?? "chat",
          duration_ms: durationFromSpan(span)
        })),
        ...toolSpans.map((span) => ({
          type: "tool",
          name: String(span.parsedAttributes.gen_ai_tool_name),
          duration_ms: durationFromSpan(span)
        }))
      ],
      duration_ms: rootNumber("agentv_trace_duration_ms") ?? durationFromSpan(root),
      cost_usd: rootNumber("agentv_trace_cost_usd"),
      token_usage: tokenUsage
    });
    const score = rootNumber("agentv_score");
    if (score === void 0) {
      throw new Error(
        `Unsupported OTLP trace root span at index ${index + 1}: missing agentv.score attribute`
      );
    }
    return {
      test_id: stringAttr(rootAttrs.agentv_test_id) ?? stringAttr(rootAttrs.agentv_eval_id) ?? `trace-${index + 1}`,
      eval_set: stringAttr(rootAttrs.agentv_eval_set),
      target: stringAttr(rootAttrs.agentv_target),
      score,
      error: root.status?.code === 2 ? root.status.message : void 0,
      cost_usd: traceSummary?.cost_usd,
      duration_ms: traceSummary?.duration_ms,
      token_usage: traceSummary?.token_usage,
      trace: traceSummary ? {
        event_count: traceSummary.event_count,
        tool_calls: traceSummary.tool_calls,
        error_count: traceSummary.error_count,
        tool_durations: traceSummary.tool_durations,
        llm_call_count: traceSummary.llm_call_count,
        token_usage: traceSummary.token_usage,
        cost_usd: traceSummary.cost_usd,
        duration_ms: traceSummary.duration_ms
      } : void 0,
      spans: traceSummary?.spans,
      output: stringAttr(rootAttrs.agentv_output_text),
      // Evaluator results are read from root-span events named `agentv.evaluator.<name>`.
      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
        const attrs = parseOtlpAttributes(event.attributes);
        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
        return {
          name,
          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
        };
      })
    };
  });
}
|
|
5047
|
+
/**
 * Collects every descendant of a span: its direct children first, followed by
 * the descendants of each child in order.
 *
 * Each span id is expanded at most once, so malformed traces (duplicate span
 * ids, a span naming itself as parent, parent cycles) terminate instead of
 * recursing without bound.
 *
 * @param {string | undefined} spanId - Id of the span whose descendants are wanted.
 * @param {Map<string, object[]>} childMap - Parent span id -> direct child spans.
 * @param {Set<string>} [visited] - Ids already expanded; callers normally omit it.
 * @returns {object[]} Descendant spans; `[]` when `spanId` is falsy or has no children.
 */
function collectChildSpans(spanId, childMap, visited = /* @__PURE__ */ new Set()) {
  if (!spanId || visited.has(spanId)) return [];
  visited.add(spanId);
  const direct = childMap.get(spanId) ?? [];
  const all = [...direct];
  for (const child of direct) {
    // Appended one by one: spreading a very large array into push() can
    // exceed the engine's argument limit.
    for (const descendant of collectChildSpans(child.spanId, childMap, visited)) {
      all.push(descendant);
    }
  }
  return all;
}
|
|
5056
|
+
/**
 * Flattens a list of OTLP `{ key, value }` attributes into a plain object.
 *
 * Every `.` in a key is replaced by `_` (so `gen_ai.tool.name` becomes
 * `gen_ai_tool_name`) and each value is decoded with `parseOtlpValue`.
 *
 * @param {Array<{ key: string, value?: object }> | undefined | null} attributes
 * @returns {Record<string, unknown>} Decoded attributes; `{}` when none are given.
 */
function parseOtlpAttributes(attributes) {
  const result = {};
  for (const { key, value } of attributes ?? []) {
    const flatKey = key.split(".").join("_");
    result[flatKey] = parseOtlpValue(value);
  }
  return result;
}
|
|
5063
|
+
/**
 * Decodes one OTLP attribute value wrapper into a plain JavaScript value.
 *
 * Checked in order: `stringValue`, `intValue` (converted with `Number`),
 * `doubleValue`, `boolValue`, then `arrayValue` (entries decoded
 * recursively). A field that is present but `undefined` is skipped.
 *
 * @param {object | undefined | null} value - OTLP value wrapper.
 * @returns {unknown} The decoded value, or `undefined` for a falsy wrapper or
 *   one that carries none of the handled fields.
 */
function parseOtlpValue(value) {
  if (!value) return void 0;
  const keepAsIs = (raw) => raw;
  const scalarReaders = [
    ["stringValue", keepAsIs],
    ["intValue", (raw) => Number(raw)],
    ["doubleValue", keepAsIs],
    ["boolValue", keepAsIs]
  ];
  for (const [field, read] of scalarReaders) {
    if (field in value && value[field] !== void 0) {
      return read(value[field]);
    }
  }
  if ("arrayValue" in value) {
    const entries = value.arrayValue?.values ?? [];
    return entries.map((entry) => parseOtlpValue(entry));
  }
  return void 0;
}
|
|
5073
|
+
/**
 * Computes a span's duration in whole milliseconds from its nanosecond
 * start/end timestamps.
 *
 * @param {{ startTimeUnixNano?: string | number, endTimeUnixNano?: string | number }} span
 * @returns {number | undefined} Rounded milliseconds, or `undefined` when
 *   either timestamp does not convert to a finite number.
 */
function durationFromSpan(span) {
  const startNs = Number(span.startTimeUnixNano);
  const endNs = Number(span.endTimeUnixNano);
  const bothFinite = Number.isFinite(startNs) && Number.isFinite(endNs);
  return bothFinite ? Math.round((endNs - startNs) / 1e6) : void 0;
}
|
|
5079
|
+
/**
 * Narrows a decoded attribute to a string.
 *
 * @param {unknown} value
 * @returns {string | undefined} `value` when it is a string, else `undefined`.
 */
function stringAttr(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value;
}
|
|
5082
|
+
/**
 * Narrows a decoded attribute to a finite number.
 *
 * @param {unknown} value
 * @returns {number | undefined} `value` when it is a finite number, else `undefined`.
 */
function numberAttr(value) {
  // Number.isFinite never coerces: it is false for every non-number as well
  // as for NaN and the infinities.
  return Number.isFinite(value) ? value : void 0;
}
|
|
5085
|
+
/**
 * Builds a trace summary for a raw result, preferring values stored in
 * `result.trace` and deriving the rest from `result.spans`.
 *
 * - `tool_calls` / `tool_durations`: from the trace, else counted/grouped
 *   from spans of type `"tool"`.
 * - `event_count` / `llm_call_count`: from the trace, else the number of tool /
 *   llm spans, but only when the result has spans at all.
 * - `token_usage`, `cost_usd`, `duration_ms`: from the trace, else from the
 *   top-level fields of the result.
 *
 * @param {object} result - Raw (snake_case) result record.
 * @returns {object | undefined} The summary including `spans`, or `undefined`
 *   when the result has no trace, no spans, and no usage, cost or duration.
 */
function buildDerivedTraceSummary(result) {
  const { trace, spans } = result;
  const spanList = spans ?? [];
  const toolSpans = spanList.filter((span) => span.type === "tool");
  const llmSpans = spanList.filter((span) => span.type === "llm");
  const toolCalls = trace?.tool_calls ?? countRawSpanNames(toolSpans);
  const toolDurations = trace?.tool_durations ?? groupRawSpanDurations(toolSpans);
  const hasSpanData = (spans?.length ?? 0) > 0;
  const nothingToSummarize = !trace && !spans?.length && result.token_usage === void 0 && result.cost_usd === void 0 && result.duration_ms === void 0;
  if (nothingToSummarize) {
    return void 0;
  }
  return {
    event_count: trace?.event_count ?? (hasSpanData ? toolSpans.length : void 0),
    tool_calls: toolCalls,
    error_count: trace?.error_count,
    tool_durations: toolDurations,
    llm_call_count: trace?.llm_call_count ?? (hasSpanData ? llmSpans.length : void 0),
    token_usage: trace?.token_usage ?? result.token_usage,
    cost_usd: trace?.cost_usd ?? result.cost_usd,
    duration_ms: trace?.duration_ms ?? result.duration_ms,
    spans
  };
}
|
|
5108
|
+
/**
 * Counts how many spans carry each name.
 *
 * Counting happens in a Map so that names which collide with
 * `Object.prototype` members (`toString`, `constructor`, …) are counted
 * like any other name instead of picking up the inherited value.
 *
 * @param {Array<{ name: string }>} spans - Spans to count.
 * @returns {Record<string, number> | undefined} Name -> occurrences, or
 *   `undefined` when `spans` is empty.
 */
function countRawSpanNames(spans) {
  const counts = /* @__PURE__ */ new Map();
  for (const span of spans) {
    // String() mirrors the key coercion of a plain-object dictionary.
    const key = String(span.name);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return counts.size > 0 ? Object.fromEntries(counts) : void 0;
}
|
|
5115
|
+
/**
 * Groups span durations by span name, keeping input order within each group.
 * Spans whose `duration_ms` is `undefined` are ignored.
 *
 * Grouping happens in a Map so that names which collide with
 * `Object.prototype` members (`constructor`, `toString`, …) get their own
 * list instead of resolving to the inherited value.
 *
 * @param {Array<{ name: string, duration_ms?: number }>} spans - Spans to group.
 * @returns {Record<string, number[]> | undefined} Name -> durations, or
 *   `undefined` when no span has a duration.
 */
function groupRawSpanDurations(spans) {
  const grouped = /* @__PURE__ */ new Map();
  for (const span of spans) {
    if (span.duration_ms === void 0) continue;
    // String() mirrors the key coercion of a plain-object dictionary.
    const key = String(span.name);
    const durations = grouped.get(key);
    if (durations) {
      durations.push(span.duration_ms);
    } else {
      grouped.set(key, [span.duration_ms]);
    }
  }
  return grouped.size > 0 ? Object.fromEntries(grouped) : void 0;
}
|
|
5125
|
+
/**
 * Returns the derived trace summary of a result without its `spans` list.
 *
 * @param {object} result - Raw result record.
 * @returns {object | undefined} The summary fields, or `undefined` when
 *   `buildDerivedTraceSummary` yields nothing.
 */
function getTraceSummary(result) {
  const summary = buildDerivedTraceSummary(result);
  if (!summary) return void 0;
  const withoutSpans = { ...summary };
  delete withoutSpans.spans;
  return withoutSpans;
}
|
|
5131
|
+
/**
 * Returns the spans of a result's derived trace summary.
 *
 * @param {object} result - Raw result record.
 * @returns {object[]} The spans, or `[]` when there is no summary or it has no spans.
 */
function getTraceSpans(result) {
  const summary = buildDerivedTraceSummary(result);
  return summary?.spans ?? [];
}
|
|
5134
|
+
/**
 * Returns the span-less trace summary of a result after passing it through
 * `toCamelCaseDeep`.
 *
 * @param {object} result - Raw result record.
 * @returns {object | undefined} The converted summary, or `undefined` when the
 *   result has no trace summary.
 */
function toTraceSummary(result) {
  const summary = getTraceSummary(result);
  return summary ? toCamelCaseDeep(summary) : void 0;
}
|
|
4638
5139
|
function listResultFiles(cwd, limit) {
|
|
4639
|
-
const baseDir =
|
|
4640
|
-
const rawDir =
|
|
5140
|
+
const baseDir = path6.join(cwd, ".agentv", "results");
|
|
5141
|
+
const rawDir = path6.join(baseDir, "raw");
|
|
4641
5142
|
const files = [];
|
|
4642
5143
|
try {
|
|
4643
5144
|
const entries2 = readdirSync2(rawDir, { withFileTypes: true });
|
|
4644
5145
|
for (const entry of entries2) {
|
|
4645
5146
|
if (entry.isDirectory()) {
|
|
4646
|
-
const
|
|
4647
|
-
|
|
4648
|
-
|
|
4649
|
-
files.push({ filePath: jsonlPath, displayName: entry.name });
|
|
4650
|
-
} catch {
|
|
5147
|
+
const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
|
|
5148
|
+
if (primaryPath) {
|
|
5149
|
+
files.push({ filePath: primaryPath, displayName: entry.name });
|
|
4651
5150
|
}
|
|
4652
5151
|
}
|
|
4653
5152
|
}
|
|
4654
5153
|
for (const entry of entries2) {
|
|
4655
5154
|
if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
|
|
4656
|
-
files.push({ filePath:
|
|
5155
|
+
files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
|
|
4657
5156
|
}
|
|
4658
5157
|
}
|
|
4659
5158
|
} catch {
|
|
@@ -4661,7 +5160,7 @@ function listResultFiles(cwd, limit) {
|
|
|
4661
5160
|
try {
|
|
4662
5161
|
const entries2 = readdirSync2(baseDir).filter((f) => f.endsWith(".jsonl"));
|
|
4663
5162
|
for (const entry of entries2) {
|
|
4664
|
-
files.push({ filePath:
|
|
5163
|
+
files.push({ filePath: path6.join(baseDir, entry), displayName: entry });
|
|
4665
5164
|
}
|
|
4666
5165
|
} catch {
|
|
4667
5166
|
}
|
|
@@ -4729,84 +5228,65 @@ function formatScore(score) {
|
|
|
4729
5228
|
return `${(score * 100).toFixed(0)}%`;
|
|
4730
5229
|
}
|
|
4731
5230
|
|
|
4732
|
-
// src/commands/results/
|
|
4733
|
-
|
|
4734
|
-
|
|
5231
|
+
// src/commands/results/shared.ts
|
|
5232
|
+
// Optional positional `source` argument: a result file or workspace directory.
var sourceArg = positional({
  type: optional(string),
  displayName: "source",
  description: "Result file or workspace directory (defaults to most recent in .agentv/results/)"
});
|
|
5237
|
+
/**
 * Determines which result file a results command should read.
 *
 * Resolution order:
 *   1. An explicit `source`, resolved with `resolveResultSourcePath`; the
 *      process exits with code 1 when that path does not exist.
 *   2. The file recorded in the run cache, when it exists on disk.
 *   3. The first entry returned by `listResultFiles(cwd, 1)`; the process
 *      exits with code 1 when there is none.
 *
 * @param {string | undefined} source - Path given on the command line, if any.
 * @param {string} cwd - Directory used to locate the cache and result files.
 * @returns {Promise<{ sourceFile: string }>}
 */
async function resolveSourceFile(source, cwd) {
  if (source) {
    const explicitPath = resolveResultSourcePath(source, cwd);
    if (!existsSync2(explicitPath)) {
      console.error(`Error: File not found: ${explicitPath}`);
      process.exit(1);
    }
    return { sourceFile: explicitPath };
  }
  const cache = await loadRunCache(cwd);
  const cachedFile = cache ? resolveRunCacheFile(cache) : "";
  if (cachedFile && existsSync2(cachedFile)) {
    return { sourceFile: cachedFile };
  }
  const metas = listResultFiles(cwd, 1);
  if (metas.length === 0) {
    console.error("Error: No result files found in .agentv/results/");
    console.error("Run an evaluation first: agentv eval <eval-file>");
    process.exit(1);
  }
  // NOTE(review): assumes the entries returned by listResultFiles expose a
  // `path` property - confirm against the full listResultFiles implementation.
  return { sourceFile: metas[0].path };
}
|
|
5262
|
+
async function loadResults(source, cwd) {
|
|
5263
|
+
const { sourceFile } = await resolveSourceFile(source, cwd);
|
|
5264
|
+
const results = loadManifestResults(sourceFile);
|
|
4735
5265
|
if (results.length === 0) {
|
|
4736
|
-
|
|
5266
|
+
console.error(`No results found in ${sourceFile}`);
|
|
5267
|
+
process.exit(1);
|
|
4737
5268
|
}
|
|
4738
|
-
|
|
5269
|
+
return { results: patchTestIds(results), sourceFile };
|
|
5270
|
+
}
|
|
5271
|
+
function patchTestIds(results) {
|
|
5272
|
+
return results.map((r) => {
|
|
4739
5273
|
if (!r.testId && r.evalId) {
|
|
4740
5274
|
return { ...r, testId: String(r.evalId) };
|
|
4741
5275
|
}
|
|
4742
5276
|
return r;
|
|
4743
5277
|
});
|
|
4744
|
-
mkdirSync2(outputDir, { recursive: true });
|
|
4745
|
-
const benchmark = buildBenchmarkArtifact(patched, sourceFile);
|
|
4746
|
-
writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
|
|
4747
|
-
`);
|
|
4748
|
-
const timing = buildTimingArtifact(patched);
|
|
4749
|
-
writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
|
|
4750
|
-
`);
|
|
4751
|
-
const aggregateGrading = buildAggregateGradingArtifact(patched);
|
|
4752
|
-
writeFileSync3(
|
|
4753
|
-
path8.join(outputDir, "grading.json"),
|
|
4754
|
-
`${JSON.stringify(aggregateGrading, null, 2)}
|
|
4755
|
-
`
|
|
4756
|
-
);
|
|
4757
|
-
const gradingDir = path8.join(outputDir, "grading");
|
|
4758
|
-
mkdirSync2(gradingDir, { recursive: true });
|
|
4759
|
-
for (const result of patched) {
|
|
4760
|
-
const id = safeTestId(result);
|
|
4761
|
-
const grading = buildGradingArtifact(result);
|
|
4762
|
-
writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
|
|
4763
|
-
`);
|
|
4764
|
-
}
|
|
4765
|
-
const outputsDir = path8.join(outputDir, "outputs");
|
|
4766
|
-
mkdirSync2(outputsDir, { recursive: true });
|
|
4767
|
-
for (const result of patched) {
|
|
4768
|
-
if (result.output && result.output.length > 0) {
|
|
4769
|
-
const id = safeTestId(result);
|
|
4770
|
-
const md = formatOutputMarkdown(result.output);
|
|
4771
|
-
writeFileSync3(path8.join(outputsDir, `${id}.md`), md);
|
|
4772
|
-
}
|
|
4773
|
-
}
|
|
4774
|
-
const inputsDir = path8.join(outputDir, "inputs");
|
|
4775
|
-
mkdirSync2(inputsDir, { recursive: true });
|
|
4776
|
-
for (const result of patched) {
|
|
4777
|
-
const id = safeTestId(result);
|
|
4778
|
-
const input = extractInput(result);
|
|
4779
|
-
if (input) {
|
|
4780
|
-
writeFileSync3(path8.join(inputsDir, `${id}.md`), input);
|
|
4781
|
-
}
|
|
4782
|
-
}
|
|
4783
|
-
}
|
|
4784
|
-
function formatOutputMarkdown(output) {
|
|
4785
|
-
return output.map((msg) => `@[${msg.role}]:
|
|
4786
|
-
${String(msg.content ?? "")}`).join("\n\n");
|
|
4787
|
-
}
|
|
4788
|
-
function extractInput(result) {
|
|
4789
|
-
const input = result.input;
|
|
4790
|
-
if (!input) return null;
|
|
4791
|
-
if (typeof input === "string") return input;
|
|
4792
|
-
if (Array.isArray(input) && input.length > 0) {
|
|
4793
|
-
return formatOutputMarkdown(input);
|
|
4794
|
-
}
|
|
4795
|
-
return null;
|
|
4796
|
-
}
|
|
4797
|
-
function safeTestId(result) {
|
|
4798
|
-
const raw = result.testId ?? result.evalId ?? "unknown";
|
|
4799
|
-
return String(raw).replace(/[/\\:*?"<>|]/g, "_");
|
|
4800
5278
|
}
|
|
5279
|
+
|
|
5280
|
+
// src/commands/results/export.ts
|
|
4801
5281
|
function deriveOutputDir(cwd, sourceFile) {
|
|
4802
|
-
const parentDir =
|
|
5282
|
+
const parentDir = path7.basename(path7.dirname(sourceFile));
|
|
4803
5283
|
if (parentDir.startsWith("eval_")) {
|
|
4804
5284
|
const dirName2 = parentDir.slice(5);
|
|
4805
|
-
return
|
|
5285
|
+
return path7.join(cwd, ".agentv", "results", "export", dirName2);
|
|
4806
5286
|
}
|
|
4807
|
-
const basename =
|
|
5287
|
+
const basename = path7.basename(sourceFile, ".jsonl");
|
|
4808
5288
|
const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
|
|
4809
|
-
return
|
|
5289
|
+
return path7.join(cwd, ".agentv", "results", "export", dirName);
|
|
4810
5290
|
}
|
|
4811
5291
|
var resultsExportCommand = command({
|
|
4812
5292
|
name: "export",
|
|
@@ -4833,28 +5313,13 @@ var resultsExportCommand = command({
|
|
|
4833
5313
|
handler: async ({ source, out, dir }) => {
|
|
4834
5314
|
const cwd = dir ?? process.cwd();
|
|
4835
5315
|
try {
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
|
|
4840
|
-
|
|
4841
|
-
|
|
4842
|
-
|
|
4843
|
-
sourceFile = cachedFile;
|
|
4844
|
-
} else {
|
|
4845
|
-
const metas = listResultFiles(cwd, 1);
|
|
4846
|
-
if (metas.length === 0) {
|
|
4847
|
-
console.error("Error: No result files found in .agentv/results/");
|
|
4848
|
-
console.error("Run an evaluation first: agentv eval <eval-file>");
|
|
4849
|
-
process.exit(1);
|
|
4850
|
-
}
|
|
4851
|
-
sourceFile = metas[0].path;
|
|
4852
|
-
}
|
|
4853
|
-
}
|
|
4854
|
-
const content = readFileSync6(sourceFile, "utf8");
|
|
4855
|
-
const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
4856
|
-
exportResults(sourceFile, content, outputDir);
|
|
4857
|
-
const results = parseJsonlResults(content);
|
|
5316
|
+
const { sourceFile } = await resolveSourceFile(source, cwd);
|
|
5317
|
+
const { results } = await loadResults(source, cwd);
|
|
5318
|
+
const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
5319
|
+
await writeArtifactsFromResults(results, outputDir, {
|
|
5320
|
+
evalFile: sourceFile,
|
|
5321
|
+
writeLegacyResults: false
|
|
5322
|
+
});
|
|
4858
5323
|
console.log(`Exported ${results.length} test(s) to ${outputDir}`);
|
|
4859
5324
|
for (const result of results) {
|
|
4860
5325
|
const id = result.testId ?? result.evalId ?? "unknown";
|
|
@@ -4867,58 +5332,6 @@ var resultsExportCommand = command({
|
|
|
4867
5332
|
}
|
|
4868
5333
|
});
|
|
4869
5334
|
|
|
4870
|
-
// src/commands/results/shared.ts
|
|
4871
|
-
import { existsSync as existsSync3, readFileSync as readFileSync7 } from "node:fs";
|
|
4872
|
-
import path9 from "node:path";
|
|
4873
|
-
var sourceArg = positional({
|
|
4874
|
-
type: optional(string),
|
|
4875
|
-
displayName: "source",
|
|
4876
|
-
description: "JSONL result file (defaults to most recent in .agentv/results/)"
|
|
4877
|
-
});
|
|
4878
|
-
async function resolveSourceFile(source, cwd) {
|
|
4879
|
-
let sourceFile;
|
|
4880
|
-
if (source) {
|
|
4881
|
-
sourceFile = path9.isAbsolute(source) ? source : path9.resolve(cwd, source);
|
|
4882
|
-
if (!existsSync3(sourceFile)) {
|
|
4883
|
-
console.error(`Error: File not found: ${sourceFile}`);
|
|
4884
|
-
process.exit(1);
|
|
4885
|
-
}
|
|
4886
|
-
} else {
|
|
4887
|
-
const cache = await loadRunCache(cwd);
|
|
4888
|
-
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
4889
|
-
if (cachedFile && existsSync3(cachedFile)) {
|
|
4890
|
-
sourceFile = cachedFile;
|
|
4891
|
-
} else {
|
|
4892
|
-
const metas = listResultFiles(cwd, 1);
|
|
4893
|
-
if (metas.length === 0) {
|
|
4894
|
-
console.error("Error: No result files found in .agentv/results/");
|
|
4895
|
-
console.error("Run an evaluation first: agentv eval <eval-file>");
|
|
4896
|
-
process.exit(1);
|
|
4897
|
-
}
|
|
4898
|
-
sourceFile = metas[0].path;
|
|
4899
|
-
}
|
|
4900
|
-
}
|
|
4901
|
-
const content = readFileSync7(sourceFile, "utf8");
|
|
4902
|
-
return { sourceFile, content };
|
|
4903
|
-
}
|
|
4904
|
-
async function loadResults(source, cwd) {
|
|
4905
|
-
const { sourceFile, content } = await resolveSourceFile(source, cwd);
|
|
4906
|
-
const results = parseJsonlResults(content);
|
|
4907
|
-
if (results.length === 0) {
|
|
4908
|
-
console.error(`No results found in ${sourceFile}`);
|
|
4909
|
-
process.exit(1);
|
|
4910
|
-
}
|
|
4911
|
-
return { results: patchTestIds(results), sourceFile };
|
|
4912
|
-
}
|
|
4913
|
-
function patchTestIds(results) {
|
|
4914
|
-
return results.map((r) => {
|
|
4915
|
-
if (!r.testId && r.evalId) {
|
|
4916
|
-
return { ...r, testId: String(r.evalId) };
|
|
4917
|
-
}
|
|
4918
|
-
return r;
|
|
4919
|
-
});
|
|
4920
|
-
}
|
|
4921
|
-
|
|
4922
5335
|
// src/commands/results/failures.ts
|
|
4923
5336
|
function formatFailures(results) {
|
|
4924
5337
|
return results.filter((r) => r.score < 1).map((r) => {
|
|
@@ -5045,7 +5458,7 @@ var resultsShowCommand = command({
|
|
|
5045
5458
|
});
|
|
5046
5459
|
|
|
5047
5460
|
// src/commands/results/summary.ts
|
|
5048
|
-
import { existsSync as
|
|
5461
|
+
import { existsSync as existsSync3, readFileSync as readFileSync5 } from "node:fs";
|
|
5049
5462
|
function formatSummary(results, grading) {
|
|
5050
5463
|
const total = results.length;
|
|
5051
5464
|
let passed;
|
|
@@ -5096,9 +5509,9 @@ var resultsSummaryCommand = command({
|
|
|
5096
5509
|
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5097
5510
|
let grading;
|
|
5098
5511
|
const gradingPath = sourceFile.replace(/\.jsonl$/, ".grading.json");
|
|
5099
|
-
if (
|
|
5512
|
+
if (existsSync3(gradingPath)) {
|
|
5100
5513
|
try {
|
|
5101
|
-
grading = JSON.parse(
|
|
5514
|
+
grading = JSON.parse(readFileSync5(gradingPath, "utf8"));
|
|
5102
5515
|
} catch {
|
|
5103
5516
|
}
|
|
5104
5517
|
}
|
|
@@ -5123,68 +5536,26 @@ var resultsCommand = subcommands({
|
|
|
5123
5536
|
});
|
|
5124
5537
|
|
|
5125
5538
|
// src/commands/results/serve.ts
|
|
5126
|
-
import { existsSync as
|
|
5127
|
-
import
|
|
5539
|
+
import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
|
|
5540
|
+
import path8 from "node:path";
|
|
5128
5541
|
import { Hono } from "hono";
|
|
5129
|
-
async function resolveSourceFile2(source, cwd) {
|
|
5130
|
-
if (source) {
|
|
5131
|
-
const resolved = path10.isAbsolute(source) ? source : path10.resolve(cwd, source);
|
|
5132
|
-
if (!existsSync5(resolved)) {
|
|
5133
|
-
throw new Error(`Source file not found: ${resolved}`);
|
|
5134
|
-
}
|
|
5135
|
-
return resolved;
|
|
5136
|
-
}
|
|
5137
|
-
const cache = await loadRunCache(cwd);
|
|
5138
|
-
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
5139
|
-
if (cachedFile && existsSync5(cachedFile)) {
|
|
5140
|
-
return cachedFile;
|
|
5141
|
-
}
|
|
5142
|
-
const metas = listResultFiles(cwd, 10);
|
|
5143
|
-
if (metas.length === 0) {
|
|
5144
|
-
throw new Error(
|
|
5145
|
-
"No result files found in .agentv/results/\nRun an evaluation first: agentv eval <eval-file>"
|
|
5146
|
-
);
|
|
5147
|
-
}
|
|
5148
|
-
if (metas.length > 1) {
|
|
5149
|
-
console.log("Available result files:");
|
|
5150
|
-
for (const m of metas) {
|
|
5151
|
-
console.log(` ${m.path}`);
|
|
5152
|
-
}
|
|
5153
|
-
console.log(`
|
|
5154
|
-
Serving most recent: ${metas[0].path}
|
|
5155
|
-
`);
|
|
5156
|
-
}
|
|
5157
|
-
return metas[0].path;
|
|
5158
|
-
}
|
|
5159
|
-
function loadResults2(content) {
|
|
5160
|
-
const results = parseJsonlResults(content);
|
|
5161
|
-
if (results.length === 0) {
|
|
5162
|
-
throw new Error("No valid results found in JSONL content");
|
|
5163
|
-
}
|
|
5164
|
-
return results.map((r) => {
|
|
5165
|
-
if (!r.testId && r.evalId) {
|
|
5166
|
-
return { ...r, testId: String(r.evalId) };
|
|
5167
|
-
}
|
|
5168
|
-
return r;
|
|
5169
|
-
});
|
|
5170
|
-
}
|
|
5171
5542
|
function feedbackPath(cwd) {
|
|
5172
|
-
return
|
|
5543
|
+
return path8.join(cwd, "feedback.json");
|
|
5173
5544
|
}
|
|
5174
5545
|
function readFeedback(cwd) {
|
|
5175
5546
|
const fp = feedbackPath(cwd);
|
|
5176
|
-
if (!
|
|
5547
|
+
if (!existsSync4(fp)) {
|
|
5177
5548
|
return { reviews: [] };
|
|
5178
5549
|
}
|
|
5179
5550
|
try {
|
|
5180
|
-
return JSON.parse(
|
|
5551
|
+
return JSON.parse(readFileSync6(fp, "utf8"));
|
|
5181
5552
|
} catch (err2) {
|
|
5182
5553
|
console.error(`Warning: could not parse ${fp}, starting fresh: ${err2.message}`);
|
|
5183
5554
|
return { reviews: [] };
|
|
5184
5555
|
}
|
|
5185
5556
|
}
|
|
5186
5557
|
function writeFeedback(cwd, data) {
|
|
5187
|
-
|
|
5558
|
+
writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
5188
5559
|
`, "utf8");
|
|
5189
5560
|
}
|
|
5190
5561
|
function createApp(results, cwd) {
|
|
@@ -5854,9 +6225,7 @@ var resultsServeCommand = command({
|
|
|
5854
6225
|
const cwd = dir ?? process.cwd();
|
|
5855
6226
|
const listenPort = port ?? 3117;
|
|
5856
6227
|
try {
|
|
5857
|
-
const sourceFile = await
|
|
5858
|
-
const content = readFileSync9(sourceFile, "utf8");
|
|
5859
|
-
const results = loadResults2(content);
|
|
6228
|
+
const { results, sourceFile } = await loadResults(source, cwd);
|
|
5860
6229
|
const app2 = createApp(results, cwd);
|
|
5861
6230
|
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
5862
6231
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
@@ -5889,7 +6258,7 @@ function detectPackageManager() {
|
|
|
5889
6258
|
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
5890
6259
|
}
|
|
5891
6260
|
function runCommand(cmd, args) {
|
|
5892
|
-
return new Promise((
|
|
6261
|
+
return new Promise((resolve2, reject) => {
|
|
5893
6262
|
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
5894
6263
|
let stdout = "";
|
|
5895
6264
|
child.stdout?.on("data", (data) => {
|
|
@@ -5897,7 +6266,7 @@ function runCommand(cmd, args) {
|
|
|
5897
6266
|
stdout += data.toString();
|
|
5898
6267
|
});
|
|
5899
6268
|
child.on("error", reject);
|
|
5900
|
-
child.on("close", (code) =>
|
|
6269
|
+
child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
|
|
5901
6270
|
});
|
|
5902
6271
|
}
|
|
5903
6272
|
var updateCommand = command({
|
|
@@ -6109,10 +6478,6 @@ function parseAssertSpec(spec) {
|
|
|
6109
6478
|
);
|
|
6110
6479
|
}
|
|
6111
6480
|
}
|
|
6112
|
-
function toTraceSummary(raw) {
|
|
6113
|
-
if (!raw.trace) return void 0;
|
|
6114
|
-
return toCamelCaseDeep(raw.trace);
|
|
6115
|
-
}
|
|
6116
6481
|
function extractCandidate(raw) {
|
|
6117
6482
|
if (raw.output !== void 0)
|
|
6118
6483
|
return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
|
|
@@ -6224,8 +6589,8 @@ var traceScoreCommand = command({
|
|
|
6224
6589
|
args: {
|
|
6225
6590
|
file: positional({
|
|
6226
6591
|
type: string,
|
|
6227
|
-
displayName: "
|
|
6228
|
-
description: "Path to
|
|
6592
|
+
displayName: "trace-source",
|
|
6593
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6229
6594
|
}),
|
|
6230
6595
|
assert: option({
|
|
6231
6596
|
type: string,
|
|
@@ -6271,11 +6636,11 @@ var traceScoreCommand = command({
|
|
|
6271
6636
|
);
|
|
6272
6637
|
if (traceRequired) {
|
|
6273
6638
|
const hasTrace = results.some(
|
|
6274
|
-
(r) => r
|
|
6639
|
+
(r) => toTraceSummary(r) || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
|
|
6275
6640
|
);
|
|
6276
6641
|
if (!hasTrace) {
|
|
6277
6642
|
console.error(
|
|
6278
|
-
`${c2.red}Error:${c2.reset}
|
|
6643
|
+
`${c2.red}Error:${c2.reset} Source lacks trace metrics. Export a trace file with ${c2.bold}--trace-file${c2.reset} or ${c2.bold}--otel-file${c2.reset}.`
|
|
6279
6644
|
);
|
|
6280
6645
|
process.exit(1);
|
|
6281
6646
|
}
|
|
@@ -6308,7 +6673,7 @@ var traceScoreCommand = command({
|
|
|
6308
6673
|
|
|
6309
6674
|
// src/commands/trace/show.ts
|
|
6310
6675
|
function renderFlatTrace(result) {
|
|
6311
|
-
const trace = result
|
|
6676
|
+
const trace = getTraceSummary(result);
|
|
6312
6677
|
const parts = [];
|
|
6313
6678
|
if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
|
|
6314
6679
|
const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
|
|
@@ -6339,8 +6704,12 @@ function renderScores(scores) {
|
|
|
6339
6704
|
}
|
|
6340
6705
|
function renderTree(result) {
|
|
6341
6706
|
const messages = result.output;
|
|
6707
|
+
const spans = getTraceSpans(result);
|
|
6342
6708
|
if (!messages || messages.length === 0) {
|
|
6343
|
-
if (
|
|
6709
|
+
if (spans.length > 0) {
|
|
6710
|
+
return renderSpanTree(result, spans);
|
|
6711
|
+
}
|
|
6712
|
+
if (getTraceSummary(result) || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
|
|
6344
6713
|
return renderFlatTrace(result);
|
|
6345
6714
|
}
|
|
6346
6715
|
return `${c2.dim}No trace data available${c2.reset}`;
|
|
@@ -6406,6 +6775,30 @@ function renderTree(result) {
|
|
|
6406
6775
|
}
|
|
6407
6776
|
return lines.join("\n");
|
|
6408
6777
|
}
|
|
6778
|
+
function renderSpanTree(result, spans) {
|
|
6779
|
+
const lines = [];
|
|
6780
|
+
const testId = result.test_id ?? result.eval_id ?? "unknown";
|
|
6781
|
+
const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
|
|
6782
|
+
const rootParts = [testId];
|
|
6783
|
+
if (result.duration_ms !== void 0) rootParts.push(formatDuration(result.duration_ms));
|
|
6784
|
+
if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
|
|
6785
|
+
if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
|
|
6786
|
+
lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
|
|
6787
|
+
spans.forEach((span, index) => {
|
|
6788
|
+
const connector = index === spans.length - 1 ? "\u2514\u2500" : "\u251C\u2500";
|
|
6789
|
+
const color = span.type === "llm" ? c2.cyan : c2.yellow;
|
|
6790
|
+
const parts = [`${color}${span.name}${c2.reset}`];
|
|
6791
|
+
if (span.duration_ms !== void 0) {
|
|
6792
|
+
parts.push(formatDuration(span.duration_ms));
|
|
6793
|
+
}
|
|
6794
|
+
lines.push(`${connector} ${parts.join(", ")}`);
|
|
6795
|
+
});
|
|
6796
|
+
if (result.scores && result.scores.length > 0) {
|
|
6797
|
+
lines.push("");
|
|
6798
|
+
lines.push(`${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
|
|
6799
|
+
}
|
|
6800
|
+
return lines.join("\n");
|
|
6801
|
+
}
|
|
6409
6802
|
function formatResultDetail(result, index, tree) {
|
|
6410
6803
|
const lines = [];
|
|
6411
6804
|
const testId = result.test_id ?? result.eval_id ?? `result-${index}`;
|
|
@@ -6489,8 +6882,8 @@ var traceShowCommand = command({
|
|
|
6489
6882
|
args: {
|
|
6490
6883
|
file: positional({
|
|
6491
6884
|
type: string,
|
|
6492
|
-
displayName: "
|
|
6493
|
-
description: "Path to
|
|
6885
|
+
displayName: "trace-source",
|
|
6886
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6494
6887
|
}),
|
|
6495
6888
|
testId: option({
|
|
6496
6889
|
type: optional(string),
|
|
@@ -6499,7 +6892,7 @@ var traceShowCommand = command({
|
|
|
6499
6892
|
}),
|
|
6500
6893
|
tree: flag({
|
|
6501
6894
|
long: "tree",
|
|
6502
|
-
description: "Show hierarchical trace tree
|
|
6895
|
+
description: "Show hierarchical trace tree from output messages or exported trace spans"
|
|
6503
6896
|
}),
|
|
6504
6897
|
format: option({
|
|
6505
6898
|
type: optional(oneOf(["table", "json"])),
|
|
@@ -6570,11 +6963,11 @@ function collectMetrics(results) {
|
|
|
6570
6963
|
formatter: (n) => formatNumber(Math.round(n))
|
|
6571
6964
|
});
|
|
6572
6965
|
}
|
|
6573
|
-
const toolCalls = results.map((r) => r
|
|
6966
|
+
const toolCalls = results.map((r) => getTraceSummary(r)?.event_count).filter((v) => v !== void 0);
|
|
6574
6967
|
if (toolCalls.length > 0) {
|
|
6575
6968
|
rows.push({ name: "tool_calls", values: toolCalls, formatter: (n) => String(Math.round(n)) });
|
|
6576
6969
|
}
|
|
6577
|
-
const llmCalls = results.map((r) => r
|
|
6970
|
+
const llmCalls = results.map((r) => getTraceSummary(r)?.llm_call_count).filter((v) => v !== void 0);
|
|
6578
6971
|
if (llmCalls.length > 0) {
|
|
6579
6972
|
rows.push({ name: "llm_calls", values: llmCalls, formatter: (n) => String(Math.round(n)) });
|
|
6580
6973
|
}
|
|
@@ -6668,8 +7061,8 @@ var traceStatsCommand = command({
|
|
|
6668
7061
|
args: {
|
|
6669
7062
|
file: positional({
|
|
6670
7063
|
type: string,
|
|
6671
|
-
displayName: "
|
|
6672
|
-
description: "Path to
|
|
7064
|
+
displayName: "trace-source",
|
|
7065
|
+
description: "Path to a run workspace, result manifest, simple trace JSONL, or OTLP JSON file"
|
|
6673
7066
|
}),
|
|
6674
7067
|
groupBy: option({
|
|
6675
7068
|
type: optional(oneOf(["target", "eval-set", "test-id"])),
|
|
@@ -6719,8 +7112,8 @@ var traceCommand = subcommands({
|
|
|
6719
7112
|
});
|
|
6720
7113
|
|
|
6721
7114
|
// src/commands/transpile/index.ts
|
|
6722
|
-
import { writeFileSync as
|
|
6723
|
-
import
|
|
7115
|
+
import { writeFileSync as writeFileSync4 } from "node:fs";
|
|
7116
|
+
import path9 from "node:path";
|
|
6724
7117
|
var transpileCommand = command({
|
|
6725
7118
|
name: "transpile",
|
|
6726
7119
|
description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
|
|
@@ -6744,7 +7137,7 @@ var transpileCommand = command({
|
|
|
6744
7137
|
handler: async ({ input, outDir, stdout }) => {
|
|
6745
7138
|
let result;
|
|
6746
7139
|
try {
|
|
6747
|
-
result = transpileEvalYamlFile(
|
|
7140
|
+
result = transpileEvalYamlFile(path9.resolve(input));
|
|
6748
7141
|
} catch (error) {
|
|
6749
7142
|
console.error(`Error: ${error.message}`);
|
|
6750
7143
|
process.exit(1);
|
|
@@ -6768,12 +7161,12 @@ var transpileCommand = command({
|
|
|
6768
7161
|
process.stdout.write("\n");
|
|
6769
7162
|
return;
|
|
6770
7163
|
}
|
|
6771
|
-
const outputDir = outDir ?
|
|
7164
|
+
const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
|
|
6772
7165
|
const fileNames = getOutputFilenames(result);
|
|
6773
7166
|
for (const [skill, evalsJson] of result.files) {
|
|
6774
7167
|
const fileName = fileNames.get(skill) ?? "evals.json";
|
|
6775
|
-
const outputPath =
|
|
6776
|
-
|
|
7168
|
+
const outputPath = path9.join(outputDir, fileName);
|
|
7169
|
+
writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
|
|
6777
7170
|
`);
|
|
6778
7171
|
console.log(`Transpiled to ${outputPath}`);
|
|
6779
7172
|
}
|
|
@@ -6781,7 +7174,7 @@ var transpileCommand = command({
|
|
|
6781
7174
|
});
|
|
6782
7175
|
|
|
6783
7176
|
// src/commands/trim/index.ts
|
|
6784
|
-
import { readFileSync as
|
|
7177
|
+
import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
|
|
6785
7178
|
var trimCommand = command({
|
|
6786
7179
|
name: "trim",
|
|
6787
7180
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -6800,7 +7193,7 @@ var trimCommand = command({
|
|
|
6800
7193
|
},
|
|
6801
7194
|
handler: async ({ input, out }) => {
|
|
6802
7195
|
try {
|
|
6803
|
-
const content =
|
|
7196
|
+
const content = readFileSync7(input, "utf8");
|
|
6804
7197
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
6805
7198
|
const trimmedLines = lines.map((line) => {
|
|
6806
7199
|
const record = JSON.parse(line);
|
|
@@ -6812,7 +7205,7 @@ var trimCommand = command({
|
|
|
6812
7205
|
const output = `${trimmedLines.join("\n")}
|
|
6813
7206
|
`;
|
|
6814
7207
|
if (out) {
|
|
6815
|
-
|
|
7208
|
+
writeFileSync5(out, output, "utf8");
|
|
6816
7209
|
console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
|
|
6817
7210
|
} else {
|
|
6818
7211
|
process.stdout.write(output);
|
|
@@ -6906,8 +7299,8 @@ function isTTY() {
|
|
|
6906
7299
|
|
|
6907
7300
|
// src/commands/validate/validate-files.ts
|
|
6908
7301
|
import { constants } from "node:fs";
|
|
6909
|
-
import { access, readdir, stat } from "node:fs/promises";
|
|
6910
|
-
import
|
|
7302
|
+
import { access, readdir as readdir3, stat } from "node:fs/promises";
|
|
7303
|
+
import path10 from "node:path";
|
|
6911
7304
|
async function validateFiles(paths) {
|
|
6912
7305
|
const filePaths = await expandPaths(paths);
|
|
6913
7306
|
const results = [];
|
|
@@ -6925,7 +7318,7 @@ async function validateFiles(paths) {
|
|
|
6925
7318
|
};
|
|
6926
7319
|
}
|
|
6927
7320
|
async function validateSingleFile(filePath) {
|
|
6928
|
-
const absolutePath =
|
|
7321
|
+
const absolutePath = path10.resolve(filePath);
|
|
6929
7322
|
const fileType = await detectFileType(absolutePath);
|
|
6930
7323
|
let result;
|
|
6931
7324
|
if (fileType === "eval") {
|
|
@@ -6950,7 +7343,7 @@ async function validateSingleFile(filePath) {
|
|
|
6950
7343
|
async function expandPaths(paths) {
|
|
6951
7344
|
const expanded = [];
|
|
6952
7345
|
for (const inputPath of paths) {
|
|
6953
|
-
const absolutePath =
|
|
7346
|
+
const absolutePath = path10.resolve(inputPath);
|
|
6954
7347
|
try {
|
|
6955
7348
|
await access(absolutePath, constants.F_OK);
|
|
6956
7349
|
} catch {
|
|
@@ -6972,9 +7365,9 @@ async function expandPaths(paths) {
|
|
|
6972
7365
|
async function findYamlFiles(dirPath) {
|
|
6973
7366
|
const results = [];
|
|
6974
7367
|
try {
|
|
6975
|
-
const entries2 = await
|
|
7368
|
+
const entries2 = await readdir3(dirPath, { withFileTypes: true });
|
|
6976
7369
|
for (const entry of entries2) {
|
|
6977
|
-
const fullPath =
|
|
7370
|
+
const fullPath = path10.join(dirPath, entry.name);
|
|
6978
7371
|
if (entry.isDirectory()) {
|
|
6979
7372
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
6980
7373
|
continue;
|
|
@@ -6991,7 +7384,7 @@ async function findYamlFiles(dirPath) {
|
|
|
6991
7384
|
return results;
|
|
6992
7385
|
}
|
|
6993
7386
|
function isYamlFile(filePath) {
|
|
6994
|
-
const ext =
|
|
7387
|
+
const ext = path10.extname(filePath).toLowerCase();
|
|
6995
7388
|
return ext === ".yaml" || ext === ".yml";
|
|
6996
7389
|
}
|
|
6997
7390
|
|
|
@@ -7029,14 +7422,14 @@ var validateCommand = command({
|
|
|
7029
7422
|
});
|
|
7030
7423
|
|
|
7031
7424
|
// src/commands/workspace/clean.ts
|
|
7032
|
-
import { existsSync as
|
|
7033
|
-
import { readFile as
|
|
7034
|
-
import
|
|
7425
|
+
import { existsSync as existsSync5 } from "node:fs";
|
|
7426
|
+
import { readFile as readFile4, readdir as readdir4, rm } from "node:fs/promises";
|
|
7427
|
+
import path11 from "node:path";
|
|
7035
7428
|
async function confirm(message) {
|
|
7036
7429
|
const readline2 = await import("node:readline");
|
|
7037
7430
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
7038
|
-
const answer = await new Promise((
|
|
7039
|
-
rl.question(`${message} [y/N] `,
|
|
7431
|
+
const answer = await new Promise((resolve2) => {
|
|
7432
|
+
rl.question(`${message} [y/N] `, resolve2);
|
|
7040
7433
|
});
|
|
7041
7434
|
rl.close();
|
|
7042
7435
|
return answer.toLowerCase() === "y";
|
|
@@ -7058,19 +7451,19 @@ var cleanCommand = command({
|
|
|
7058
7451
|
},
|
|
7059
7452
|
handler: async ({ repo, force }) => {
|
|
7060
7453
|
const poolRoot = getWorkspacePoolRoot();
|
|
7061
|
-
if (!
|
|
7454
|
+
if (!existsSync5(poolRoot)) {
|
|
7062
7455
|
console.log("No workspace pool entries found.");
|
|
7063
7456
|
return;
|
|
7064
7457
|
}
|
|
7065
7458
|
if (repo) {
|
|
7066
|
-
const entries2 = await
|
|
7459
|
+
const entries2 = await readdir4(poolRoot, { withFileTypes: true });
|
|
7067
7460
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7068
7461
|
const matchingDirs = [];
|
|
7069
7462
|
for (const dir of poolDirs) {
|
|
7070
|
-
const poolDir =
|
|
7071
|
-
const metadataPath =
|
|
7463
|
+
const poolDir = path11.join(poolRoot, dir.name);
|
|
7464
|
+
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
7072
7465
|
try {
|
|
7073
|
-
const raw = await
|
|
7466
|
+
const raw = await readFile4(metadataPath, "utf-8");
|
|
7074
7467
|
const metadata = JSON.parse(raw);
|
|
7075
7468
|
const hasRepo = metadata.repos?.some((r) => {
|
|
7076
7469
|
if (r.source.type === "git" && r.source.url) {
|
|
@@ -7099,7 +7492,7 @@ var cleanCommand = command({
|
|
|
7099
7492
|
}
|
|
7100
7493
|
for (const dir of matchingDirs) {
|
|
7101
7494
|
await rm(dir, { recursive: true, force: true });
|
|
7102
|
-
console.log(`Removed: ${
|
|
7495
|
+
console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
|
|
7103
7496
|
}
|
|
7104
7497
|
console.log("Done.");
|
|
7105
7498
|
} else {
|
|
@@ -7117,15 +7510,15 @@ var cleanCommand = command({
|
|
|
7117
7510
|
});
|
|
7118
7511
|
|
|
7119
7512
|
// src/commands/workspace/list.ts
|
|
7120
|
-
import { existsSync as
|
|
7121
|
-
import { readFile as
|
|
7122
|
-
import
|
|
7513
|
+
import { existsSync as existsSync6 } from "node:fs";
|
|
7514
|
+
import { readFile as readFile5, readdir as readdir5, stat as stat2 } from "node:fs/promises";
|
|
7515
|
+
import path12 from "node:path";
|
|
7123
7516
|
async function getDirectorySize(dirPath) {
|
|
7124
7517
|
let totalSize = 0;
|
|
7125
7518
|
try {
|
|
7126
|
-
const entries2 = await
|
|
7519
|
+
const entries2 = await readdir5(dirPath, { withFileTypes: true });
|
|
7127
7520
|
for (const entry of entries2) {
|
|
7128
|
-
const fullPath =
|
|
7521
|
+
const fullPath = path12.join(dirPath, entry.name);
|
|
7129
7522
|
if (entry.isDirectory()) {
|
|
7130
7523
|
totalSize += await getDirectorySize(fullPath);
|
|
7131
7524
|
} else {
|
|
@@ -7149,25 +7542,25 @@ var listCommand = command({
|
|
|
7149
7542
|
args: {},
|
|
7150
7543
|
handler: async () => {
|
|
7151
7544
|
const poolRoot = getWorkspacePoolRoot();
|
|
7152
|
-
if (!
|
|
7545
|
+
if (!existsSync6(poolRoot)) {
|
|
7153
7546
|
console.log("No workspace pool entries found.");
|
|
7154
7547
|
return;
|
|
7155
7548
|
}
|
|
7156
|
-
const entries2 = await
|
|
7549
|
+
const entries2 = await readdir5(poolRoot, { withFileTypes: true });
|
|
7157
7550
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
7158
7551
|
if (poolDirs.length === 0) {
|
|
7159
7552
|
console.log("No workspace pool entries found.");
|
|
7160
7553
|
return;
|
|
7161
7554
|
}
|
|
7162
7555
|
for (const dir of poolDirs) {
|
|
7163
|
-
const poolDir =
|
|
7556
|
+
const poolDir = path12.join(poolRoot, dir.name);
|
|
7164
7557
|
const fingerprint = dir.name;
|
|
7165
|
-
const poolEntries = await
|
|
7558
|
+
const poolEntries = await readdir5(poolDir, { withFileTypes: true });
|
|
7166
7559
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
7167
|
-
const metadataPath =
|
|
7560
|
+
const metadataPath = path12.join(poolDir, "metadata.json");
|
|
7168
7561
|
let metadata = null;
|
|
7169
7562
|
try {
|
|
7170
|
-
const raw = await
|
|
7563
|
+
const raw = await readFile5(metadataPath, "utf-8");
|
|
7171
7564
|
metadata = JSON.parse(raw);
|
|
7172
7565
|
} catch {
|
|
7173
7566
|
}
|
|
@@ -7204,16 +7597,16 @@ var workspaceCommand = subcommands({
|
|
|
7204
7597
|
|
|
7205
7598
|
// src/update-check.ts
|
|
7206
7599
|
import { spawn as spawn2 } from "node:child_process";
|
|
7207
|
-
import { readFile as
|
|
7208
|
-
import { join } from "node:path";
|
|
7600
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
7601
|
+
import { join as join4 } from "node:path";
|
|
7209
7602
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
7210
7603
|
var AGENTV_DIR = getAgentvHome();
|
|
7211
7604
|
var CACHE_FILE = "version-check.json";
|
|
7212
7605
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
7213
|
-
async function getCachedUpdateInfo(
|
|
7214
|
-
const filePath =
|
|
7606
|
+
async function getCachedUpdateInfo(path13) {
|
|
7607
|
+
const filePath = path13 ?? join4(AGENTV_DIR, CACHE_FILE);
|
|
7215
7608
|
try {
|
|
7216
|
-
const raw = await
|
|
7609
|
+
const raw = await readFile6(filePath, "utf-8");
|
|
7217
7610
|
const data = JSON.parse(raw);
|
|
7218
7611
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
7219
7612
|
return data;
|
|
@@ -7245,7 +7638,7 @@ function buildNotice(currentVersion, latestVersion) {
|
|
|
7245
7638
|
}
|
|
7246
7639
|
function backgroundUpdateCheck() {
|
|
7247
7640
|
const dir = AGENTV_DIR;
|
|
7248
|
-
const filePath =
|
|
7641
|
+
const filePath = join4(dir, CACHE_FILE);
|
|
7249
7642
|
const script = `
|
|
7250
7643
|
const https = require('https');
|
|
7251
7644
|
const fs = require('fs');
|
|
@@ -7299,8 +7692,8 @@ var app = subcommands({
|
|
|
7299
7692
|
compare: compareCommand,
|
|
7300
7693
|
convert: convertCommand,
|
|
7301
7694
|
create: createCommand,
|
|
7302
|
-
generate: generateCommand,
|
|
7303
7695
|
init: initCmdTsCommand,
|
|
7696
|
+
pipeline: pipelineCommand,
|
|
7304
7697
|
results: resultsCommand,
|
|
7305
7698
|
self: selfCommand,
|
|
7306
7699
|
serve: resultsServeCommand,
|
|
@@ -7317,8 +7710,8 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
7317
7710
|
"compare",
|
|
7318
7711
|
"convert",
|
|
7319
7712
|
"create",
|
|
7320
|
-
"generate",
|
|
7321
7713
|
"init",
|
|
7714
|
+
"pipeline",
|
|
7322
7715
|
"results",
|
|
7323
7716
|
"self",
|
|
7324
7717
|
"serve",
|
|
@@ -7368,4 +7761,4 @@ export {
|
|
|
7368
7761
|
preprocessArgv,
|
|
7369
7762
|
runCli
|
|
7370
7763
|
};
|
|
7371
|
-
//# sourceMappingURL=chunk-
|
|
7764
|
+
//# sourceMappingURL=chunk-2ELQ6F3C.js.map
|