agentv 2.18.4 → 3.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -36
- package/dist/agentv-provider-5CJVBBGG-2XVZBW7L.js +9 -0
- package/dist/{chunk-RMUVJ44Z.js → chunk-5WIB7A27.js} +598 -403
- package/dist/chunk-5WIB7A27.js.map +1 -0
- package/dist/chunk-6GSYTMXD.js +31520 -0
- package/dist/chunk-6GSYTMXD.js.map +1 -0
- package/dist/{chunk-KSUL3F3R.js → chunk-DY4ZDTTO.js} +1018 -140
- package/dist/chunk-DY4ZDTTO.js.map +1 -0
- package/dist/chunk-HF4X7ALN.js +24299 -0
- package/dist/chunk-HF4X7ALN.js.map +1 -0
- package/dist/{chunk-FV32QHPB.js → chunk-XOSNETAV.js} +1 -1
- package/dist/cli.js +5 -4
- package/dist/cli.js.map +1 -1
- package/dist/{dist-EDQZMZH2.js → dist-WN2QIOQR.js} +27 -11
- package/dist/{esm-DX3WQKEN.js → esm-CZAWIY6F.js} +2 -2
- package/dist/esm-CZAWIY6F.js.map +1 -0
- package/dist/index.js +5 -4
- package/dist/{interactive-J4IBXJF7.js → interactive-B432TCRZ.js} +5 -4
- package/dist/{interactive-J4IBXJF7.js.map → interactive-B432TCRZ.js.map} +1 -1
- package/dist/{src-2N5EJ2N6.js → src-ML4D2MC2.js} +2 -2
- package/dist/templates/.agentv/config.yaml +0 -5
- package/dist/templates/.agentv/targets.yaml +8 -11
- package/package.json +2 -2
- package/dist/chunk-KSUL3F3R.js.map +0 -1
- package/dist/chunk-RMUVJ44Z.js.map +0 -1
- package/dist/chunk-YTHTGLMT.js +0 -49786
- package/dist/chunk-YTHTGLMT.js.map +0 -1
- /package/dist/{dist-EDQZMZH2.js.map → agentv-provider-5CJVBBGG-2XVZBW7L.js.map} +0 -0
- /package/dist/{chunk-FV32QHPB.js.map → chunk-XOSNETAV.js.map} +0 -0
- /package/dist/{esm-DX3WQKEN.js.map → dist-WN2QIOQR.js.map} +0 -0
- /package/dist/{src-2N5EJ2N6.js.map → src-ML4D2MC2.js.map} +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
|
+
HtmlWriter,
|
|
3
4
|
detectFileType,
|
|
4
5
|
findRepoRoot,
|
|
5
6
|
package_default,
|
|
@@ -11,23 +12,25 @@ import {
|
|
|
11
12
|
validateEvalFile,
|
|
12
13
|
validateFileReferences,
|
|
13
14
|
validateTargetsFile
|
|
14
|
-
} from "./chunk-
|
|
15
|
+
} from "./chunk-DY4ZDTTO.js";
|
|
15
16
|
import {
|
|
16
|
-
assembleLlmJudgePrompt,
|
|
17
|
-
buildPromptInputs,
|
|
18
17
|
createBuiltinRegistry,
|
|
19
18
|
createProvider,
|
|
20
19
|
executeScript,
|
|
21
20
|
generateRubrics,
|
|
22
21
|
getAgentvHome,
|
|
22
|
+
getOutputFilenames,
|
|
23
23
|
getWorkspacePoolRoot,
|
|
24
|
+
isAgentSkillsFormat,
|
|
24
25
|
loadTestById,
|
|
25
26
|
loadTests,
|
|
26
27
|
normalizeLineEndings,
|
|
28
|
+
parseAgentSkillsEvals,
|
|
27
29
|
toCamelCaseDeep,
|
|
28
30
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
31
|
+
transpileEvalYamlFile,
|
|
29
32
|
trimBaselineResult
|
|
30
|
-
} from "./chunk-
|
|
33
|
+
} from "./chunk-HF4X7ALN.js";
|
|
31
34
|
import {
|
|
32
35
|
__commonJS,
|
|
33
36
|
__esm,
|
|
@@ -3296,6 +3299,16 @@ var compareCommand = command({
|
|
|
3296
3299
|
import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
3297
3300
|
import path from "node:path";
|
|
3298
3301
|
import { stringify as stringifyYaml } from "yaml";
|
|
3302
|
+
/**
 * Convert a JSONL file into an HTML report by streaming each record through
 * an `HtmlWriter`.
 *
 * Blank lines in the input are ignored. Every remaining line is parsed before
 * the writer is opened, so malformed input fails without touching
 * `outputPath`. Once opened, the writer is always closed, even when an
 * `append` call rejects.
 *
 * @param {string} inputPath - Path of the JSONL file to read (UTF-8).
 * @param {string} outputPath - Destination handed to `HtmlWriter.open`.
 * @returns {Promise<number>} Number of records written.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
async function convertJsonlToHtml(inputPath, outputPath) {
  const content = readFileSync2(inputPath, "utf8");
  const lines = content.trim().split("\n").filter((line) => line.trim());
  // Parse up front: a bad line must not leave a half-written report behind.
  const records = lines.map((line) => JSON.parse(line));
  const writer = await HtmlWriter.open(outputPath);
  try {
    for (const record of records) {
      await writer.append(record);
    }
  } finally {
    // Release the writer on the failure path too.
    await writer.close();
  }
  return records.length;
}
|
|
3299
3312
|
function convertJsonlToYaml(inputPath, outputPath) {
|
|
3300
3313
|
const content = readFileSync2(inputPath, "utf8");
|
|
3301
3314
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
@@ -3315,35 +3328,157 @@ function convertJsonlToYaml(inputPath, outputPath) {
|
|
|
3315
3328
|
writeFileSync(outputPath, yamlOutput);
|
|
3316
3329
|
return lines.length;
|
|
3317
3330
|
}
|
|
3331
|
+
/**
 * Render a value as a double-quoted YAML scalar.
 * JSON string syntax is accepted by YAML's double-quoted style, so
 * JSON.stringify takes care of quotes, backslashes and control characters.
 */
function quoteYamlScalar(value) {
  return JSON.stringify(String(value));
}

/**
 * Emit short identifiers (names, types, roles) unquoted when that is safe and
 * fall back to a quoted scalar for anything YAML could misread.
 */
function plainOrQuotedYamlScalar(value) {
  const text = String(value);
  const looksPlain = /^[A-Za-z_][\w.-]*$/.test(text);
  const isYamlKeyword = /^(?:true|false|null|yes|no|on|off)$/i.test(text);
  return looksPlain && !isYamlKeyword ? text : JSON.stringify(text);
}

/**
 * Append `key: value` to `lines` at the given indent.
 *
 * Strings become a `|-` block scalar when they span several lines (or when
 * `preferBlock` is set) and the block form can represent them exactly.
 * Otherwise they are written as an escaped double-quoted scalar. Non-string
 * values are written as JSON, which is valid YAML flow syntax.
 */
function pushYamlText(lines, indent, key, value, preferBlock = false) {
  if (typeof value !== "string") {
    lines.push(`${indent}${key}: ${JSON.stringify(value ?? "")}`);
    return;
  }
  // `|-` drops trailing newlines, and a first line starting with whitespace
  // would change the detected block indentation; quote such strings instead.
  const blockSafe = !value.endsWith("\n") && !value.includes("\r") && !/^\n*[ \t]/.test(value);
  if (blockSafe && (preferBlock || value.includes("\n"))) {
    lines.push(`${indent}${key}: |-`);
    for (const row of value.split("\n")) {
      lines.push(`${indent}  ${row}`);
    }
    return;
  }
  lines.push(`${indent}${key}: ${JSON.stringify(value)}`);
}

/** Append a `key:` list of `{ role, content }` messages followed by a blank line. */
function pushYamlMessages(lines, key, messages) {
  lines.push(`    ${key}:`);
  for (const msg of messages) {
    lines.push(`      - role: ${plainOrQuotedYamlScalar(msg.role)}`);
    pushYamlText(lines, "        ", "content", msg.content);
  }
  lines.push("");
}

/**
 * Convert an Agent Skills `evals.json` file into EVAL.yaml text.
 *
 * The file is read and parsed as JSON, validated with `isAgentSkillsFormat`,
 * and expanded into tests by `parseAgentSkillsEvals`. The YAML is assembled
 * line by line so the explanatory comments can be interleaved with the data.
 *
 * @param {string} inputPath - Path to the evals.json file.
 * @returns {string} YAML document text, terminated by a newline.
 * @throws {SyntaxError} When the file is not valid JSON.
 * @throws {Error} When the parsed document is not in Agent Skills format.
 */
function convertEvalsJsonToYaml(inputPath) {
  const content = readFileSync2(inputPath, "utf8");
  const parsed = JSON.parse(content);
  if (!isAgentSkillsFormat(parsed)) {
    throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
  }
  const tests = parseAgentSkillsEvals(parsed, inputPath, path.dirname(path.resolve(inputPath)));
  const lines = [
    "# Converted from Agent Skills evals.json",
    "# See: https://agentskills.io/skill-creation/evaluating-skills",
    "#",
    "# AgentV features you can add:",
    "# - type: is_json, contains, regex for deterministic evaluators",
    "# - type: code-grader for custom scoring scripts",
    "# - Multi-turn conversations via input message arrays",
    "# - Composite evaluators with weighted scoring",
    "# - Workspace isolation with repos and hooks",
    ""
  ];
  if (parsed.skill_name) {
    lines.push(`description: ${quoteYamlScalar(`Evals for ${parsed.skill_name} skill`)}`);
    lines.push("");
  }
  lines.push("tests:");
  for (const test of tests) {
    lines.push(`  - id: ${quoteYamlScalar(test.id)}`);
    lines.push("");
    if (test.criteria) {
      pushYamlText(lines, "    ", "criteria", test.criteria, true);
      lines.push("");
    }
    pushYamlMessages(lines, "input", test.input);
    if (test.expected_output && test.expected_output.length > 0) {
      pushYamlMessages(lines, "expected_output", test.expected_output);
    }
    if (test.assertions && test.assertions.length > 0) {
      lines.push("    # Promoted from evals.json assertions[]");
      lines.push("    # Replace with type: is_json, contains, or regex for deterministic checks");
      lines.push("    assertions:");
      for (const assertion of test.assertions) {
        lines.push(`      - name: ${plainOrQuotedYamlScalar(assertion.name)}`);
        lines.push(`        type: ${plainOrQuotedYamlScalar(assertion.type)}`);
        const isLlmGrader = assertion.type === "llm-grader" || assertion.type === "llm-judge";
        if (isLlmGrader && "prompt" in assertion && assertion.prompt != null) {
          pushYamlText(lines, "        ", "prompt", assertion.prompt);
        }
      }
      lines.push("");
    }
    if (test.file_paths && test.file_paths.length > 0) {
      lines.push("    # TODO: Configure workspace.repos or file references for these files:");
      const agentSkillsFiles = test.metadata?.agent_skills_files;
      if (agentSkillsFiles) {
        for (const file of agentSkillsFiles) {
          lines.push(`    #   - ${file}`);
        }
      }
      lines.push("");
    }
  }
  return `${lines.join("\n")}\n`;
}
|
|
3318
3424
|
/**
 * `convert` CLI command.
 *
 * Dispatches on the input file extension (compared case-insensitively):
 *   - `.json`  -> evals.json to EVAL.yaml; written to `--out`, or stdout when
 *                 no output path is given.
 *   - `.jsonl` -> HTML when `--out` ends in `.html`/`.htm`, otherwise YAML.
 *                 Without `--out`, the YAML is written next to the input with
 *                 the `.jsonl` suffix replaced by `.yaml`.
 * Any other extension, or any conversion failure, prints `Error: ...` to
 * stderr and exits with status 1.
 */
var convertCommand = command({
  name: "convert",
  description: "Convert between evaluation formats (JSONL\u2192YAML, JSONL\u2192HTML, evals.json\u2192EVAL.yaml)",
  args: {
    input: positional({
      type: string,
      displayName: "input",
      description: "Path to input file (.jsonl or .json)"
    }),
    out: option({
      type: optional(string),
      long: "out",
      short: "o",
      description: "Output file path (defaults to stdout for evals.json, .yaml or .html for JSONL)"
    })
  },
  handler: async ({ input, out }) => {
    const ext = path.extname(input).toLowerCase();
    // Print the failure and stop; tolerates thrown values that are not Errors.
    const fail = (error) => {
      console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    };
    if (ext === ".json") {
      try {
        const yaml = convertEvalsJsonToYaml(input);
        if (out) {
          writeFileSync(out, yaml);
          console.log(`Converted to ${path.resolve(out)}`);
        } else {
          process.stdout.write(yaml);
        }
      } catch (error) {
        fail(error);
      }
      return;
    }
    if (ext === ".jsonl") {
      const outExt = out ? path.extname(out).toLowerCase() : ".yaml";
      const wantsHtml = outExt === ".html" || outExt === ".htm";
      // The extension was matched case-insensitively, so strip it the same
      // way. A case-sensitive replace would leave `results.JSONL` unchanged
      // and the conversion would overwrite the input file.
      const outputPath = out ?? input.replace(/\.jsonl$/i, ".yaml");
      try {
        const count = wantsHtml ? await convertJsonlToHtml(input, outputPath) : convertJsonlToYaml(input, outputPath);
        console.log(`Converted ${count} records to ${path.resolve(outputPath)}`);
      } catch (error) {
        fail(error);
      }
      return;
    }
    console.error(`Error: Unsupported input format '${ext}'. Supported: .json, .jsonl`);
    process.exit(1);
  }
});
|
|
3349
3484
|
|
|
@@ -3387,7 +3522,7 @@ tests:
|
|
|
3387
3522
|
criteria: Agent responds correctly
|
|
3388
3523
|
input: "Hello, how are you?"
|
|
3389
3524
|
expected_output: "I'm doing well"
|
|
3390
|
-
|
|
3525
|
+
assertions:
|
|
3391
3526
|
- type: contains
|
|
3392
3527
|
value: "well"
|
|
3393
3528
|
`,
|
|
@@ -3400,8 +3535,8 @@ tests:
|
|
|
3400
3535
|
criteria: Agent responds correctly and completely
|
|
3401
3536
|
input: "Hello, how are you?"
|
|
3402
3537
|
expected_output: "I'm doing well, thank you for asking!"
|
|
3403
|
-
|
|
3404
|
-
- type: llm-
|
|
3538
|
+
assertions:
|
|
3539
|
+
- type: llm-grader
|
|
3405
3540
|
rubric:
|
|
3406
3541
|
accuracy:
|
|
3407
3542
|
weight: 0.6
|
|
@@ -3470,7 +3605,7 @@ var createAssertionCommand = command({
|
|
|
3470
3605
|
console.log(`Created ${path2.relative(process.cwd(), filePath)} (template: ${templateName})`);
|
|
3471
3606
|
console.log(`
|
|
3472
3607
|
Use in EVAL.yaml:
|
|
3473
|
-
|
|
3608
|
+
assertions:
|
|
3474
3609
|
- type: ${name}`);
|
|
3475
3610
|
}
|
|
3476
3611
|
});
|
|
@@ -3559,38 +3694,104 @@ var createCommand = subcommands({
|
|
|
3559
3694
|
}
|
|
3560
3695
|
});
|
|
3561
3696
|
|
|
3562
|
-
// src/commands/eval/commands/prompt/
|
|
3563
|
-
|
|
3564
|
-
|
|
3565
|
-
|
|
3566
|
-
|
|
3567
|
-
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
|
|
3571
|
-
|
|
3572
|
-
|
|
3573
|
-
|
|
3574
|
-
|
|
3575
|
-
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
|
|
3585
|
-
|
|
3586
|
-
|
|
3587
|
-
|
|
3588
|
-
|
|
3589
|
-
|
|
3590
|
-
|
|
3591
|
-
|
|
3697
|
+
// src/commands/eval/commands/prompt/accessors.ts
|
|
3698
|
+
/**
 * List the test IDs found in an eval file.
 *
 * The repo root is located from the current working directory and passed to
 * `loadTests` together with `evalPath`.
 *
 * @param {string} evalPath - Path to the evaluation file.
 * @returns {Promise<{eval_path: string, test_ids: Array}>} The path as given
 *   and the test IDs in default `Array.prototype.sort` order.
 */
async function listPromptEvalTestIds(evalPath) {
  const root = await findRepoRoot(process.cwd());
  const loaded = await loadTests(evalPath, root);
  const ids = [];
  for (const entry of loaded) {
    ids.push(entry.id);
  }
  ids.sort();
  return { eval_path: evalPath, test_ids: ids };
}
|
|
3706
|
+
/**
 * Build the input payload for one test of an eval file.
 *
 * The test is loaded with `loadTestById`, and its input messages are passed
 * through `resolveMessages` with a file map built from the test's input
 * segments and file paths.
 *
 * @param {string} evalPath - Path to the evaluation file.
 * @param {string} testId - ID of the test to load.
 * @returns {Promise<object>} `{ test_id, input, guideline_paths, criteria }`.
 */
async function getPromptEvalInput(evalPath, testId) {
  const root = await findRepoRoot(process.cwd());
  const found = await loadTestById(evalPath, root, testId);
  const resolved = resolveMessages(
    found.input,
    buildFileMap(found.input_segments, found.file_paths)
  );
  const { id, guideline_paths, criteria } = found;
  return { test_id: id, input: resolved, guideline_paths, criteria };
}
|
|
3717
|
+
/**
 * Collect the expected-output side of one test: its criteria, expected
 * output, reference answer and assertions.
 *
 * @param {string} evalPath - Path to the evaluation file.
 * @param {string} testId - ID of the test to load.
 * @returns {Promise<object>} `{ test_id, criteria, expected_output,
 *   reference_answer, assertions }`; `assertions` is `[]` when the test has
 *   none (null or undefined).
 */
async function getPromptEvalExpectedOutput(evalPath, testId) {
  const root = await findRepoRoot(process.cwd());
  const found = await loadTestById(evalPath, root, testId);
  const { id, criteria, expected_output, reference_answer } = found;
  const assertions = found.assertions ?? [];
  return { test_id: id, criteria, expected_output, reference_answer, assertions };
}
|
|
3728
|
+
/**
 * Produce a human-readable grading brief for one test.
 *
 * The brief lists, when present: the user input text, the test's file paths
 * that are not guideline paths, the reference answer, and a "Criteria:"
 * section. That section holds the test's own criteria followed by one line
 * per assertion, labelled by assertion type.
 *
 * @param {string} evalPath - Path to the evaluation file.
 * @param {string} testId - ID of the test to load.
 * @returns {Promise<string>} The brief, lines joined with "\n".
 */
async function getPromptEvalGradingBrief(evalPath, testId) {
  const root = await findRepoRoot(process.cwd());
  const evalCase = await loadTestById(evalPath, root, testId);
  const messages = resolveMessages(
    evalCase.input,
    buildFileMap(evalCase.input_segments, evalCase.file_paths)
  );
  const brief = [];
  const question = extractTextFromMessages(messages);
  if (question) {
    brief.push(`Input: "${question}"`);
  }
  const nonGuidelineFiles = evalCase.file_paths.filter(
    (candidate) => !evalCase.guideline_paths.includes(candidate)
  );
  if (nonGuidelineFiles.length > 0) {
    brief.push(`Files: ${nonGuidelineFiles.join(", ")}`);
  }
  if (evalCase.reference_answer) {
    brief.push(`Expected: "${evalCase.reference_answer}"`);
  }
  const bullets = evalCase.criteria ? [evalCase.criteria] : [];
  for (const entry of evalCase.assertions ?? []) {
    const kind = entry.type;
    // Some fields may live either on the assertion itself or under `config`.
    const extras = entry.config ?? {};
    switch (kind) {
      case "contains":
        bullets.push(`Output contains '${entry.value}'`);
        break;
      case "rubrics": {
        const rubricItems = entry.criteria ?? extras.criteria;
        if (Array.isArray(rubricItems)) {
          for (const rubric of rubricItems) {
            if (rubric.outcome) bullets.push(rubric.outcome);
          }
        }
        break;
      }
      case "llm-grader":
      case "llm_grader":
      case "llm-judge":
      case "llm_judge": {
        const graderPrompt = entry.prompt ?? extras.prompt ?? extras.criteria;
        bullets.push(`[llm-grader] ${typeof graderPrompt === "string" ? graderPrompt : ""}`);
        break;
      }
      case "code-grader":
      case "code_grader":
      case "code-judge":
      case "code_judge": {
        const label = entry.name ?? kind;
        const detail = extras.description ?? entry.description;
        bullets.push(`[code-grader] ${label}${detail ? `: ${detail}` : ""}`);
        break;
      }
      case "skill-trigger": {
        // Anything other than an explicit `false` counts as "should trigger".
        const shouldTrigger = entry.should_trigger !== false;
        bullets.push(`[skill-trigger] should_trigger: ${shouldTrigger} for ${entry.skill}`);
        break;
      }
      default:
        if (kind) {
          bullets.push(`[${kind}] ${entry.value ?? extras.criteria ?? extras.prompt ?? ""}`);
        }
    }
  }
  if (bullets.length > 0) {
    brief.push("Criteria:");
    for (const bullet of bullets) {
      brief.push(` - ${bullet}`);
    }
  }
  return brief.join("\n");
}
|
|
3784
|
+
/**
 * Return the text of the first user message that has any.
 *
 * A user message with string content is returned as-is, even when empty. For
 * array content, the `value` of every block whose `type` is "text" is joined
 * with single spaces. A user message with no text blocks is skipped and the
 * search continues with later messages.
 *
 * @param {Iterable<{role: string, content: unknown}>} messages
 * @returns {string} The extracted text, or "" when no user message yields any.
 */
function extractTextFromMessages(messages) {
  for (const message of messages) {
    if (message.role === "user") {
      const body = message.content;
      if (typeof body === "string") {
        return body;
      }
      if (Array.isArray(body)) {
        const pieces = [];
        for (const block of body) {
          if (block.type === "text") pieces.push(block.value);
        }
        if (pieces.length > 0) {
          return pieces.join(" ");
        }
      }
    }
  }
  return "";
}
|
|
3594
3795
|
function buildFileMap(inputSegments, allFilePaths) {
|
|
3595
3796
|
const map = /* @__PURE__ */ new Map();
|
|
3596
3797
|
for (const segment of inputSegments) {
|
|
@@ -3602,7 +3803,7 @@ function buildFileMap(inputSegments, allFilePaths) {
|
|
|
3602
3803
|
get(key) {
|
|
3603
3804
|
const direct = map.get(key);
|
|
3604
3805
|
if (direct) return direct;
|
|
3605
|
-
return allFilePaths.find((
|
|
3806
|
+
return allFilePaths.find((filePath) => filePath.endsWith(`/${key}`) || filePath === key);
|
|
3606
3807
|
},
|
|
3607
3808
|
has(key) {
|
|
3608
3809
|
return this.get(key) !== void 0;
|
|
@@ -3638,291 +3839,61 @@ function resolveMessages(messages, fileMap) {
|
|
|
3638
3839
|
});
|
|
3639
3840
|
}
|
|
3640
3841
|
|
|
3641
|
-
// src/commands/eval/commands/prompt/
|
|
3642
|
-
|
|
3643
|
-
|
|
3644
|
-
|
|
3645
|
-
description: "Run code judges and output LLM judge prompts for a single test",
|
|
3842
|
+
// src/commands/eval/commands/prompt/index.ts
|
|
3843
|
+
var evalPromptEvalSubcommand = command({
|
|
3844
|
+
name: "eval",
|
|
3845
|
+
description: "Extract eval prompt data for agents",
|
|
3646
3846
|
args: {
|
|
3647
|
-
|
|
3648
|
-
|
|
3649
|
-
|
|
3650
|
-
|
|
3847
|
+
list: flag({
|
|
3848
|
+
long: "list",
|
|
3849
|
+
description: "List available test IDs"
|
|
3850
|
+
}),
|
|
3851
|
+
input: flag({
|
|
3852
|
+
long: "input",
|
|
3853
|
+
description: "Extract the test input payload for a single test"
|
|
3854
|
+
}),
|
|
3855
|
+
expectedOutput: flag({
|
|
3856
|
+
long: "expected-output",
|
|
3857
|
+
description: "Extract expected output and grading context for a single test"
|
|
3858
|
+
}),
|
|
3859
|
+
gradingBrief: flag({
|
|
3860
|
+
long: "grading-brief",
|
|
3861
|
+
description: "Output human-readable grading brief with typed criteria"
|
|
3651
3862
|
}),
|
|
3652
3863
|
testId: option({
|
|
3653
|
-
type: string,
|
|
3864
|
+
type: optional(string),
|
|
3654
3865
|
long: "test-id",
|
|
3655
|
-
description: "Test ID"
|
|
3866
|
+
description: "Test ID (required for --input and --expected-output)"
|
|
3656
3867
|
}),
|
|
3657
|
-
|
|
3868
|
+
evalPath: positional({
|
|
3658
3869
|
type: string,
|
|
3659
|
-
|
|
3660
|
-
description: "Path to
|
|
3870
|
+
displayName: "eval-path",
|
|
3871
|
+
description: "Path to evaluation .yaml, .json, or .jsonl file"
|
|
3661
3872
|
})
|
|
3662
3873
|
},
|
|
3663
|
-
handler: async (
|
|
3664
|
-
const
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
const evaluators = evalCase.evaluators ?? [];
|
|
3670
|
-
const outputs = [];
|
|
3671
|
-
for (const config of evaluators) {
|
|
3672
|
-
const output = await processEvaluator(config, evalCase, candidate, promptInputs);
|
|
3673
|
-
outputs.push(output);
|
|
3674
|
-
}
|
|
3675
|
-
if (outputs.length === 0) {
|
|
3676
|
-
const assembly = assembleLlmJudgePrompt({
|
|
3677
|
-
evalCase,
|
|
3678
|
-
candidate,
|
|
3679
|
-
promptInputs
|
|
3680
|
-
});
|
|
3681
|
-
outputs.push({
|
|
3682
|
-
name: "default_llm_judge",
|
|
3683
|
-
type: "llm-judge",
|
|
3684
|
-
status: "prompt_ready",
|
|
3685
|
-
prompt: {
|
|
3686
|
-
system_prompt: assembly.systemPrompt,
|
|
3687
|
-
user_prompt: assembly.userPrompt
|
|
3688
|
-
}
|
|
3689
|
-
});
|
|
3690
|
-
}
|
|
3691
|
-
const result = {
|
|
3692
|
-
test_id: evalCase.id,
|
|
3693
|
-
evaluators: outputs
|
|
3694
|
-
};
|
|
3695
|
-
process.stdout.write(JSON.stringify(result, null, 2));
|
|
3696
|
-
process.stdout.write("\n");
|
|
3697
|
-
}
|
|
3698
|
-
});
|
|
3699
|
-
async function processEvaluator(config, evalCase, candidate, promptInputs) {
|
|
3700
|
-
switch (config.type) {
|
|
3701
|
-
case "code-judge": {
|
|
3702
|
-
const codeConfig = config;
|
|
3703
|
-
const script = codeConfig.command ?? codeConfig.script ?? [];
|
|
3704
|
-
const scriptCwd = codeConfig.resolvedCwd ?? codeConfig.cwd;
|
|
3705
|
-
const payload = {
|
|
3706
|
-
question: evalCase.question,
|
|
3707
|
-
criteria: evalCase.criteria,
|
|
3708
|
-
expectedOutput: evalCase.expected_output,
|
|
3709
|
-
referenceAnswer: evalCase.reference_answer,
|
|
3710
|
-
answer: candidate,
|
|
3711
|
-
output: null,
|
|
3712
|
-
guidelineFiles: evalCase.guideline_paths,
|
|
3713
|
-
inputFiles: evalCase.file_paths.filter((p) => !evalCase.guideline_paths.includes(p)),
|
|
3714
|
-
input: evalCase.input,
|
|
3715
|
-
trace: null,
|
|
3716
|
-
fileChanges: null,
|
|
3717
|
-
workspacePath: null,
|
|
3718
|
-
config: codeConfig.config ?? null
|
|
3719
|
-
};
|
|
3720
|
-
try {
|
|
3721
|
-
const inputPayload = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
|
|
3722
|
-
const stdout = await executeScript(script, inputPayload, 6e4, scriptCwd);
|
|
3723
|
-
const parsed = JSON.parse(stdout);
|
|
3724
|
-
return {
|
|
3725
|
-
name: codeConfig.name,
|
|
3726
|
-
type: "code-judge",
|
|
3727
|
-
status: "completed",
|
|
3728
|
-
result: parsed
|
|
3729
|
-
};
|
|
3730
|
-
} catch (error) {
|
|
3731
|
-
return {
|
|
3732
|
-
name: codeConfig.name,
|
|
3733
|
-
type: "code-judge",
|
|
3734
|
-
status: "completed",
|
|
3735
|
-
result: {
|
|
3736
|
-
score: 0,
|
|
3737
|
-
error: error instanceof Error ? error.message : String(error)
|
|
3738
|
-
}
|
|
3739
|
-
};
|
|
3740
|
-
}
|
|
3741
|
-
}
|
|
3742
|
-
case "llm-judge": {
|
|
3743
|
-
const llmConfig = config;
|
|
3744
|
-
const assembly = assembleLlmJudgePrompt({
|
|
3745
|
-
evalCase,
|
|
3746
|
-
candidate,
|
|
3747
|
-
promptInputs,
|
|
3748
|
-
evaluatorConfig: llmConfig
|
|
3749
|
-
});
|
|
3750
|
-
return {
|
|
3751
|
-
name: llmConfig.name,
|
|
3752
|
-
type: "llm-judge",
|
|
3753
|
-
status: "prompt_ready",
|
|
3754
|
-
prompt: {
|
|
3755
|
-
system_prompt: assembly.systemPrompt,
|
|
3756
|
-
user_prompt: assembly.userPrompt
|
|
3757
|
-
}
|
|
3758
|
-
};
|
|
3759
|
-
}
|
|
3760
|
-
default: {
|
|
3761
|
-
return {
|
|
3762
|
-
name: config.name,
|
|
3763
|
-
type: config.type,
|
|
3764
|
-
status: "prompt_ready",
|
|
3765
|
-
result: {
|
|
3766
|
-
message: `Evaluator type "${config.type}" requires the full eval pipeline. Use \`agentv eval\` instead.`
|
|
3767
|
-
}
|
|
3768
|
-
};
|
|
3874
|
+
handler: async ({ evalPath, expectedOutput, gradingBrief, input, list, testId }) => {
|
|
3875
|
+
const selectedModes = [list, input, expectedOutput, gradingBrief].filter(Boolean).length;
|
|
3876
|
+
if (selectedModes !== 1) {
|
|
3877
|
+
throw new Error(
|
|
3878
|
+
"Specify exactly one of --list, --input, --expected-output, or --grading-brief."
|
|
3879
|
+
);
|
|
3769
3880
|
}
|
|
3770
|
-
|
|
3771
|
-
|
|
3772
|
-
|
|
3773
|
-
// src/commands/eval/commands/prompt/overview.ts
|
|
3774
|
-
function getEvalMode() {
|
|
3775
|
-
const mode = process.env.AGENTV_PROMPT_EVAL_MODE ?? "agent";
|
|
3776
|
-
if (mode !== "agent" && mode !== "cli") {
|
|
3777
|
-
throw new Error(`Invalid AGENTV_PROMPT_EVAL_MODE="${mode}". Valid values: agent, cli`);
|
|
3778
|
-
}
|
|
3779
|
-
return mode;
|
|
3780
|
-
}
|
|
3781
|
-
async function generateOverviewPrompt(evalPaths) {
|
|
3782
|
-
const cwd = process.cwd();
|
|
3783
|
-
const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
|
|
3784
|
-
const repoRoot = await findRepoRoot(cwd);
|
|
3785
|
-
const mode = getEvalMode();
|
|
3786
|
-
const fileEntries = [];
|
|
3787
|
-
for (const evalPath of resolvedPaths) {
|
|
3788
|
-
const tests = await loadTests(evalPath, repoRoot);
|
|
3789
|
-
fileEntries.push({ path: evalPath, tests });
|
|
3790
|
-
}
|
|
3791
|
-
const totalCases = fileEntries.reduce((sum, e) => sum + e.tests.length, 0);
|
|
3792
|
-
if (mode === "cli") {
|
|
3793
|
-
return generateCliModePrompt(fileEntries, totalCases);
|
|
3794
|
-
}
|
|
3795
|
-
return generateAgentModePrompt(fileEntries, totalCases);
|
|
3796
|
-
}
|
|
3797
|
-
function generateAgentModePrompt(fileEntries, totalCases) {
|
|
3798
|
-
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").slice(0, -1);
|
|
3799
|
-
const lines = [
|
|
3800
|
-
"# AgentV Eval Orchestration",
|
|
3801
|
-
"",
|
|
3802
|
-
"**Mode: agent** \u2014 You orchestrate the evaluation using agents. No API keys needed.",
|
|
3803
|
-
"",
|
|
3804
|
-
`You are orchestrating ${totalCases} evaluation case${totalCases === 1 ? "" : "s"}.`,
|
|
3805
|
-
"",
|
|
3806
|
-
"## Setup",
|
|
3807
|
-
"",
|
|
3808
|
-
`- **Results file:** \`.agentv/results/eval_${timestamp}.jsonl\``,
|
|
3809
|
-
"- **Temp answers:** `.agentv/tmp/`",
|
|
3810
|
-
"",
|
|
3811
|
-
"Ensure both directories exist before starting.",
|
|
3812
|
-
"",
|
|
3813
|
-
"## For each test case",
|
|
3814
|
-
"",
|
|
3815
|
-
"Run these two agents **sequentially**:",
|
|
3816
|
-
"",
|
|
3817
|
-
"### 1. Dispatch `eval-candidate` agent",
|
|
3818
|
-
"",
|
|
3819
|
-
"Parameters:",
|
|
3820
|
-
"- `eval-path`: Path to the eval YAML file",
|
|
3821
|
-
"- `test-id`: The test case ID",
|
|
3822
|
-
"- `answer-file`: `.agentv/tmp/eval_<test-id>.txt`",
|
|
3823
|
-
"",
|
|
3824
|
-
"The agent retrieves the task input, acts as the candidate LLM, and saves its response.",
|
|
3825
|
-
"",
|
|
3826
|
-
"### 2. Dispatch `eval-judge` agent (after candidate completes)",
|
|
3827
|
-
"",
|
|
3828
|
-
"Parameters:",
|
|
3829
|
-
"- `eval-path`: Path to the eval YAML file",
|
|
3830
|
-
"- `test-id`: The test case ID",
|
|
3831
|
-
"- `answer-file`: `.agentv/tmp/eval_<test-id>.txt`",
|
|
3832
|
-
`- \`results-file\`: \`.agentv/results/eval_${timestamp}.jsonl\``,
|
|
3833
|
-
"",
|
|
3834
|
-
"The agent runs evaluators, scores the response, and appends results to the JSONL file.",
|
|
3835
|
-
""
|
|
3836
|
-
];
|
|
3837
|
-
for (const { path: evalPath, tests } of fileEntries) {
|
|
3838
|
-
lines.push(`## ${evalPath}`);
|
|
3839
|
-
lines.push("");
|
|
3840
|
-
for (const evalCase of tests) {
|
|
3841
|
-
const evaluatorSummary = describeEvaluators(evalCase);
|
|
3842
|
-
lines.push(`### ${evalCase.id}`);
|
|
3843
|
-
lines.push(`Criteria: ${evalCase.criteria}`);
|
|
3844
|
-
if (evaluatorSummary) {
|
|
3845
|
-
lines.push(`Evaluators: ${evaluatorSummary}`);
|
|
3881
|
+
if (gradingBrief) {
|
|
3882
|
+
if (!testId) {
|
|
3883
|
+
throw new Error("--test-id is required with --grading-brief.");
|
|
3846
3884
|
}
|
|
3847
|
-
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
lines.push(`- answer-file: \`.agentv/tmp/eval_${evalCase.id}.txt\``);
|
|
3852
|
-
lines.push("");
|
|
3853
|
-
lines.push("**2. Dispatch `eval-judge` agent** (after candidate completes):");
|
|
3854
|
-
lines.push(`- eval-path: \`${evalPath}\``);
|
|
3855
|
-
lines.push(`- test-id: \`${evalCase.id}\``);
|
|
3856
|
-
lines.push(`- answer-file: \`.agentv/tmp/eval_${evalCase.id}.txt\``);
|
|
3857
|
-
lines.push(`- results-file: \`.agentv/results/eval_${timestamp}.jsonl\``);
|
|
3858
|
-
lines.push("");
|
|
3885
|
+
const brief = await getPromptEvalGradingBrief(evalPath, testId);
|
|
3886
|
+
process.stdout.write(brief);
|
|
3887
|
+
process.stdout.write("\n");
|
|
3888
|
+
return;
|
|
3859
3889
|
}
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
}
|
|
3863
|
-
function generateCliModePrompt(fileEntries, totalCases) {
|
|
3864
|
-
const evalPathArgs = fileEntries.map((e) => e.path).join(" ");
|
|
3865
|
-
const lines = [
|
|
3866
|
-
"# AgentV Eval Orchestration",
|
|
3867
|
-
"",
|
|
3868
|
-
"**Mode: cli** \u2014 Run the evaluation end-to-end using the CLI.",
|
|
3869
|
-
"",
|
|
3870
|
-
`You are orchestrating ${totalCases} evaluation case${totalCases === 1 ? "" : "s"}.`,
|
|
3871
|
-
"",
|
|
3872
|
-
"## Run the evaluation",
|
|
3873
|
-
"",
|
|
3874
|
-
"```bash",
|
|
3875
|
-
`agentv eval ${evalPathArgs}`,
|
|
3876
|
-
"```",
|
|
3877
|
-
"",
|
|
3878
|
-
"Results are written to `.agentv/results/`. The output path is printed in the CLI output.",
|
|
3879
|
-
"Parse the JSONL file for per-test scores, hits, and misses.",
|
|
3880
|
-
""
|
|
3881
|
-
];
|
|
3882
|
-
for (const { path: evalPath, tests } of fileEntries) {
|
|
3883
|
-
lines.push(`## ${evalPath}`);
|
|
3884
|
-
lines.push("");
|
|
3885
|
-
for (const evalCase of tests) {
|
|
3886
|
-
const evaluatorSummary = describeEvaluators(evalCase);
|
|
3887
|
-
lines.push(`### ${evalCase.id}`);
|
|
3888
|
-
lines.push(`Criteria: ${evalCase.criteria}`);
|
|
3889
|
-
if (evaluatorSummary) {
|
|
3890
|
-
lines.push(`Evaluators: ${evaluatorSummary}`);
|
|
3891
|
-
}
|
|
3892
|
-
lines.push("");
|
|
3890
|
+
if ((input || expectedOutput) && !testId) {
|
|
3891
|
+
throw new Error("--test-id is required with --input and --expected-output.");
|
|
3893
3892
|
}
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
name: "overview",
|
|
3899
|
-
description: "Output orchestration prompt for host agent to run evals",
|
|
3900
|
-
args: {
|
|
3901
|
-
evalPaths: restPositionals({
|
|
3902
|
-
type: string,
|
|
3903
|
-
displayName: "eval-paths",
|
|
3904
|
-
description: "Path(s) or glob(s) to evaluation .yaml file(s)"
|
|
3905
|
-
})
|
|
3906
|
-
},
|
|
3907
|
-
handler: async (args) => {
|
|
3908
|
-
const output = await generateOverviewPrompt(args.evalPaths);
|
|
3909
|
-
process.stdout.write(output);
|
|
3910
|
-
}
|
|
3911
|
-
});
|
|
3912
|
-
function describeEvaluators(evalCase) {
|
|
3913
|
-
const configs = evalCase.evaluators;
|
|
3914
|
-
if (!configs || configs.length === 0) return void 0;
|
|
3915
|
-
return configs.map((c3) => `${c3.name} (${c3.type})`).join(", ");
|
|
3916
|
-
}
|
|
3917
|
-
|
|
3918
|
-
// src/commands/eval/commands/prompt/index.ts
|
|
3919
|
-
var evalPromptEvalSubcommand = subcommands({
|
|
3920
|
-
name: "eval",
|
|
3921
|
-
description: "Eval prompt commands (overview, input, judge)",
|
|
3922
|
-
cmds: {
|
|
3923
|
-
overview: evalPromptOverviewCommand,
|
|
3924
|
-
input: evalPromptInputCommand,
|
|
3925
|
-
judge: evalPromptJudgeCommand
|
|
3893
|
+
const requiredTestId = testId ?? "";
|
|
3894
|
+
const output = list ? await listPromptEvalTestIds(evalPath) : input ? await getPromptEvalInput(evalPath, requiredTestId) : await getPromptEvalExpectedOutput(evalPath, requiredTestId);
|
|
3895
|
+
process.stdout.write(JSON.stringify(output, null, 2));
|
|
3896
|
+
process.stdout.write("\n");
|
|
3926
3897
|
}
|
|
3927
3898
|
});
|
|
3928
3899
|
var evalPromptCommand = subcommands({
|
|
@@ -3933,6 +3904,120 @@ var evalPromptCommand = subcommands({
|
|
|
3933
3904
|
}
|
|
3934
3905
|
});
|
|
3935
3906
|
|
|
3907
|
+
// src/commands/eval/commands/assert.ts
|
|
3908
|
+
import { readFileSync as readFileSync3 } from "node:fs";
|
|
3909
|
+
import path3 from "node:path";
|
|
3910
|
+
import fg from "fast-glob";
|
|
3911
|
+
var evalAssertCommand = command({
|
|
3912
|
+
name: "assert",
|
|
3913
|
+
description: "Run a single code-grader assertion from .agentv/graders/ and print the score",
|
|
3914
|
+
args: {
|
|
3915
|
+
graderName: positional({
|
|
3916
|
+
type: string,
|
|
3917
|
+
displayName: "name",
|
|
3918
|
+
description: "Assertion name (matches filename without extension in .agentv/graders/)"
|
|
3919
|
+
}),
|
|
3920
|
+
agentOutput: option({
|
|
3921
|
+
type: optional(string),
|
|
3922
|
+
long: "agent-output",
|
|
3923
|
+
description: "The agent's full response text"
|
|
3924
|
+
}),
|
|
3925
|
+
agentInput: option({
|
|
3926
|
+
type: optional(string),
|
|
3927
|
+
long: "agent-input",
|
|
3928
|
+
description: "The original user prompt"
|
|
3929
|
+
}),
|
|
3930
|
+
file: option({
|
|
3931
|
+
type: optional(string),
|
|
3932
|
+
long: "file",
|
|
3933
|
+
description: "Path to JSON file with { output, input } fields"
|
|
3934
|
+
})
|
|
3935
|
+
},
|
|
3936
|
+
handler: async ({ graderName, agentOutput: output, agentInput: input, file }) => {
|
|
3937
|
+
let resolvedOutput;
|
|
3938
|
+
let resolvedInput;
|
|
3939
|
+
if (file) {
|
|
3940
|
+
const content = JSON.parse(readFileSync3(path3.resolve(file), "utf8"));
|
|
3941
|
+
resolvedOutput = content.output ?? "";
|
|
3942
|
+
resolvedInput = content.input ?? "";
|
|
3943
|
+
} else {
|
|
3944
|
+
if (output === void 0) {
|
|
3945
|
+
console.error("Error: --agent-output is required (or use --file)");
|
|
3946
|
+
process.exit(1);
|
|
3947
|
+
}
|
|
3948
|
+
resolvedOutput = output;
|
|
3949
|
+
resolvedInput = input ?? "";
|
|
3950
|
+
}
|
|
3951
|
+
if (!/^[a-zA-Z0-9_-]+$/.test(graderName)) {
|
|
3952
|
+
console.error(
|
|
3953
|
+
`Error: Invalid grader name '${graderName}' \u2014 only letters, digits, hyphens, and underscores allowed`
|
|
3954
|
+
);
|
|
3955
|
+
process.exit(1);
|
|
3956
|
+
}
|
|
3957
|
+
const scriptPath = await findGraderScript(graderName, process.cwd());
|
|
3958
|
+
if (!scriptPath) {
|
|
3959
|
+
console.error(
|
|
3960
|
+
`Error: Grader '${graderName}' not found in .agentv/graders/ (or .agentv/judges/)`
|
|
3961
|
+
);
|
|
3962
|
+
process.exit(1);
|
|
3963
|
+
}
|
|
3964
|
+
const payload = JSON.stringify(
|
|
3965
|
+
{
|
|
3966
|
+
answer: resolvedOutput,
|
|
3967
|
+
output: [{ role: "assistant", content: resolvedOutput }],
|
|
3968
|
+
input: [{ role: "user", content: resolvedInput }],
|
|
3969
|
+
question: resolvedInput,
|
|
3970
|
+
criteria: "",
|
|
3971
|
+
expected_output: [],
|
|
3972
|
+
reference_answer: "",
|
|
3973
|
+
guideline_files: [],
|
|
3974
|
+
input_files: [],
|
|
3975
|
+
trace: null,
|
|
3976
|
+
token_usage: null,
|
|
3977
|
+
cost_usd: null,
|
|
3978
|
+
duration_ms: null,
|
|
3979
|
+
start_time: null,
|
|
3980
|
+
end_time: null,
|
|
3981
|
+
file_changes: null,
|
|
3982
|
+
workspace_path: null,
|
|
3983
|
+
config: null,
|
|
3984
|
+
metadata: {}
|
|
3985
|
+
},
|
|
3986
|
+
null,
|
|
3987
|
+
2
|
|
3988
|
+
);
|
|
3989
|
+
try {
|
|
3990
|
+
const stdout = await executeScript(["bun", "run", scriptPath], payload);
|
|
3991
|
+
const parsed = JSON.parse(stdout);
|
|
3992
|
+
const score = typeof parsed.score === "number" ? parsed.score : 0;
|
|
3993
|
+
process.stdout.write(JSON.stringify(parsed, null, 2));
|
|
3994
|
+
process.stdout.write("\n");
|
|
3995
|
+
process.exit(score >= 0.5 ? 0 : 1);
|
|
3996
|
+
} catch (error) {
|
|
3997
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
3998
|
+
console.error(`Error: ${message}`);
|
|
3999
|
+
process.exit(1);
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
4002
|
+
});
|
|
4003
|
+
async function findGraderScript(graderName, startDir) {
|
|
4004
|
+
let dir = path3.resolve(startDir);
|
|
4005
|
+
const root = path3.parse(dir).root;
|
|
4006
|
+
while (dir !== root) {
|
|
4007
|
+
for (const subdir of ["graders", "judges"]) {
|
|
4008
|
+
const gradersDir = path3.join(dir, ".agentv", subdir);
|
|
4009
|
+
const found = await fg([`${graderName}.{ts,js,mts,mjs}`], {
|
|
4010
|
+
cwd: gradersDir,
|
|
4011
|
+
absolute: true,
|
|
4012
|
+
onlyFiles: true
|
|
4013
|
+
});
|
|
4014
|
+
if (found.length > 0) return found[0];
|
|
4015
|
+
}
|
|
4016
|
+
dir = path3.dirname(dir);
|
|
4017
|
+
}
|
|
4018
|
+
return null;
|
|
4019
|
+
}
|
|
4020
|
+
|
|
3936
4021
|
// src/commands/eval/commands/run.ts
|
|
3937
4022
|
var evalRunCommand = command({
|
|
3938
4023
|
name: "eval",
|
|
@@ -3972,12 +4057,12 @@ var evalRunCommand = command({
|
|
|
3972
4057
|
type: array(string),
|
|
3973
4058
|
long: "output",
|
|
3974
4059
|
short: "o",
|
|
3975
|
-
description: "Output file path(s). Format inferred from extension: .jsonl, .json, .xml, .yaml"
|
|
4060
|
+
description: "Output file path(s). Format inferred from extension: .jsonl, .json, .xml, .yaml, .html"
|
|
3976
4061
|
}),
|
|
3977
4062
|
outputFormat: option({
|
|
3978
4063
|
type: optional(string),
|
|
3979
4064
|
long: "output-format",
|
|
3980
|
-
description: "Output format: 'jsonl' or '
|
|
4065
|
+
description: "Output format: 'jsonl', 'yaml', or 'html' (default: jsonl)"
|
|
3981
4066
|
}),
|
|
3982
4067
|
dryRun: flag({
|
|
3983
4068
|
long: "dry-run",
|
|
@@ -4068,11 +4153,31 @@ var evalRunCommand = command({
|
|
|
4068
4153
|
strict: flag({
|
|
4069
4154
|
long: "strict",
|
|
4070
4155
|
description: "Exit with error on version mismatch (instead of warning)"
|
|
4156
|
+
}),
|
|
4157
|
+
benchmarkJson: option({
|
|
4158
|
+
type: optional(string),
|
|
4159
|
+
long: "benchmark-json",
|
|
4160
|
+
description: "Write Agent Skills benchmark.json to the specified path"
|
|
4161
|
+
}),
|
|
4162
|
+
artifacts: option({
|
|
4163
|
+
type: optional(string),
|
|
4164
|
+
long: "artifacts",
|
|
4165
|
+
description: "Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory"
|
|
4166
|
+
}),
|
|
4167
|
+
graderTarget: option({
|
|
4168
|
+
type: optional(string),
|
|
4169
|
+
long: "grader-target",
|
|
4170
|
+
description: 'Override grader target for all evaluators (e.g., "agentv", or a target name from targets.yaml)'
|
|
4171
|
+
}),
|
|
4172
|
+
model: option({
|
|
4173
|
+
type: optional(string),
|
|
4174
|
+
long: "model",
|
|
4175
|
+
description: 'Override model for the grader target (e.g., "openai:gpt-5-mini")'
|
|
4071
4176
|
})
|
|
4072
4177
|
},
|
|
4073
4178
|
handler: async (args) => {
|
|
4074
4179
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4075
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4180
|
+
const { launchInteractiveWizard } = await import("./interactive-B432TCRZ.js");
|
|
4076
4181
|
await launchInteractiveWizard();
|
|
4077
4182
|
return;
|
|
4078
4183
|
}
|
|
@@ -4104,15 +4209,30 @@ var evalRunCommand = command({
|
|
|
4104
4209
|
otelCaptureContent: args.otelCaptureContent,
|
|
4105
4210
|
otelGroupTurns: args.otelGroupTurns,
|
|
4106
4211
|
retryErrors: args.retryErrors,
|
|
4107
|
-
strict: args.strict
|
|
4212
|
+
strict: args.strict,
|
|
4213
|
+
benchmarkJson: args.benchmarkJson,
|
|
4214
|
+
artifacts: args.artifacts,
|
|
4215
|
+
graderTarget: args.graderTarget,
|
|
4216
|
+
model: args.model
|
|
4108
4217
|
};
|
|
4109
4218
|
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
4110
4219
|
}
|
|
4111
4220
|
});
|
|
4112
4221
|
|
|
4222
|
+
// src/commands/eval/index.ts
|
|
4223
|
+
var evalCommand = subcommands({
|
|
4224
|
+
name: "eval",
|
|
4225
|
+
description: "Evaluation commands",
|
|
4226
|
+
cmds: {
|
|
4227
|
+
run: evalRunCommand,
|
|
4228
|
+
prompt: evalPromptCommand,
|
|
4229
|
+
assert: evalAssertCommand
|
|
4230
|
+
}
|
|
4231
|
+
});
|
|
4232
|
+
|
|
4113
4233
|
// src/commands/generate/rubrics.ts
|
|
4114
|
-
import { readFile
|
|
4115
|
-
import
|
|
4234
|
+
import { readFile, writeFile as writeFile2 } from "node:fs/promises";
|
|
4235
|
+
import path4 from "node:path";
|
|
4116
4236
|
import { pathToFileURL } from "node:url";
|
|
4117
4237
|
import { isMap, isSeq, parseDocument } from "yaml";
|
|
4118
4238
|
function isJsonObject(value) {
|
|
@@ -4124,7 +4244,7 @@ function asString(value) {
|
|
|
4124
4244
|
async function loadRubricGenerator() {
|
|
4125
4245
|
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
4126
4246
|
if (customGenerator) {
|
|
4127
|
-
const generatorPath =
|
|
4247
|
+
const generatorPath = path4.resolve(customGenerator);
|
|
4128
4248
|
const generatorUrl = pathToFileURL(generatorPath).href;
|
|
4129
4249
|
const module = await import(generatorUrl);
|
|
4130
4250
|
return module.generateRubrics;
|
|
@@ -4134,8 +4254,8 @@ async function loadRubricGenerator() {
|
|
|
4134
4254
|
async function generateRubricsCommand(options) {
|
|
4135
4255
|
const { file, target: targetOverride, verbose } = options;
|
|
4136
4256
|
console.log(`Generating rubrics for: ${file}`);
|
|
4137
|
-
const absolutePath =
|
|
4138
|
-
const content = await
|
|
4257
|
+
const absolutePath = path4.resolve(file);
|
|
4258
|
+
const content = await readFile(absolutePath, "utf8");
|
|
4139
4259
|
const doc = parseDocument(content);
|
|
4140
4260
|
const parsed = doc.toJSON();
|
|
4141
4261
|
if (!isJsonObject(parsed)) {
|
|
@@ -4293,23 +4413,23 @@ var generateCommand = subcommands({
|
|
|
4293
4413
|
|
|
4294
4414
|
// src/commands/init/index.ts
|
|
4295
4415
|
import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
4296
|
-
import
|
|
4416
|
+
import path6 from "node:path";
|
|
4297
4417
|
import * as readline from "node:readline/promises";
|
|
4298
4418
|
|
|
4299
4419
|
// src/templates/index.ts
|
|
4300
|
-
import { readFileSync as
|
|
4301
|
-
import
|
|
4420
|
+
import { readFileSync as readFileSync4, readdirSync, statSync } from "node:fs";
|
|
4421
|
+
import path5 from "node:path";
|
|
4302
4422
|
import { fileURLToPath } from "node:url";
|
|
4303
4423
|
function getAgentvTemplates() {
|
|
4304
4424
|
return getTemplatesFromDir(".agentv");
|
|
4305
4425
|
}
|
|
4306
4426
|
function getTemplatesFromDir(subdir) {
|
|
4307
|
-
const currentDir =
|
|
4427
|
+
const currentDir = path5.dirname(fileURLToPath(import.meta.url));
|
|
4308
4428
|
let templatesDir;
|
|
4309
|
-
if (currentDir.includes(`${
|
|
4310
|
-
templatesDir =
|
|
4429
|
+
if (currentDir.includes(`${path5.sep}dist`)) {
|
|
4430
|
+
templatesDir = path5.join(currentDir, "templates", subdir);
|
|
4311
4431
|
} else {
|
|
4312
|
-
templatesDir =
|
|
4432
|
+
templatesDir = path5.join(currentDir, subdir);
|
|
4313
4433
|
}
|
|
4314
4434
|
return readTemplatesRecursively(templatesDir, "");
|
|
4315
4435
|
}
|
|
@@ -4317,15 +4437,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
4317
4437
|
const templates = [];
|
|
4318
4438
|
const entries2 = readdirSync(dir);
|
|
4319
4439
|
for (const entry of entries2) {
|
|
4320
|
-
const fullPath =
|
|
4440
|
+
const fullPath = path5.join(dir, entry);
|
|
4321
4441
|
const stat3 = statSync(fullPath);
|
|
4322
|
-
const entryRelativePath = relativePath ?
|
|
4442
|
+
const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
|
|
4323
4443
|
if (stat3.isDirectory()) {
|
|
4324
4444
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
4325
4445
|
} else {
|
|
4326
|
-
const content =
|
|
4446
|
+
const content = readFileSync4(fullPath, "utf-8");
|
|
4327
4447
|
templates.push({
|
|
4328
|
-
path: entryRelativePath.split(
|
|
4448
|
+
path: entryRelativePath.split(path5.sep).join("/"),
|
|
4329
4449
|
// Normalize to forward slashes
|
|
4330
4450
|
content
|
|
4331
4451
|
});
|
|
@@ -4354,23 +4474,23 @@ async function promptYesNo(message) {
|
|
|
4354
4474
|
}
|
|
4355
4475
|
}
|
|
4356
4476
|
async function initCommand(options = {}) {
|
|
4357
|
-
const targetPath =
|
|
4358
|
-
const agentvDir =
|
|
4477
|
+
const targetPath = path6.resolve(options.targetPath ?? ".");
|
|
4478
|
+
const agentvDir = path6.join(targetPath, ".agentv");
|
|
4359
4479
|
const agentvTemplates = getAgentvTemplates();
|
|
4360
4480
|
const envTemplate = agentvTemplates.find((t) => t.path === ".env.example");
|
|
4361
4481
|
const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.example");
|
|
4362
4482
|
const existingFiles = [];
|
|
4363
4483
|
if (envTemplate) {
|
|
4364
|
-
const envFilePath =
|
|
4484
|
+
const envFilePath = path6.join(targetPath, ".env.example");
|
|
4365
4485
|
if (existsSync(envFilePath)) {
|
|
4366
4486
|
existingFiles.push(".env.example");
|
|
4367
4487
|
}
|
|
4368
4488
|
}
|
|
4369
4489
|
if (existsSync(agentvDir)) {
|
|
4370
4490
|
for (const template of otherAgentvTemplates) {
|
|
4371
|
-
const targetFilePath =
|
|
4491
|
+
const targetFilePath = path6.join(agentvDir, template.path);
|
|
4372
4492
|
if (existsSync(targetFilePath)) {
|
|
4373
|
-
existingFiles.push(
|
|
4493
|
+
existingFiles.push(path6.relative(targetPath, targetFilePath));
|
|
4374
4494
|
}
|
|
4375
4495
|
}
|
|
4376
4496
|
}
|
|
@@ -4392,18 +4512,18 @@ async function initCommand(options = {}) {
|
|
|
4392
4512
|
mkdirSync(agentvDir, { recursive: true });
|
|
4393
4513
|
}
|
|
4394
4514
|
if (envTemplate) {
|
|
4395
|
-
const envFilePath =
|
|
4515
|
+
const envFilePath = path6.join(targetPath, ".env.example");
|
|
4396
4516
|
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
4397
4517
|
console.log("Created .env.example");
|
|
4398
4518
|
}
|
|
4399
4519
|
for (const template of otherAgentvTemplates) {
|
|
4400
|
-
const targetFilePath =
|
|
4401
|
-
const targetDirPath =
|
|
4520
|
+
const targetFilePath = path6.join(agentvDir, template.path);
|
|
4521
|
+
const targetDirPath = path6.dirname(targetFilePath);
|
|
4402
4522
|
if (!existsSync(targetDirPath)) {
|
|
4403
4523
|
mkdirSync(targetDirPath, { recursive: true });
|
|
4404
4524
|
}
|
|
4405
4525
|
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
4406
|
-
console.log(`Created ${
|
|
4526
|
+
console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
|
|
4407
4527
|
}
|
|
4408
4528
|
console.log("\nAgentV initialized successfully!");
|
|
4409
4529
|
console.log("\nFiles installed to root:");
|
|
@@ -4411,7 +4531,7 @@ async function initCommand(options = {}) {
|
|
|
4411
4531
|
console.log(" - .env.example");
|
|
4412
4532
|
}
|
|
4413
4533
|
console.log(`
|
|
4414
|
-
Files installed to ${
|
|
4534
|
+
Files installed to ${path6.relative(targetPath, agentvDir)}:`);
|
|
4415
4535
|
for (const t of otherAgentvTemplates) {
|
|
4416
4536
|
console.log(` - ${t.path}`);
|
|
4417
4537
|
}
|
|
@@ -4530,8 +4650,8 @@ var selfCommand = subcommands({
|
|
|
4530
4650
|
});
|
|
4531
4651
|
|
|
4532
4652
|
// src/commands/trace/utils.ts
|
|
4533
|
-
import { readFileSync as
|
|
4534
|
-
import
|
|
4653
|
+
import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
4654
|
+
import path7 from "node:path";
|
|
4535
4655
|
var colors2 = {
|
|
4536
4656
|
reset: "\x1B[0m",
|
|
4537
4657
|
bold: "\x1B[1m",
|
|
@@ -4557,7 +4677,7 @@ function padLeft2(str, len) {
|
|
|
4557
4677
|
return " ".repeat(Math.max(0, len - plainLen)) + str;
|
|
4558
4678
|
}
|
|
4559
4679
|
function loadResultFile(filePath) {
|
|
4560
|
-
const content =
|
|
4680
|
+
const content = readFileSync5(filePath, "utf8");
|
|
4561
4681
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
4562
4682
|
return lines.map((line, i) => {
|
|
4563
4683
|
const record = JSON.parse(line);
|
|
@@ -4568,7 +4688,7 @@ function loadResultFile(filePath) {
|
|
|
4568
4688
|
});
|
|
4569
4689
|
}
|
|
4570
4690
|
function listResultFiles(cwd, limit) {
|
|
4571
|
-
const resultsDir =
|
|
4691
|
+
const resultsDir = path7.join(cwd, ".agentv", "results");
|
|
4572
4692
|
let files;
|
|
4573
4693
|
try {
|
|
4574
4694
|
files = readdirSync2(resultsDir).filter((f) => f.endsWith(".jsonl"));
|
|
@@ -4581,7 +4701,7 @@ function listResultFiles(cwd, limit) {
|
|
|
4581
4701
|
}
|
|
4582
4702
|
const metas = [];
|
|
4583
4703
|
for (const filename of files) {
|
|
4584
|
-
const filePath =
|
|
4704
|
+
const filePath = path7.join(resultsDir, filename);
|
|
4585
4705
|
try {
|
|
4586
4706
|
const stat3 = statSync2(filePath);
|
|
4587
4707
|
const results = loadResultFile(filePath);
|
|
@@ -4807,8 +4927,8 @@ var stubProvider = {
|
|
|
4807
4927
|
throw new Error("trace score does not support LLM-based evaluators");
|
|
4808
4928
|
}
|
|
4809
4929
|
};
|
|
4810
|
-
var
|
|
4811
|
-
kind: "llm-
|
|
4930
|
+
var stubLlmGrader = {
|
|
4931
|
+
kind: "llm-grader",
|
|
4812
4932
|
evaluate() {
|
|
4813
4933
|
throw new Error("trace score does not support LLM-based evaluators");
|
|
4814
4934
|
}
|
|
@@ -4816,7 +4936,7 @@ var stubLlmJudge = {
|
|
|
4816
4936
|
async function runScore(results, evaluatorConfig, testIdFilter) {
|
|
4817
4937
|
const registry = createBuiltinRegistry();
|
|
4818
4938
|
const dispatchContext = {
|
|
4819
|
-
|
|
4939
|
+
llmGrader: stubLlmGrader,
|
|
4820
4940
|
registry
|
|
4821
4941
|
};
|
|
4822
4942
|
const evaluator = await registry.create(evaluatorConfig, dispatchContext);
|
|
@@ -5380,8 +5500,70 @@ var traceCommand = subcommands({
|
|
|
5380
5500
|
}
|
|
5381
5501
|
});
|
|
5382
5502
|
|
|
5503
|
+
// src/commands/transpile/index.ts
|
|
5504
|
+
import { writeFileSync as writeFileSync3 } from "node:fs";
|
|
5505
|
+
import path8 from "node:path";
|
|
5506
|
+
var transpileCommand = command({
|
|
5507
|
+
name: "transpile",
|
|
5508
|
+
description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
|
|
5509
|
+
args: {
|
|
5510
|
+
input: positional({
|
|
5511
|
+
type: string,
|
|
5512
|
+
displayName: "input",
|
|
5513
|
+
description: "Path to EVAL.yaml file"
|
|
5514
|
+
}),
|
|
5515
|
+
outDir: option({
|
|
5516
|
+
type: optional(string),
|
|
5517
|
+
long: "out-dir",
|
|
5518
|
+
short: "d",
|
|
5519
|
+
description: "Output directory (defaults to directory of input file)"
|
|
5520
|
+
}),
|
|
5521
|
+
stdout: flag({
|
|
5522
|
+
long: "stdout",
|
|
5523
|
+
description: "Write to stdout instead of file(s) (only valid for single-skill output)"
|
|
5524
|
+
})
|
|
5525
|
+
},
|
|
5526
|
+
handler: async ({ input, outDir, stdout }) => {
|
|
5527
|
+
let result;
|
|
5528
|
+
try {
|
|
5529
|
+
result = transpileEvalYamlFile(path8.resolve(input));
|
|
5530
|
+
} catch (error) {
|
|
5531
|
+
console.error(`Error: ${error.message}`);
|
|
5532
|
+
process.exit(1);
|
|
5533
|
+
}
|
|
5534
|
+
for (const warning of result.warnings) {
|
|
5535
|
+
console.warn(`Warning: ${warning}`);
|
|
5536
|
+
}
|
|
5537
|
+
if (result.files.size === 0) {
|
|
5538
|
+
console.error("Error: No output produced (no tests found)");
|
|
5539
|
+
process.exit(1);
|
|
5540
|
+
}
|
|
5541
|
+
if (stdout) {
|
|
5542
|
+
if (result.files.size > 1) {
|
|
5543
|
+
console.error(
|
|
5544
|
+
"Error: --stdout is only valid when input produces a single evals.json (multi-skill input produces multiple files)"
|
|
5545
|
+
);
|
|
5546
|
+
process.exit(1);
|
|
5547
|
+
}
|
|
5548
|
+
const [file] = result.files.values();
|
|
5549
|
+
process.stdout.write(JSON.stringify(file, null, 2));
|
|
5550
|
+
process.stdout.write("\n");
|
|
5551
|
+
return;
|
|
5552
|
+
}
|
|
5553
|
+
const outputDir = outDir ? path8.resolve(outDir) : path8.dirname(path8.resolve(input));
|
|
5554
|
+
const fileNames = getOutputFilenames(result);
|
|
5555
|
+
for (const [skill, evalsJson] of result.files) {
|
|
5556
|
+
const fileName = fileNames.get(skill) ?? "evals.json";
|
|
5557
|
+
const outputPath = path8.join(outputDir, fileName);
|
|
5558
|
+
writeFileSync3(outputPath, `${JSON.stringify(evalsJson, null, 2)}
|
|
5559
|
+
`);
|
|
5560
|
+
console.log(`Transpiled to ${outputPath}`);
|
|
5561
|
+
}
|
|
5562
|
+
}
|
|
5563
|
+
});
|
|
5564
|
+
|
|
5383
5565
|
// src/commands/trim/index.ts
|
|
5384
|
-
import { readFileSync as
|
|
5566
|
+
import { readFileSync as readFileSync6, writeFileSync as writeFileSync4 } from "node:fs";
|
|
5385
5567
|
var trimCommand = command({
|
|
5386
5568
|
name: "trim",
|
|
5387
5569
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -5400,7 +5582,7 @@ var trimCommand = command({
|
|
|
5400
5582
|
},
|
|
5401
5583
|
handler: async ({ input, out }) => {
|
|
5402
5584
|
try {
|
|
5403
|
-
const content =
|
|
5585
|
+
const content = readFileSync6(input, "utf8");
|
|
5404
5586
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
5405
5587
|
const trimmedLines = lines.map((line) => {
|
|
5406
5588
|
const record = JSON.parse(line);
|
|
@@ -5412,7 +5594,7 @@ var trimCommand = command({
|
|
|
5412
5594
|
const output = `${trimmedLines.join("\n")}
|
|
5413
5595
|
`;
|
|
5414
5596
|
if (out) {
|
|
5415
|
-
|
|
5597
|
+
writeFileSync4(out, output, "utf8");
|
|
5416
5598
|
console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
|
|
5417
5599
|
} else {
|
|
5418
5600
|
process.stdout.write(output);
|
|
@@ -5507,7 +5689,7 @@ function isTTY() {
|
|
|
5507
5689
|
// src/commands/validate/validate-files.ts
|
|
5508
5690
|
import { constants } from "node:fs";
|
|
5509
5691
|
import { access, readdir, stat } from "node:fs/promises";
|
|
5510
|
-
import
|
|
5692
|
+
import path9 from "node:path";
|
|
5511
5693
|
async function validateFiles(paths) {
|
|
5512
5694
|
const filePaths = await expandPaths(paths);
|
|
5513
5695
|
const results = [];
|
|
@@ -5525,7 +5707,7 @@ async function validateFiles(paths) {
|
|
|
5525
5707
|
};
|
|
5526
5708
|
}
|
|
5527
5709
|
async function validateSingleFile(filePath) {
|
|
5528
|
-
const absolutePath =
|
|
5710
|
+
const absolutePath = path9.resolve(filePath);
|
|
5529
5711
|
const fileType = await detectFileType(absolutePath);
|
|
5530
5712
|
let result;
|
|
5531
5713
|
if (fileType === "eval") {
|
|
@@ -5550,7 +5732,7 @@ async function validateSingleFile(filePath) {
|
|
|
5550
5732
|
async function expandPaths(paths) {
|
|
5551
5733
|
const expanded = [];
|
|
5552
5734
|
for (const inputPath of paths) {
|
|
5553
|
-
const absolutePath =
|
|
5735
|
+
const absolutePath = path9.resolve(inputPath);
|
|
5554
5736
|
try {
|
|
5555
5737
|
await access(absolutePath, constants.F_OK);
|
|
5556
5738
|
} catch {
|
|
@@ -5574,7 +5756,7 @@ async function findYamlFiles(dirPath) {
|
|
|
5574
5756
|
try {
|
|
5575
5757
|
const entries2 = await readdir(dirPath, { withFileTypes: true });
|
|
5576
5758
|
for (const entry of entries2) {
|
|
5577
|
-
const fullPath =
|
|
5759
|
+
const fullPath = path9.join(dirPath, entry.name);
|
|
5578
5760
|
if (entry.isDirectory()) {
|
|
5579
5761
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
5580
5762
|
continue;
|
|
@@ -5591,7 +5773,7 @@ async function findYamlFiles(dirPath) {
|
|
|
5591
5773
|
return results;
|
|
5592
5774
|
}
|
|
5593
5775
|
function isYamlFile(filePath) {
|
|
5594
|
-
const ext =
|
|
5776
|
+
const ext = path9.extname(filePath).toLowerCase();
|
|
5595
5777
|
return ext === ".yaml" || ext === ".yml";
|
|
5596
5778
|
}
|
|
5597
5779
|
|
|
@@ -5630,8 +5812,8 @@ var validateCommand = command({
|
|
|
5630
5812
|
|
|
5631
5813
|
// src/commands/workspace/clean.ts
|
|
5632
5814
|
import { existsSync as existsSync2 } from "node:fs";
|
|
5633
|
-
import { readFile as
|
|
5634
|
-
import
|
|
5815
|
+
import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
|
|
5816
|
+
import path10 from "node:path";
|
|
5635
5817
|
async function confirm(message) {
|
|
5636
5818
|
const readline2 = await import("node:readline");
|
|
5637
5819
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
@@ -5667,10 +5849,10 @@ var cleanCommand = command({
|
|
|
5667
5849
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
5668
5850
|
const matchingDirs = [];
|
|
5669
5851
|
for (const dir of poolDirs) {
|
|
5670
|
-
const poolDir =
|
|
5671
|
-
const metadataPath =
|
|
5852
|
+
const poolDir = path10.join(poolRoot, dir.name);
|
|
5853
|
+
const metadataPath = path10.join(poolDir, "metadata.json");
|
|
5672
5854
|
try {
|
|
5673
|
-
const raw = await
|
|
5855
|
+
const raw = await readFile2(metadataPath, "utf-8");
|
|
5674
5856
|
const metadata = JSON.parse(raw);
|
|
5675
5857
|
const hasRepo = metadata.repos?.some((r) => {
|
|
5676
5858
|
if (r.source.type === "git" && r.source.url) {
|
|
@@ -5699,7 +5881,7 @@ var cleanCommand = command({
|
|
|
5699
5881
|
}
|
|
5700
5882
|
for (const dir of matchingDirs) {
|
|
5701
5883
|
await rm(dir, { recursive: true, force: true });
|
|
5702
|
-
console.log(`Removed: ${
|
|
5884
|
+
console.log(`Removed: ${path10.basename(dir).slice(0, 12)}...`);
|
|
5703
5885
|
}
|
|
5704
5886
|
console.log("Done.");
|
|
5705
5887
|
} else {
|
|
@@ -5718,14 +5900,14 @@ var cleanCommand = command({
|
|
|
5718
5900
|
|
|
5719
5901
|
// src/commands/workspace/list.ts
|
|
5720
5902
|
import { existsSync as existsSync3 } from "node:fs";
|
|
5721
|
-
import { readFile as
|
|
5722
|
-
import
|
|
5903
|
+
import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
|
|
5904
|
+
import path11 from "node:path";
|
|
5723
5905
|
async function getDirectorySize(dirPath) {
|
|
5724
5906
|
let totalSize = 0;
|
|
5725
5907
|
try {
|
|
5726
5908
|
const entries2 = await readdir3(dirPath, { withFileTypes: true });
|
|
5727
5909
|
for (const entry of entries2) {
|
|
5728
|
-
const fullPath =
|
|
5910
|
+
const fullPath = path11.join(dirPath, entry.name);
|
|
5729
5911
|
if (entry.isDirectory()) {
|
|
5730
5912
|
totalSize += await getDirectorySize(fullPath);
|
|
5731
5913
|
} else {
|
|
@@ -5760,14 +5942,14 @@ var listCommand = command({
|
|
|
5760
5942
|
return;
|
|
5761
5943
|
}
|
|
5762
5944
|
for (const dir of poolDirs) {
|
|
5763
|
-
const poolDir =
|
|
5945
|
+
const poolDir = path11.join(poolRoot, dir.name);
|
|
5764
5946
|
const fingerprint = dir.name;
|
|
5765
5947
|
const poolEntries = await readdir3(poolDir, { withFileTypes: true });
|
|
5766
5948
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
5767
|
-
const metadataPath =
|
|
5949
|
+
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
5768
5950
|
let metadata = null;
|
|
5769
5951
|
try {
|
|
5770
|
-
const raw = await
|
|
5952
|
+
const raw = await readFile3(metadataPath, "utf-8");
|
|
5771
5953
|
metadata = JSON.parse(raw);
|
|
5772
5954
|
} catch {
|
|
5773
5955
|
}
|
|
@@ -5804,16 +5986,16 @@ var workspaceCommand = subcommands({
|
|
|
5804
5986
|
|
|
5805
5987
|
// src/update-check.ts
|
|
5806
5988
|
import { spawn as spawn2 } from "node:child_process";
|
|
5807
|
-
import { readFile as
|
|
5989
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
5808
5990
|
import { join } from "node:path";
|
|
5809
5991
|
var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
5810
5992
|
var AGENTV_DIR = getAgentvHome();
|
|
5811
5993
|
var CACHE_FILE = "version-check.json";
|
|
5812
5994
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
5813
|
-
async function getCachedUpdateInfo(
|
|
5814
|
-
const filePath =
|
|
5995
|
+
async function getCachedUpdateInfo(path12) {
|
|
5996
|
+
const filePath = path12 ?? join(AGENTV_DIR, CACHE_FILE);
|
|
5815
5997
|
try {
|
|
5816
|
-
const raw = await
|
|
5998
|
+
const raw = await readFile4(filePath, "utf-8");
|
|
5817
5999
|
const data = JSON.parse(raw);
|
|
5818
6000
|
if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
|
|
5819
6001
|
return data;
|
|
@@ -5894,7 +6076,7 @@ var app = subcommands({
|
|
|
5894
6076
|
description: "AgentV CLI",
|
|
5895
6077
|
version: package_default.version,
|
|
5896
6078
|
cmds: {
|
|
5897
|
-
eval:
|
|
6079
|
+
eval: evalCommand,
|
|
5898
6080
|
prompt: evalPromptCommand,
|
|
5899
6081
|
compare: compareCommand,
|
|
5900
6082
|
convert: convertCommand,
|
|
@@ -5903,26 +6085,29 @@ var app = subcommands({
|
|
|
5903
6085
|
init: initCmdTsCommand,
|
|
5904
6086
|
self: selfCommand,
|
|
5905
6087
|
trace: traceCommand,
|
|
6088
|
+
transpile: transpileCommand,
|
|
5906
6089
|
trim: trimCommand,
|
|
5907
6090
|
validate: validateCommand,
|
|
5908
6091
|
workspace: workspaceCommand
|
|
5909
6092
|
}
|
|
5910
6093
|
});
|
|
5911
|
-
var
|
|
6094
|
+
var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "prompt", "assert"]);
|
|
6095
|
+
var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
6096
|
+
"prompt",
|
|
6097
|
+
"compare",
|
|
6098
|
+
"convert",
|
|
6099
|
+
"create",
|
|
6100
|
+
"generate",
|
|
6101
|
+
"init",
|
|
6102
|
+
"self",
|
|
6103
|
+
"trace",
|
|
6104
|
+
"transpile",
|
|
6105
|
+
"trim",
|
|
6106
|
+
"validate",
|
|
6107
|
+
"workspace"
|
|
6108
|
+
]);
|
|
5912
6109
|
function preprocessArgv(argv) {
|
|
5913
6110
|
const result = [...argv];
|
|
5914
|
-
const promptIndex = result.indexOf("prompt");
|
|
5915
|
-
if (promptIndex !== -1) {
|
|
5916
|
-
const nextArg = result[promptIndex + 1];
|
|
5917
|
-
if (nextArg !== "eval") {
|
|
5918
|
-
result.splice(promptIndex + 1, 0, "eval");
|
|
5919
|
-
}
|
|
5920
|
-
const evalIdx = promptIndex + 1;
|
|
5921
|
-
const subSubArg = result[evalIdx + 1];
|
|
5922
|
-
if (subSubArg === void 0 || !PROMPT_EVAL_SUBCOMMANDS.has(subSubArg)) {
|
|
5923
|
-
result.splice(evalIdx + 1, 0, "overview");
|
|
5924
|
-
}
|
|
5925
|
-
}
|
|
5926
6111
|
for (let i = 0; i < result.length; i++) {
|
|
5927
6112
|
if (result[i] === "--eval-id") {
|
|
5928
6113
|
result[i] = "--test-id";
|
|
@@ -5930,6 +6115,16 @@ function preprocessArgv(argv) {
|
|
|
5930
6115
|
result[i] = `--test-id=${result[i].slice("--eval-id=".length)}`;
|
|
5931
6116
|
}
|
|
5932
6117
|
}
|
|
6118
|
+
const evalIdx = result.indexOf("eval");
|
|
6119
|
+
if (evalIdx !== -1) {
|
|
6120
|
+
const isTopLevel = !result.slice(0, evalIdx).some((arg) => TOP_LEVEL_COMMANDS.has(arg));
|
|
6121
|
+
if (isTopLevel) {
|
|
6122
|
+
const nextArg = result[evalIdx + 1];
|
|
6123
|
+
if (nextArg !== void 0 && !EVAL_SUBCOMMANDS.has(nextArg) && nextArg !== "--help" && nextArg !== "-h") {
|
|
6124
|
+
result.splice(evalIdx + 1, 0, "run");
|
|
6125
|
+
}
|
|
6126
|
+
}
|
|
6127
|
+
}
|
|
5933
6128
|
return result;
|
|
5934
6129
|
}
|
|
5935
6130
|
async function runCli(argv = process.argv) {
|
|
@@ -5951,4 +6146,4 @@ export {
|
|
|
5951
6146
|
preprocessArgv,
|
|
5952
6147
|
runCli
|
|
5953
6148
|
};
|
|
5954
|
-
//# sourceMappingURL=chunk-
|
|
6149
|
+
//# sourceMappingURL=chunk-5WIB7A27.js.map
|