agentv 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -9
- package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +2 -2
- package/dist/{chunk-6LP5Z5Y4.js → chunk-5GG6DDP5.js} +256 -128
- package/dist/chunk-5GG6DDP5.js.map +1 -0
- package/dist/{chunk-AR3QEKXH.js → chunk-BJV6MDBE.js} +3 -3
- package/dist/{chunk-AR3QEKXH.js.map → chunk-BJV6MDBE.js.map} +1 -1
- package/dist/{chunk-5M3K2DMV.js → chunk-D6G4N2H2.js} +550 -516
- package/dist/chunk-D6G4N2H2.js.map +1 -0
- package/dist/{chunk-4ZMSAQWS.js → chunk-RLL4QGNL.js} +172 -81
- package/dist/chunk-RLL4QGNL.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/{dist-OC53WD3P.js → dist-MZFXE6B5.js} +3 -5
- package/dist/index.js +4 -4
- package/dist/{interactive-NA6SAIAG.js → interactive-J7SUWZH2.js} +45 -5
- package/dist/interactive-J7SUWZH2.js.map +1 -0
- package/dist/templates/.agentv/.env.example +11 -9
- package/dist/templates/.agentv/config.yaml +5 -0
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +2 -2
- package/dist/chunk-4ZMSAQWS.js.map +0 -1
- package/dist/chunk-5M3K2DMV.js.map +0 -1
- package/dist/chunk-6LP5Z5Y4.js.map +0 -1
- package/dist/interactive-NA6SAIAG.js.map +0 -1
- /package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{dist-OC53WD3P.js.map → dist-MZFXE6B5.js.map} +0 -0
package/README.md
CHANGED
|
@@ -238,21 +238,19 @@ import json, sys
|
|
|
238
238
|
data = json.load(sys.stdin)
|
|
239
239
|
answer = data.get("answer", "")
|
|
240
240
|
|
|
241
|
-
|
|
242
|
-
misses = []
|
|
241
|
+
assertions = []
|
|
243
242
|
|
|
244
243
|
if "42" in answer:
|
|
245
|
-
|
|
244
|
+
assertions.append({"text": "Answer contains correct value (42)", "passed": True})
|
|
246
245
|
else:
|
|
247
|
-
|
|
246
|
+
assertions.append({"text": "Answer does not contain expected value (42)", "passed": False})
|
|
248
247
|
|
|
249
|
-
|
|
248
|
+
passed = sum(1 for a in assertions if a["passed"])
|
|
249
|
+
score = 1.0 if passed == len(assertions) else 0.0
|
|
250
250
|
|
|
251
251
|
print(json.dumps({
|
|
252
252
|
"score": score,
|
|
253
|
-
"
|
|
254
|
-
"misses": misses,
|
|
255
|
-
"reasoning": f"Passed {len(hits)} check(s)"
|
|
253
|
+
"assertions": assertions,
|
|
256
254
|
}))
|
|
257
255
|
```
|
|
258
256
|
|
|
@@ -309,7 +307,7 @@ const { results, summary } = await evaluate({
|
|
|
309
307
|
{
|
|
310
308
|
id: 'greeting',
|
|
311
309
|
input: 'Say hello',
|
|
312
|
-
|
|
310
|
+
assertions: [{ type: 'contains', value: 'Hello' }],
|
|
313
311
|
},
|
|
314
312
|
],
|
|
315
313
|
});
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
AgentvProvider
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-BJV6MDBE.js";
|
|
5
5
|
import "./chunk-5H446C7X.js";
|
|
6
6
|
export {
|
|
7
7
|
AgentvProvider
|
|
8
8
|
};
|
|
9
|
-
//# sourceMappingURL=agentv-provider-
|
|
9
|
+
//# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
HtmlWriter,
|
|
4
|
+
buildBenchmarkArtifact,
|
|
5
|
+
buildGradingArtifact,
|
|
6
|
+
buildTimingArtifact,
|
|
4
7
|
detectFileType,
|
|
5
8
|
findRepoRoot,
|
|
6
9
|
package_default,
|
|
10
|
+
parseJsonlResults,
|
|
7
11
|
resolveEvalPaths,
|
|
8
12
|
runEvalCommand,
|
|
9
13
|
selectTarget,
|
|
@@ -12,7 +16,7 @@ import {
|
|
|
12
16
|
validateEvalFile,
|
|
13
17
|
validateFileReferences,
|
|
14
18
|
validateTargetsFile
|
|
15
|
-
} from "./chunk-
|
|
19
|
+
} from "./chunk-RLL4QGNL.js";
|
|
16
20
|
import {
|
|
17
21
|
createBuiltinRegistry,
|
|
18
22
|
createProvider,
|
|
@@ -30,7 +34,7 @@ import {
|
|
|
30
34
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
31
35
|
transpileEvalYamlFile,
|
|
32
36
|
trimBaselineResult
|
|
33
|
-
} from "./chunk-
|
|
37
|
+
} from "./chunk-D6G4N2H2.js";
|
|
34
38
|
import {
|
|
35
39
|
__commonJS,
|
|
36
40
|
__esm,
|
|
@@ -4181,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4181
4185
|
},
|
|
4182
4186
|
handler: async (args) => {
|
|
4183
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4184
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-J7SUWZH2.js");
|
|
4185
4189
|
await launchInteractiveWizard();
|
|
4186
4190
|
return;
|
|
4187
4191
|
}
|
|
@@ -4565,93 +4569,9 @@ var initCmdTsCommand = command({
|
|
|
4565
4569
|
}
|
|
4566
4570
|
});
|
|
4567
4571
|
|
|
4568
|
-
// src/commands/
|
|
4569
|
-
import {
|
|
4570
|
-
|
|
4571
|
-
if (scriptPath.includes(".bun")) {
|
|
4572
|
-
return "bun";
|
|
4573
|
-
}
|
|
4574
|
-
return "npm";
|
|
4575
|
-
}
|
|
4576
|
-
function detectPackageManager() {
|
|
4577
|
-
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
4578
|
-
}
|
|
4579
|
-
function runCommand(cmd, args) {
|
|
4580
|
-
return new Promise((resolve, reject) => {
|
|
4581
|
-
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
4582
|
-
let stdout = "";
|
|
4583
|
-
child.stdout?.on("data", (data) => {
|
|
4584
|
-
process.stdout.write(data);
|
|
4585
|
-
stdout += data.toString();
|
|
4586
|
-
});
|
|
4587
|
-
child.on("error", reject);
|
|
4588
|
-
child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
|
|
4589
|
-
});
|
|
4590
|
-
}
|
|
4591
|
-
var updateCommand = command({
|
|
4592
|
-
name: "update",
|
|
4593
|
-
description: "Update agentv to the latest version",
|
|
4594
|
-
args: {
|
|
4595
|
-
npm: flag({ long: "npm", description: "Force update using npm" }),
|
|
4596
|
-
bun: flag({ long: "bun", description: "Force update using bun" })
|
|
4597
|
-
},
|
|
4598
|
-
handler: async ({ npm, bun }) => {
|
|
4599
|
-
if (npm && bun) {
|
|
4600
|
-
console.error("Error: Cannot specify both --npm and --bun");
|
|
4601
|
-
process.exit(1);
|
|
4602
|
-
}
|
|
4603
|
-
let pm;
|
|
4604
|
-
if (npm) {
|
|
4605
|
-
pm = "npm";
|
|
4606
|
-
} else if (bun) {
|
|
4607
|
-
pm = "bun";
|
|
4608
|
-
} else {
|
|
4609
|
-
pm = detectPackageManager();
|
|
4610
|
-
}
|
|
4611
|
-
const currentVersion = package_default.version;
|
|
4612
|
-
console.log(`Current version: ${currentVersion}`);
|
|
4613
|
-
console.log(`Updating agentv using ${pm}...
|
|
4614
|
-
`);
|
|
4615
|
-
const args = pm === "npm" ? ["install", "-g", "agentv@latest"] : ["add", "-g", "agentv@latest"];
|
|
4616
|
-
try {
|
|
4617
|
-
const result = await runCommand(pm, args);
|
|
4618
|
-
if (result.exitCode !== 0) {
|
|
4619
|
-
console.error("\nUpdate failed.");
|
|
4620
|
-
process.exit(1);
|
|
4621
|
-
}
|
|
4622
|
-
let newVersion;
|
|
4623
|
-
try {
|
|
4624
|
-
const versionResult = await runCommand("agentv", ["--version"]);
|
|
4625
|
-
newVersion = versionResult.stdout.trim();
|
|
4626
|
-
} catch {
|
|
4627
|
-
}
|
|
4628
|
-
if (newVersion) {
|
|
4629
|
-
console.log(`
|
|
4630
|
-
Update complete: ${currentVersion} \u2192 ${newVersion}`);
|
|
4631
|
-
} else {
|
|
4632
|
-
console.log("\nUpdate complete.");
|
|
4633
|
-
}
|
|
4634
|
-
} catch (error) {
|
|
4635
|
-
if (error instanceof Error) {
|
|
4636
|
-
if (error.message.includes("ENOENT") || error.message.includes("not found")) {
|
|
4637
|
-
const alternative = pm === "npm" ? "bun" : "npm";
|
|
4638
|
-
console.error(`Error: ${pm} not found. Try using --${alternative} flag.`);
|
|
4639
|
-
} else {
|
|
4640
|
-
console.error(`Error: ${error.message}`);
|
|
4641
|
-
}
|
|
4642
|
-
process.exit(1);
|
|
4643
|
-
}
|
|
4644
|
-
throw error;
|
|
4645
|
-
}
|
|
4646
|
-
}
|
|
4647
|
-
});
|
|
4648
|
-
var selfCommand = subcommands({
|
|
4649
|
-
name: "self",
|
|
4650
|
-
description: "Manage the agentv installation",
|
|
4651
|
-
cmds: {
|
|
4652
|
-
update: updateCommand
|
|
4653
|
-
}
|
|
4654
|
-
});
|
|
4572
|
+
// src/commands/results/export.ts
|
|
4573
|
+
import { mkdirSync as mkdirSync2, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
|
|
4574
|
+
import path8 from "node:path";
|
|
4655
4575
|
|
|
4656
4576
|
// src/commands/trace/utils.ts
|
|
4657
4577
|
import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
|
|
@@ -4757,6 +4677,202 @@ function formatScore(score) {
|
|
|
4757
4677
|
return `${(score * 100).toFixed(0)}%`;
|
|
4758
4678
|
}
|
|
4759
4679
|
|
|
4680
|
+
// src/commands/results/export.ts
|
|
4681
|
+
function exportResults(sourceFile, content, outputDir) {
|
|
4682
|
+
const results = parseJsonlResults(content);
|
|
4683
|
+
if (results.length === 0) {
|
|
4684
|
+
throw new Error(`No results found in ${sourceFile}`);
|
|
4685
|
+
}
|
|
4686
|
+
const patched = results.map((r) => {
|
|
4687
|
+
if (!r.testId && r.evalId) {
|
|
4688
|
+
return { ...r, testId: String(r.evalId) };
|
|
4689
|
+
}
|
|
4690
|
+
return r;
|
|
4691
|
+
});
|
|
4692
|
+
mkdirSync2(outputDir, { recursive: true });
|
|
4693
|
+
const benchmark = buildBenchmarkArtifact(patched, sourceFile);
|
|
4694
|
+
writeFileSync3(path8.join(outputDir, "benchmark.json"), `${JSON.stringify(benchmark, null, 2)}
|
|
4695
|
+
`);
|
|
4696
|
+
const timing = buildTimingArtifact(patched);
|
|
4697
|
+
writeFileSync3(path8.join(outputDir, "timing.json"), `${JSON.stringify(timing, null, 2)}
|
|
4698
|
+
`);
|
|
4699
|
+
const gradingDir = path8.join(outputDir, "grading");
|
|
4700
|
+
mkdirSync2(gradingDir, { recursive: true });
|
|
4701
|
+
for (const result of patched) {
|
|
4702
|
+
const id = safeTestId(result);
|
|
4703
|
+
const grading = buildGradingArtifact(result);
|
|
4704
|
+
writeFileSync3(path8.join(gradingDir, `${id}.json`), `${JSON.stringify(grading, null, 2)}
|
|
4705
|
+
`);
|
|
4706
|
+
}
|
|
4707
|
+
const outputsDir = path8.join(outputDir, "outputs");
|
|
4708
|
+
mkdirSync2(outputsDir, { recursive: true });
|
|
4709
|
+
for (const result of patched) {
|
|
4710
|
+
const answer = result.answer;
|
|
4711
|
+
if (answer) {
|
|
4712
|
+
const id = safeTestId(result);
|
|
4713
|
+
writeFileSync3(path8.join(outputsDir, `${id}.txt`), answer);
|
|
4714
|
+
}
|
|
4715
|
+
}
|
|
4716
|
+
}
|
|
4717
|
+
function safeTestId(result) {
|
|
4718
|
+
const raw = result.testId ?? result.evalId ?? "unknown";
|
|
4719
|
+
return String(raw).replace(/[/\\:*?"<>|]/g, "_");
|
|
4720
|
+
}
|
|
4721
|
+
function deriveOutputDir(cwd, sourceFile) {
|
|
4722
|
+
const basename = path8.basename(sourceFile, ".jsonl");
|
|
4723
|
+
const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
|
|
4724
|
+
return path8.join(cwd, ".agentv", "results", dirName);
|
|
4725
|
+
}
|
|
4726
|
+
var resultsExportCommand = command({
|
|
4727
|
+
name: "export",
|
|
4728
|
+
description: "Export JSONL eval results into a per-test directory structure",
|
|
4729
|
+
args: {
|
|
4730
|
+
source: positional({
|
|
4731
|
+
type: optional(string),
|
|
4732
|
+
displayName: "source",
|
|
4733
|
+
description: "JSONL result file to export (defaults to most recent in .agentv/results/)"
|
|
4734
|
+
}),
|
|
4735
|
+
out: option({
|
|
4736
|
+
type: optional(string),
|
|
4737
|
+
long: "out",
|
|
4738
|
+
short: "o",
|
|
4739
|
+
description: "Output directory (defaults to .agentv/results/<run-timestamp>/)"
|
|
4740
|
+
}),
|
|
4741
|
+
dir: option({
|
|
4742
|
+
type: optional(string),
|
|
4743
|
+
long: "dir",
|
|
4744
|
+
short: "d",
|
|
4745
|
+
description: "Working directory (default: current directory)"
|
|
4746
|
+
})
|
|
4747
|
+
},
|
|
4748
|
+
handler: async ({ source, out, dir }) => {
|
|
4749
|
+
const cwd = dir ?? process.cwd();
|
|
4750
|
+
try {
|
|
4751
|
+
let sourceFile;
|
|
4752
|
+
if (source) {
|
|
4753
|
+
sourceFile = path8.isAbsolute(source) ? source : path8.resolve(cwd, source);
|
|
4754
|
+
} else {
|
|
4755
|
+
const metas = listResultFiles(cwd, 1);
|
|
4756
|
+
if (metas.length === 0) {
|
|
4757
|
+
console.error("Error: No result files found in .agentv/results/");
|
|
4758
|
+
console.error("Run an evaluation first: agentv eval <eval-file>");
|
|
4759
|
+
process.exit(1);
|
|
4760
|
+
}
|
|
4761
|
+
sourceFile = metas[0].path;
|
|
4762
|
+
}
|
|
4763
|
+
const content = readFileSync6(sourceFile, "utf8");
|
|
4764
|
+
const outputDir = out ? path8.isAbsolute(out) ? out : path8.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
|
|
4765
|
+
exportResults(sourceFile, content, outputDir);
|
|
4766
|
+
const results = parseJsonlResults(content);
|
|
4767
|
+
console.log(`Exported ${results.length} test(s) to ${outputDir}`);
|
|
4768
|
+
for (const result of results) {
|
|
4769
|
+
const id = result.testId ?? result.evalId ?? "unknown";
|
|
4770
|
+
console.log(` ${id}`);
|
|
4771
|
+
}
|
|
4772
|
+
} catch (error) {
|
|
4773
|
+
console.error(`Error: ${error.message}`);
|
|
4774
|
+
process.exit(1);
|
|
4775
|
+
}
|
|
4776
|
+
}
|
|
4777
|
+
});
|
|
4778
|
+
|
|
4779
|
+
// src/commands/results/index.ts
|
|
4780
|
+
var resultsCommand = subcommands({
|
|
4781
|
+
name: "results",
|
|
4782
|
+
description: "Inspect, export, and manage evaluation results",
|
|
4783
|
+
cmds: {
|
|
4784
|
+
export: resultsExportCommand
|
|
4785
|
+
}
|
|
4786
|
+
});
|
|
4787
|
+
|
|
4788
|
+
// src/commands/self/index.ts
|
|
4789
|
+
import { spawn } from "node:child_process";
|
|
4790
|
+
function detectPackageManagerFromPath(scriptPath) {
|
|
4791
|
+
if (scriptPath.includes(".bun")) {
|
|
4792
|
+
return "bun";
|
|
4793
|
+
}
|
|
4794
|
+
return "npm";
|
|
4795
|
+
}
|
|
4796
|
+
function detectPackageManager() {
|
|
4797
|
+
return detectPackageManagerFromPath(process.argv[1] ?? "");
|
|
4798
|
+
}
|
|
4799
|
+
function runCommand(cmd, args) {
|
|
4800
|
+
return new Promise((resolve, reject) => {
|
|
4801
|
+
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
4802
|
+
let stdout = "";
|
|
4803
|
+
child.stdout?.on("data", (data) => {
|
|
4804
|
+
process.stdout.write(data);
|
|
4805
|
+
stdout += data.toString();
|
|
4806
|
+
});
|
|
4807
|
+
child.on("error", reject);
|
|
4808
|
+
child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
|
|
4809
|
+
});
|
|
4810
|
+
}
|
|
4811
|
+
var updateCommand = command({
|
|
4812
|
+
name: "update",
|
|
4813
|
+
description: "Update agentv to the latest version",
|
|
4814
|
+
args: {
|
|
4815
|
+
npm: flag({ long: "npm", description: "Force update using npm" }),
|
|
4816
|
+
bun: flag({ long: "bun", description: "Force update using bun" })
|
|
4817
|
+
},
|
|
4818
|
+
handler: async ({ npm, bun }) => {
|
|
4819
|
+
if (npm && bun) {
|
|
4820
|
+
console.error("Error: Cannot specify both --npm and --bun");
|
|
4821
|
+
process.exit(1);
|
|
4822
|
+
}
|
|
4823
|
+
let pm;
|
|
4824
|
+
if (npm) {
|
|
4825
|
+
pm = "npm";
|
|
4826
|
+
} else if (bun) {
|
|
4827
|
+
pm = "bun";
|
|
4828
|
+
} else {
|
|
4829
|
+
pm = detectPackageManager();
|
|
4830
|
+
}
|
|
4831
|
+
const currentVersion = package_default.version;
|
|
4832
|
+
console.log(`Current version: ${currentVersion}`);
|
|
4833
|
+
console.log(`Updating agentv using ${pm}...
|
|
4834
|
+
`);
|
|
4835
|
+
const args = pm === "npm" ? ["install", "-g", "agentv@latest"] : ["add", "-g", "agentv@latest"];
|
|
4836
|
+
try {
|
|
4837
|
+
const result = await runCommand(pm, args);
|
|
4838
|
+
if (result.exitCode !== 0) {
|
|
4839
|
+
console.error("\nUpdate failed.");
|
|
4840
|
+
process.exit(1);
|
|
4841
|
+
}
|
|
4842
|
+
let newVersion;
|
|
4843
|
+
try {
|
|
4844
|
+
const versionResult = await runCommand("agentv", ["--version"]);
|
|
4845
|
+
newVersion = versionResult.stdout.trim();
|
|
4846
|
+
} catch {
|
|
4847
|
+
}
|
|
4848
|
+
if (newVersion) {
|
|
4849
|
+
console.log(`
|
|
4850
|
+
Update complete: ${currentVersion} \u2192 ${newVersion}`);
|
|
4851
|
+
} else {
|
|
4852
|
+
console.log("\nUpdate complete.");
|
|
4853
|
+
}
|
|
4854
|
+
} catch (error) {
|
|
4855
|
+
if (error instanceof Error) {
|
|
4856
|
+
if (error.message.includes("ENOENT") || error.message.includes("not found")) {
|
|
4857
|
+
const alternative = pm === "npm" ? "bun" : "npm";
|
|
4858
|
+
console.error(`Error: ${pm} not found. Try using --${alternative} flag.`);
|
|
4859
|
+
} else {
|
|
4860
|
+
console.error(`Error: ${error.message}`);
|
|
4861
|
+
}
|
|
4862
|
+
process.exit(1);
|
|
4863
|
+
}
|
|
4864
|
+
throw error;
|
|
4865
|
+
}
|
|
4866
|
+
}
|
|
4867
|
+
});
|
|
4868
|
+
var selfCommand = subcommands({
|
|
4869
|
+
name: "self",
|
|
4870
|
+
description: "Manage the agentv installation",
|
|
4871
|
+
cmds: {
|
|
4872
|
+
update: updateCommand
|
|
4873
|
+
}
|
|
4874
|
+
});
|
|
4875
|
+
|
|
4760
4876
|
// src/commands/trace/list.ts
|
|
4761
4877
|
function formatListTable(metas) {
|
|
4762
4878
|
const lines = [];
|
|
@@ -4973,9 +5089,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
|
|
|
4973
5089
|
originalScore: raw.score,
|
|
4974
5090
|
newScore: score.score,
|
|
4975
5091
|
verdict: score.verdict,
|
|
4976
|
-
|
|
4977
|
-
misses: score.misses,
|
|
4978
|
-
reasoning: score.reasoning
|
|
5092
|
+
assertions: score.assertions
|
|
4979
5093
|
});
|
|
4980
5094
|
}
|
|
4981
5095
|
return scored;
|
|
@@ -4994,7 +5108,9 @@ function renderTable(scored, assertSpec) {
|
|
|
4994
5108
|
lines.push(cols.map((col) => "\u2500".repeat(col.width)).join("\u2500\u2500"));
|
|
4995
5109
|
for (const r of scored) {
|
|
4996
5110
|
const verdictColor = r.verdict === "pass" ? c2.green : c2.red;
|
|
4997
|
-
const
|
|
5111
|
+
const failed = r.assertions.filter((a) => !a.passed);
|
|
5112
|
+
const passed = r.assertions.filter((a) => a.passed);
|
|
5113
|
+
const detail = failed.length > 0 ? failed[0].text.slice(0, 48) : passed.length > 0 ? passed[0].text.slice(0, 48) : "";
|
|
4998
5114
|
const row = [
|
|
4999
5115
|
padRight2(r.testId.slice(0, 24), cols[0].width),
|
|
5000
5116
|
padLeft2(formatScore(r.originalScore), cols[1].width),
|
|
@@ -5216,11 +5332,17 @@ function formatResultDetail(result, index, tree) {
|
|
|
5216
5332
|
if (result.error) {
|
|
5217
5333
|
lines.push(` ${c2.red}Error: ${result.error}${c2.reset}`);
|
|
5218
5334
|
}
|
|
5219
|
-
if (result.
|
|
5220
|
-
|
|
5221
|
-
|
|
5222
|
-
|
|
5223
|
-
|
|
5335
|
+
if (result.assertions && result.assertions.length > 0) {
|
|
5336
|
+
const passed = result.assertions.filter((a) => a.passed);
|
|
5337
|
+
const failed = result.assertions.filter((a) => !a.passed);
|
|
5338
|
+
if (passed.length > 0)
|
|
5339
|
+
lines.push(
|
|
5340
|
+
` ${c2.green}\u2713 Passed:${c2.reset} ${passed.map((a) => a.text).join(", ")}`
|
|
5341
|
+
);
|
|
5342
|
+
if (failed.length > 0)
|
|
5343
|
+
lines.push(
|
|
5344
|
+
` ${c2.red}\u2717 Failed:${c2.reset} ${failed.map((a) => a.text).join(", ")}`
|
|
5345
|
+
);
|
|
5224
5346
|
}
|
|
5225
5347
|
if (result.scores && result.scores.length > 0) {
|
|
5226
5348
|
lines.push(` ${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
|
|
@@ -5228,10 +5350,14 @@ function formatResultDetail(result, index, tree) {
|
|
|
5228
5350
|
if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
|
|
5229
5351
|
lines.push(` ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result)}`);
|
|
5230
5352
|
}
|
|
5231
|
-
if (result.
|
|
5232
|
-
const
|
|
5233
|
-
|
|
5234
|
-
|
|
5353
|
+
if (result.assertions && result.assertions.length > 0) {
|
|
5354
|
+
const withEvidence = result.assertions.filter((a) => a.evidence);
|
|
5355
|
+
if (withEvidence.length > 0) {
|
|
5356
|
+
const maxLen = 200;
|
|
5357
|
+
const evidence = withEvidence[0].evidence;
|
|
5358
|
+
const truncated = evidence.length > maxLen ? `${evidence.slice(0, maxLen)}...` : evidence;
|
|
5359
|
+
lines.push(` ${c2.dim}Evidence: ${truncated}${c2.reset}`);
|
|
5360
|
+
}
|
|
5235
5361
|
}
|
|
5236
5362
|
return lines.join("\n");
|
|
5237
5363
|
}
|
|
@@ -5505,8 +5631,8 @@ var traceCommand = subcommands({
|
|
|
5505
5631
|
});
|
|
5506
5632
|
|
|
5507
5633
|
// src/commands/transpile/index.ts
|
|
5508
|
-
import { writeFileSync as
|
|
5509
|
-
import
|
|
5634
|
+
import { writeFileSync as writeFileSync4 } from "node:fs";
|
|
5635
|
+
import path9 from "node:path";
|
|
5510
5636
|
var transpileCommand = command({
|
|
5511
5637
|
name: "transpile",
|
|
5512
5638
|
description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
|
|
@@ -5530,7 +5656,7 @@ var transpileCommand = command({
|
|
|
5530
5656
|
handler: async ({ input, outDir, stdout }) => {
|
|
5531
5657
|
let result;
|
|
5532
5658
|
try {
|
|
5533
|
-
result = transpileEvalYamlFile(
|
|
5659
|
+
result = transpileEvalYamlFile(path9.resolve(input));
|
|
5534
5660
|
} catch (error) {
|
|
5535
5661
|
console.error(`Error: ${error.message}`);
|
|
5536
5662
|
process.exit(1);
|
|
@@ -5554,12 +5680,12 @@ var transpileCommand = command({
|
|
|
5554
5680
|
process.stdout.write("\n");
|
|
5555
5681
|
return;
|
|
5556
5682
|
}
|
|
5557
|
-
const outputDir = outDir ?
|
|
5683
|
+
const outputDir = outDir ? path9.resolve(outDir) : path9.dirname(path9.resolve(input));
|
|
5558
5684
|
const fileNames = getOutputFilenames(result);
|
|
5559
5685
|
for (const [skill, evalsJson] of result.files) {
|
|
5560
5686
|
const fileName = fileNames.get(skill) ?? "evals.json";
|
|
5561
|
-
const outputPath =
|
|
5562
|
-
|
|
5687
|
+
const outputPath = path9.join(outputDir, fileName);
|
|
5688
|
+
writeFileSync4(outputPath, `${JSON.stringify(evalsJson, null, 2)}
|
|
5563
5689
|
`);
|
|
5564
5690
|
console.log(`Transpiled to ${outputPath}`);
|
|
5565
5691
|
}
|
|
@@ -5567,7 +5693,7 @@ var transpileCommand = command({
|
|
|
5567
5693
|
});
|
|
5568
5694
|
|
|
5569
5695
|
// src/commands/trim/index.ts
|
|
5570
|
-
import { readFileSync as
|
|
5696
|
+
import { readFileSync as readFileSync7, writeFileSync as writeFileSync5 } from "node:fs";
|
|
5571
5697
|
var trimCommand = command({
|
|
5572
5698
|
name: "trim",
|
|
5573
5699
|
description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
|
|
@@ -5586,7 +5712,7 @@ var trimCommand = command({
|
|
|
5586
5712
|
},
|
|
5587
5713
|
handler: async ({ input, out }) => {
|
|
5588
5714
|
try {
|
|
5589
|
-
const content =
|
|
5715
|
+
const content = readFileSync7(input, "utf8");
|
|
5590
5716
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
5591
5717
|
const trimmedLines = lines.map((line) => {
|
|
5592
5718
|
const record = JSON.parse(line);
|
|
@@ -5598,7 +5724,7 @@ var trimCommand = command({
|
|
|
5598
5724
|
const output = `${trimmedLines.join("\n")}
|
|
5599
5725
|
`;
|
|
5600
5726
|
if (out) {
|
|
5601
|
-
|
|
5727
|
+
writeFileSync5(out, output, "utf8");
|
|
5602
5728
|
console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
|
|
5603
5729
|
} else {
|
|
5604
5730
|
process.stdout.write(output);
|
|
@@ -5693,7 +5819,7 @@ function isTTY() {
|
|
|
5693
5819
|
// src/commands/validate/validate-files.ts
|
|
5694
5820
|
import { constants } from "node:fs";
|
|
5695
5821
|
import { access, readdir, stat } from "node:fs/promises";
|
|
5696
|
-
import
|
|
5822
|
+
import path10 from "node:path";
|
|
5697
5823
|
async function validateFiles(paths) {
|
|
5698
5824
|
const filePaths = await expandPaths(paths);
|
|
5699
5825
|
const results = [];
|
|
@@ -5711,7 +5837,7 @@ async function validateFiles(paths) {
|
|
|
5711
5837
|
};
|
|
5712
5838
|
}
|
|
5713
5839
|
async function validateSingleFile(filePath) {
|
|
5714
|
-
const absolutePath =
|
|
5840
|
+
const absolutePath = path10.resolve(filePath);
|
|
5715
5841
|
const fileType = await detectFileType(absolutePath);
|
|
5716
5842
|
let result;
|
|
5717
5843
|
if (fileType === "eval") {
|
|
@@ -5736,7 +5862,7 @@ async function validateSingleFile(filePath) {
|
|
|
5736
5862
|
async function expandPaths(paths) {
|
|
5737
5863
|
const expanded = [];
|
|
5738
5864
|
for (const inputPath of paths) {
|
|
5739
|
-
const absolutePath =
|
|
5865
|
+
const absolutePath = path10.resolve(inputPath);
|
|
5740
5866
|
try {
|
|
5741
5867
|
await access(absolutePath, constants.F_OK);
|
|
5742
5868
|
} catch {
|
|
@@ -5760,7 +5886,7 @@ async function findYamlFiles(dirPath) {
|
|
|
5760
5886
|
try {
|
|
5761
5887
|
const entries2 = await readdir(dirPath, { withFileTypes: true });
|
|
5762
5888
|
for (const entry of entries2) {
|
|
5763
|
-
const fullPath =
|
|
5889
|
+
const fullPath = path10.join(dirPath, entry.name);
|
|
5764
5890
|
if (entry.isDirectory()) {
|
|
5765
5891
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
5766
5892
|
continue;
|
|
@@ -5777,7 +5903,7 @@ async function findYamlFiles(dirPath) {
|
|
|
5777
5903
|
return results;
|
|
5778
5904
|
}
|
|
5779
5905
|
function isYamlFile(filePath) {
|
|
5780
|
-
const ext =
|
|
5906
|
+
const ext = path10.extname(filePath).toLowerCase();
|
|
5781
5907
|
return ext === ".yaml" || ext === ".yml";
|
|
5782
5908
|
}
|
|
5783
5909
|
|
|
@@ -5817,7 +5943,7 @@ var validateCommand = command({
|
|
|
5817
5943
|
// src/commands/workspace/clean.ts
|
|
5818
5944
|
import { existsSync as existsSync2 } from "node:fs";
|
|
5819
5945
|
import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
|
|
5820
|
-
import
|
|
5946
|
+
import path11 from "node:path";
|
|
5821
5947
|
async function confirm(message) {
|
|
5822
5948
|
const readline2 = await import("node:readline");
|
|
5823
5949
|
const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
|
|
@@ -5853,8 +5979,8 @@ var cleanCommand = command({
|
|
|
5853
5979
|
const poolDirs = entries2.filter((e) => e.isDirectory());
|
|
5854
5980
|
const matchingDirs = [];
|
|
5855
5981
|
for (const dir of poolDirs) {
|
|
5856
|
-
const poolDir =
|
|
5857
|
-
const metadataPath =
|
|
5982
|
+
const poolDir = path11.join(poolRoot, dir.name);
|
|
5983
|
+
const metadataPath = path11.join(poolDir, "metadata.json");
|
|
5858
5984
|
try {
|
|
5859
5985
|
const raw = await readFile2(metadataPath, "utf-8");
|
|
5860
5986
|
const metadata = JSON.parse(raw);
|
|
@@ -5885,7 +6011,7 @@ var cleanCommand = command({
|
|
|
5885
6011
|
}
|
|
5886
6012
|
for (const dir of matchingDirs) {
|
|
5887
6013
|
await rm(dir, { recursive: true, force: true });
|
|
5888
|
-
console.log(`Removed: ${
|
|
6014
|
+
console.log(`Removed: ${path11.basename(dir).slice(0, 12)}...`);
|
|
5889
6015
|
}
|
|
5890
6016
|
console.log("Done.");
|
|
5891
6017
|
} else {
|
|
@@ -5905,13 +6031,13 @@ var cleanCommand = command({
|
|
|
5905
6031
|
// src/commands/workspace/list.ts
|
|
5906
6032
|
import { existsSync as existsSync3 } from "node:fs";
|
|
5907
6033
|
import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
|
|
5908
|
-
import
|
|
6034
|
+
import path12 from "node:path";
|
|
5909
6035
|
async function getDirectorySize(dirPath) {
|
|
5910
6036
|
let totalSize = 0;
|
|
5911
6037
|
try {
|
|
5912
6038
|
const entries2 = await readdir3(dirPath, { withFileTypes: true });
|
|
5913
6039
|
for (const entry of entries2) {
|
|
5914
|
-
const fullPath =
|
|
6040
|
+
const fullPath = path12.join(dirPath, entry.name);
|
|
5915
6041
|
if (entry.isDirectory()) {
|
|
5916
6042
|
totalSize += await getDirectorySize(fullPath);
|
|
5917
6043
|
} else {
|
|
@@ -5946,11 +6072,11 @@ var listCommand = command({
|
|
|
5946
6072
|
return;
|
|
5947
6073
|
}
|
|
5948
6074
|
for (const dir of poolDirs) {
|
|
5949
|
-
const poolDir =
|
|
6075
|
+
const poolDir = path12.join(poolRoot, dir.name);
|
|
5950
6076
|
const fingerprint = dir.name;
|
|
5951
6077
|
const poolEntries = await readdir3(poolDir, { withFileTypes: true });
|
|
5952
6078
|
const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
|
|
5953
|
-
const metadataPath =
|
|
6079
|
+
const metadataPath = path12.join(poolDir, "metadata.json");
|
|
5954
6080
|
let metadata = null;
|
|
5955
6081
|
try {
|
|
5956
6082
|
const raw = await readFile3(metadataPath, "utf-8");
|
|
@@ -5996,8 +6122,8 @@ var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
|
|
|
5996
6122
|
var AGENTV_DIR = getAgentvHome();
|
|
5997
6123
|
var CACHE_FILE = "version-check.json";
|
|
5998
6124
|
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
|
|
5999
|
-
async function getCachedUpdateInfo(
|
|
6000
|
-
const filePath =
|
|
6125
|
+
async function getCachedUpdateInfo(path13) {
|
|
6126
|
+
const filePath = path13 ?? join(AGENTV_DIR, CACHE_FILE);
|
|
6001
6127
|
try {
|
|
6002
6128
|
const raw = await readFile4(filePath, "utf-8");
|
|
6003
6129
|
const data = JSON.parse(raw);
|
|
@@ -6087,6 +6213,7 @@ var app = subcommands({
|
|
|
6087
6213
|
create: createCommand,
|
|
6088
6214
|
generate: generateCommand,
|
|
6089
6215
|
init: initCmdTsCommand,
|
|
6216
|
+
results: resultsCommand,
|
|
6090
6217
|
self: selfCommand,
|
|
6091
6218
|
trace: traceCommand,
|
|
6092
6219
|
transpile: transpileCommand,
|
|
@@ -6103,6 +6230,7 @@ var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
|
|
|
6103
6230
|
"create",
|
|
6104
6231
|
"generate",
|
|
6105
6232
|
"init",
|
|
6233
|
+
"results",
|
|
6106
6234
|
"self",
|
|
6107
6235
|
"trace",
|
|
6108
6236
|
"transpile",
|
|
@@ -6150,4 +6278,4 @@ export {
|
|
|
6150
6278
|
preprocessArgv,
|
|
6151
6279
|
runCli
|
|
6152
6280
|
};
|
|
6153
|
-
//# sourceMappingURL=chunk-
|
|
6281
|
+
//# sourceMappingURL=chunk-5GG6DDP5.js.map
|