@agentv/core 4.20.0 → 4.21.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-24ND5HZC.js → chunk-LKX4QW3G.js} +60 -2
- package/dist/{chunk-24ND5HZC.js.map → chunk-LKX4QW3G.js.map} +1 -1
- package/dist/{chunk-ELF6SQAK.js → chunk-WCW3V6QJ.js} +28 -17
- package/dist/chunk-WCW3V6QJ.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +94 -8
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +40 -10
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +104 -24
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +23 -8
- package/dist/index.d.ts +23 -8
- package/dist/index.js +32 -27
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-32COE32J.js → ts-eval-loader-HPIPE72C.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-ELF6SQAK.js.map +0 -1
- /package/dist/{ts-eval-loader-32COE32J.js.map → ts-eval-loader-HPIPE72C.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -559,6 +559,63 @@ async function resolveFileReference(ref, evalFileDir) {
|
|
|
559
559
|
}
|
|
560
560
|
return loadCasesFromFile(absolutePattern);
|
|
561
561
|
}
|
|
562
|
+
async function loadCasesFromDirectory(dirPath) {
|
|
563
|
+
const entries = await (0, import_promises2.readdir)(dirPath, { withFileTypes: true });
|
|
564
|
+
const subdirs = entries.filter((e) => e.isDirectory()).sort((a, b) => a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
|
|
565
|
+
const results = [];
|
|
566
|
+
for (const subdir of subdirs) {
|
|
567
|
+
const subdirPath = import_node_path2.default.join(dirPath, subdir.name);
|
|
568
|
+
let caseFilePath;
|
|
569
|
+
for (const filename of ["case.yaml", "case.yml"]) {
|
|
570
|
+
const candidate = import_node_path2.default.join(subdirPath, filename);
|
|
571
|
+
try {
|
|
572
|
+
const s = await (0, import_promises2.stat)(candidate);
|
|
573
|
+
if (s.isFile()) {
|
|
574
|
+
caseFilePath = candidate;
|
|
575
|
+
break;
|
|
576
|
+
}
|
|
577
|
+
} catch {
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
if (!caseFilePath) {
|
|
581
|
+
console.warn(
|
|
582
|
+
`${ANSI_YELLOW}Warning: Skipping directory '${subdir.name}' \u2014 no case.yaml found${ANSI_RESET2}`
|
|
583
|
+
);
|
|
584
|
+
continue;
|
|
585
|
+
}
|
|
586
|
+
let content;
|
|
587
|
+
try {
|
|
588
|
+
content = await (0, import_promises2.readFile)(caseFilePath, "utf8");
|
|
589
|
+
} catch (error) {
|
|
590
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
591
|
+
throw new Error(`Cannot read case file: ${caseFilePath}
|
|
592
|
+
${message}`);
|
|
593
|
+
}
|
|
594
|
+
const raw = (0, import_yaml.parse)(content);
|
|
595
|
+
const parsed = interpolateEnv(raw, process.env);
|
|
596
|
+
if (!isJsonObject(parsed)) {
|
|
597
|
+
throw new Error(
|
|
598
|
+
`Case file must contain a YAML object, got ${typeof parsed}: ${caseFilePath}`
|
|
599
|
+
);
|
|
600
|
+
}
|
|
601
|
+
const caseObj = { ...parsed };
|
|
602
|
+
if (caseObj.id === void 0 || caseObj.id === null) {
|
|
603
|
+
caseObj.id = subdir.name;
|
|
604
|
+
}
|
|
605
|
+
if (!caseObj.workspace) {
|
|
606
|
+
const workspaceDirPath = import_node_path2.default.join(subdirPath, "workspace");
|
|
607
|
+
try {
|
|
608
|
+
const s = await (0, import_promises2.stat)(workspaceDirPath);
|
|
609
|
+
if (s.isDirectory()) {
|
|
610
|
+
caseObj.workspace = { template: workspaceDirPath };
|
|
611
|
+
}
|
|
612
|
+
} catch {
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
results.push(caseObj);
|
|
616
|
+
}
|
|
617
|
+
return results;
|
|
618
|
+
}
|
|
562
619
|
async function expandFileReferences(tests, evalFileDir) {
|
|
563
620
|
const expanded = [];
|
|
564
621
|
for (const entry of tests) {
|
|
@@ -5585,11 +5642,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
5585
5642
|
execute: async (input) => {
|
|
5586
5643
|
try {
|
|
5587
5644
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
5588
|
-
const
|
|
5589
|
-
if (
|
|
5645
|
+
const stat14 = await import_promises12.default.stat(resolved);
|
|
5646
|
+
if (stat14.isDirectory()) {
|
|
5590
5647
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
5591
5648
|
}
|
|
5592
|
-
const buffer = Buffer.alloc(Math.min(
|
|
5649
|
+
const buffer = Buffer.alloc(Math.min(stat14.size, MAX_FILE_SIZE));
|
|
5593
5650
|
const fd = await import_promises12.default.open(resolved, "r");
|
|
5594
5651
|
try {
|
|
5595
5652
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -5597,8 +5654,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
5597
5654
|
await fd.close();
|
|
5598
5655
|
}
|
|
5599
5656
|
const content = buffer.toString("utf-8");
|
|
5600
|
-
const truncated =
|
|
5601
|
-
return { content, truncated, size:
|
|
5657
|
+
const truncated = stat14.size > MAX_FILE_SIZE;
|
|
5658
|
+
return { content, truncated, size: stat14.size };
|
|
5602
5659
|
} catch (error) {
|
|
5603
5660
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
5604
5661
|
}
|
|
@@ -5649,8 +5706,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
5649
5706
|
const ext = import_node_path12.default.extname(entry.name).toLowerCase();
|
|
5650
5707
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
5651
5708
|
try {
|
|
5652
|
-
const
|
|
5653
|
-
if (
|
|
5709
|
+
const stat14 = await import_promises12.default.stat(fullPath);
|
|
5710
|
+
if (stat14.size > MAX_FILE_SIZE) continue;
|
|
5654
5711
|
const content = await import_promises12.default.readFile(fullPath, "utf-8");
|
|
5655
5712
|
const lines = content.split("\n");
|
|
5656
5713
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -22501,7 +22558,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22501
22558
|
let expandedTestCases;
|
|
22502
22559
|
if (typeof rawTestCases === "string") {
|
|
22503
22560
|
const externalPath = import_node_path50.default.resolve(evalFileDir, rawTestCases);
|
|
22504
|
-
|
|
22561
|
+
let isDir = false;
|
|
22562
|
+
try {
|
|
22563
|
+
const pathStat = await (0, import_promises36.stat)(externalPath);
|
|
22564
|
+
isDir = pathStat.isDirectory();
|
|
22565
|
+
} catch {
|
|
22566
|
+
}
|
|
22567
|
+
if (isDir) {
|
|
22568
|
+
expandedTestCases = await loadCasesFromDirectory(externalPath);
|
|
22569
|
+
} else {
|
|
22570
|
+
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
22571
|
+
}
|
|
22505
22572
|
} else if (Array.isArray(rawTestCases)) {
|
|
22506
22573
|
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
22507
22574
|
} else {
|
|
@@ -25313,32 +25380,44 @@ var import_node_path55 = __toESM(require("path"), 1);
|
|
|
25313
25380
|
var import_yaml10 = require("yaml");
|
|
25314
25381
|
init_paths();
|
|
25315
25382
|
function getBenchmarksRegistryPath() {
|
|
25316
|
-
return import_node_path55.default.join(getAgentvConfigDir(), "
|
|
25383
|
+
return import_node_path55.default.join(getAgentvConfigDir(), "benchmarks.yaml");
|
|
25317
25384
|
}
|
|
25318
|
-
function
|
|
25319
|
-
|
|
25320
|
-
const
|
|
25321
|
-
if (
|
|
25322
|
-
|
|
25323
|
-
|
|
25324
|
-
|
|
25325
|
-
|
|
25385
|
+
function fromYaml(raw) {
|
|
25386
|
+
if (!raw || typeof raw !== "object") return null;
|
|
25387
|
+
const e = raw;
|
|
25388
|
+
if (typeof e.id !== "string" || typeof e.name !== "string" || typeof e.path !== "string") {
|
|
25389
|
+
return null;
|
|
25390
|
+
}
|
|
25391
|
+
return {
|
|
25392
|
+
id: e.id,
|
|
25393
|
+
name: e.name,
|
|
25394
|
+
path: e.path,
|
|
25395
|
+
addedAt: typeof e.added_at === "string" ? e.added_at : "",
|
|
25396
|
+
lastOpenedAt: typeof e.last_opened_at === "string" ? e.last_opened_at : ""
|
|
25397
|
+
};
|
|
25398
|
+
}
|
|
25399
|
+
function toYaml(entry) {
|
|
25400
|
+
return {
|
|
25401
|
+
id: entry.id,
|
|
25402
|
+
name: entry.name,
|
|
25403
|
+
path: entry.path,
|
|
25404
|
+
added_at: entry.addedAt,
|
|
25405
|
+
last_opened_at: entry.lastOpenedAt
|
|
25406
|
+
};
|
|
25326
25407
|
}
|
|
25327
25408
|
function loadBenchmarkRegistry() {
|
|
25328
25409
|
const registryPath = getBenchmarksRegistryPath();
|
|
25329
|
-
if (!(0, import_node_fs20.existsSync)(registryPath)) {
|
|
25330
|
-
migrateProjectsYaml(registryPath);
|
|
25331
|
-
}
|
|
25332
25410
|
if (!(0, import_node_fs20.existsSync)(registryPath)) {
|
|
25333
25411
|
return { benchmarks: [] };
|
|
25334
25412
|
}
|
|
25335
25413
|
try {
|
|
25336
25414
|
const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
|
|
25337
25415
|
const parsed = (0, import_yaml10.parse)(raw);
|
|
25338
|
-
if (!parsed ||
|
|
25416
|
+
if (!parsed || typeof parsed !== "object") {
|
|
25339
25417
|
return { benchmarks: [] };
|
|
25340
25418
|
}
|
|
25341
|
-
|
|
25419
|
+
const benchmarks = Array.isArray(parsed.benchmarks) ? parsed.benchmarks.map(fromYaml).filter((e) => e !== null) : [];
|
|
25420
|
+
return { benchmarks };
|
|
25342
25421
|
} catch {
|
|
25343
25422
|
return { benchmarks: [] };
|
|
25344
25423
|
}
|
|
@@ -25349,7 +25428,8 @@ function saveBenchmarkRegistry(registry) {
|
|
|
25349
25428
|
if (!(0, import_node_fs20.existsSync)(dir)) {
|
|
25350
25429
|
(0, import_node_fs20.mkdirSync)(dir, { recursive: true });
|
|
25351
25430
|
}
|
|
25352
|
-
|
|
25431
|
+
const payload = { benchmarks: registry.benchmarks.map(toYaml) };
|
|
25432
|
+
(0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml10.stringify)(payload), "utf-8");
|
|
25353
25433
|
}
|
|
25354
25434
|
function deriveBenchmarkId(dirPath, existingIds) {
|
|
25355
25435
|
const base = import_node_path55.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
@@ -25432,7 +25512,7 @@ function discoverBenchmarks(rootDir, maxDepth = 2) {
|
|
|
25432
25512
|
}
|
|
25433
25513
|
}
|
|
25434
25514
|
scan(absRoot, 0);
|
|
25435
|
-
return results;
|
|
25515
|
+
return results.sort();
|
|
25436
25516
|
}
|
|
25437
25517
|
|
|
25438
25518
|
// src/evaluation/baseline.ts
|