@agentv/core 4.20.0-next.1 → 4.21.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -559,6 +559,63 @@ async function resolveFileReference(ref, evalFileDir) {
559
559
  }
560
560
  return loadCasesFromFile(absolutePattern);
561
561
  }
562
/**
 * Loads eval cases from a directory that holds one subdirectory per case.
 *
 * Subdirectories are visited in ascending name order (plain `<` / `>` string
 * comparison), so the order of the returned cases is stable. For each one:
 *   - `case.yaml` is probed first, then `case.yml`. If neither is a regular
 *     file, a warning is printed and the subdirectory is skipped.
 *   - The file is read as UTF-8, parsed as YAML, and passed through
 *     `interpolateEnv` together with `process.env`.
 *   - An `id` that is `undefined` or `null` defaults to the subdirectory name.
 *   - When the case has no truthy `workspace` and the subdirectory contains a
 *     `workspace` directory, `workspace` becomes `{ template: <that path> }`.
 *
 * @param {string} dirPath Directory whose immediate subdirectories are cases.
 * @returns {Promise<object[]>} Shallow copies of the parsed case objects, one
 *   per subdirectory that had a case file.
 * @throws {Error} If a located case file cannot be read (the underlying error
 *   is attached as `cause`), or if its parsed content fails `isJsonObject`.
 *   Errors from `readdir` and from YAML parsing propagate unchanged.
 */
async function loadCasesFromDirectory(dirPath) {
  const entries = await (0, import_promises2.readdir)(dirPath, { withFileTypes: true });
  const subdirs = entries
    .filter((e) => e.isDirectory())
    .sort((a, b) => a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
  const results = [];
  for (const subdir of subdirs) {
    const subdirPath = import_node_path2.default.join(dirPath, subdir.name);
    let caseFilePath;
    for (const filename of ["case.yaml", "case.yml"]) {
      const candidate = import_node_path2.default.join(subdirPath, filename);
      try {
        const s = await (0, import_promises2.stat)(candidate);
        if (s.isFile()) {
          caseFilePath = candidate;
          break;
        }
      } catch {
        // Best effort: a candidate that cannot be stat'ed is treated as absent.
      }
    }
    if (!caseFilePath) {
      console.warn(
        `${ANSI_YELLOW}Warning: Skipping directory '${subdir.name}' \u2014 no case.yaml found${ANSI_RESET2}`
      );
      continue;
    }
    let content;
    try {
      content = await (0, import_promises2.readFile)(caseFilePath, "utf8");
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // Keep the original error reachable for callers that inspect `cause`.
      throw new Error(`Cannot read case file: ${caseFilePath}\n${message}`, { cause: error });
    }
    const raw = (0, import_yaml.parse)(content);
    const parsed = interpolateEnv(raw, process.env);
    if (!isJsonObject(parsed)) {
      // `typeof` reports "object" for both null (e.g. an empty file) and arrays,
      // which would yield the self-contradictory "must contain a YAML object,
      // got object". Name those two cases explicitly.
      const kind = parsed === null ? "null" : Array.isArray(parsed) ? "array" : typeof parsed;
      throw new Error(
        `Case file must contain a YAML object, got ${kind}: ${caseFilePath}`
      );
    }
    // Copy so the defaults below never mutate the value returned by interpolateEnv.
    const caseObj = { ...parsed };
    if (caseObj.id === void 0 || caseObj.id === null) {
      caseObj.id = subdir.name;
    }
    if (!caseObj.workspace) {
      const workspaceDirPath = import_node_path2.default.join(subdirPath, "workspace");
      try {
        const s = await (0, import_promises2.stat)(workspaceDirPath);
        if (s.isDirectory()) {
          caseObj.workspace = { template: workspaceDirPath };
        }
      } catch {
        // No usable workspace directory: leave the case without a workspace.
      }
    }
    results.push(caseObj);
  }
  return results;
}
562
619
  async function expandFileReferences(tests, evalFileDir) {
563
620
  const expanded = [];
564
621
  for (const entry of tests) {
@@ -5585,11 +5642,11 @@ function createFilesystemTools(workspacePath) {
5585
5642
  execute: async (input) => {
5586
5643
  try {
5587
5644
  const resolved = resolveSandboxed(workspacePath, input.path);
5588
- const stat12 = await import_promises12.default.stat(resolved);
5589
- if (stat12.isDirectory()) {
5645
+ const stat14 = await import_promises12.default.stat(resolved);
5646
+ if (stat14.isDirectory()) {
5590
5647
  return { error: `'${input.path}' is a directory, not a file` };
5591
5648
  }
5592
- const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
5649
+ const buffer = Buffer.alloc(Math.min(stat14.size, MAX_FILE_SIZE));
5593
5650
  const fd = await import_promises12.default.open(resolved, "r");
5594
5651
  try {
5595
5652
  await fd.read(buffer, 0, buffer.length, 0);
@@ -5597,8 +5654,8 @@ function createFilesystemTools(workspacePath) {
5597
5654
  await fd.close();
5598
5655
  }
5599
5656
  const content = buffer.toString("utf-8");
5600
- const truncated = stat12.size > MAX_FILE_SIZE;
5601
- return { content, truncated, size: stat12.size };
5657
+ const truncated = stat14.size > MAX_FILE_SIZE;
5658
+ return { content, truncated, size: stat14.size };
5602
5659
  } catch (error) {
5603
5660
  return { error: error instanceof Error ? error.message : String(error) };
5604
5661
  }
@@ -5649,8 +5706,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
5649
5706
  const ext = import_node_path12.default.extname(entry.name).toLowerCase();
5650
5707
  if (BINARY_EXTENSIONS.has(ext)) continue;
5651
5708
  try {
5652
- const stat12 = await import_promises12.default.stat(fullPath);
5653
- if (stat12.size > MAX_FILE_SIZE) continue;
5709
+ const stat14 = await import_promises12.default.stat(fullPath);
5710
+ if (stat14.size > MAX_FILE_SIZE) continue;
5654
5711
  const content = await import_promises12.default.readFile(fullPath, "utf-8");
5655
5712
  const lines = content.split("\n");
5656
5713
  for (let i = 0; i < lines.length; i++) {
@@ -22501,7 +22558,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22501
22558
  let expandedTestCases;
22502
22559
  if (typeof rawTestCases === "string") {
22503
22560
  const externalPath = import_node_path50.default.resolve(evalFileDir, rawTestCases);
22504
- expandedTestCases = await loadCasesFromFile(externalPath);
22561
+ let isDir = false;
22562
+ try {
22563
+ const pathStat = await (0, import_promises36.stat)(externalPath);
22564
+ isDir = pathStat.isDirectory();
22565
+ } catch {
22566
+ }
22567
+ if (isDir) {
22568
+ expandedTestCases = await loadCasesFromDirectory(externalPath);
22569
+ } else {
22570
+ expandedTestCases = await loadCasesFromFile(externalPath);
22571
+ }
22505
22572
  } else if (Array.isArray(rawTestCases)) {
22506
22573
  expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
22507
22574
  } else {
@@ -25313,32 +25380,44 @@ var import_node_path55 = __toESM(require("path"), 1);
25313
25380
  var import_yaml10 = require("yaml");
25314
25381
  init_paths();
25315
25382
  function getBenchmarksRegistryPath() {
25316
- return import_node_path55.default.join(getAgentvConfigDir(), "projects.yaml");
25383
+ return import_node_path55.default.join(getAgentvConfigDir(), "benchmarks.yaml");
25317
25384
  }
25318
- function migrateProjectsYaml(targetPath) {
25319
- const dataHome = getAgentvHome();
25320
- const configDir = getAgentvConfigDir();
25321
- if (dataHome === configDir) return;
25322
- const legacyPath = import_node_path55.default.join(dataHome, "projects.yaml");
25323
- if (!(0, import_node_fs20.existsSync)(legacyPath)) return;
25324
- (0, import_node_fs20.mkdirSync)(import_node_path55.default.dirname(targetPath), { recursive: true });
25325
- (0, import_node_fs20.copyFileSync)(legacyPath, targetPath);
25385
/**
 * Converts one raw registry record (as parsed from YAML) into the in-memory
 * benchmark entry shape.
 *
 * `id`, `name` and `path` must all be strings; otherwise the record is
 * rejected. Timestamps are read from the snake_case keys `added_at` and
 * `last_opened_at`. The camelCase keys `addedAt` / `lastOpenedAt` are accepted
 * as a fallback so that records serialized in the in-memory shape keep their
 * timestamps instead of being reset to "". snake_case wins when both exist.
 *
 * @param {unknown} raw A single element of the registry's `benchmarks` list.
 * @returns {{id: string, name: string, path: string, addedAt: string, lastOpenedAt: string} | null}
 *   The normalized entry, or `null` when `raw` is not an object or lacks a
 *   string `id`, `name` or `path`. Missing or non-string timestamps become "".
 */
function fromYaml(raw) {
  if (!raw || typeof raw !== "object") return null;
  const { id, name, path } = raw;
  if (typeof id !== "string" || typeof name !== "string" || typeof path !== "string") {
    return null;
  }
  // First string among the candidates, or "" when none of them is a string.
  const firstString = (...candidates) => {
    for (const value of candidates) {
      if (typeof value === "string") return value;
    }
    return "";
  };
  return {
    id,
    name,
    path,
    addedAt: firstString(raw.added_at, raw.addedAt),
    lastOpenedAt: firstString(raw.last_opened_at, raw.lastOpenedAt)
  };
}
25399
/**
 * Maps an in-memory benchmark entry to the record shape written to the
 * registry file: `id`, `name` and `path` are copied as-is, while `addedAt`
 * and `lastOpenedAt` are emitted under the snake_case keys `added_at` and
 * `last_opened_at`.
 *
 * @param {{id: *, name: *, path: *, addedAt: *, lastOpenedAt: *}} entry
 *   Entry to serialize; its values are passed through without validation.
 * @returns {{id: *, name: *, path: *, added_at: *, last_opened_at: *}}
 *   A new plain object holding exactly those five keys.
 */
function toYaml(entry) {
  const { id, name, path, addedAt, lastOpenedAt } = entry;
  return {
    id,
    name,
    path,
    added_at: addedAt,
    last_opened_at: lastOpenedAt
  };
}
25327
25408
  function loadBenchmarkRegistry() {
25328
25409
  const registryPath = getBenchmarksRegistryPath();
25329
- if (!(0, import_node_fs20.existsSync)(registryPath)) {
25330
- migrateProjectsYaml(registryPath);
25331
- }
25332
25410
  if (!(0, import_node_fs20.existsSync)(registryPath)) {
25333
25411
  return { benchmarks: [] };
25334
25412
  }
25335
25413
  try {
25336
25414
  const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
25337
25415
  const parsed = (0, import_yaml10.parse)(raw);
25338
- if (!parsed || !Array.isArray(parsed.benchmarks)) {
25416
+ if (!parsed || typeof parsed !== "object") {
25339
25417
  return { benchmarks: [] };
25340
25418
  }
25341
- return { benchmarks: parsed.benchmarks };
25419
+ const benchmarks = Array.isArray(parsed.benchmarks) ? parsed.benchmarks.map(fromYaml).filter((e) => e !== null) : [];
25420
+ return { benchmarks };
25342
25421
  } catch {
25343
25422
  return { benchmarks: [] };
25344
25423
  }
@@ -25349,7 +25428,8 @@ function saveBenchmarkRegistry(registry) {
25349
25428
  if (!(0, import_node_fs20.existsSync)(dir)) {
25350
25429
  (0, import_node_fs20.mkdirSync)(dir, { recursive: true });
25351
25430
  }
25352
- (0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml10.stringify)({ benchmarks: registry.benchmarks }), "utf-8");
25431
+ const payload = { benchmarks: registry.benchmarks.map(toYaml) };
25432
+ (0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml10.stringify)(payload), "utf-8");
25353
25433
  }
25354
25434
  function deriveBenchmarkId(dirPath, existingIds) {
25355
25435
  const base = import_node_path55.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
@@ -25432,7 +25512,7 @@ function discoverBenchmarks(rootDir, maxDepth = 2) {
25432
25512
  }
25433
25513
  }
25434
25514
  scan(absRoot, 0);
25435
- return results;
25515
+ return results.sort();
25436
25516
  }
25437
25517
 
25438
25518
  // src/evaluation/baseline.ts