@agentv/core 4.3.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10342,13 +10342,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
10342
10342
  async function execShellWithStdin(command, stdinPayload, options = {}) {
10343
10343
  const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
10344
10344
  const { tmpdir: tmpdir3 } = await import("node:os");
10345
- const path47 = await import("node:path");
10345
+ const path48 = await import("node:path");
10346
10346
  const { randomUUID: randomUUID10 } = await import("node:crypto");
10347
- const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
10347
+ const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
10348
10348
  await mkdir16(dir, { recursive: true });
10349
- const stdinPath = path47.join(dir, "stdin.txt");
10350
- const stdoutPath = path47.join(dir, "stdout.txt");
10351
- const stderrPath = path47.join(dir, "stderr.txt");
10349
+ const stdinPath = path48.join(dir, "stdin.txt");
10350
+ const stdoutPath = path48.join(dir, "stdout.txt");
10351
+ const stderrPath = path48.join(dir, "stderr.txt");
10352
10352
  await writeFile9(stdinPath, stdinPayload, "utf8");
10353
10353
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
10354
10354
  const { spawn: spawn5 } = await import("node:child_process");
@@ -12561,115 +12561,115 @@ var FieldAccuracyEvaluator = class {
12561
12561
  * Evaluate a single field against the expected value.
12562
12562
  */
12563
12563
  evaluateField(fieldConfig, candidateData, expectedData) {
12564
- const { path: path47, match, required = true, weight = 1 } = fieldConfig;
12565
- const candidateValue = resolvePath(candidateData, path47);
12566
- const expectedValue = resolvePath(expectedData, path47);
12564
+ const { path: path48, match, required = true, weight = 1 } = fieldConfig;
12565
+ const candidateValue = resolvePath(candidateData, path48);
12566
+ const expectedValue = resolvePath(expectedData, path48);
12567
12567
  if (expectedValue === void 0) {
12568
12568
  return {
12569
- path: path47,
12569
+ path: path48,
12570
12570
  score: 1,
12571
12571
  // No expected value means no comparison needed
12572
12572
  weight,
12573
12573
  hit: true,
12574
- message: `${path47}: no expected value`
12574
+ message: `${path48}: no expected value`
12575
12575
  };
12576
12576
  }
12577
12577
  if (candidateValue === void 0) {
12578
12578
  if (required) {
12579
12579
  return {
12580
- path: path47,
12580
+ path: path48,
12581
12581
  score: 0,
12582
12582
  weight,
12583
12583
  hit: false,
12584
- message: `${path47} (required, missing)`
12584
+ message: `${path48} (required, missing)`
12585
12585
  };
12586
12586
  }
12587
12587
  return {
12588
- path: path47,
12588
+ path: path48,
12589
12589
  score: 1,
12590
12590
  // Don't penalize missing optional fields
12591
12591
  weight: 0,
12592
12592
  // Zero weight means it won't affect the score
12593
12593
  hit: true,
12594
- message: `${path47}: optional field missing`
12594
+ message: `${path48}: optional field missing`
12595
12595
  };
12596
12596
  }
12597
12597
  switch (match) {
12598
12598
  case "exact":
12599
- return this.compareExact(path47, candidateValue, expectedValue, weight);
12599
+ return this.compareExact(path48, candidateValue, expectedValue, weight);
12600
12600
  case "numeric_tolerance":
12601
12601
  return this.compareNumericTolerance(
12602
- path47,
12602
+ path48,
12603
12603
  candidateValue,
12604
12604
  expectedValue,
12605
12605
  fieldConfig,
12606
12606
  weight
12607
12607
  );
12608
12608
  case "date":
12609
- return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
12609
+ return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
12610
12610
  default:
12611
12611
  return {
12612
- path: path47,
12612
+ path: path48,
12613
12613
  score: 0,
12614
12614
  weight,
12615
12615
  hit: false,
12616
- message: `${path47}: unknown match type "${match}"`
12616
+ message: `${path48}: unknown match type "${match}"`
12617
12617
  };
12618
12618
  }
12619
12619
  }
12620
12620
  /**
12621
12621
  * Exact equality comparison.
12622
12622
  */
12623
- compareExact(path47, candidateValue, expectedValue, weight) {
12623
+ compareExact(path48, candidateValue, expectedValue, weight) {
12624
12624
  if (deepEqual(candidateValue, expectedValue)) {
12625
12625
  return {
12626
- path: path47,
12626
+ path: path48,
12627
12627
  score: 1,
12628
12628
  weight,
12629
12629
  hit: true,
12630
- message: path47
12630
+ message: path48
12631
12631
  };
12632
12632
  }
12633
12633
  if (typeof candidateValue !== typeof expectedValue) {
12634
12634
  return {
12635
- path: path47,
12635
+ path: path48,
12636
12636
  score: 0,
12637
12637
  weight,
12638
12638
  hit: false,
12639
- message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
12639
+ message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
12640
12640
  };
12641
12641
  }
12642
12642
  return {
12643
- path: path47,
12643
+ path: path48,
12644
12644
  score: 0,
12645
12645
  weight,
12646
12646
  hit: false,
12647
- message: `${path47} (value mismatch)`
12647
+ message: `${path48} (value mismatch)`
12648
12648
  };
12649
12649
  }
12650
12650
  /**
12651
12651
  * Numeric comparison with absolute or relative tolerance.
12652
12652
  */
12653
- compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
12653
+ compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
12654
12654
  const { tolerance = 0, relative = false } = fieldConfig;
12655
12655
  const candidateNum = toNumber(candidateValue);
12656
12656
  const expectedNum = toNumber(expectedValue);
12657
12657
  if (candidateNum === null || expectedNum === null) {
12658
12658
  return {
12659
- path: path47,
12659
+ path: path48,
12660
12660
  score: 0,
12661
12661
  weight,
12662
12662
  hit: false,
12663
- message: `${path47} (non-numeric value)`
12663
+ message: `${path48} (non-numeric value)`
12664
12664
  };
12665
12665
  }
12666
12666
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
12667
12667
  return {
12668
- path: path47,
12668
+ path: path48,
12669
12669
  score: 0,
12670
12670
  weight,
12671
12671
  hit: false,
12672
- message: `${path47} (invalid numeric value)`
12672
+ message: `${path48} (invalid numeric value)`
12673
12673
  };
12674
12674
  }
12675
12675
  const diff = Math.abs(candidateNum - expectedNum);
@@ -12682,61 +12682,61 @@ var FieldAccuracyEvaluator = class {
12682
12682
  }
12683
12683
  if (withinTolerance) {
12684
12684
  return {
12685
- path: path47,
12685
+ path: path48,
12686
12686
  score: 1,
12687
12687
  weight,
12688
12688
  hit: true,
12689
- message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
12689
+ message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
12690
12690
  };
12691
12691
  }
12692
12692
  return {
12693
- path: path47,
12693
+ path: path48,
12694
12694
  score: 0,
12695
12695
  weight,
12696
12696
  hit: false,
12697
- message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
12697
+ message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
12698
12698
  };
12699
12699
  }
12700
12700
  /**
12701
12701
  * Date comparison with format normalization.
12702
12702
  */
12703
- compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
12703
+ compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
12704
12704
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
12705
12705
  const candidateDate = parseDate(String(candidateValue), formats);
12706
12706
  const expectedDate = parseDate(String(expectedValue), formats);
12707
12707
  if (candidateDate === null) {
12708
12708
  return {
12709
- path: path47,
12709
+ path: path48,
12710
12710
  score: 0,
12711
12711
  weight,
12712
12712
  hit: false,
12713
- message: `${path47} (unparseable candidate date)`
12713
+ message: `${path48} (unparseable candidate date)`
12714
12714
  };
12715
12715
  }
12716
12716
  if (expectedDate === null) {
12717
12717
  return {
12718
- path: path47,
12718
+ path: path48,
12719
12719
  score: 0,
12720
12720
  weight,
12721
12721
  hit: false,
12722
- message: `${path47} (unparseable expected date)`
12722
+ message: `${path48} (unparseable expected date)`
12723
12723
  };
12724
12724
  }
12725
12725
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
12726
12726
  return {
12727
- path: path47,
12727
+ path: path48,
12728
12728
  score: 1,
12729
12729
  weight,
12730
12730
  hit: true,
12731
- message: path47
12731
+ message: path48
12732
12732
  };
12733
12733
  }
12734
12734
  return {
12735
- path: path47,
12735
+ path: path48,
12736
12736
  score: 0,
12737
12737
  weight,
12738
12738
  hit: false,
12739
- message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12739
+ message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12740
12740
  };
12741
12741
  }
12742
12742
  /**
@@ -12769,11 +12769,11 @@ var FieldAccuracyEvaluator = class {
12769
12769
  };
12770
12770
  }
12771
12771
  };
12772
- function resolvePath(obj, path47) {
12773
- if (!path47 || !obj) {
12772
+ function resolvePath(obj, path48) {
12773
+ if (!path48 || !obj) {
12774
12774
  return void 0;
12775
12775
  }
12776
- const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
12776
+ const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
12777
12777
  let current = obj;
12778
12778
  for (const part of parts) {
12779
12779
  if (current === null || current === void 0) {
@@ -13257,8 +13257,8 @@ var TokenUsageEvaluator = class {
13257
13257
  };
13258
13258
 
13259
13259
  // src/evaluation/evaluators/tool-trajectory.ts
13260
- function getNestedValue(obj, path47) {
13261
- const parts = path47.split(".");
13260
+ function getNestedValue(obj, path48) {
13261
+ const parts = path48.split(".");
13262
13262
  let current = obj;
13263
13263
  for (const part of parts) {
13264
13264
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -17499,7 +17499,7 @@ async function discoverDefaultTarget(repoRoot) {
17499
17499
  return null;
17500
17500
  }
17501
17501
  async function loadEnvHierarchy(repoRoot, startPath) {
17502
- const { readFileSync: readFileSync2 } = await import("node:fs");
17502
+ const { readFileSync: readFileSync3 } = await import("node:fs");
17503
17503
  const chain = buildDirectoryChain(startPath, repoRoot);
17504
17504
  const envFiles = [];
17505
17505
  for (const dir of chain) {
@@ -17508,7 +17508,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
17508
17508
  }
17509
17509
  for (let i = 0; i < envFiles.length; i++) {
17510
17510
  try {
17511
- const content = readFileSync2(envFiles[i], "utf8");
17511
+ const content = readFileSync3(envFiles[i], "utf8");
17512
17512
  for (const line of content.split("\n")) {
17513
17513
  const trimmed = line.trim();
17514
17514
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -17580,12 +17580,12 @@ var CONFIG_FILE_NAMES = [
17580
17580
  ".agentv/config.js"
17581
17581
  ];
17582
17582
  async function loadTsConfig(projectRoot) {
17583
- const { existsSync: existsSync5 } = await import("node:fs");
17583
+ const { existsSync: existsSync6 } = await import("node:fs");
17584
17584
  const { pathToFileURL } = await import("node:url");
17585
17585
  const { join: join2 } = await import("node:path");
17586
17586
  for (const fileName of CONFIG_FILE_NAMES) {
17587
17587
  const filePath = join2(projectRoot, fileName);
17588
- if (!existsSync5(filePath)) {
17588
+ if (!existsSync6(filePath)) {
17589
17589
  continue;
17590
17590
  }
17591
17591
  try {
@@ -17722,6 +17722,121 @@ function shouldSkipCacheForTemperature(targetConfig) {
17722
17722
  return false;
17723
17723
  }
17724
17724
 
17725
+ // src/projects.ts
17726
+ import { existsSync as existsSync5, mkdirSync, readFileSync as readFileSync2, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
17727
+ import path47 from "node:path";
17728
+ import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
17729
/**
 * Location of the project registry file: `projects.yaml` inside the
 * directory returned by `getAgentvHome()`.
 *
 * @returns {string} Path to the registry file.
 */
function getProjectsRegistryPath() {
  const homeDir = getAgentvHome();
  return path47.join(homeDir, "projects.yaml");
}
17732
/**
 * Read the project registry from disk.
 *
 * Falls back to an empty registry when the file does not exist, cannot be
 * read or parsed, or does not contain a `projects` array.
 *
 * @returns {{ projects: Array }} The registry; `projects` is always an array.
 */
function loadProjectRegistry() {
  const emptyRegistry = () => ({ projects: [] });
  const registryFile = getProjectsRegistryPath();
  if (!existsSync5(registryFile)) {
    return emptyRegistry();
  }
  try {
    const document = parseYaml3(readFileSync2(registryFile, "utf-8"));
    const hasProjectList = Boolean(document) && Array.isArray(document.projects);
    return hasProjectList ? { projects: document.projects } : emptyRegistry();
  } catch {
    // Best effort: an unreadable or malformed registry is treated as empty.
    return emptyRegistry();
  }
}
17748
/**
 * Persist the project registry as YAML at the path reported by
 * `getProjectsRegistryPath()`, creating the parent directory when needed.
 *
 * @param {object} registry - Registry value serialized with `stringifyYaml`.
 * @throws Propagates any error raised by `mkdirSync` or `writeFileSync`.
 */
function saveProjectRegistry(registry) {
  const registryPath = getProjectsRegistryPath();
  // A recursive mkdir succeeds when the directory already exists, so no
  // existsSync pre-check is needed (and the check-then-create race goes away).
  mkdirSync(path47.dirname(registryPath), { recursive: true });
  writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
}
17756
/**
 * Derive a registry-unique id from a directory path.
 *
 * The directory's base name is lower-cased, every character outside
 * `[a-z0-9-]` becomes "-", runs of "-" collapse to one, and a leading or
 * trailing "-" is stripped. If nothing is left, "project" is used. When the
 * id is already taken, "-2", "-3", ... is appended until it is unique.
 *
 * @param {string} dirPath - Directory path whose base name seeds the id.
 * @param {string[]} existingIds - Ids that are already in use.
 * @returns {string} An id that is not contained in `existingIds`.
 */
function deriveProjectId(dirPath, existingIds) {
  const slug = path47
    .basename(dirPath)
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "");
  // Use the fallback for suffixed candidates too; suffixing the empty slug
  // used to produce ids such as "-2".
  const stem = slug || "project";
  const taken = new Set(existingIds);
  let candidate = stem;
  for (let suffix = 2; taken.has(candidate); suffix++) {
    candidate = `${stem}-${suffix}`;
  }
  return candidate;
}
17766
/**
 * Register a project directory in the project registry.
 *
 * The path is resolved to an absolute path and must exist and contain a
 * `.agentv` entry. If a registry entry with the same resolved path already
 * exists it is returned unchanged; otherwise a new entry is appended and the
 * registry is saved.
 *
 * @param {string} projectPath - Path to the project directory.
 * @returns {object} The existing or newly created registry entry.
 * @throws {Error} When the path does not exist or has no `.agentv` entry.
 */
function addProject(projectPath) {
  const resolvedPath = path47.resolve(projectPath);
  if (!existsSync5(resolvedPath)) {
    throw new Error(`Directory not found: ${resolvedPath}`);
  }
  const markerPath = path47.join(resolvedPath, ".agentv");
  if (!existsSync5(markerPath)) {
    throw new Error(`No .agentv/ directory found in ${resolvedPath}. Run an evaluation first.`);
  }

  const registry = loadProjectRegistry();
  const alreadyRegistered = registry.projects.find((project) => project.path === resolvedPath);
  if (alreadyRegistered) {
    return alreadyRegistered;
  }

  // One timestamp is shared so addedAt and lastOpenedAt start out identical.
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  const knownIds = registry.projects.map((project) => project.id);
  const created = {
    id: deriveProjectId(resolvedPath, knownIds),
    name: path47.basename(resolvedPath),
    path: resolvedPath,
    addedAt: timestamp,
    lastOpenedAt: timestamp
  };
  registry.projects.push(created);
  saveProjectRegistry(registry);
  return created;
}
17794
/**
 * Remove the registry entry with the given id and save the registry.
 *
 * @param {string} projectId - Id of the entry to remove.
 * @returns {boolean} `true` when an entry was removed, `false` when no entry
 *   has that id (the registry is not rewritten in that case).
 */
function removeProject(projectId) {
  const registry = loadProjectRegistry();
  const position = registry.projects.findIndex((project) => project.id === projectId);
  const isRegistered = position >= 0;
  if (isRegistered) {
    registry.projects.splice(position, 1);
    saveProjectRegistry(registry);
  }
  return isRegistered;
}
17802
/**
 * Look up a registry entry by id.
 *
 * @param {string} projectId - Id to search for.
 * @returns {object | undefined} The matching entry, or `undefined` if none.
 */
function getProject(projectId) {
  const { projects } = loadProjectRegistry();
  return projects.find((project) => project.id === projectId);
}
17805
/**
 * Set `lastOpenedAt` of the entry with the given id to the current time
 * (ISO string) and save the registry. Does nothing when the id is unknown.
 *
 * @param {string} projectId - Id of the entry to update.
 */
function touchProject(projectId) {
  const registry = loadProjectRegistry();
  const target = registry.projects.find((project) => project.id === projectId);
  if (!target) {
    return;
  }
  target.lastOpenedAt = (/* @__PURE__ */ new Date()).toISOString();
  saveProjectRegistry(registry);
}
17813
/**
 * Find directories under `rootDir` that contain a `.agentv` entry.
 *
 * The root itself is depth 0. A directory that contains `.agentv` is
 * reported and not descended into. Directories at `maxDepth` are checked
 * but their children are not listed. Entries whose name starts with "." or
 * equals "node_modules", and entries that are not directories, are skipped.
 *
 * @param {string} rootDir - Directory where the search starts.
 * @param {number} [maxDepth=2] - Deepest level that is still inspected.
 * @returns {string[]} Resolved paths of the discovered directories; empty
 *   when `rootDir` does not exist or is not a directory.
 */
function discoverProjects(rootDir, maxDepth = 2) {
  const startDir = path47.resolve(rootDir);
  const rootIsDirectory = existsSync5(startDir) && statSync2(startDir).isDirectory();
  if (!rootIsDirectory) {
    return [];
  }

  const found = [];
  const isSkippedName = (name) => name.startsWith(".") || name === "node_modules";

  const visit = (current, level) => {
    if (level > maxDepth) {
      return;
    }
    if (existsSync5(path47.join(current, ".agentv"))) {
      found.push(current);
      return;
    }
    if (level === maxDepth) {
      return;
    }
    try {
      readdirSync3(current, { withFileTypes: true })
        .filter((child) => child.isDirectory() && !isSkippedName(child.name))
        .forEach((child) => visit(path47.join(current, child.name), level + 1));
    } catch {
      // Best effort: a directory that cannot be listed is skipped.
    }
  };

  visit(startDir, 0);
  return found;
}
17839
+
17725
17840
  // src/evaluation/baseline.ts
17726
17841
  var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
17727
17842
  "requests",
@@ -18312,6 +18427,7 @@ export {
18312
18427
  ToolTrajectoryEvaluator,
18313
18428
  WorkspaceCreationError,
18314
18429
  WorkspacePoolManager,
18430
+ addProject,
18315
18431
  assembleLlmGraderPrompt,
18316
18432
  assembleLlmGraderPrompt as assembleLlmJudgePrompt,
18317
18433
  avgToolDurationMs,
@@ -18341,11 +18457,13 @@ export {
18341
18457
  deepEqual,
18342
18458
  defineConfig,
18343
18459
  deriveCategory,
18460
+ deriveProjectId,
18344
18461
  detectFormat,
18345
18462
  discoverAssertions,
18346
18463
  discoverCopilotSessions,
18347
18464
  discoverGraders,
18348
18465
  discoverGraders as discoverJudges,
18466
+ discoverProjects,
18349
18467
  discoverProviders,
18350
18468
  ensureVSCodeSubagents,
18351
18469
  evaluate,
@@ -18369,6 +18487,8 @@ export {
18369
18487
  generateRubrics,
18370
18488
  getAgentvHome,
18371
18489
  getOutputFilenames,
18490
+ getProject,
18491
+ getProjectsRegistryPath,
18372
18492
  getSubagentsRoot,
18373
18493
  getTextContent,
18374
18494
  getTraceStateRoot,
@@ -18390,6 +18510,7 @@ export {
18390
18510
  loadEvalCaseById,
18391
18511
  loadEvalCases,
18392
18512
  loadEvalSuite,
18513
+ loadProjectRegistry,
18393
18514
  loadTestById,
18394
18515
  loadTestSuite,
18395
18516
  loadTests,
@@ -18404,6 +18525,7 @@ export {
18404
18525
  readTargetDefinitions,
18405
18526
  readTestSuiteMetadata,
18406
18527
  readTextFile,
18528
+ removeProject,
18407
18529
  resolveAndCreateProvider,
18408
18530
  resolveFileReference,
18409
18531
  resolveTargetDefinition,
@@ -18422,6 +18544,7 @@ export {
18422
18544
  runIsJsonAssertion,
18423
18545
  runRegexAssertion,
18424
18546
  runStartsWithAssertion,
18547
+ saveProjectRegistry,
18425
18548
  scoreToVerdict,
18426
18549
  shouldEnableCache,
18427
18550
  shouldSkipCacheForTemperature,
@@ -18434,6 +18557,7 @@ export {
18434
18557
  toCamelCaseDeep,
18435
18558
  toSnakeCaseDeep,
18436
18559
  tokensPerTool,
18560
+ touchProject,
18437
18561
  transpileEvalYaml,
18438
18562
  transpileEvalYamlFile,
18439
18563
  trimBaselineResult