agentv 4.12.5 → 4.12.6-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,7 @@ import {
42
42
  validateFileReferences,
43
43
  validateTargetsFile,
44
44
  writeArtifactsFromResults
45
- } from "./chunk-MY5UKOPW.js";
45
+ } from "./chunk-SSMWGOHT.js";
46
46
  import {
47
47
  DEFAULT_CATEGORY,
48
48
  DEFAULT_THRESHOLD,
@@ -3912,7 +3912,7 @@ var evalRunCommand = command({
3912
3912
  },
3913
3913
  handler: async (args) => {
3914
3914
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3915
- const { launchInteractiveWizard } = await import("./interactive-33ACYB2B.js");
3915
+ const { launchInteractiveWizard } = await import("./interactive-KRISJADM.js");
3916
3916
  await launchInteractiveWizard();
3917
3917
  return;
3918
3918
  }
@@ -7817,17 +7817,20 @@ function stripHeavyFields(results) {
7817
7817
  };
7818
7818
  });
7819
7819
  }
7820
- async function handleRuns(c4, { searchDir }) {
7820
+ async function handleRuns(c4, { searchDir, agentvDir }) {
7821
7821
  const { runs: metas } = await listMergedResultFiles(searchDir);
7822
+ const { threshold: passThreshold } = loadStudioConfig(agentvDir);
7822
7823
  return c4.json({
7823
7824
  runs: metas.map((m) => {
7824
7825
  let target;
7825
7826
  let experiment;
7827
+ let passRate = m.passRate;
7826
7828
  try {
7827
7829
  const records = loadLightweightResults(m.path);
7828
7830
  if (records.length > 0) {
7829
7831
  target = records[0].target;
7830
7832
  experiment = records[0].experiment;
7833
+ passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
7831
7834
  }
7832
7835
  } catch {
7833
7836
  }
@@ -7837,7 +7840,7 @@ async function handleRuns(c4, { searchDir }) {
7837
7840
  path: m.path,
7838
7841
  timestamp: m.timestamp,
7839
7842
  test_count: m.testCount,
7840
- pass_rate: m.passRate,
7843
+ pass_rate: passRate,
7841
7844
  avg_score: m.avgScore,
7842
7845
  size_bytes: m.sizeBytes,
7843
7846
  source: m.source,
@@ -9853,4 +9856,4 @@ export {
9853
9856
  preprocessArgv,
9854
9857
  runCli
9855
9858
  };
9856
- //# sourceMappingURL=chunk-TQMECG6F.js.map
9859
+ //# sourceMappingURL=chunk-FKJXRYS3.js.map