agentv 4.24.1 → 4.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,8 +13,8 @@ import {
13
13
  writeArtifacts,
14
14
  writeArtifactsFromResults,
15
15
  writePerTestArtifacts
16
- } from "./chunk-OQKETJT6.js";
17
- import "./chunk-5CC472UM.js";
16
+ } from "./chunk-FWDOW3TN.js";
17
+ import "./chunk-QHSB6LBN.js";
18
18
  import "./chunk-QOBQ5XYF.js";
19
19
  import "./chunk-BPGJ4HBU.js";
20
20
  import "./chunk-XGWXNNH6.js";
@@ -38,4 +38,4 @@ export {
38
38
  writeArtifactsFromResults,
39
39
  writePerTestArtifacts
40
40
  };
41
- //# sourceMappingURL=artifact-writer-SHHWQAXG.js.map
41
+ //# sourceMappingURL=artifact-writer-RFZXLCR7.js.map
@@ -44,7 +44,7 @@ import {
44
44
  validateFileReferences,
45
45
  validateTargetsFile,
46
46
  validateWorkspacePaths
47
- } from "./chunk-B4Y5U75M.js";
47
+ } from "./chunk-Y7LMYQ7O.js";
48
48
  import {
49
49
  RESULT_INDEX_FILENAME,
50
50
  aggregateRunDir,
@@ -52,7 +52,7 @@ import {
52
52
  resolveRunManifestPath,
53
53
  toSnakeCaseDeep as toSnakeCaseDeep2,
54
54
  writeArtifactsFromResults
55
- } from "./chunk-OQKETJT6.js";
55
+ } from "./chunk-FWDOW3TN.js";
56
56
  import {
57
57
  DEFAULT_CATEGORY,
58
58
  addBenchmark,
@@ -71,7 +71,7 @@ import {
71
71
  toTranscriptJsonLines,
72
72
  transpileEvalYamlFile,
73
73
  trimBaselineResult
74
- } from "./chunk-5CC472UM.js";
74
+ } from "./chunk-QHSB6LBN.js";
75
75
  import {
76
76
  DEFAULT_THRESHOLD,
77
77
  createBuiltinRegistry,
@@ -3996,7 +3996,7 @@ var evalRunCommand = command({
3996
3996
  },
3997
3997
  handler: async (args) => {
3998
3998
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
3999
- const { launchInteractiveWizard } = await import("./interactive-KPCZ6YIR.js");
3999
+ const { launchInteractiveWizard } = await import("./interactive-XF7C6MM5.js");
4000
4000
  await launchInteractiveWizard();
4001
4001
  return;
4002
4002
  }
@@ -9831,19 +9831,30 @@ function stripHeavyFields(results) {
9831
9831
  };
9832
9832
  });
9833
9833
  }
9834
+ function inferExperimentFromRunId(runId) {
9835
+ const separatorIndex = runId.lastIndexOf("::");
9836
+ if (separatorIndex === -1) {
9837
+ return void 0;
9838
+ }
9839
+ const experiment = runId.slice(0, separatorIndex).trim();
9840
+ if (!experiment || experiment === "default") {
9841
+ return void 0;
9842
+ }
9843
+ return experiment;
9844
+ }
9834
9845
  async function handleRuns(c4, { searchDir, agentvDir }) {
9835
9846
  const { runs: metas } = await listMergedResultFiles(searchDir);
9836
9847
  const { threshold: passThreshold } = loadStudioConfig(agentvDir);
9837
9848
  return c4.json({
9838
9849
  runs: metas.map((m) => {
9839
9850
  let target;
9840
- let experiment;
9851
+ let experiment = inferExperimentFromRunId(m.raw_filename);
9841
9852
  let passRate = m.passRate;
9842
9853
  try {
9843
9854
  const records = loadLightweightResults(m.path);
9844
9855
  if (records.length > 0) {
9845
9856
  target = records[0].target;
9846
- experiment = records[0].experiment;
9857
+ experiment = records[0].experiment ?? experiment;
9847
9858
  passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
9848
9859
  }
9849
9860
  } catch {
@@ -10413,12 +10424,12 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
10413
10424
  const { runs: metas } = await listMergedResultFiles(p.path);
10414
10425
  for (const m of metas) {
10415
10426
  let target;
10416
- let experiment;
10427
+ let experiment = inferExperimentFromRunId(m.raw_filename);
10417
10428
  try {
10418
10429
  const records = loadLightweightResults(m.path);
10419
10430
  if (records.length > 0) {
10420
10431
  target = records[0].target;
10421
- experiment = records[0].experiment;
10432
+ experiment = records[0].experiment ?? experiment;
10422
10433
  }
10423
10434
  } catch {
10424
10435
  }
@@ -11974,4 +11985,4 @@ export {
11974
11985
  preprocessArgv,
11975
11986
  runCli
11976
11987
  };
11977
- //# sourceMappingURL=chunk-VQIZ5FIV.js.map
11988
+ //# sourceMappingURL=chunk-55KC6LD7.js.map