npm - agentv - Versions diffs - 3.14.4 → 3.14.6 - Mend

agentv 3.14.4 → 3.14.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-3NLBBQX6.js → chunk-CQRWNXVG.js} +42 -20
package/dist/chunk-CQRWNXVG.js.map +1 -0
package/dist/{chunk-SAPEYQ5U.js → chunk-Y25VL7PX.js} +3 -3
package/dist/chunk-Y25VL7PX.js.map +1 -0
package/dist/cli.js +2 -2
package/dist/index.js +2 -2
package/dist/{interactive-PGZ55VHT.js → interactive-5ESM5DWV.js} +2 -2
package/dist/templates/.env.example +0 -3
package/package.json +1 -1
package/dist/chunk-3NLBBQX6.js.map +0 -1
package/dist/chunk-SAPEYQ5U.js.map +0 -1
package/dist/{interactive-PGZ55VHT.js.map → interactive-5ESM5DWV.js.map} +0 -0

package/dist/{chunk-3NLBBQX6.js → chunk-CQRWNXVG.js} RENAMED

@@ -23,7 +23,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
   writeArtifactsFromResults
-} from "./chunk-SAPEYQ5U.js";
+} from "./chunk-Y25VL7PX.js";
 import {
   createBuiltinRegistry,
   executeScript,
@@ -4186,7 +4186,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-PGZ55VHT.js");
+      const { launchInteractiveWizard } = await import("./interactive-5ESM5DWV.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4421,6 +4421,8 @@ var evalBenchCommand = command({
     const manifest = JSON.parse(await readFile(join(exportDir, "manifest.json"), "utf8"));
     const testIds = manifest.test_ids;
     const targetName = manifest.target?.name ?? "unknown";
+    const evalSet = manifest.eval_set ?? "";
+    const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, "_") : "";
     let stdinData;
     if (llmScoresPath) {
       stdinData = await readFile(llmScoresPath, "utf8");
@@ -4431,7 +4433,9 @@ var evalBenchCommand = command({
     const indexLines = [];
     const allPassRates = [];
     for (const testId of testIds) {
-      const testDir = join(exportDir, testId);
+      const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
+      const testDir = join(exportDir, ...subpath);
+      const artifactSubdir = subpath.join("/");
       const evaluators = [];
       const allAssertions = [];
       const codeResultsDir = join(testDir, "code_grader_results");
@@ -4527,13 +4531,14 @@ var evalBenchCommand = command({
         JSON.stringify({
           timestamp: manifest.timestamp,
           test_id: testId,
+          eval_set: evalSet || void 0,
           score: Math.round(weightedScore * 1e3) / 1e3,
           target: targetName,
           scores,
           execution_status: executionStatus,
-          grading_path: `${testId}/grading.json`,
-          timing_path: `${testId}/timing.json`,
-          response_path: hasResponse ? `${testId}/response.md` : null
+          grading_path: `${artifactSubdir}/grading.json`,
+          timing_path: `${artifactSubdir}/timing.json`,
+          response_path: hasResponse ? `${artifactSubdir}/response.md` : void 0
         })
       );
     }
@@ -4603,10 +4608,13 @@ var evalGradeCommand = command({
     const manifestPath = join2(exportDir, "manifest.json");
     const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
     const testIds = manifest.test_ids;
+    const evalSet = manifest.eval_set ?? "";
+    const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, "_") : "";
     let totalGraders = 0;
     let totalPassed = 0;
     for (const testId of testIds) {
-      const testDir = join2(exportDir, testId);
+      const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
+      const testDir = join2(exportDir, ...subpath);
       const codeGradersDir = join2(testDir, "code_graders");
       const resultsDir = join2(testDir, "code_grader_results");
       let graderFiles;
@@ -4701,7 +4709,7 @@ import { mkdir as mkdir3, writeFile as writeFile4 } from "node:fs/promises";
 import { dirname, join as join3, resolve } from "node:path";
 var evalInputCommand = command({
   name: "input",
-  description: "Extract eval inputs, target commands, and grader prompts for agent-mode runs",
+  description: "Extract eval inputs, target commands, and grader prompts for subagent-mode runs",
   args: {
     evalPath: positional({
       type: string,
@@ -4711,7 +4719,7 @@ var evalInputCommand = command({
     out: option({
       type: optional(string),
       long: "out",
-      description: "Output directory for extracted inputs (default: .agentv/results/runs/eval_<timestamp>)"
+      description: "Output directory for extracted inputs (default: .agentv/results/runs/<timestamp>)"
     })
   },
   handler: async ({ evalPath, out }) => {
@@ -4752,9 +4760,12 @@ var evalInputCommand = command({
       }
     } catch {
     }
+    const evalSetName = suite.metadata?.name?.trim() ?? "";
+    const safeEvalSet = evalSetName ? evalSetName.replace(/[\/\\:*?"<>|]/g, "_") : "";
     const testIds = [];
     for (const test of tests) {
-      const testDir = join3(outDir, test.id);
+      const subpath = safeEvalSet ? [safeEvalSet, test.id] : [test.id];
+      const testDir = join3(outDir, ...subpath);
       await mkdir3(testDir, { recursive: true });
       testIds.push(test.id);
       const inputText = test.question;
@@ -4793,6 +4804,7 @@ var evalInputCommand = command({
     }
     await writeJson(join3(outDir, "manifest.json"), {
       eval_file: resolvedEvalPath,
+      eval_set: evalSetName || void 0,
       timestamp: (/* @__PURE__ */ new Date()).toISOString(),
       target: {
         name: targetName,
@@ -4892,7 +4904,7 @@ var evalRunCommand2 = command({
     out: option({
       type: optional(string),
       long: "out",
-      description: "Output directory for results (default: .agentv/results/runs/eval_<timestamp>)"
+      description: "Output directory for results (default: .agentv/results/runs/<timestamp>)"
     }),
     workers: option({
       type: optional(number),
@@ -4938,9 +4950,12 @@ var evalRunCommand2 = command({
       }
     } catch {
     }
+    const evalSetName = suite.metadata?.name?.trim() ?? "";
+    const safeEvalSet = evalSetName ? evalSetName.replace(/[\/\\:*?"<>|]/g, "_") : "";
     const testIds = [];
     for (const test of tests) {
-      const testDir = join4(outDir, test.id);
+      const subpath = safeEvalSet ? [safeEvalSet, test.id] : [test.id];
+      const testDir = join4(outDir, ...subpath);
       await mkdir4(testDir, { recursive: true });
       testIds.push(test.id);
       const inputText = test.question;
@@ -4979,6 +4994,7 @@ var evalRunCommand2 = command({
     }
     await writeJson2(join4(outDir, "manifest.json"), {
       eval_file: resolvedEvalPath,
+      eval_set: evalSetName || void 0,
       timestamp: (/* @__PURE__ */ new Date()).toISOString(),
       target: { name: targetName, kind: targetKind },
       test_ids: testIds
@@ -4993,7 +5009,8 @@ var evalRunCommand2 = command({
       const maxWorkers = workers ?? testIds.length;
       console.log(`Invoking ${testIds.length} CLI target(s) (${maxWorkers} workers)...`);
       const invokeTarget = async (testId) => {
-        const testDir = join4(outDir, testId);
+        const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
+        const testDir = join4(outDir, ...subpath);
         const invoke = JSON.parse(await readFile4(join4(testDir, "invoke.json"), "utf8"));
         if (invoke.kind !== "cli") return;
         const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
@@ -5061,12 +5078,13 @@ var evalRunCommand2 = command({
       }
       await Promise.all(pending);
     } else {
-      console.log("Agent-as-target mode \u2014 skipping CLI invocation.");
+      console.log("Subagent-as-target mode \u2014 skipping CLI invocation.");
     }
     let totalGraders = 0;
     let totalPassed = 0;
     for (const testId of testIds) {
-      const testDir = join4(outDir, testId);
+      const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
+      const testDir = join4(outDir, ...subpath);
       const codeGradersDir = join4(testDir, "code_graders");
       const resultsDir = join4(testDir, "code_grader_results");
       let graderFiles;
@@ -5684,9 +5702,11 @@ function patchTestIds(results) {
 // src/commands/results/export.ts
 function deriveOutputDir(cwd, sourceFile) {
   const parentDir = path7.basename(path7.dirname(sourceFile));
+  if (/^\d{4}-\d{2}-\d{2}T/.test(parentDir)) {
+    return path7.join(cwd, ".agentv", "results", "export", parentDir);
+  }
   if (parentDir.startsWith("eval_")) {
-    const dirName2 = parentDir.slice(5);
-    return path7.join(cwd, ".agentv", "results", "export", dirName2);
+    return path7.join(cwd, ".agentv", "results", "export", parentDir.slice(5));
   }
   const basename = path7.basename(sourceFile, ".jsonl");
   const dirName = basename.startsWith("eval_") ? basename.slice(5) : basename;
@@ -5939,10 +5959,12 @@ function checkDirectoryNaming(runDir) {
       message: `Directory is not under a 'runs/' parent (found '${parentName}/'). Expected: .agentv/results/runs/<run-dir>`
     });
   }
-  if (!/^eval_\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z$/.test(dirName)) {
+  const isNewFormat = /^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z$/.test(dirName);
+  const isLegacyFormat = /^eval_\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z$/.test(dirName);
+  if (!isNewFormat && !isLegacyFormat) {
     diagnostics.push({
       severity: "warning",
-      message: `Directory name '${dirName}' does not match the expected pattern 'eval_<ISO-timestamp>'. Example: eval_2026-03-27T12-42-24-429Z`
+      message: `Directory name '${dirName}' does not match the expected pattern '<ISO-timestamp>'. Example: 2026-03-27T12-42-24-429Z`
     });
   }
   return diagnostics;
@@ -8525,4 +8547,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-3NLBBQX6.js.map
+//# sourceMappingURL=chunk-CQRWNXVG.js.map