npm - @agentv/core - Version diff - 0.7.2 → 0.7.4 - Mend

@agentv/core 0.7.2 → 0.7.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15):

package/dist/chunk-L6RCDZ4Z.js +641 -0
package/dist/chunk-L6RCDZ4Z.js.map +1 -0
package/dist/evaluation/validation/index.cjs +11 -1
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +2 -2
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +76 -74
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +3 -3
package/dist/index.d.ts +3 -3
package/dist/index.js +63 -541
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-UQLHF3T7.js +0 -158
package/dist/chunk-UQLHF3T7.js.map +0 -1

package/dist/index.cjs (changed)

@@ -382,6 +382,7 @@ async function processMessages(options) {
 }
 async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
   const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
   if (!await fileExists2(absoluteTestPath)) {
     throw new Error(`Test file not found: ${evalFilePath}`);
@@ -413,62 +414,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
-      logWarning("Skipping invalid test case entry (expected object)");
+      logWarning("Skipping invalid eval case entry (expected object)");
       continue;
     }
     const evalcase = rawEvalcase;
     const id = asString(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
     const conversationId = asString(evalcase.conversation_id);
     const outcome = asString(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
+      logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
       continue;
     }
     if (!Array.isArray(expectedMessagesValue)) {
-      logWarning(`Test case '${id}' missing expected_messages array`);
+      logWarning(`Eval case '${id}' missing expected_messages array`);
       continue;
     }
     const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
     const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
-    const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
-    const userMessages = inputMessages.filter((message) => message.role === "user");
-    const systemMessages = inputMessages.filter((message) => message.role === "system");
-    if (assistantMessages.length === 0) {
-      logWarning(`No assistant message found for test case: ${id}`);
+    if (expectedMessages.length === 0) {
+      logWarning(`No expected message found for eval case: ${id}`);
       continue;
     }
-    if (assistantMessages.length > 1) {
-      logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
-    }
-    if (systemMessages.length > 1) {
-      logWarning(`Multiple system messages found for test case: ${id}, using first`);
-    }
-    let systemMessageContent;
-    if (systemMessages.length > 0) {
-      const content = systemMessages[0]?.content;
-      if (typeof content === "string") {
-        systemMessageContent = content;
-      } else if (Array.isArray(content)) {
-        const textParts = [];
-        for (const segment of content) {
-          if (isJsonObject(segment)) {
-            const value = segment.value;
-            if (typeof value === "string") {
-              textParts.push(value);
-            }
-          }
-        }
-        if (textParts.length > 0) {
-          systemMessageContent = textParts.join("\n\n");
-        }
-      }
+    if (expectedMessages.length > 1) {
+      logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
     }
     const guidelinePaths = [];
     const inputTextParts = [];
     const inputSegments = await processMessages({
-      messages: userMessages,
+      messages: inputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -478,7 +456,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const outputSegments = await processMessages({
-      messages: assistantMessages,
+      messages: expectedMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -486,10 +464,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const codeSnippets = extractCodeBlocks(inputSegments);
-    const assistantContent = assistantMessages[0]?.content;
-    const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
+    const expectedContent = expectedMessages[0]?.content;
+    const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
@@ -508,19 +486,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_segments: inputSegments,
       output_segments: outputSegments,
-      system_message: systemMessageContent,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       code_snippets: codeSnippets,
       expected_outcome: outcome,
-      evaluator: testCaseEvaluatorKind,
+      evaluator: evalCaseEvaluatorKind,
       evaluators
     };
     if (verbose) {
       console.log(`
-[Test Case: ${id}]`);
+[Eval Case: ${id}]`);
       if (testCase.guideline_paths.length > 0) {
         console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
         for (const guidelinePath of testCase.guideline_paths) {
@@ -579,7 +556,7 @@ ${body}`);
   }
   const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { question, guidelines, systemMessage: testCase.system_message };
+  return { question, guidelines };
 }
 async function fileExists2(absolutePath) {
   try {
@@ -965,6 +942,8 @@ var GeminiProvider = class {
 // src/evaluation/providers/cli.ts
 var import_node_child_process = require("child_process");
+var import_promises3 = __toESM(require("fs/promises"), 1);
+var import_node_os = __toESM(require("os"), 1);
 var import_node_path3 = __toESM(require("path"), 1);
 var import_node_util = require("util");
 var execAsync = (0, import_node_util.promisify)(import_node_child_process.exec);
@@ -980,6 +959,7 @@ async function defaultCommandRunner(command, options) {
   };
   try {
     const { stdout, stderr } = await execAsync(command, execOptions);
+    console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
     return {
       stdout,
       stderr,
@@ -990,6 +970,8 @@ async function defaultCommandRunner(command, options) {
     };
   } catch (error) {
     const execError = error;
+    console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
+    console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
     return {
       stdout: execError.stdout ?? "",
       stderr: execError.stderr ?? "",
@@ -1019,7 +1001,8 @@ var CliProvider = class {
       throw new Error("CLI provider request was aborted before execution");
     }
     await this.ensureHealthy(request.signal);
-    const templateValues = buildTemplateValues(request, this.config);
+    const outputFilePath = generateOutputFilePath(request.evalCaseId);
+    const templateValues = buildTemplateValues(request, this.config, outputFilePath);
     const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
     const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
@@ -1042,16 +1025,30 @@ var CliProvider = class {
       const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
       throw new Error(message);
     }
+    const responseText = await this.readAndCleanupOutputFile(outputFilePath);
     return {
-      text: result.stdout,
+      text: responseText,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
         exitCode: result.exitCode ?? 0,
-        cwd: this.config.cwd
+        cwd: this.config.cwd,
+        outputFile: outputFilePath
       }
     };
   }
+  async readAndCleanupOutputFile(filePath) {
+    try {
+      const content = await import_promises3.default.readFile(filePath, "utf-8");
+      return content;
+    } catch (error) {
+      const errorMsg = error instanceof Error ? error.message : String(error);
+      throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
+    } finally {
+      await import_promises3.default.unlink(filePath).catch(() => {
+      });
+    }
+  }
   async ensureHealthy(signal) {
     if (!this.config.healthcheck) {
       return;
@@ -1092,10 +1089,11 @@ var CliProvider = class {
           question: "",
           guidelines: "",
           inputFiles: [],
-          evalCaseId: "",
+          evalCaseId: "healthcheck",
           attempt: 0
         },
-        this.config
+        this.config,
+        generateOutputFilePath("healthcheck")
       )
     );
     const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
@@ -1113,14 +1111,15 @@ var CliProvider = class {
     }
   }
 };
-function buildTemplateValues(request, config) {
+function buildTemplateValues(request, config, outputFilePath) {
   const inputFiles = normalizeInputFiles(request.inputFiles);
   return {
     PROMPT: shellEscape(request.question ?? ""),
     GUIDELINES: shellEscape(request.guidelines ?? ""),
     EVAL_ID: shellEscape(request.evalCaseId ?? ""),
     ATTEMPT: shellEscape(String(request.attempt ?? 0)),
-    FILES: formatFileList(inputFiles, config.filesFormat)
+    FILES: formatFileList(inputFiles, config.filesFormat),
+    OUTPUT_FILE: shellEscape(outputFilePath)
   };
 }
 function normalizeInputFiles(inputFiles) {
@@ -1158,11 +1157,17 @@ function shellEscape(value) {
     return "''";
   }
   if (process.platform === "win32") {
-    const escaped = value.replace(/"/g, '\\"');
-    return `"${escaped}"`;
+    const escaped = value.replace(/'/g, "''");
+    return `'${escaped}'`;
   }
   return `'${value.replace(/'/g, `'"'"'`)}'`;
 }
+function generateOutputFilePath(evalCaseId) {
+  const safeEvalId = evalCaseId || "unknown";
+  const timestamp = Date.now();
+  const random = Math.random().toString(36).substring(2, 9);
+  return import_node_path3.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
+}
 function formatTimeoutSuffix(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) {
     return "";
@@ -1175,8 +1180,8 @@ function formatTimeoutSuffix(timeoutMs) {
 var import_node_child_process2 = require("child_process");
 var import_node_crypto = require("crypto");
 var import_node_fs3 = require("fs");
-var import_promises3 = require("fs/promises");
-var import_node_os = require("os");
+var import_promises4 = require("fs/promises");
+var import_node_os2 = require("os");
 var import_node_path5 = __toESM(require("path"), 1);
 var import_node_util2 = require("util");
@@ -1365,7 +1370,7 @@ var CodexProvider = class {
     try {
       const promptContent = buildPromptDocument(request, inputFiles);
       const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
-      await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
+      await (0, import_promises4.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
       const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -1448,11 +1453,11 @@ var CodexProvider = class {
     }
   }
   async createWorkspace() {
-    return await (0, import_promises3.mkdtemp)(import_node_path5.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
+    return await (0, import_promises4.mkdtemp)(import_node_path5.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises3.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises4.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
@@ -1472,7 +1477,7 @@ var CodexProvider = class {
       return void 0;
     }
     try {
-      await (0, import_promises3.mkdir)(logDir, { recursive: true });
+      await (0, import_promises4.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
@@ -1695,7 +1700,7 @@ async function locateExecutable(candidate) {
   if (includesPathSeparator) {
     const resolved = import_node_path5.default.isAbsolute(candidate) ? candidate : import_node_path5.default.resolve(candidate);
     const executablePath = await ensureWindowsExecutableVariant(resolved);
-    await (0, import_promises3.access)(executablePath, import_node_fs3.constants.F_OK);
+    await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
     return executablePath;
   }
   const locator = process.platform === "win32" ? "where" : "which";
@@ -1705,7 +1710,7 @@ async function locateExecutable(candidate) {
     const preferred = selectExecutableCandidate(lines);
     if (preferred) {
       const executablePath = await ensureWindowsExecutableVariant(preferred);
-      await (0, import_promises3.access)(executablePath, import_node_fs3.constants.F_OK);
+      await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
       return executablePath;
     }
   } catch {
@@ -1739,7 +1744,7 @@ async function ensureWindowsExecutableVariant(candidate) {
   for (const ext of extensions) {
     const withExtension = `${candidate}${ext}`;
     try {
-      await (0, import_promises3.access)(withExtension, import_node_fs3.constants.F_OK);
+      await (0, import_promises4.access)(withExtension, import_node_fs3.constants.F_OK);
       return withExtension;
     } catch {
     }
@@ -2041,7 +2046,7 @@ var MockProvider = class {
 // src/evaluation/providers/targets.ts
 var import_zod = require("zod");
-var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
+var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES", "OUTPUT_FILE"]);
 var BASE_TARGET_SCHEMA = import_zod.z.object({
   name: import_zod.z.string().min(1, "target name is required"),
   provider: import_zod.z.string().min(1, "provider is required"),
@@ -2768,7 +2773,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 var import_node_fs4 = require("fs");
-var import_promises4 = require("fs/promises");
+var import_promises5 = require("fs/promises");
 var import_node_path7 = __toESM(require("path"), 1);
 var import_yaml2 = require("yaml");
@@ -2838,7 +2843,7 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises4.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises5.access)(filePath, import_node_fs4.constants.F_OK);
     return true;
   } catch {
     return false;
@@ -2849,7 +2854,7 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
+  const raw = await (0, import_promises5.readFile)(absolutePath, "utf8");
   const parsed = (0, import_yaml2.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -3095,7 +3100,6 @@ var CodeEvaluator = class {
         expected_outcome: context.evalCase.expected_outcome,
         reference_answer: context.evalCase.reference_answer,
         candidate_answer: context.candidate,
-        system_message: context.promptInputs.systemMessage ?? "",
         guideline_paths: context.evalCase.guideline_paths,
         input_files: context.evalCase.file_paths,
         input_segments: context.evalCase.input_segments
@@ -3195,7 +3199,7 @@ function substituteVariables(template, variables) {
 // src/evaluation/orchestrator.ts
 var import_node_crypto3 = require("crypto");
-var import_promises5 = require("fs/promises");
+var import_promises6 = require("fs/promises");
 var import_node_path8 = __toESM(require("path"), 1);
 // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3337,7 +3341,7 @@ function validateConcurrency(concurrency) {
 // src/evaluation/orchestrator.ts
 async function runEvaluation(options) {
   const {
-    testFilePath,
+    testFilePath: evalFilePath,
     repoRoot,
     target,
     targets,
@@ -3356,11 +3360,11 @@ async function runEvaluation(options) {
     onProgress
   } = options;
   const load = loadEvalCases;
-  const evalCases = await load(testFilePath, repoRoot, { verbose });
+  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
-      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
+      throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
     }
     return [];
   }
@@ -3739,8 +3743,7 @@ async function evaluateCandidate(options) {
   const rawRequest = {
     question: promptInputs.question,
     ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
-    guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? ""
+    guideline_paths: evalCase.guideline_paths
   };
   return {
     eval_id: evalCase.id,
@@ -3956,14 +3959,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
   const filePath = import_node_path8.default.resolve(directory, filename);
-  await (0, import_promises5.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
+  await (0, import_promises6.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises5.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises6.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
@@ -4004,7 +4007,6 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     question: promptInputs.question,
     ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
     guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? "",
     error: message
   };
   return {