npm - @agentv/core - Versions diffs - 0.7.0 → 0.7.3 - Mend

@agentv/core 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{chunk-L7I5UTJU.js → chunk-UQLHF3T7.js} +12 -3
package/dist/chunk-UQLHF3T7.js.map +1 -0
package/dist/evaluation/validation/index.cjs +143 -2
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.d.cts +1 -1
package/dist/evaluation/validation/index.d.ts +1 -1
package/dist/evaluation/validation/index.js +143 -2
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +79 -135
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +3 -3
package/dist/index.d.ts +3 -3
package/dist/index.js +69 -132
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-L7I5UTJU.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -95,7 +95,7 @@ type LlmJudgeEvaluatorConfig = {
 };
 type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
 /**
- * Test case definition sourced from AgentV specs.
+ * Eval case definition sourced from AgentV specs.
  */
 interface EvalCase {
     readonly id: string;
@@ -104,7 +104,6 @@ interface EvalCase {
     readonly question: string;
     readonly input_segments: readonly JsonObject[];
     readonly output_segments: readonly JsonObject[];
-    readonly system_message?: string;
     readonly reference_answer: string;
     readonly guideline_paths: readonly string[];
     readonly guideline_patterns?: readonly string[];
@@ -115,7 +114,7 @@ interface EvalCase {
     readonly evaluators?: readonly EvaluatorConfig[];
 }
 /**
- * Evaluator scorecard for a single test case run.
+ * Evaluator scorecard for a single eval case run.
  */
 interface EvaluationResult {
     readonly eval_id: string;
@@ -159,6 +158,7 @@ declare function isGuidelineFile(filePath: string, patterns?: readonly string[])
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
 type LoadOptions = {
     readonly verbose?: boolean;
+    readonly evalId?: string;
 };
 /**
  * Load eval cases from a AgentV YAML specification file.

package/dist/index.d.ts CHANGED Viewed

@@ -95,7 +95,7 @@ type LlmJudgeEvaluatorConfig = {
 };
 type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
 /**
- * Test case definition sourced from AgentV specs.
+ * Eval case definition sourced from AgentV specs.
  */
 interface EvalCase {
     readonly id: string;
@@ -104,7 +104,6 @@ interface EvalCase {
     readonly question: string;
     readonly input_segments: readonly JsonObject[];
     readonly output_segments: readonly JsonObject[];
-    readonly system_message?: string;
     readonly reference_answer: string;
     readonly guideline_paths: readonly string[];
     readonly guideline_patterns?: readonly string[];
@@ -115,7 +114,7 @@ interface EvalCase {
     readonly evaluators?: readonly EvaluatorConfig[];
 }
 /**
- * Evaluator scorecard for a single test case run.
+ * Evaluator scorecard for a single eval case run.
  */
 interface EvaluationResult {
     readonly eval_id: string;
@@ -159,6 +158,7 @@ declare function isGuidelineFile(filePath: string, patterns?: readonly string[])
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
 type LoadOptions = {
     readonly verbose?: boolean;
+    readonly evalId?: string;
 };
 /**
  * Load eval cases from a AgentV YAML specification file.

package/dist/index.js CHANGED Viewed

@@ -4,9 +4,10 @@ import {
   buildSearchRoots,
   fileExists,
   findGitRoot,
+  isAgentProvider,
   readTextFile,
   resolveFileReference
-} from "./chunk-L7I5UTJU.js";
+} from "./chunk-UQLHF3T7.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -217,6 +218,7 @@ async function processMessages(options) {
 }
 async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
   const absoluteTestPath = path.resolve(evalFilePath);
   if (!await fileExists2(absoluteTestPath)) {
     throw new Error(`Test file not found: ${evalFilePath}`);
@@ -248,62 +250,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
-      logWarning("Skipping invalid test case entry (expected object)");
+      logWarning("Skipping invalid eval case entry (expected object)");
       continue;
     }
     const evalcase = rawEvalcase;
     const id = asString(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
     const conversationId = asString(evalcase.conversation_id);
     const outcome = asString(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
+      logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
       continue;
     }
     if (!Array.isArray(expectedMessagesValue)) {
-      logWarning(`Test case '${id}' missing expected_messages array`);
+      logWarning(`Eval case '${id}' missing expected_messages array`);
       continue;
     }
     const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
     const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
-    const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
-    const userMessages = inputMessages.filter((message) => message.role === "user");
-    const systemMessages = inputMessages.filter((message) => message.role === "system");
-    if (assistantMessages.length === 0) {
-      logWarning(`No assistant message found for test case: ${id}`);
+    if (expectedMessages.length === 0) {
+      logWarning(`No expected message found for eval case: ${id}`);
       continue;
     }
-    if (assistantMessages.length > 1) {
-      logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
-    }
-    if (systemMessages.length > 1) {
-      logWarning(`Multiple system messages found for test case: ${id}, using first`);
-    }
-    let systemMessageContent;
-    if (systemMessages.length > 0) {
-      const content = systemMessages[0]?.content;
-      if (typeof content === "string") {
-        systemMessageContent = content;
-      } else if (Array.isArray(content)) {
-        const textParts = [];
-        for (const segment of content) {
-          if (isJsonObject(segment)) {
-            const value = segment.value;
-            if (typeof value === "string") {
-              textParts.push(value);
-            }
-          }
-        }
-        if (textParts.length > 0) {
-          systemMessageContent = textParts.join("\n\n");
-        }
-      }
+    if (expectedMessages.length > 1) {
+      logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
     }
     const guidelinePaths = [];
     const inputTextParts = [];
     const inputSegments = await processMessages({
-      messages: userMessages,
+      messages: inputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -313,7 +292,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const outputSegments = await processMessages({
-      messages: assistantMessages,
+      messages: expectedMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -321,10 +300,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const codeSnippets = extractCodeBlocks(inputSegments);
-    const assistantContent = assistantMessages[0]?.content;
-    const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
+    const expectedContent = expectedMessages[0]?.content;
+    const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
@@ -343,19 +322,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_segments: inputSegments,
       output_segments: outputSegments,
-      system_message: systemMessageContent,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       code_snippets: codeSnippets,
       expected_outcome: outcome,
-      evaluator: testCaseEvaluatorKind,
+      evaluator: evalCaseEvaluatorKind,
       evaluators
     };
     if (verbose) {
       console.log(`
-[Test Case: ${id}]`);
+[Eval Case: ${id}]`);
       if (testCase.guideline_paths.length > 0) {
         console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
         for (const guidelinePath of testCase.guideline_paths) {
@@ -414,7 +392,7 @@ ${body}`);
   }
   const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { question, guidelines, systemMessage: testCase.system_message };
+  return { question, guidelines };
 }
 async function fileExists2(absolutePath) {
   try {
@@ -1010,7 +988,7 @@ function formatTimeoutSuffix(timeoutMs) {
 import { exec as execCallback, spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
 import { constants as constants2, createWriteStream } from "node:fs";
-import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
+import { access as access2, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import path4 from "node:path";
 import { promisify as promisify2 } from "node:util";
@@ -1173,7 +1151,6 @@ function pathToFileUri(filePath) {
 var execAsync2 = promisify2(execCallback);
 var WORKSPACE_PREFIX = "agentv-codex-";
 var PROMPT_FILENAME = "prompt.md";
-var FILES_DIR = "files";
 var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
 var CodexProvider = class {
   id;
@@ -1196,21 +1173,10 @@ var CodexProvider = class {
     }
     await this.ensureEnvironmentReady();
     const inputFiles = normalizeInputFiles2(request.inputFiles);
-    const originalGuidelines = new Set(
-      collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
-    );
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
-        inputFiles,
-        workspaceRoot,
-        originalGuidelines
-      );
-      const promptContent = buildPromptDocument(request, mirroredInputFiles, {
-        guidelinePatterns: request.guideline_patterns,
-        guidelineOverrides: guidelineMirrors
-      });
+      const promptContent = buildPromptDocument(request, inputFiles);
       const promptFile = path4.join(workspaceRoot, PROMPT_FILENAME);
       await writeFile(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
@@ -1239,7 +1205,7 @@ var CodexProvider = class {
           executable: this.resolvedExecutable ?? this.config.executable,
           promptFile,
           workspace: workspaceRoot,
-          inputFiles: mirroredInputFiles,
+          inputFiles,
           logFile: logger?.filePath
         }
       };
@@ -1294,37 +1260,6 @@ var CodexProvider = class {
       throw error;
     }
   }
-  async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
-    if (!inputFiles || inputFiles.length === 0) {
-      return {
-        mirroredInputFiles: void 0,
-        guidelineMirrors: /* @__PURE__ */ new Set()
-      };
-    }
-    const filesRoot = path4.join(workspaceRoot, FILES_DIR);
-    await mkdir(filesRoot, { recursive: true });
-    const mirrored = [];
-    const guidelineMirrors = /* @__PURE__ */ new Set();
-    const nameCounts = /* @__PURE__ */ new Map();
-    for (const inputFile of inputFiles) {
-      const absoluteSource = path4.resolve(inputFile);
-      const baseName = path4.basename(absoluteSource);
-      const count = nameCounts.get(baseName) ?? 0;
-      nameCounts.set(baseName, count + 1);
-      const finalName = count === 0 ? baseName : `${baseName}.${count}`;
-      const destination = path4.join(filesRoot, finalName);
-      await copyFile(absoluteSource, destination);
-      const resolvedDestination = path4.resolve(destination);
-      mirrored.push(resolvedDestination);
-      if (guidelineOriginals.has(absoluteSource)) {
-        guidelineMirrors.add(resolvedDestination);
-      }
-    }
-    return {
-      mirroredInputFiles: mirrored,
-      guidelineMirrors
-    };
-  }
   async createWorkspace() {
     return await mkdtemp(path4.join(tmpdir(), WORKSPACE_PREFIX));
   }
@@ -2295,23 +2230,25 @@ function resolveOptionalString(source, env, description, options) {
   if (trimmed.length === 0) {
     return void 0;
   }
-  const envValue = env[trimmed];
-  if (envValue !== void 0) {
-    if (envValue.trim().length === 0) {
-      throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
+  const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
+  if (envVarMatch) {
+    const varName = envVarMatch[1];
+    const envValue = env[varName];
+    if (envValue !== void 0) {
+      if (envValue.trim().length === 0) {
+        throw new Error(`Environment variable '${varName}' for ${description} is empty`);
+      }
+      return envValue;
     }
-    return envValue;
-  }
-  const allowLiteral = options?.allowLiteral ?? false;
-  const optionalEnv = options?.optionalEnv ?? false;
-  const looksLikeEnv = isLikelyEnvReference(trimmed);
-  if (looksLikeEnv) {
+    const optionalEnv = options?.optionalEnv ?? false;
     if (optionalEnv) {
       return void 0;
     }
-    if (!allowLiteral) {
-      throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
-    }
+    throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
+  }
+  const allowLiteral = options?.allowLiteral ?? false;
+  if (!allowLiteral) {
+    throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
   }
   return trimmed;
 }
@@ -2358,9 +2295,6 @@ function resolveOptionalBoolean(source) {
   }
   throw new Error("expected boolean value");
 }
-function isLikelyEnvReference(value) {
-  return /^[A-Z0-9_]+$/.test(value);
-}
 function resolveOptionalStringArray(source, env, description) {
   if (source === void 0 || source === null) {
     return void 0;
@@ -2381,21 +2315,25 @@ function resolveOptionalStringArray(source, env, description) {
     if (trimmed.length === 0) {
       throw new Error(`${description}[${i}] cannot be empty`);
     }
-    const envValue = env[trimmed];
-    if (envValue !== void 0) {
-      if (envValue.trim().length === 0) {
-        throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
+    const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
+    if (envVarMatch) {
+      const varName = envVarMatch[1];
+      const envValue = env[varName];
+      if (envValue !== void 0) {
+        if (envValue.trim().length === 0) {
+          throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
+        }
+        resolved.push(envValue);
+        continue;
       }
-      resolved.push(envValue);
-    } else {
-      resolved.push(trimmed);
+      throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
     }
+    resolved.push(trimmed);
   }
   return resolved.length > 0 ? resolved : void 0;
 }
 // src/evaluation/providers/vscode.ts
-import { readFile as readFile2 } from "node:fs/promises";
 import path5 from "node:path";
 import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
 var VSCodeProvider = class {
@@ -2439,7 +2377,7 @@ var VSCodeProvider = class {
         }
       };
     }
-    const responseText = await readFile2(session.responseFile, "utf8");
+    const responseText = await readTextFile(session.responseFile);
     return {
       text: responseText,
       raw: {
@@ -2493,7 +2431,7 @@ var VSCodeProvider = class {
     }
     const responses = [];
     for (const [index, responseFile] of session.responseFiles.entries()) {
-      const responseText = await readFile2(responseFile, "utf8");
+      const responseText = await readTextFile(responseFile);
       responses.push({
         text: responseText,
         raw: {
@@ -2643,7 +2581,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 import { constants as constants3 } from "node:fs";
-import { access as access3, readFile as readFile3 } from "node:fs/promises";
+import { access as access3, readFile as readFile2 } from "node:fs/promises";
 import path6 from "node:path";
 import { parse as parse2 } from "yaml";
 function isRecord(value) {
@@ -2711,7 +2649,7 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await readFile3(absolutePath, "utf8");
+  const raw = await readFile2(absolutePath, "utf8");
   const parsed = parse2(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -2957,7 +2895,6 @@ var CodeEvaluator = class {
         expected_outcome: context.evalCase.expected_outcome,
         reference_answer: context.evalCase.reference_answer,
         candidate_answer: context.candidate,
-        system_message: context.promptInputs.systemMessage ?? "",
         guideline_paths: context.evalCase.guideline_paths,
         input_files: context.evalCase.file_paths,
         input_segments: context.evalCase.input_segments
@@ -3199,7 +3136,7 @@ function validateConcurrency(concurrency) {
 // src/evaluation/orchestrator.ts
 async function runEvaluation(options) {
   const {
-    testFilePath,
+    testFilePath: evalFilePath,
     repoRoot,
     target,
     targets,
@@ -3218,11 +3155,11 @@ async function runEvaluation(options) {
     onProgress
   } = options;
   const load = loadEvalCases;
-  const evalCases = await load(testFilePath, repoRoot, { verbose });
+  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
-      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
+      throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
     }
     return [];
   }
@@ -3376,7 +3313,8 @@ async function runEvaluation(options) {
         target.name,
         (now ?? (() => /* @__PURE__ */ new Date()))(),
         outcome.reason,
-        promptInputs
+        promptInputs,
+        primaryProvider
       );
       results.push(errorResult);
       if (onResult) {
@@ -3460,7 +3398,7 @@ async function runBatchEvaluation(options) {
         agentTimeoutMs
       });
     } catch (error) {
-      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
       results.push(errorResult);
       if (onResult) {
         await onResult(errorResult);
@@ -3537,7 +3475,7 @@ async function runEvalCase(options) {
         attempt += 1;
         continue;
       }
-      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
     }
   }
   if (!providerResponse) {
@@ -3546,7 +3484,8 @@ async function runEvalCase(options) {
       target.name,
       nowFn(),
       lastError ?? new Error("Provider did not return a response"),
-      promptInputs
+      promptInputs,
+      provider
     );
   }
   if (cacheKey && cache && !cachedResponse) {
@@ -3566,7 +3505,7 @@ async function runEvalCase(options) {
       agentTimeoutMs
     });
   } catch (error) {
-    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
   }
 }
 async function evaluateCandidate(options) {
@@ -3598,9 +3537,8 @@ async function evaluateCandidate(options) {
   const completedAt = nowFn();
   const rawRequest = {
     question: promptInputs.question,
-    guidelines: promptInputs.guidelines,
-    guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? ""
+    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
+    guideline_paths: evalCase.guideline_paths
   };
   return {
     eval_id: evalCase.id,
@@ -3858,13 +3796,12 @@ async function invokeProvider(provider, options) {
     }
   }
 }
-function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
+function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
   const message = error instanceof Error ? error.message : String(error);
   const rawRequest = {
     question: promptInputs.question,
-    guidelines: promptInputs.guidelines,
+    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
     guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? "",
     error: message
   };
   return {