npm - @agentv/core - Versions diffs - 0.10.0 → 0.10.1 - Mend

@agentv/core 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -54,6 +54,7 @@ __export(index_exports, {
   loadEvalCases: () => loadEvalCases,
   normalizeLineEndings: () => normalizeLineEndings,
   readTargetDefinitions: () => readTargetDefinitions,
+  readTestSuiteMetadata: () => readTestSuiteMetadata,
   readTextFile: () => readTextFile,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
   resolveFileReference: () => resolveFileReference,
@@ -239,6 +240,33 @@ var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
 var SCHEMA_CONFIG_V2 = "agentv-config-v2";
+async function readTestSuiteMetadata(testFilePath) { // Reads the suite file at testFilePath and resolves to `{ target }`, or to `{}` when the file cannot be read, parsed, or is not a JSON-style object.
+  try {
+    const absolutePath = import_node_path2.default.resolve(testFilePath); // presumably node:path resolve (absolute path) - confirm against the bundle's imports
+    const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
+    const parsed = (0, import_yaml.parse)(content); // presumably the `yaml` package parser - confirm against the bundle's imports
+    if (!isJsonObject(parsed)) { // anything isJsonObject rejects yields empty metadata
+      return {};
+    }
+    return { target: extractTargetFromSuite(parsed) }; // `target` is undefined when the suite declares no usable target
+  } catch { // best-effort: any error thrown inside the try block is swallowed and reported as empty metadata
+    return {};
+  }
+}
+function extractTargetFromSuite(suite) { // Returns the suite's trimmed target string, preferring `suite.execution.target` over `suite.target`; returns undefined when neither is a non-blank string.
+  const execution = suite.execution;
+  if (execution && typeof execution === "object" && !Array.isArray(execution)) { // only a non-null, non-array object is inspected for a nested target
+    const executionTarget = execution.target;
+    if (typeof executionTarget === "string" && executionTarget.trim().length > 0) { // whitespace-only strings are treated as unset
+      return executionTarget.trim();
+    }
+  }
+  const targetValue = suite.target; // fallback: top-level `target` field
+  if (typeof targetValue === "string" && targetValue.trim().length > 0) { // same non-blank string check as for execution.target
+    return targetValue.trim();
+  }
+  return void 0; // no usable target found
+}
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain(evalFilePath, repoRoot);
   for (const directory of directories) {
@@ -415,6 +443,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
   }
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
+  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
+  const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
@@ -469,7 +499,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
+    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -836,9 +866,9 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
   }
   return parts.join(" ");
 }
-async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
+async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
+  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
   if (candidateEvaluators === void 0) {
     return void 0;
   }
@@ -876,6 +906,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
             resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => `  Tried: ${attempt}`) : void 0
           );
         }
+      } else {
+        resolvedCwd = searchRoots[0];
       }
       evaluators.push({
         name,
@@ -904,8 +936,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
       name,
       type: "llm_judge",
       prompt,
-      promptPath,
-      model
+      promptPath
     });
   }
   return evaluators.length > 0 ? evaluators : void 0;
@@ -3222,10 +3253,7 @@ var LlmJudgeEvaluator = class {
       prompt = substituteVariables(systemPrompt, variables);
       systemPrompt = buildSystemPrompt(hasReferenceAnswer);
     }
-    const metadata = {
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
-    };
+    const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
     const response = await judgeProvider.invoke({
       question: prompt,
       metadata,
@@ -3245,8 +3273,7 @@ var LlmJudgeEvaluator = class {
       provider: judgeProvider.id,
       prompt,
       target: context.target.name,
-      ...systemPrompt !== void 0 ? { systemPrompt } : {},
-      ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
+      ...systemPrompt !== void 0 && { systemPrompt }
     };
     return {
       score,
@@ -4240,8 +4267,7 @@ async function runLlmJudgeEvaluator(options) {
     now,
     judgeProvider,
     systemPrompt: customPrompt,
-    evaluator: config,
-    judgeModel: config.model
+    evaluator: config
   });
 }
 async function resolveCustomPrompt(config) {
@@ -4427,6 +4453,7 @@ function createAgentKernel() {
   loadEvalCases,
   normalizeLineEndings,
   readTargetDefinitions,
+  readTestSuiteMetadata,
   readTextFile,
   resolveAndCreateProvider,
   resolveFileReference,