npm - agentv - Versions diffs - 0.2.3 → 0.2.6 - Mend

agentv 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +3 -3
package/dist/{chunk-S3RN2GSO.js → chunk-32ZAVIQY.js} +104 -48
package/dist/chunk-32ZAVIQY.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/package.json +2 -2
package/dist/chunk-S3RN2GSO.js.map +0 -1

package/README.md CHANGED Viewed

@@ -139,7 +139,7 @@ agentv eval --target vscode_projectx "path/to/test.yaml"
 Run a specific test case with custom targets path:
 ```bash
-agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --test-id "my-test-case" "path/to/test.yaml"
+agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --eval-id "my-test-case" "path/to/test.yaml"
 ```
 ### Command Line Options
@@ -147,7 +147,7 @@ agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --test-id
 - `test_file`: Path to test YAML file (required, positional argument)
 - `--target TARGET`: Execution target name from targets.yaml (overrides target specified in test file)
 - `--targets TARGETS`: Path to targets.yaml file (default: ./.agentv/targets.yaml)
-- `--test-id TEST_ID`: Run only the test case with this specific ID
+- `--eval-id EVAL_ID`: Run only the test case with this specific ID
 - `--out OUTPUT_FILE`: Output file path (default: results/{testname}_{timestamp}.jsonl)
 - `--format FORMAT`: Output format: 'jsonl' or 'yaml' (default: jsonl)
 - `--dry-run`: Run with mock model for testing
@@ -296,7 +296,7 @@ AgentV uses an AI-powered quality grader that:
 **JSONL format (default):**
 - One JSON object per line (newline-delimited)
-- Fields: `test_id`, `score`, `hits`, `misses`, `model_answer`, `expected_aspect_count`, `target`, `timestamp`, `reasoning`, `raw_request`, `grader_raw_request`
+- Fields: `eval_id`, `score`, `hits`, `misses`, `model_answer`, `expected_aspect_count`, `target`, `timestamp`, `reasoning`, `raw_request`, `grader_raw_request`
 **YAML format (with `--format yaml`):**

package/dist/{chunk-S3RN2GSO.js → chunk-32ZAVIQY.js} RENAMED Viewed

@@ -585,7 +585,7 @@ var require_utc = __commonJS({
 import { Command } from "commander";
 import { readFileSync as readFileSync2 } from "node:fs";
-// ../../packages/core/dist/chunk-5REK5RSI.js
+// ../../packages/core/dist/chunk-QVS4OL44.js
 import { constants } from "node:fs";
 import { access } from "node:fs/promises";
 import path from "node:path";
@@ -664,6 +664,29 @@ async function resolveFileReference(rawValue, searchRoots) {
   }
   return { displayPath, attempted };
 }
+var KNOWN_PROVIDERS = [
+  "azure",
+  "anthropic",
+  "gemini",
+  "mock",
+  "vscode",
+  "vscode-insiders"
+];
+var PROVIDER_ALIASES = [
+  "azure-openai",
+  // alias for "azure"
+  "google",
+  // alias for "gemini"
+  "google-gemini",
+  // alias for "gemini"
+  "openai",
+  // legacy/future support
+  "bedrock",
+  // legacy/future support
+  "vertex"
+  // legacy/future support
+];
+var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
 // ../../packages/core/dist/index.js
 import { constants as constants3 } from "node:fs";
@@ -10903,7 +10926,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     }
     const codeSnippets = extractCodeBlocks(userSegments);
     const assistantContent = assistantMessages[0]?.content;
-    const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
+    const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
     const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
     const testCase = {
@@ -11019,7 +11042,7 @@ function cloneJsonValue(value) {
   }
   return cloneJsonObject(value);
 }
-function normalizeAssistantContent(content) {
+async function resolveAssistantContent(content, searchRoots, verbose) {
   if (typeof content === "string") {
     return content;
   }
@@ -11032,12 +11055,42 @@ function normalizeAssistantContent(content) {
       parts.push(entry);
       continue;
     }
-    const textValue = asString(entry["text"]);
+    if (!isJsonObject(entry)) {
+      continue;
+    }
+    const segmentType = asString(entry.type);
+    if (segmentType === "file") {
+      const rawValue = asString(entry.value);
+      if (!rawValue) {
+        continue;
+      }
+      const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+        rawValue,
+        searchRoots
+      );
+      if (!resolvedPath) {
+        const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+        logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
+        continue;
+      }
+      try {
+        const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+        parts.push(fileContent);
+        if (verbose) {
+          console.log(`  [Expected Assistant File] Found: ${displayPath}`);
+          console.log(`    Resolved to: ${resolvedPath}`);
+        }
+      } catch (error) {
+        logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
+      }
+      continue;
+    }
+    const textValue = asString(entry.text);
     if (typeof textValue === "string") {
       parts.push(textValue);
       continue;
     }
-    const valueValue = asString(entry["value"]);
+    const valueValue = asString(entry.value);
     if (typeof valueValue === "string") {
       parts.push(valueValue);
       continue;
@@ -11528,7 +11581,7 @@ function resolveOptionalBoolean(source2) {
 function isLikelyEnvReference(value) {
   return /^[A-Z0-9_]+$/.test(value);
 }
-var PROMPT_FILE_PREFIX = "bbeval-vscode-";
+var PROMPT_FILE_PREFIX = "agentv-vscode-";
 var VSCodeProvider = class {
   id;
   kind;
@@ -11595,7 +11648,7 @@ function buildPromptDocument(request, attachments) {
   if (instructionFiles.length > 0) {
     parts.push(buildMandatoryPrereadBlock(instructionFiles));
   }
-  parts.push(`# BbEval Request`);
+  parts.push(`# AgentV Request`);
   if (request.testCaseId) {
     parts.push(`- Test Case: ${request.testCaseId}`);
   }
@@ -11734,18 +11787,24 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
-function checkVersion(parsed, absolutePath) {
-  const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
-  if (version === void 0) {
+function checkSchema(parsed, absolutePath) {
+  const schema = parsed.$schema;
+  if (schema === void 0) {
+    throw new Error(
+      `Missing $schema field in targets.yaml at ${absolutePath}.
+Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
+    );
+  }
+  if (typeof schema !== "string") {
     throw new Error(
-      `Missing version field in targets.yaml at ${absolutePath}.
-Please add 'version: 2.0' at the top of the file.`
+      `Invalid $schema field in targets.yaml at ${absolutePath}.
+Expected a string value '${TARGETS_SCHEMA_V2}'.`
     );
   }
-  if (version < 2) {
+  if (schema !== TARGETS_SCHEMA_V2) {
     throw new Error(
-      `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
-Please update to version 2.0 format with 'targets' array.`
+      `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
+Expected '${TARGETS_SCHEMA_V2}'.`
     );
   }
 }
@@ -11793,9 +11852,9 @@ async function readTargetDefinitions(filePath) {
   const raw = await readFile3(absolutePath, "utf8");
   const parsed = parse22(raw);
   if (!isRecord(parsed)) {
-    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
+    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
   }
-  checkVersion(parsed, absolutePath);
+  checkSchema(parsed, absolutePath);
   const targets = extractTargetsArray(parsed, absolutePath);
   const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
   return definitions;
@@ -12330,17 +12389,17 @@ async function runEvaluation(options) {
     cache,
     useCache,
     now,
-    testId,
+    evalId,
     verbose,
     onResult,
     onProgress
   } = options;
   const load = loadTestCases;
   const testCases = await load(testFilePath, repoRoot, { verbose });
-  const filteredTestCases = filterTestCases(testCases, testId);
+  const filteredTestCases = filterTestCases(testCases, evalId);
   if (filteredTestCases.length === 0) {
-    if (testId) {
-      throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
+    if (evalId) {
+      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
     }
     return [];
   }
@@ -12388,7 +12447,7 @@ async function runEvaluation(options) {
     for (let i6 = 0; i6 < filteredTestCases.length; i6++) {
       await onProgress({
         workerId: i6 + 1,
-        testId: filteredTestCases[i6].id,
+        evalId: filteredTestCases[i6].id,
         status: "pending"
       });
     }
@@ -12396,15 +12455,15 @@ async function runEvaluation(options) {
   const workers = options.maxConcurrency ?? target.workers ?? 1;
   const limit = pLimit(workers);
   let nextWorkerId = 1;
-  const workerIdByTestId = /* @__PURE__ */ new Map();
+  const workerIdByEvalId = /* @__PURE__ */ new Map();
   const promises = filteredTestCases.map(
     (testCase) => limit(async () => {
       const workerId = nextWorkerId++;
-      workerIdByTestId.set(testCase.id, workerId);
+      workerIdByEvalId.set(testCase.id, workerId);
       if (onProgress) {
         await onProgress({
           workerId,
-          testId: testCase.id,
+          evalId: testCase.id,
           status: "running",
           startedAt: Date.now()
         });
@@ -12427,7 +12486,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: testCase.id,
             status: "completed",
             startedAt: 0,
             // Not used for completed status
@@ -12442,7 +12501,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: testCase.id,
             status: "failed",
             completedAt: Date.now(),
             error: error instanceof Error ? error.message : String(error)
@@ -12564,7 +12623,7 @@ async function runTestCase(options) {
     guideline_paths: testCase.guideline_paths
   };
   return {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     conversation_id: testCase.conversation_id,
     score: grade.score,
     hits: grade.hits,
@@ -12579,11 +12638,11 @@ async function runTestCase(options) {
     grader_raw_request: grade.graderRawRequest
   };
 }
-function filterTestCases(testCases, testId) {
-  if (!testId) {
+function filterTestCases(testCases, evalId) {
+  if (!evalId) {
     return testCases;
   }
-  return testCases.filter((testCase) => testCase.id === testId);
+  return testCases.filter((testCase) => testCase.id === evalId);
 }
 function buildGraderRegistry(overrides, resolveJudgeProvider) {
   const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -12607,7 +12666,7 @@ async function dumpPrompt(directory, testCase, promptInputs) {
   const filePath = path42.resolve(directory, filename);
   await mkdir3(path42.dirname(filePath), { recursive: true });
   const payload = {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
     guideline_paths: testCase.guideline_paths
@@ -12656,7 +12715,7 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
     error: message
   };
   return {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     conversation_id: testCase.conversation_id,
     score: 0,
     hits: [],
@@ -13127,9 +13186,9 @@ var ProgressDisplay = class {
       this.scheduleRender();
     } else {
       if (progress.status === "completed") {
-        console.log(`\u2713 Test ${progress.testId} completed`);
+        console.log(`\u2713 Test ${progress.evalId} completed`);
       } else if (progress.status === "failed") {
-        console.log(`\u2717 Test ${progress.testId} failed${progress.error ? `: ${progress.error}` : ""}`);
+        console.log(`\u2717 Test ${progress.evalId} failed${progress.error ? `: ${progress.error}` : ""}`);
       }
     }
   }
@@ -13162,7 +13221,7 @@ var ProgressDisplay = class {
     const statusIcon = this.getStatusIcon(worker.status);
     const elapsed = worker.startedAt ? this.formatElapsed(Date.now() - worker.startedAt) : "";
     const timeLabel = elapsed ? ` (${elapsed})` : "";
-    let testLabel = worker.testId;
+    let testLabel = worker.evalId;
     if (testLabel.length > 50) {
       testLabel = testLabel.substring(0, 47) + "...";
     }
@@ -13349,9 +13408,7 @@ var TARGET_FILE_CANDIDATES = [
   "targets.yaml",
   "targets.yml",
   path11.join(".agentv", "targets.yaml"),
-  path11.join(".agentv", "targets.yml"),
-  path11.join(".bbeval", "targets.yaml"),
-  path11.join(".bbeval", "targets.yml")
+  path11.join(".agentv", "targets.yml")
 ];
 async function fileExists4(filePath) {
   try {
@@ -13525,7 +13582,7 @@ function normalizeOptions(rawOptions) {
   return {
     target: normalizeString(rawOptions.target),
     targetsPath: normalizeString(rawOptions.targets),
-    testId: normalizeString(rawOptions.testId),
+    evalId: normalizeString(rawOptions.evalId),
     workers: workers > 0 ? workers : void 0,
     outPath: normalizeString(rawOptions.out),
     format,
@@ -13672,7 +13729,7 @@ async function runEvalCommand(input) {
       promptDumpDir,
       cache,
       useCache: options.cache,
-      testId: options.testId,
+      evalId: options.evalId,
       verbose: options.verbose,
       maxConcurrency: resolvedWorkers,
       onResult: async (result) => {
@@ -13685,7 +13742,7 @@ async function runEvalCommand(input) {
         }
         progressDisplay.updateWorker({
           workerId: event.workerId,
-          testId: event.testId,
+          evalId: event.evalId,
           status: event.status,
           startedAt: event.startedAt,
           completedAt: event.completedAt,
@@ -13735,7 +13792,7 @@ function parseInteger(value, fallback) {
   return parsed;
 }
 function registerEvalCommand(program) {
-  program.command("eval").description("Run BbEval test suites and report results").argument("<test-file>", "Path to the evaluation .test.yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--test-id <id>", "Run only the test case with this identifier").option(
+  program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the test case with this identifier").option(
     "--workers <count>",
     "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
     (value) => parseInteger(value, 1)
@@ -14008,7 +14065,6 @@ function validateMessages(messages, location, filePath, errors) {
     }
   }
 }
-var SCHEMA_TARGETS_V22 = "agentv-targets-v2";
 function isObject2(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
@@ -14046,8 +14102,8 @@ async function validateTargetsFile(filePath) {
     };
   }
   const schema = parsed["$schema"];
-  if (schema !== SCHEMA_TARGETS_V22) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_TARGETS_V22}'` : `Missing required field '$schema'. Expected '${SCHEMA_TARGETS_V22}'`;
+  if (schema !== TARGETS_SCHEMA_V2) {
+    const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
     errors.push({
       severity: "error",
       filePath: absolutePath,
@@ -14070,7 +14126,7 @@ async function validateTargetsFile(filePath) {
       errors
     };
   }
-  const knownProviders = ["azure", "openai", "anthropic", "bedrock", "vertex"];
+  const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
   for (let i6 = 0; i6 < targets.length; i6++) {
     const target = targets[i6];
     const location = `targets[${i6}]`;
@@ -14539,4 +14595,4 @@ export {
   createProgram,
   runCli
 };
-//# sourceMappingURL=chunk-S3RN2GSO.js.map
+//# sourceMappingURL=chunk-32ZAVIQY.js.map