npm - @agentv/core - Version diff - 0.2.3 → 0.2.6 - Mend

@agentv/core 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-5REK5RSI.js → chunk-QVS4OL44.js} +30 -2
package/dist/chunk-QVS4OL44.js.map +1 -0
package/dist/evaluation/validation/index.cjs +30 -4
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +7 -5
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +73 -32
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +9 -9
package/dist/index.d.ts +9 -9
package/dist/index.js +70 -33
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-5REK5RSI.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { AxChatRequest } from '@ax-llm/ax';
 /**
- * JSON primitive values appearing in BbEval payloads.
+ * JSON primitive values appearing in AgentV payloads.
  */
 type JsonPrimitive = string | number | boolean | null;
 /**
@@ -64,11 +64,11 @@ type TestMessage = SystemTestMessage | UserTestMessage | AssistantTestMessage |
  */
 declare function isTestMessageRole(value: unknown): value is TestMessageRole;
 /**
- * Guard matching BbEval JSON objects.
+ * Guard matching AgentV JSON objects.
  */
 declare function isJsonObject(value: unknown): value is JsonObject;
 /**
- * Guard matching BbEval JSON values.
+ * Guard matching AgentV JSON values.
  */
 declare function isJsonValue(value: unknown): value is JsonValue;
 /**
@@ -89,7 +89,7 @@ type GraderKind = (typeof GRADER_KIND_VALUES)[number];
  */
 declare function isGraderKind(value: unknown): value is GraderKind;
 /**
- * Test case definition sourced from BbEval specs.
+ * Test case definition sourced from AgentV specs.
  */
 interface TestCase {
     readonly id: string;
@@ -106,7 +106,7 @@ interface TestCase {
  * Evaluator scorecard for a single test case run.
  */
 interface EvaluationResult {
-    readonly test_id: string;
+    readonly eval_id: string;
     readonly conversation_id?: string;
     readonly score: number;
     readonly hits: readonly string[];
@@ -130,14 +130,14 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
  */
 declare function isGuidelineFile(filePath: string): boolean;
 /**
- * Extract fenced code blocks from BbEval user segments.
+ * Extract fenced code blocks from AgentV user segments.
  */
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
 type LoadOptions = {
     readonly verbose?: boolean;
 };
 /**
- * Load test cases from a BbEval YAML specification file.
+ * Load eval cases from a AgentV YAML specification file.
  */
 declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
 /**
@@ -369,7 +369,7 @@ interface RunTestCaseOptions {
 }
 interface ProgressEvent {
     readonly workerId: number;
-    readonly testId: string;
+    readonly evalId: string;
     readonly status: "pending" | "running" | "completed" | "failed";
     readonly startedAt?: number;
     readonly completedAt?: number;
@@ -389,7 +389,7 @@ interface RunEvaluationOptions {
     readonly cache?: EvaluationCache;
     readonly useCache?: boolean;
     readonly now?: () => Date;
-    readonly testId?: string;
+    readonly evalId?: string;
     readonly verbose?: boolean;
     readonly maxConcurrency?: number;
     readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;

package/dist/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { AxChatRequest } from '@ax-llm/ax';
 /**
- * JSON primitive values appearing in BbEval payloads.
+ * JSON primitive values appearing in AgentV payloads.
  */
 type JsonPrimitive = string | number | boolean | null;
 /**
@@ -64,11 +64,11 @@ type TestMessage = SystemTestMessage | UserTestMessage | AssistantTestMessage |
  */
 declare function isTestMessageRole(value: unknown): value is TestMessageRole;
 /**
- * Guard matching BbEval JSON objects.
+ * Guard matching AgentV JSON objects.
  */
 declare function isJsonObject(value: unknown): value is JsonObject;
 /**
- * Guard matching BbEval JSON values.
+ * Guard matching AgentV JSON values.
  */
 declare function isJsonValue(value: unknown): value is JsonValue;
 /**
@@ -89,7 +89,7 @@ type GraderKind = (typeof GRADER_KIND_VALUES)[number];
  */
 declare function isGraderKind(value: unknown): value is GraderKind;
 /**
- * Test case definition sourced from BbEval specs.
+ * Test case definition sourced from AgentV specs.
  */
 interface TestCase {
     readonly id: string;
@@ -106,7 +106,7 @@ interface TestCase {
  * Evaluator scorecard for a single test case run.
  */
 interface EvaluationResult {
-    readonly test_id: string;
+    readonly eval_id: string;
     readonly conversation_id?: string;
     readonly score: number;
     readonly hits: readonly string[];
@@ -130,14 +130,14 @@ declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
  */
 declare function isGuidelineFile(filePath: string): boolean;
 /**
- * Extract fenced code blocks from BbEval user segments.
+ * Extract fenced code blocks from AgentV user segments.
  */
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
 type LoadOptions = {
     readonly verbose?: boolean;
 };
 /**
- * Load test cases from a BbEval YAML specification file.
+ * Load eval cases from a AgentV YAML specification file.
  */
 declare function loadTestCases(testFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly TestCase[]>;
 /**
@@ -369,7 +369,7 @@ interface RunTestCaseOptions {
 }
 interface ProgressEvent {
     readonly workerId: number;
-    readonly testId: string;
+    readonly evalId: string;
     readonly status: "pending" | "running" | "completed" | "failed";
     readonly startedAt?: number;
     readonly completedAt?: number;
@@ -389,7 +389,7 @@ interface RunEvaluationOptions {
     readonly cache?: EvaluationCache;
     readonly useCache?: boolean;
     readonly now?: () => Date;
-    readonly testId?: string;
+    readonly evalId?: string;
     readonly verbose?: boolean;
     readonly maxConcurrency?: number;
     readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,8 @@
 import {
+  TARGETS_SCHEMA_V2,
   buildSearchRoots,
   resolveFileReference
-} from "./chunk-5REK5RSI.js";
+} from "./chunk-QVS4OL44.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -205,7 +206,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     }
     const codeSnippets = extractCodeBlocks(userSegments);
     const assistantContent = assistantMessages[0]?.content;
-    const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
+    const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
     const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
     const testCase = {
@@ -321,7 +322,7 @@ function cloneJsonValue(value) {
   }
   return cloneJsonObject(value);
 }
-function normalizeAssistantContent(content) {
+async function resolveAssistantContent(content, searchRoots, verbose) {
   if (typeof content === "string") {
     return content;
   }
@@ -334,12 +335,42 @@ function normalizeAssistantContent(content) {
       parts.push(entry);
       continue;
     }
-    const textValue = asString(entry["text"]);
+    if (!isJsonObject(entry)) {
+      continue;
+    }
+    const segmentType = asString(entry.type);
+    if (segmentType === "file") {
+      const rawValue = asString(entry.value);
+      if (!rawValue) {
+        continue;
+      }
+      const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+        rawValue,
+        searchRoots
+      );
+      if (!resolvedPath) {
+        const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+        logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
+        continue;
+      }
+      try {
+        const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+        parts.push(fileContent);
+        if (verbose) {
+          console.log(`  [Expected Assistant File] Found: ${displayPath}`);
+          console.log(`    Resolved to: ${resolvedPath}`);
+        }
+      } catch (error) {
+        logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
+      }
+      continue;
+    }
+    const textValue = asString(entry.text);
     if (typeof textValue === "string") {
       parts.push(textValue);
       continue;
     }
-    const valueValue = asString(entry["value"]);
+    const valueValue = asString(entry.value);
     if (typeof valueValue === "string") {
       parts.push(valueValue);
       continue;
@@ -844,7 +875,7 @@ import { mkdtemp, readFile as readFile2, rm, writeFile } from "node:fs/promises"
 import { tmpdir } from "node:os";
 import path2 from "node:path";
 import { dispatchAgentSession, getSubagentRoot, provisionSubagents } from "subagent";
-var PROMPT_FILE_PREFIX = "bbeval-vscode-";
+var PROMPT_FILE_PREFIX = "agentv-vscode-";
 var VSCodeProvider = class {
   id;
   kind;
@@ -911,7 +942,7 @@ function buildPromptDocument(request, attachments) {
   if (instructionFiles.length > 0) {
     parts.push(buildMandatoryPrereadBlock(instructionFiles));
   }
-  parts.push(`# BbEval Request`);
+  parts.push(`# AgentV Request`);
   if (request.testCaseId) {
     parts.push(`- Test Case: ${request.testCaseId}`);
   }
@@ -1056,18 +1087,24 @@ import { parse as parse2 } from "yaml";
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
-function checkVersion(parsed, absolutePath) {
-  const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
-  if (version === void 0) {
+function checkSchema(parsed, absolutePath) {
+  const schema = parsed.$schema;
+  if (schema === void 0) {
+    throw new Error(
+      `Missing $schema field in targets.yaml at ${absolutePath}.
+Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
+    );
+  }
+  if (typeof schema !== "string") {
     throw new Error(
-      `Missing version field in targets.yaml at ${absolutePath}.
-Please add 'version: 2.0' at the top of the file.`
+      `Invalid $schema field in targets.yaml at ${absolutePath}.
+Expected a string value '${TARGETS_SCHEMA_V2}'.`
     );
   }
-  if (version < 2) {
+  if (schema !== TARGETS_SCHEMA_V2) {
     throw new Error(
-      `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
-Please update to version 2.0 format with 'targets' array.`
+      `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
+Expected '${TARGETS_SCHEMA_V2}'.`
     );
   }
 }
@@ -1115,9 +1152,9 @@ async function readTargetDefinitions(filePath) {
   const raw = await readFile3(absolutePath, "utf8");
   const parsed = parse2(raw);
   if (!isRecord(parsed)) {
-    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
+    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
   }
-  checkVersion(parsed, absolutePath);
+  checkSchema(parsed, absolutePath);
   const targets = extractTargetsArray(parsed, absolutePath);
   const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
   return definitions;
@@ -1678,17 +1715,17 @@ async function runEvaluation(options) {
     cache,
     useCache,
     now,
-    testId,
+    evalId,
     verbose,
     onResult,
     onProgress
   } = options;
   const load = loadTestCases;
   const testCases = await load(testFilePath, repoRoot, { verbose });
-  const filteredTestCases = filterTestCases(testCases, testId);
+  const filteredTestCases = filterTestCases(testCases, evalId);
   if (filteredTestCases.length === 0) {
-    if (testId) {
-      throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
+    if (evalId) {
+      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
     }
     return [];
   }
@@ -1736,7 +1773,7 @@ async function runEvaluation(options) {
     for (let i = 0; i < filteredTestCases.length; i++) {
       await onProgress({
         workerId: i + 1,
-        testId: filteredTestCases[i].id,
+        evalId: filteredTestCases[i].id,
         status: "pending"
       });
     }
@@ -1744,15 +1781,15 @@ async function runEvaluation(options) {
   const workers = options.maxConcurrency ?? target.workers ?? 1;
   const limit = pLimit(workers);
   let nextWorkerId = 1;
-  const workerIdByTestId = /* @__PURE__ */ new Map();
+  const workerIdByEvalId = /* @__PURE__ */ new Map();
   const promises = filteredTestCases.map(
     (testCase) => limit(async () => {
       const workerId = nextWorkerId++;
-      workerIdByTestId.set(testCase.id, workerId);
+      workerIdByEvalId.set(testCase.id, workerId);
       if (onProgress) {
         await onProgress({
           workerId,
-          testId: testCase.id,
+          evalId: testCase.id,
           status: "running",
           startedAt: Date.now()
         });
@@ -1775,7 +1812,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: testCase.id,
             status: "completed",
             startedAt: 0,
             // Not used for completed status
@@ -1790,7 +1827,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: testCase.id,
             status: "failed",
             completedAt: Date.now(),
             error: error instanceof Error ? error.message : String(error)
@@ -1912,7 +1949,7 @@ async function runTestCase(options) {
     guideline_paths: testCase.guideline_paths
   };
   return {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     conversation_id: testCase.conversation_id,
     score: grade.score,
     hits: grade.hits,
@@ -1927,11 +1964,11 @@ async function runTestCase(options) {
     grader_raw_request: grade.graderRawRequest
   };
 }
-function filterTestCases(testCases, testId) {
-  if (!testId) {
+function filterTestCases(testCases, evalId) {
+  if (!evalId) {
     return testCases;
   }
-  return testCases.filter((testCase) => testCase.id === testId);
+  return testCases.filter((testCase) => testCase.id === evalId);
 }
 function buildGraderRegistry(overrides, resolveJudgeProvider) {
   const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -1955,7 +1992,7 @@ async function dumpPrompt(directory, testCase, promptInputs) {
   const filePath = path4.resolve(directory, filename);
   await mkdir(path4.dirname(filePath), { recursive: true });
   const payload = {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
     guideline_paths: testCase.guideline_paths
@@ -2004,7 +2041,7 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
     error: message
   };
   return {
-    test_id: testCase.id,
+    eval_id: testCase.id,
     conversation_id: testCase.conversation_id,
     score: 0,
     hits: [],