npm - @agentv/core - Versions diffs - 0.5.3 → 0.6.1 - Mend

@agentv/core 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-NL7K4CAK.js → chunk-OW3SHBIJ.js} +7 -2
package/dist/chunk-OW3SHBIJ.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +172 -5
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +28 -2
package/dist/index.d.ts +28 -2
package/dist/index.js +167 -6
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-NL7K4CAK.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -36,6 +36,7 @@ __export(index_exports, {
   buildDirectoryChain: () => buildDirectoryChain,
   buildPromptInputs: () => buildPromptInputs,
   buildSearchRoots: () => buildSearchRoots,
+  consumeCodexLogEntries: () => consumeCodexLogEntries,
   createAgentKernel: () => createAgentKernel,
   createProvider: () => createProvider,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
@@ -52,11 +53,13 @@ __export(index_exports, {
   listTargetNames: () => listTargetNames,
   loadEvalCases: () => loadEvalCases,
   readTargetDefinitions: () => readTargetDefinitions,
+  readTextFile: () => readTextFile,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
   resolveFileReference: () => resolveFileReference,
   resolveTargetDefinition: () => resolveTargetDefinition,
   runEvalCase: () => runEvalCase,
-  runEvaluation: () => runEvaluation
+  runEvaluation: () => runEvaluation,
+  subscribeToCodexLogEntries: () => subscribeToCodexLogEntries
 });
 module.exports = __toCommonJS(index_exports);
@@ -130,6 +133,10 @@ async function fileExists(filePath) {
     return false;
   }
 }
+async function readTextFile(filePath) { // Reads filePath as UTF-8 text and resolves to its content with CRLF line endings normalized to LF; read errors are not caught here and reject the returned promise.
+  const content = await (0, import_promises.readFile)(filePath, "utf8");
+  return content.replace(/\r\n/g, "\n"); // only CR+LF pairs are rewritten; a lone CR is left untouched
+}
 async function findGitRoot(startPath) {
   let currentDir = import_node_path.default.dirname(import_node_path.default.resolve(startPath));
   const root = import_node_path.default.parse(currentDir).root;
@@ -308,6 +315,9 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
+  const datasetNameFromSuite = asString(suite.dataset)?.trim();
+  const fallbackDataset = import_node_path2.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
   const schema = suite.$schema;
   if (schema !== SCHEMA_EVAL_V2) {
     const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
@@ -455,6 +465,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     ];
     const testCase = {
       id,
+      dataset: datasetName,
       conversation_id: conversationId,
       task: userTextPrompt,
       user_segments: userSegments,
@@ -835,6 +846,9 @@ var AzureProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() { // Returns this.ai unchanged; providerSupportsAx() detects this method and evaluateWithAx() passes the result to LLM_JUDGE.forward.
+    return this.ai; // presumably the Ax client created in the constructor (not visible in this hunk) - confirm
+  }
 };
 var AnthropicProvider = class {
   constructor(targetName, config) {
@@ -869,6 +883,9 @@ var AnthropicProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() { // Returns this.ai unchanged; providerSupportsAx() detects this method and evaluateWithAx() passes the result to LLM_JUDGE.forward.
+    return this.ai; // presumably the Ax client created in the constructor (not visible in this hunk) - confirm
+  }
 };
 var GeminiProvider = class {
   constructor(targetName, config) {
@@ -902,6 +919,9 @@ var GeminiProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() { // Returns this.ai unchanged; providerSupportsAx() detects this method and evaluateWithAx() passes the result to LLM_JUDGE.forward.
+    return this.ai; // presumably the Ax client created in the constructor (not visible in this hunk) - confirm
+  }
 };
 // src/evaluation/providers/cli.ts
@@ -1222,6 +1242,59 @@ function pathToFileUri(filePath) {
   return `file://${normalizedPath}`;
 }
+// src/evaluation/providers/codex-log-tracker.ts
+var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs"); // globalThis key for the log-entry array; Symbol.for uses the global symbol registry, so separately loaded copies of this module obtain the same key
+var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers"); // globalThis key for the Set of subscriber callbacks, shared the same way
+function getCodexLogStore() { // Returns the array of recorded Codex log entries stored on globalThis, creating and storing an empty array on first use.
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY];
+  if (existing) { // any truthy value already stored under the key is returned as-is; its type is not checked
+    return existing;
+  }
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY] = created; // stored so later calls return this same array instance
+  return created;
+}
+function getSubscriberStore() { // Returns the Set of log-entry listeners stored on globalThis, creating and storing an empty Set on first use.
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
+  if (existing) { // any truthy value already stored under the key is returned as-is; its type is not checked
+    return existing;
+  }
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY] = created; // stored so later calls return this same Set instance
+  return created;
+}
+function notifySubscribers(entry) { // Calls every registered listener with entry, in Set insertion order; a listener that throws is logged via console.warn and does not stop the remaining listeners.
+  const subscribers = Array.from(getSubscriberStore()); // snapshot, so listeners added or removed during dispatch do not affect this loop
+  for (const listener of subscribers) {
+    try {
+      listener(entry); // NOTE(review): the return value is ignored, so a rejected promise from an async listener is not caught by this try/catch
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Codex log subscriber failed: ${message}`);
+    }
+  }
+}
+function recordCodexLogEntry(entry) { // Appends entry to the shared log store, then synchronously notifies all subscribers; not part of the exports shown in this diff.
+  getCodexLogStore().push(entry); // entries accumulate until consumeCodexLogEntries() drains the store
+  notifySubscribers(entry);
+}
+function consumeCodexLogEntries() { // Removes and returns all recorded log entries in insertion order; returns a new empty array when nothing is recorded.
+  const store = getCodexLogStore();
+  if (store.length === 0) {
+    return []; // fresh array, so callers never receive a reference to the shared store
+  }
+  return store.splice(0, store.length); // splice empties the shared array in place and returns the removed entries as a new array
+}
+function subscribeToCodexLogEntries(listener) { // Registers listener to be called with each entry passed to recordCodexLogEntry(); returns a function that unregisters it.
+  const store = getSubscriberStore();
+  store.add(listener); // Set semantics: adding the same function twice registers it only once
+  return () => { // unsubscribe callback; calling it more than once is harmless because Set.delete ignores missing values
+    store.delete(listener);
+  };
+}
 // src/evaluation/providers/codex.ts
 var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
 var WORKSPACE_PREFIX = "agentv-codex-";
@@ -1418,7 +1491,12 @@ var CodexProvider = class {
         attempt: request.attempt,
         format: this.config.logFormat ?? "summary"
       });
-      console.log(`Streaming Codex CLI output to ${filePath}`);
+      recordCodexLogEntry({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt
+      });
       return logger;
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -2808,7 +2886,30 @@ function resolveAndCreateProvider(definition, env = process.env) {
 }
 // src/evaluation/evaluators.ts
+var import_ax3 = require("@ax-llm/ax");
 var import_node_crypto2 = require("crypto");
+var LLM_JUDGE_SIGNATURE = (0, import_ax3.f)().input( // Signature for the LLM judge built with the @ax-llm/ax builder: one input object (evaluationContext) and one output object (evaluation).
+  "evaluationContext",
+  import_ax3.f.object(
+    {
+      expectedOutcome: import_ax3.f.string("The expected outcome for the original task"),
+      request: import_ax3.f.string("The original task request"),
+      referenceAnswer: import_ax3.f.string("The gold standard reference answer"),
+      generatedAnswer: import_ax3.f.string("The answer to evaluate"),
+      guidelines: import_ax3.f.string("Additional evaluation guidelines or instructions").optional() // the only optional input field; evaluateWithAx omits it when no guidelines are present
+    },
+    "Complete evaluation context for the judge"
+  )
+).output(
+  "evaluation",
+  import_ax3.f.object({
+    score: import_ax3.f.number("Score between 0.0 and 1.0").min(0).max(1), // declared bounds 0..1; presumably enforced by the ax library - confirm
+    hits: import_ax3.f.string("Brief specific achievement").array(),
+    misses: import_ax3.f.string("Brief specific failure or omission").array(),
+    reasoning: import_ax3.f.string("Concise explanation for the score").max(500) // presumably a 500-character limit - confirm against the ax docs
+  })
+).build();
+var LLM_JUDGE = (0, import_ax3.ax)(LLM_JUDGE_SIGNATURE); // object built from the signature; evaluateWithAx calls its forward() method
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -2826,6 +2927,44 @@ var LlmJudgeEvaluator = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
+    if (providerSupportsAx(judgeProvider)) {
+      return this.evaluateWithAx(context, judgeProvider);
+    }
+    return this.evaluateWithPrompt(context, judgeProvider);
+  }
+  async evaluateWithAx(context, judgeProvider) { // Grades context.candidate by calling LLM_JUDGE.forward with the provider's Ax client; resolves to { score, hits, misses, expectedAspectCount, reasoning, evaluatorRawRequest }.
+    const ai = judgeProvider.getAxAI(); // the caller checks providerSupportsAx(judgeProvider) before dispatching here
+    const guidelines = context.promptInputs.guidelines?.trim();
+    const evaluationContext = {
+      expectedOutcome: context.evalCase.outcome.trim(), // NOTE(review): the four trim() calls below assume these fields are always strings; an undefined value would throw - confirm upstream guarantees
+      request: context.evalCase.task.trim(),
+      referenceAnswer: context.evalCase.expected_assistant_raw.trim(),
+      generatedAnswer: context.candidate.trim(),
+      ...guidelines ? { guidelines } : {} // key is omitted entirely when guidelines is undefined or empty after trimming
+    };
+    const options = this.buildJudgeForwardOptions(context); // may be undefined when no model or model config overrides apply
+    const result = await LLM_JUDGE.forward(ai, { evaluationContext }, options);
+    const evaluation = result.evaluation;
+    const expectedAspectCount = Math.max( // never below 1, even when both lists are empty
+      evaluation.hits.length + evaluation.misses.length,
+      1
+    );
+    return {
+      score: evaluation.score,
+      hits: evaluation.hits, // NOTE(review): unlike evaluateWithPrompt, hits and misses are not filtered for empty strings or capped at 4 items here - confirm this is intended
+      misses: evaluation.misses,
+      expectedAspectCount,
+      reasoning: evaluation.reasoning,
+      evaluatorRawRequest: { // metadata describing this judge call; no prompt text is included on this path
+        id: (0, import_node_crypto2.randomUUID)(),
+        provider: judgeProvider.id,
+        target: context.target.name,
+        method: "ax-structured-output",
+        signature: LLM_JUDGE_SIGNATURE.toString()
+      }
+    };
+  }
+  async evaluateWithPrompt(context, judgeProvider) {
     const prompt = buildQualityPrompt(context.evalCase, context.candidate);
     const systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
     const metadata = {
@@ -2845,6 +2984,7 @@ var LlmJudgeEvaluator = class {
     const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
     const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
     const reasoning = parsed.reasoning ?? response.reasoning;
+    const expectedAspectCount = Math.max(hits.length + misses.length, 1);
     const evaluatorRawRequest = {
       id: (0, import_node_crypto2.randomUUID)(),
       provider: judgeProvider.id,
@@ -2857,12 +2997,34 @@ var LlmJudgeEvaluator = class {
       score,
       hits,
       misses,
-      expectedAspectCount: hits.length + misses.length || 1,
+      expectedAspectCount,
       reasoning,
       evaluatorRawRequest
     };
   }
+  buildJudgeForwardOptions(context) { // Builds the options argument for LLM_JUDGE.forward from context.judgeModel and this evaluator's model config; returns undefined when both are undefined.
+    const modelConfig = this.buildJudgeModelConfig();
+    if (modelConfig === void 0 && context.judgeModel === void 0) {
+      return void 0;
+    }
+    return {
+      ...context.judgeModel ? { model: context.judgeModel } : {}, // truthiness check: a null or empty-string judgeModel passes the guard above but adds no model key, so the result can be {}
+      ...modelConfig ? { modelConfig } : {}
+    };
+  }
+  buildJudgeModelConfig() { // Returns { maxTokens?, temperature? } built from this.maxOutputTokens and this.temperature, or undefined when both are undefined.
+    if (this.maxOutputTokens === void 0 && this.temperature === void 0) {
+      return void 0;
+    }
+    return {
+      ...this.maxOutputTokens !== void 0 ? { maxTokens: this.maxOutputTokens } : {}, // renamed to maxTokens in the returned config
+      ...this.temperature !== void 0 ? { temperature: this.temperature } : {} // explicit undefined check, so a temperature of 0 is preserved
+    };
+  }
 };
+function providerSupportsAx(provider) { // Returns true when provider exposes a getAxAI method; selects the Ax structured-output path in LlmJudgeEvaluator.
+  return typeof provider.getAxAI === "function"; // throws on a null or undefined provider; the caller rejects a missing judge provider before calling this
+}
 var QUALITY_SYSTEM_PROMPT = [
   "You are an expert evaluator. Your goal is to grade the generated_answer based on how well it achieves the expected_outcome for the original task.",
   "",
@@ -3633,6 +3795,7 @@ async function evaluateCandidate(options) {
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
@@ -3809,7 +3972,7 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config) {
   if (config.promptPath) {
     try {
-      return await (0, import_promises6.readFile)(config.promptPath, "utf8");
+      return await readTextFile(config.promptPath);
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -3897,6 +4060,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: 0,
     hits: [],
@@ -3947,6 +4111,7 @@ function createAgentKernel() {
   buildDirectoryChain,
   buildPromptInputs,
   buildSearchRoots,
+  consumeCodexLogEntries,
   createAgentKernel,
   createProvider,
   ensureVSCodeSubagents,
@@ -3963,10 +4128,12 @@ function createAgentKernel() {
   listTargetNames,
   loadEvalCases,
   readTargetDefinitions,
+  readTextFile,
   resolveAndCreateProvider,
   resolveFileReference,
   resolveTargetDefinition,
   runEvalCase,
-  runEvaluation
+  runEvaluation,
+  subscribeToCodexLogEntries
 });
 //# sourceMappingURL=index.cjs.map