npm - @agentv/core - Versions diffs - 3.12.0 → 3.13.1 - Mend

@agentv/core 3.12.0 → 3.13.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{chunk-4XWPXNQM.js → chunk-ZB3AUPES.js} +1 -3
package/dist/chunk-ZB3AUPES.js.map +1 -0
package/dist/evaluation/validation/index.cjs +0 -2
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +63 -177
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +15 -55
package/dist/index.d.ts +15 -55
package/dist/index.js +62 -49
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-3G2KXH7N.js +0 -120
package/dist/chunk-3G2KXH7N.js.map +0 -1
package/dist/chunk-4XWPXNQM.js.map +0 -1
package/dist/simple-trace-file-exporter-CRIO5HDZ.js +0 -7
package/dist/simple-trace-file-exporter-CRIO5HDZ.js.map +0 -1

package/dist/index.js CHANGED

@@ -19,16 +19,13 @@ import {
   readTextFile,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-4XWPXNQM.js";
+} from "./chunk-ZB3AUPES.js";
 import {
   AgentvProvider
 } from "./chunk-W5YDZWT4.js";
 import {
   OtlpJsonFileExporter
 } from "./chunk-HFSYZHGF.js";
-import {
-  SimpleTraceFileExporter
-} from "./chunk-3G2KXH7N.js";
 // src/evaluation/trace.ts
 function computeTraceSummary(messages) {
@@ -615,12 +612,6 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (obj.verbose !== void 0) {
     logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
   }
-  const traceFile = obj.trace_file;
-  if (typeof traceFile === "string" && traceFile.trim().length > 0) {
-    result.trace_file = traceFile.trim();
-  } else if (traceFile !== void 0) {
-    logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
-  }
   if (typeof obj.keep_workspaces === "boolean") {
     result.keep_workspaces = obj.keep_workspaces;
   } else if (obj.keep_workspaces !== void 0) {
@@ -737,6 +728,9 @@ var ANSI_RESET4 = "\x1B[0m";
 function normalizeEvaluatorType(type) {
   return type.replace(/_/g, "-");
 }
+function isDeprecatedJudgeType(type) {
+  return type === "code-judge" || type === "llm-judge";
+}
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
   const executionObject = isJsonObject2(execution) ? execution : void 0;
@@ -799,6 +793,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
     const rawName = asString(rawEvaluator.name);
     const rawType = rawEvaluator.type;
     const typeValue = typeof rawType === "string" ? normalizeEvaluatorType(rawType) : rawType;
+    if (typeof typeValue === "string" && isDeprecatedJudgeType(typeValue)) {
+      logWarning2(
+        `Skipping evaluator '${rawName ?? "<unnamed>"}' in '${evalId}': '${rawType}' is deprecated. Use '${typeValue.replace("-judge", "-grader")}' instead`
+      );
+      continue;
+    }
     const isCustomType = typeof typeValue === "string" && !isEvaluatorKind(typeValue);
     if (typeof typeValue !== "string") {
       logWarning2(`Skipping evaluator with invalid type in '${evalId}'`);
@@ -831,7 +831,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       });
       continue;
     }
-    if (typeValue === "code-grader" || typeValue === "code-judge") {
+    if (typeValue === "code-grader") {
       let command;
       if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
         console.warn(
@@ -941,7 +941,14 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const aggregatorType = asString(rawAggregator.type);
-      if (aggregatorType !== "weighted_average" && aggregatorType !== "code-grader" && aggregatorType !== "code-judge" && aggregatorType !== "llm-grader" && aggregatorType !== "llm-judge" && aggregatorType !== "threshold") {
+      const normalizedAggregatorType = typeof aggregatorType === "string" ? aggregatorType === "weighted_average" || aggregatorType === "threshold" ? aggregatorType : normalizeEvaluatorType(aggregatorType) : aggregatorType;
+      if (typeof normalizedAggregatorType === "string" && isDeprecatedJudgeType(normalizedAggregatorType)) {
+        logWarning2(
+          `Skipping composite evaluator '${name}' in '${evalId}': aggregator type '${aggregatorType}' is deprecated. Use '${normalizedAggregatorType.replace("-judge", "-grader")}' instead`
+        );
+        continue;
+      }
+      if (normalizedAggregatorType !== "weighted_average" && normalizedAggregatorType !== "code-grader" && normalizedAggregatorType !== "llm-grader" && normalizedAggregatorType !== "threshold") {
         logWarning2(
           `Skipping composite evaluator '${name}' in '${evalId}': invalid aggregator type '${aggregatorType}'`
         );
@@ -976,7 +983,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       let aggregator;
-      if (aggregatorType === "weighted_average") {
+      if (normalizedAggregatorType === "weighted_average") {
         const weights = isJsonObject2(rawAggregator.weights) ? rawAggregator.weights : void 0;
         const parsedWeights = {};
         if (weights) {
@@ -990,7 +997,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
           type: "weighted_average",
           ...Object.keys(parsedWeights).length > 0 ? { weights: parsedWeights } : {}
         };
-      } else if (aggregatorType === "code-grader" || aggregatorType === "code-judge") {
+      } else if (normalizedAggregatorType === "code-grader") {
         const aggregatorPath = asString(rawAggregator.path);
         if (!aggregatorPath) {
           logWarning2(
@@ -1003,7 +1010,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
           path: aggregatorPath,
           cwd: searchRoots[0]
         };
-      } else if (aggregatorType === "threshold") {
+      } else if (normalizedAggregatorType === "threshold") {
         const thresholdValue = rawAggregator.threshold;
         if (typeof thresholdValue !== "number" || thresholdValue < 0 || thresholdValue > 1) {
           logWarning2(
@@ -1751,10 +1758,15 @@ function coerceEvaluator(candidate, contextId) {
     return void 0;
   }
   const normalized = normalizeEvaluatorType(candidate);
+  if (isDeprecatedJudgeType(normalized)) {
+    throw new Error(
+      `Unsupported grader '${candidate}' in ${contextId}. Use '${normalized.replace("-judge", "-grader")}' instead.`
+    );
+  }
   if (isEvaluatorKind(normalized)) {
     return normalized;
   }
-  logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
+  logWarning2(`Unknown grader '${candidate}' in ${contextId}, falling back to default`);
   return void 0;
 }
 function asString(value) {
@@ -3187,9 +3199,7 @@ function assertionToNaturalLanguage(entry) {
     case "ends_with":
       return `Output ends with '${entry.value}'`;
     case "llm-grader":
-    case "llm_grader":
-    case "llm-judge":
-    case "llm_judge": {
+    case "llm_grader": {
       if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
         return null;
       }
@@ -3202,9 +3212,7 @@ function assertionToNaturalLanguage(entry) {
       return tools ? `Agent called tools in order: ${tools}` : "Agent followed expected tool trajectory";
     }
     case "code-grader":
-    case "code_grader":
-    case "code-judge":
-    case "code_judge": {
+    case "code_grader": {
       const graderName = entry.name ?? deriveGraderNameFromCommand(entry.command) ?? "code-grader";
       const desc = typeof entry.description === "string" ? entry.description : void 0;
       return codeGraderInstruction(graderName, desc);
@@ -3235,7 +3243,7 @@ function assertionToNaturalLanguage(entry) {
   }
 }
 function assertionToNaturalLanguageList(entry) {
-  if (entry.type === "llm-grader" || entry.type === "llm_grader" || entry.type === "llm-judge" || entry.type === "llm_judge") {
+  if (entry.type === "llm-grader" || entry.type === "llm_grader") {
     if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
       return entry.rubrics.map((r) => r.outcome ?? r.criteria ?? r.id).filter((s) => typeof s === "string");
     }
@@ -9601,10 +9609,26 @@ function extractJsonBlob(text) {
   const match = text.match(/\{[\s\S]*\}/);
   return match?.[0];
 }
+function repairSchemaNearBooleanFields(text) {
+  return text.replace(
+    /("passed"\s*:\s*)(?:"([^"]+)"|([A-Za-z_][A-Za-z0-9_-]*))/gi,
+    (_match, prefix, quotedValue, bareValue) => {
+      const value = (quotedValue ?? bareValue ?? "").trim().toLowerCase();
+      if (value === "true") {
+        return `${prefix}true`;
+      }
+      if (value === "false") {
+        return `${prefix}false`;
+      }
+      return `${prefix}false`;
+    }
+  );
+}
 function parseJsonFromText(text) {
   const cleaned = typeof text === "string" ? text.replace(/```json\n?|```/g, "").trim() : "";
   const blob = extractJsonBlob(cleaned) ?? cleaned;
-  return JSON.parse(blob);
+  const repaired = repairSchemaNearBooleanFields(blob);
+  return JSON.parse(repaired);
 }
 function isNonEmptyString(value) {
   return typeof value === "string" && value.trim().length > 0;
@@ -10076,7 +10100,7 @@ function toCamelCaseDeep(obj) {
 // src/evaluation/evaluators/code-evaluator.ts
 var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
 var CodeEvaluator = class {
-  kind = "code-judge";
+  kind = "code-grader";
   command;
   cwd;
   agentTimeoutMs;
@@ -10095,7 +10119,7 @@ var CodeEvaluator = class {
     if (outputForPayload) {
       const serialized = JSON.stringify(outputForPayload);
       if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
-        const tmpDir = await mkdtemp2(join(tmpdir2(), "agentv-judge-"));
+        const tmpDir = await mkdtemp2(join(tmpdir2(), "agentv-grader-"));
         outputPath = join(tmpDir, "output.json");
         await writeFile6(outputPath, serialized);
         outputForPayload = null;
@@ -10353,7 +10377,7 @@ var LlmGraderEvaluator = class {
       return this.evaluateWithDelegatedAgent(context, graderProvider);
     }
     const config = context.evaluator;
-    if ((config?.type === "llm-grader" || config?.type === "llm-judge") && config.rubrics && config.rubrics.length > 0) {
+    if (config?.type === "llm-grader" && config.rubrics && config.rubrics.length > 0) {
       return this.evaluateWithRubrics(context, graderProvider, config.rubrics);
     }
     return this.evaluateFreeform(context, graderProvider);
@@ -10538,7 +10562,7 @@ ${context.fileChanges}`;
     const systemPrompt = this.buildAgentSystemPrompt(context);
     const userPrompt = this.buildAgentUserPrompt(context);
     const config = context.evaluator;
-    const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
+    const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
     const fsTools = createFilesystemTools(workspacePath);
     const evaluatorRawRequest = {
       mode: "built-in",
@@ -10634,7 +10658,7 @@ ${context.fileChanges}`;
         };
       }
       const config = context.evaluator;
-      const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
+      const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
       const details = {
         mode: modeLabel,
         grader_target: provider.targetName
@@ -10674,7 +10698,7 @@ ${context.fileChanges}`;
    */
   buildAgentSystemPrompt(context) {
     const config = context.evaluator;
-    const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
+    const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
     const parts = [
       "You are an expert evaluator with access to the workspace filesystem.",
       "Use the provided tools to investigate the workspace and verify the criteria are met.",
@@ -10705,7 +10729,7 @@ ${context.fileChanges}`;
       return substituteVariables(this.evaluatorTemplate, variables);
     }
     const config = context.evaluator;
-    const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
+    const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
     const parts = [
       "Evaluate the candidate answer by investigating the workspace.",
       "",
@@ -10748,7 +10772,7 @@ ${context.fileChanges}`;
   buildDelegatedPrompt(context) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const config = context.evaluator;
-    const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
+    const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
     if (this.evaluatorTemplate) {
       const variables = {
         [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
@@ -11245,10 +11269,8 @@ var CompositeEvaluator = class {
     const aggregator = this.config.aggregator;
     switch (aggregator.type) {
       case "code-grader":
-      case "code-judge":
         return this.runCodeAggregator(results, aggregator.path, aggregator.cwd ?? this.cwd);
       case "llm-grader":
-      case "llm-judge":
         return this.runLlmAggregator(results, context, aggregator);
       case "threshold":
         return this.runThreshold(results, aggregator.threshold);
@@ -13670,7 +13692,7 @@ var endsWithFactory = (config) => {
 };
 function createBuiltinRegistry() {
   const registry = new EvaluatorRegistry();
-  registry.register("llm-grader", llmGraderFactory).register("llm-judge", llmGraderFactory).register("code-grader", codeFactory).register("code-judge", codeFactory).register("composite", compositeFactory).register("tool-trajectory", toolTrajectoryFactory).register("field-accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token-usage", tokenUsageFactory).register("execution-metrics", executionMetricsFactory).register("skill-trigger", skillTriggerFactory).register("contains", containsFactory).register("contains-any", containsAnyFactory).register("contains-all", containsAllFactory).register("icontains", icontainsFactory).register("icontains-any", icontainsAnyFactory).register("icontains-all", icontainsAllFactory).register("starts-with", startsWithFactory).register("ends-with", endsWithFactory).register("regex", regexFactory).register("is-json", isJsonFactory).register("equals", equalsFactory).register("inline-assert", (config) => {
+  registry.register("llm-grader", llmGraderFactory).register("code-grader", codeFactory).register("composite", compositeFactory).register("tool-trajectory", toolTrajectoryFactory).register("field-accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token-usage", tokenUsageFactory).register("execution-metrics", executionMetricsFactory).register("skill-trigger", skillTriggerFactory).register("contains", containsFactory).register("contains-any", containsAnyFactory).register("contains-all", containsAllFactory).register("icontains", icontainsFactory).register("icontains-any", icontainsAnyFactory).register("icontains-all", icontainsAllFactory).register("starts-with", startsWithFactory).register("ends-with", endsWithFactory).register("regex", regexFactory).register("is-json", isJsonFactory).register("equals", equalsFactory).register("inline-assert", (config) => {
     const fn = config[INLINE_ASSERT_FN];
     if (!fn) {
       throw new Error(
@@ -16388,7 +16410,7 @@ function filterEvalCases(evalCases, filter) {
   return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
 }
 function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
-  const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
+  const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
     resolveGraderProvider: async (context) => {
       if (context.graderProvider) {
         return context.graderProvider;
@@ -16820,8 +16842,6 @@ var AgentVConfigSchema = z4.object({
     agentTimeoutMs: z4.number().int().min(0).optional(),
     /** Enable verbose logging */
     verbose: z4.boolean().optional(),
-    /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
-    traceFile: z4.string().optional(),
     /** Always keep temp workspaces after eval */
     keepWorkspaces: z4.boolean().optional(),
     /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -17121,12 +17141,6 @@ var OtelTraceExporter = class {
           new SimpleSpanProcessor(new OtlpJsonFileExporter2(this.options.otlpFilePath))
         );
       }
-      if (this.options.traceFilePath) {
-        const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-CRIO5HDZ.js");
-        processors.push(
-          new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
-        );
-      }
       if (processors.length === 0) {
         return false;
       }
@@ -17240,10 +17254,10 @@ var OtelTraceExporter = class {
         }
         if (result.scores) {
           for (const score of result.scores) {
-            rootSpan.addEvent(`agentv.evaluator.${score.name}`, {
-              "agentv.evaluator.score": score.score,
-              "agentv.evaluator.type": score.type,
-              ...score.verdict ? { "agentv.evaluator.verdict": score.verdict } : {}
+            rootSpan.addEvent(`agentv.grader.${score.name}`, {
+              "agentv.grader.score": score.score,
+              "agentv.grader.type": score.type,
+              ...score.verdict ? { "agentv.grader.verdict": score.verdict } : {}
             });
           }
         }
@@ -17577,7 +17591,6 @@ export {
   ProviderRegistry,
   RepoManager,
   ResponseCache,
-  SimpleTraceFileExporter,
   SkillTriggerEvaluator,
   TEST_MESSAGE_ROLES,
   TemplateNotDirectoryError,