npm - @agentv/core - Versions diffs - 2.17.0 → 2.17.1-next.1 - Mend

@agentv/core 2.17.0 → 2.17.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-CPPYERD2.js → chunk-PSYFRPNT.js} +1 -1
package/dist/chunk-PSYFRPNT.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +50 -17
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +6 -0
package/dist/index.d.ts +6 -0
package/dist/index.js +51 -18
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-CPPYERD2.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -1166,6 +1166,12 @@ interface EvaluatorResult {
     readonly details?: JsonObject;
     /** Token usage from LLM calls made by this evaluator (optional). */
     readonly tokenUsage?: TokenUsage;
+    /** Wall-clock duration of this judge execution in milliseconds. */
+    readonly durationMs?: number;
+    /** ISO 8601 UTC timestamp when this judge started executing. */
+    readonly startedAt?: string;
+    /** ISO 8601 UTC timestamp when this judge finished executing. */
+    readonly endedAt?: string;
 }
 /**
  * Convenience accessor matching the Python hit_count property.

package/dist/index.d.ts CHANGED Viewed

@@ -1166,6 +1166,12 @@ interface EvaluatorResult {
     readonly details?: JsonObject;
     /** Token usage from LLM calls made by this evaluator (optional). */
     readonly tokenUsage?: TokenUsage;
+    /** Wall-clock duration of this judge execution in milliseconds. */
+    readonly durationMs?: number;
+    /** ISO 8601 UTC timestamp when this judge started executing. */
+    readonly startedAt?: string;
+    /** ISO 8601 UTC timestamp when this judge finished executing. */
+    readonly endedAt?: string;
 }
 /**
  * Convenience accessor matching the Python hit_count property.

package/dist/index.js CHANGED Viewed

@@ -17,7 +17,7 @@ import {
   readTextFile,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-CPPYERD2.js";
+} from "./chunk-PSYFRPNT.js";
 import {
   OtlpJsonFileExporter
 } from "./chunk-HFSYZHGF.js";
@@ -151,6 +151,25 @@ import path8 from "node:path";
 import micromatch3 from "micromatch";
 import { parse as parse2 } from "yaml";
+// src/evaluation/interpolation.ts
+var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
+function interpolateEnv(value, env) {
+  if (typeof value === "string") {
+    return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => interpolateEnv(item, env));
+  }
+  if (value !== null && typeof value === "object") {
+    const result = {};
+    for (const [key, val] of Object.entries(value)) {
+      result[key] = interpolateEnv(val, env);
+    }
+    return result;
+  }
+  return value;
+}
 // src/evaluation/loaders/case-file-loader.ts
 import { readFile } from "node:fs/promises";
 import path from "node:path";
@@ -169,7 +188,8 @@ function isGlobPattern(filePath) {
   return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
 }
 function parseYamlCases(content, filePath) {
-  const parsed = parseYaml(content);
+  const raw = parseYaml(content);
+  const parsed = interpolateEnv(raw, process.env);
   if (!Array.isArray(parsed)) {
     throw new Error(
       `External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
@@ -191,7 +211,8 @@ function parseJsonlCases(content, filePath) {
     const line = lines[i].trim();
     if (line === "") continue;
     try {
-      const parsed = JSON.parse(line);
+      const raw = JSON.parse(line);
+      const parsed = interpolateEnv(raw, process.env);
       if (!isJsonObject(parsed)) {
         throw new Error("Expected JSON object");
       }
@@ -2340,7 +2361,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
   }
   try {
     const content = await readFile5(sidecarPath, "utf8");
-    const parsed = parseYaml2(content);
+    const parsed = interpolateEnv(parseYaml2(content), process.env);
     if (!isJsonObject(parsed)) {
       logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
       return {};
@@ -2363,7 +2384,8 @@ function parseJsonlContent(content, filePath) {
     const line = lines[i].trim();
     if (line === "") continue;
     try {
-      const parsed = JSON.parse(line);
+      const raw = JSON.parse(line);
+      const parsed = interpolateEnv(raw, process.env);
       if (!isJsonObject(parsed)) {
         throw new Error("Expected JSON object");
       }
@@ -2420,9 +2442,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
     }
     const inputMessages = resolveInputMessages(evalcase);
     const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
+    if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
       logError(
-        `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
+        `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
       );
       continue;
     }
@@ -2500,7 +2523,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
-      criteria: outcome,
+      criteria: outcome ?? "",
       evaluator: evalCaseEvaluatorKind,
       evaluators
     };
@@ -2813,7 +2836,7 @@ async function readTestSuiteMetadata(testFilePath) {
   try {
     const absolutePath = path8.resolve(testFilePath);
     const content = await readFile7(absolutePath, "utf8");
-    const parsed = parse2(content);
+    const parsed = interpolateEnv(parse2(content), process.env);
     if (!isJsonObject(parsed)) {
       return {};
     }
@@ -2863,11 +2886,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   const config = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config?.guideline_patterns;
   const rawFile = await readFile7(absoluteTestPath, "utf8");
-  const parsed = parse2(rawFile);
-  if (!isJsonObject(parsed)) {
+  const interpolated = interpolateEnv(parse2(rawFile), process.env);
+  if (!isJsonObject(interpolated)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
-  const suite = parsed;
+  const suite = interpolated;
   const datasetNameFromSuite = asString6(suite.dataset)?.trim();
   const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
@@ -2911,9 +2934,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     const testInputMessages = resolveInputMessages(evalcase);
     const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
+    if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
       logError2(
-        `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
+        `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
       );
       continue;
     }
@@ -3009,7 +3033,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
-      criteria: outcome,
+      criteria: outcome ?? "",
       evaluator: evalCaseEvaluatorKind,
       evaluators,
       workspace: mergedWorkspace,
@@ -3149,7 +3173,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
     } catch {
       throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
     }
-    const parsed = parse2(content);
+    const parsed = interpolateEnv(parse2(content), process.env);
     if (!isJsonObject(parsed)) {
       throw new Error(
         `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
@@ -14847,9 +14871,11 @@ async function runEvaluatorList(options) {
     registry: typeRegistry
   };
   for (const evaluatorConfig of evaluators ?? []) {
+    const startedAt = /* @__PURE__ */ new Date();
     try {
       const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
       const score2 = await evaluatorInstance.evaluate(evalContext);
+      const endedAt = /* @__PURE__ */ new Date();
       const weight = evaluatorConfig.weight ?? 1;
       scored.push({
         score: score2,
@@ -14870,9 +14896,13 @@ async function runEvaluatorList(options) {
         evaluatorProviderRequest: score2.evaluatorRawRequest,
         details: score2.details,
         scores: mapChildResults(score2.scores),
-        tokenUsage: score2.tokenUsage
+        tokenUsage: score2.tokenUsage,
+        durationMs: endedAt.getTime() - startedAt.getTime(),
+        startedAt: startedAt.toISOString(),
+        endedAt: endedAt.toISOString()
       });
     } catch (error) {
+      const endedAt = /* @__PURE__ */ new Date();
       const message = error instanceof Error ? error.message : String(error);
       const fallbackScore = {
         score: 0,
@@ -14898,7 +14928,10 @@ async function runEvaluatorList(options) {
         verdict: "fail",
         hits: [],
         misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
-        reasoning: message
+        reasoning: message,
+        durationMs: endedAt.getTime() - startedAt.getTime(),
+        startedAt: startedAt.toISOString(),
+        endedAt: endedAt.toISOString()
       });
     }
     if (evaluatorConfig.negate === true && scored.length > 0) {