npm - @agentv/core - Versions diffs - 2.5.8 → 2.7.1-next.1 - Mend

@agentv/core 2.5.8 → 2.7.1-next.1

This diff shows the differences between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (24)

package/dist/{chunk-LGQ5OPJD.js → chunk-6W5E3VR6.js} +383 -54
package/dist/chunk-6W5E3VR6.js.map +1 -0
package/dist/chunk-HFSYZHGF.js +82 -0
package/dist/chunk-HFSYZHGF.js.map +1 -0
package/dist/chunk-HMXZ2AX4.js +112 -0
package/dist/chunk-HMXZ2AX4.js.map +1 -0
package/dist/esm-5Q4BZALM.js +968 -0
package/dist/esm-5Q4BZALM.js.map +1 -0
package/dist/evaluation/validation/index.cjs +337 -70
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +294 -69
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +9221 -4040
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +1717 -234
package/dist/index.d.ts +1717 -234
package/dist/index.js +6563 -3147
package/dist/index.js.map +1 -1
package/dist/otlp-json-file-exporter-77FDBRSY.js +7 -0
package/dist/otlp-json-file-exporter-77FDBRSY.js.map +1 -0
package/dist/simple-trace-file-exporter-S76DMABU.js +7 -0
package/dist/simple-trace-file-exporter-S76DMABU.js.map +1 -0
package/package.json +18 -5
package/dist/chunk-LGQ5OPJD.js.map +0 -1

package/dist/evaluation/validation/index.cjs CHANGED

@@ -106,6 +106,37 @@ function getExpectedSchema(fileType) {
 var import_promises2 = require("fs/promises");
 var import_node_path2 = __toESM(require("path"), 1);
 var import_yaml2 = require("yaml");
+// src/evaluation/types.ts
+var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
+var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
+var EVALUATOR_KIND_VALUES = [
+  "code_judge",
+  "llm_judge",
+  "rubric",
+  "composite",
+  "tool_trajectory",
+  "field_accuracy",
+  "latency",
+  "cost",
+  "token_usage",
+  "execution_metrics",
+  "agent_judge",
+  "contains",
+  "regex",
+  "is_json",
+  "equals",
+  "rubrics"
+];
+var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
+function isEvaluatorKind(value) {
+  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
+}
+// src/evaluation/validation/eval-validator.ts
+var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set(["contains", "equals", "regex"]);
+var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
+var NAME_PATTERN = /^[a-z0-9-]+$/;
 function isObject(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
@@ -142,13 +173,41 @@ async function validateEvalFile(filePath) {
       errors
     };
   }
-  const evalcases = parsed.evalcases;
-  if (!Array.isArray(evalcases)) {
+  validateMetadata(parsed, absolutePath, errors);
+  let cases = parsed.tests;
+  if (cases === void 0 && "eval_cases" in parsed) {
+    cases = parsed.eval_cases;
     errors.push({
-      severity: "error",
+      severity: "warning",
+      filePath: absolutePath,
+      location: "eval_cases",
+      message: "'eval_cases' is deprecated. Use 'tests' instead."
+    });
+  }
+  if (cases === void 0 && "evalcases" in parsed) {
+    cases = parsed.evalcases;
+    errors.push({
+      severity: "warning",
       filePath: absolutePath,
       location: "evalcases",
-      message: "Missing or invalid 'evalcases' field (must be an array)"
+      message: "'evalcases' is deprecated. Use 'tests' instead."
+    });
+  }
+  if (typeof cases === "string") {
+    validateTestsStringPath(cases, absolutePath, errors);
+    return {
+      valid: errors.filter((e) => e.severity === "error").length === 0,
+      filePath: absolutePath,
+      fileType: "eval",
+      errors
+    };
+  }
+  if (!Array.isArray(cases)) {
+    errors.push({
+      severity: "error",
+      filePath: absolutePath,
+      location: "tests",
+      message: "Missing or invalid 'tests' field (must be an array or a file path string)"
     });
     return {
       valid: errors.length === 0,
@@ -157,9 +216,9 @@ async function validateEvalFile(filePath) {
       errors
     };
   }
-  for (let i = 0; i < evalcases.length; i++) {
-    const evalCase = evalcases[i];
-    const location = `evalcases[${i}]`;
+  for (let i = 0; i < cases.length; i++) {
+    const evalCase = cases[i];
+    const location = `tests[${i}]`;
     if (!isObject(evalCase)) {
       errors.push({
         severity: "error",
@@ -178,23 +237,29 @@ async function validateEvalFile(filePath) {
         message: "Missing or invalid 'id' field (must be a non-empty string)"
       });
     }
-    const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
-    if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
+    let criteria = evalCase.criteria;
+    if (criteria === void 0 && "expected_outcome" in evalCase) {
+      criteria = evalCase.expected_outcome;
       errors.push({
-        severity: "error",
+        severity: "warning",
         filePath: absolutePath,
         location: `${location}.expected_outcome`,
-        message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
+        message: "'expected_outcome' is deprecated. Use 'criteria' instead."
+      });
+    }
+    if (criteria !== void 0 && (typeof criteria !== "string" || criteria.trim().length === 0)) {
+      errors.push({
+        severity: "error",
+        filePath: absolutePath,
+        location: `${location}.criteria`,
+        message: "Invalid 'criteria' field (must be a non-empty string if provided)"
       });
     }
-    const inputMessages = evalCase.input_messages;
-    const inputAlias = evalCase.input;
-    if (Array.isArray(inputMessages)) {
-      validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
-    } else if (inputAlias !== void 0) {
-      if (typeof inputAlias === "string") {
-      } else if (Array.isArray(inputAlias)) {
-        validateMessages(inputAlias, `${location}.input`, absolutePath, errors);
+    const inputField = evalCase.input;
+    if (inputField !== void 0) {
+      if (typeof inputField === "string") {
+      } else if (Array.isArray(inputField)) {
+        validateMessages(inputField, `${location}.input`, absolutePath, errors);
       } else {
         errors.push({
           severity: "error",
@@ -207,33 +272,23 @@ async function validateEvalFile(filePath) {
       errors.push({
         severity: "error",
         filePath: absolutePath,
-        location: `${location}.input_messages`,
-        message: "Missing 'input_messages' or 'input' field (must provide one)"
+        location: `${location}.input`,
+        message: "Missing 'input' field (must be a string or array of messages)"
       });
     }
-    const expectedMessages = evalCase.expected_messages;
-    const expectedOutputAlias = evalCase.expected_output;
-    if (expectedMessages !== void 0 && !Array.isArray(expectedMessages)) {
-      errors.push({
-        severity: "error",
-        filePath: absolutePath,
-        location: `${location}.expected_messages`,
-        message: "Invalid 'expected_messages' field (must be an array if provided)"
-      });
-    } else if (Array.isArray(expectedMessages)) {
-      validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
-    } else if (expectedOutputAlias !== void 0) {
-      if (typeof expectedOutputAlias === "string") {
-      } else if (Array.isArray(expectedOutputAlias)) {
-        if (expectedOutputAlias.length > 0 && isObject(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
+    const expectedOutputField = evalCase.expected_output;
+    if (expectedOutputField !== void 0) {
+      if (typeof expectedOutputField === "string") {
+      } else if (Array.isArray(expectedOutputField)) {
+        if (expectedOutputField.length > 0 && isObject(expectedOutputField[0]) && "role" in expectedOutputField[0]) {
           validateMessages(
-            expectedOutputAlias,
+            expectedOutputField,
             `${location}.expected_output`,
             absolutePath,
             errors
           );
         }
-      } else if (isObject(expectedOutputAlias)) {
+      } else if (isObject(expectedOutputField)) {
       } else {
         errors.push({
           severity: "error",
@@ -243,9 +298,13 @@ async function validateEvalFile(filePath) {
         });
       }
     }
+    const assertField = evalCase.assert;
+    if (assertField !== void 0) {
+      validateAssertArray(assertField, location, absolutePath, errors);
+    }
   }
   return {
-    valid: errors.length === 0,
+    valid: errors.filter((e) => e.severity === "error").length === 0,
     filePath: absolutePath,
     fileType: "eval",
     errors
@@ -325,6 +384,133 @@ function validateMessages(messages, location, filePath, errors) {
     }
   }
 }
+function validateMetadata(parsed, filePath, errors) {
+  const name = parsed.name;
+  if (name !== void 0) {
+    if (typeof name === "string") {
+      if (!NAME_PATTERN.test(name)) {
+        errors.push({
+          severity: "warning",
+          filePath,
+          location: "name",
+          message: `Invalid 'name' format '${name}'. Must match pattern /^[a-z0-9-]+$/ (lowercase alphanumeric with hyphens).`
+        });
+      }
+    }
+    if (!("description" in parsed) || parsed.description === void 0) {
+      errors.push({
+        severity: "warning",
+        filePath,
+        location: "name",
+        message: "When 'name' is present, 'description' should also be provided."
+      });
+    }
+  }
+}
+function validateTestsStringPath(testsPath, filePath, errors) {
+  const ext = import_node_path2.default.extname(testsPath);
+  if (!VALID_TEST_FILE_EXTENSIONS.has(ext)) {
+    errors.push({
+      severity: "warning",
+      filePath,
+      location: "tests",
+      message: `Unsupported file extension '${ext}' for tests path '${testsPath}'. Supported extensions: ${[...VALID_TEST_FILE_EXTENSIONS].join(", ")}`
+    });
+  }
+}
+function validateAssertArray(assertField, parentLocation, filePath, errors) {
+  if (!Array.isArray(assertField)) {
+    errors.push({
+      severity: "warning",
+      filePath,
+      location: `${parentLocation}.assert`,
+      message: "'assert' must be an array of assertion objects."
+    });
+    return;
+  }
+  for (let i = 0; i < assertField.length; i++) {
+    const item = assertField[i];
+    const location = `${parentLocation}.assert[${i}]`;
+    if (!isObject(item)) {
+      errors.push({
+        severity: "warning",
+        filePath,
+        location,
+        message: "Assertion item must be an object with a type field."
+      });
+      continue;
+    }
+    const typeValue = item.type;
+    if (typeValue === void 0 || typeof typeValue !== "string") {
+      errors.push({
+        severity: "warning",
+        filePath,
+        location: `${location}.type`,
+        message: "Assertion item is missing a 'type' field."
+      });
+      continue;
+    }
+    if (!isEvaluatorKind(typeValue)) {
+      errors.push({
+        severity: "warning",
+        filePath,
+        location: `${location}.type`,
+        message: `Unknown assertion type '${typeValue}'.`
+      });
+      continue;
+    }
+    if (ASSERTION_TYPES_WITH_VALUE.has(typeValue)) {
+      const value = item.value;
+      if (value === void 0 || typeof value !== "string") {
+        errors.push({
+          severity: "warning",
+          filePath,
+          location: `${location}.value`,
+          message: `Assertion type '${typeValue}' requires a 'value' field (string).`
+        });
+        continue;
+      }
+      if (typeValue === "regex") {
+        try {
+          new RegExp(value);
+        } catch {
+          errors.push({
+            severity: "warning",
+            filePath,
+            location: `${location}.value`,
+            message: `Invalid regex pattern '${value}': not a valid regular expression.`
+          });
+        }
+      }
+    }
+    const required = item.required;
+    if (required !== void 0) {
+      validateRequiredField(required, location, filePath, errors);
+    }
+  }
+}
+function validateRequiredField(required, parentLocation, filePath, errors) {
+  if (typeof required === "boolean") {
+    return;
+  }
+  if (typeof required === "number") {
+    if (required <= 0 || required > 1) {
+      errors.push({
+        severity: "warning",
+        filePath,
+        location: `${parentLocation}.required`,
+        message: `Invalid 'required' value ${required}. When a number, it must be between 0 (exclusive) and 1 (inclusive).`
+      });
+    }
+    return;
+  }
+  errors.push({
+    severity: "warning",
+    filePath,
+    location: `${parentLocation}.required`,
+    message: `Invalid 'required' value. Must be a boolean or a number between 0 (exclusive) and 1 (inclusive).`
+  });
+}
 function validateContentForRoleMarkers(content, location, filePath, errors) {
   const markers = ["@[System]:", "@[User]:", "@[Assistant]:", "@[Tool]:"];
   for (const marker of markers) {
@@ -378,6 +564,9 @@ var CliTargetInputSchema = import_zod.z.object({
   attachmentsFormat: import_zod.z.string().optional(),
   // Working directory - optional
   cwd: import_zod.z.string().optional(),
+  // Workspace template directory - optional (mutually exclusive with cwd)
+  workspace_template: import_zod.z.string().optional(),
+  workspaceTemplate: import_zod.z.string().optional(),
   // Timeout in seconds - optional
   timeout_seconds: import_zod.z.number().positive().optional(),
   timeoutSeconds: import_zod.z.number().positive().optional(),
@@ -419,6 +608,7 @@ var CliTargetConfigSchema = import_zod.z.object({
   commandTemplate: import_zod.z.string().min(1),
   filesFormat: import_zod.z.string().optional(),
   cwd: import_zod.z.string().optional(),
+  workspaceTemplate: import_zod.z.string().optional(),
   timeoutMs: import_zod.z.number().positive().optional(),
   healthcheck: CliHealthcheckSchema.optional(),
   verbose: import_zod.z.boolean().optional(),
@@ -436,7 +626,9 @@ var BASE_TARGET_SCHEMA = import_zod.z.object({
   name: import_zod.z.string().min(1, "target name is required"),
   provider: import_zod.z.string().min(1, "provider is required"),
   judge_target: import_zod.z.string().optional(),
-  workers: import_zod.z.number().int().min(1).optional()
+  workers: import_zod.z.number().int().min(1).optional(),
+  workspace_template: import_zod.z.string().optional(),
+  workspaceTemplate: import_zod.z.string().optional()
 }).passthrough();
 // src/evaluation/providers/types.ts
@@ -445,10 +637,11 @@ var KNOWN_PROVIDERS = [
   "anthropic",
   "gemini",
   "codex",
+  "copilot",
   "copilot-cli",
   "pi-coding-agent",
   "pi-agent-sdk",
-  "claude-code",
+  "claude",
   "cli",
   "mock",
   "vscode",
@@ -463,8 +656,16 @@ var PROVIDER_ALIASES = [
   // alias for "gemini"
   "codex-cli",
   // alias for "codex"
+  "copilot-sdk",
+  // alias for "copilot"
+  "copilot_sdk",
+  // alias for "copilot" (underscore variant)
   "pi",
   // alias for "pi-coding-agent"
+  "claude-code",
+  // alias for "claude" (legacy)
+  "claude-sdk",
+  // alias for "claude"
   "openai",
   // legacy/future support
   "bedrock",
@@ -535,6 +736,7 @@ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
 ]);
 var CODEX_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
+  "model",
   "executable",
   "command",
   "binary",
@@ -550,41 +752,45 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
   "log_format",
   "logFormat",
   "log_output_format",
-  "logOutputFormat"
+  "logOutputFormat",
+  "system_prompt",
+  "systemPrompt",
+  "workspace_template",
+  "workspaceTemplate"
 ]);
-var COPILOT_SETTINGS = /* @__PURE__ */ new Set([
+var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
-  "executable",
-  "command",
-  "binary",
-  "args",
-  "arguments",
+  "cli_url",
+  "cliUrl",
+  "cli_path",
+  "cliPath",
+  "github_token",
+  "githubToken",
   "model",
   "cwd",
   "timeout_seconds",
   "timeoutSeconds",
   "log_dir",
   "logDir",
-  "log_directory",
-  "logDirectory",
   "log_format",
   "logFormat",
-  "log_output_format",
-  "logOutputFormat",
   "system_prompt",
-  "systemPrompt"
+  "systemPrompt",
+  "workspace_template",
+  "workspaceTemplate"
 ]);
 var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
+  "executable",
   "workspace_template",
   "workspaceTemplate",
-  "vscode_cmd",
-  "command",
   "wait",
   "dry_run",
   "dryRun",
   "subagent_root",
-  "subagentRoot"
+  "subagentRoot",
+  "timeout_seconds",
+  "timeoutSeconds"
 ]);
 var MOCK_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
@@ -595,6 +801,29 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
   "trace"
   // For testing tool_trajectory evaluator
 ]);
+var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
+  ...COMMON_SETTINGS,
+  "model",
+  "cwd",
+  "timeout_seconds",
+  "timeoutSeconds",
+  "log_dir",
+  "logDir",
+  "log_directory",
+  "logDirectory",
+  "log_format",
+  "logFormat",
+  "log_output_format",
+  "logOutputFormat",
+  "system_prompt",
+  "systemPrompt",
+  "workspace_template",
+  "workspaceTemplate",
+  "max_turns",
+  "maxTurns",
+  "max_budget_usd",
+  "maxBudgetUsd"
+]);
 function getKnownSettings(provider) {
   const normalizedProvider = provider.toLowerCase();
   switch (normalizedProvider) {
@@ -610,8 +839,15 @@ function getKnownSettings(provider) {
     case "codex":
     case "codex-cli":
       return CODEX_SETTINGS;
+    case "copilot":
+    case "copilot-sdk":
+    case "copilot_sdk":
     case "copilot-cli":
-      return COPILOT_SETTINGS;
+      return COPILOT_SDK_SETTINGS;
+    case "claude":
+    case "claude-code":
+    case "claude-sdk":
+      return CLAUDE_SETTINGS;
     case "vscode":
     case "vscode-insiders":
       return VSCODE_SETTINGS;
@@ -909,7 +1145,32 @@ async function validateConfigFile(filePath) {
         });
       }
     }
-    const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
+    const evalPatterns = config.eval_patterns;
+    if (evalPatterns !== void 0) {
+      if (!Array.isArray(evalPatterns)) {
+        errors.push({
+          severity: "error",
+          filePath,
+          location: "eval_patterns",
+          message: "Field 'eval_patterns' must be an array"
+        });
+      } else if (!evalPatterns.every((p) => typeof p === "string")) {
+        errors.push({
+          severity: "error",
+          filePath,
+          location: "eval_patterns",
+          message: "All entries in 'eval_patterns' must be strings"
+        });
+      } else if (evalPatterns.length === 0) {
+        errors.push({
+          severity: "warning",
+          filePath,
+          location: "eval_patterns",
+          message: "Field 'eval_patterns' is empty. Consider removing it or adding patterns."
+        });
+      }
+    }
+    const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns", "eval_patterns"]);
     const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
     if (unexpectedFields.length > 0) {
       errors.push({
@@ -1046,30 +1307,36 @@ async function validateFileReferences(evalFilePath) {
   if (!isObject3(parsed)) {
     return errors;
   }
-  const evalcases = parsed.evalcases;
-  if (!Array.isArray(evalcases)) {
+  let cases = parsed.tests;
+  if (cases === void 0 && "eval_cases" in parsed) {
+    cases = parsed.eval_cases;
+  }
+  if (cases === void 0 && "evalcases" in parsed) {
+    cases = parsed.evalcases;
+  }
+  if (!Array.isArray(cases)) {
     return errors;
   }
-  for (let i = 0; i < evalcases.length; i++) {
-    const evalCase = evalcases[i];
+  for (let i = 0; i < cases.length; i++) {
+    const evalCase = cases[i];
     if (!isObject3(evalCase)) {
       continue;
     }
-    const inputMessages = evalCase.input_messages;
-    if (Array.isArray(inputMessages)) {
+    const inputField = evalCase.input;
+    if (Array.isArray(inputField)) {
       await validateMessagesFileRefs(
-        inputMessages,
-        `evalcases[${i}].input_messages`,
+        inputField,
+        `tests[${i}].input`,
         searchRoots,
         absolutePath,
         errors
       );
     }
-    const expectedMessages = evalCase.expected_messages;
-    if (Array.isArray(expectedMessages)) {
+    const expectedOutputField = evalCase.expected_output;
+    if (Array.isArray(expectedOutputField)) {
       await validateMessagesFileRefs(
-        expectedMessages,
-        `evalcases[${i}].expected_messages`,
+        expectedOutputField,
+        `tests[${i}].expected_output`,
         searchRoots,
         absolutePath,
         errors