npm - agentv - Versions diffs - 2.9.0-next.2 → 2.11.0 - Mend

agentv 2.9.0-next.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +63 -0
package/dist/{chunk-3INJ7ISP.js → chunk-CVC3VMZ3.js} +149 -14
package/dist/chunk-CVC3VMZ3.js.map +1 -0
package/dist/{chunk-RJWTL3VS.js → chunk-EXJWRKKL.js} +741 -176
package/dist/chunk-EXJWRKKL.js.map +1 -0
package/dist/{chunk-PC3FAOHT.js → chunk-GO7OTNQ4.js} +109 -9
package/dist/chunk-GO7OTNQ4.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-BGRU67HI.js → dist-NYXYDALF.js} +18 -2
package/dist/index.js +3 -3
package/dist/{interactive-7KFUCBIP.js → interactive-V4A3RRU3.js} +3 -3
package/package.json +1 -1
package/dist/chunk-3INJ7ISP.js.map +0 -1
package/dist/chunk-PC3FAOHT.js.map +0 -1
package/dist/chunk-RJWTL3VS.js.map +0 -1
package/dist/{dist-BGRU67HI.js.map → dist-NYXYDALF.js.map} +0 -0
package/dist/{interactive-7KFUCBIP.js.map → interactive-V4A3RRU3.js.map} +0 -0

package/README.md CHANGED Viewed

@@ -375,6 +375,69 @@ For complete examples and patterns, see:
 - [custom-evaluators](https://agentv.dev/evaluators/custom-evaluators/)
 - [code-judge-sdk example](examples/features/code-judge-sdk)
+### Deterministic Assertions
+Built-in assertion types for common text-matching patterns — no LLM judge or code_judge needed:
+| Type | Value | Behavior |
+|------|-------|----------|
+| `contains` | `string` | Pass if output includes the substring |
+| `contains_any` | `string[]` | Pass if output includes ANY of the strings |
+| `contains_all` | `string[]` | Pass if output includes ALL of the strings |
+| `icontains` | `string` | Case-insensitive `contains` |
+| `icontains_any` | `string[]` | Case-insensitive `contains_any` |
+| `icontains_all` | `string[]` | Case-insensitive `contains_all` |
+| `starts_with` | `string` | Pass if output starts with value (trimmed) |
+| `ends_with` | `string` | Pass if output ends with value (trimmed) |
+| `regex` | `string` | Pass if output matches regex (optional `flags: "i"`) |
+| `equals` | `string` | Pass if output exactly equals value (trimmed) |
+| `is_json` | — | Pass if output is valid JSON |
+All assertions support `weight`, `required`, and `negate` flags. Use `negate: true` to invert (no `not_` prefix needed).
+```yaml
+assert:
+  # Case-insensitive matching for natural language variation
+  - type: icontains_any
+    value: ["missing rule code", "need rule code", "provide rule code"]
+    required: true
+  # Multiple required terms
+  - type: icontains_all
+    value: ["country code", "rule codes"]
+  # Case-insensitive regex
+  - type: regex
+    value: "[a-z]+@[a-z]+\\.[a-z]+"
+    flags: "i"
+```
+See the [assert-extended example](examples/features/assert-extended) for complete patterns.
+### Target Configuration: `judge_target`
+Agent provider targets (`codex`, `copilot`, `claude`, `vscode`) **must** specify `judge_target` when using `llm_judge` or `rubrics` evaluators. Without it, AgentV errors at startup — agent providers can't return structured JSON for judging.
+```yaml
+targets:
+  # Agent target — requires judge_target for LLM-based evaluation
+  - name: codex_local
+    provider: codex
+    judge_target: azure_base  # Required: LLM provider for judging
+  # LLM target — no judge_target needed (judges itself)
+  - name: azure_base
+    provider: azure
+```
+### Agentic Eval Patterns
+When agents respond via tool calls instead of text, use `tool_trajectory` instead of text assertions:
+- **Agent takes workspace actions** (creates files, runs commands) → `tool_trajectory` evaluator
+- **Agent responds in text** (answers questions, asks for info) → `contains`/`icontains_any`/`llm_judge`
+- **Agent does both** → `composite` evaluator combining both
 ### LLM Judges
 Create markdown judge files with evaluation criteria and scoring guidelines:

package/dist/{chunk-3INJ7ISP.js → chunk-CVC3VMZ3.js} RENAMED Viewed

@@ -5,13 +5,14 @@ import {
   resolveEvalPaths,
   runEvalCommand,
   selectTarget,
-  toSnakeCaseDeep,
+  toSnakeCaseDeep as toSnakeCaseDeep2,
   validateConfigFile,
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-PC3FAOHT.js";
+} from "./chunk-GO7OTNQ4.js";
 import {
+  RepoManager,
   assembleLlmJudgePrompt,
   buildPromptInputs,
   createBuiltinRegistry,
@@ -22,9 +23,9 @@ import {
   loadTests,
   normalizeLineEndings,
   toCamelCaseDeep,
-  toSnakeCaseDeep as toSnakeCaseDeep2,
+  toSnakeCaseDeep,
   trimBaselineResult
-} from "./chunk-RJWTL3VS.js";
+} from "./chunk-EXJWRKKL.js";
 import {
   __commonJS,
   __esm,
@@ -2877,7 +2878,7 @@ function oneOf(literals) {
 // package.json
 var package_default = {
   name: "agentv",
-  version: "2.9.0-next.2",
+  version: "2.11.0",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -2924,6 +2925,43 @@ var package_default = {
   }
 };
+// src/commands/cache/index.ts
+// `clean` subcommand: removes the cached git repositories via RepoManager.cleanCache().
+// Asks for interactive confirmation unless --force / -f is passed.
+var cleanCommand = command({
+  name: "clean",
+  description: "Remove all cached git repositories",
+  args: {
+    force: flag({
+      long: "force",
+      short: "f",
+      description: "Skip confirmation prompt"
+    })
+  },
+  handler: async ({ force }) => {
+    if (!force) {
+      const readline2 = await import("node:readline");
+      const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
+      const answer = await new Promise((resolve) => {
+        // If stdin ends before an answer arrives the question callback never fires;
+        // resolve with an empty answer so the command cancels instead of hanging.
+        rl.once("close", () => resolve(""));
+        rl.question("Remove all cached git repos from ~/.agentv/git-cache/? [y/N] ", resolve);
+      });
+      rl.close();
+      // Tolerate surrounding whitespace and the spelled-out "yes"; anything else keeps the default "No".
+      const normalized = answer.trim().toLowerCase();
+      if (normalized !== "y" && normalized !== "yes") {
+        console.log("Cancelled.");
+        return;
+      }
+    }
+    const manager = new RepoManager();
+    await manager.cleanCache();
+    console.log("Cache cleaned.");
+  }
+});
+// Command group for cache management; `clean` is its only subcommand.
+var cacheCommand = subcommands({
+  name: "cache",
+  description: "Manage AgentV cache",
+  cmds: { clean: cleanCommand }
+});
 // src/commands/compare/index.ts
 import { readFileSync } from "node:fs";
 var colors = {
@@ -3267,7 +3305,7 @@ var compareCommand = command({
         const results2 = loadJsonlResults(results[1]);
         const comparison = compareResults(results1, results2, effectiveThreshold);
         if (outputFormat === "json") {
-          console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
+          console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
         } else {
           console.log(formatTable(comparison, results[0], results[1]));
         }
@@ -3313,7 +3351,7 @@ var compareCommand = command({
           }
           const comparison = compareResults(baselineResults, candidateResults, effectiveThreshold);
           if (outputFormat === "json") {
-            console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
+            console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
           } else {
             console.log(formatTable(comparison, baseline, candidate));
           }
@@ -3322,7 +3360,7 @@ var compareCommand = command({
         } else {
           const matrixOutput = compareMatrix(groups, effectiveThreshold);
           if (outputFormat === "json") {
-            console.log(JSON.stringify(toSnakeCaseDeep(matrixOutput), null, 2));
+            console.log(JSON.stringify(toSnakeCaseDeep2(matrixOutput), null, 2));
           } else {
             console.log(formatMatrix(matrixOutput, baseline));
           }
@@ -3765,7 +3803,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
         config: codeConfig.config ?? null
       };
       try {
-        const inputPayload = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
+        const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
         const stdout = await executeScript(script, inputPayload, 6e4, scriptCwd);
         const parsed = JSON.parse(stdout);
         return {
@@ -4042,7 +4080,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-7KFUCBIP.js");
+      const { launchInteractiveWizard } = await import("./interactive-V4A3RRU3.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4658,7 +4696,7 @@ var traceListCommand = command({
     try {
       const metas = listResultFiles(cwd, limit);
       if (outputFormat === "json") {
-        console.log(JSON.stringify(toSnakeCaseDeep(metas), null, 2));
+        console.log(JSON.stringify(toSnakeCaseDeep2(metas), null, 2));
       } else {
         console.log(formatListTable(metas));
       }
@@ -5325,7 +5363,7 @@ var traceStatsCommand = command({
       const groups = groupResults(results, groupBy2);
       if (outputFormat === "json") {
         const statsJson = computeStatsJson(groups, file);
-        console.log(JSON.stringify(toSnakeCaseDeep(statsJson), null, 2));
+        console.log(JSON.stringify(toSnakeCaseDeep2(statsJson), null, 2));
       } else {
         console.log(formatStatsTable(groups, file));
       }
@@ -5374,7 +5412,7 @@ var trimCommand = command({
         const record = JSON.parse(line);
         const camel = toCamelCaseDeep(record);
         const trimmed = trimBaselineResult(camel);
-        const snake = toSnakeCaseDeep2(trimmed);
+        const snake = toSnakeCaseDeep(trimmed);
         return JSON.stringify(snake);
       });
       const output = `${trimmedLines.join("\n")}
@@ -5596,12 +5634,100 @@ var validateCommand = command({
   }
 });
+// src/update-check.ts
+import { spawn as spawn2 } from "node:child_process";
+import { readFile as readFile3 } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join } from "node:path";
+// Minimum time between registry lookups: 24 hours, expressed in milliseconds.
+var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000;
+// Directory under the user's home directory that holds the cached check result.
+var AGENTV_DIR = join(homedir(), ".agentv");
+// File name of the cached check result.
+var CACHE_FILE = "version-check.json";
+// npm registry endpoint describing the package's `latest` release.
+var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
+/**
+ * Reads the cached update-check record from disk.
+ *
+ * @param path8 Optional file to read; defaults to CACHE_FILE inside AGENTV_DIR.
+ * @returns The parsed record when both `latestVersion` and `lastCheckedAt` are strings;
+ *   `null` when the file cannot be read, is not valid JSON, or lacks those fields.
+ */
+async function getCachedUpdateInfo(path8) {
+  const cachePath = path8 ?? join(AGENTV_DIR, CACHE_FILE);
+  try {
+    const record = JSON.parse(await readFile3(cachePath, "utf-8"));
+    const wellFormed = typeof record.latestVersion === "string" && typeof record.lastCheckedAt === "string";
+    return wellFormed ? record : null;
+  } catch {
+    // Missing file, unreadable file, malformed JSON, or a non-object payload.
+    return null;
+  }
+}
+/**
+ * Decides whether the cached update info needs to be refreshed.
+ *
+ * @param cache Cached record carrying a `lastCheckedAt` date string, or a falsy value when no cache exists.
+ * @returns `true` when there is no cache, when `lastCheckedAt` cannot be parsed or lies in
+ *   the future, or when more than CHECK_INTERVAL_MS has elapsed since it; otherwise `false`.
+ */
+function shouldCheck(cache) {
+  if (!cache) return true;
+  const elapsed = Date.now() - new Date(cache.lastCheckedAt).getTime();
+  // An unparseable timestamp yields NaN and a skewed clock can yield a negative value.
+  // Either would compare as "not stale" below and suppress the refresh that rewrites
+  // the cache, so treat both as stale.
+  if (!Number.isFinite(elapsed) || elapsed < 0) return true;
+  return elapsed > CHECK_INTERVAL_MS;
+}
+/**
+ * Reports whether version `a` has higher semver precedence than version `b`.
+ *
+ * Compares major.minor.patch numerically (missing parts count as 0). When those are
+ * equal, a release outranks any prerelease of the same version, and two prereleases
+ * are ordered identifier by identifier. Build metadata after "+" is ignored.
+ *
+ * @param a Candidate version string, e.g. "2.11.0".
+ * @param b Reference version string, e.g. "2.9.0-next.2".
+ * @returns `true` only when `a` is strictly newer than `b`.
+ */
+function isNewer(a, b) {
+  const parse = (version) => {
+    // Drop build metadata, then split the core version from the prerelease tag at the first hyphen.
+    const [withoutBuild] = version.split("+");
+    const dash = withoutBuild.indexOf("-");
+    const core = dash === -1 ? withoutBuild : withoutBuild.slice(0, dash);
+    const pre = dash === -1 ? [] : withoutBuild.slice(dash + 1).split(".");
+    return { core: core.split(".").map(Number), pre };
+  };
+  const va = parse(a);
+  const vb = parse(b);
+  for (let i = 0; i < 3; i++) {
+    const x = va.core[i] ?? 0;
+    const y = vb.core[i] ?? 0;
+    if (x > y) return true;
+    if (x < y) return false;
+  }
+  // Same core version: a release is newer than a prerelease, never the reverse.
+  if (va.pre.length === 0 || vb.pre.length === 0) {
+    return va.pre.length === 0 && vb.pre.length > 0;
+  }
+  // Both are prereleases: the first differing identifier decides.
+  const shared = Math.min(va.pre.length, vb.pre.length);
+  for (let i = 0; i < shared; i++) {
+    const ia = va.pre[i];
+    const ib = vb.pre[i];
+    if (ia === ib) continue;
+    const aNumeric = /^\d+$/.test(ia);
+    const bNumeric = /^\d+$/.test(ib);
+    if (aNumeric && bNumeric) return Number(ia) > Number(ib);
+    // Numeric identifiers rank below alphanumeric ones.
+    if (aNumeric !== bNumeric) return bNumeric;
+    return ia > ib;
+  }
+  // All shared identifiers match: the longer prerelease tag ranks higher.
+  return va.pre.length > vb.pre.length;
+}
+/**
+ * Builds the two-line "update available" notice.
+ *
+ * @param currentVersion Version currently running.
+ * @param latestVersion Latest known version, or a falsy value when unknown.
+ * @returns The notice text when `latestVersion` is newer than `currentVersion`
+ *   according to isNewer; otherwise `null`.
+ */
+function buildNotice(currentVersion, latestVersion) {
+  const updateAvailable = Boolean(latestVersion) && isNewer(latestVersion, currentVersion);
+  if (!updateAvailable) return null;
+  return `  Update available: ${currentVersion} \u2192 ${latestVersion}
+  Run \`agentv self update\` to upgrade.`;
+}
+/**
+ * Spawns a detached Node process that asks the npm registry for the latest version
+ * and, on success, writes `{ latestVersion, lastCheckedAt }` to the cache file.
+ *
+ * Fire-and-forget: the child is unref'd with stdio ignored, and every failure
+ * (spawn, network, non-200 status, bad JSON, write error) is deliberately swallowed so
+ * the update check can never break the command the user actually ran.
+ */
+function backgroundUpdateCheck() {
+  const dir = AGENTV_DIR;
+  const filePath = join(dir, CACHE_FILE);
+  // The script runs via `node -e`; paths and URL are embedded as JSON string literals.
+  const script = `
+    const https = require('https');
+    const fs = require('fs');
+    const dir = ${JSON.stringify(dir)};
+    const filePath = ${JSON.stringify(filePath)};
+    https.get(${JSON.stringify(NPM_REGISTRY_URL)}, { timeout: 5000 }, (res) => {
+      if (res.statusCode !== 200) { res.resume(); process.exit(); }
+      let body = '';
+      res.on('data', (c) => body += c);
+      res.on('end', () => {
+        try {
+          const v = JSON.parse(body).version;
+          if (typeof v === 'string') {
+            fs.mkdirSync(dir, { recursive: true });
+            fs.writeFileSync(filePath, JSON.stringify({ latestVersion: v, lastCheckedAt: new Date().toISOString() }, null, 2));
+          }
+        } catch {}
+        process.exit();
+      });
+    }).on('error', () => process.exit()).on('timeout', function() { this.destroy(); process.exit(); });
+  `;
+  try {
+    const child = spawn2(process.execPath, ["-e", script], {
+      detached: true,
+      stdio: "ignore",
+      windowsHide: true
+    });
+    // Spawn failures are reported asynchronously through an "error" event, which the
+    // surrounding try/catch cannot intercept; an unhandled "error" event would be thrown.
+    child.on("error", () => {
+    });
+    child.unref();
+  } catch {
+    // Best effort: a synchronous spawn failure only means no refresh happens this run.
+  }
+}
+/**
+ * Produces the update notice for `currentVersion` from the cached registry data,
+ * starting a background refresh when shouldCheck reports the cache as due.
+ *
+ * @param currentVersion Version currently running.
+ * @returns The notice text, or `null` when no newer version is cached or when the check
+ *   is disabled (AGENTV_NO_UPDATE_CHECK is "1" or CI is "true").
+ */
+async function getUpdateNotice(currentVersion) {
+  const { AGENTV_NO_UPDATE_CHECK: optOut, CI: ci } = process.env;
+  if (optOut === "1" || ci === "true") return null;
+  const cached = await getCachedUpdateInfo();
+  // The refresh only benefits later runs; this run reports what is already cached.
+  if (shouldCheck(cached)) backgroundUpdateCheck();
+  const latest = cached?.latestVersion ?? null;
+  return buildNotice(currentVersion, latest);
+}
 // src/index.ts
 var app = subcommands({
   name: "agentv",
   description: "AgentV CLI",
   version: package_default.version,
   cmds: {
+    cache: cacheCommand,
     eval: evalRunCommand,
     prompt: evalPromptCommand,
     compare: compareCommand,
@@ -5640,6 +5766,15 @@ function preprocessArgv(argv) {
   return result;
 }
 async function runCli(argv = process.argv) {
+  // The update notice is looked up concurrently with the command and printed to stderr
+  // when the process exits, provided the lookup has resolved with a notice by then.
+  let updateNotice = null;
+  process.on("exit", () => {
+    if (updateNotice) process.stderr.write(`
+${updateNotice}
+`);
+  });
+  getUpdateNotice(package_default.version).then((n) => {
+    updateNotice = n;
+  }).catch(() => {
+    // The update check is optional; a failure must not surface as an unhandled rejection.
+  });
   const processedArgv = preprocessArgv(argv);
   await run(binary(app), processedArgv);
 }
@@ -5649,4 +5784,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-3INJ7ISP.js.map
+//# sourceMappingURL=chunk-CVC3VMZ3.js.map