agentv 2.10.0 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -375,6 +375,69 @@ For complete examples and patterns, see:
375
375
  - [custom-evaluators](https://agentv.dev/evaluators/custom-evaluators/)
376
376
  - [code-judge-sdk example](examples/features/code-judge-sdk)
377
377
 
378
+ ### Deterministic Assertions
379
+
380
+ Built-in assertion types for common text-matching patterns — no LLM judge or code_judge needed:
381
+
382
+ | Type | Value | Behavior |
383
+ |------|-------|----------|
384
+ | `contains` | `string` | Pass if output includes the substring |
385
+ | `contains_any` | `string[]` | Pass if output includes ANY of the strings |
386
+ | `contains_all` | `string[]` | Pass if output includes ALL of the strings |
387
+ | `icontains` | `string` | Case-insensitive `contains` |
388
+ | `icontains_any` | `string[]` | Case-insensitive `contains_any` |
389
+ | `icontains_all` | `string[]` | Case-insensitive `contains_all` |
390
+ | `starts_with` | `string` | Pass if output starts with value (trimmed) |
391
+ | `ends_with` | `string` | Pass if output ends with value (trimmed) |
392
+ | `regex` | `string` | Pass if output matches regex (optional `flags: "i"`) |
393
+ | `equals` | `string` | Pass if output exactly equals value (trimmed) |
394
+ | `is_json` | — | Pass if output is valid JSON |
395
+
396
+ All assertions support `weight`, `required`, and `negate` flags. Use `negate: true` to invert (no `not_` prefix needed).
397
+
398
+ ```yaml
399
+ assert:
400
+ # Case-insensitive matching for natural language variation
401
+ - type: icontains_any
402
+ value: ["missing rule code", "need rule code", "provide rule code"]
403
+ required: true
404
+
405
+ # Multiple required terms
406
+ - type: icontains_all
407
+ value: ["country code", "rule codes"]
408
+
409
+ # Case-insensitive regex
410
+ - type: regex
411
+ value: "[a-z]+@[a-z]+\\.[a-z]+"
412
+ flags: "i"
413
+ ```
414
+
415
+ See the [assert-extended example](examples/features/assert-extended) for complete patterns.
416
+
417
+ ### Target Configuration: `judge_target`
418
+
419
+ Agent provider targets (`codex`, `copilot`, `claude`, `vscode`) **must** specify `judge_target` when using `llm_judge` or `rubrics` evaluators. Without it, AgentV errors at startup — agent providers can't return structured JSON for judging.
420
+
421
+ ```yaml
422
+ targets:
423
+ # Agent target — requires judge_target for LLM-based evaluation
424
+ - name: codex_local
425
+ provider: codex
426
+ judge_target: azure_base # Required: LLM provider for judging
427
+
428
+ # LLM target — no judge_target needed (judges itself)
429
+ - name: azure_base
430
+ provider: azure
431
+ ```
432
+
433
+ ### Agentic Eval Patterns
434
+
435
+ When agents respond via tool calls instead of text, use `tool_trajectory` instead of text assertions:
436
+
437
+ - **Agent takes workspace actions** (creates files, runs commands) → `tool_trajectory` evaluator
438
+ - **Agent responds in text** (answers questions, asks for info) → `contains`/`icontains_any`/`llm_judge`
439
+ - **Agent does both** → `composite` evaluator combining both
440
+
378
441
  ### LLM Judges
379
442
 
380
443
  Create markdown judge files with evaluation criteria and scoring guidelines:
@@ -5,13 +5,14 @@ import {
5
5
  resolveEvalPaths,
6
6
  runEvalCommand,
7
7
  selectTarget,
8
- toSnakeCaseDeep,
8
+ toSnakeCaseDeep as toSnakeCaseDeep2,
9
9
  validateConfigFile,
10
10
  validateEvalFile,
11
11
  validateFileReferences,
12
12
  validateTargetsFile
13
- } from "./chunk-PC3FAOHT.js";
13
+ } from "./chunk-IL7CRMY6.js";
14
14
  import {
15
+ RepoManager,
15
16
  assembleLlmJudgePrompt,
16
17
  buildPromptInputs,
17
18
  createBuiltinRegistry,
@@ -22,9 +23,9 @@ import {
22
23
  loadTests,
23
24
  normalizeLineEndings,
24
25
  toCamelCaseDeep,
25
- toSnakeCaseDeep as toSnakeCaseDeep2,
26
+ toSnakeCaseDeep,
26
27
  trimBaselineResult
27
- } from "./chunk-RJWTL3VS.js";
28
+ } from "./chunk-MQIQH5LB.js";
28
29
  import {
29
30
  __commonJS,
30
31
  __esm,
@@ -2877,7 +2878,7 @@ function oneOf(literals) {
2877
2878
  // package.json
2878
2879
  var package_default = {
2879
2880
  name: "agentv",
2880
- version: "2.10.0",
2881
+ version: "2.11.1",
2881
2882
  description: "CLI entry point for AgentV",
2882
2883
  type: "module",
2883
2884
  repository: {
@@ -2924,6 +2925,90 @@ var package_default = {
2924
2925
  }
2925
2926
  };
2926
2927
 
2928
+ // src/commands/cache/add.ts
2929
+ import { existsSync } from "node:fs";
2930
+ import { join, resolve } from "node:path";
2931
// `cache add` subcommand: seeds the git cache from a repository that already
// exists on the local disk and associates the entry with a remote URL.
var addCommand = command({
  name: "add",
  description: "Seed cache from a local git repository",
  args: {
    url: option({
      type: string,
      long: "url",
      description: "Remote URL to associate with the cache entry"
    }),
    from: option({
      type: string,
      long: "from",
      description: "Path to local git repository to clone from"
    }),
    force: flag({
      long: "force",
      short: "f",
      description: "Overwrite existing cache entry"
    })
  },
  // Validates the source path, then delegates the seeding to RepoManager.
  // Every failure is reported on stderr and ends the process with exit code 1.
  handler: async (parsed) => {
    const { url, from, force } = parsed;
    const sourceRepo = resolve(from);

    if (!existsSync(sourceRepo)) {
      console.error(`Error: local path does not exist: ${sourceRepo}`);
      process.exit(1);
    }

    // Either a `.git` entry or a top-level `HEAD` is accepted as evidence of a
    // repository (the latter presumably covers bare repositories).
    const hasGitMarker = [".git", "HEAD"].some((marker) => existsSync(join(sourceRepo, marker)));
    if (!hasGitMarker) {
      console.error(`Error: ${sourceRepo} does not appear to be a git repository`);
      process.exit(1);
    }

    const repoManager = new RepoManager();
    try {
      const seededAt = await repoManager.seedCache(sourceRepo, url, { force });
      console.log(`Cache seeded from ${sourceRepo}`);
      console.log(` Remote URL: ${url}`);
      console.log(` Cache path: ${seededAt}`);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`Error: ${detail}`);
      process.exit(1);
    }
  }
});
2973
+
2974
+ // src/commands/cache/index.ts
2975
// `cache clean` subcommand: removes every cached git repository, asking for
// confirmation first unless --force / -f is given.
var cleanCommand = command({
  name: "clean",
  description: "Remove all cached git repositories",
  args: {
    force: flag({
      long: "force",
      short: "f",
      description: "Skip confirmation prompt"
    })
  },
  handler: async (parsed) => {
    const skipPrompt = parsed.force;

    if (!skipPrompt) {
      // readline is loaded lazily; it is only needed on the interactive path.
      const { createInterface } = await import("node:readline");
      const prompt = createInterface({ input: process.stdin, output: process.stdout });
      const reply = await new Promise((done) => {
        prompt.question("Remove all cached git repos from ~/.agentv/git-cache/? [y/N] ", done);
      });
      prompt.close();

      // Only a bare "y" (any case) confirms; every other reply cancels.
      const confirmed = reply.toLowerCase() === "y";
      if (!confirmed) {
        console.log("Cancelled.");
        return;
      }
    }

    const repoManager = new RepoManager();
    await repoManager.cleanCache();
    console.log("Cache cleaned.");
  }
});
3003
// `agentv cache` command group: exposes the `add` and `clean` subcommands.
var cacheCommand = subcommands({
  name: "cache",
  description: "Manage AgentV cache",
  cmds: { add: addCommand, clean: cleanCommand }
});
3011
+
2927
3012
  // src/commands/compare/index.ts
2928
3013
  import { readFileSync } from "node:fs";
2929
3014
  var colors = {
@@ -3267,7 +3352,7 @@ var compareCommand = command({
3267
3352
  const results2 = loadJsonlResults(results[1]);
3268
3353
  const comparison = compareResults(results1, results2, effectiveThreshold);
3269
3354
  if (outputFormat === "json") {
3270
- console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
3355
+ console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
3271
3356
  } else {
3272
3357
  console.log(formatTable(comparison, results[0], results[1]));
3273
3358
  }
@@ -3313,7 +3398,7 @@ var compareCommand = command({
3313
3398
  }
3314
3399
  const comparison = compareResults(baselineResults, candidateResults, effectiveThreshold);
3315
3400
  if (outputFormat === "json") {
3316
- console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
3401
+ console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
3317
3402
  } else {
3318
3403
  console.log(formatTable(comparison, baseline, candidate));
3319
3404
  }
@@ -3322,7 +3407,7 @@ var compareCommand = command({
3322
3407
  } else {
3323
3408
  const matrixOutput = compareMatrix(groups, effectiveThreshold);
3324
3409
  if (outputFormat === "json") {
3325
- console.log(JSON.stringify(toSnakeCaseDeep(matrixOutput), null, 2));
3410
+ console.log(JSON.stringify(toSnakeCaseDeep2(matrixOutput), null, 2));
3326
3411
  } else {
3327
3412
  console.log(formatMatrix(matrixOutput, baseline));
3328
3413
  }
@@ -3765,7 +3850,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
3765
3850
  config: codeConfig.config ?? null
3766
3851
  };
3767
3852
  try {
3768
- const inputPayload = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
3853
+ const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
3769
3854
  const stdout = await executeScript(script, inputPayload, 6e4, scriptCwd);
3770
3855
  const parsed = JSON.parse(stdout);
3771
3856
  return {
@@ -4042,7 +4127,7 @@ var evalRunCommand = command({
4042
4127
  },
4043
4128
  handler: async (args) => {
4044
4129
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4045
- const { launchInteractiveWizard } = await import("./interactive-7KFUCBIP.js");
4130
+ const { launchInteractiveWizard } = await import("./interactive-7NQRG7GK.js");
4046
4131
  await launchInteractiveWizard();
4047
4132
  return;
4048
4133
  }
@@ -4260,7 +4345,7 @@ var generateCommand = subcommands({
4260
4345
  });
4261
4346
 
4262
4347
  // src/commands/init/index.ts
4263
- import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4348
+ import { existsSync as existsSync2, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4264
4349
  import path5 from "node:path";
4265
4350
  import * as readline from "node:readline/promises";
4266
4351
 
@@ -4330,14 +4415,14 @@ async function initCommand(options = {}) {
4330
4415
  const existingFiles = [];
4331
4416
  if (envTemplate) {
4332
4417
  const envFilePath = path5.join(targetPath, ".env.example");
4333
- if (existsSync(envFilePath)) {
4418
+ if (existsSync2(envFilePath)) {
4334
4419
  existingFiles.push(".env.example");
4335
4420
  }
4336
4421
  }
4337
- if (existsSync(agentvDir)) {
4422
+ if (existsSync2(agentvDir)) {
4338
4423
  for (const template of otherAgentvTemplates) {
4339
4424
  const targetFilePath = path5.join(agentvDir, template.path);
4340
- if (existsSync(targetFilePath)) {
4425
+ if (existsSync2(targetFilePath)) {
4341
4426
  existingFiles.push(path5.relative(targetPath, targetFilePath));
4342
4427
  }
4343
4428
  }
@@ -4356,7 +4441,7 @@ async function initCommand(options = {}) {
4356
4441
  }
4357
4442
  console.log();
4358
4443
  }
4359
- if (!existsSync(agentvDir)) {
4444
+ if (!existsSync2(agentvDir)) {
4360
4445
  mkdirSync(agentvDir, { recursive: true });
4361
4446
  }
4362
4447
  if (envTemplate) {
@@ -4367,7 +4452,7 @@ async function initCommand(options = {}) {
4367
4452
  for (const template of otherAgentvTemplates) {
4368
4453
  const targetFilePath = path5.join(agentvDir, template.path);
4369
4454
  const targetDirPath = path5.dirname(targetFilePath);
4370
- if (!existsSync(targetDirPath)) {
4455
+ if (!existsSync2(targetDirPath)) {
4371
4456
  mkdirSync(targetDirPath, { recursive: true });
4372
4457
  }
4373
4458
  writeFileSync2(targetFilePath, template.content, "utf-8");
@@ -4421,7 +4506,7 @@ function detectPackageManager() {
4421
4506
  return detectPackageManagerFromPath(process.argv[1] ?? "");
4422
4507
  }
4423
4508
  function runCommand(cmd, args) {
4424
- return new Promise((resolve, reject) => {
4509
+ return new Promise((resolve2, reject) => {
4425
4510
  const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
4426
4511
  let stdout = "";
4427
4512
  child.stdout?.on("data", (data) => {
@@ -4429,7 +4514,7 @@ function runCommand(cmd, args) {
4429
4514
  stdout += data.toString();
4430
4515
  });
4431
4516
  child.on("error", reject);
4432
- child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
4517
+ child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
4433
4518
  });
4434
4519
  }
4435
4520
  var updateCommand = command({
@@ -4658,7 +4743,7 @@ var traceListCommand = command({
4658
4743
  try {
4659
4744
  const metas = listResultFiles(cwd, limit);
4660
4745
  if (outputFormat === "json") {
4661
- console.log(JSON.stringify(toSnakeCaseDeep(metas), null, 2));
4746
+ console.log(JSON.stringify(toSnakeCaseDeep2(metas), null, 2));
4662
4747
  } else {
4663
4748
  console.log(formatListTable(metas));
4664
4749
  }
@@ -5325,7 +5410,7 @@ var traceStatsCommand = command({
5325
5410
  const groups = groupResults(results, groupBy2);
5326
5411
  if (outputFormat === "json") {
5327
5412
  const statsJson = computeStatsJson(groups, file);
5328
- console.log(JSON.stringify(toSnakeCaseDeep(statsJson), null, 2));
5413
+ console.log(JSON.stringify(toSnakeCaseDeep2(statsJson), null, 2));
5329
5414
  } else {
5330
5415
  console.log(formatStatsTable(groups, file));
5331
5416
  }
@@ -5374,7 +5459,7 @@ var trimCommand = command({
5374
5459
  const record = JSON.parse(line);
5375
5460
  const camel = toCamelCaseDeep(record);
5376
5461
  const trimmed = trimBaselineResult(camel);
5377
- const snake = toSnakeCaseDeep2(trimmed);
5462
+ const snake = toSnakeCaseDeep(trimmed);
5378
5463
  return JSON.stringify(snake);
5379
5464
  });
5380
5465
  const output = `${trimmedLines.join("\n")}
@@ -5596,12 +5681,100 @@ var validateCommand = command({
5596
5681
  }
5597
5682
  });
5598
5683
 
5684
+ // src/update-check.ts
5685
+ import { spawn as spawn2 } from "node:child_process";
5686
+ import { readFile as readFile3 } from "node:fs/promises";
5687
+ import { homedir } from "node:os";
5688
+ import { join as join2 } from "node:path";
5689
// Minimum time between registry look-ups: one day, in milliseconds
// (1000 ms * 60 s * 60 min * 24 h).
var CHECK_INTERVAL_MS = 1e3 * 60 * 60 * 24;
// Per-user AgentV directory inside the home directory.
var AGENTV_DIR = join2(homedir(), ".agentv");
// File name of the cached "latest version" record, stored inside AGENTV_DIR.
var CACHE_FILE = "version-check.json";
// Registry endpoint queried for the latest published agentv version.
var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
5693
/**
 * Reads the cached version-check record from disk.
 *
 * @param {string} [path8] - File to read; defaults to CACHE_FILE inside AGENTV_DIR.
 * @returns {Promise<object|null>} The parsed record when both `latestVersion`
 *   and `lastCheckedAt` are strings; otherwise null. Read errors, invalid JSON
 *   and non-object payloads all resolve to null rather than rejecting.
 */
async function getCachedUpdateInfo(path8) {
  const cacheLocation = path8 ?? join2(AGENTV_DIR, CACHE_FILE);
  try {
    const record = JSON.parse(await readFile3(cacheLocation, "utf-8"));
    const hasExpectedShape = typeof record.latestVersion === "string" && typeof record.lastCheckedAt === "string";
    return hasExpectedShape ? record : null;
  } catch {
    // Missing file, unreadable file, malformed JSON, or a `null` payload.
    return null;
  }
}
5706
/**
 * Decides whether a fresh registry look-up is due.
 *
 * @param {{ lastCheckedAt: string } | null | undefined} cache - Cached record, if any.
 * @returns {boolean} true when there is no cache, when `lastCheckedAt` cannot
 *   be parsed as a date, when it lies in the future, or when more than
 *   CHECK_INTERVAL_MS has elapsed since it.
 */
function shouldCheck(cache) {
  if (!cache) return true;
  const lastCheckedMs = new Date(cache.lastCheckedAt).getTime();
  // An unparseable timestamp yields NaN, and every comparison with NaN is
  // false; without this guard a corrupted cache would disable checks forever.
  if (Number.isNaN(lastCheckedMs)) return true;
  const elapsed = Date.now() - lastCheckedMs;
  // A negative elapsed time means the record is dated in the future (e.g. the
  // clock was wrong when it was written); re-check so the record is rewritten.
  return elapsed < 0 || elapsed > CHECK_INTERVAL_MS;
}
5711
/**
 * Reports whether version `a` is strictly newer than version `b`.
 *
 * The numeric core (major.minor.patch) is compared first; missing components
 * count as 0. When the cores are equal, semver prerelease precedence applies:
 * a release outranks any prerelease of the same core (2.11.1 > 2.11.1-beta.1),
 * and two prereleases are compared identifier by identifier. A leading "v" and
 * "+build" metadata are ignored.
 *
 * @param {string} a - Candidate version.
 * @param {string} b - Version to compare against.
 * @returns {boolean} true only when `a` is newer; false when the versions are
 *   equal, when `a` is older, or when either core cannot be parsed.
 */
function isNewer(a, b) {
  const isNumeric = (text) => /^\d+$/.test(text);

  // Splits a version into its numeric core and its prerelease identifiers.
  // Returns null when a core component is not a plain non-negative integer.
  const parse = (version) => {
    const text = String(version).trim().replace(/^v/i, "").split("+", 1)[0];
    const dashAt = text.indexOf("-");
    const core = dashAt === -1 ? text : text.slice(0, dashAt);
    const prerelease = dashAt === -1 ? [] : text.slice(dashAt + 1).split(".");
    const parts = core.split(".").slice(0, 3);
    if (!parts.every((part) => isNumeric(part))) return null;
    return { numbers: parts.map(Number), prerelease };
  };

  // Negative, zero or positive, ordering two prerelease identifier lists.
  const comparePrerelease = (x, y) => {
    if (x.length === 0 && y.length === 0) return 0;
    if (x.length === 0) return 1; // a release outranks a prerelease
    if (y.length === 0) return -1;
    const count = Math.max(x.length, y.length);
    for (let i = 0; i < count; i++) {
      // With all earlier identifiers equal, the longer list wins.
      if (x[i] === undefined) return -1;
      if (y[i] === undefined) return 1;
      const xNumeric = isNumeric(x[i]);
      const yNumeric = isNumeric(y[i]);
      if (xNumeric && yNumeric) {
        const diff = Number(x[i]) - Number(y[i]);
        if (diff !== 0) return diff;
      } else if (xNumeric !== yNumeric) {
        // Numeric identifiers rank below alphanumeric ones.
        return xNumeric ? -1 : 1;
      } else if (x[i] !== y[i]) {
        return x[i] < y[i] ? -1 : 1;
      }
    }
    return 0;
  };

  const left = parse(a);
  const right = parse(b);
  // Never claim "newer" on input that cannot be understood.
  if (left === null || right === null) return false;

  for (let i = 0; i < 3; i++) {
    const diff = (left.numbers[i] ?? 0) - (right.numbers[i] ?? 0);
    if (diff !== 0) return diff > 0;
  }
  return comparePrerelease(left.prerelease, right.prerelease) > 0;
}
5720
// Builds the "update available" message for the user.
//   currentVersion - version string of the running CLI
//   latestVersion  - latest known published version; a falsy value means unknown
// Returns null when latestVersion is falsy or when
// isNewer(latestVersion, currentVersion) is false; otherwise returns a two-line
// string naming both versions and the `agentv self update` command.
+ function buildNotice(currentVersion, latestVersion) {
5721
+ if (!latestVersion) return null;
5722
+ if (!isNewer(latestVersion, currentVersion)) return null;
5723
+ return ` Update available: ${currentVersion} \u2192 ${latestVersion}
5724
+ Run \`agentv self update\` to upgrade.`;
5725
+ }
5726
// Starts a separate Node process that refreshes the cached "latest version" record.
//
// The child is launched with `process.execPath -e <script>`, where the script is
// built below with the cache directory, the cache file path and NPM_REGISTRY_URL
// embedded as JSON string literals. The script:
//   - issues an HTTPS GET to the registry URL with a 5000 ms timeout option;
//   - on a non-200 status, drains the response and exits;
//   - on 'end', parses the body as JSON and, if `version` is a string, creates the
//     directory (recursive) and writes { latestVersion, lastCheckedAt } to the
//     cache file, with lastCheckedAt taken from new Date().toISOString();
//   - exits on request 'error' or 'timeout', and ignores parse/write errors.
//
// The child is spawned detached with stdio ignored and then unref()'d. This
// function takes no arguments and returns nothing.
+ function backgroundUpdateCheck() {
5727
+ const dir = AGENTV_DIR;
5728
+ const filePath = join2(dir, CACHE_FILE);
5729
+ const script = `
5730
+ const https = require('https');
5731
+ const fs = require('fs');
5732
+ const dir = ${JSON.stringify(dir)};
5733
+ const filePath = ${JSON.stringify(filePath)};
5734
+ https.get(${JSON.stringify(NPM_REGISTRY_URL)}, { timeout: 5000 }, (res) => {
5735
+ if (res.statusCode !== 200) { res.resume(); process.exit(); }
5736
+ let body = '';
5737
+ res.on('data', (c) => body += c);
5738
+ res.on('end', () => {
5739
+ try {
5740
+ const v = JSON.parse(body).version;
5741
+ if (typeof v === 'string') {
5742
+ fs.mkdirSync(dir, { recursive: true });
5743
+ fs.writeFileSync(filePath, JSON.stringify({ latestVersion: v, lastCheckedAt: new Date().toISOString() }, null, 2));
5744
+ }
5745
+ } catch {}
5746
+ process.exit();
5747
+ });
5748
+ }).on('error', () => process.exit()).on('timeout', function() { this.destroy(); process.exit(); });
5749
+ `;
5750
// Best-effort: an exception thrown while spawning is deliberately ignored.
+ try {
5751
+ const child = spawn2(process.execPath, ["-e", script], {
5752
+ detached: true,
5753
+ stdio: "ignore",
5754
+ windowsHide: true
5755
+ });
5756
+ child.unref();
5757
+ } catch {
5758
+ }
5759
+ }
5760
/**
 * Produces the update notice for the running CLI version, if one applies.
 *
 * Update checks are skipped entirely when AGENTV_NO_UPDATE_CHECK is "1" or CI
 * is "true". Otherwise the cached record is read, a background refresh is
 * started when shouldCheck() says one is due, and the notice is built from the
 * version that was cached before that refresh.
 *
 * @param {string} currentVersion - Version of the running CLI.
 * @returns {Promise<string|null>} The notice text, or null when there is none.
 */
async function getUpdateNotice(currentVersion) {
  const { AGENTV_NO_UPDATE_CHECK: optOut, CI: ciFlag } = process.env;
  const checksDisabled = optOut === "1" || ciFlag === "true";
  if (checksDisabled) {
    return null;
  }

  const cached = await getCachedUpdateInfo();
  if (shouldCheck(cached)) {
    backgroundUpdateCheck();
  }

  const latestKnown = cached?.latestVersion ?? null;
  return buildNotice(currentVersion, latestKnown);
}
5770
+
5599
5771
  // src/index.ts
5600
5772
  var app = subcommands({
5601
5773
  name: "agentv",
5602
5774
  description: "AgentV CLI",
5603
5775
  version: package_default.version,
5604
5776
  cmds: {
5777
+ cache: cacheCommand,
5605
5778
  eval: evalRunCommand,
5606
5779
  prompt: evalPromptCommand,
5607
5780
  compare: compareCommand,
@@ -5640,6 +5813,15 @@ function preprocessArgv(argv) {
5640
5813
  return result;
5641
5814
  }
5642
5815
  // CLI entry point: preprocesses argv and runs the `app` command tree.
  //   argv - argument vector; defaults to process.argv
  // An 'exit' listener is registered first; it writes the update notice to
  // stderr if one has been stored by then. getUpdateNotice() is started but not
  // awaited, so the command itself begins without waiting for the lookup.
  async function runCli(argv = process.argv) {
5816
+ let updateNotice = null;
5817
+ process.on("exit", () => {
5818
+ if (updateNotice) process.stderr.write(`
5819
+ ${updateNotice}
5820
+ `);
5821
+ });
5822
+ getUpdateNotice(package_default.version).then((n) => {
5823
+ updateNotice = n;
5824
+ });
5643
5825
  const processedArgv = preprocessArgv(argv);
5644
5826
  await run(binary(app), processedArgv);
5645
5827
  }
@@ -5649,4 +5831,4 @@ export {
5649
5831
  preprocessArgv,
5650
5832
  runCli
5651
5833
  };
5652
- //# sourceMappingURL=chunk-G3OTPFYX.js.map
5834
+ //# sourceMappingURL=chunk-D6KWUG7C.js.map