agentv 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -139,7 +139,7 @@ agentv eval --target vscode_projectx "path/to/test.yaml"
139
139
  Run a specific test case with custom targets path:
140
140
 
141
141
  ```bash
142
- agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --test-id "my-test-case" "path/to/test.yaml"
142
+ agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --eval-id "my-test-case" "path/to/test.yaml"
143
143
  ```
144
144
 
145
145
  ### Command Line Options
@@ -147,7 +147,7 @@ agentv eval --target vscode_projectx --targets "path/to/targets.yaml" --test-id
147
147
  - `test_file`: Path to test YAML file (required, positional argument)
148
148
  - `--target TARGET`: Execution target name from targets.yaml (overrides target specified in test file)
149
149
  - `--targets TARGETS`: Path to targets.yaml file (default: ./.agentv/targets.yaml)
150
- - `--test-id TEST_ID`: Run only the test case with this specific ID
150
+ - `--eval-id EVAL_ID`: Run only the test case with this specific ID
151
151
  - `--out OUTPUT_FILE`: Output file path (default: results/{testname}_{timestamp}.jsonl)
152
152
  - `--format FORMAT`: Output format: 'jsonl' or 'yaml' (default: jsonl)
153
153
  - `--dry-run`: Run with mock model for testing
@@ -296,7 +296,7 @@ AgentV uses an AI-powered quality grader that:
296
296
  **JSONL format (default):**
297
297
 
298
298
  - One JSON object per line (newline-delimited)
299
- - Fields: `test_id`, `score`, `hits`, `misses`, `model_answer`, `expected_aspect_count`, `target`, `timestamp`, `reasoning`, `raw_request`, `grader_raw_request`
299
+ - Fields: `eval_id`, `score`, `hits`, `misses`, `model_answer`, `expected_aspect_count`, `target`, `timestamp`, `reasoning`, `raw_request`, `grader_raw_request`
300
300
 
301
301
  **YAML format (with `--format yaml`):**
302
302
 
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
585
585
  import { Command } from "commander";
586
586
  import { readFileSync as readFileSync2 } from "node:fs";
587
587
 
588
- // ../../packages/core/dist/chunk-5REK5RSI.js
588
+ // ../../packages/core/dist/chunk-QVS4OL44.js
589
589
  import { constants } from "node:fs";
590
590
  import { access } from "node:fs/promises";
591
591
  import path from "node:path";
@@ -664,6 +664,29 @@ async function resolveFileReference(rawValue, searchRoots) {
664
664
  }
665
665
  return { displayPath, attempted };
666
666
  }
667
+ var KNOWN_PROVIDERS = [
668
+ "azure",
669
+ "anthropic",
670
+ "gemini",
671
+ "mock",
672
+ "vscode",
673
+ "vscode-insiders"
674
+ ];
675
+ var PROVIDER_ALIASES = [
676
+ "azure-openai",
677
+ // alias for "azure"
678
+ "google",
679
+ // alias for "gemini"
680
+ "google-gemini",
681
+ // alias for "gemini"
682
+ "openai",
683
+ // legacy/future support
684
+ "bedrock",
685
+ // legacy/future support
686
+ "vertex"
687
+ // legacy/future support
688
+ ];
689
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
667
690
 
668
691
  // ../../packages/core/dist/index.js
669
692
  import { constants as constants3 } from "node:fs";
@@ -10903,7 +10926,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10903
10926
  }
10904
10927
  const codeSnippets = extractCodeBlocks(userSegments);
10905
10928
  const assistantContent = assistantMessages[0]?.content;
10906
- const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
10929
+ const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
10907
10930
  const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
10908
10931
  const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
10909
10932
  const testCase = {
@@ -11019,7 +11042,7 @@ function cloneJsonValue(value) {
11019
11042
  }
11020
11043
  return cloneJsonObject(value);
11021
11044
  }
11022
- function normalizeAssistantContent(content) {
11045
+ async function resolveAssistantContent(content, searchRoots, verbose) {
11023
11046
  if (typeof content === "string") {
11024
11047
  return content;
11025
11048
  }
@@ -11032,12 +11055,42 @@ function normalizeAssistantContent(content) {
11032
11055
  parts.push(entry);
11033
11056
  continue;
11034
11057
  }
11035
- const textValue = asString(entry["text"]);
11058
+ if (!isJsonObject(entry)) {
11059
+ continue;
11060
+ }
11061
+ const segmentType = asString(entry.type);
11062
+ if (segmentType === "file") {
11063
+ const rawValue = asString(entry.value);
11064
+ if (!rawValue) {
11065
+ continue;
11066
+ }
11067
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference(
11068
+ rawValue,
11069
+ searchRoots
11070
+ );
11071
+ if (!resolvedPath) {
11072
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
11073
+ logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
11074
+ continue;
11075
+ }
11076
+ try {
11077
+ const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
11078
+ parts.push(fileContent);
11079
+ if (verbose) {
11080
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
11081
+ console.log(` Resolved to: ${resolvedPath}`);
11082
+ }
11083
+ } catch (error) {
11084
+ logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
11085
+ }
11086
+ continue;
11087
+ }
11088
+ const textValue = asString(entry.text);
11036
11089
  if (typeof textValue === "string") {
11037
11090
  parts.push(textValue);
11038
11091
  continue;
11039
11092
  }
11040
- const valueValue = asString(entry["value"]);
11093
+ const valueValue = asString(entry.value);
11041
11094
  if (typeof valueValue === "string") {
11042
11095
  parts.push(valueValue);
11043
11096
  continue;
@@ -11528,7 +11581,7 @@ function resolveOptionalBoolean(source2) {
11528
11581
  function isLikelyEnvReference(value) {
11529
11582
  return /^[A-Z0-9_]+$/.test(value);
11530
11583
  }
11531
- var PROMPT_FILE_PREFIX = "bbeval-vscode-";
11584
+ var PROMPT_FILE_PREFIX = "agentv-vscode-";
11532
11585
  var VSCodeProvider = class {
11533
11586
  id;
11534
11587
  kind;
@@ -11595,7 +11648,7 @@ function buildPromptDocument(request, attachments) {
11595
11648
  if (instructionFiles.length > 0) {
11596
11649
  parts.push(buildMandatoryPrereadBlock(instructionFiles));
11597
11650
  }
11598
- parts.push(`# BbEval Request`);
11651
+ parts.push(`# AgentV Request`);
11599
11652
  if (request.testCaseId) {
11600
11653
  parts.push(`- Test Case: ${request.testCaseId}`);
11601
11654
  }
@@ -11734,18 +11787,24 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
11734
11787
  function isRecord(value) {
11735
11788
  return typeof value === "object" && value !== null && !Array.isArray(value);
11736
11789
  }
11737
- function checkVersion(parsed, absolutePath) {
11738
- const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
11739
- if (version === void 0) {
11790
+ function checkSchema(parsed, absolutePath) {
11791
+ const schema = parsed.$schema;
11792
+ if (schema === void 0) {
11793
+ throw new Error(
11794
+ `Missing $schema field in targets.yaml at ${absolutePath}.
11795
+ Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
11796
+ );
11797
+ }
11798
+ if (typeof schema !== "string") {
11740
11799
  throw new Error(
11741
- `Missing version field in targets.yaml at ${absolutePath}.
11742
- Please add 'version: 2.0' at the top of the file.`
11800
+ `Invalid $schema field in targets.yaml at ${absolutePath}.
11801
+ Expected a string value '${TARGETS_SCHEMA_V2}'.`
11743
11802
  );
11744
11803
  }
11745
- if (version < 2) {
11804
+ if (schema !== TARGETS_SCHEMA_V2) {
11746
11805
  throw new Error(
11747
- `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
11748
- Please update to version 2.0 format with 'targets' array.`
11806
+ `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
11807
+ Expected '${TARGETS_SCHEMA_V2}'.`
11749
11808
  );
11750
11809
  }
11751
11810
  }
@@ -11793,9 +11852,9 @@ async function readTargetDefinitions(filePath) {
11793
11852
  const raw = await readFile3(absolutePath, "utf8");
11794
11853
  const parsed = parse22(raw);
11795
11854
  if (!isRecord(parsed)) {
11796
- throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
11855
+ throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
11797
11856
  }
11798
- checkVersion(parsed, absolutePath);
11857
+ checkSchema(parsed, absolutePath);
11799
11858
  const targets = extractTargetsArray(parsed, absolutePath);
11800
11859
  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
11801
11860
  return definitions;
@@ -12330,17 +12389,17 @@ async function runEvaluation(options) {
12330
12389
  cache,
12331
12390
  useCache,
12332
12391
  now,
12333
- testId,
12392
+ evalId,
12334
12393
  verbose,
12335
12394
  onResult,
12336
12395
  onProgress
12337
12396
  } = options;
12338
12397
  const load = loadTestCases;
12339
12398
  const testCases = await load(testFilePath, repoRoot, { verbose });
12340
- const filteredTestCases = filterTestCases(testCases, testId);
12399
+ const filteredTestCases = filterTestCases(testCases, evalId);
12341
12400
  if (filteredTestCases.length === 0) {
12342
- if (testId) {
12343
- throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
12401
+ if (evalId) {
12402
+ throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
12344
12403
  }
12345
12404
  return [];
12346
12405
  }
@@ -12388,7 +12447,7 @@ async function runEvaluation(options) {
12388
12447
  for (let i6 = 0; i6 < filteredTestCases.length; i6++) {
12389
12448
  await onProgress({
12390
12449
  workerId: i6 + 1,
12391
- testId: filteredTestCases[i6].id,
12450
+ evalId: filteredTestCases[i6].id,
12392
12451
  status: "pending"
12393
12452
  });
12394
12453
  }
@@ -12396,15 +12455,15 @@ async function runEvaluation(options) {
12396
12455
  const workers = options.maxConcurrency ?? target.workers ?? 1;
12397
12456
  const limit = pLimit(workers);
12398
12457
  let nextWorkerId = 1;
12399
- const workerIdByTestId = /* @__PURE__ */ new Map();
12458
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
12400
12459
  const promises = filteredTestCases.map(
12401
12460
  (testCase) => limit(async () => {
12402
12461
  const workerId = nextWorkerId++;
12403
- workerIdByTestId.set(testCase.id, workerId);
12462
+ workerIdByEvalId.set(testCase.id, workerId);
12404
12463
  if (onProgress) {
12405
12464
  await onProgress({
12406
12465
  workerId,
12407
- testId: testCase.id,
12466
+ evalId: testCase.id,
12408
12467
  status: "running",
12409
12468
  startedAt: Date.now()
12410
12469
  });
@@ -12427,7 +12486,7 @@ async function runEvaluation(options) {
12427
12486
  if (onProgress) {
12428
12487
  await onProgress({
12429
12488
  workerId,
12430
- testId: testCase.id,
12489
+ evalId: testCase.id,
12431
12490
  status: "completed",
12432
12491
  startedAt: 0,
12433
12492
  // Not used for completed status
@@ -12442,7 +12501,7 @@ async function runEvaluation(options) {
12442
12501
  if (onProgress) {
12443
12502
  await onProgress({
12444
12503
  workerId,
12445
- testId: testCase.id,
12504
+ evalId: testCase.id,
12446
12505
  status: "failed",
12447
12506
  completedAt: Date.now(),
12448
12507
  error: error instanceof Error ? error.message : String(error)
@@ -12564,7 +12623,7 @@ async function runTestCase(options) {
12564
12623
  guideline_paths: testCase.guideline_paths
12565
12624
  };
12566
12625
  return {
12567
- test_id: testCase.id,
12626
+ eval_id: testCase.id,
12568
12627
  conversation_id: testCase.conversation_id,
12569
12628
  score: grade.score,
12570
12629
  hits: grade.hits,
@@ -12579,11 +12638,11 @@ async function runTestCase(options) {
12579
12638
  grader_raw_request: grade.graderRawRequest
12580
12639
  };
12581
12640
  }
12582
- function filterTestCases(testCases, testId) {
12583
- if (!testId) {
12641
+ function filterTestCases(testCases, evalId) {
12642
+ if (!evalId) {
12584
12643
  return testCases;
12585
12644
  }
12586
- return testCases.filter((testCase) => testCase.id === testId);
12645
+ return testCases.filter((testCase) => testCase.id === evalId);
12587
12646
  }
12588
12647
  function buildGraderRegistry(overrides, resolveJudgeProvider) {
12589
12648
  const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -12607,7 +12666,7 @@ async function dumpPrompt(directory, testCase, promptInputs) {
12607
12666
  const filePath = path42.resolve(directory, filename);
12608
12667
  await mkdir3(path42.dirname(filePath), { recursive: true });
12609
12668
  const payload = {
12610
- test_id: testCase.id,
12669
+ eval_id: testCase.id,
12611
12670
  request: promptInputs.request,
12612
12671
  guidelines: promptInputs.guidelines,
12613
12672
  guideline_paths: testCase.guideline_paths
@@ -12656,7 +12715,7 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
12656
12715
  error: message
12657
12716
  };
12658
12717
  return {
12659
- test_id: testCase.id,
12718
+ eval_id: testCase.id,
12660
12719
  conversation_id: testCase.conversation_id,
12661
12720
  score: 0,
12662
12721
  hits: [],
@@ -13127,9 +13186,9 @@ var ProgressDisplay = class {
13127
13186
  this.scheduleRender();
13128
13187
  } else {
13129
13188
  if (progress.status === "completed") {
13130
- console.log(`\u2713 Test ${progress.testId} completed`);
13189
+ console.log(`\u2713 Test ${progress.evalId} completed`);
13131
13190
  } else if (progress.status === "failed") {
13132
- console.log(`\u2717 Test ${progress.testId} failed${progress.error ? `: ${progress.error}` : ""}`);
13191
+ console.log(`\u2717 Test ${progress.evalId} failed${progress.error ? `: ${progress.error}` : ""}`);
13133
13192
  }
13134
13193
  }
13135
13194
  }
@@ -13162,7 +13221,7 @@ var ProgressDisplay = class {
13162
13221
  const statusIcon = this.getStatusIcon(worker.status);
13163
13222
  const elapsed = worker.startedAt ? this.formatElapsed(Date.now() - worker.startedAt) : "";
13164
13223
  const timeLabel = elapsed ? ` (${elapsed})` : "";
13165
- let testLabel = worker.testId;
13224
+ let testLabel = worker.evalId;
13166
13225
  if (testLabel.length > 50) {
13167
13226
  testLabel = testLabel.substring(0, 47) + "...";
13168
13227
  }
@@ -13349,9 +13408,7 @@ var TARGET_FILE_CANDIDATES = [
13349
13408
  "targets.yaml",
13350
13409
  "targets.yml",
13351
13410
  path11.join(".agentv", "targets.yaml"),
13352
- path11.join(".agentv", "targets.yml"),
13353
- path11.join(".bbeval", "targets.yaml"),
13354
- path11.join(".bbeval", "targets.yml")
13411
+ path11.join(".agentv", "targets.yml")
13355
13412
  ];
13356
13413
  async function fileExists4(filePath) {
13357
13414
  try {
@@ -13525,7 +13582,7 @@ function normalizeOptions(rawOptions) {
13525
13582
  return {
13526
13583
  target: normalizeString(rawOptions.target),
13527
13584
  targetsPath: normalizeString(rawOptions.targets),
13528
- testId: normalizeString(rawOptions.testId),
13585
+ evalId: normalizeString(rawOptions.evalId),
13529
13586
  workers: workers > 0 ? workers : void 0,
13530
13587
  outPath: normalizeString(rawOptions.out),
13531
13588
  format,
@@ -13672,7 +13729,7 @@ async function runEvalCommand(input) {
13672
13729
  promptDumpDir,
13673
13730
  cache,
13674
13731
  useCache: options.cache,
13675
- testId: options.testId,
13732
+ evalId: options.evalId,
13676
13733
  verbose: options.verbose,
13677
13734
  maxConcurrency: resolvedWorkers,
13678
13735
  onResult: async (result) => {
@@ -13685,7 +13742,7 @@ async function runEvalCommand(input) {
13685
13742
  }
13686
13743
  progressDisplay.updateWorker({
13687
13744
  workerId: event.workerId,
13688
- testId: event.testId,
13745
+ evalId: event.evalId,
13689
13746
  status: event.status,
13690
13747
  startedAt: event.startedAt,
13691
13748
  completedAt: event.completedAt,
@@ -13735,7 +13792,7 @@ function parseInteger(value, fallback) {
13735
13792
  return parsed;
13736
13793
  }
13737
13794
  function registerEvalCommand(program) {
13738
- program.command("eval").description("Run BbEval test suites and report results").argument("<test-file>", "Path to the evaluation .test.yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--test-id <id>", "Run only the test case with this identifier").option(
13795
+ program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the test case with this identifier").option(
13739
13796
  "--workers <count>",
13740
13797
  "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
13741
13798
  (value) => parseInteger(value, 1)
@@ -14008,7 +14065,6 @@ function validateMessages(messages, location, filePath, errors) {
14008
14065
  }
14009
14066
  }
14010
14067
  }
14011
- var SCHEMA_TARGETS_V22 = "agentv-targets-v2";
14012
14068
  function isObject2(value) {
14013
14069
  return typeof value === "object" && value !== null && !Array.isArray(value);
14014
14070
  }
@@ -14046,8 +14102,8 @@ async function validateTargetsFile(filePath) {
14046
14102
  };
14047
14103
  }
14048
14104
  const schema = parsed["$schema"];
14049
- if (schema !== SCHEMA_TARGETS_V22) {
14050
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_TARGETS_V22}'` : `Missing required field '$schema'. Expected '${SCHEMA_TARGETS_V22}'`;
14105
+ if (schema !== TARGETS_SCHEMA_V2) {
14106
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
14051
14107
  errors.push({
14052
14108
  severity: "error",
14053
14109
  filePath: absolutePath,
@@ -14070,7 +14126,7 @@ async function validateTargetsFile(filePath) {
14070
14126
  errors
14071
14127
  };
14072
14128
  }
14073
- const knownProviders = ["azure", "openai", "anthropic", "bedrock", "vertex"];
14129
+ const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
14074
14130
  for (let i6 = 0; i6 < targets.length; i6++) {
14075
14131
  const target = targets[i6];
14076
14132
  const location = `targets[${i6}]`;
@@ -14539,4 +14595,4 @@ export {
14539
14595
  createProgram,
14540
14596
  runCli
14541
14597
  };
14542
- //# sourceMappingURL=chunk-S3RN2GSO.js.map
14598
+ //# sourceMappingURL=chunk-32ZAVIQY.js.map