@agentv/core 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -329,7 +329,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
329
329
  }
330
330
  const codeSnippets = extractCodeBlocks(userSegments);
331
331
  const assistantContent = assistantMessages[0]?.content;
332
- const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
332
+ const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
333
333
  const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
334
334
  const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
335
335
  const testCase = {
@@ -445,7 +445,7 @@ function cloneJsonValue(value) {
445
445
  }
446
446
  return cloneJsonObject(value);
447
447
  }
448
- function normalizeAssistantContent(content) {
448
+ async function resolveAssistantContent(content, searchRoots, verbose) {
449
449
  if (typeof content === "string") {
450
450
  return content;
451
451
  }
@@ -458,12 +458,42 @@ function normalizeAssistantContent(content) {
458
458
  parts.push(entry);
459
459
  continue;
460
460
  }
461
- const textValue = asString(entry["text"]);
461
+ if (!isJsonObject(entry)) {
462
+ continue;
463
+ }
464
+ const segmentType = asString(entry.type);
465
+ if (segmentType === "file") {
466
+ const rawValue = asString(entry.value);
467
+ if (!rawValue) {
468
+ continue;
469
+ }
470
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference(
471
+ rawValue,
472
+ searchRoots
473
+ );
474
+ if (!resolvedPath) {
475
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
476
+ logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
477
+ continue;
478
+ }
479
+ try {
480
+ const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
481
+ parts.push(fileContent);
482
+ if (verbose) {
483
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
484
+ console.log(` Resolved to: ${resolvedPath}`);
485
+ }
486
+ } catch (error) {
487
+ logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
488
+ }
489
+ continue;
490
+ }
491
+ const textValue = asString(entry.text);
462
492
  if (typeof textValue === "string") {
463
493
  parts.push(textValue);
464
494
  continue;
465
495
  }
466
- const valueValue = asString(entry["value"]);
496
+ const valueValue = asString(entry.value);
467
497
  if (typeof valueValue === "string") {
468
498
  parts.push(valueValue);
469
499
  continue;
@@ -968,7 +998,7 @@ var import_promises3 = require("fs/promises");
968
998
  var import_node_os = require("os");
969
999
  var import_node_path3 = __toESM(require("path"), 1);
970
1000
  var import_subagent = require("subagent");
971
- var PROMPT_FILE_PREFIX = "bbeval-vscode-";
1001
+ var PROMPT_FILE_PREFIX = "agentv-vscode-";
972
1002
  var VSCodeProvider = class {
973
1003
  id;
974
1004
  kind;
@@ -1035,7 +1065,7 @@ function buildPromptDocument(request, attachments) {
1035
1065
  if (instructionFiles.length > 0) {
1036
1066
  parts.push(buildMandatoryPrereadBlock(instructionFiles));
1037
1067
  }
1038
- parts.push(`# BbEval Request`);
1068
+ parts.push(`# AgentV Request`);
1039
1069
  if (request.testCaseId) {
1040
1070
  parts.push(`- Test Case: ${request.testCaseId}`);
1041
1071
  }
@@ -1177,21 +1207,32 @@ var import_node_fs3 = require("fs");
1177
1207
  var import_promises4 = require("fs/promises");
1178
1208
  var import_node_path4 = __toESM(require("path"), 1);
1179
1209
  var import_yaml2 = require("yaml");
1210
+
1211
+ // src/evaluation/providers/types.ts
1212
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
1213
+
1214
+ // src/evaluation/providers/targets-file.ts
1180
1215
  function isRecord(value) {
1181
1216
  return typeof value === "object" && value !== null && !Array.isArray(value);
1182
1217
  }
1183
- function checkVersion(parsed, absolutePath) {
1184
- const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
1185
- if (version === void 0) {
1218
+ function checkSchema(parsed, absolutePath) {
1219
+ const schema = parsed.$schema;
1220
+ if (schema === void 0) {
1221
+ throw new Error(
1222
+ `Missing $schema field in targets.yaml at ${absolutePath}.
1223
+ Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
1224
+ );
1225
+ }
1226
+ if (typeof schema !== "string") {
1186
1227
  throw new Error(
1187
- `Missing version field in targets.yaml at ${absolutePath}.
1188
- Please add 'version: 2.0' at the top of the file.`
1228
+ `Invalid $schema field in targets.yaml at ${absolutePath}.
1229
+ Expected a string value '${TARGETS_SCHEMA_V2}'.`
1189
1230
  );
1190
1231
  }
1191
- if (version < 2) {
1232
+ if (schema !== TARGETS_SCHEMA_V2) {
1192
1233
  throw new Error(
1193
- `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
1194
- Please update to version 2.0 format with 'targets' array.`
1234
+ `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
1235
+ Expected '${TARGETS_SCHEMA_V2}'.`
1195
1236
  );
1196
1237
  }
1197
1238
  }
@@ -1239,9 +1280,9 @@ async function readTargetDefinitions(filePath) {
1239
1280
  const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
1240
1281
  const parsed = (0, import_yaml2.parse)(raw);
1241
1282
  if (!isRecord(parsed)) {
1242
- throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
1283
+ throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
1243
1284
  }
1244
- checkVersion(parsed, absolutePath);
1285
+ checkSchema(parsed, absolutePath);
1245
1286
  const targets = extractTargetsArray(parsed, absolutePath);
1246
1287
  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
1247
1288
  return definitions;
@@ -1802,17 +1843,17 @@ async function runEvaluation(options) {
1802
1843
  cache,
1803
1844
  useCache,
1804
1845
  now,
1805
- testId,
1846
+ evalId,
1806
1847
  verbose,
1807
1848
  onResult,
1808
1849
  onProgress
1809
1850
  } = options;
1810
1851
  const load = loadTestCases;
1811
1852
  const testCases = await load(testFilePath, repoRoot, { verbose });
1812
- const filteredTestCases = filterTestCases(testCases, testId);
1853
+ const filteredTestCases = filterTestCases(testCases, evalId);
1813
1854
  if (filteredTestCases.length === 0) {
1814
- if (testId) {
1815
- throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
1855
+ if (evalId) {
1856
+ throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
1816
1857
  }
1817
1858
  return [];
1818
1859
  }
@@ -1860,7 +1901,7 @@ async function runEvaluation(options) {
1860
1901
  for (let i = 0; i < filteredTestCases.length; i++) {
1861
1902
  await onProgress({
1862
1903
  workerId: i + 1,
1863
- testId: filteredTestCases[i].id,
1904
+ evalId: filteredTestCases[i].id,
1864
1905
  status: "pending"
1865
1906
  });
1866
1907
  }
@@ -1868,15 +1909,15 @@ async function runEvaluation(options) {
1868
1909
  const workers = options.maxConcurrency ?? target.workers ?? 1;
1869
1910
  const limit = pLimit(workers);
1870
1911
  let nextWorkerId = 1;
1871
- const workerIdByTestId = /* @__PURE__ */ new Map();
1912
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
1872
1913
  const promises = filteredTestCases.map(
1873
1914
  (testCase) => limit(async () => {
1874
1915
  const workerId = nextWorkerId++;
1875
- workerIdByTestId.set(testCase.id, workerId);
1916
+ workerIdByEvalId.set(testCase.id, workerId);
1876
1917
  if (onProgress) {
1877
1918
  await onProgress({
1878
1919
  workerId,
1879
- testId: testCase.id,
1920
+ evalId: testCase.id,
1880
1921
  status: "running",
1881
1922
  startedAt: Date.now()
1882
1923
  });
@@ -1899,7 +1940,7 @@ async function runEvaluation(options) {
1899
1940
  if (onProgress) {
1900
1941
  await onProgress({
1901
1942
  workerId,
1902
- testId: testCase.id,
1943
+ evalId: testCase.id,
1903
1944
  status: "completed",
1904
1945
  startedAt: 0,
1905
1946
  // Not used for completed status
@@ -1914,7 +1955,7 @@ async function runEvaluation(options) {
1914
1955
  if (onProgress) {
1915
1956
  await onProgress({
1916
1957
  workerId,
1917
- testId: testCase.id,
1958
+ evalId: testCase.id,
1918
1959
  status: "failed",
1919
1960
  completedAt: Date.now(),
1920
1961
  error: error instanceof Error ? error.message : String(error)
@@ -2036,7 +2077,7 @@ async function runTestCase(options) {
2036
2077
  guideline_paths: testCase.guideline_paths
2037
2078
  };
2038
2079
  return {
2039
- test_id: testCase.id,
2080
+ eval_id: testCase.id,
2040
2081
  conversation_id: testCase.conversation_id,
2041
2082
  score: grade.score,
2042
2083
  hits: grade.hits,
@@ -2051,11 +2092,11 @@ async function runTestCase(options) {
2051
2092
  grader_raw_request: grade.graderRawRequest
2052
2093
  };
2053
2094
  }
2054
- function filterTestCases(testCases, testId) {
2055
- if (!testId) {
2095
+ function filterTestCases(testCases, evalId) {
2096
+ if (!evalId) {
2056
2097
  return testCases;
2057
2098
  }
2058
- return testCases.filter((testCase) => testCase.id === testId);
2099
+ return testCases.filter((testCase) => testCase.id === evalId);
2059
2100
  }
2060
2101
  function buildGraderRegistry(overrides, resolveJudgeProvider) {
2061
2102
  const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -2079,7 +2120,7 @@ async function dumpPrompt(directory, testCase, promptInputs) {
2079
2120
  const filePath = import_node_path5.default.resolve(directory, filename);
2080
2121
  await (0, import_promises5.mkdir)(import_node_path5.default.dirname(filePath), { recursive: true });
2081
2122
  const payload = {
2082
- test_id: testCase.id,
2123
+ eval_id: testCase.id,
2083
2124
  request: promptInputs.request,
2084
2125
  guidelines: promptInputs.guidelines,
2085
2126
  guideline_paths: testCase.guideline_paths
@@ -2128,7 +2169,7 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
2128
2169
  error: message
2129
2170
  };
2130
2171
  return {
2131
- test_id: testCase.id,
2172
+ eval_id: testCase.id,
2132
2173
  conversation_id: testCase.conversation_id,
2133
2174
  score: 0,
2134
2175
  hits: [],