@agentv/core 0.2.3 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5REK5RSI.js → chunk-QVS4OL44.js} +30 -2
- package/dist/chunk-QVS4OL44.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +30 -4
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +7 -5
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +73 -32
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -9
- package/dist/index.d.ts +9 -9
- package/dist/index.js +70 -33
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-5REK5RSI.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -329,7 +329,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
329
329
|
}
|
|
330
330
|
const codeSnippets = extractCodeBlocks(userSegments);
|
|
331
331
|
const assistantContent = assistantMessages[0]?.content;
|
|
332
|
-
const expectedAssistantRaw =
|
|
332
|
+
const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
|
|
333
333
|
const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
334
334
|
const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
|
|
335
335
|
const testCase = {
|
|
@@ -445,7 +445,7 @@ function cloneJsonValue(value) {
|
|
|
445
445
|
}
|
|
446
446
|
return cloneJsonObject(value);
|
|
447
447
|
}
|
|
448
|
-
function
|
|
448
|
+
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
449
449
|
if (typeof content === "string") {
|
|
450
450
|
return content;
|
|
451
451
|
}
|
|
@@ -458,12 +458,42 @@ function normalizeAssistantContent(content) {
|
|
|
458
458
|
parts.push(entry);
|
|
459
459
|
continue;
|
|
460
460
|
}
|
|
461
|
-
|
|
461
|
+
if (!isJsonObject(entry)) {
|
|
462
|
+
continue;
|
|
463
|
+
}
|
|
464
|
+
const segmentType = asString(entry.type);
|
|
465
|
+
if (segmentType === "file") {
|
|
466
|
+
const rawValue = asString(entry.value);
|
|
467
|
+
if (!rawValue) {
|
|
468
|
+
continue;
|
|
469
|
+
}
|
|
470
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference(
|
|
471
|
+
rawValue,
|
|
472
|
+
searchRoots
|
|
473
|
+
);
|
|
474
|
+
if (!resolvedPath) {
|
|
475
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
476
|
+
logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
|
|
477
|
+
continue;
|
|
478
|
+
}
|
|
479
|
+
try {
|
|
480
|
+
const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
481
|
+
parts.push(fileContent);
|
|
482
|
+
if (verbose) {
|
|
483
|
+
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
484
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
485
|
+
}
|
|
486
|
+
} catch (error) {
|
|
487
|
+
logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
|
|
488
|
+
}
|
|
489
|
+
continue;
|
|
490
|
+
}
|
|
491
|
+
const textValue = asString(entry.text);
|
|
462
492
|
if (typeof textValue === "string") {
|
|
463
493
|
parts.push(textValue);
|
|
464
494
|
continue;
|
|
465
495
|
}
|
|
466
|
-
const valueValue = asString(entry
|
|
496
|
+
const valueValue = asString(entry.value);
|
|
467
497
|
if (typeof valueValue === "string") {
|
|
468
498
|
parts.push(valueValue);
|
|
469
499
|
continue;
|
|
@@ -968,7 +998,7 @@ var import_promises3 = require("fs/promises");
|
|
|
968
998
|
var import_node_os = require("os");
|
|
969
999
|
var import_node_path3 = __toESM(require("path"), 1);
|
|
970
1000
|
var import_subagent = require("subagent");
|
|
971
|
-
var PROMPT_FILE_PREFIX = "
|
|
1001
|
+
var PROMPT_FILE_PREFIX = "agentv-vscode-";
|
|
972
1002
|
var VSCodeProvider = class {
|
|
973
1003
|
id;
|
|
974
1004
|
kind;
|
|
@@ -1035,7 +1065,7 @@ function buildPromptDocument(request, attachments) {
|
|
|
1035
1065
|
if (instructionFiles.length > 0) {
|
|
1036
1066
|
parts.push(buildMandatoryPrereadBlock(instructionFiles));
|
|
1037
1067
|
}
|
|
1038
|
-
parts.push(`#
|
|
1068
|
+
parts.push(`# AgentV Request`);
|
|
1039
1069
|
if (request.testCaseId) {
|
|
1040
1070
|
parts.push(`- Test Case: ${request.testCaseId}`);
|
|
1041
1071
|
}
|
|
@@ -1177,21 +1207,32 @@ var import_node_fs3 = require("fs");
|
|
|
1177
1207
|
var import_promises4 = require("fs/promises");
|
|
1178
1208
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
1179
1209
|
var import_yaml2 = require("yaml");
|
|
1210
|
+
|
|
1211
|
+
// src/evaluation/providers/types.ts
|
|
1212
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
1213
|
+
|
|
1214
|
+
// src/evaluation/providers/targets-file.ts
|
|
1180
1215
|
function isRecord(value) {
|
|
1181
1216
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
1182
1217
|
}
|
|
1183
|
-
function
|
|
1184
|
-
const
|
|
1185
|
-
if (
|
|
1218
|
+
function checkSchema(parsed, absolutePath) {
|
|
1219
|
+
const schema = parsed.$schema;
|
|
1220
|
+
if (schema === void 0) {
|
|
1221
|
+
throw new Error(
|
|
1222
|
+
`Missing $schema field in targets.yaml at ${absolutePath}.
|
|
1223
|
+
Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
|
|
1224
|
+
);
|
|
1225
|
+
}
|
|
1226
|
+
if (typeof schema !== "string") {
|
|
1186
1227
|
throw new Error(
|
|
1187
|
-
`
|
|
1188
|
-
|
|
1228
|
+
`Invalid $schema field in targets.yaml at ${absolutePath}.
|
|
1229
|
+
Expected a string value '${TARGETS_SCHEMA_V2}'.`
|
|
1189
1230
|
);
|
|
1190
1231
|
}
|
|
1191
|
-
if (
|
|
1232
|
+
if (schema !== TARGETS_SCHEMA_V2) {
|
|
1192
1233
|
throw new Error(
|
|
1193
|
-
`
|
|
1194
|
-
|
|
1234
|
+
`Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
|
|
1235
|
+
Expected '${TARGETS_SCHEMA_V2}'.`
|
|
1195
1236
|
);
|
|
1196
1237
|
}
|
|
1197
1238
|
}
|
|
@@ -1239,9 +1280,9 @@ async function readTargetDefinitions(filePath) {
|
|
|
1239
1280
|
const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
|
|
1240
1281
|
const parsed = (0, import_yaml2.parse)(raw);
|
|
1241
1282
|
if (!isRecord(parsed)) {
|
|
1242
|
-
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '
|
|
1283
|
+
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
1243
1284
|
}
|
|
1244
|
-
|
|
1285
|
+
checkSchema(parsed, absolutePath);
|
|
1245
1286
|
const targets = extractTargetsArray(parsed, absolutePath);
|
|
1246
1287
|
const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
|
|
1247
1288
|
return definitions;
|
|
@@ -1802,17 +1843,17 @@ async function runEvaluation(options) {
|
|
|
1802
1843
|
cache,
|
|
1803
1844
|
useCache,
|
|
1804
1845
|
now,
|
|
1805
|
-
|
|
1846
|
+
evalId,
|
|
1806
1847
|
verbose,
|
|
1807
1848
|
onResult,
|
|
1808
1849
|
onProgress
|
|
1809
1850
|
} = options;
|
|
1810
1851
|
const load = loadTestCases;
|
|
1811
1852
|
const testCases = await load(testFilePath, repoRoot, { verbose });
|
|
1812
|
-
const filteredTestCases = filterTestCases(testCases,
|
|
1853
|
+
const filteredTestCases = filterTestCases(testCases, evalId);
|
|
1813
1854
|
if (filteredTestCases.length === 0) {
|
|
1814
|
-
if (
|
|
1815
|
-
throw new Error(`Test case with id '${
|
|
1855
|
+
if (evalId) {
|
|
1856
|
+
throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
|
|
1816
1857
|
}
|
|
1817
1858
|
return [];
|
|
1818
1859
|
}
|
|
@@ -1860,7 +1901,7 @@ async function runEvaluation(options) {
|
|
|
1860
1901
|
for (let i = 0; i < filteredTestCases.length; i++) {
|
|
1861
1902
|
await onProgress({
|
|
1862
1903
|
workerId: i + 1,
|
|
1863
|
-
|
|
1904
|
+
evalId: filteredTestCases[i].id,
|
|
1864
1905
|
status: "pending"
|
|
1865
1906
|
});
|
|
1866
1907
|
}
|
|
@@ -1868,15 +1909,15 @@ async function runEvaluation(options) {
|
|
|
1868
1909
|
const workers = options.maxConcurrency ?? target.workers ?? 1;
|
|
1869
1910
|
const limit = pLimit(workers);
|
|
1870
1911
|
let nextWorkerId = 1;
|
|
1871
|
-
const
|
|
1912
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
1872
1913
|
const promises = filteredTestCases.map(
|
|
1873
1914
|
(testCase) => limit(async () => {
|
|
1874
1915
|
const workerId = nextWorkerId++;
|
|
1875
|
-
|
|
1916
|
+
workerIdByEvalId.set(testCase.id, workerId);
|
|
1876
1917
|
if (onProgress) {
|
|
1877
1918
|
await onProgress({
|
|
1878
1919
|
workerId,
|
|
1879
|
-
|
|
1920
|
+
evalId: testCase.id,
|
|
1880
1921
|
status: "running",
|
|
1881
1922
|
startedAt: Date.now()
|
|
1882
1923
|
});
|
|
@@ -1899,7 +1940,7 @@ async function runEvaluation(options) {
|
|
|
1899
1940
|
if (onProgress) {
|
|
1900
1941
|
await onProgress({
|
|
1901
1942
|
workerId,
|
|
1902
|
-
|
|
1943
|
+
evalId: testCase.id,
|
|
1903
1944
|
status: "completed",
|
|
1904
1945
|
startedAt: 0,
|
|
1905
1946
|
// Not used for completed status
|
|
@@ -1914,7 +1955,7 @@ async function runEvaluation(options) {
|
|
|
1914
1955
|
if (onProgress) {
|
|
1915
1956
|
await onProgress({
|
|
1916
1957
|
workerId,
|
|
1917
|
-
|
|
1958
|
+
evalId: testCase.id,
|
|
1918
1959
|
status: "failed",
|
|
1919
1960
|
completedAt: Date.now(),
|
|
1920
1961
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -2036,7 +2077,7 @@ async function runTestCase(options) {
|
|
|
2036
2077
|
guideline_paths: testCase.guideline_paths
|
|
2037
2078
|
};
|
|
2038
2079
|
return {
|
|
2039
|
-
|
|
2080
|
+
eval_id: testCase.id,
|
|
2040
2081
|
conversation_id: testCase.conversation_id,
|
|
2041
2082
|
score: grade.score,
|
|
2042
2083
|
hits: grade.hits,
|
|
@@ -2051,11 +2092,11 @@ async function runTestCase(options) {
|
|
|
2051
2092
|
grader_raw_request: grade.graderRawRequest
|
|
2052
2093
|
};
|
|
2053
2094
|
}
|
|
2054
|
-
function filterTestCases(testCases,
|
|
2055
|
-
if (!
|
|
2095
|
+
function filterTestCases(testCases, evalId) {
|
|
2096
|
+
if (!evalId) {
|
|
2056
2097
|
return testCases;
|
|
2057
2098
|
}
|
|
2058
|
-
return testCases.filter((testCase) => testCase.id ===
|
|
2099
|
+
return testCases.filter((testCase) => testCase.id === evalId);
|
|
2059
2100
|
}
|
|
2060
2101
|
function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
2061
2102
|
const heuristic = overrides?.heuristic ?? new HeuristicGrader();
|
|
@@ -2079,7 +2120,7 @@ async function dumpPrompt(directory, testCase, promptInputs) {
|
|
|
2079
2120
|
const filePath = import_node_path5.default.resolve(directory, filename);
|
|
2080
2121
|
await (0, import_promises5.mkdir)(import_node_path5.default.dirname(filePath), { recursive: true });
|
|
2081
2122
|
const payload = {
|
|
2082
|
-
|
|
2123
|
+
eval_id: testCase.id,
|
|
2083
2124
|
request: promptInputs.request,
|
|
2084
2125
|
guidelines: promptInputs.guidelines,
|
|
2085
2126
|
guideline_paths: testCase.guideline_paths
|
|
@@ -2128,7 +2169,7 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
|
|
|
2128
2169
|
error: message
|
|
2129
2170
|
};
|
|
2130
2171
|
return {
|
|
2131
|
-
|
|
2172
|
+
eval_id: testCase.id,
|
|
2132
2173
|
conversation_id: testCase.conversation_id,
|
|
2133
2174
|
score: 0,
|
|
2134
2175
|
hits: [],
|