@agentv/core 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +70 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +8 -3
- package/dist/index.d.ts +8 -3
- package/dist/index.js +69 -27
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
loadEvalCases: () => loadEvalCases,
|
|
55
55
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
56
56
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
57
|
+
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
57
58
|
readTextFile: () => readTextFile,
|
|
58
59
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
59
60
|
resolveFileReference: () => resolveFileReference,
|
|
@@ -239,6 +240,33 @@ var ANSI_YELLOW = "\x1B[33m";
|
|
|
239
240
|
var ANSI_RESET = "\x1B[0m";
|
|
240
241
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
241
242
|
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
243
|
+
async function readTestSuiteMetadata(testFilePath) {
|
|
244
|
+
try {
|
|
245
|
+
const absolutePath = import_node_path2.default.resolve(testFilePath);
|
|
246
|
+
const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
|
|
247
|
+
const parsed = (0, import_yaml.parse)(content);
|
|
248
|
+
if (!isJsonObject(parsed)) {
|
|
249
|
+
return {};
|
|
250
|
+
}
|
|
251
|
+
return { target: extractTargetFromSuite(parsed) };
|
|
252
|
+
} catch {
|
|
253
|
+
return {};
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
function extractTargetFromSuite(suite) {
|
|
257
|
+
const execution = suite.execution;
|
|
258
|
+
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
259
|
+
const executionTarget = execution.target;
|
|
260
|
+
if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
|
|
261
|
+
return executionTarget.trim();
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
const targetValue = suite.target;
|
|
265
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
266
|
+
return targetValue.trim();
|
|
267
|
+
}
|
|
268
|
+
return void 0;
|
|
269
|
+
}
|
|
242
270
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
243
271
|
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
244
272
|
for (const directory of directories) {
|
|
@@ -415,6 +443,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
415
443
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
416
444
|
}
|
|
417
445
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
446
|
+
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
447
|
+
const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
|
|
418
448
|
const results = [];
|
|
419
449
|
for (const rawEvalcase of rawTestcases) {
|
|
420
450
|
if (!isJsonObject(rawEvalcase)) {
|
|
@@ -469,7 +499,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
469
499
|
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
470
500
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
471
501
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
472
|
-
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
502
|
+
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
473
503
|
const userFilePaths = [];
|
|
474
504
|
for (const segment of inputSegments) {
|
|
475
505
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -555,14 +585,13 @@ function formatSegment(segment) {
|
|
|
555
585
|
const text = asString(segment.text);
|
|
556
586
|
const filePath = asString(segment.path);
|
|
557
587
|
if (text && filePath) {
|
|
558
|
-
return
|
|
559
|
-
${text}`;
|
|
588
|
+
return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
|
|
560
589
|
}
|
|
561
590
|
}
|
|
562
591
|
return void 0;
|
|
563
592
|
}
|
|
564
593
|
async function buildPromptInputs(testCase) {
|
|
565
|
-
const
|
|
594
|
+
const guidelineParts = [];
|
|
566
595
|
for (const rawPath of testCase.guideline_paths) {
|
|
567
596
|
const absolutePath = import_node_path2.default.resolve(rawPath);
|
|
568
597
|
if (!await fileExists2(absolutePath)) {
|
|
@@ -570,14 +599,17 @@ async function buildPromptInputs(testCase) {
|
|
|
570
599
|
continue;
|
|
571
600
|
}
|
|
572
601
|
try {
|
|
573
|
-
const content = (await (0, import_promises2.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n");
|
|
574
|
-
|
|
575
|
-
|
|
602
|
+
const content = (await (0, import_promises2.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
603
|
+
guidelineParts.push({
|
|
604
|
+
content,
|
|
605
|
+
isFile: true,
|
|
606
|
+
displayPath: import_node_path2.default.basename(absolutePath)
|
|
607
|
+
});
|
|
576
608
|
} catch (error) {
|
|
577
609
|
logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
578
610
|
}
|
|
579
611
|
}
|
|
580
|
-
const guidelines =
|
|
612
|
+
const guidelines = formatFileContents(guidelineParts);
|
|
581
613
|
const segmentsByMessage = [];
|
|
582
614
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
583
615
|
for (const segment of testCase.input_segments) {
|
|
@@ -779,6 +811,20 @@ function cloneJsonValue(value) {
|
|
|
779
811
|
}
|
|
780
812
|
return cloneJsonObject(value);
|
|
781
813
|
}
|
|
814
|
+
function formatFileContents(parts) {
|
|
815
|
+
const fileCount = parts.filter((p) => p.isFile).length;
|
|
816
|
+
if (fileCount > 0) {
|
|
817
|
+
return parts.map((part) => {
|
|
818
|
+
if (part.isFile && part.displayPath) {
|
|
819
|
+
return `<file path="${part.displayPath}">
|
|
820
|
+
${part.content}
|
|
821
|
+
</file>`;
|
|
822
|
+
}
|
|
823
|
+
return part.content;
|
|
824
|
+
}).join("\n\n");
|
|
825
|
+
}
|
|
826
|
+
return parts.map((p) => p.content).join(" ");
|
|
827
|
+
}
|
|
782
828
|
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
783
829
|
if (typeof content === "string") {
|
|
784
830
|
return content;
|
|
@@ -789,7 +835,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
789
835
|
const parts = [];
|
|
790
836
|
for (const entry of content) {
|
|
791
837
|
if (typeof entry === "string") {
|
|
792
|
-
parts.push(entry);
|
|
838
|
+
parts.push({ content: entry, isFile: false });
|
|
793
839
|
continue;
|
|
794
840
|
}
|
|
795
841
|
if (!isJsonObject(entry)) {
|
|
@@ -811,8 +857,8 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
811
857
|
continue;
|
|
812
858
|
}
|
|
813
859
|
try {
|
|
814
|
-
const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
815
|
-
parts.push(fileContent);
|
|
860
|
+
const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
861
|
+
parts.push({ content: fileContent, isFile: true, displayPath });
|
|
816
862
|
if (verbose) {
|
|
817
863
|
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
818
864
|
console.log(` Resolved to: ${resolvedPath}`);
|
|
@@ -824,21 +870,21 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
824
870
|
}
|
|
825
871
|
const textValue = asString(entry.text);
|
|
826
872
|
if (typeof textValue === "string") {
|
|
827
|
-
parts.push(textValue);
|
|
873
|
+
parts.push({ content: textValue, isFile: false });
|
|
828
874
|
continue;
|
|
829
875
|
}
|
|
830
876
|
const valueValue = asString(entry.value);
|
|
831
877
|
if (typeof valueValue === "string") {
|
|
832
|
-
parts.push(valueValue);
|
|
878
|
+
parts.push({ content: valueValue, isFile: false });
|
|
833
879
|
continue;
|
|
834
880
|
}
|
|
835
|
-
parts.push(JSON.stringify(entry));
|
|
881
|
+
parts.push({ content: JSON.stringify(entry), isFile: false });
|
|
836
882
|
}
|
|
837
|
-
return parts
|
|
883
|
+
return formatFileContents(parts);
|
|
838
884
|
}
|
|
839
|
-
async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
885
|
+
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
840
886
|
const execution = rawEvalCase.execution;
|
|
841
|
-
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
|
|
887
|
+
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
842
888
|
if (candidateEvaluators === void 0) {
|
|
843
889
|
return void 0;
|
|
844
890
|
}
|
|
@@ -876,6 +922,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
876
922
|
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
877
923
|
);
|
|
878
924
|
}
|
|
925
|
+
} else {
|
|
926
|
+
resolvedCwd = searchRoots[0];
|
|
879
927
|
}
|
|
880
928
|
evaluators.push({
|
|
881
929
|
name,
|
|
@@ -904,8 +952,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
904
952
|
name,
|
|
905
953
|
type: "llm_judge",
|
|
906
954
|
prompt,
|
|
907
|
-
promptPath,
|
|
908
|
-
model
|
|
955
|
+
promptPath
|
|
909
956
|
});
|
|
910
957
|
}
|
|
911
958
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
@@ -3222,10 +3269,7 @@ var LlmJudgeEvaluator = class {
|
|
|
3222
3269
|
prompt = substituteVariables(systemPrompt, variables);
|
|
3223
3270
|
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
3224
3271
|
}
|
|
3225
|
-
const metadata = {
|
|
3226
|
-
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
3227
|
-
...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
|
|
3228
|
-
};
|
|
3272
|
+
const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
|
|
3229
3273
|
const response = await judgeProvider.invoke({
|
|
3230
3274
|
question: prompt,
|
|
3231
3275
|
metadata,
|
|
@@ -3245,8 +3289,7 @@ var LlmJudgeEvaluator = class {
|
|
|
3245
3289
|
provider: judgeProvider.id,
|
|
3246
3290
|
prompt,
|
|
3247
3291
|
target: context.target.name,
|
|
3248
|
-
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
3249
|
-
...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
|
|
3292
|
+
...systemPrompt !== void 0 && { systemPrompt }
|
|
3250
3293
|
};
|
|
3251
3294
|
return {
|
|
3252
3295
|
score,
|
|
@@ -4240,8 +4283,7 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
4240
4283
|
now,
|
|
4241
4284
|
judgeProvider,
|
|
4242
4285
|
systemPrompt: customPrompt,
|
|
4243
|
-
evaluator: config,
|
|
4244
|
-
judgeModel: config.model
|
|
4286
|
+
evaluator: config
|
|
4245
4287
|
});
|
|
4246
4288
|
}
|
|
4247
4289
|
async function resolveCustomPrompt(config) {
|
|
@@ -4427,6 +4469,7 @@ function createAgentKernel() {
|
|
|
4427
4469
|
loadEvalCases,
|
|
4428
4470
|
normalizeLineEndings,
|
|
4429
4471
|
readTargetDefinitions,
|
|
4472
|
+
readTestSuiteMetadata,
|
|
4430
4473
|
readTextFile,
|
|
4431
4474
|
resolveAndCreateProvider,
|
|
4432
4475
|
resolveFileReference,
|