agentv 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -11752,6 +11752,33 @@ var ANSI_YELLOW = "\x1B[33m";
|
|
|
11752
11752
|
var ANSI_RESET = "\x1B[0m";
|
|
11753
11753
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
11754
11754
|
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
11755
|
+
async function readTestSuiteMetadata(testFilePath) {
|
|
11756
|
+
try {
|
|
11757
|
+
const absolutePath = path8.resolve(testFilePath);
|
|
11758
|
+
const content = await readFile3(absolutePath, "utf8");
|
|
11759
|
+
const parsed = parse3(content);
|
|
11760
|
+
if (!isJsonObject(parsed)) {
|
|
11761
|
+
return {};
|
|
11762
|
+
}
|
|
11763
|
+
return { target: extractTargetFromSuite(parsed) };
|
|
11764
|
+
} catch {
|
|
11765
|
+
return {};
|
|
11766
|
+
}
|
|
11767
|
+
}
|
|
11768
|
+
function extractTargetFromSuite(suite) {
|
|
11769
|
+
const execution = suite.execution;
|
|
11770
|
+
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
11771
|
+
const executionTarget = execution.target;
|
|
11772
|
+
if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
|
|
11773
|
+
return executionTarget.trim();
|
|
11774
|
+
}
|
|
11775
|
+
}
|
|
11776
|
+
const targetValue = suite.target;
|
|
11777
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
11778
|
+
return targetValue.trim();
|
|
11779
|
+
}
|
|
11780
|
+
return void 0;
|
|
11781
|
+
}
|
|
11755
11782
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
11756
11783
|
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
11757
11784
|
for (const directory of directories) {
|
|
@@ -11928,6 +11955,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11928
11955
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
11929
11956
|
}
|
|
11930
11957
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
11958
|
+
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
11959
|
+
const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
|
|
11931
11960
|
const results = [];
|
|
11932
11961
|
for (const rawEvalcase of rawTestcases) {
|
|
11933
11962
|
if (!isJsonObject(rawEvalcase)) {
|
|
@@ -11982,7 +12011,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11982
12011
|
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
11983
12012
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
11984
12013
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
11985
|
-
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
12014
|
+
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
11986
12015
|
const userFilePaths = [];
|
|
11987
12016
|
for (const segment of inputSegments) {
|
|
11988
12017
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -12068,14 +12097,13 @@ function formatSegment(segment) {
|
|
|
12068
12097
|
const text = asString(segment.text);
|
|
12069
12098
|
const filePath = asString(segment.path);
|
|
12070
12099
|
if (text && filePath) {
|
|
12071
|
-
return
|
|
12072
|
-
${text}`;
|
|
12100
|
+
return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
|
|
12073
12101
|
}
|
|
12074
12102
|
}
|
|
12075
12103
|
return void 0;
|
|
12076
12104
|
}
|
|
12077
12105
|
async function buildPromptInputs(testCase) {
|
|
12078
|
-
const
|
|
12106
|
+
const guidelineParts = [];
|
|
12079
12107
|
for (const rawPath of testCase.guideline_paths) {
|
|
12080
12108
|
const absolutePath = path8.resolve(rawPath);
|
|
12081
12109
|
if (!await fileExists2(absolutePath)) {
|
|
@@ -12083,14 +12111,17 @@ async function buildPromptInputs(testCase) {
|
|
|
12083
12111
|
continue;
|
|
12084
12112
|
}
|
|
12085
12113
|
try {
|
|
12086
|
-
const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n");
|
|
12087
|
-
|
|
12088
|
-
|
|
12114
|
+
const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
12115
|
+
guidelineParts.push({
|
|
12116
|
+
content,
|
|
12117
|
+
isFile: true,
|
|
12118
|
+
displayPath: path8.basename(absolutePath)
|
|
12119
|
+
});
|
|
12089
12120
|
} catch (error) {
|
|
12090
12121
|
logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
12091
12122
|
}
|
|
12092
12123
|
}
|
|
12093
|
-
const guidelines =
|
|
12124
|
+
const guidelines = formatFileContents(guidelineParts);
|
|
12094
12125
|
const segmentsByMessage = [];
|
|
12095
12126
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
12096
12127
|
for (const segment of testCase.input_segments) {
|
|
@@ -12292,6 +12323,20 @@ function cloneJsonValue(value) {
|
|
|
12292
12323
|
}
|
|
12293
12324
|
return cloneJsonObject(value);
|
|
12294
12325
|
}
|
|
12326
|
+
function formatFileContents(parts) {
|
|
12327
|
+
const fileCount = parts.filter((p) => p.isFile).length;
|
|
12328
|
+
if (fileCount > 0) {
|
|
12329
|
+
return parts.map((part) => {
|
|
12330
|
+
if (part.isFile && part.displayPath) {
|
|
12331
|
+
return `<file path="${part.displayPath}">
|
|
12332
|
+
${part.content}
|
|
12333
|
+
</file>`;
|
|
12334
|
+
}
|
|
12335
|
+
return part.content;
|
|
12336
|
+
}).join("\n\n");
|
|
12337
|
+
}
|
|
12338
|
+
return parts.map((p) => p.content).join(" ");
|
|
12339
|
+
}
|
|
12295
12340
|
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
12296
12341
|
if (typeof content === "string") {
|
|
12297
12342
|
return content;
|
|
@@ -12302,7 +12347,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
12302
12347
|
const parts = [];
|
|
12303
12348
|
for (const entry of content) {
|
|
12304
12349
|
if (typeof entry === "string") {
|
|
12305
|
-
parts.push(entry);
|
|
12350
|
+
parts.push({ content: entry, isFile: false });
|
|
12306
12351
|
continue;
|
|
12307
12352
|
}
|
|
12308
12353
|
if (!isJsonObject(entry)) {
|
|
@@ -12324,8 +12369,8 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
12324
12369
|
continue;
|
|
12325
12370
|
}
|
|
12326
12371
|
try {
|
|
12327
|
-
const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
12328
|
-
parts.push(fileContent);
|
|
12372
|
+
const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
12373
|
+
parts.push({ content: fileContent, isFile: true, displayPath });
|
|
12329
12374
|
if (verbose) {
|
|
12330
12375
|
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
12331
12376
|
console.log(` Resolved to: ${resolvedPath}`);
|
|
@@ -12337,21 +12382,21 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
12337
12382
|
}
|
|
12338
12383
|
const textValue = asString(entry.text);
|
|
12339
12384
|
if (typeof textValue === "string") {
|
|
12340
|
-
parts.push(textValue);
|
|
12385
|
+
parts.push({ content: textValue, isFile: false });
|
|
12341
12386
|
continue;
|
|
12342
12387
|
}
|
|
12343
12388
|
const valueValue = asString(entry.value);
|
|
12344
12389
|
if (typeof valueValue === "string") {
|
|
12345
|
-
parts.push(valueValue);
|
|
12390
|
+
parts.push({ content: valueValue, isFile: false });
|
|
12346
12391
|
continue;
|
|
12347
12392
|
}
|
|
12348
|
-
parts.push(JSON.stringify(entry));
|
|
12393
|
+
parts.push({ content: JSON.stringify(entry), isFile: false });
|
|
12349
12394
|
}
|
|
12350
|
-
return parts
|
|
12395
|
+
return formatFileContents(parts);
|
|
12351
12396
|
}
|
|
12352
|
-
async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
12397
|
+
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
12353
12398
|
const execution = rawEvalCase.execution;
|
|
12354
|
-
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
|
|
12399
|
+
const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
12355
12400
|
if (candidateEvaluators === void 0) {
|
|
12356
12401
|
return void 0;
|
|
12357
12402
|
}
|
|
@@ -12389,6 +12434,8 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
12389
12434
|
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12390
12435
|
);
|
|
12391
12436
|
}
|
|
12437
|
+
} else {
|
|
12438
|
+
resolvedCwd = searchRoots[0];
|
|
12392
12439
|
}
|
|
12393
12440
|
evaluators.push({
|
|
12394
12441
|
name,
|
|
@@ -12417,8 +12464,7 @@ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
|
|
|
12417
12464
|
name,
|
|
12418
12465
|
type: "llm_judge",
|
|
12419
12466
|
prompt,
|
|
12420
|
-
promptPath
|
|
12421
|
-
model
|
|
12467
|
+
promptPath
|
|
12422
12468
|
});
|
|
12423
12469
|
}
|
|
12424
12470
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
@@ -14157,10 +14203,7 @@ var LlmJudgeEvaluator = class {
|
|
|
14157
14203
|
prompt = substituteVariables(systemPrompt, variables);
|
|
14158
14204
|
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
14159
14205
|
}
|
|
14160
|
-
const metadata = {
|
|
14161
|
-
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
14162
|
-
...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
|
|
14163
|
-
};
|
|
14206
|
+
const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
|
|
14164
14207
|
const response = await judgeProvider.invoke({
|
|
14165
14208
|
question: prompt,
|
|
14166
14209
|
metadata,
|
|
@@ -14180,8 +14223,7 @@ var LlmJudgeEvaluator = class {
|
|
|
14180
14223
|
provider: judgeProvider.id,
|
|
14181
14224
|
prompt,
|
|
14182
14225
|
target: context2.target.name,
|
|
14183
|
-
...systemPrompt !== void 0
|
|
14184
|
-
...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
|
|
14226
|
+
...systemPrompt !== void 0 && { systemPrompt }
|
|
14185
14227
|
};
|
|
14186
14228
|
return {
|
|
14187
14229
|
score,
|
|
@@ -15164,8 +15206,7 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
15164
15206
|
now,
|
|
15165
15207
|
judgeProvider,
|
|
15166
15208
|
systemPrompt: customPrompt,
|
|
15167
|
-
evaluator: config
|
|
15168
|
-
judgeModel: config.model
|
|
15209
|
+
evaluator: config
|
|
15169
15210
|
});
|
|
15170
15211
|
}
|
|
15171
15212
|
async function resolveCustomPrompt(config) {
|
|
@@ -16918,9 +16959,8 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
16918
16959
|
|
|
16919
16960
|
// src/commands/eval/targets.ts
|
|
16920
16961
|
import { constants as constants5 } from "node:fs";
|
|
16921
|
-
import { access as access5
|
|
16962
|
+
import { access as access5 } from "node:fs/promises";
|
|
16922
16963
|
import path13 from "node:path";
|
|
16923
|
-
import { parse as parse6 } from "yaml";
|
|
16924
16964
|
var TARGET_FILE_CANDIDATES = [
|
|
16925
16965
|
"targets.yaml",
|
|
16926
16966
|
"targets.yml",
|
|
@@ -16942,18 +16982,8 @@ async function fileExists5(filePath) {
|
|
|
16942
16982
|
}
|
|
16943
16983
|
}
|
|
16944
16984
|
async function readTestSuiteTarget(testFilePath) {
|
|
16945
|
-
|
|
16946
|
-
|
|
16947
|
-
const parsed = parse6(raw);
|
|
16948
|
-
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
16949
|
-
const targetValue = parsed.target;
|
|
16950
|
-
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
16951
|
-
return targetValue.trim();
|
|
16952
|
-
}
|
|
16953
|
-
}
|
|
16954
|
-
} catch {
|
|
16955
|
-
}
|
|
16956
|
-
return void 0;
|
|
16985
|
+
const metadata = await readTestSuiteMetadata(testFilePath);
|
|
16986
|
+
return metadata.target;
|
|
16957
16987
|
}
|
|
16958
16988
|
async function discoverTargetsFile(options) {
|
|
16959
16989
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
@@ -17924,4 +17954,4 @@ export {
|
|
|
17924
17954
|
createProgram,
|
|
17925
17955
|
runCli
|
|
17926
17956
|
};
|
|
17927
|
-
//# sourceMappingURL=chunk-
|
|
17957
|
+
//# sourceMappingURL=chunk-7CJK3EYC.js.map
|