agentv 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -4847,7 +4847,7 @@ function isAgentProvider(provider) {
|
|
|
4847
4847
|
}
|
|
4848
4848
|
|
|
4849
4849
|
// ../../packages/core/dist/index.js
|
|
4850
|
-
import { readFile as
|
|
4850
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
4851
4851
|
import path62 from "node:path";
|
|
4852
4852
|
import { parse as parse22 } from "yaml";
|
|
4853
4853
|
import micromatch from "micromatch";
|
|
@@ -4859,8 +4859,9 @@ import { access as access3 } from "node:fs/promises";
|
|
|
4859
4859
|
import path8 from "node:path";
|
|
4860
4860
|
import path32 from "node:path";
|
|
4861
4861
|
import { readFile as readFile22 } from "node:fs/promises";
|
|
4862
|
-
import path42 from "node:path";
|
|
4863
4862
|
import { readFile as readFile32 } from "node:fs/promises";
|
|
4863
|
+
import path42 from "node:path";
|
|
4864
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
4864
4865
|
import path52 from "node:path";
|
|
4865
4866
|
|
|
4866
4867
|
// ../../node_modules/.pnpm/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider/dist/index.mjs
|
|
@@ -34384,7 +34385,7 @@ async function provisionSubagents(options) {
|
|
|
34384
34385
|
|
|
34385
34386
|
// ../../packages/core/dist/index.js
|
|
34386
34387
|
import { constants as constants32 } from "node:fs";
|
|
34387
|
-
import { access as access32, readFile as
|
|
34388
|
+
import { access as access32, readFile as readFile6 } from "node:fs/promises";
|
|
34388
34389
|
import path11 from "node:path";
|
|
34389
34390
|
import { parse as parse32 } from "yaml";
|
|
34390
34391
|
import { createHash, randomUUID as randomUUID2 } from "node:crypto";
|
|
@@ -34467,7 +34468,7 @@ ${part.content}
|
|
|
34467
34468
|
}
|
|
34468
34469
|
return parts.map((p) => p.content).join(" ");
|
|
34469
34470
|
}
|
|
34470
|
-
function formatSegment(segment) {
|
|
34471
|
+
function formatSegment(segment, mode = "lm") {
|
|
34471
34472
|
const type = asString(segment.type);
|
|
34472
34473
|
if (type === "text") {
|
|
34473
34474
|
return asString(segment.value);
|
|
@@ -34477,8 +34478,14 @@ function formatSegment(segment) {
|
|
|
34477
34478
|
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
34478
34479
|
}
|
|
34479
34480
|
if (type === "file") {
|
|
34480
|
-
const text2 = asString(segment.text);
|
|
34481
34481
|
const filePath = asString(segment.path);
|
|
34482
|
+
if (!filePath) {
|
|
34483
|
+
return void 0;
|
|
34484
|
+
}
|
|
34485
|
+
if (mode === "agent") {
|
|
34486
|
+
return `<file: path="${filePath}">`;
|
|
34487
|
+
}
|
|
34488
|
+
const text2 = asString(segment.text);
|
|
34482
34489
|
if (text2 && filePath) {
|
|
34483
34490
|
return formatFileContents([{ content: text2.trim(), isFile: true, displayPath: filePath }]);
|
|
34484
34491
|
}
|
|
@@ -34666,8 +34673,58 @@ function extractTargetFromSuite(suite) {
|
|
|
34666
34673
|
function logWarning(message) {
|
|
34667
34674
|
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
|
|
34668
34675
|
}
|
|
34676
|
+
var TEMPLATE_VARIABLES = {
|
|
34677
|
+
CANDIDATE_ANSWER: "candidate_answer",
|
|
34678
|
+
EXPECTED_MESSAGES: "expected_messages",
|
|
34679
|
+
QUESTION: "question",
|
|
34680
|
+
EXPECTED_OUTCOME: "expected_outcome",
|
|
34681
|
+
REFERENCE_ANSWER: "reference_answer",
|
|
34682
|
+
INPUT_MESSAGES: "input_messages"
|
|
34683
|
+
};
|
|
34684
|
+
var VALID_TEMPLATE_VARIABLES = new Set(
|
|
34685
|
+
Object.values(TEMPLATE_VARIABLES)
|
|
34686
|
+
);
|
|
34687
|
+
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
34688
|
+
TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
|
|
34689
|
+
TEMPLATE_VARIABLES.EXPECTED_MESSAGES
|
|
34690
|
+
]);
|
|
34669
34691
|
var ANSI_YELLOW2 = "\x1B[33m";
|
|
34670
34692
|
var ANSI_RESET2 = "\x1B[0m";
|
|
34693
|
+
async function validateCustomPromptContent(promptPath) {
|
|
34694
|
+
const content = await readFile22(promptPath, "utf8");
|
|
34695
|
+
validateTemplateVariables(content, promptPath);
|
|
34696
|
+
}
|
|
34697
|
+
function validateTemplateVariables(content, source2) {
|
|
34698
|
+
const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
|
|
34699
|
+
const foundVariables = /* @__PURE__ */ new Set();
|
|
34700
|
+
const invalidVariables = [];
|
|
34701
|
+
let match;
|
|
34702
|
+
while ((match = variablePattern.exec(content)) !== null) {
|
|
34703
|
+
const varName = match[1];
|
|
34704
|
+
foundVariables.add(varName);
|
|
34705
|
+
if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
|
|
34706
|
+
invalidVariables.push(varName);
|
|
34707
|
+
}
|
|
34708
|
+
}
|
|
34709
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
|
|
34710
|
+
const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
|
|
34711
|
+
const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
|
|
34712
|
+
if (!hasRequiredFields) {
|
|
34713
|
+
throw new Error(
|
|
34714
|
+
`Missing required fields. Must include at least one of:
|
|
34715
|
+
- {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
|
|
34716
|
+
- {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
|
|
34717
|
+
);
|
|
34718
|
+
}
|
|
34719
|
+
if (invalidVariables.length > 0) {
|
|
34720
|
+
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source2}
|
|
34721
|
+
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
34722
|
+
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
|
|
34723
|
+
console.warn(warningMessage);
|
|
34724
|
+
}
|
|
34725
|
+
}
|
|
34726
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
34727
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
34671
34728
|
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
34672
34729
|
const execution = rawEvalCase.execution;
|
|
34673
34730
|
const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
@@ -34726,6 +34783,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
34726
34783
|
const resolved = await resolveFileReference2(prompt, searchRoots);
|
|
34727
34784
|
if (resolved.resolvedPath) {
|
|
34728
34785
|
promptPath = path32.resolve(resolved.resolvedPath);
|
|
34786
|
+
try {
|
|
34787
|
+
await validateCustomPromptContent(promptPath);
|
|
34788
|
+
} catch (error40) {
|
|
34789
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
34790
|
+
throw new Error(`Evaluator '${name16}' template (${promptPath}): ${message}`);
|
|
34791
|
+
}
|
|
34729
34792
|
} else {
|
|
34730
34793
|
logWarning2(
|
|
34731
34794
|
`Inline prompt used for evaluator '${name16}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
@@ -34762,14 +34825,14 @@ function isJsonObject2(value) {
|
|
|
34762
34825
|
function logWarning2(message, details) {
|
|
34763
34826
|
if (details && details.length > 0) {
|
|
34764
34827
|
const detailBlock = details.join("\n");
|
|
34765
|
-
console.warn(`${
|
|
34766
|
-
${detailBlock}${
|
|
34828
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
34829
|
+
${detailBlock}${ANSI_RESET3}`);
|
|
34767
34830
|
} else {
|
|
34768
|
-
console.warn(`${
|
|
34831
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
|
|
34769
34832
|
}
|
|
34770
34833
|
}
|
|
34771
|
-
var
|
|
34772
|
-
var
|
|
34834
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
34835
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
34773
34836
|
async function processMessages(options) {
|
|
34774
34837
|
const {
|
|
34775
34838
|
messages,
|
|
@@ -34812,7 +34875,7 @@ async function processMessages(options) {
|
|
|
34812
34875
|
continue;
|
|
34813
34876
|
}
|
|
34814
34877
|
try {
|
|
34815
|
-
const fileContent = (await
|
|
34878
|
+
const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
34816
34879
|
if (messageType === "input" && guidelinePatterns && guidelinePaths) {
|
|
34817
34880
|
const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
|
|
34818
34881
|
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
@@ -34883,7 +34946,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
|
34883
34946
|
continue;
|
|
34884
34947
|
}
|
|
34885
34948
|
try {
|
|
34886
|
-
const fileContent = (await
|
|
34949
|
+
const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
34887
34950
|
parts.push({ content: fileContent, isFile: true, displayPath });
|
|
34888
34951
|
if (verbose) {
|
|
34889
34952
|
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
@@ -34933,15 +34996,15 @@ function cloneJsonValue(value) {
|
|
|
34933
34996
|
function logWarning3(message, details) {
|
|
34934
34997
|
if (details && details.length > 0) {
|
|
34935
34998
|
const detailBlock = details.join("\n");
|
|
34936
|
-
console.warn(`${
|
|
34937
|
-
${detailBlock}${
|
|
34999
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}
|
|
35000
|
+
${detailBlock}${ANSI_RESET4}`);
|
|
34938
35001
|
} else {
|
|
34939
|
-
console.warn(`${
|
|
35002
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
|
|
34940
35003
|
}
|
|
34941
35004
|
}
|
|
34942
|
-
var
|
|
34943
|
-
var
|
|
34944
|
-
async function buildPromptInputs(testCase) {
|
|
35005
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
35006
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
35007
|
+
async function buildPromptInputs(testCase, mode = "lm") {
|
|
34945
35008
|
const guidelineParts = [];
|
|
34946
35009
|
for (const rawPath of testCase.guideline_paths) {
|
|
34947
35010
|
const absolutePath = path52.resolve(rawPath);
|
|
@@ -34950,7 +35013,7 @@ async function buildPromptInputs(testCase) {
|
|
|
34950
35013
|
continue;
|
|
34951
35014
|
}
|
|
34952
35015
|
try {
|
|
34953
|
-
const content = (await
|
|
35016
|
+
const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
34954
35017
|
guidelineParts.push({
|
|
34955
35018
|
content,
|
|
34956
35019
|
isFile: true,
|
|
@@ -35017,7 +35080,7 @@ async function buildPromptInputs(testCase) {
|
|
|
35017
35080
|
const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
|
|
35018
35081
|
const contentParts = [];
|
|
35019
35082
|
for (const segment of segments) {
|
|
35020
|
-
const formattedContent = formatSegment(segment);
|
|
35083
|
+
const formattedContent = formatSegment(segment, mode);
|
|
35021
35084
|
if (formattedContent) {
|
|
35022
35085
|
contentParts.push(formattedContent);
|
|
35023
35086
|
}
|
|
@@ -35032,7 +35095,11 @@ ${messageContent}`);
|
|
|
35032
35095
|
} else {
|
|
35033
35096
|
const questionParts = [];
|
|
35034
35097
|
for (const segment of testCase.input_segments) {
|
|
35035
|
-
|
|
35098
|
+
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
35099
|
+
questionParts.push(`<Attached: ${segment.path}>`);
|
|
35100
|
+
continue;
|
|
35101
|
+
}
|
|
35102
|
+
const formattedContent = formatSegment(segment, mode);
|
|
35036
35103
|
if (formattedContent) {
|
|
35037
35104
|
questionParts.push(formattedContent);
|
|
35038
35105
|
}
|
|
@@ -35046,7 +35113,8 @@ ${messageContent}`);
|
|
|
35046
35113
|
messages: testCase.input_messages,
|
|
35047
35114
|
segmentsByMessage,
|
|
35048
35115
|
guidelinePatterns: testCase.guideline_patterns,
|
|
35049
|
-
guidelineContent: guidelines
|
|
35116
|
+
guidelineContent: guidelines,
|
|
35117
|
+
mode
|
|
35050
35118
|
}) : void 0;
|
|
35051
35119
|
return { question, guidelines, chatPrompt };
|
|
35052
35120
|
}
|
|
@@ -35063,7 +35131,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
35063
35131
|
return messagesWithContent > 1;
|
|
35064
35132
|
}
|
|
35065
35133
|
function buildChatPromptFromSegments(options) {
|
|
35066
|
-
const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
|
|
35134
|
+
const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt, mode = "lm" } = options;
|
|
35067
35135
|
if (messages.length === 0) {
|
|
35068
35136
|
return void 0;
|
|
35069
35137
|
}
|
|
@@ -35081,7 +35149,7 @@ ${guidelineContent.trim()}`);
|
|
|
35081
35149
|
const segments = segmentsByMessage[startIndex];
|
|
35082
35150
|
const contentParts = [];
|
|
35083
35151
|
for (const segment of segments) {
|
|
35084
|
-
const formatted = formatSegment(segment);
|
|
35152
|
+
const formatted = formatSegment(segment, mode);
|
|
35085
35153
|
if (formatted) {
|
|
35086
35154
|
contentParts.push(formatted);
|
|
35087
35155
|
}
|
|
@@ -35114,7 +35182,7 @@ ${guidelineContent.trim()}`);
|
|
|
35114
35182
|
if (segment.type === "guideline_ref") {
|
|
35115
35183
|
continue;
|
|
35116
35184
|
}
|
|
35117
|
-
const formatted = formatSegment(segment);
|
|
35185
|
+
const formatted = formatSegment(segment, mode);
|
|
35118
35186
|
if (formatted) {
|
|
35119
35187
|
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
35120
35188
|
if (isGuidelineRef) {
|
|
@@ -35138,15 +35206,16 @@ function asString4(value) {
|
|
|
35138
35206
|
return typeof value === "string" ? value : void 0;
|
|
35139
35207
|
}
|
|
35140
35208
|
function logWarning4(message) {
|
|
35141
|
-
console.warn(`${
|
|
35209
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
|
|
35142
35210
|
}
|
|
35143
|
-
var
|
|
35144
|
-
var
|
|
35211
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
35212
|
+
var ANSI_RED = "\x1B[31m";
|
|
35213
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
35145
35214
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
35146
35215
|
async function readTestSuiteMetadata(testFilePath) {
|
|
35147
35216
|
try {
|
|
35148
35217
|
const absolutePath = path62.resolve(testFilePath);
|
|
35149
|
-
const content = await
|
|
35218
|
+
const content = await readFile5(absolutePath, "utf8");
|
|
35150
35219
|
const parsed = parse22(content);
|
|
35151
35220
|
if (!isJsonObject(parsed)) {
|
|
35152
35221
|
return {};
|
|
@@ -35164,7 +35233,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
35164
35233
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
35165
35234
|
const config2 = await loadConfig(absoluteTestPath, repoRootPath);
|
|
35166
35235
|
const guidelinePatterns = config2?.guideline_patterns;
|
|
35167
|
-
const rawFile = await
|
|
35236
|
+
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
35168
35237
|
const parsed = parse22(rawFile);
|
|
35169
35238
|
if (!isJsonObject(parsed)) {
|
|
35170
35239
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
@@ -35202,14 +35271,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
35202
35271
|
const inputMessagesValue = evalcase.input_messages;
|
|
35203
35272
|
const expectedMessagesValue = evalcase.expected_messages;
|
|
35204
35273
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
35205
|
-
|
|
35274
|
+
logError(`Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`);
|
|
35206
35275
|
continue;
|
|
35207
35276
|
}
|
|
35208
35277
|
const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
|
|
35209
35278
|
const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
|
|
35210
35279
|
const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
|
|
35211
35280
|
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
35212
|
-
|
|
35281
|
+
logError(`No valid expected message found for eval case: ${id}`);
|
|
35213
35282
|
continue;
|
|
35214
35283
|
}
|
|
35215
35284
|
if (expectedMessages.length > 1) {
|
|
@@ -35240,7 +35309,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
35240
35309
|
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
35241
35310
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
35242
35311
|
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
35243
|
-
|
|
35312
|
+
let evaluators;
|
|
35313
|
+
try {
|
|
35314
|
+
evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
35315
|
+
} catch (error40) {
|
|
35316
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
35317
|
+
logError(`Skipping eval case '${id}': ${message}`);
|
|
35318
|
+
continue;
|
|
35319
|
+
}
|
|
35244
35320
|
const userFilePaths = [];
|
|
35245
35321
|
for (const segment of inputSegments) {
|
|
35246
35322
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -35258,7 +35334,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
35258
35334
|
question,
|
|
35259
35335
|
input_messages: inputMessages,
|
|
35260
35336
|
input_segments: inputSegments,
|
|
35261
|
-
|
|
35337
|
+
expected_segments: outputSegments,
|
|
35262
35338
|
reference_answer: referenceAnswer,
|
|
35263
35339
|
guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
|
|
35264
35340
|
guideline_patterns: guidelinePatterns,
|
|
@@ -35290,10 +35366,19 @@ function asString5(value) {
|
|
|
35290
35366
|
function logWarning5(message, details) {
|
|
35291
35367
|
if (details && details.length > 0) {
|
|
35292
35368
|
const detailBlock = details.join("\n");
|
|
35293
|
-
console.warn(`${
|
|
35294
|
-
${detailBlock}${
|
|
35369
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}
|
|
35370
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
35295
35371
|
} else {
|
|
35296
|
-
console.warn(`${
|
|
35372
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
|
|
35373
|
+
}
|
|
35374
|
+
}
|
|
35375
|
+
function logError(message, details) {
|
|
35376
|
+
if (details && details.length > 0) {
|
|
35377
|
+
const detailBlock = details.join("\n");
|
|
35378
|
+
console.error(`${ANSI_RED}Error: ${message}
|
|
35379
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
35380
|
+
} else {
|
|
35381
|
+
console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
|
|
35297
35382
|
}
|
|
35298
35383
|
}
|
|
35299
35384
|
var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
|
|
@@ -36991,7 +37076,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
36991
37076
|
if (!await fileExists3(absolutePath)) {
|
|
36992
37077
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
36993
37078
|
}
|
|
36994
|
-
const raw = await
|
|
37079
|
+
const raw = await readFile6(absolutePath, "utf8");
|
|
36995
37080
|
const parsed = parse32(raw);
|
|
36996
37081
|
if (!isRecord(parsed)) {
|
|
36997
37082
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -37034,16 +37119,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
|
|
|
37034
37119
|
Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
|
|
37035
37120
|
|
|
37036
37121
|
[[ ## expected_outcome ## ]]
|
|
37037
|
-
{{
|
|
37122
|
+
{{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
|
|
37038
37123
|
|
|
37039
37124
|
[[ ## question ## ]]
|
|
37040
|
-
{{
|
|
37125
|
+
{{${TEMPLATE_VARIABLES.QUESTION}}}
|
|
37041
37126
|
|
|
37042
37127
|
[[ ## reference_answer ## ]]
|
|
37043
|
-
{{
|
|
37128
|
+
{{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
|
|
37044
37129
|
|
|
37045
37130
|
[[ ## candidate_answer ## ]]
|
|
37046
|
-
{{
|
|
37131
|
+
{{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
|
|
37047
37132
|
var LlmJudgeEvaluator = class {
|
|
37048
37133
|
kind = "llm_judge";
|
|
37049
37134
|
resolveJudgeProvider;
|
|
@@ -37066,12 +37151,12 @@ var LlmJudgeEvaluator = class {
|
|
|
37066
37151
|
async evaluateWithPrompt(context, judgeProvider) {
|
|
37067
37152
|
const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
|
|
37068
37153
|
const variables = {
|
|
37069
|
-
|
|
37070
|
-
|
|
37071
|
-
|
|
37072
|
-
|
|
37073
|
-
|
|
37074
|
-
|
|
37154
|
+
[TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
|
|
37155
|
+
[TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(context.evalCase.expected_segments, null, 2),
|
|
37156
|
+
[TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
|
|
37157
|
+
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
|
|
37158
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
|
|
37159
|
+
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
|
|
37075
37160
|
};
|
|
37076
37161
|
const systemPrompt = buildOutputSchema();
|
|
37077
37162
|
const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
@@ -37303,7 +37388,7 @@ function parseJsonSafe(payload) {
|
|
|
37303
37388
|
}
|
|
37304
37389
|
}
|
|
37305
37390
|
function substituteVariables(template, variables) {
|
|
37306
|
-
return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
|
|
37391
|
+
return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
|
|
37307
37392
|
return variables[varName] ?? match;
|
|
37308
37393
|
});
|
|
37309
37394
|
}
|
|
@@ -37456,11 +37541,11 @@ async function runEvaluation(options) {
|
|
|
37456
37541
|
now,
|
|
37457
37542
|
evalId,
|
|
37458
37543
|
verbose,
|
|
37544
|
+
evalCases: preloadedEvalCases,
|
|
37459
37545
|
onResult,
|
|
37460
37546
|
onProgress
|
|
37461
37547
|
} = options;
|
|
37462
|
-
const
|
|
37463
|
-
const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
|
|
37548
|
+
const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
|
|
37464
37549
|
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
37465
37550
|
if (filteredEvalCases.length === 0) {
|
|
37466
37551
|
if (evalId) {
|
|
@@ -37644,8 +37729,9 @@ async function runBatchEvaluation(options) {
|
|
|
37644
37729
|
agentTimeoutMs
|
|
37645
37730
|
} = options;
|
|
37646
37731
|
const promptInputsList = [];
|
|
37732
|
+
const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
|
|
37647
37733
|
for (const evalCase of evalCases) {
|
|
37648
|
-
const promptInputs = await buildPromptInputs(evalCase);
|
|
37734
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
37649
37735
|
if (promptDumpDir) {
|
|
37650
37736
|
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
37651
37737
|
}
|
|
@@ -37751,7 +37837,8 @@ async function runEvalCase(options) {
|
|
|
37751
37837
|
signal,
|
|
37752
37838
|
judgeProvider
|
|
37753
37839
|
} = options;
|
|
37754
|
-
const
|
|
37840
|
+
const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
|
|
37841
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
37755
37842
|
if (promptDumpDir) {
|
|
37756
37843
|
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
37757
37844
|
}
|
|
@@ -38040,7 +38127,8 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
38040
38127
|
async function resolveCustomPrompt(config2) {
|
|
38041
38128
|
if (config2.promptPath) {
|
|
38042
38129
|
try {
|
|
38043
|
-
|
|
38130
|
+
const content = await readTextFile(config2.promptPath);
|
|
38131
|
+
return content;
|
|
38044
38132
|
} catch (error40) {
|
|
38045
38133
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
38046
38134
|
console.warn(`Could not read custom prompt at ${config2.promptPath}: ${message}`);
|
|
@@ -38927,7 +39015,7 @@ function formatEvaluationSummary(summary) {
|
|
|
38927
39015
|
}
|
|
38928
39016
|
|
|
38929
39017
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
38930
|
-
import { readFile as
|
|
39018
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
38931
39019
|
import { parse as parse6 } from "yaml";
|
|
38932
39020
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
38933
39021
|
import path16 from "node:path";
|
|
@@ -38945,7 +39033,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
|
38945
39033
|
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
38946
39034
|
async function detectFileType(filePath) {
|
|
38947
39035
|
try {
|
|
38948
|
-
const content = await
|
|
39036
|
+
const content = await readFile7(filePath, "utf8");
|
|
38949
39037
|
const parsed = parse6(content);
|
|
38950
39038
|
if (typeof parsed !== "object" || parsed === null) {
|
|
38951
39039
|
return "unknown";
|
|
@@ -39794,9 +39882,9 @@ var TARGET_FILE_CANDIDATES = [
|
|
|
39794
39882
|
path17.join(".agentv", "targets.yaml"),
|
|
39795
39883
|
path17.join(".agentv", "targets.yml")
|
|
39796
39884
|
];
|
|
39797
|
-
var
|
|
39798
|
-
var
|
|
39799
|
-
var
|
|
39885
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
39886
|
+
var ANSI_RED2 = "\x1B[31m";
|
|
39887
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
39800
39888
|
function isTTY() {
|
|
39801
39889
|
return process.stdout.isTTY ?? false;
|
|
39802
39890
|
}
|
|
@@ -39869,8 +39957,8 @@ async function selectTarget(options) {
|
|
|
39869
39957
|
Warnings in ${targetsFilePath}:`);
|
|
39870
39958
|
for (const warning of warnings) {
|
|
39871
39959
|
const location = warning.location ? ` [${warning.location}]` : "";
|
|
39872
|
-
const prefix = useColors ? `${
|
|
39873
|
-
const message = useColors ? `${
|
|
39960
|
+
const prefix = useColors ? `${ANSI_YELLOW7} \u26A0${ANSI_RESET7}` : " \u26A0";
|
|
39961
|
+
const message = useColors ? `${ANSI_YELLOW7}${warning.message}${ANSI_RESET7}` : warning.message;
|
|
39874
39962
|
console.warn(`${prefix}${location} ${message}`);
|
|
39875
39963
|
}
|
|
39876
39964
|
console.warn("");
|
|
@@ -39881,8 +39969,8 @@ Warnings in ${targetsFilePath}:`);
|
|
|
39881
39969
|
Errors in ${targetsFilePath}:`);
|
|
39882
39970
|
for (const error40 of errors) {
|
|
39883
39971
|
const location = error40.location ? ` [${error40.location}]` : "";
|
|
39884
|
-
const prefix = useColors ? `${
|
|
39885
|
-
const message = useColors ? `${
|
|
39972
|
+
const prefix = useColors ? `${ANSI_RED2} \u2717${ANSI_RESET7}` : " \u2717";
|
|
39973
|
+
const message = useColors ? `${ANSI_RED2}${error40.message}${ANSI_RESET7}` : error40.message;
|
|
39886
39974
|
console.error(`${prefix}${location} ${message}`);
|
|
39887
39975
|
}
|
|
39888
39976
|
throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
|
|
@@ -40082,7 +40170,7 @@ async function prepareFileMetadata(params) {
|
|
|
40082
40170
|
const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
|
|
40083
40171
|
const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
|
|
40084
40172
|
const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
|
|
40085
|
-
return { evalIds: filteredIds, selection, inlineTargetLabel };
|
|
40173
|
+
return { evalIds: filteredIds, evalCases, selection, inlineTargetLabel };
|
|
40086
40174
|
}
|
|
40087
40175
|
async function runWithLimit(items, limit, task) {
|
|
40088
40176
|
const safeLimit = Math.max(1, limit);
|
|
@@ -40110,7 +40198,8 @@ async function runSingleEvalFile(params) {
|
|
|
40110
40198
|
seenEvalCases,
|
|
40111
40199
|
displayIdTracker,
|
|
40112
40200
|
selection,
|
|
40113
|
-
inlineTargetLabel
|
|
40201
|
+
inlineTargetLabel,
|
|
40202
|
+
evalCases
|
|
40114
40203
|
} = params;
|
|
40115
40204
|
await ensureFileExists(testFilePath, "Test file");
|
|
40116
40205
|
const resolvedTargetSelection = selection;
|
|
@@ -40162,6 +40251,7 @@ async function runSingleEvalFile(params) {
|
|
|
40162
40251
|
cache,
|
|
40163
40252
|
useCache: options.cache,
|
|
40164
40253
|
evalId: options.evalId,
|
|
40254
|
+
evalCases,
|
|
40165
40255
|
verbose: options.verbose,
|
|
40166
40256
|
maxConcurrency: resolvedWorkers,
|
|
40167
40257
|
onResult: async (result) => {
|
|
@@ -40267,7 +40357,8 @@ async function runEvalCommand(input) {
|
|
|
40267
40357
|
seenEvalCases,
|
|
40268
40358
|
displayIdTracker,
|
|
40269
40359
|
selection: targetPrep.selection,
|
|
40270
|
-
inlineTargetLabel: targetPrep.inlineTargetLabel
|
|
40360
|
+
inlineTargetLabel: targetPrep.inlineTargetLabel,
|
|
40361
|
+
evalCases: targetPrep.evalCases
|
|
40271
40362
|
});
|
|
40272
40363
|
allResults.push(...result.results);
|
|
40273
40364
|
if (result.promptDumpDir) {
|
|
@@ -40590,12 +40681,12 @@ function registerStatusCommand(program) {
|
|
|
40590
40681
|
}
|
|
40591
40682
|
|
|
40592
40683
|
// src/commands/validate/format-output.ts
|
|
40593
|
-
var
|
|
40594
|
-
var
|
|
40684
|
+
var ANSI_RED3 = "\x1B[31m";
|
|
40685
|
+
var ANSI_YELLOW8 = "\x1B[33m";
|
|
40595
40686
|
var ANSI_GREEN = "\x1B[32m";
|
|
40596
40687
|
var ANSI_CYAN = "\x1B[36m";
|
|
40597
40688
|
var ANSI_BOLD = "\x1B[1m";
|
|
40598
|
-
var
|
|
40689
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
40599
40690
|
function formatSummary(summary, useColors) {
|
|
40600
40691
|
const lines = [];
|
|
40601
40692
|
lines.push("");
|
|
@@ -40611,15 +40702,15 @@ function formatSummary(summary, useColors) {
|
|
|
40611
40702
|
}
|
|
40612
40703
|
function formatHeader(text2, useColors) {
|
|
40613
40704
|
if (useColors) {
|
|
40614
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text2}${
|
|
40705
|
+
return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET8}`;
|
|
40615
40706
|
}
|
|
40616
40707
|
return text2;
|
|
40617
40708
|
}
|
|
40618
40709
|
function formatFileResult(result, useColors) {
|
|
40619
40710
|
const lines = [];
|
|
40620
40711
|
const status = result.valid ? "\u2713" : "\u2717";
|
|
40621
|
-
const statusColor = result.valid ? ANSI_GREEN :
|
|
40622
|
-
const statusText = useColors ? `${statusColor}${status}${
|
|
40712
|
+
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED3;
|
|
40713
|
+
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET8}` : status;
|
|
40623
40714
|
const fileName = result.filePath;
|
|
40624
40715
|
lines.push(`${statusText} ${fileName}`);
|
|
40625
40716
|
if (result.errors.length > 0) {
|
|
@@ -40631,8 +40722,8 @@ function formatFileResult(result, useColors) {
|
|
|
40631
40722
|
}
|
|
40632
40723
|
function formatError2(error40, useColors) {
|
|
40633
40724
|
const prefix = error40.severity === "error" ? " \u2717" : " \u26A0";
|
|
40634
|
-
const color = error40.severity === "error" ?
|
|
40635
|
-
const coloredPrefix = useColors ? `${color}${prefix}${
|
|
40725
|
+
const color = error40.severity === "error" ? ANSI_RED3 : ANSI_YELLOW8;
|
|
40726
|
+
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET8}` : prefix;
|
|
40636
40727
|
const location = error40.location ? ` [${error40.location}]` : "";
|
|
40637
40728
|
return `${coloredPrefix}${location} ${error40.message}`;
|
|
40638
40729
|
}
|
|
@@ -40645,15 +40736,15 @@ function formatStats(summary, useColors) {
|
|
|
40645
40736
|
(r) => r.errors.some((e) => e.severity === "warning")
|
|
40646
40737
|
).length;
|
|
40647
40738
|
if (useColors) {
|
|
40648
|
-
lines.push(`${ANSI_BOLD}${totalText}${
|
|
40649
|
-
lines.push(`${ANSI_GREEN}${validText}${
|
|
40739
|
+
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET8}`);
|
|
40740
|
+
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET8}`);
|
|
40650
40741
|
if (summary.invalidFiles > 0) {
|
|
40651
|
-
lines.push(`${
|
|
40742
|
+
lines.push(`${ANSI_RED3}${invalidText}${ANSI_RESET8}`);
|
|
40652
40743
|
} else {
|
|
40653
40744
|
lines.push(invalidText);
|
|
40654
40745
|
}
|
|
40655
40746
|
if (filesWithWarnings > 0) {
|
|
40656
|
-
lines.push(`${
|
|
40747
|
+
lines.push(`${ANSI_YELLOW8}Files with warnings: ${filesWithWarnings}${ANSI_RESET8}`);
|
|
40657
40748
|
}
|
|
40658
40749
|
} else {
|
|
40659
40750
|
lines.push(totalText);
|
|
@@ -40827,4 +40918,4 @@ export {
|
|
|
40827
40918
|
createProgram,
|
|
40828
40919
|
runCli
|
|
40829
40920
|
};
|
|
40830
|
-
//# sourceMappingURL=chunk-
|
|
40921
|
+
//# sourceMappingURL=chunk-HWGALLUR.js.map
|