@agentv/core 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QVS4OL44.js → chunk-XXNQA4EW.js} +27 -1
- package/dist/chunk-XXNQA4EW.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +93 -8
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.d.cts +7 -2
- package/dist/evaluation/validation/index.d.ts +7 -2
- package/dist/evaluation/validation/index.js +91 -7
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +274 -182
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +39 -10
- package/dist/index.d.ts +39 -10
- package/dist/index.js +237 -187
- package/dist/index.js.map +1 -1
- package/package.json +6 -2
- package/dist/chunk-QVS4OL44.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
TARGETS_SCHEMA_V2,
|
|
3
|
+
buildDirectoryChain,
|
|
3
4
|
buildSearchRoots,
|
|
5
|
+
fileExists,
|
|
6
|
+
findGitRoot,
|
|
4
7
|
resolveFileReference
|
|
5
|
-
} from "./chunk-QVS4OL44.js";
|
|
8
|
+
} from "./chunk-XXNQA4EW.js";
|
|
6
9
|
|
|
7
10
|
// src/evaluation/types.ts
|
|
8
11
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -56,6 +59,7 @@ function getHitCount(result) {
|
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
// src/evaluation/yaml-parser.ts
|
|
62
|
+
import micromatch from "micromatch";
|
|
59
63
|
import { constants } from "node:fs";
|
|
60
64
|
import { access, readFile } from "node:fs/promises";
|
|
61
65
|
import path from "node:path";
|
|
@@ -65,9 +69,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
|
65
69
|
var ANSI_YELLOW = "\x1B[33m";
|
|
66
70
|
var ANSI_RESET = "\x1B[0m";
|
|
67
71
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
68
|
-
|
|
72
|
+
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
73
|
+
async function loadConfig(evalFilePath, repoRoot) {
|
|
74
|
+
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
75
|
+
for (const directory of directories) {
|
|
76
|
+
const configPath = path.join(directory, ".agentv", "config.yaml");
|
|
77
|
+
if (!await fileExists2(configPath)) {
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
try {
|
|
81
|
+
const rawConfig = await readFile(configPath, "utf8");
|
|
82
|
+
const parsed = parse(rawConfig);
|
|
83
|
+
if (!isJsonObject(parsed)) {
|
|
84
|
+
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
const config = parsed;
|
|
88
|
+
const schema = config.$schema;
|
|
89
|
+
if (schema !== SCHEMA_CONFIG_V2) {
|
|
90
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
|
|
91
|
+
Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
|
|
92
|
+
logWarning(message);
|
|
93
|
+
continue;
|
|
94
|
+
}
|
|
95
|
+
const guidelinePatterns = config.guideline_patterns;
|
|
96
|
+
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
97
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
98
|
+
continue;
|
|
99
|
+
}
|
|
100
|
+
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
101
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
return {
|
|
105
|
+
guideline_patterns: guidelinePatterns
|
|
106
|
+
};
|
|
107
|
+
} catch (error) {
|
|
108
|
+
logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
|
|
109
|
+
continue;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
return null;
|
|
113
|
+
}
|
|
114
|
+
function isGuidelineFile(filePath, patterns) {
|
|
69
115
|
const normalized = filePath.split("\\").join("/");
|
|
70
|
-
|
|
116
|
+
const patternsToUse = patterns ?? [];
|
|
117
|
+
return micromatch.isMatch(normalized, patternsToUse);
|
|
71
118
|
}
|
|
72
119
|
function extractCodeBlocks(segments) {
|
|
73
120
|
const codeBlocks = [];
|
|
@@ -87,43 +134,45 @@ function extractCodeBlocks(segments) {
|
|
|
87
134
|
}
|
|
88
135
|
return codeBlocks;
|
|
89
136
|
}
|
|
90
|
-
async function
|
|
137
|
+
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
91
138
|
const verbose = options?.verbose ?? false;
|
|
92
|
-
const absoluteTestPath = path.resolve(
|
|
93
|
-
if (!await
|
|
94
|
-
throw new Error(`Test file not found: ${
|
|
139
|
+
const absoluteTestPath = path.resolve(evalFilePath);
|
|
140
|
+
if (!await fileExists2(absoluteTestPath)) {
|
|
141
|
+
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
95
142
|
}
|
|
96
143
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
97
144
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
145
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
146
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
98
147
|
const rawFile = await readFile(absoluteTestPath, "utf8");
|
|
99
148
|
const parsed = parse(rawFile);
|
|
100
149
|
if (!isJsonObject(parsed)) {
|
|
101
|
-
throw new Error(`Invalid test file format: ${
|
|
150
|
+
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
102
151
|
}
|
|
103
152
|
const suite = parsed;
|
|
104
153
|
const schema = suite.$schema;
|
|
105
154
|
if (schema !== SCHEMA_EVAL_V2) {
|
|
106
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${
|
|
155
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
107
156
|
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
108
157
|
throw new Error(message);
|
|
109
158
|
}
|
|
110
159
|
const rawTestcases = suite.evalcases;
|
|
111
160
|
if (!Array.isArray(rawTestcases)) {
|
|
112
|
-
throw new Error(`Invalid test file format: ${
|
|
161
|
+
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
113
162
|
}
|
|
114
163
|
const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
|
|
115
164
|
const results = [];
|
|
116
|
-
for (const
|
|
117
|
-
if (!isJsonObject(
|
|
165
|
+
for (const rawEvalcase of rawTestcases) {
|
|
166
|
+
if (!isJsonObject(rawEvalcase)) {
|
|
118
167
|
logWarning("Skipping invalid test case entry (expected object)");
|
|
119
168
|
continue;
|
|
120
169
|
}
|
|
121
|
-
const
|
|
122
|
-
const id = asString(
|
|
123
|
-
const conversationId = asString(
|
|
124
|
-
const outcome = asString(
|
|
125
|
-
const inputMessagesValue =
|
|
126
|
-
const expectedMessagesValue =
|
|
170
|
+
const evalcase = rawEvalcase;
|
|
171
|
+
const id = asString(evalcase.id);
|
|
172
|
+
const conversationId = asString(evalcase.conversation_id);
|
|
173
|
+
const outcome = asString(evalcase.outcome);
|
|
174
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
175
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
127
176
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
128
177
|
logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
|
|
129
178
|
continue;
|
|
@@ -136,6 +185,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
136
185
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
137
186
|
const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
|
|
138
187
|
const userMessages = inputMessages.filter((message) => message.role === "user");
|
|
188
|
+
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
139
189
|
if (assistantMessages.length === 0) {
|
|
140
190
|
logWarning(`No assistant message found for test case: ${id}`);
|
|
141
191
|
continue;
|
|
@@ -143,6 +193,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
143
193
|
if (assistantMessages.length > 1) {
|
|
144
194
|
logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
|
|
145
195
|
}
|
|
196
|
+
if (systemMessages.length > 1) {
|
|
197
|
+
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
198
|
+
}
|
|
199
|
+
let systemMessageContent;
|
|
200
|
+
if (systemMessages.length > 0) {
|
|
201
|
+
const content = systemMessages[0]?.content;
|
|
202
|
+
if (typeof content === "string") {
|
|
203
|
+
systemMessageContent = content;
|
|
204
|
+
} else if (Array.isArray(content)) {
|
|
205
|
+
const textParts = [];
|
|
206
|
+
for (const segment of content) {
|
|
207
|
+
if (isJsonObject(segment)) {
|
|
208
|
+
const value = segment.value;
|
|
209
|
+
if (typeof value === "string") {
|
|
210
|
+
textParts.push(value);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
if (textParts.length > 0) {
|
|
215
|
+
systemMessageContent = textParts.join("\n\n");
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
146
219
|
const userSegments = [];
|
|
147
220
|
const guidelinePaths = [];
|
|
148
221
|
const userTextParts = [];
|
|
@@ -174,7 +247,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
174
247
|
}
|
|
175
248
|
try {
|
|
176
249
|
const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
177
|
-
|
|
250
|
+
const relativeToRepo = path.relative(repoRootPath, resolvedPath);
|
|
251
|
+
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
178
252
|
guidelinePaths.push(path.resolve(resolvedPath));
|
|
179
253
|
if (verbose) {
|
|
180
254
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
@@ -184,7 +258,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
184
258
|
userSegments.push({
|
|
185
259
|
type: "file",
|
|
186
260
|
path: displayPath,
|
|
187
|
-
text: fileContent
|
|
261
|
+
text: fileContent,
|
|
262
|
+
resolvedPath: path.resolve(resolvedPath)
|
|
188
263
|
});
|
|
189
264
|
if (verbose) {
|
|
190
265
|
console.log(` [File] Found: ${displayPath}`);
|
|
@@ -208,14 +283,27 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
208
283
|
const assistantContent = assistantMessages[0]?.content;
|
|
209
284
|
const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
|
|
210
285
|
const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
211
|
-
const testCaseGrader = coerceGrader(
|
|
286
|
+
const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
|
|
287
|
+
const userFilePaths = [];
|
|
288
|
+
for (const segment of userSegments) {
|
|
289
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
290
|
+
userFilePaths.push(segment.resolvedPath);
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
const allFilePaths = [
|
|
294
|
+
...guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
|
|
295
|
+
...userFilePaths
|
|
296
|
+
];
|
|
212
297
|
const testCase = {
|
|
213
298
|
id,
|
|
214
299
|
conversation_id: conversationId,
|
|
215
300
|
task: userTextPrompt,
|
|
216
301
|
user_segments: userSegments,
|
|
302
|
+
system_message: systemMessageContent,
|
|
217
303
|
expected_assistant_raw: expectedAssistantRaw,
|
|
218
304
|
guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
|
|
305
|
+
guideline_patterns: guidelinePatterns,
|
|
306
|
+
file_paths: allFilePaths,
|
|
219
307
|
code_snippets: codeSnippets,
|
|
220
308
|
outcome,
|
|
221
309
|
grader: testCaseGrader
|
|
@@ -240,7 +328,7 @@ async function buildPromptInputs(testCase) {
|
|
|
240
328
|
const guidelineContents = [];
|
|
241
329
|
for (const rawPath of testCase.guideline_paths) {
|
|
242
330
|
const absolutePath = path.resolve(rawPath);
|
|
243
|
-
if (!await
|
|
331
|
+
if (!await fileExists2(absolutePath)) {
|
|
244
332
|
logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
245
333
|
continue;
|
|
246
334
|
}
|
|
@@ -281,9 +369,9 @@ ${body}`);
|
|
|
281
369
|
}
|
|
282
370
|
const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
283
371
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
284
|
-
return { request, guidelines };
|
|
372
|
+
return { request, guidelines, systemMessage: testCase.system_message };
|
|
285
373
|
}
|
|
286
|
-
async function
|
|
374
|
+
async function fileExists2(absolutePath) {
|
|
287
375
|
try {
|
|
288
376
|
await access(absolutePath, constants.F_OK);
|
|
289
377
|
return true;
|
|
@@ -407,15 +495,18 @@ function buildChatPrompt(request) {
|
|
|
407
495
|
return request.chatPrompt;
|
|
408
496
|
}
|
|
409
497
|
const systemSegments = [];
|
|
410
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
411
|
-
systemSegments.push(`Guidelines:
|
|
412
|
-
${request.guidelines.trim()}`);
|
|
413
|
-
}
|
|
414
498
|
const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
|
|
415
499
|
if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
|
|
416
500
|
systemSegments.push(metadataSystemPrompt.trim());
|
|
501
|
+
} else {
|
|
502
|
+
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
503
|
+
}
|
|
504
|
+
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
505
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
506
|
+
|
|
507
|
+
${request.guidelines.trim()}`);
|
|
417
508
|
}
|
|
418
|
-
const systemContent = systemSegments.
|
|
509
|
+
const systemContent = systemSegments.join("\n\n");
|
|
419
510
|
const userContent = request.prompt.trim();
|
|
420
511
|
const prompt = [
|
|
421
512
|
{
|
|
@@ -871,11 +962,9 @@ function isLikelyEnvReference(value) {
|
|
|
871
962
|
}
|
|
872
963
|
|
|
873
964
|
// src/evaluation/providers/vscode.ts
|
|
874
|
-
import {
|
|
875
|
-
import { tmpdir } from "node:os";
|
|
965
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
876
966
|
import path2 from "node:path";
|
|
877
967
|
import { dispatchAgentSession, getSubagentRoot, provisionSubagents } from "subagent";
|
|
878
|
-
var PROMPT_FILE_PREFIX = "agentv-vscode-";
|
|
879
968
|
var VSCodeProvider = class {
|
|
880
969
|
id;
|
|
881
970
|
kind;
|
|
@@ -892,128 +981,89 @@ var VSCodeProvider = class {
|
|
|
892
981
|
throw new Error("VS Code provider request was aborted before dispatch");
|
|
893
982
|
}
|
|
894
983
|
const attachments = normalizeAttachments(request.attachments);
|
|
895
|
-
const promptContent = buildPromptDocument(request, attachments);
|
|
896
|
-
const
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
913
|
-
throw new Error(failure);
|
|
914
|
-
}
|
|
915
|
-
if (this.config.dryRun) {
|
|
916
|
-
return {
|
|
917
|
-
text: "",
|
|
918
|
-
raw: {
|
|
919
|
-
session,
|
|
920
|
-
promptFile: promptPath,
|
|
921
|
-
attachments
|
|
922
|
-
}
|
|
923
|
-
};
|
|
924
|
-
}
|
|
925
|
-
const responseText = await readFile2(session.responseFile, "utf8");
|
|
984
|
+
const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
|
|
985
|
+
const session = await dispatchAgentSession({
|
|
986
|
+
userQuery: promptContent,
|
|
987
|
+
// Use full prompt content instead of just request.prompt
|
|
988
|
+
extraAttachments: attachments,
|
|
989
|
+
wait: this.config.waitForResponse,
|
|
990
|
+
dryRun: this.config.dryRun,
|
|
991
|
+
vscodeCmd: this.config.command,
|
|
992
|
+
subagentRoot: this.config.subagentRoot,
|
|
993
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
994
|
+
silent: true
|
|
995
|
+
});
|
|
996
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
997
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
998
|
+
throw new Error(failure);
|
|
999
|
+
}
|
|
1000
|
+
if (this.config.dryRun) {
|
|
926
1001
|
return {
|
|
927
|
-
text:
|
|
1002
|
+
text: "",
|
|
928
1003
|
raw: {
|
|
929
1004
|
session,
|
|
930
|
-
promptFile: promptPath,
|
|
931
1005
|
attachments
|
|
932
1006
|
}
|
|
933
1007
|
};
|
|
934
|
-
} finally {
|
|
935
|
-
await rm(directory, { recursive: true, force: true });
|
|
936
1008
|
}
|
|
1009
|
+
const responseText = await readFile2(session.responseFile, "utf8");
|
|
1010
|
+
return {
|
|
1011
|
+
text: responseText,
|
|
1012
|
+
raw: {
|
|
1013
|
+
session,
|
|
1014
|
+
attachments
|
|
1015
|
+
}
|
|
1016
|
+
};
|
|
937
1017
|
}
|
|
938
1018
|
};
|
|
939
|
-
function buildPromptDocument(request, attachments) {
|
|
1019
|
+
function buildPromptDocument(request, attachments, guidelinePatterns) {
|
|
940
1020
|
const parts = [];
|
|
941
|
-
const
|
|
942
|
-
if (
|
|
943
|
-
parts.push(buildMandatoryPrereadBlock(
|
|
944
|
-
}
|
|
945
|
-
parts.push(`# AgentV Request`);
|
|
946
|
-
if (request.testCaseId) {
|
|
947
|
-
parts.push(`- Test Case: ${request.testCaseId}`);
|
|
948
|
-
}
|
|
949
|
-
if (request.metadata?.target) {
|
|
950
|
-
parts.push(`- Target: ${String(request.metadata.target)}`);
|
|
951
|
-
}
|
|
952
|
-
parts.push("\n## Task\n", request.prompt.trim());
|
|
953
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
954
|
-
parts.push("\n## Guidelines\n", request.guidelines.trim());
|
|
955
|
-
}
|
|
956
|
-
if (attachments && attachments.length > 0) {
|
|
957
|
-
const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
|
|
958
|
-
parts.push("\n## Attachments\n", attachmentList);
|
|
1021
|
+
const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
|
|
1022
|
+
if (guidelineFiles.length > 0) {
|
|
1023
|
+
parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
|
|
959
1024
|
}
|
|
1025
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
960
1026
|
return parts.join("\n").trim();
|
|
961
1027
|
}
|
|
962
|
-
function buildMandatoryPrereadBlock(
|
|
963
|
-
if (
|
|
1028
|
+
function buildMandatoryPrereadBlock(guidelineFiles) {
|
|
1029
|
+
if (guidelineFiles.length === 0) {
|
|
964
1030
|
return "";
|
|
965
1031
|
}
|
|
966
1032
|
const fileList = [];
|
|
967
|
-
const tokenList = [];
|
|
968
1033
|
let counter = 0;
|
|
969
|
-
for (const absolutePath of
|
|
1034
|
+
for (const absolutePath of guidelineFiles) {
|
|
970
1035
|
counter += 1;
|
|
971
1036
|
const fileName = path2.basename(absolutePath);
|
|
972
1037
|
const fileUri = pathToFileUri(absolutePath);
|
|
973
|
-
fileList.push(
|
|
974
|
-
tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
|
|
1038
|
+
fileList.push(`* [${fileName}](${fileUri})`);
|
|
975
1039
|
}
|
|
976
|
-
const filesText = fileList.join("
|
|
977
|
-
const tokensText = tokenList.join("\n");
|
|
1040
|
+
const filesText = fileList.join("\n");
|
|
978
1041
|
const instruction = [
|
|
979
|
-
`Read all
|
|
980
|
-
|
|
981
|
-
"`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
|
|
982
|
-
`Then include, at the top of your reply, these exact tokens on separate lines:
|
|
1042
|
+
`Read all guideline files:
|
|
1043
|
+
${filesText}.
|
|
983
1044
|
`,
|
|
984
|
-
tokensText,
|
|
985
|
-
`
|
|
986
|
-
Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
|
|
987
1045
|
`If any file is missing, fail with ERROR: missing-file <filename> and stop.
|
|
988
1046
|
`,
|
|
989
|
-
`Then
|
|
990
|
-
].join("
|
|
991
|
-
return
|
|
992
|
-
|
|
993
|
-
${instruction}
|
|
994
|
-
|
|
995
|
-
`;
|
|
1047
|
+
`Then apply system_instructions on the user query below.`
|
|
1048
|
+
].join("");
|
|
1049
|
+
return `${instruction}`;
|
|
996
1050
|
}
|
|
997
|
-
function
|
|
1051
|
+
function collectGuidelineFiles(attachments, guidelinePatterns) {
|
|
998
1052
|
if (!attachments || attachments.length === 0) {
|
|
999
1053
|
return [];
|
|
1000
1054
|
}
|
|
1001
1055
|
const unique = /* @__PURE__ */ new Map();
|
|
1002
1056
|
for (const attachment of attachments) {
|
|
1003
|
-
if (!isInstructionPath(attachment)) {
|
|
1004
|
-
continue;
|
|
1005
|
-
}
|
|
1006
1057
|
const absolutePath = path2.resolve(attachment);
|
|
1007
|
-
|
|
1008
|
-
|
|
1058
|
+
const normalized = absolutePath.split(path2.sep).join("/");
|
|
1059
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
1060
|
+
if (!unique.has(absolutePath)) {
|
|
1061
|
+
unique.set(absolutePath, absolutePath);
|
|
1062
|
+
}
|
|
1009
1063
|
}
|
|
1010
1064
|
}
|
|
1011
1065
|
return Array.from(unique.values());
|
|
1012
1066
|
}
|
|
1013
|
-
function isInstructionPath(filePath) {
|
|
1014
|
-
const normalized = filePath.split(path2.sep).join("/");
|
|
1015
|
-
return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
|
|
1016
|
-
}
|
|
1017
1067
|
function pathToFileUri(filePath) {
|
|
1018
1068
|
const absolutePath = path2.isAbsolute(filePath) ? filePath : path2.resolve(filePath);
|
|
1019
1069
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
@@ -1022,14 +1072,6 @@ function pathToFileUri(filePath) {
|
|
|
1022
1072
|
}
|
|
1023
1073
|
return `file://${normalizedPath}`;
|
|
1024
1074
|
}
|
|
1025
|
-
function composeUserQuery(request) {
|
|
1026
|
-
const segments = [];
|
|
1027
|
-
segments.push(request.prompt.trim());
|
|
1028
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
1029
|
-
segments.push("\nGuidelines:\n", request.guidelines.trim());
|
|
1030
|
-
}
|
|
1031
|
-
return segments.join("\n").trim();
|
|
1032
|
-
}
|
|
1033
1075
|
function normalizeAttachments(attachments) {
|
|
1034
1076
|
if (!attachments || attachments.length === 0) {
|
|
1035
1077
|
return void 0;
|
|
@@ -1136,7 +1178,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
1136
1178
|
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
1137
1179
|
};
|
|
1138
1180
|
}
|
|
1139
|
-
async function fileExists2(filePath) {
|
|
1181
|
+
async function fileExists3(filePath) {
|
|
1140
1182
|
try {
|
|
1141
1183
|
await access2(filePath, constants2.F_OK);
|
|
1142
1184
|
return true;
|
|
@@ -1146,7 +1188,7 @@ async function fileExists2(filePath) {
|
|
|
1146
1188
|
}
|
|
1147
1189
|
async function readTargetDefinitions(filePath) {
|
|
1148
1190
|
const absolutePath = path3.resolve(filePath);
|
|
1149
|
-
if (!await
|
|
1191
|
+
if (!await fileExists3(absolutePath)) {
|
|
1150
1192
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
1151
1193
|
}
|
|
1152
1194
|
const raw = await readFile3(absolutePath, "utf8");
|
|
@@ -1376,7 +1418,7 @@ import { randomUUID } from "node:crypto";
|
|
|
1376
1418
|
var HeuristicGrader = class {
|
|
1377
1419
|
kind = "heuristic";
|
|
1378
1420
|
grade(context) {
|
|
1379
|
-
const expectedAspects = extractAspects(context.
|
|
1421
|
+
const expectedAspects = extractAspects(context.evalCase.expected_assistant_raw);
|
|
1380
1422
|
const result = scoreCandidateResponse(context.candidate, expectedAspects);
|
|
1381
1423
|
const misses = [...result.misses];
|
|
1382
1424
|
if (expectedAspects.length === 0 && isErrorLike(context.candidate)) {
|
|
@@ -1409,14 +1451,14 @@ var QualityGrader = class {
|
|
|
1409
1451
|
if (!judgeProvider) {
|
|
1410
1452
|
throw new Error("No judge provider available for LLM grading");
|
|
1411
1453
|
}
|
|
1412
|
-
const prompt = buildQualityPrompt(context.
|
|
1454
|
+
const prompt = buildQualityPrompt(context.evalCase, context.candidate);
|
|
1413
1455
|
const metadata = {
|
|
1414
1456
|
systemPrompt: QUALITY_SYSTEM_PROMPT
|
|
1415
1457
|
};
|
|
1416
1458
|
const response = await judgeProvider.invoke({
|
|
1417
1459
|
prompt,
|
|
1418
1460
|
metadata,
|
|
1419
|
-
|
|
1461
|
+
evalCaseId: context.evalCase.id,
|
|
1420
1462
|
attempt: context.attempt,
|
|
1421
1463
|
maxOutputTokens: this.maxOutputTokens,
|
|
1422
1464
|
temperature: this.temperature
|
|
@@ -1462,16 +1504,16 @@ var QUALITY_SYSTEM_PROMPT = [
|
|
|
1462
1504
|
function buildQualityPrompt(testCase, candidate) {
|
|
1463
1505
|
const parts = [
|
|
1464
1506
|
"[[ ## expected_outcome ## ]]",
|
|
1465
|
-
testCase.outcome,
|
|
1507
|
+
testCase.outcome.trim(),
|
|
1466
1508
|
"",
|
|
1467
1509
|
"[[ ## request ## ]]",
|
|
1468
|
-
testCase.task,
|
|
1510
|
+
testCase.task.trim(),
|
|
1469
1511
|
"",
|
|
1470
1512
|
"[[ ## reference_answer ## ]]",
|
|
1471
|
-
testCase.expected_assistant_raw,
|
|
1513
|
+
testCase.expected_assistant_raw.trim(),
|
|
1472
1514
|
"",
|
|
1473
1515
|
"[[ ## generated_answer ## ]]",
|
|
1474
|
-
candidate,
|
|
1516
|
+
candidate.trim(),
|
|
1475
1517
|
"",
|
|
1476
1518
|
"Respond with a single JSON object matching the schema described in the system prompt."
|
|
1477
1519
|
];
|
|
@@ -1720,10 +1762,10 @@ async function runEvaluation(options) {
|
|
|
1720
1762
|
onResult,
|
|
1721
1763
|
onProgress
|
|
1722
1764
|
} = options;
|
|
1723
|
-
const load =
|
|
1724
|
-
const
|
|
1725
|
-
const
|
|
1726
|
-
if (
|
|
1765
|
+
const load = loadEvalCases;
|
|
1766
|
+
const evalCases = await load(testFilePath, repoRoot, { verbose });
|
|
1767
|
+
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
1768
|
+
if (filteredEvalCases.length === 0) {
|
|
1727
1769
|
if (evalId) {
|
|
1728
1770
|
throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
|
|
1729
1771
|
}
|
|
@@ -1769,11 +1811,11 @@ async function runEvaluation(options) {
|
|
|
1769
1811
|
};
|
|
1770
1812
|
const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
|
|
1771
1813
|
const primaryProvider = getOrCreateProvider(target);
|
|
1772
|
-
if (onProgress &&
|
|
1773
|
-
for (let i = 0; i <
|
|
1814
|
+
if (onProgress && filteredEvalCases.length > 0) {
|
|
1815
|
+
for (let i = 0; i < filteredEvalCases.length; i++) {
|
|
1774
1816
|
await onProgress({
|
|
1775
1817
|
workerId: i + 1,
|
|
1776
|
-
evalId:
|
|
1818
|
+
evalId: filteredEvalCases[i].id,
|
|
1777
1819
|
status: "pending"
|
|
1778
1820
|
});
|
|
1779
1821
|
}
|
|
@@ -1782,22 +1824,22 @@ async function runEvaluation(options) {
|
|
|
1782
1824
|
const limit = pLimit(workers);
|
|
1783
1825
|
let nextWorkerId = 1;
|
|
1784
1826
|
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
1785
|
-
const promises =
|
|
1786
|
-
(
|
|
1827
|
+
const promises = filteredEvalCases.map(
|
|
1828
|
+
(evalCase) => limit(async () => {
|
|
1787
1829
|
const workerId = nextWorkerId++;
|
|
1788
|
-
workerIdByEvalId.set(
|
|
1830
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
1789
1831
|
if (onProgress) {
|
|
1790
1832
|
await onProgress({
|
|
1791
1833
|
workerId,
|
|
1792
|
-
evalId:
|
|
1834
|
+
evalId: evalCase.id,
|
|
1793
1835
|
status: "running",
|
|
1794
1836
|
startedAt: Date.now()
|
|
1795
1837
|
});
|
|
1796
1838
|
}
|
|
1797
1839
|
try {
|
|
1798
1840
|
const judgeProvider = await resolveJudgeProvider(target);
|
|
1799
|
-
const result = await
|
|
1800
|
-
|
|
1841
|
+
const result = await runEvalCase({
|
|
1842
|
+
evalCase,
|
|
1801
1843
|
provider: primaryProvider,
|
|
1802
1844
|
target,
|
|
1803
1845
|
graders: graderRegistry,
|
|
@@ -1812,7 +1854,7 @@ async function runEvaluation(options) {
|
|
|
1812
1854
|
if (onProgress) {
|
|
1813
1855
|
await onProgress({
|
|
1814
1856
|
workerId,
|
|
1815
|
-
evalId:
|
|
1857
|
+
evalId: evalCase.id,
|
|
1816
1858
|
status: "completed",
|
|
1817
1859
|
startedAt: 0,
|
|
1818
1860
|
// Not used for completed status
|
|
@@ -1827,7 +1869,7 @@ async function runEvaluation(options) {
|
|
|
1827
1869
|
if (onProgress) {
|
|
1828
1870
|
await onProgress({
|
|
1829
1871
|
workerId,
|
|
1830
|
-
evalId:
|
|
1872
|
+
evalId: evalCase.id,
|
|
1831
1873
|
status: "failed",
|
|
1832
1874
|
completedAt: Date.now(),
|
|
1833
1875
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -1844,10 +1886,10 @@ async function runEvaluation(options) {
|
|
|
1844
1886
|
if (outcome.status === "fulfilled") {
|
|
1845
1887
|
results.push(outcome.value);
|
|
1846
1888
|
} else {
|
|
1847
|
-
const
|
|
1848
|
-
const promptInputs = await buildPromptInputs(
|
|
1889
|
+
const evalCase = filteredEvalCases[i];
|
|
1890
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
1849
1891
|
const errorResult = buildErrorResult(
|
|
1850
|
-
|
|
1892
|
+
evalCase,
|
|
1851
1893
|
target.name,
|
|
1852
1894
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
1853
1895
|
outcome.reason,
|
|
@@ -1861,9 +1903,9 @@ async function runEvaluation(options) {
|
|
|
1861
1903
|
}
|
|
1862
1904
|
return results;
|
|
1863
1905
|
}
|
|
1864
|
-
async function runTestCase(options) {
|
|
1906
|
+
async function runEvalCase(options) {
|
|
1865
1907
|
const {
|
|
1866
|
-
|
|
1908
|
+
evalCase,
|
|
1867
1909
|
provider,
|
|
1868
1910
|
target,
|
|
1869
1911
|
graders,
|
|
@@ -1876,11 +1918,11 @@ async function runTestCase(options) {
|
|
|
1876
1918
|
signal,
|
|
1877
1919
|
judgeProvider
|
|
1878
1920
|
} = options;
|
|
1879
|
-
const promptInputs = await buildPromptInputs(
|
|
1921
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
1880
1922
|
if (promptDumpDir) {
|
|
1881
|
-
await dumpPrompt(promptDumpDir,
|
|
1923
|
+
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
1882
1924
|
}
|
|
1883
|
-
const cacheKey = useCache ? createCacheKey(provider, target,
|
|
1925
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
1884
1926
|
let cachedResponse;
|
|
1885
1927
|
if (cacheKey && cache) {
|
|
1886
1928
|
cachedResponse = await cache.get(cacheKey);
|
|
@@ -1893,7 +1935,7 @@ async function runTestCase(options) {
|
|
|
1893
1935
|
while (!providerResponse && attempt < attemptBudget) {
|
|
1894
1936
|
try {
|
|
1895
1937
|
providerResponse = await invokeProvider(provider, {
|
|
1896
|
-
|
|
1938
|
+
evalCase,
|
|
1897
1939
|
target,
|
|
1898
1940
|
promptInputs,
|
|
1899
1941
|
attempt,
|
|
@@ -1906,12 +1948,12 @@ async function runTestCase(options) {
|
|
|
1906
1948
|
attempt += 1;
|
|
1907
1949
|
continue;
|
|
1908
1950
|
}
|
|
1909
|
-
return buildErrorResult(
|
|
1951
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
1910
1952
|
}
|
|
1911
1953
|
}
|
|
1912
1954
|
if (!providerResponse) {
|
|
1913
1955
|
return buildErrorResult(
|
|
1914
|
-
|
|
1956
|
+
evalCase,
|
|
1915
1957
|
target.name,
|
|
1916
1958
|
nowFn(),
|
|
1917
1959
|
lastError ?? new Error("Provider did not return a response"),
|
|
@@ -1921,7 +1963,7 @@ async function runTestCase(options) {
|
|
|
1921
1963
|
if (cacheKey && cache && !cachedResponse) {
|
|
1922
1964
|
await cache.set(cacheKey, providerResponse);
|
|
1923
1965
|
}
|
|
1924
|
-
const graderKind =
|
|
1966
|
+
const graderKind = evalCase.grader ?? "heuristic";
|
|
1925
1967
|
const activeGrader = graders[graderKind] ?? graders.heuristic;
|
|
1926
1968
|
if (!activeGrader) {
|
|
1927
1969
|
throw new Error(`No grader registered for kind '${graderKind}'`);
|
|
@@ -1930,7 +1972,7 @@ async function runTestCase(options) {
|
|
|
1930
1972
|
try {
|
|
1931
1973
|
const gradeTimestamp = nowFn();
|
|
1932
1974
|
grade = await activeGrader.grade({
|
|
1933
|
-
|
|
1975
|
+
evalCase,
|
|
1934
1976
|
candidate: providerResponse.text ?? "",
|
|
1935
1977
|
target,
|
|
1936
1978
|
provider,
|
|
@@ -1940,17 +1982,18 @@ async function runTestCase(options) {
|
|
|
1940
1982
|
judgeProvider
|
|
1941
1983
|
});
|
|
1942
1984
|
} catch (error) {
|
|
1943
|
-
return buildErrorResult(
|
|
1985
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
1944
1986
|
}
|
|
1945
1987
|
const completedAt = nowFn();
|
|
1946
1988
|
const rawRequest = {
|
|
1947
1989
|
request: promptInputs.request,
|
|
1948
1990
|
guidelines: promptInputs.guidelines,
|
|
1949
|
-
guideline_paths:
|
|
1991
|
+
guideline_paths: evalCase.guideline_paths,
|
|
1992
|
+
system_message: promptInputs.systemMessage ?? ""
|
|
1950
1993
|
};
|
|
1951
1994
|
return {
|
|
1952
|
-
eval_id:
|
|
1953
|
-
conversation_id:
|
|
1995
|
+
eval_id: evalCase.id,
|
|
1996
|
+
conversation_id: evalCase.conversation_id,
|
|
1954
1997
|
score: grade.score,
|
|
1955
1998
|
hits: grade.hits,
|
|
1956
1999
|
misses: grade.misses,
|
|
@@ -1964,11 +2007,11 @@ async function runTestCase(options) {
|
|
|
1964
2007
|
grader_raw_request: grade.graderRawRequest
|
|
1965
2008
|
};
|
|
1966
2009
|
}
|
|
1967
|
-
function
|
|
2010
|
+
function filterEvalCases(evalCases, evalId) {
|
|
1968
2011
|
if (!evalId) {
|
|
1969
|
-
return
|
|
2012
|
+
return evalCases;
|
|
1970
2013
|
}
|
|
1971
|
-
return
|
|
2014
|
+
return evalCases.filter((evalCase) => evalCase.id === evalId);
|
|
1972
2015
|
}
|
|
1973
2016
|
function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
1974
2017
|
const heuristic = overrides?.heuristic ?? new HeuristicGrader();
|
|
@@ -1986,16 +2029,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
|
1986
2029
|
llm_judge: llmJudge
|
|
1987
2030
|
};
|
|
1988
2031
|
}
|
|
1989
|
-
async function dumpPrompt(directory,
|
|
2032
|
+
async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
1990
2033
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1991
|
-
const filename = `${timestamp}_${sanitizeFilename(
|
|
2034
|
+
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
1992
2035
|
const filePath = path4.resolve(directory, filename);
|
|
1993
2036
|
await mkdir(path4.dirname(filePath), { recursive: true });
|
|
1994
2037
|
const payload = {
|
|
1995
|
-
eval_id:
|
|
2038
|
+
eval_id: evalCase.id,
|
|
1996
2039
|
request: promptInputs.request,
|
|
1997
2040
|
guidelines: promptInputs.guidelines,
|
|
1998
|
-
guideline_paths:
|
|
2041
|
+
guideline_paths: evalCase.guideline_paths
|
|
1999
2042
|
};
|
|
2000
2043
|
await writeFile2(filePath, JSON.stringify(payload, null, 2), "utf8");
|
|
2001
2044
|
}
|
|
@@ -2007,7 +2050,7 @@ function sanitizeFilename(value) {
|
|
|
2007
2050
|
return sanitized.length > 0 ? sanitized : randomUUID2();
|
|
2008
2051
|
}
|
|
2009
2052
|
async function invokeProvider(provider, options) {
|
|
2010
|
-
const {
|
|
2053
|
+
const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
2011
2054
|
const controller = new AbortController();
|
|
2012
2055
|
const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
|
|
2013
2056
|
if (signal) {
|
|
@@ -2017,12 +2060,12 @@ async function invokeProvider(provider, options) {
|
|
|
2017
2060
|
return await provider.invoke({
|
|
2018
2061
|
prompt: promptInputs.request,
|
|
2019
2062
|
guidelines: promptInputs.guidelines,
|
|
2020
|
-
|
|
2021
|
-
|
|
2063
|
+
guideline_patterns: evalCase.guideline_patterns,
|
|
2064
|
+
attachments: evalCase.file_paths,
|
|
2065
|
+
evalCaseId: evalCase.id,
|
|
2022
2066
|
attempt,
|
|
2023
2067
|
metadata: {
|
|
2024
|
-
|
|
2025
|
-
grader: testCase.grader
|
|
2068
|
+
systemPrompt: promptInputs.systemMessage ?? ""
|
|
2026
2069
|
},
|
|
2027
2070
|
signal: controller.signal
|
|
2028
2071
|
});
|
|
@@ -2032,17 +2075,18 @@ async function invokeProvider(provider, options) {
|
|
|
2032
2075
|
}
|
|
2033
2076
|
}
|
|
2034
2077
|
}
|
|
2035
|
-
function buildErrorResult(
|
|
2078
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
2036
2079
|
const message = error instanceof Error ? error.message : String(error);
|
|
2037
2080
|
const rawRequest = {
|
|
2038
2081
|
request: promptInputs.request,
|
|
2039
2082
|
guidelines: promptInputs.guidelines,
|
|
2040
|
-
guideline_paths:
|
|
2083
|
+
guideline_paths: evalCase.guideline_paths,
|
|
2084
|
+
system_message: promptInputs.systemMessage ?? "",
|
|
2041
2085
|
error: message
|
|
2042
2086
|
};
|
|
2043
2087
|
return {
|
|
2044
|
-
eval_id:
|
|
2045
|
-
conversation_id:
|
|
2088
|
+
eval_id: evalCase.id,
|
|
2089
|
+
conversation_id: evalCase.conversation_id,
|
|
2046
2090
|
score: 0,
|
|
2047
2091
|
hits: [],
|
|
2048
2092
|
misses: [`Error: ${message}`],
|
|
@@ -2054,13 +2098,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
|
|
|
2054
2098
|
raw_request: rawRequest
|
|
2055
2099
|
};
|
|
2056
2100
|
}
|
|
2057
|
-
function createCacheKey(provider, target,
|
|
2101
|
+
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
2058
2102
|
const hash = createHash("sha256");
|
|
2059
2103
|
hash.update(provider.id);
|
|
2060
2104
|
hash.update(target.name);
|
|
2061
|
-
hash.update(
|
|
2105
|
+
hash.update(evalCase.id);
|
|
2062
2106
|
hash.update(promptInputs.request);
|
|
2063
2107
|
hash.update(promptInputs.guidelines);
|
|
2108
|
+
hash.update(promptInputs.systemMessage ?? "");
|
|
2064
2109
|
return hash.digest("hex");
|
|
2065
2110
|
}
|
|
2066
2111
|
function isTimeoutLike(error) {
|
|
@@ -2088,7 +2133,9 @@ export {
|
|
|
2088
2133
|
HeuristicGrader,
|
|
2089
2134
|
QualityGrader,
|
|
2090
2135
|
TEST_MESSAGE_ROLES,
|
|
2136
|
+
buildDirectoryChain,
|
|
2091
2137
|
buildPromptInputs,
|
|
2138
|
+
buildSearchRoots,
|
|
2092
2139
|
calculateHits,
|
|
2093
2140
|
calculateMisses,
|
|
2094
2141
|
createAgentKernel,
|
|
@@ -2096,6 +2143,8 @@ export {
|
|
|
2096
2143
|
ensureVSCodeSubagents,
|
|
2097
2144
|
extractAspects,
|
|
2098
2145
|
extractCodeBlocks,
|
|
2146
|
+
fileExists,
|
|
2147
|
+
findGitRoot,
|
|
2099
2148
|
getHitCount,
|
|
2100
2149
|
isErrorLike,
|
|
2101
2150
|
isGraderKind,
|
|
@@ -2105,12 +2154,13 @@ export {
|
|
|
2105
2154
|
isTestMessage,
|
|
2106
2155
|
isTestMessageRole,
|
|
2107
2156
|
listTargetNames,
|
|
2108
|
-
|
|
2157
|
+
loadEvalCases,
|
|
2109
2158
|
readTargetDefinitions,
|
|
2110
2159
|
resolveAndCreateProvider,
|
|
2160
|
+
resolveFileReference,
|
|
2111
2161
|
resolveTargetDefinition,
|
|
2162
|
+
runEvalCase,
|
|
2112
2163
|
runEvaluation,
|
|
2113
|
-
runTestCase,
|
|
2114
2164
|
scoreCandidateResponse
|
|
2115
2165
|
};
|
|
2116
2166
|
//# sourceMappingURL=index.js.map
|