@agentv/core 0.2.3 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,7 +34,9 @@ __export(index_exports, {
34
34
  HeuristicGrader: () => HeuristicGrader,
35
35
  QualityGrader: () => QualityGrader,
36
36
  TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
37
+ buildDirectoryChain: () => buildDirectoryChain,
37
38
  buildPromptInputs: () => buildPromptInputs,
39
+ buildSearchRoots: () => buildSearchRoots,
38
40
  calculateHits: () => calculateHits,
39
41
  calculateMisses: () => calculateMisses,
40
42
  createAgentKernel: () => createAgentKernel,
@@ -42,6 +44,8 @@ __export(index_exports, {
42
44
  ensureVSCodeSubagents: () => ensureVSCodeSubagents,
43
45
  extractAspects: () => extractAspects,
44
46
  extractCodeBlocks: () => extractCodeBlocks,
47
+ fileExists: () => fileExists,
48
+ findGitRoot: () => findGitRoot,
45
49
  getHitCount: () => getHitCount,
46
50
  isErrorLike: () => isErrorLike,
47
51
  isGraderKind: () => isGraderKind,
@@ -51,12 +55,13 @@ __export(index_exports, {
51
55
  isTestMessage: () => isTestMessage,
52
56
  isTestMessageRole: () => isTestMessageRole,
53
57
  listTargetNames: () => listTargetNames,
54
- loadTestCases: () => loadTestCases,
58
+ loadEvalCases: () => loadEvalCases,
55
59
  readTargetDefinitions: () => readTargetDefinitions,
56
60
  resolveAndCreateProvider: () => resolveAndCreateProvider,
61
+ resolveFileReference: () => resolveFileReference,
57
62
  resolveTargetDefinition: () => resolveTargetDefinition,
63
+ runEvalCase: () => runEvalCase,
58
64
  runEvaluation: () => runEvaluation,
59
- runTestCase: () => runTestCase,
60
65
  scoreCandidateResponse: () => scoreCandidateResponse
61
66
  });
62
67
  module.exports = __toCommonJS(index_exports);
@@ -113,6 +118,7 @@ function getHitCount(result) {
113
118
  }
114
119
 
115
120
  // src/evaluation/yaml-parser.ts
121
+ var import_micromatch = __toESM(require("micromatch"), 1);
116
122
  var import_node_fs2 = require("fs");
117
123
  var import_promises2 = require("fs/promises");
118
124
  var import_node_path2 = __toESM(require("path"), 1);
@@ -131,6 +137,46 @@ async function fileExists(filePath) {
131
137
  return false;
132
138
  }
133
139
  }
140
+ async function findGitRoot(startPath) {
141
+ let currentDir = import_node_path.default.dirname(import_node_path.default.resolve(startPath));
142
+ const root = import_node_path.default.parse(currentDir).root;
143
+ while (currentDir !== root) {
144
+ const gitPath = import_node_path.default.join(currentDir, ".git");
145
+ if (await fileExists(gitPath)) {
146
+ return currentDir;
147
+ }
148
+ const parentDir = import_node_path.default.dirname(currentDir);
149
+ if (parentDir === currentDir) {
150
+ break;
151
+ }
152
+ currentDir = parentDir;
153
+ }
154
+ return null;
155
+ }
156
+ function buildDirectoryChain(filePath, repoRoot) {
157
+ const directories = [];
158
+ const seen = /* @__PURE__ */ new Set();
159
+ const boundary = import_node_path.default.resolve(repoRoot);
160
+ let current = import_node_path.default.resolve(import_node_path.default.dirname(filePath));
161
+ while (current !== void 0) {
162
+ if (!seen.has(current)) {
163
+ directories.push(current);
164
+ seen.add(current);
165
+ }
166
+ if (current === boundary) {
167
+ break;
168
+ }
169
+ const parent = import_node_path.default.dirname(current);
170
+ if (parent === current) {
171
+ break;
172
+ }
173
+ current = parent;
174
+ }
175
+ if (!seen.has(boundary)) {
176
+ directories.push(boundary);
177
+ }
178
+ return directories;
179
+ }
134
180
  function buildSearchRoots(evalPath, repoRoot) {
135
181
  const uniqueRoots = [];
136
182
  const addRoot = (root) => {
@@ -188,9 +234,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
188
234
  var ANSI_YELLOW = "\x1B[33m";
189
235
  var ANSI_RESET = "\x1B[0m";
190
236
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
191
- function isGuidelineFile(filePath) {
237
+ var SCHEMA_CONFIG_V2 = "agentv-config-v2";
238
+ async function loadConfig(evalFilePath, repoRoot) {
239
+ const directories = buildDirectoryChain(evalFilePath, repoRoot);
240
+ for (const directory of directories) {
241
+ const configPath = import_node_path2.default.join(directory, ".agentv", "config.yaml");
242
+ if (!await fileExists2(configPath)) {
243
+ continue;
244
+ }
245
+ try {
246
+ const rawConfig = await (0, import_promises2.readFile)(configPath, "utf8");
247
+ const parsed = (0, import_yaml.parse)(rawConfig);
248
+ if (!isJsonObject(parsed)) {
249
+ logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
250
+ continue;
251
+ }
252
+ const config = parsed;
253
+ const schema = config.$schema;
254
+ if (schema !== SCHEMA_CONFIG_V2) {
255
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
256
+ Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
257
+ logWarning(message);
258
+ continue;
259
+ }
260
+ const guidelinePatterns = config.guideline_patterns;
261
+ if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
262
+ logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
263
+ continue;
264
+ }
265
+ if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
266
+ logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
267
+ continue;
268
+ }
269
+ return {
270
+ guideline_patterns: guidelinePatterns
271
+ };
272
+ } catch (error) {
273
+ logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
274
+ continue;
275
+ }
276
+ }
277
+ return null;
278
+ }
279
+ function isGuidelineFile(filePath, patterns) {
192
280
  const normalized = filePath.split("\\").join("/");
193
- return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
281
+ const patternsToUse = patterns ?? [];
282
+ return import_micromatch.default.isMatch(normalized, patternsToUse);
194
283
  }
195
284
  function extractCodeBlocks(segments) {
196
285
  const codeBlocks = [];
@@ -210,43 +299,45 @@ function extractCodeBlocks(segments) {
210
299
  }
211
300
  return codeBlocks;
212
301
  }
213
- async function loadTestCases(testFilePath, repoRoot, options) {
302
+ async function loadEvalCases(evalFilePath, repoRoot, options) {
214
303
  const verbose = options?.verbose ?? false;
215
- const absoluteTestPath = import_node_path2.default.resolve(testFilePath);
304
+ const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
216
305
  if (!await fileExists2(absoluteTestPath)) {
217
- throw new Error(`Test file not found: ${testFilePath}`);
306
+ throw new Error(`Test file not found: ${evalFilePath}`);
218
307
  }
219
308
  const repoRootPath = resolveToAbsolutePath(repoRoot);
220
309
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
310
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
311
+ const guidelinePatterns = config?.guideline_patterns;
221
312
  const rawFile = await (0, import_promises2.readFile)(absoluteTestPath, "utf8");
222
313
  const parsed = (0, import_yaml.parse)(rawFile);
223
314
  if (!isJsonObject(parsed)) {
224
- throw new Error(`Invalid test file format: ${testFilePath}`);
315
+ throw new Error(`Invalid test file format: ${evalFilePath}`);
225
316
  }
226
317
  const suite = parsed;
227
318
  const schema = suite.$schema;
228
319
  if (schema !== SCHEMA_EVAL_V2) {
229
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${testFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${testFilePath}.
320
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
230
321
  Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
231
322
  throw new Error(message);
232
323
  }
233
324
  const rawTestcases = suite.evalcases;
234
325
  if (!Array.isArray(rawTestcases)) {
235
- throw new Error(`Invalid test file format: ${testFilePath} - missing 'evalcases' field`);
326
+ throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
236
327
  }
237
328
  const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
238
329
  const results = [];
239
- for (const rawTestcase of rawTestcases) {
240
- if (!isJsonObject(rawTestcase)) {
330
+ for (const rawEvalcase of rawTestcases) {
331
+ if (!isJsonObject(rawEvalcase)) {
241
332
  logWarning("Skipping invalid test case entry (expected object)");
242
333
  continue;
243
334
  }
244
- const testcase = rawTestcase;
245
- const id = asString(testcase.id);
246
- const conversationId = asString(testcase.conversation_id);
247
- const outcome = asString(testcase.outcome);
248
- const inputMessagesValue = testcase.input_messages;
249
- const expectedMessagesValue = testcase.expected_messages;
335
+ const evalcase = rawEvalcase;
336
+ const id = asString(evalcase.id);
337
+ const conversationId = asString(evalcase.conversation_id);
338
+ const outcome = asString(evalcase.outcome);
339
+ const inputMessagesValue = evalcase.input_messages;
340
+ const expectedMessagesValue = evalcase.expected_messages;
250
341
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
251
342
  logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
252
343
  continue;
@@ -259,6 +350,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
259
350
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
260
351
  const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
261
352
  const userMessages = inputMessages.filter((message) => message.role === "user");
353
+ const systemMessages = inputMessages.filter((message) => message.role === "system");
262
354
  if (assistantMessages.length === 0) {
263
355
  logWarning(`No assistant message found for test case: ${id}`);
264
356
  continue;
@@ -266,6 +358,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
266
358
  if (assistantMessages.length > 1) {
267
359
  logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
268
360
  }
361
+ if (systemMessages.length > 1) {
362
+ logWarning(`Multiple system messages found for test case: ${id}, using first`);
363
+ }
364
+ let systemMessageContent;
365
+ if (systemMessages.length > 0) {
366
+ const content = systemMessages[0]?.content;
367
+ if (typeof content === "string") {
368
+ systemMessageContent = content;
369
+ } else if (Array.isArray(content)) {
370
+ const textParts = [];
371
+ for (const segment of content) {
372
+ if (isJsonObject(segment)) {
373
+ const value = segment.value;
374
+ if (typeof value === "string") {
375
+ textParts.push(value);
376
+ }
377
+ }
378
+ }
379
+ if (textParts.length > 0) {
380
+ systemMessageContent = textParts.join("\n\n");
381
+ }
382
+ }
383
+ }
269
384
  const userSegments = [];
270
385
  const guidelinePaths = [];
271
386
  const userTextParts = [];
@@ -297,7 +412,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
297
412
  }
298
413
  try {
299
414
  const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
300
- if (isGuidelineFile(displayPath)) {
415
+ const relativeToRepo = import_node_path2.default.relative(repoRootPath, resolvedPath);
416
+ if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
301
417
  guidelinePaths.push(import_node_path2.default.resolve(resolvedPath));
302
418
  if (verbose) {
303
419
  console.log(` [Guideline] Found: ${displayPath}`);
@@ -307,7 +423,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
307
423
  userSegments.push({
308
424
  type: "file",
309
425
  path: displayPath,
310
- text: fileContent
426
+ text: fileContent,
427
+ resolvedPath: import_node_path2.default.resolve(resolvedPath)
311
428
  });
312
429
  if (verbose) {
313
430
  console.log(` [File] Found: ${displayPath}`);
@@ -329,16 +446,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
329
446
  }
330
447
  const codeSnippets = extractCodeBlocks(userSegments);
331
448
  const assistantContent = assistantMessages[0]?.content;
332
- const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
449
+ const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
333
450
  const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
334
- const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
451
+ const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
452
+ const userFilePaths = [];
453
+ for (const segment of userSegments) {
454
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
455
+ userFilePaths.push(segment.resolvedPath);
456
+ }
457
+ }
458
+ const allFilePaths = [
459
+ ...guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
460
+ ...userFilePaths
461
+ ];
335
462
  const testCase = {
336
463
  id,
337
464
  conversation_id: conversationId,
338
465
  task: userTextPrompt,
339
466
  user_segments: userSegments,
467
+ system_message: systemMessageContent,
340
468
  expected_assistant_raw: expectedAssistantRaw,
341
469
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
470
+ guideline_patterns: guidelinePatterns,
471
+ file_paths: allFilePaths,
342
472
  code_snippets: codeSnippets,
343
473
  outcome,
344
474
  grader: testCaseGrader
@@ -404,7 +534,7 @@ ${body}`);
404
534
  }
405
535
  const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
406
536
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
407
- return { request, guidelines };
537
+ return { request, guidelines, systemMessage: testCase.system_message };
408
538
  }
409
539
  async function fileExists2(absolutePath) {
410
540
  try {
@@ -445,7 +575,7 @@ function cloneJsonValue(value) {
445
575
  }
446
576
  return cloneJsonObject(value);
447
577
  }
448
- function normalizeAssistantContent(content) {
578
+ async function resolveAssistantContent(content, searchRoots, verbose) {
449
579
  if (typeof content === "string") {
450
580
  return content;
451
581
  }
@@ -458,12 +588,42 @@ function normalizeAssistantContent(content) {
458
588
  parts.push(entry);
459
589
  continue;
460
590
  }
461
- const textValue = asString(entry["text"]);
591
+ if (!isJsonObject(entry)) {
592
+ continue;
593
+ }
594
+ const segmentType = asString(entry.type);
595
+ if (segmentType === "file") {
596
+ const rawValue = asString(entry.value);
597
+ if (!rawValue) {
598
+ continue;
599
+ }
600
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference(
601
+ rawValue,
602
+ searchRoots
603
+ );
604
+ if (!resolvedPath) {
605
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
606
+ logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
607
+ continue;
608
+ }
609
+ try {
610
+ const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
611
+ parts.push(fileContent);
612
+ if (verbose) {
613
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
614
+ console.log(` Resolved to: ${resolvedPath}`);
615
+ }
616
+ } catch (error) {
617
+ logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
618
+ }
619
+ continue;
620
+ }
621
+ const textValue = asString(entry.text);
462
622
  if (typeof textValue === "string") {
463
623
  parts.push(textValue);
464
624
  continue;
465
625
  }
466
- const valueValue = asString(entry["value"]);
626
+ const valueValue = asString(entry.value);
467
627
  if (typeof valueValue === "string") {
468
628
  parts.push(valueValue);
469
629
  continue;
@@ -500,15 +660,18 @@ function buildChatPrompt(request) {
500
660
  return request.chatPrompt;
501
661
  }
502
662
  const systemSegments = [];
503
- if (request.guidelines && request.guidelines.trim().length > 0) {
504
- systemSegments.push(`Guidelines:
505
- ${request.guidelines.trim()}`);
506
- }
507
663
  const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
508
664
  if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
509
665
  systemSegments.push(metadataSystemPrompt.trim());
666
+ } else {
667
+ systemSegments.push(DEFAULT_SYSTEM_PROMPT);
510
668
  }
511
- const systemContent = systemSegments.length > 0 ? systemSegments.join("\n\n") : DEFAULT_SYSTEM_PROMPT;
669
+ if (request.guidelines && request.guidelines.trim().length > 0) {
670
+ systemSegments.push(`[[ ## Guidelines ## ]]
671
+
672
+ ${request.guidelines.trim()}`);
673
+ }
674
+ const systemContent = systemSegments.join("\n\n");
512
675
  const userContent = request.prompt.trim();
513
676
  const prompt = [
514
677
  {
@@ -965,10 +1128,8 @@ function isLikelyEnvReference(value) {
965
1128
 
966
1129
  // src/evaluation/providers/vscode.ts
967
1130
  var import_promises3 = require("fs/promises");
968
- var import_node_os = require("os");
969
1131
  var import_node_path3 = __toESM(require("path"), 1);
970
1132
  var import_subagent = require("subagent");
971
- var PROMPT_FILE_PREFIX = "bbeval-vscode-";
972
1133
  var VSCodeProvider = class {
973
1134
  id;
974
1135
  kind;
@@ -985,128 +1146,89 @@ var VSCodeProvider = class {
985
1146
  throw new Error("VS Code provider request was aborted before dispatch");
986
1147
  }
987
1148
  const attachments = normalizeAttachments(request.attachments);
988
- const promptContent = buildPromptDocument(request, attachments);
989
- const directory = await (0, import_promises3.mkdtemp)(import_node_path3.default.join((0, import_node_os.tmpdir)(), PROMPT_FILE_PREFIX));
990
- const promptPath = import_node_path3.default.join(directory, `${request.testCaseId ?? "request"}.prompt.md`);
991
- try {
992
- await (0, import_promises3.writeFile)(promptPath, promptContent, "utf8");
993
- const session = await (0, import_subagent.dispatchAgentSession)({
994
- userQuery: composeUserQuery(request),
995
- promptFile: promptPath,
996
- extraAttachments: attachments,
997
- wait: this.config.waitForResponse,
998
- dryRun: this.config.dryRun,
999
- vscodeCmd: this.config.command,
1000
- subagentRoot: this.config.subagentRoot,
1001
- workspaceTemplate: this.config.workspaceTemplate,
1002
- silent: true
1003
- });
1004
- if (session.exitCode !== 0 || !session.responseFile) {
1005
- const failure = session.error ?? "VS Code subagent did not produce a response";
1006
- throw new Error(failure);
1007
- }
1008
- if (this.config.dryRun) {
1009
- return {
1010
- text: "",
1011
- raw: {
1012
- session,
1013
- promptFile: promptPath,
1014
- attachments
1015
- }
1016
- };
1017
- }
1018
- const responseText = await (0, import_promises3.readFile)(session.responseFile, "utf8");
1149
+ const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
1150
+ const session = await (0, import_subagent.dispatchAgentSession)({
1151
+ userQuery: promptContent,
1152
+ // Use full prompt content instead of just request.prompt
1153
+ extraAttachments: attachments,
1154
+ wait: this.config.waitForResponse,
1155
+ dryRun: this.config.dryRun,
1156
+ vscodeCmd: this.config.command,
1157
+ subagentRoot: this.config.subagentRoot,
1158
+ workspaceTemplate: this.config.workspaceTemplate,
1159
+ silent: true
1160
+ });
1161
+ if (session.exitCode !== 0 || !session.responseFile) {
1162
+ const failure = session.error ?? "VS Code subagent did not produce a response";
1163
+ throw new Error(failure);
1164
+ }
1165
+ if (this.config.dryRun) {
1019
1166
  return {
1020
- text: responseText,
1167
+ text: "",
1021
1168
  raw: {
1022
1169
  session,
1023
- promptFile: promptPath,
1024
1170
  attachments
1025
1171
  }
1026
1172
  };
1027
- } finally {
1028
- await (0, import_promises3.rm)(directory, { recursive: true, force: true });
1029
1173
  }
1174
+ const responseText = await (0, import_promises3.readFile)(session.responseFile, "utf8");
1175
+ return {
1176
+ text: responseText,
1177
+ raw: {
1178
+ session,
1179
+ attachments
1180
+ }
1181
+ };
1030
1182
  }
1031
1183
  };
1032
- function buildPromptDocument(request, attachments) {
1184
+ function buildPromptDocument(request, attachments, guidelinePatterns) {
1033
1185
  const parts = [];
1034
- const instructionFiles = collectInstructionFiles(attachments);
1035
- if (instructionFiles.length > 0) {
1036
- parts.push(buildMandatoryPrereadBlock(instructionFiles));
1037
- }
1038
- parts.push(`# BbEval Request`);
1039
- if (request.testCaseId) {
1040
- parts.push(`- Test Case: ${request.testCaseId}`);
1041
- }
1042
- if (request.metadata?.target) {
1043
- parts.push(`- Target: ${String(request.metadata.target)}`);
1044
- }
1045
- parts.push("\n## Task\n", request.prompt.trim());
1046
- if (request.guidelines && request.guidelines.trim().length > 0) {
1047
- parts.push("\n## Guidelines\n", request.guidelines.trim());
1048
- }
1049
- if (attachments && attachments.length > 0) {
1050
- const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
1051
- parts.push("\n## Attachments\n", attachmentList);
1186
+ const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
1187
+ if (guidelineFiles.length > 0) {
1188
+ parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
1052
1189
  }
1190
+ parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
1053
1191
  return parts.join("\n").trim();
1054
1192
  }
1055
- function buildMandatoryPrereadBlock(instructionFiles) {
1056
- if (instructionFiles.length === 0) {
1193
+ function buildMandatoryPrereadBlock(guidelineFiles) {
1194
+ if (guidelineFiles.length === 0) {
1057
1195
  return "";
1058
1196
  }
1059
1197
  const fileList = [];
1060
- const tokenList = [];
1061
1198
  let counter = 0;
1062
- for (const absolutePath of instructionFiles) {
1199
+ for (const absolutePath of guidelineFiles) {
1063
1200
  counter += 1;
1064
1201
  const fileName = import_node_path3.default.basename(absolutePath);
1065
1202
  const fileUri = pathToFileUri(absolutePath);
1066
- fileList.push(`[${fileName}](${fileUri})`);
1067
- tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
1203
+ fileList.push(`* [${fileName}](${fileUri})`);
1068
1204
  }
1069
- const filesText = fileList.join(", ");
1070
- const tokensText = tokenList.join("\n");
1205
+ const filesText = fileList.join("\n");
1071
1206
  const instruction = [
1072
- `Read all instruction files: ${filesText}.`,
1073
- `After reading each file, compute its SHA256 hash using this PowerShell command:`,
1074
- "`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
1075
- `Then include, at the top of your reply, these exact tokens on separate lines:
1207
+ `Read all guideline files:
1208
+ ${filesText}.
1076
1209
  `,
1077
- tokensText,
1078
- `
1079
- Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
1080
1210
  `If any file is missing, fail with ERROR: missing-file <filename> and stop.
1081
1211
  `,
1082
- `Then fetch all documentation required by the instructions before proceeding with your task.`
1083
- ].join(" ");
1084
- return `[[ ## mandatory_pre_read ## ]]
1085
-
1086
- ${instruction}
1087
-
1088
- `;
1212
+ `Then apply system_instructions on the user query below.`
1213
+ ].join("");
1214
+ return `${instruction}`;
1089
1215
  }
1090
- function collectInstructionFiles(attachments) {
1216
+ function collectGuidelineFiles(attachments, guidelinePatterns) {
1091
1217
  if (!attachments || attachments.length === 0) {
1092
1218
  return [];
1093
1219
  }
1094
1220
  const unique = /* @__PURE__ */ new Map();
1095
1221
  for (const attachment of attachments) {
1096
- if (!isInstructionPath(attachment)) {
1097
- continue;
1098
- }
1099
1222
  const absolutePath = import_node_path3.default.resolve(attachment);
1100
- if (!unique.has(absolutePath)) {
1101
- unique.set(absolutePath, absolutePath);
1223
+ const normalized = absolutePath.split(import_node_path3.default.sep).join("/");
1224
+ if (isGuidelineFile(normalized, guidelinePatterns)) {
1225
+ if (!unique.has(absolutePath)) {
1226
+ unique.set(absolutePath, absolutePath);
1227
+ }
1102
1228
  }
1103
1229
  }
1104
1230
  return Array.from(unique.values());
1105
1231
  }
1106
- function isInstructionPath(filePath) {
1107
- const normalized = filePath.split(import_node_path3.default.sep).join("/");
1108
- return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
1109
- }
1110
1232
  function pathToFileUri(filePath) {
1111
1233
  const absolutePath = import_node_path3.default.isAbsolute(filePath) ? filePath : import_node_path3.default.resolve(filePath);
1112
1234
  const normalizedPath = absolutePath.replace(/\\/g, "/");
@@ -1115,14 +1237,6 @@ function pathToFileUri(filePath) {
1115
1237
  }
1116
1238
  return `file://${normalizedPath}`;
1117
1239
  }
1118
- function composeUserQuery(request) {
1119
- const segments = [];
1120
- segments.push(request.prompt.trim());
1121
- if (request.guidelines && request.guidelines.trim().length > 0) {
1122
- segments.push("\nGuidelines:\n", request.guidelines.trim());
1123
- }
1124
- return segments.join("\n").trim();
1125
- }
1126
1240
  function normalizeAttachments(attachments) {
1127
1241
  if (!attachments || attachments.length === 0) {
1128
1242
  return void 0;
@@ -1177,21 +1291,32 @@ var import_node_fs3 = require("fs");
1177
1291
  var import_promises4 = require("fs/promises");
1178
1292
  var import_node_path4 = __toESM(require("path"), 1);
1179
1293
  var import_yaml2 = require("yaml");
1294
+
1295
+ // src/evaluation/providers/types.ts
1296
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
1297
+
1298
+ // src/evaluation/providers/targets-file.ts
1180
1299
  function isRecord(value) {
1181
1300
  return typeof value === "object" && value !== null && !Array.isArray(value);
1182
1301
  }
1183
- function checkVersion(parsed, absolutePath) {
1184
- const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
1185
- if (version === void 0) {
1302
+ function checkSchema(parsed, absolutePath) {
1303
+ const schema = parsed.$schema;
1304
+ if (schema === void 0) {
1305
+ throw new Error(
1306
+ `Missing $schema field in targets.yaml at ${absolutePath}.
1307
+ Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
1308
+ );
1309
+ }
1310
+ if (typeof schema !== "string") {
1186
1311
  throw new Error(
1187
- `Missing version field in targets.yaml at ${absolutePath}.
1188
- Please add 'version: 2.0' at the top of the file.`
1312
+ `Invalid $schema field in targets.yaml at ${absolutePath}.
1313
+ Expected a string value '${TARGETS_SCHEMA_V2}'.`
1189
1314
  );
1190
1315
  }
1191
- if (version < 2) {
1316
+ if (schema !== TARGETS_SCHEMA_V2) {
1192
1317
  throw new Error(
1193
- `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
1194
- Please update to version 2.0 format with 'targets' array.`
1318
+ `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
1319
+ Expected '${TARGETS_SCHEMA_V2}'.`
1195
1320
  );
1196
1321
  }
1197
1322
  }
@@ -1239,9 +1364,9 @@ async function readTargetDefinitions(filePath) {
1239
1364
  const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
1240
1365
  const parsed = (0, import_yaml2.parse)(raw);
1241
1366
  if (!isRecord(parsed)) {
1242
- throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
1367
+ throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
1243
1368
  }
1244
- checkVersion(parsed, absolutePath);
1369
+ checkSchema(parsed, absolutePath);
1245
1370
  const targets = extractTargetsArray(parsed, absolutePath);
1246
1371
  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
1247
1372
  return definitions;
@@ -1463,7 +1588,7 @@ var import_node_crypto = require("crypto");
1463
1588
  var HeuristicGrader = class {
1464
1589
  kind = "heuristic";
1465
1590
  grade(context) {
1466
- const expectedAspects = extractAspects(context.testCase.expected_assistant_raw);
1591
+ const expectedAspects = extractAspects(context.evalCase.expected_assistant_raw);
1467
1592
  const result = scoreCandidateResponse(context.candidate, expectedAspects);
1468
1593
  const misses = [...result.misses];
1469
1594
  if (expectedAspects.length === 0 && isErrorLike(context.candidate)) {
@@ -1496,14 +1621,14 @@ var QualityGrader = class {
1496
1621
  if (!judgeProvider) {
1497
1622
  throw new Error("No judge provider available for LLM grading");
1498
1623
  }
1499
- const prompt = buildQualityPrompt(context.testCase, context.candidate);
1624
+ const prompt = buildQualityPrompt(context.evalCase, context.candidate);
1500
1625
  const metadata = {
1501
1626
  systemPrompt: QUALITY_SYSTEM_PROMPT
1502
1627
  };
1503
1628
  const response = await judgeProvider.invoke({
1504
1629
  prompt,
1505
1630
  metadata,
1506
- testCaseId: context.testCase.id,
1631
+ evalCaseId: context.evalCase.id,
1507
1632
  attempt: context.attempt,
1508
1633
  maxOutputTokens: this.maxOutputTokens,
1509
1634
  temperature: this.temperature
@@ -1549,16 +1674,16 @@ var QUALITY_SYSTEM_PROMPT = [
1549
1674
  function buildQualityPrompt(testCase, candidate) {
1550
1675
  const parts = [
1551
1676
  "[[ ## expected_outcome ## ]]",
1552
- testCase.outcome,
1677
+ testCase.outcome.trim(),
1553
1678
  "",
1554
1679
  "[[ ## request ## ]]",
1555
- testCase.task,
1680
+ testCase.task.trim(),
1556
1681
  "",
1557
1682
  "[[ ## reference_answer ## ]]",
1558
- testCase.expected_assistant_raw,
1683
+ testCase.expected_assistant_raw.trim(),
1559
1684
  "",
1560
1685
  "[[ ## generated_answer ## ]]",
1561
- candidate,
1686
+ candidate.trim(),
1562
1687
  "",
1563
1688
  "Respond with a single JSON object matching the schema described in the system prompt."
1564
1689
  ];
@@ -1802,17 +1927,17 @@ async function runEvaluation(options) {
1802
1927
  cache,
1803
1928
  useCache,
1804
1929
  now,
1805
- testId,
1930
+ evalId,
1806
1931
  verbose,
1807
1932
  onResult,
1808
1933
  onProgress
1809
1934
  } = options;
1810
- const load = loadTestCases;
1811
- const testCases = await load(testFilePath, repoRoot, { verbose });
1812
- const filteredTestCases = filterTestCases(testCases, testId);
1813
- if (filteredTestCases.length === 0) {
1814
- if (testId) {
1815
- throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
1935
+ const load = loadEvalCases;
1936
+ const evalCases = await load(testFilePath, repoRoot, { verbose });
1937
+ const filteredEvalCases = filterEvalCases(evalCases, evalId);
1938
+ if (filteredEvalCases.length === 0) {
1939
+ if (evalId) {
1940
+ throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
1816
1941
  }
1817
1942
  return [];
1818
1943
  }
@@ -1856,11 +1981,11 @@ async function runEvaluation(options) {
1856
1981
  };
1857
1982
  const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
1858
1983
  const primaryProvider = getOrCreateProvider(target);
1859
- if (onProgress && filteredTestCases.length > 0) {
1860
- for (let i = 0; i < filteredTestCases.length; i++) {
1984
+ if (onProgress && filteredEvalCases.length > 0) {
1985
+ for (let i = 0; i < filteredEvalCases.length; i++) {
1861
1986
  await onProgress({
1862
1987
  workerId: i + 1,
1863
- testId: filteredTestCases[i].id,
1988
+ evalId: filteredEvalCases[i].id,
1864
1989
  status: "pending"
1865
1990
  });
1866
1991
  }
@@ -1868,23 +1993,23 @@ async function runEvaluation(options) {
1868
1993
  const workers = options.maxConcurrency ?? target.workers ?? 1;
1869
1994
  const limit = pLimit(workers);
1870
1995
  let nextWorkerId = 1;
1871
- const workerIdByTestId = /* @__PURE__ */ new Map();
1872
- const promises = filteredTestCases.map(
1873
- (testCase) => limit(async () => {
1996
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
1997
+ const promises = filteredEvalCases.map(
1998
+ (evalCase) => limit(async () => {
1874
1999
  const workerId = nextWorkerId++;
1875
- workerIdByTestId.set(testCase.id, workerId);
2000
+ workerIdByEvalId.set(evalCase.id, workerId);
1876
2001
  if (onProgress) {
1877
2002
  await onProgress({
1878
2003
  workerId,
1879
- testId: testCase.id,
2004
+ evalId: evalCase.id,
1880
2005
  status: "running",
1881
2006
  startedAt: Date.now()
1882
2007
  });
1883
2008
  }
1884
2009
  try {
1885
2010
  const judgeProvider = await resolveJudgeProvider(target);
1886
- const result = await runTestCase({
1887
- testCase,
2011
+ const result = await runEvalCase({
2012
+ evalCase,
1888
2013
  provider: primaryProvider,
1889
2014
  target,
1890
2015
  graders: graderRegistry,
@@ -1899,7 +2024,7 @@ async function runEvaluation(options) {
1899
2024
  if (onProgress) {
1900
2025
  await onProgress({
1901
2026
  workerId,
1902
- testId: testCase.id,
2027
+ evalId: evalCase.id,
1903
2028
  status: "completed",
1904
2029
  startedAt: 0,
1905
2030
  // Not used for completed status
@@ -1914,7 +2039,7 @@ async function runEvaluation(options) {
1914
2039
  if (onProgress) {
1915
2040
  await onProgress({
1916
2041
  workerId,
1917
- testId: testCase.id,
2042
+ evalId: evalCase.id,
1918
2043
  status: "failed",
1919
2044
  completedAt: Date.now(),
1920
2045
  error: error instanceof Error ? error.message : String(error)
@@ -1931,10 +2056,10 @@ async function runEvaluation(options) {
1931
2056
  if (outcome.status === "fulfilled") {
1932
2057
  results.push(outcome.value);
1933
2058
  } else {
1934
- const testCase = filteredTestCases[i];
1935
- const promptInputs = await buildPromptInputs(testCase);
2059
+ const evalCase = filteredEvalCases[i];
2060
+ const promptInputs = await buildPromptInputs(evalCase);
1936
2061
  const errorResult = buildErrorResult(
1937
- testCase,
2062
+ evalCase,
1938
2063
  target.name,
1939
2064
  (now ?? (() => /* @__PURE__ */ new Date()))(),
1940
2065
  outcome.reason,
@@ -1948,9 +2073,9 @@ async function runEvaluation(options) {
1948
2073
  }
1949
2074
  return results;
1950
2075
  }
1951
- async function runTestCase(options) {
2076
+ async function runEvalCase(options) {
1952
2077
  const {
1953
- testCase,
2078
+ evalCase,
1954
2079
  provider,
1955
2080
  target,
1956
2081
  graders,
@@ -1963,11 +2088,11 @@ async function runTestCase(options) {
1963
2088
  signal,
1964
2089
  judgeProvider
1965
2090
  } = options;
1966
- const promptInputs = await buildPromptInputs(testCase);
2091
+ const promptInputs = await buildPromptInputs(evalCase);
1967
2092
  if (promptDumpDir) {
1968
- await dumpPrompt(promptDumpDir, testCase, promptInputs);
2093
+ await dumpPrompt(promptDumpDir, evalCase, promptInputs);
1969
2094
  }
1970
- const cacheKey = useCache ? createCacheKey(provider, target, testCase, promptInputs) : void 0;
2095
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
1971
2096
  let cachedResponse;
1972
2097
  if (cacheKey && cache) {
1973
2098
  cachedResponse = await cache.get(cacheKey);
@@ -1980,7 +2105,7 @@ async function runTestCase(options) {
1980
2105
  while (!providerResponse && attempt < attemptBudget) {
1981
2106
  try {
1982
2107
  providerResponse = await invokeProvider(provider, {
1983
- testCase,
2108
+ evalCase,
1984
2109
  target,
1985
2110
  promptInputs,
1986
2111
  attempt,
@@ -1993,12 +2118,12 @@ async function runTestCase(options) {
1993
2118
  attempt += 1;
1994
2119
  continue;
1995
2120
  }
1996
- return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
2121
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
1997
2122
  }
1998
2123
  }
1999
2124
  if (!providerResponse) {
2000
2125
  return buildErrorResult(
2001
- testCase,
2126
+ evalCase,
2002
2127
  target.name,
2003
2128
  nowFn(),
2004
2129
  lastError ?? new Error("Provider did not return a response"),
@@ -2008,7 +2133,7 @@ async function runTestCase(options) {
2008
2133
  if (cacheKey && cache && !cachedResponse) {
2009
2134
  await cache.set(cacheKey, providerResponse);
2010
2135
  }
2011
- const graderKind = testCase.grader ?? "heuristic";
2136
+ const graderKind = evalCase.grader ?? "heuristic";
2012
2137
  const activeGrader = graders[graderKind] ?? graders.heuristic;
2013
2138
  if (!activeGrader) {
2014
2139
  throw new Error(`No grader registered for kind '${graderKind}'`);
@@ -2017,7 +2142,7 @@ async function runTestCase(options) {
2017
2142
  try {
2018
2143
  const gradeTimestamp = nowFn();
2019
2144
  grade = await activeGrader.grade({
2020
- testCase,
2145
+ evalCase,
2021
2146
  candidate: providerResponse.text ?? "",
2022
2147
  target,
2023
2148
  provider,
@@ -2027,17 +2152,18 @@ async function runTestCase(options) {
2027
2152
  judgeProvider
2028
2153
  });
2029
2154
  } catch (error) {
2030
- return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
2155
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
2031
2156
  }
2032
2157
  const completedAt = nowFn();
2033
2158
  const rawRequest = {
2034
2159
  request: promptInputs.request,
2035
2160
  guidelines: promptInputs.guidelines,
2036
- guideline_paths: testCase.guideline_paths
2161
+ guideline_paths: evalCase.guideline_paths,
2162
+ system_message: promptInputs.systemMessage ?? ""
2037
2163
  };
2038
2164
  return {
2039
- test_id: testCase.id,
2040
- conversation_id: testCase.conversation_id,
2165
+ eval_id: evalCase.id,
2166
+ conversation_id: evalCase.conversation_id,
2041
2167
  score: grade.score,
2042
2168
  hits: grade.hits,
2043
2169
  misses: grade.misses,
@@ -2051,11 +2177,11 @@ async function runTestCase(options) {
2051
2177
  grader_raw_request: grade.graderRawRequest
2052
2178
  };
2053
2179
  }
2054
- function filterTestCases(testCases, testId) {
2055
- if (!testId) {
2056
- return testCases;
2180
+ function filterEvalCases(evalCases, evalId) {
2181
+ if (!evalId) {
2182
+ return evalCases;
2057
2183
  }
2058
- return testCases.filter((testCase) => testCase.id === testId);
2184
+ return evalCases.filter((evalCase) => evalCase.id === evalId);
2059
2185
  }
2060
2186
  function buildGraderRegistry(overrides, resolveJudgeProvider) {
2061
2187
  const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -2073,16 +2199,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
2073
2199
  llm_judge: llmJudge
2074
2200
  };
2075
2201
  }
2076
- async function dumpPrompt(directory, testCase, promptInputs) {
2202
+ async function dumpPrompt(directory, evalCase, promptInputs) {
2077
2203
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
2078
- const filename = `${timestamp}_${sanitizeFilename(testCase.id)}.json`;
2204
+ const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
2079
2205
  const filePath = import_node_path5.default.resolve(directory, filename);
2080
2206
  await (0, import_promises5.mkdir)(import_node_path5.default.dirname(filePath), { recursive: true });
2081
2207
  const payload = {
2082
- test_id: testCase.id,
2208
+ eval_id: evalCase.id,
2083
2209
  request: promptInputs.request,
2084
2210
  guidelines: promptInputs.guidelines,
2085
- guideline_paths: testCase.guideline_paths
2211
+ guideline_paths: evalCase.guideline_paths
2086
2212
  };
2087
2213
  await (0, import_promises5.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
2088
2214
  }
@@ -2094,7 +2220,7 @@ function sanitizeFilename(value) {
2094
2220
  return sanitized.length > 0 ? sanitized : (0, import_node_crypto2.randomUUID)();
2095
2221
  }
2096
2222
  async function invokeProvider(provider, options) {
2097
- const { testCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
2223
+ const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
2098
2224
  const controller = new AbortController();
2099
2225
  const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
2100
2226
  if (signal) {
@@ -2104,12 +2230,12 @@ async function invokeProvider(provider, options) {
2104
2230
  return await provider.invoke({
2105
2231
  prompt: promptInputs.request,
2106
2232
  guidelines: promptInputs.guidelines,
2107
- attachments: testCase.guideline_paths,
2108
- testCaseId: testCase.id,
2233
+ guideline_patterns: evalCase.guideline_patterns,
2234
+ attachments: evalCase.file_paths,
2235
+ evalCaseId: evalCase.id,
2109
2236
  attempt,
2110
2237
  metadata: {
2111
- target: target.name,
2112
- grader: testCase.grader
2238
+ systemPrompt: promptInputs.systemMessage ?? ""
2113
2239
  },
2114
2240
  signal: controller.signal
2115
2241
  });
@@ -2119,17 +2245,18 @@ async function invokeProvider(provider, options) {
2119
2245
  }
2120
2246
  }
2121
2247
  }
2122
- function buildErrorResult(testCase, targetName, timestamp, error, promptInputs) {
2248
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
2123
2249
  const message = error instanceof Error ? error.message : String(error);
2124
2250
  const rawRequest = {
2125
2251
  request: promptInputs.request,
2126
2252
  guidelines: promptInputs.guidelines,
2127
- guideline_paths: testCase.guideline_paths,
2253
+ guideline_paths: evalCase.guideline_paths,
2254
+ system_message: promptInputs.systemMessage ?? "",
2128
2255
  error: message
2129
2256
  };
2130
2257
  return {
2131
- test_id: testCase.id,
2132
- conversation_id: testCase.conversation_id,
2258
+ eval_id: evalCase.id,
2259
+ conversation_id: evalCase.conversation_id,
2133
2260
  score: 0,
2134
2261
  hits: [],
2135
2262
  misses: [`Error: ${message}`],
@@ -2141,13 +2268,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
2141
2268
  raw_request: rawRequest
2142
2269
  };
2143
2270
  }
2144
- function createCacheKey(provider, target, testCase, promptInputs) {
2271
+ function createCacheKey(provider, target, evalCase, promptInputs) {
2145
2272
  const hash = (0, import_node_crypto2.createHash)("sha256");
2146
2273
  hash.update(provider.id);
2147
2274
  hash.update(target.name);
2148
- hash.update(testCase.id);
2275
+ hash.update(evalCase.id);
2149
2276
  hash.update(promptInputs.request);
2150
2277
  hash.update(promptInputs.guidelines);
2278
+ hash.update(promptInputs.systemMessage ?? "");
2151
2279
  return hash.digest("hex");
2152
2280
  }
2153
2281
  function isTimeoutLike(error) {
@@ -2176,7 +2304,9 @@ function createAgentKernel() {
2176
2304
  HeuristicGrader,
2177
2305
  QualityGrader,
2178
2306
  TEST_MESSAGE_ROLES,
2307
+ buildDirectoryChain,
2179
2308
  buildPromptInputs,
2309
+ buildSearchRoots,
2180
2310
  calculateHits,
2181
2311
  calculateMisses,
2182
2312
  createAgentKernel,
@@ -2184,6 +2314,8 @@ function createAgentKernel() {
2184
2314
  ensureVSCodeSubagents,
2185
2315
  extractAspects,
2186
2316
  extractCodeBlocks,
2317
+ fileExists,
2318
+ findGitRoot,
2187
2319
  getHitCount,
2188
2320
  isErrorLike,
2189
2321
  isGraderKind,
@@ -2193,12 +2325,13 @@ function createAgentKernel() {
2193
2325
  isTestMessage,
2194
2326
  isTestMessageRole,
2195
2327
  listTargetNames,
2196
- loadTestCases,
2328
+ loadEvalCases,
2197
2329
  readTargetDefinitions,
2198
2330
  resolveAndCreateProvider,
2331
+ resolveFileReference,
2199
2332
  resolveTargetDefinition,
2333
+ runEvalCase,
2200
2334
  runEvaluation,
2201
- runTestCase,
2202
2335
  scoreCandidateResponse
2203
2336
  });
2204
2337
  //# sourceMappingURL=index.cjs.map