@agentv/core 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -116,7 +116,7 @@ function getHitCount(result) {
116
116
  }
117
117
 
118
118
  // src/evaluation/yaml-parser.ts
119
- var import_promises5 = require("fs/promises");
119
+ var import_promises6 = require("fs/promises");
120
120
  var import_node_path6 = __toESM(require("path"), 1);
121
121
  var import_yaml2 = require("yaml");
122
122
 
@@ -125,11 +125,11 @@ function extractCodeBlocks(segments) {
125
125
  const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
126
126
  const codeBlocks = [];
127
127
  for (const segment of segments) {
128
- const typeValue = segment["type"];
128
+ const typeValue = segment.type;
129
129
  if (typeof typeValue !== "string" || typeValue !== "text") {
130
130
  continue;
131
131
  }
132
- const textValue = segment["value"];
132
+ const textValue = segment.value;
133
133
  if (typeof textValue !== "string") {
134
134
  continue;
135
135
  }
@@ -154,7 +154,7 @@ ${part.content}
154
154
  }
155
155
  return parts.map((p) => p.content).join(" ");
156
156
  }
157
- function formatSegment(segment) {
157
+ function formatSegment(segment, mode = "lm") {
158
158
  const type = asString(segment.type);
159
159
  if (type === "text") {
160
160
  return asString(segment.value);
@@ -164,8 +164,14 @@ function formatSegment(segment) {
164
164
  return refPath ? `<Attached: ${refPath}>` : void 0;
165
165
  }
166
166
  if (type === "file") {
167
- const text = asString(segment.text);
168
167
  const filePath = asString(segment.path);
168
+ if (!filePath) {
169
+ return void 0;
170
+ }
171
+ if (mode === "agent") {
172
+ return `<file: path="${filePath}">`;
173
+ }
174
+ const text = asString(segment.text);
169
175
  if (text && filePath) {
170
176
  return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
171
177
  }
@@ -194,9 +200,9 @@ function asString(value) {
194
200
  }
195
201
 
196
202
  // src/evaluation/loaders/config-loader.ts
197
- var import_micromatch = __toESM(require("micromatch"), 1);
198
203
  var import_promises2 = require("fs/promises");
199
204
  var import_node_path2 = __toESM(require("path"), 1);
205
+ var import_micromatch = __toESM(require("micromatch"), 1);
200
206
  var import_yaml = require("yaml");
201
207
 
202
208
  // src/evaluation/loaders/file-resolver.ts
@@ -338,8 +344,9 @@ Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
338
344
  guideline_patterns: guidelinePatterns
339
345
  };
340
346
  } catch (error) {
341
- logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
342
- continue;
347
+ logWarning(
348
+ `Could not read .agentv/config.yaml at ${configPath}: ${error.message}`
349
+ );
343
350
  }
344
351
  }
345
352
  return null;
@@ -369,8 +376,66 @@ function logWarning(message) {
369
376
 
370
377
  // src/evaluation/loaders/evaluator-parser.ts
371
378
  var import_node_path3 = __toESM(require("path"), 1);
379
+
380
+ // src/evaluation/validation/prompt-validator.ts
381
+ var import_promises3 = require("fs/promises");
382
+
383
+ // src/evaluation/template-variables.ts
384
+ var TEMPLATE_VARIABLES = {
385
+ CANDIDATE_ANSWER: "candidate_answer",
386
+ EXPECTED_MESSAGES: "expected_messages",
387
+ QUESTION: "question",
388
+ EXPECTED_OUTCOME: "expected_outcome",
389
+ REFERENCE_ANSWER: "reference_answer",
390
+ INPUT_MESSAGES: "input_messages"
391
+ };
392
+ var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
393
+ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
394
+ TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
395
+ TEMPLATE_VARIABLES.EXPECTED_MESSAGES
396
+ ]);
397
+
398
+ // src/evaluation/validation/prompt-validator.ts
372
399
  var ANSI_YELLOW2 = "\x1B[33m";
373
400
  var ANSI_RESET2 = "\x1B[0m";
401
+ async function validateCustomPromptContent(promptPath) {
402
+ const content = await (0, import_promises3.readFile)(promptPath, "utf8");
403
+ validateTemplateVariables(content, promptPath);
404
+ }
405
+ function validateTemplateVariables(content, source) {
406
+ const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
407
+ const foundVariables = /* @__PURE__ */ new Set();
408
+ const invalidVariables = [];
409
+ let match = variablePattern.exec(content);
410
+ while (match !== null) {
411
+ const varName = match[1];
412
+ foundVariables.add(varName);
413
+ if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
414
+ invalidVariables.push(varName);
415
+ }
416
+ match = variablePattern.exec(content);
417
+ }
418
+ const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
419
+ const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
420
+ const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
421
+ if (!hasRequiredFields) {
422
+ throw new Error(
423
+ `Missing required fields. Must include at least one of:
424
+ - {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
425
+ - {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
426
+ );
427
+ }
428
+ if (invalidVariables.length > 0) {
429
+ const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
430
+ Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
431
+ Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
432
+ console.warn(warningMessage);
433
+ }
434
+ }
435
+
436
+ // src/evaluation/loaders/evaluator-parser.ts
437
+ var ANSI_YELLOW3 = "\x1B[33m";
438
+ var ANSI_RESET3 = "\x1B[0m";
374
439
  async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
375
440
  const execution = rawEvalCase.execution;
376
441
  const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
@@ -429,6 +494,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
429
494
  const resolved = await resolveFileReference(prompt, searchRoots);
430
495
  if (resolved.resolvedPath) {
431
496
  promptPath = import_node_path3.default.resolve(resolved.resolvedPath);
497
+ try {
498
+ await validateCustomPromptContent(promptPath);
499
+ } catch (error) {
500
+ const message = error instanceof Error ? error.message : String(error);
501
+ throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
502
+ }
432
503
  } else {
433
504
  logWarning2(
434
505
  `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
@@ -465,18 +536,18 @@ function isJsonObject2(value) {
465
536
  function logWarning2(message, details) {
466
537
  if (details && details.length > 0) {
467
538
  const detailBlock = details.join("\n");
468
- console.warn(`${ANSI_YELLOW2}Warning: ${message}
469
- ${detailBlock}${ANSI_RESET2}`);
539
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
540
+ ${detailBlock}${ANSI_RESET3}`);
470
541
  } else {
471
- console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
542
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
472
543
  }
473
544
  }
474
545
 
475
546
  // src/evaluation/loaders/message-processor.ts
476
- var import_promises3 = require("fs/promises");
547
+ var import_promises4 = require("fs/promises");
477
548
  var import_node_path4 = __toESM(require("path"), 1);
478
- var ANSI_YELLOW3 = "\x1B[33m";
479
- var ANSI_RESET3 = "\x1B[0m";
549
+ var ANSI_YELLOW4 = "\x1B[33m";
550
+ var ANSI_RESET4 = "\x1B[0m";
480
551
  async function processMessages(options) {
481
552
  const {
482
553
  messages,
@@ -519,7 +590,7 @@ async function processMessages(options) {
519
590
  continue;
520
591
  }
521
592
  try {
522
- const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
593
+ const fileContent = (await (0, import_promises4.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
523
594
  if (messageType === "input" && guidelinePatterns && guidelinePaths) {
524
595
  const relativeToRepo = import_node_path4.default.relative(repoRootPath, resolvedPath);
525
596
  if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
@@ -590,7 +661,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
590
661
  continue;
591
662
  }
592
663
  try {
593
- const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
664
+ const fileContent = (await (0, import_promises4.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
594
665
  parts.push({ content: fileContent, isFile: true, displayPath });
595
666
  if (verbose) {
596
667
  console.log(` [Expected Assistant File] Found: ${displayPath}`);
@@ -640,19 +711,19 @@ function cloneJsonValue(value) {
640
711
  function logWarning3(message, details) {
641
712
  if (details && details.length > 0) {
642
713
  const detailBlock = details.join("\n");
643
- console.warn(`${ANSI_YELLOW3}Warning: ${message}
644
- ${detailBlock}${ANSI_RESET3}`);
714
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}
715
+ ${detailBlock}${ANSI_RESET4}`);
645
716
  } else {
646
- console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
717
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
647
718
  }
648
719
  }
649
720
 
650
721
  // src/evaluation/formatting/prompt-builder.ts
651
- var import_promises4 = require("fs/promises");
722
+ var import_promises5 = require("fs/promises");
652
723
  var import_node_path5 = __toESM(require("path"), 1);
653
- var ANSI_YELLOW4 = "\x1B[33m";
654
- var ANSI_RESET4 = "\x1B[0m";
655
- async function buildPromptInputs(testCase) {
724
+ var ANSI_YELLOW5 = "\x1B[33m";
725
+ var ANSI_RESET5 = "\x1B[0m";
726
+ async function buildPromptInputs(testCase, mode = "lm") {
656
727
  const guidelineParts = [];
657
728
  for (const rawPath of testCase.guideline_paths) {
658
729
  const absolutePath = import_node_path5.default.resolve(rawPath);
@@ -661,7 +732,7 @@ async function buildPromptInputs(testCase) {
661
732
  continue;
662
733
  }
663
734
  try {
664
- const content = (await (0, import_promises4.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
735
+ const content = (await (0, import_promises5.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
665
736
  guidelineParts.push({
666
737
  content,
667
738
  isFile: true,
@@ -728,7 +799,7 @@ async function buildPromptInputs(testCase) {
728
799
  const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
729
800
  const contentParts = [];
730
801
  for (const segment of segments) {
731
- const formattedContent = formatSegment(segment);
802
+ const formattedContent = formatSegment(segment, mode);
732
803
  if (formattedContent) {
733
804
  contentParts.push(formattedContent);
734
805
  }
@@ -743,7 +814,11 @@ ${messageContent}`);
743
814
  } else {
744
815
  const questionParts = [];
745
816
  for (const segment of testCase.input_segments) {
746
- const formattedContent = formatSegment(segment);
817
+ if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
818
+ questionParts.push(`<Attached: ${segment.path}>`);
819
+ continue;
820
+ }
821
+ const formattedContent = formatSegment(segment, mode);
747
822
  if (formattedContent) {
748
823
  questionParts.push(formattedContent);
749
824
  }
@@ -757,7 +832,8 @@ ${messageContent}`);
757
832
  messages: testCase.input_messages,
758
833
  segmentsByMessage,
759
834
  guidelinePatterns: testCase.guideline_patterns,
760
- guidelineContent: guidelines
835
+ guidelineContent: guidelines,
836
+ mode
761
837
  }) : void 0;
762
838
  return { question, guidelines, chatPrompt };
763
839
  }
@@ -774,7 +850,14 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
774
850
  return messagesWithContent > 1;
775
851
  }
776
852
  function buildChatPromptFromSegments(options) {
777
- const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
853
+ const {
854
+ messages,
855
+ segmentsByMessage,
856
+ guidelinePatterns,
857
+ guidelineContent,
858
+ systemPrompt,
859
+ mode = "lm"
860
+ } = options;
778
861
  if (messages.length === 0) {
779
862
  return void 0;
780
863
  }
@@ -792,7 +875,7 @@ ${guidelineContent.trim()}`);
792
875
  const segments = segmentsByMessage[startIndex];
793
876
  const contentParts = [];
794
877
  for (const segment of segments) {
795
- const formatted = formatSegment(segment);
878
+ const formatted = formatSegment(segment, mode);
796
879
  if (formatted) {
797
880
  contentParts.push(formatted);
798
881
  }
@@ -825,7 +908,7 @@ ${guidelineContent.trim()}`);
825
908
  if (segment.type === "guideline_ref") {
826
909
  continue;
827
910
  }
828
- const formatted = formatSegment(segment);
911
+ const formatted = formatSegment(segment, mode);
829
912
  if (formatted) {
830
913
  const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
831
914
  if (isGuidelineRef) {
@@ -849,17 +932,17 @@ function asString4(value) {
849
932
  return typeof value === "string" ? value : void 0;
850
933
  }
851
934
  function logWarning4(message) {
852
- console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
935
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
853
936
  }
854
937
 
855
938
  // src/evaluation/yaml-parser.ts
856
- var ANSI_YELLOW5 = "\x1B[33m";
857
- var ANSI_RESET5 = "\x1B[0m";
858
- var SCHEMA_EVAL_V2 = "agentv-eval-v2";
939
+ var ANSI_YELLOW6 = "\x1B[33m";
940
+ var ANSI_RED = "\x1B[31m";
941
+ var ANSI_RESET6 = "\x1B[0m";
859
942
  async function readTestSuiteMetadata(testFilePath) {
860
943
  try {
861
944
  const absolutePath = import_node_path6.default.resolve(testFilePath);
862
- const content = await (0, import_promises5.readFile)(absolutePath, "utf8");
945
+ const content = await (0, import_promises6.readFile)(absolutePath, "utf8");
863
946
  const parsed = (0, import_yaml2.parse)(content);
864
947
  if (!isJsonObject(parsed)) {
865
948
  return {};
@@ -877,7 +960,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
877
960
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
878
961
  const config = await loadConfig(absoluteTestPath, repoRootPath);
879
962
  const guidelinePatterns = config?.guideline_patterns;
880
- const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
963
+ const rawFile = await (0, import_promises6.readFile)(absoluteTestPath, "utf8");
881
964
  const parsed = (0, import_yaml2.parse)(rawFile);
882
965
  if (!isJsonObject(parsed)) {
883
966
  throw new Error(`Invalid test file format: ${evalFilePath}`);
@@ -886,12 +969,6 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
886
969
  const datasetNameFromSuite = asString5(suite.dataset)?.trim();
887
970
  const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
888
971
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
889
- const schema = suite.$schema;
890
- if (schema !== SCHEMA_EVAL_V2) {
891
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
892
- Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
893
- throw new Error(message);
894
- }
895
972
  const rawTestcases = suite.evalcases;
896
973
  if (!Array.isArray(rawTestcases)) {
897
974
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
@@ -915,14 +992,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
915
992
  const inputMessagesValue = evalcase.input_messages;
916
993
  const expectedMessagesValue = evalcase.expected_messages;
917
994
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
918
- logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
995
+ logError(
996
+ `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
997
+ );
919
998
  continue;
920
999
  }
921
1000
  const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
922
- const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
1001
+ const inputMessages = inputMessagesValue.filter(
1002
+ (msg) => isTestMessage(msg)
1003
+ );
923
1004
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
924
1005
  if (hasExpectedMessages && expectedMessages.length === 0) {
925
- logWarning5(`No valid expected message found for eval case: ${id}`);
1006
+ logError(`No valid expected message found for eval case: ${id}`);
926
1007
  continue;
927
1008
  }
928
1009
  if (expectedMessages.length > 1) {
@@ -953,7 +1034,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
953
1034
  const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
954
1035
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
955
1036
  const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
956
- const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1037
+ let evaluators;
1038
+ try {
1039
+ evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1040
+ } catch (error) {
1041
+ const message = error instanceof Error ? error.message : String(error);
1042
+ logError(`Skipping eval case '${id}': ${message}`);
1043
+ continue;
1044
+ }
957
1045
  const userFilePaths = [];
958
1046
  for (const segment of inputSegments) {
959
1047
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -971,7 +1059,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
971
1059
  question,
972
1060
  input_messages: inputMessages,
973
1061
  input_segments: inputSegments,
974
- output_segments: outputSegments,
1062
+ expected_segments: outputSegments,
975
1063
  reference_answer: referenceAnswer,
976
1064
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
977
1065
  guideline_patterns: guidelinePatterns,
@@ -1003,20 +1091,29 @@ function asString5(value) {
1003
1091
  function logWarning5(message, details) {
1004
1092
  if (details && details.length > 0) {
1005
1093
  const detailBlock = details.join("\n");
1006
- console.warn(`${ANSI_YELLOW5}Warning: ${message}
1007
- ${detailBlock}${ANSI_RESET5}`);
1094
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}
1095
+ ${detailBlock}${ANSI_RESET6}`);
1096
+ } else {
1097
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1098
+ }
1099
+ }
1100
+ function logError(message, details) {
1101
+ if (details && details.length > 0) {
1102
+ const detailBlock = details.join("\n");
1103
+ console.error(`${ANSI_RED}Error: ${message}
1104
+ ${detailBlock}${ANSI_RESET6}`);
1008
1105
  } else {
1009
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1106
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
1010
1107
  }
1011
1108
  }
1012
1109
 
1013
1110
  // src/evaluation/file-utils.ts
1014
1111
  var import_node_fs2 = require("fs");
1015
- var import_promises6 = require("fs/promises");
1112
+ var import_promises7 = require("fs/promises");
1016
1113
  var import_node_path7 = __toESM(require("path"), 1);
1017
1114
  async function fileExists2(filePath) {
1018
1115
  try {
1019
- await (0, import_promises6.access)(filePath, import_node_fs2.constants.F_OK);
1116
+ await (0, import_promises7.access)(filePath, import_node_fs2.constants.F_OK);
1020
1117
  return true;
1021
1118
  } catch {
1022
1119
  return false;
@@ -1026,7 +1123,7 @@ function normalizeLineEndings(content) {
1026
1123
  return content.replace(/\r\n/g, "\n");
1027
1124
  }
1028
1125
  async function readTextFile(filePath) {
1029
- const content = await (0, import_promises6.readFile)(filePath, "utf8");
1126
+ const content = await (0, import_promises7.readFile)(filePath, "utf8");
1030
1127
  return normalizeLineEndings(content);
1031
1128
  }
1032
1129
  async function findGitRoot(startPath) {
@@ -1447,7 +1544,7 @@ async function withRetry(fn, retryConfig, signal) {
1447
1544
 
1448
1545
  // src/evaluation/providers/cli.ts
1449
1546
  var import_node_child_process = require("child_process");
1450
- var import_promises7 = __toESM(require("fs/promises"), 1);
1547
+ var import_promises8 = __toESM(require("fs/promises"), 1);
1451
1548
  var import_node_os = __toESM(require("os"), 1);
1452
1549
  var import_node_path8 = __toESM(require("path"), 1);
1453
1550
  var import_node_util = require("util");
@@ -1548,7 +1645,7 @@ var CliProvider = class {
1548
1645
  const errorMsg = error instanceof Error ? error.message : String(error);
1549
1646
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
1550
1647
  } finally {
1551
- await import_promises7.default.unlink(filePath).catch(() => {
1648
+ await import_promises8.default.unlink(filePath).catch(() => {
1552
1649
  });
1553
1650
  }
1554
1651
  }
@@ -1687,7 +1784,7 @@ function formatTimeoutSuffix(timeoutMs) {
1687
1784
  var import_node_child_process2 = require("child_process");
1688
1785
  var import_node_crypto = require("crypto");
1689
1786
  var import_node_fs3 = require("fs");
1690
- var import_promises8 = require("fs/promises");
1787
+ var import_promises9 = require("fs/promises");
1691
1788
  var import_node_os2 = require("os");
1692
1789
  var import_node_path10 = __toESM(require("path"), 1);
1693
1790
  var import_node_util2 = require("util");
@@ -1755,9 +1852,7 @@ function buildPromptDocument(request, inputFiles, options) {
1755
1852
  options?.guidelineOverrides
1756
1853
  );
1757
1854
  const inputFilesList = collectInputFiles(inputFiles);
1758
- const nonGuidelineInputFiles = inputFilesList.filter(
1759
- (file) => !guidelineFiles.includes(file)
1760
- );
1855
+ const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
1761
1856
  const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
1762
1857
  if (prereadBlock.length > 0) {
1763
1858
  parts.push("\n", prereadBlock);
@@ -1877,7 +1972,7 @@ var CodexProvider = class {
1877
1972
  try {
1878
1973
  const promptContent = buildPromptDocument(request, inputFiles);
1879
1974
  const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
1880
- await (0, import_promises8.writeFile)(promptFile, promptContent, "utf8");
1975
+ await (0, import_promises9.writeFile)(promptFile, promptContent, "utf8");
1881
1976
  const args = this.buildCodexArgs();
1882
1977
  const cwd = this.resolveCwd(workspaceRoot);
1883
1978
  const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -1929,7 +2024,15 @@ var CodexProvider = class {
1929
2024
  return import_node_path10.default.resolve(this.config.cwd);
1930
2025
  }
1931
2026
  buildCodexArgs() {
1932
- const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
2027
+ const args = [
2028
+ "--ask-for-approval",
2029
+ "never",
2030
+ "exec",
2031
+ "--json",
2032
+ "--color",
2033
+ "never",
2034
+ "--skip-git-repo-check"
2035
+ ];
1933
2036
  if (this.config.args && this.config.args.length > 0) {
1934
2037
  args.push(...this.config.args);
1935
2038
  }
@@ -1960,11 +2063,11 @@ var CodexProvider = class {
1960
2063
  }
1961
2064
  }
1962
2065
  async createWorkspace() {
1963
- return await (0, import_promises8.mkdtemp)(import_node_path10.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
2066
+ return await (0, import_promises9.mkdtemp)(import_node_path10.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
1964
2067
  }
1965
2068
  async cleanupWorkspace(workspaceRoot) {
1966
2069
  try {
1967
- await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
2070
+ await (0, import_promises9.rm)(workspaceRoot, { recursive: true, force: true });
1968
2071
  } catch {
1969
2072
  }
1970
2073
  }
@@ -1984,7 +2087,7 @@ var CodexProvider = class {
1984
2087
  return void 0;
1985
2088
  }
1986
2089
  try {
1987
- await (0, import_promises8.mkdir)(logDir, { recursive: true });
2090
+ await (0, import_promises9.mkdir)(logDir, { recursive: true });
1988
2091
  } catch (error) {
1989
2092
  const message = error instanceof Error ? error.message : String(error);
1990
2093
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
@@ -2207,7 +2310,7 @@ async function locateExecutable(candidate) {
2207
2310
  if (includesPathSeparator) {
2208
2311
  const resolved = import_node_path10.default.isAbsolute(candidate) ? candidate : import_node_path10.default.resolve(candidate);
2209
2312
  const executablePath = await ensureWindowsExecutableVariant(resolved);
2210
- await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
2313
+ await (0, import_promises9.access)(executablePath, import_node_fs3.constants.F_OK);
2211
2314
  return executablePath;
2212
2315
  }
2213
2316
  const locator = process.platform === "win32" ? "where" : "which";
@@ -2217,7 +2320,7 @@ async function locateExecutable(candidate) {
2217
2320
  const preferred = selectExecutableCandidate(lines);
2218
2321
  if (preferred) {
2219
2322
  const executablePath = await ensureWindowsExecutableVariant(preferred);
2220
- await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
2323
+ await (0, import_promises9.access)(executablePath, import_node_fs3.constants.F_OK);
2221
2324
  return executablePath;
2222
2325
  }
2223
2326
  } catch {
@@ -2251,7 +2354,7 @@ async function ensureWindowsExecutableVariant(candidate) {
2251
2354
  for (const ext of extensions) {
2252
2355
  const withExtension = `${candidate}${ext}`;
2253
2356
  try {
2254
- await (0, import_promises8.access)(withExtension, import_node_fs3.constants.F_OK);
2357
+ await (0, import_promises9.access)(withExtension, import_node_fs3.constants.F_OK);
2255
2358
  return withExtension;
2256
2359
  } catch {
2257
2360
  }
@@ -2553,7 +2656,14 @@ var MockProvider = class {
2553
2656
 
2554
2657
  // src/evaluation/providers/targets.ts
2555
2658
  var import_zod = require("zod");
2556
- var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES", "OUTPUT_FILE"]);
2659
+ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
2660
+ "PROMPT",
2661
+ "GUIDELINES",
2662
+ "EVAL_ID",
2663
+ "ATTEMPT",
2664
+ "FILES",
2665
+ "OUTPUT_FILE"
2666
+ ]);
2557
2667
  var BASE_TARGET_SCHEMA = import_zod.z.object({
2558
2668
  name: import_zod.z.string().min(1, "target name is required"),
2559
2669
  provider: import_zod.z.string().min(1, "provider is required"),
@@ -2798,11 +2908,18 @@ function resolveMockConfig(target) {
2798
2908
  return { response };
2799
2909
  }
2800
2910
  function resolveVSCodeConfig(target, env, insiders) {
2801
- const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
2802
- const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
2803
- allowLiteral: false,
2804
- optionalEnv: true
2805
- }) : void 0;
2911
+ const workspaceTemplateEnvVar = resolveOptionalLiteralString(
2912
+ target.workspace_template ?? target.workspaceTemplate
2913
+ );
2914
+ const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
2915
+ workspaceTemplateEnvVar,
2916
+ env,
2917
+ `${target.name} workspace template path`,
2918
+ {
2919
+ allowLiteral: false,
2920
+ optionalEnv: true
2921
+ }
2922
+ ) : void 0;
2806
2923
  const commandSource = target.vscode_cmd ?? target.command;
2807
2924
  const waitSource = target.wait;
2808
2925
  const dryRunSource = target.dry_run ?? target.dryRun;
@@ -2829,7 +2946,10 @@ function resolveCliConfig(target, env) {
2829
2946
  allowLiteral: true,
2830
2947
  optionalEnv: true
2831
2948
  });
2832
- const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
2949
+ const timeoutMs = resolveTimeoutMs(
2950
+ target.timeout_seconds ?? target.timeoutSeconds,
2951
+ `${target.name} timeout`
2952
+ );
2833
2953
  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
2834
2954
  const commandTemplate = resolveString(
2835
2955
  commandTemplateSource,
@@ -2957,7 +3077,9 @@ function resolveOptionalString(source, env, description, options) {
2957
3077
  }
2958
3078
  const allowLiteral = options?.allowLiteral ?? false;
2959
3079
  if (!allowLiteral) {
2960
- throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
3080
+ throw new Error(
3081
+ `${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`
3082
+ );
2961
3083
  }
2962
3084
  return trimmed;
2963
3085
  }
@@ -3181,9 +3303,7 @@ function buildPromptDocument2(request, attachments, guidelinePatterns) {
3181
3303
  }
3182
3304
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
3183
3305
  const attachmentFiles = collectAttachmentFiles(attachments);
3184
- const nonGuidelineAttachments = attachmentFiles.filter(
3185
- (file) => !guidelineFiles.includes(file)
3186
- );
3306
+ const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
3187
3307
  const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
3188
3308
  if (prereadBlock.length > 0) {
3189
3309
  parts.push("\n", prereadBlock);
@@ -3292,8 +3412,10 @@ async function ensureVSCodeSubagents(options) {
3292
3412
  if (result.skippedExisting.length > 0) {
3293
3413
  console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
3294
3414
  }
3295
- console.log(`
3296
- total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
3415
+ console.log(
3416
+ `
3417
+ total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`
3418
+ );
3297
3419
  }
3298
3420
  return {
3299
3421
  provisioned: true,
@@ -3313,46 +3435,12 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
3313
3435
 
3314
3436
  // src/evaluation/providers/targets-file.ts
3315
3437
  var import_node_fs4 = require("fs");
3316
- var import_promises9 = require("fs/promises");
3438
+ var import_promises10 = require("fs/promises");
3317
3439
  var import_node_path12 = __toESM(require("path"), 1);
3318
3440
  var import_yaml3 = require("yaml");
3319
-
3320
- // src/evaluation/providers/types.ts
3321
- var AGENT_PROVIDER_KINDS = [
3322
- "codex",
3323
- "vscode",
3324
- "vscode-insiders"
3325
- ];
3326
- var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
3327
- function isAgentProvider(provider) {
3328
- return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
3329
- }
3330
-
3331
- // src/evaluation/providers/targets-file.ts
3332
3441
  function isRecord(value) {
3333
3442
  return typeof value === "object" && value !== null && !Array.isArray(value);
3334
3443
  }
3335
- function checkSchema(parsed, absolutePath) {
3336
- const schema = parsed.$schema;
3337
- if (schema === void 0) {
3338
- throw new Error(
3339
- `Missing $schema field in targets.yaml at ${absolutePath}.
3340
- Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
3341
- );
3342
- }
3343
- if (typeof schema !== "string") {
3344
- throw new Error(
3345
- `Invalid $schema field in targets.yaml at ${absolutePath}.
3346
- Expected a string value '${TARGETS_SCHEMA_V2}'.`
3347
- );
3348
- }
3349
- if (schema !== TARGETS_SCHEMA_V2) {
3350
- throw new Error(
3351
- `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
3352
- Expected '${TARGETS_SCHEMA_V2}'.`
3353
- );
3354
- }
3355
- }
3356
3444
  function extractTargetsArray(parsed, absolutePath) {
3357
3445
  const targets = parsed.targets;
3358
3446
  if (!Array.isArray(targets)) {
@@ -3367,7 +3455,9 @@ function assertTargetDefinition(value, index, filePath) {
3367
3455
  const name = value.name;
3368
3456
  const provider = value.provider;
3369
3457
  if (typeof name !== "string" || name.trim().length === 0) {
3370
- throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
3458
+ throw new Error(
3459
+ `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
3460
+ );
3371
3461
  }
3372
3462
  if (typeof provider !== "string" || provider.trim().length === 0) {
3373
3463
  throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
@@ -3376,7 +3466,7 @@ function assertTargetDefinition(value, index, filePath) {
3376
3466
  }
3377
3467
  async function fileExists3(filePath) {
3378
3468
  try {
3379
- await (0, import_promises9.access)(filePath, import_node_fs4.constants.F_OK);
3469
+ await (0, import_promises10.access)(filePath, import_node_fs4.constants.F_OK);
3380
3470
  return true;
3381
3471
  } catch {
3382
3472
  return false;
@@ -3387,14 +3477,15 @@ async function readTargetDefinitions(filePath) {
3387
3477
  if (!await fileExists3(absolutePath)) {
3388
3478
  throw new Error(`targets.yaml not found at ${absolutePath}`);
3389
3479
  }
3390
- const raw = await (0, import_promises9.readFile)(absolutePath, "utf8");
3480
+ const raw = await (0, import_promises10.readFile)(absolutePath, "utf8");
3391
3481
  const parsed = (0, import_yaml3.parse)(raw);
3392
3482
  if (!isRecord(parsed)) {
3393
- throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
3483
+ throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
3394
3484
  }
3395
- checkSchema(parsed, absolutePath);
3396
3485
  const targets = extractTargetsArray(parsed, absolutePath);
3397
- const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
3486
+ const definitions = targets.map(
3487
+ (entry, index) => assertTargetDefinition(entry, index, absolutePath)
3488
+ );
3398
3489
  return definitions;
3399
3490
  }
3400
3491
  function listTargetNames(definitions) {
@@ -3438,16 +3529,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
3438
3529
  Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
3439
3530
 
3440
3531
  [[ ## expected_outcome ## ]]
3441
- {{expected_outcome}}
3532
+ {{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
3442
3533
 
3443
3534
  [[ ## question ## ]]
3444
- {{question}}
3535
+ {{${TEMPLATE_VARIABLES.QUESTION}}}
3445
3536
 
3446
3537
  [[ ## reference_answer ## ]]
3447
- {{reference_answer}}
3538
+ {{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
3448
3539
 
3449
3540
  [[ ## candidate_answer ## ]]
3450
- {{candidate_answer}}`;
3541
+ {{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
3451
3542
  var LlmJudgeEvaluator = class {
3452
3543
  kind = "llm_judge";
3453
3544
  resolveJudgeProvider;
@@ -3470,12 +3561,16 @@ var LlmJudgeEvaluator = class {
3470
3561
  async evaluateWithPrompt(context, judgeProvider) {
3471
3562
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
3472
3563
  const variables = {
3473
- input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
3474
- output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
3475
- candidate_answer: context.candidate.trim(),
3476
- reference_answer: (context.evalCase.reference_answer ?? "").trim(),
3477
- expected_outcome: context.evalCase.expected_outcome.trim(),
3478
- question: formattedQuestion.trim()
3564
+ [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
3565
+ [TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(
3566
+ context.evalCase.expected_segments,
3567
+ null,
3568
+ 2
3569
+ ),
3570
+ [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
3571
+ [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
3572
+ [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
3573
+ [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
3479
3574
  };
3480
3575
  const systemPrompt = buildOutputSchema();
3481
3576
  const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -3707,17 +3802,17 @@ function parseJsonSafe(payload) {
3707
3802
  }
3708
3803
  }
3709
3804
  function substituteVariables(template, variables) {
3710
- return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
3805
+ return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
3711
3806
  return variables[varName] ?? match;
3712
3807
  });
3713
3808
  }
3714
3809
 
3715
3810
  // src/evaluation/orchestrator.ts
3716
3811
  var import_node_crypto2 = require("crypto");
3717
- var import_promises10 = require("fs/promises");
3812
+ var import_promises11 = require("fs/promises");
3718
3813
  var import_node_path13 = __toESM(require("path"), 1);
3719
3814
 
3720
- // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
3815
+ // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
3721
3816
  var Node = class {
3722
3817
  value;
3723
3818
  next;
@@ -3750,6 +3845,9 @@ var Queue = class {
3750
3845
  }
3751
3846
  this.#head = this.#head.next;
3752
3847
  this.#size--;
3848
+ if (!this.#head) {
3849
+ this.#tail = void 0;
3850
+ }
3753
3851
  return current.value;
3754
3852
  }
3755
3853
  peek() {
@@ -3780,7 +3878,7 @@ var Queue = class {
3780
3878
  }
3781
3879
  };
3782
3880
 
3783
- // ../../node_modules/.pnpm/p-limit@6.2.0/node_modules/p-limit/index.js
3881
+ // ../../node_modules/.bun/p-limit@6.2.0/node_modules/p-limit/index.js
3784
3882
  function pLimit(concurrency) {
3785
3883
  validateConcurrency(concurrency);
3786
3884
  const queue = new Queue();
@@ -3853,6 +3951,16 @@ function validateConcurrency(concurrency) {
3853
3951
  }
3854
3952
  }
3855
3953
 
3954
+ // src/evaluation/providers/types.ts
3955
+ var AGENT_PROVIDER_KINDS = [
3956
+ "codex",
3957
+ "vscode",
3958
+ "vscode-insiders"
3959
+ ];
3960
+ function isAgentProvider(provider) {
3961
+ return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
3962
+ }
3963
+
3856
3964
  // src/evaluation/orchestrator.ts
3857
3965
  async function runEvaluation(options) {
3858
3966
  const {
@@ -3871,11 +3979,11 @@ async function runEvaluation(options) {
3871
3979
  now,
3872
3980
  evalId,
3873
3981
  verbose,
3982
+ evalCases: preloadedEvalCases,
3874
3983
  onResult,
3875
3984
  onProgress
3876
3985
  } = options;
3877
- const load = loadEvalCases;
3878
- const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
3986
+ const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
3879
3987
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
3880
3988
  if (filteredEvalCases.length === 0) {
3881
3989
  if (evalId) {
@@ -3956,7 +4064,9 @@ async function runEvaluation(options) {
3956
4064
  } catch (error) {
3957
4065
  if (verbose) {
3958
4066
  const message = error instanceof Error ? error.message : String(error);
3959
- console.warn(`Provider batch execution failed, falling back to per-case dispatch: ${message}`);
4067
+ console.warn(
4068
+ `Provider batch execution failed, falling back to per-case dispatch: ${message}`
4069
+ );
3960
4070
  }
3961
4071
  }
3962
4072
  }
@@ -4059,8 +4169,9 @@ async function runBatchEvaluation(options) {
4059
4169
  agentTimeoutMs
4060
4170
  } = options;
4061
4171
  const promptInputsList = [];
4172
+ const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
4062
4173
  for (const evalCase of evalCases) {
4063
- const promptInputs = await buildPromptInputs(evalCase);
4174
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
4064
4175
  if (promptDumpDir) {
4065
4176
  await dumpPrompt(promptDumpDir, evalCase, promptInputs);
4066
4177
  }
@@ -4119,7 +4230,14 @@ async function runBatchEvaluation(options) {
4119
4230
  agentTimeoutMs
4120
4231
  });
4121
4232
  } catch (error) {
4122
- const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
4233
+ const errorResult = buildErrorResult(
4234
+ evalCase,
4235
+ target.name,
4236
+ nowFn(),
4237
+ error,
4238
+ promptInputs,
4239
+ provider
4240
+ );
4123
4241
  results.push(errorResult);
4124
4242
  if (onResult) {
4125
4243
  await onResult(errorResult);
@@ -4166,7 +4284,8 @@ async function runEvalCase(options) {
4166
4284
  signal,
4167
4285
  judgeProvider
4168
4286
  } = options;
4169
- const promptInputs = await buildPromptInputs(evalCase);
4287
+ const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
4288
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
4170
4289
  if (promptDumpDir) {
4171
4290
  await dumpPrompt(promptDumpDir, evalCase, promptInputs);
4172
4291
  }
@@ -4296,7 +4415,18 @@ async function evaluateCandidate(options) {
4296
4415
  };
4297
4416
  }
4298
4417
  async function runEvaluatorsForCase(options) {
4299
- const { evalCase, candidate, target, provider, evaluators, attempt, promptInputs, now, judgeProvider, agentTimeoutMs } = options;
4418
+ const {
4419
+ evalCase,
4420
+ candidate,
4421
+ target,
4422
+ provider,
4423
+ evaluators,
4424
+ attempt,
4425
+ promptInputs,
4426
+ now,
4427
+ judgeProvider,
4428
+ agentTimeoutMs
4429
+ } = options;
4300
4430
  if (evalCase.evaluators && evalCase.evaluators.length > 0) {
4301
4431
  return runEvaluatorList({
4302
4432
  evalCase,
@@ -4397,7 +4527,6 @@ async function runEvaluatorList(options) {
4397
4527
  reasoning: score2.reasoning,
4398
4528
  evaluator_provider_request: score2.evaluatorRawRequest
4399
4529
  });
4400
- continue;
4401
4530
  }
4402
4531
  } catch (error) {
4403
4532
  const message = error instanceof Error ? error.message : String(error);
@@ -4408,7 +4537,11 @@ async function runEvaluatorList(options) {
4408
4537
  expectedAspectCount: 1,
4409
4538
  reasoning: message
4410
4539
  };
4411
- scored.push({ score: fallbackScore, name: evaluator.name ?? "unknown", type: evaluator.type ?? "unknown" });
4540
+ scored.push({
4541
+ score: fallbackScore,
4542
+ name: evaluator.name ?? "unknown",
4543
+ type: evaluator.type ?? "unknown"
4544
+ });
4412
4545
  evaluatorResults.push({
4413
4546
  name: evaluator.name ?? "unknown",
4414
4547
  type: evaluator.type ?? "unknown",
@@ -4422,7 +4555,10 @@ async function runEvaluatorList(options) {
4422
4555
  const aggregateScore = scored.length > 0 ? scored.reduce((total, entry) => total + entry.score.score, 0) / scored.length : 0;
4423
4556
  const hits = scored.flatMap((entry) => entry.score.hits);
4424
4557
  const misses = scored.flatMap((entry) => entry.score.misses);
4425
- const expectedAspectCount = scored.reduce((total, entry) => total + (entry.score.expectedAspectCount ?? 0), 0);
4558
+ const expectedAspectCount = scored.reduce(
4559
+ (total, entry) => total + (entry.score.expectedAspectCount ?? 0),
4560
+ 0
4561
+ );
4426
4562
  const rawAspects = scored.flatMap((entry) => entry.score.rawAspects ?? []);
4427
4563
  const reasoningParts = scored.map((entry) => entry.score.reasoning ? `${entry.name}: ${entry.score.reasoning}` : void 0).filter(isNonEmptyString2);
4428
4564
  const reasoning = reasoningParts.length > 0 ? reasoningParts.join(" | ") : void 0;
@@ -4437,7 +4573,18 @@ async function runEvaluatorList(options) {
4437
4573
  return { score, evaluatorResults };
4438
4574
  }
4439
4575
  async function runLlmJudgeEvaluator(options) {
4440
- const { config, evalCase, candidate, target, provider, evaluatorRegistry, attempt, promptInputs, now, judgeProvider } = options;
4576
+ const {
4577
+ config,
4578
+ evalCase,
4579
+ candidate,
4580
+ target,
4581
+ provider,
4582
+ evaluatorRegistry,
4583
+ attempt,
4584
+ promptInputs,
4585
+ now,
4586
+ judgeProvider
4587
+ } = options;
4441
4588
  const customPrompt = await resolveCustomPrompt(config);
4442
4589
  return evaluatorRegistry.llm_judge.evaluate({
4443
4590
  evalCase,
@@ -4455,7 +4602,8 @@ async function runLlmJudgeEvaluator(options) {
4455
4602
  async function resolveCustomPrompt(config) {
4456
4603
  if (config.promptPath) {
4457
4604
  try {
4458
- return await readTextFile(config.promptPath);
4605
+ const content = await readTextFile(config.promptPath);
4606
+ return content;
4459
4607
  } catch (error) {
4460
4608
  const message = error instanceof Error ? error.message : String(error);
4461
4609
  console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -4490,14 +4638,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
4490
4638
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
4491
4639
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
4492
4640
  const filePath = import_node_path13.default.resolve(directory, filename);
4493
- await (0, import_promises10.mkdir)(import_node_path13.default.dirname(filePath), { recursive: true });
4641
+ await (0, import_promises11.mkdir)(import_node_path13.default.dirname(filePath), { recursive: true });
4494
4642
  const payload = {
4495
4643
  eval_id: evalCase.id,
4496
4644
  question: promptInputs.question,
4497
4645
  guidelines: promptInputs.guidelines,
4498
4646
  guideline_paths: evalCase.guideline_paths
4499
4647
  };
4500
- await (0, import_promises10.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
4648
+ await (0, import_promises11.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
4501
4649
  }
4502
4650
  function sanitizeFilename(value) {
4503
4651
  if (!value) {