agentv 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4847,7 +4847,7 @@ function isAgentProvider(provider) {
4847
4847
  }
4848
4848
 
4849
4849
  // ../../packages/core/dist/index.js
4850
- import { readFile as readFile4 } from "node:fs/promises";
4850
+ import { readFile as readFile5 } from "node:fs/promises";
4851
4851
  import path62 from "node:path";
4852
4852
  import { parse as parse22 } from "yaml";
4853
4853
  import micromatch from "micromatch";
@@ -4859,8 +4859,9 @@ import { access as access3 } from "node:fs/promises";
4859
4859
  import path8 from "node:path";
4860
4860
  import path32 from "node:path";
4861
4861
  import { readFile as readFile22 } from "node:fs/promises";
4862
- import path42 from "node:path";
4863
4862
  import { readFile as readFile32 } from "node:fs/promises";
4863
+ import path42 from "node:path";
4864
+ import { readFile as readFile4 } from "node:fs/promises";
4864
4865
  import path52 from "node:path";
4865
4866
 
4866
4867
  // ../../node_modules/.pnpm/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider/dist/index.mjs
@@ -34384,7 +34385,7 @@ async function provisionSubagents(options) {
34384
34385
 
34385
34386
  // ../../packages/core/dist/index.js
34386
34387
  import { constants as constants32 } from "node:fs";
34387
- import { access as access32, readFile as readFile5 } from "node:fs/promises";
34388
+ import { access as access32, readFile as readFile6 } from "node:fs/promises";
34388
34389
  import path11 from "node:path";
34389
34390
  import { parse as parse32 } from "yaml";
34390
34391
  import { createHash, randomUUID as randomUUID2 } from "node:crypto";
@@ -34467,7 +34468,7 @@ ${part.content}
34467
34468
  }
34468
34469
  return parts.map((p) => p.content).join(" ");
34469
34470
  }
34470
- function formatSegment(segment) {
34471
+ function formatSegment(segment, mode = "lm") {
34471
34472
  const type = asString(segment.type);
34472
34473
  if (type === "text") {
34473
34474
  return asString(segment.value);
@@ -34477,8 +34478,14 @@ function formatSegment(segment) {
34477
34478
  return refPath ? `<Attached: ${refPath}>` : void 0;
34478
34479
  }
34479
34480
  if (type === "file") {
34480
- const text2 = asString(segment.text);
34481
34481
  const filePath = asString(segment.path);
34482
+ if (!filePath) {
34483
+ return void 0;
34484
+ }
34485
+ if (mode === "agent") {
34486
+ return `<file: path="${filePath}">`;
34487
+ }
34488
+ const text2 = asString(segment.text);
34482
34489
  if (text2 && filePath) {
34483
34490
  return formatFileContents([{ content: text2.trim(), isFile: true, displayPath: filePath }]);
34484
34491
  }
@@ -34666,8 +34673,58 @@ function extractTargetFromSuite(suite) {
34666
34673
  function logWarning(message) {
34667
34674
  console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
34668
34675
  }
34676
+ var TEMPLATE_VARIABLES = {
34677
+ CANDIDATE_ANSWER: "candidate_answer",
34678
+ EXPECTED_MESSAGES: "expected_messages",
34679
+ QUESTION: "question",
34680
+ EXPECTED_OUTCOME: "expected_outcome",
34681
+ REFERENCE_ANSWER: "reference_answer",
34682
+ INPUT_MESSAGES: "input_messages"
34683
+ };
34684
+ var VALID_TEMPLATE_VARIABLES = new Set(
34685
+ Object.values(TEMPLATE_VARIABLES)
34686
+ );
34687
+ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
34688
+ TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
34689
+ TEMPLATE_VARIABLES.EXPECTED_MESSAGES
34690
+ ]);
34669
34691
  var ANSI_YELLOW2 = "\x1B[33m";
34670
34692
  var ANSI_RESET2 = "\x1B[0m";
34693
+ async function validateCustomPromptContent(promptPath) {
34694
+ const content = await readFile22(promptPath, "utf8");
34695
+ validateTemplateVariables(content, promptPath);
34696
+ }
34697
+ function validateTemplateVariables(content, source2) {
34698
+ const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
34699
+ const foundVariables = /* @__PURE__ */ new Set();
34700
+ const invalidVariables = [];
34701
+ let match;
34702
+ while ((match = variablePattern.exec(content)) !== null) {
34703
+ const varName = match[1];
34704
+ foundVariables.add(varName);
34705
+ if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
34706
+ invalidVariables.push(varName);
34707
+ }
34708
+ }
34709
+ const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
34710
+ const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
34711
+ const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
34712
+ if (!hasRequiredFields) {
34713
+ throw new Error(
34714
+ `Missing required fields. Must include at least one of:
34715
+ - {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
34716
+ - {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
34717
+ );
34718
+ }
34719
+ if (invalidVariables.length > 0) {
34720
+ const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source2}
34721
+ Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
34722
+ Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
34723
+ console.warn(warningMessage);
34724
+ }
34725
+ }
34726
+ var ANSI_YELLOW3 = "\x1B[33m";
34727
+ var ANSI_RESET3 = "\x1B[0m";
34671
34728
  async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
34672
34729
  const execution = rawEvalCase.execution;
34673
34730
  const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
@@ -34726,6 +34783,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34726
34783
  const resolved = await resolveFileReference2(prompt, searchRoots);
34727
34784
  if (resolved.resolvedPath) {
34728
34785
  promptPath = path32.resolve(resolved.resolvedPath);
34786
+ try {
34787
+ await validateCustomPromptContent(promptPath);
34788
+ } catch (error40) {
34789
+ const message = error40 instanceof Error ? error40.message : String(error40);
34790
+ throw new Error(`Evaluator '${name16}' template (${promptPath}): ${message}`);
34791
+ }
34729
34792
  } else {
34730
34793
  logWarning2(
34731
34794
  `Inline prompt used for evaluator '${name16}' in '${evalId}' (file not found: ${resolved.displayPath})`,
@@ -34762,14 +34825,14 @@ function isJsonObject2(value) {
34762
34825
  function logWarning2(message, details) {
34763
34826
  if (details && details.length > 0) {
34764
34827
  const detailBlock = details.join("\n");
34765
- console.warn(`${ANSI_YELLOW2}Warning: ${message}
34766
- ${detailBlock}${ANSI_RESET2}`);
34828
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
34829
+ ${detailBlock}${ANSI_RESET3}`);
34767
34830
  } else {
34768
- console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
34831
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
34769
34832
  }
34770
34833
  }
34771
- var ANSI_YELLOW3 = "\x1B[33m";
34772
- var ANSI_RESET3 = "\x1B[0m";
34834
+ var ANSI_YELLOW4 = "\x1B[33m";
34835
+ var ANSI_RESET4 = "\x1B[0m";
34773
34836
  async function processMessages(options) {
34774
34837
  const {
34775
34838
  messages,
@@ -34812,7 +34875,7 @@ async function processMessages(options) {
34812
34875
  continue;
34813
34876
  }
34814
34877
  try {
34815
- const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34878
+ const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
34816
34879
  if (messageType === "input" && guidelinePatterns && guidelinePaths) {
34817
34880
  const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
34818
34881
  if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
@@ -34883,7 +34946,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
34883
34946
  continue;
34884
34947
  }
34885
34948
  try {
34886
- const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
34949
+ const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
34887
34950
  parts.push({ content: fileContent, isFile: true, displayPath });
34888
34951
  if (verbose) {
34889
34952
  console.log(` [Expected Assistant File] Found: ${displayPath}`);
@@ -34933,15 +34996,15 @@ function cloneJsonValue(value) {
34933
34996
  function logWarning3(message, details) {
34934
34997
  if (details && details.length > 0) {
34935
34998
  const detailBlock = details.join("\n");
34936
- console.warn(`${ANSI_YELLOW3}Warning: ${message}
34937
- ${detailBlock}${ANSI_RESET3}`);
34999
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}
35000
+ ${detailBlock}${ANSI_RESET4}`);
34938
35001
  } else {
34939
- console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
35002
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
34940
35003
  }
34941
35004
  }
34942
- var ANSI_YELLOW4 = "\x1B[33m";
34943
- var ANSI_RESET4 = "\x1B[0m";
34944
- async function buildPromptInputs(testCase) {
35005
+ var ANSI_YELLOW5 = "\x1B[33m";
35006
+ var ANSI_RESET5 = "\x1B[0m";
35007
+ async function buildPromptInputs(testCase, mode = "lm") {
34945
35008
  const guidelineParts = [];
34946
35009
  for (const rawPath of testCase.guideline_paths) {
34947
35010
  const absolutePath = path52.resolve(rawPath);
@@ -34950,7 +35013,7 @@ async function buildPromptInputs(testCase) {
34950
35013
  continue;
34951
35014
  }
34952
35015
  try {
34953
- const content = (await readFile32(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
35016
+ const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
34954
35017
  guidelineParts.push({
34955
35018
  content,
34956
35019
  isFile: true,
@@ -35017,7 +35080,7 @@ async function buildPromptInputs(testCase) {
35017
35080
  const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
35018
35081
  const contentParts = [];
35019
35082
  for (const segment of segments) {
35020
- const formattedContent = formatSegment(segment);
35083
+ const formattedContent = formatSegment(segment, mode);
35021
35084
  if (formattedContent) {
35022
35085
  contentParts.push(formattedContent);
35023
35086
  }
@@ -35032,7 +35095,11 @@ ${messageContent}`);
35032
35095
  } else {
35033
35096
  const questionParts = [];
35034
35097
  for (const segment of testCase.input_segments) {
35035
- const formattedContent = formatSegment(segment);
35098
+ if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
35099
+ questionParts.push(`<Attached: ${segment.path}>`);
35100
+ continue;
35101
+ }
35102
+ const formattedContent = formatSegment(segment, mode);
35036
35103
  if (formattedContent) {
35037
35104
  questionParts.push(formattedContent);
35038
35105
  }
@@ -35046,7 +35113,8 @@ ${messageContent}`);
35046
35113
  messages: testCase.input_messages,
35047
35114
  segmentsByMessage,
35048
35115
  guidelinePatterns: testCase.guideline_patterns,
35049
- guidelineContent: guidelines
35116
+ guidelineContent: guidelines,
35117
+ mode
35050
35118
  }) : void 0;
35051
35119
  return { question, guidelines, chatPrompt };
35052
35120
  }
@@ -35063,7 +35131,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
35063
35131
  return messagesWithContent > 1;
35064
35132
  }
35065
35133
  function buildChatPromptFromSegments(options) {
35066
- const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
35134
+ const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt, mode = "lm" } = options;
35067
35135
  if (messages.length === 0) {
35068
35136
  return void 0;
35069
35137
  }
@@ -35081,7 +35149,7 @@ ${guidelineContent.trim()}`);
35081
35149
  const segments = segmentsByMessage[startIndex];
35082
35150
  const contentParts = [];
35083
35151
  for (const segment of segments) {
35084
- const formatted = formatSegment(segment);
35152
+ const formatted = formatSegment(segment, mode);
35085
35153
  if (formatted) {
35086
35154
  contentParts.push(formatted);
35087
35155
  }
@@ -35114,7 +35182,7 @@ ${guidelineContent.trim()}`);
35114
35182
  if (segment.type === "guideline_ref") {
35115
35183
  continue;
35116
35184
  }
35117
- const formatted = formatSegment(segment);
35185
+ const formatted = formatSegment(segment, mode);
35118
35186
  if (formatted) {
35119
35187
  const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
35120
35188
  if (isGuidelineRef) {
@@ -35138,15 +35206,16 @@ function asString4(value) {
35138
35206
  return typeof value === "string" ? value : void 0;
35139
35207
  }
35140
35208
  function logWarning4(message) {
35141
- console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
35209
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
35142
35210
  }
35143
- var ANSI_YELLOW5 = "\x1B[33m";
35144
- var ANSI_RESET5 = "\x1B[0m";
35211
+ var ANSI_YELLOW6 = "\x1B[33m";
35212
+ var ANSI_RED = "\x1B[31m";
35213
+ var ANSI_RESET6 = "\x1B[0m";
35145
35214
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
35146
35215
  async function readTestSuiteMetadata(testFilePath) {
35147
35216
  try {
35148
35217
  const absolutePath = path62.resolve(testFilePath);
35149
- const content = await readFile4(absolutePath, "utf8");
35218
+ const content = await readFile5(absolutePath, "utf8");
35150
35219
  const parsed = parse22(content);
35151
35220
  if (!isJsonObject(parsed)) {
35152
35221
  return {};
@@ -35164,7 +35233,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35164
35233
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
35165
35234
  const config2 = await loadConfig(absoluteTestPath, repoRootPath);
35166
35235
  const guidelinePatterns = config2?.guideline_patterns;
35167
- const rawFile = await readFile4(absoluteTestPath, "utf8");
35236
+ const rawFile = await readFile5(absoluteTestPath, "utf8");
35168
35237
  const parsed = parse22(rawFile);
35169
35238
  if (!isJsonObject(parsed)) {
35170
35239
  throw new Error(`Invalid test file format: ${evalFilePath}`);
@@ -35202,14 +35271,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
35202
35271
  const inputMessagesValue = evalcase.input_messages;
35203
35272
  const expectedMessagesValue = evalcase.expected_messages;
35204
35273
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
35205
- logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
35274
+ logError(`Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`);
35206
35275
  continue;
35207
35276
  }
35208
35277
  const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
35209
35278
  const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
35210
35279
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
35211
35280
  if (hasExpectedMessages && expectedMessages.length === 0) {
35212
- logWarning5(`No valid expected message found for eval case: ${id}`);
35281
+ logError(`No valid expected message found for eval case: ${id}`);
35213
35282
  continue;
35214
35283
  }
35215
35284
  if (expectedMessages.length > 1) {
@@ -35240,7 +35309,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
35240
35309
  const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
35241
35310
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
35242
35311
  const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
35243
- const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
35312
+ let evaluators;
35313
+ try {
35314
+ evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
35315
+ } catch (error40) {
35316
+ const message = error40 instanceof Error ? error40.message : String(error40);
35317
+ logError(`Skipping eval case '${id}': ${message}`);
35318
+ continue;
35319
+ }
35244
35320
  const userFilePaths = [];
35245
35321
  for (const segment of inputSegments) {
35246
35322
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35258,7 +35334,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
35258
35334
  question,
35259
35335
  input_messages: inputMessages,
35260
35336
  input_segments: inputSegments,
35261
- output_segments: outputSegments,
35337
+ expected_segments: outputSegments,
35262
35338
  reference_answer: referenceAnswer,
35263
35339
  guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
35264
35340
  guideline_patterns: guidelinePatterns,
@@ -35290,10 +35366,19 @@ function asString5(value) {
35290
35366
  function logWarning5(message, details) {
35291
35367
  if (details && details.length > 0) {
35292
35368
  const detailBlock = details.join("\n");
35293
- console.warn(`${ANSI_YELLOW5}Warning: ${message}
35294
- ${detailBlock}${ANSI_RESET5}`);
35369
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}
35370
+ ${detailBlock}${ANSI_RESET6}`);
35295
35371
  } else {
35296
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
35372
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
35373
+ }
35374
+ }
35375
+ function logError(message, details) {
35376
+ if (details && details.length > 0) {
35377
+ const detailBlock = details.join("\n");
35378
+ console.error(`${ANSI_RED}Error: ${message}
35379
+ ${detailBlock}${ANSI_RESET6}`);
35380
+ } else {
35381
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
35297
35382
  }
35298
35383
  }
35299
35384
  var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
@@ -36991,7 +37076,7 @@ async function readTargetDefinitions(filePath) {
36991
37076
  if (!await fileExists3(absolutePath)) {
36992
37077
  throw new Error(`targets.yaml not found at ${absolutePath}`);
36993
37078
  }
36994
- const raw = await readFile5(absolutePath, "utf8");
37079
+ const raw = await readFile6(absolutePath, "utf8");
36995
37080
  const parsed = parse32(raw);
36996
37081
  if (!isRecord(parsed)) {
36997
37082
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -37034,16 +37119,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
37034
37119
  Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
37035
37120
 
37036
37121
  [[ ## expected_outcome ## ]]
37037
- {{expected_outcome}}
37122
+ {{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
37038
37123
 
37039
37124
  [[ ## question ## ]]
37040
- {{question}}
37125
+ {{${TEMPLATE_VARIABLES.QUESTION}}}
37041
37126
 
37042
37127
  [[ ## reference_answer ## ]]
37043
- {{reference_answer}}
37128
+ {{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
37044
37129
 
37045
37130
  [[ ## candidate_answer ## ]]
37046
- {{candidate_answer}}`;
37131
+ {{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
37047
37132
  var LlmJudgeEvaluator = class {
37048
37133
  kind = "llm_judge";
37049
37134
  resolveJudgeProvider;
@@ -37066,12 +37151,12 @@ var LlmJudgeEvaluator = class {
37066
37151
  async evaluateWithPrompt(context, judgeProvider) {
37067
37152
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
37068
37153
  const variables = {
37069
- input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
37070
- output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
37071
- candidate_answer: context.candidate.trim(),
37072
- reference_answer: (context.evalCase.reference_answer ?? "").trim(),
37073
- expected_outcome: context.evalCase.expected_outcome.trim(),
37074
- question: formattedQuestion.trim()
37154
+ [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
37155
+ [TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(context.evalCase.expected_segments, null, 2),
37156
+ [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
37157
+ [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
37158
+ [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
37159
+ [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
37075
37160
  };
37076
37161
  const systemPrompt = buildOutputSchema();
37077
37162
  const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -37303,7 +37388,7 @@ function parseJsonSafe(payload) {
37303
37388
  }
37304
37389
  }
37305
37390
  function substituteVariables(template, variables) {
37306
- return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
37391
+ return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
37307
37392
  return variables[varName] ?? match;
37308
37393
  });
37309
37394
  }
@@ -37456,11 +37541,11 @@ async function runEvaluation(options) {
37456
37541
  now,
37457
37542
  evalId,
37458
37543
  verbose,
37544
+ evalCases: preloadedEvalCases,
37459
37545
  onResult,
37460
37546
  onProgress
37461
37547
  } = options;
37462
- const load = loadEvalCases;
37463
- const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
37548
+ const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
37464
37549
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
37465
37550
  if (filteredEvalCases.length === 0) {
37466
37551
  if (evalId) {
@@ -37644,8 +37729,9 @@ async function runBatchEvaluation(options) {
37644
37729
  agentTimeoutMs
37645
37730
  } = options;
37646
37731
  const promptInputsList = [];
37732
+ const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
37647
37733
  for (const evalCase of evalCases) {
37648
- const promptInputs = await buildPromptInputs(evalCase);
37734
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
37649
37735
  if (promptDumpDir) {
37650
37736
  await dumpPrompt(promptDumpDir, evalCase, promptInputs);
37651
37737
  }
@@ -37751,7 +37837,8 @@ async function runEvalCase(options) {
37751
37837
  signal,
37752
37838
  judgeProvider
37753
37839
  } = options;
37754
- const promptInputs = await buildPromptInputs(evalCase);
37840
+ const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
37841
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
37755
37842
  if (promptDumpDir) {
37756
37843
  await dumpPrompt(promptDumpDir, evalCase, promptInputs);
37757
37844
  }
@@ -38040,7 +38127,8 @@ async function runLlmJudgeEvaluator(options) {
38040
38127
  async function resolveCustomPrompt(config2) {
38041
38128
  if (config2.promptPath) {
38042
38129
  try {
38043
- return await readTextFile(config2.promptPath);
38130
+ const content = await readTextFile(config2.promptPath);
38131
+ return content;
38044
38132
  } catch (error40) {
38045
38133
  const message = error40 instanceof Error ? error40.message : String(error40);
38046
38134
  console.warn(`Could not read custom prompt at ${config2.promptPath}: ${message}`);
@@ -38927,7 +39015,7 @@ function formatEvaluationSummary(summary) {
38927
39015
  }
38928
39016
 
38929
39017
  // ../../packages/core/dist/evaluation/validation/index.js
38930
- import { readFile as readFile6 } from "node:fs/promises";
39018
+ import { readFile as readFile7 } from "node:fs/promises";
38931
39019
  import { parse as parse6 } from "yaml";
38932
39020
  import { readFile as readFile23 } from "node:fs/promises";
38933
39021
  import path16 from "node:path";
@@ -38945,7 +39033,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
38945
39033
  var SCHEMA_CONFIG_V22 = "agentv-config-v2";
38946
39034
  async function detectFileType(filePath) {
38947
39035
  try {
38948
- const content = await readFile6(filePath, "utf8");
39036
+ const content = await readFile7(filePath, "utf8");
38949
39037
  const parsed = parse6(content);
38950
39038
  if (typeof parsed !== "object" || parsed === null) {
38951
39039
  return "unknown";
@@ -39794,9 +39882,9 @@ var TARGET_FILE_CANDIDATES = [
39794
39882
  path17.join(".agentv", "targets.yaml"),
39795
39883
  path17.join(".agentv", "targets.yml")
39796
39884
  ];
39797
- var ANSI_YELLOW6 = "\x1B[33m";
39798
- var ANSI_RED = "\x1B[31m";
39799
- var ANSI_RESET6 = "\x1B[0m";
39885
+ var ANSI_YELLOW7 = "\x1B[33m";
39886
+ var ANSI_RED2 = "\x1B[31m";
39887
+ var ANSI_RESET7 = "\x1B[0m";
39800
39888
  function isTTY() {
39801
39889
  return process.stdout.isTTY ?? false;
39802
39890
  }
@@ -39869,8 +39957,8 @@ async function selectTarget(options) {
39869
39957
  Warnings in ${targetsFilePath}:`);
39870
39958
  for (const warning of warnings) {
39871
39959
  const location = warning.location ? ` [${warning.location}]` : "";
39872
- const prefix = useColors ? `${ANSI_YELLOW6} \u26A0${ANSI_RESET6}` : " \u26A0";
39873
- const message = useColors ? `${ANSI_YELLOW6}${warning.message}${ANSI_RESET6}` : warning.message;
39960
+ const prefix = useColors ? `${ANSI_YELLOW7} \u26A0${ANSI_RESET7}` : " \u26A0";
39961
+ const message = useColors ? `${ANSI_YELLOW7}${warning.message}${ANSI_RESET7}` : warning.message;
39874
39962
  console.warn(`${prefix}${location} ${message}`);
39875
39963
  }
39876
39964
  console.warn("");
@@ -39881,8 +39969,8 @@ Warnings in ${targetsFilePath}:`);
39881
39969
  Errors in ${targetsFilePath}:`);
39882
39970
  for (const error40 of errors) {
39883
39971
  const location = error40.location ? ` [${error40.location}]` : "";
39884
- const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET6}` : " \u2717";
39885
- const message = useColors ? `${ANSI_RED}${error40.message}${ANSI_RESET6}` : error40.message;
39972
+ const prefix = useColors ? `${ANSI_RED2} \u2717${ANSI_RESET7}` : " \u2717";
39973
+ const message = useColors ? `${ANSI_RED2}${error40.message}${ANSI_RESET7}` : error40.message;
39886
39974
  console.error(`${prefix}${location} ${message}`);
39887
39975
  }
39888
39976
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -40082,7 +40170,7 @@ async function prepareFileMetadata(params) {
40082
40170
  const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
40083
40171
  const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
40084
40172
  const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
40085
- return { evalIds: filteredIds, selection, inlineTargetLabel };
40173
+ return { evalIds: filteredIds, evalCases, selection, inlineTargetLabel };
40086
40174
  }
40087
40175
  async function runWithLimit(items, limit, task) {
40088
40176
  const safeLimit = Math.max(1, limit);
@@ -40110,7 +40198,8 @@ async function runSingleEvalFile(params) {
40110
40198
  seenEvalCases,
40111
40199
  displayIdTracker,
40112
40200
  selection,
40113
- inlineTargetLabel
40201
+ inlineTargetLabel,
40202
+ evalCases
40114
40203
  } = params;
40115
40204
  await ensureFileExists(testFilePath, "Test file");
40116
40205
  const resolvedTargetSelection = selection;
@@ -40162,6 +40251,7 @@ async function runSingleEvalFile(params) {
40162
40251
  cache,
40163
40252
  useCache: options.cache,
40164
40253
  evalId: options.evalId,
40254
+ evalCases,
40165
40255
  verbose: options.verbose,
40166
40256
  maxConcurrency: resolvedWorkers,
40167
40257
  onResult: async (result) => {
@@ -40267,7 +40357,8 @@ async function runEvalCommand(input) {
40267
40357
  seenEvalCases,
40268
40358
  displayIdTracker,
40269
40359
  selection: targetPrep.selection,
40270
- inlineTargetLabel: targetPrep.inlineTargetLabel
40360
+ inlineTargetLabel: targetPrep.inlineTargetLabel,
40361
+ evalCases: targetPrep.evalCases
40271
40362
  });
40272
40363
  allResults.push(...result.results);
40273
40364
  if (result.promptDumpDir) {
@@ -40590,12 +40681,12 @@ function registerStatusCommand(program) {
40590
40681
  }
40591
40682
 
40592
40683
  // src/commands/validate/format-output.ts
40593
- var ANSI_RED2 = "\x1B[31m";
40594
- var ANSI_YELLOW7 = "\x1B[33m";
40684
+ var ANSI_RED3 = "\x1B[31m";
40685
+ var ANSI_YELLOW8 = "\x1B[33m";
40595
40686
  var ANSI_GREEN = "\x1B[32m";
40596
40687
  var ANSI_CYAN = "\x1B[36m";
40597
40688
  var ANSI_BOLD = "\x1B[1m";
40598
- var ANSI_RESET7 = "\x1B[0m";
40689
+ var ANSI_RESET8 = "\x1B[0m";
40599
40690
  function formatSummary(summary, useColors) {
40600
40691
  const lines = [];
40601
40692
  lines.push("");
@@ -40611,15 +40702,15 @@ function formatSummary(summary, useColors) {
40611
40702
  }
40612
40703
  function formatHeader(text2, useColors) {
40613
40704
  if (useColors) {
40614
- return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET7}`;
40705
+ return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET8}`;
40615
40706
  }
40616
40707
  return text2;
40617
40708
  }
40618
40709
  function formatFileResult(result, useColors) {
40619
40710
  const lines = [];
40620
40711
  const status = result.valid ? "\u2713" : "\u2717";
40621
- const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
40622
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET7}` : status;
40712
+ const statusColor = result.valid ? ANSI_GREEN : ANSI_RED3;
40713
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET8}` : status;
40623
40714
  const fileName = result.filePath;
40624
40715
  lines.push(`${statusText} ${fileName}`);
40625
40716
  if (result.errors.length > 0) {
@@ -40631,8 +40722,8 @@ function formatFileResult(result, useColors) {
40631
40722
  }
40632
40723
  function formatError2(error40, useColors) {
40633
40724
  const prefix = error40.severity === "error" ? " \u2717" : " \u26A0";
40634
- const color = error40.severity === "error" ? ANSI_RED2 : ANSI_YELLOW7;
40635
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET7}` : prefix;
40725
+ const color = error40.severity === "error" ? ANSI_RED3 : ANSI_YELLOW8;
40726
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET8}` : prefix;
40636
40727
  const location = error40.location ? ` [${error40.location}]` : "";
40637
40728
  return `${coloredPrefix}${location} ${error40.message}`;
40638
40729
  }
@@ -40645,15 +40736,15 @@ function formatStats(summary, useColors) {
40645
40736
  (r) => r.errors.some((e) => e.severity === "warning")
40646
40737
  ).length;
40647
40738
  if (useColors) {
40648
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET7}`);
40649
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET7}`);
40739
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET8}`);
40740
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET8}`);
40650
40741
  if (summary.invalidFiles > 0) {
40651
- lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET7}`);
40742
+ lines.push(`${ANSI_RED3}${invalidText}${ANSI_RESET8}`);
40652
40743
  } else {
40653
40744
  lines.push(invalidText);
40654
40745
  }
40655
40746
  if (filesWithWarnings > 0) {
40656
- lines.push(`${ANSI_YELLOW7}Files with warnings: ${filesWithWarnings}${ANSI_RESET7}`);
40747
+ lines.push(`${ANSI_YELLOW8}Files with warnings: ${filesWithWarnings}${ANSI_RESET8}`);
40657
40748
  }
40658
40749
  } else {
40659
40750
  lines.push(totalText);
@@ -40827,4 +40918,4 @@ export {
40827
40918
  createProgram,
40828
40919
  runCli
40829
40920
  };
40830
- //# sourceMappingURL=chunk-LVLBPRCV.js.map
40921
+ //# sourceMappingURL=chunk-HWGALLUR.js.map