agentv 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11386,6 +11386,7 @@ async function processMessages(options) {
11386
11386
  }
11387
11387
  async function loadEvalCases(evalFilePath, repoRoot, options) {
11388
11388
  const verbose = options?.verbose ?? false;
11389
+ const evalIdFilter = options?.evalId;
11389
11390
  const absoluteTestPath = path8.resolve(evalFilePath);
11390
11391
  if (!await fileExists2(absoluteTestPath)) {
11391
11392
  throw new Error(`Test file not found: ${evalFilePath}`);
@@ -11417,62 +11418,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11417
11418
  const results = [];
11418
11419
  for (const rawEvalcase of rawTestcases) {
11419
11420
  if (!isJsonObject(rawEvalcase)) {
11420
- logWarning("Skipping invalid test case entry (expected object)");
11421
+ logWarning("Skipping invalid eval case entry (expected object)");
11421
11422
  continue;
11422
11423
  }
11423
11424
  const evalcase = rawEvalcase;
11424
11425
  const id = asString(evalcase.id);
11426
+ if (evalIdFilter && id !== evalIdFilter) {
11427
+ continue;
11428
+ }
11425
11429
  const conversationId = asString(evalcase.conversation_id);
11426
11430
  const outcome = asString(evalcase.outcome);
11427
11431
  const inputMessagesValue = evalcase.input_messages;
11428
11432
  const expectedMessagesValue = evalcase.expected_messages;
11429
11433
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
11430
- logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
11434
+ logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
11431
11435
  continue;
11432
11436
  }
11433
11437
  if (!Array.isArray(expectedMessagesValue)) {
11434
- logWarning(`Test case '${id}' missing expected_messages array`);
11438
+ logWarning(`Eval case '${id}' missing expected_messages array`);
11435
11439
  continue;
11436
11440
  }
11437
11441
  const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
11438
11442
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
11439
- const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
11440
- const userMessages = inputMessages.filter((message) => message.role === "user");
11441
- const systemMessages = inputMessages.filter((message) => message.role === "system");
11442
- if (assistantMessages.length === 0) {
11443
- logWarning(`No assistant message found for test case: ${id}`);
11443
+ if (expectedMessages.length === 0) {
11444
+ logWarning(`No expected message found for eval case: ${id}`);
11444
11445
  continue;
11445
11446
  }
11446
- if (assistantMessages.length > 1) {
11447
- logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
11448
- }
11449
- if (systemMessages.length > 1) {
11450
- logWarning(`Multiple system messages found for test case: ${id}, using first`);
11451
- }
11452
- let systemMessageContent;
11453
- if (systemMessages.length > 0) {
11454
- const content = systemMessages[0]?.content;
11455
- if (typeof content === "string") {
11456
- systemMessageContent = content;
11457
- } else if (Array.isArray(content)) {
11458
- const textParts = [];
11459
- for (const segment of content) {
11460
- if (isJsonObject(segment)) {
11461
- const value = segment.value;
11462
- if (typeof value === "string") {
11463
- textParts.push(value);
11464
- }
11465
- }
11466
- }
11467
- if (textParts.length > 0) {
11468
- systemMessageContent = textParts.join("\n\n");
11469
- }
11470
- }
11447
+ if (expectedMessages.length > 1) {
11448
+ logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
11471
11449
  }
11472
11450
  const guidelinePaths = [];
11473
11451
  const inputTextParts = [];
11474
11452
  const inputSegments = await processMessages({
11475
- messages: userMessages,
11453
+ messages: inputMessages,
11476
11454
  searchRoots,
11477
11455
  repoRootPath,
11478
11456
  guidelinePatterns,
@@ -11482,7 +11460,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11482
11460
  verbose
11483
11461
  });
11484
11462
  const outputSegments = await processMessages({
11485
- messages: assistantMessages,
11463
+ messages: expectedMessages,
11486
11464
  searchRoots,
11487
11465
  repoRootPath,
11488
11466
  guidelinePatterns,
@@ -11490,10 +11468,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11490
11468
  verbose
11491
11469
  });
11492
11470
  const codeSnippets = extractCodeBlocks(inputSegments);
11493
- const assistantContent = assistantMessages[0]?.content;
11494
- const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
11471
+ const expectedContent = expectedMessages[0]?.content;
11472
+ const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
11495
11473
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
11496
- const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
11474
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
11497
11475
  const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
11498
11476
  const userFilePaths = [];
11499
11477
  for (const segment of inputSegments) {
@@ -11512,19 +11490,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11512
11490
  question,
11513
11491
  input_segments: inputSegments,
11514
11492
  output_segments: outputSegments,
11515
- system_message: systemMessageContent,
11516
11493
  reference_answer: referenceAnswer,
11517
11494
  guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
11518
11495
  guideline_patterns: guidelinePatterns,
11519
11496
  file_paths: allFilePaths,
11520
11497
  code_snippets: codeSnippets,
11521
11498
  expected_outcome: outcome,
11522
- evaluator: testCaseEvaluatorKind,
11499
+ evaluator: evalCaseEvaluatorKind,
11523
11500
  evaluators
11524
11501
  };
11525
11502
  if (verbose) {
11526
11503
  console.log(`
11527
- [Test Case: ${id}]`);
11504
+ [Eval Case: ${id}]`);
11528
11505
  if (testCase.guideline_paths.length > 0) {
11529
11506
  console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
11530
11507
  for (const guidelinePath of testCase.guideline_paths) {
@@ -11583,7 +11560,7 @@ ${body}`);
11583
11560
  }
11584
11561
  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
11585
11562
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
11586
- return { question, guidelines, systemMessage: testCase.system_message };
11563
+ return { question, guidelines };
11587
11564
  }
11588
11565
  async function fileExists2(absolutePath) {
11589
11566
  try {
@@ -14031,7 +14008,6 @@ var CodeEvaluator = class {
14031
14008
  expected_outcome: context2.evalCase.expected_outcome,
14032
14009
  reference_answer: context2.evalCase.reference_answer,
14033
14010
  candidate_answer: context2.candidate,
14034
- system_message: context2.promptInputs.systemMessage ?? "",
14035
14011
  guideline_paths: context2.evalCase.guideline_paths,
14036
14012
  input_files: context2.evalCase.file_paths,
14037
14013
  input_segments: context2.evalCase.input_segments
@@ -14262,7 +14238,7 @@ function validateConcurrency(concurrency) {
14262
14238
  }
14263
14239
  async function runEvaluation(options) {
14264
14240
  const {
14265
- testFilePath,
14241
+ testFilePath: evalFilePath,
14266
14242
  repoRoot,
14267
14243
  target,
14268
14244
  targets,
@@ -14281,11 +14257,11 @@ async function runEvaluation(options) {
14281
14257
  onProgress
14282
14258
  } = options;
14283
14259
  const load = loadEvalCases;
14284
- const evalCases = await load(testFilePath, repoRoot, { verbose });
14260
+ const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
14285
14261
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
14286
14262
  if (filteredEvalCases.length === 0) {
14287
14263
  if (evalId) {
14288
- throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
14264
+ throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
14289
14265
  }
14290
14266
  return [];
14291
14267
  }
@@ -14664,8 +14640,7 @@ async function evaluateCandidate(options) {
14664
14640
  const rawRequest = {
14665
14641
  question: promptInputs.question,
14666
14642
  ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
14667
- guideline_paths: evalCase.guideline_paths,
14668
- system_message: promptInputs.systemMessage ?? ""
14643
+ guideline_paths: evalCase.guideline_paths
14669
14644
  };
14670
14645
  return {
14671
14646
  eval_id: evalCase.id,
@@ -14929,7 +14904,6 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
14929
14904
  question: promptInputs.question,
14930
14905
  ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
14931
14906
  guideline_paths: evalCase.guideline_paths,
14932
- system_message: promptInputs.systemMessage ?? "",
14933
14907
  error: message
14934
14908
  };
14935
14909
  return {
@@ -16837,7 +16811,7 @@ async function prepareFileMetadata(params) {
16837
16811
  });
16838
16812
  const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
16839
16813
  const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
16840
- const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose });
16814
+ const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
16841
16815
  const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
16842
16816
  return { evalIds: filteredIds, selection, inlineTargetLabel };
16843
16817
  }
@@ -17538,4 +17512,4 @@ export {
17538
17512
  createProgram,
17539
17513
  runCli
17540
17514
  };
17541
- //# sourceMappingURL=chunk-VHNACQDK.js.map
17515
+ //# sourceMappingURL=chunk-GWH4WZTW.js.map