@agentv/core 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -382,6 +382,7 @@ async function processMessages(options) {
382
382
  }
383
383
  async function loadEvalCases(evalFilePath, repoRoot, options) {
384
384
  const verbose = options?.verbose ?? false;
385
+ const evalIdFilter = options?.evalId;
385
386
  const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
386
387
  if (!await fileExists2(absoluteTestPath)) {
387
388
  throw new Error(`Test file not found: ${evalFilePath}`);
@@ -413,62 +414,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
413
414
  const results = [];
414
415
  for (const rawEvalcase of rawTestcases) {
415
416
  if (!isJsonObject(rawEvalcase)) {
416
- logWarning("Skipping invalid test case entry (expected object)");
417
+ logWarning("Skipping invalid eval case entry (expected object)");
417
418
  continue;
418
419
  }
419
420
  const evalcase = rawEvalcase;
420
421
  const id = asString(evalcase.id);
422
+ if (evalIdFilter && id !== evalIdFilter) {
423
+ continue;
424
+ }
421
425
  const conversationId = asString(evalcase.conversation_id);
422
426
  const outcome = asString(evalcase.outcome);
423
427
  const inputMessagesValue = evalcase.input_messages;
424
428
  const expectedMessagesValue = evalcase.expected_messages;
425
429
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
426
- logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
430
+ logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
427
431
  continue;
428
432
  }
429
433
  if (!Array.isArray(expectedMessagesValue)) {
430
- logWarning(`Test case '${id}' missing expected_messages array`);
434
+ logWarning(`Eval case '${id}' missing expected_messages array`);
431
435
  continue;
432
436
  }
433
437
  const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
434
438
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
435
- const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
436
- const userMessages = inputMessages.filter((message) => message.role === "user");
437
- const systemMessages = inputMessages.filter((message) => message.role === "system");
438
- if (assistantMessages.length === 0) {
439
- logWarning(`No assistant message found for test case: ${id}`);
439
+ if (expectedMessages.length === 0) {
440
+ logWarning(`No expected message found for eval case: ${id}`);
440
441
  continue;
441
442
  }
442
- if (assistantMessages.length > 1) {
443
- logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
444
- }
445
- if (systemMessages.length > 1) {
446
- logWarning(`Multiple system messages found for test case: ${id}, using first`);
447
- }
448
- let systemMessageContent;
449
- if (systemMessages.length > 0) {
450
- const content = systemMessages[0]?.content;
451
- if (typeof content === "string") {
452
- systemMessageContent = content;
453
- } else if (Array.isArray(content)) {
454
- const textParts = [];
455
- for (const segment of content) {
456
- if (isJsonObject(segment)) {
457
- const value = segment.value;
458
- if (typeof value === "string") {
459
- textParts.push(value);
460
- }
461
- }
462
- }
463
- if (textParts.length > 0) {
464
- systemMessageContent = textParts.join("\n\n");
465
- }
466
- }
443
+ if (expectedMessages.length > 1) {
444
+ logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
467
445
  }
468
446
  const guidelinePaths = [];
469
447
  const inputTextParts = [];
470
448
  const inputSegments = await processMessages({
471
- messages: userMessages,
449
+ messages: inputMessages,
472
450
  searchRoots,
473
451
  repoRootPath,
474
452
  guidelinePatterns,
@@ -478,7 +456,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
478
456
  verbose
479
457
  });
480
458
  const outputSegments = await processMessages({
481
- messages: assistantMessages,
459
+ messages: expectedMessages,
482
460
  searchRoots,
483
461
  repoRootPath,
484
462
  guidelinePatterns,
@@ -486,10 +464,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
486
464
  verbose
487
465
  });
488
466
  const codeSnippets = extractCodeBlocks(inputSegments);
489
- const assistantContent = assistantMessages[0]?.content;
490
- const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
467
+ const expectedContent = expectedMessages[0]?.content;
468
+ const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
491
469
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
492
- const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
470
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
493
471
  const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
494
472
  const userFilePaths = [];
495
473
  for (const segment of inputSegments) {
@@ -508,19 +486,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
508
486
  question,
509
487
  input_segments: inputSegments,
510
488
  output_segments: outputSegments,
511
- system_message: systemMessageContent,
512
489
  reference_answer: referenceAnswer,
513
490
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
514
491
  guideline_patterns: guidelinePatterns,
515
492
  file_paths: allFilePaths,
516
493
  code_snippets: codeSnippets,
517
494
  expected_outcome: outcome,
518
- evaluator: testCaseEvaluatorKind,
495
+ evaluator: evalCaseEvaluatorKind,
519
496
  evaluators
520
497
  };
521
498
  if (verbose) {
522
499
  console.log(`
523
- [Test Case: ${id}]`);
500
+ [Eval Case: ${id}]`);
524
501
  if (testCase.guideline_paths.length > 0) {
525
502
  console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
526
503
  for (const guidelinePath of testCase.guideline_paths) {
@@ -579,7 +556,7 @@ ${body}`);
579
556
  }
580
557
  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
581
558
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
582
- return { question, guidelines, systemMessage: testCase.system_message };
559
+ return { question, guidelines };
583
560
  }
584
561
  async function fileExists2(absolutePath) {
585
562
  try {
@@ -3095,7 +3072,6 @@ var CodeEvaluator = class {
3095
3072
  expected_outcome: context.evalCase.expected_outcome,
3096
3073
  reference_answer: context.evalCase.reference_answer,
3097
3074
  candidate_answer: context.candidate,
3098
- system_message: context.promptInputs.systemMessage ?? "",
3099
3075
  guideline_paths: context.evalCase.guideline_paths,
3100
3076
  input_files: context.evalCase.file_paths,
3101
3077
  input_segments: context.evalCase.input_segments
@@ -3337,7 +3313,7 @@ function validateConcurrency(concurrency) {
3337
3313
  // src/evaluation/orchestrator.ts
3338
3314
  async function runEvaluation(options) {
3339
3315
  const {
3340
- testFilePath,
3316
+ testFilePath: evalFilePath,
3341
3317
  repoRoot,
3342
3318
  target,
3343
3319
  targets,
@@ -3356,11 +3332,11 @@ async function runEvaluation(options) {
3356
3332
  onProgress
3357
3333
  } = options;
3358
3334
  const load = loadEvalCases;
3359
- const evalCases = await load(testFilePath, repoRoot, { verbose });
3335
+ const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
3360
3336
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
3361
3337
  if (filteredEvalCases.length === 0) {
3362
3338
  if (evalId) {
3363
- throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
3339
+ throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
3364
3340
  }
3365
3341
  return [];
3366
3342
  }
@@ -3739,8 +3715,7 @@ async function evaluateCandidate(options) {
3739
3715
  const rawRequest = {
3740
3716
  question: promptInputs.question,
3741
3717
  ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
3742
- guideline_paths: evalCase.guideline_paths,
3743
- system_message: promptInputs.systemMessage ?? ""
3718
+ guideline_paths: evalCase.guideline_paths
3744
3719
  };
3745
3720
  return {
3746
3721
  eval_id: evalCase.id,
@@ -4004,7 +3979,6 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
4004
3979
  question: promptInputs.question,
4005
3980
  ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
4006
3981
  guideline_paths: evalCase.guideline_paths,
4007
- system_message: promptInputs.systemMessage ?? "",
4008
3982
  error: message
4009
3983
  };
4010
3984
  return {