@agentv/core 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -382,6 +382,7 @@ async function processMessages(options) {
382
382
  }
383
383
  async function loadEvalCases(evalFilePath, repoRoot, options) {
384
384
  const verbose = options?.verbose ?? false;
385
+ const evalIdFilter = options?.evalId;
385
386
  const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
386
387
  if (!await fileExists2(absoluteTestPath)) {
387
388
  throw new Error(`Test file not found: ${evalFilePath}`);
@@ -413,62 +414,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
413
414
  const results = [];
414
415
  for (const rawEvalcase of rawTestcases) {
415
416
  if (!isJsonObject(rawEvalcase)) {
416
- logWarning("Skipping invalid test case entry (expected object)");
417
+ logWarning("Skipping invalid eval case entry (expected object)");
417
418
  continue;
418
419
  }
419
420
  const evalcase = rawEvalcase;
420
421
  const id = asString(evalcase.id);
422
+ if (evalIdFilter && id !== evalIdFilter) {
423
+ continue;
424
+ }
421
425
  const conversationId = asString(evalcase.conversation_id);
422
426
  const outcome = asString(evalcase.outcome);
423
427
  const inputMessagesValue = evalcase.input_messages;
424
428
  const expectedMessagesValue = evalcase.expected_messages;
425
429
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
426
- logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
430
+ logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
427
431
  continue;
428
432
  }
429
433
  if (!Array.isArray(expectedMessagesValue)) {
430
- logWarning(`Test case '${id}' missing expected_messages array`);
434
+ logWarning(`Eval case '${id}' missing expected_messages array`);
431
435
  continue;
432
436
  }
433
437
  const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
434
438
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
435
- const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
436
- const userMessages = inputMessages.filter((message) => message.role === "user");
437
- const systemMessages = inputMessages.filter((message) => message.role === "system");
438
- if (assistantMessages.length === 0) {
439
- logWarning(`No assistant message found for test case: ${id}`);
439
+ if (expectedMessages.length === 0) {
440
+ logWarning(`No expected message found for eval case: ${id}`);
440
441
  continue;
441
442
  }
442
- if (assistantMessages.length > 1) {
443
- logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
444
- }
445
- if (systemMessages.length > 1) {
446
- logWarning(`Multiple system messages found for test case: ${id}, using first`);
447
- }
448
- let systemMessageContent;
449
- if (systemMessages.length > 0) {
450
- const content = systemMessages[0]?.content;
451
- if (typeof content === "string") {
452
- systemMessageContent = content;
453
- } else if (Array.isArray(content)) {
454
- const textParts = [];
455
- for (const segment of content) {
456
- if (isJsonObject(segment)) {
457
- const value = segment.value;
458
- if (typeof value === "string") {
459
- textParts.push(value);
460
- }
461
- }
462
- }
463
- if (textParts.length > 0) {
464
- systemMessageContent = textParts.join("\n\n");
465
- }
466
- }
443
+ if (expectedMessages.length > 1) {
444
+ logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
467
445
  }
468
446
  const guidelinePaths = [];
469
447
  const inputTextParts = [];
470
448
  const inputSegments = await processMessages({
471
- messages: userMessages,
449
+ messages: inputMessages,
472
450
  searchRoots,
473
451
  repoRootPath,
474
452
  guidelinePatterns,
@@ -478,7 +456,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
478
456
  verbose
479
457
  });
480
458
  const outputSegments = await processMessages({
481
- messages: assistantMessages,
459
+ messages: expectedMessages,
482
460
  searchRoots,
483
461
  repoRootPath,
484
462
  guidelinePatterns,
@@ -486,10 +464,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
486
464
  verbose
487
465
  });
488
466
  const codeSnippets = extractCodeBlocks(inputSegments);
489
- const assistantContent = assistantMessages[0]?.content;
490
- const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
467
+ const expectedContent = expectedMessages[0]?.content;
468
+ const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
491
469
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
492
- const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
470
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
493
471
  const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
494
472
  const userFilePaths = [];
495
473
  for (const segment of inputSegments) {
@@ -508,19 +486,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
508
486
  question,
509
487
  input_segments: inputSegments,
510
488
  output_segments: outputSegments,
511
- system_message: systemMessageContent,
512
489
  reference_answer: referenceAnswer,
513
490
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
514
491
  guideline_patterns: guidelinePatterns,
515
492
  file_paths: allFilePaths,
516
493
  code_snippets: codeSnippets,
517
494
  expected_outcome: outcome,
518
- evaluator: testCaseEvaluatorKind,
495
+ evaluator: evalCaseEvaluatorKind,
519
496
  evaluators
520
497
  };
521
498
  if (verbose) {
522
499
  console.log(`
523
- [Test Case: ${id}]`);
500
+ [Eval Case: ${id}]`);
524
501
  if (testCase.guideline_paths.length > 0) {
525
502
  console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
526
503
  for (const guidelinePath of testCase.guideline_paths) {
@@ -579,7 +556,7 @@ ${body}`);
579
556
  }
580
557
  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
581
558
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
582
- return { question, guidelines, systemMessage: testCase.system_message };
559
+ return { question, guidelines };
583
560
  }
584
561
  async function fileExists2(absolutePath) {
585
562
  try {
@@ -1338,7 +1315,6 @@ function pathToFileUri(filePath) {
1338
1315
  var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
1339
1316
  var WORKSPACE_PREFIX = "agentv-codex-";
1340
1317
  var PROMPT_FILENAME = "prompt.md";
1341
- var FILES_DIR = "files";
1342
1318
  var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
1343
1319
  var CodexProvider = class {
1344
1320
  id;
@@ -1361,21 +1337,10 @@ var CodexProvider = class {
1361
1337
  }
1362
1338
  await this.ensureEnvironmentReady();
1363
1339
  const inputFiles = normalizeInputFiles2(request.inputFiles);
1364
- const originalGuidelines = new Set(
1365
- collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
1366
- );
1367
1340
  const workspaceRoot = await this.createWorkspace();
1368
1341
  const logger = await this.createStreamLogger(request).catch(() => void 0);
1369
1342
  try {
1370
- const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
1371
- inputFiles,
1372
- workspaceRoot,
1373
- originalGuidelines
1374
- );
1375
- const promptContent = buildPromptDocument(request, mirroredInputFiles, {
1376
- guidelinePatterns: request.guideline_patterns,
1377
- guidelineOverrides: guidelineMirrors
1378
- });
1343
+ const promptContent = buildPromptDocument(request, inputFiles);
1379
1344
  const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
1380
1345
  await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
1381
1346
  const args = this.buildCodexArgs();
@@ -1404,7 +1369,7 @@ var CodexProvider = class {
1404
1369
  executable: this.resolvedExecutable ?? this.config.executable,
1405
1370
  promptFile,
1406
1371
  workspace: workspaceRoot,
1407
- inputFiles: mirroredInputFiles,
1372
+ inputFiles,
1408
1373
  logFile: logger?.filePath
1409
1374
  }
1410
1375
  };
@@ -1459,37 +1424,6 @@ var CodexProvider = class {
1459
1424
  throw error;
1460
1425
  }
1461
1426
  }
1462
- async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
1463
- if (!inputFiles || inputFiles.length === 0) {
1464
- return {
1465
- mirroredInputFiles: void 0,
1466
- guidelineMirrors: /* @__PURE__ */ new Set()
1467
- };
1468
- }
1469
- const filesRoot = import_node_path5.default.join(workspaceRoot, FILES_DIR);
1470
- await (0, import_promises3.mkdir)(filesRoot, { recursive: true });
1471
- const mirrored = [];
1472
- const guidelineMirrors = /* @__PURE__ */ new Set();
1473
- const nameCounts = /* @__PURE__ */ new Map();
1474
- for (const inputFile of inputFiles) {
1475
- const absoluteSource = import_node_path5.default.resolve(inputFile);
1476
- const baseName = import_node_path5.default.basename(absoluteSource);
1477
- const count = nameCounts.get(baseName) ?? 0;
1478
- nameCounts.set(baseName, count + 1);
1479
- const finalName = count === 0 ? baseName : `${baseName}.${count}`;
1480
- const destination = import_node_path5.default.join(filesRoot, finalName);
1481
- await (0, import_promises3.copyFile)(absoluteSource, destination);
1482
- const resolvedDestination = import_node_path5.default.resolve(destination);
1483
- mirrored.push(resolvedDestination);
1484
- if (guidelineOriginals.has(absoluteSource)) {
1485
- guidelineMirrors.add(resolvedDestination);
1486
- }
1487
- }
1488
- return {
1489
- mirroredInputFiles: mirrored,
1490
- guidelineMirrors
1491
- };
1492
- }
1493
1427
  async createWorkspace() {
1494
1428
  return await (0, import_promises3.mkdtemp)(import_node_path5.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
1495
1429
  }
@@ -2460,23 +2394,25 @@ function resolveOptionalString(source, env, description, options) {
2460
2394
  if (trimmed.length === 0) {
2461
2395
  return void 0;
2462
2396
  }
2463
- const envValue = env[trimmed];
2464
- if (envValue !== void 0) {
2465
- if (envValue.trim().length === 0) {
2466
- throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
2397
+ const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
2398
+ if (envVarMatch) {
2399
+ const varName = envVarMatch[1];
2400
+ const envValue = env[varName];
2401
+ if (envValue !== void 0) {
2402
+ if (envValue.trim().length === 0) {
2403
+ throw new Error(`Environment variable '${varName}' for ${description} is empty`);
2404
+ }
2405
+ return envValue;
2467
2406
  }
2468
- return envValue;
2469
- }
2470
- const allowLiteral = options?.allowLiteral ?? false;
2471
- const optionalEnv = options?.optionalEnv ?? false;
2472
- const looksLikeEnv = isLikelyEnvReference(trimmed);
2473
- if (looksLikeEnv) {
2407
+ const optionalEnv = options?.optionalEnv ?? false;
2474
2408
  if (optionalEnv) {
2475
2409
  return void 0;
2476
2410
  }
2477
- if (!allowLiteral) {
2478
- throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
2479
- }
2411
+ throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
2412
+ }
2413
+ const allowLiteral = options?.allowLiteral ?? false;
2414
+ if (!allowLiteral) {
2415
+ throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
2480
2416
  }
2481
2417
  return trimmed;
2482
2418
  }
@@ -2523,9 +2459,6 @@ function resolveOptionalBoolean(source) {
2523
2459
  }
2524
2460
  throw new Error("expected boolean value");
2525
2461
  }
2526
- function isLikelyEnvReference(value) {
2527
- return /^[A-Z0-9_]+$/.test(value);
2528
- }
2529
2462
  function resolveOptionalStringArray(source, env, description) {
2530
2463
  if (source === void 0 || source === null) {
2531
2464
  return void 0;
@@ -2546,21 +2479,25 @@ function resolveOptionalStringArray(source, env, description) {
2546
2479
  if (trimmed.length === 0) {
2547
2480
  throw new Error(`${description}[${i}] cannot be empty`);
2548
2481
  }
2549
- const envValue = env[trimmed];
2550
- if (envValue !== void 0) {
2551
- if (envValue.trim().length === 0) {
2552
- throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
2482
+ const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
2483
+ if (envVarMatch) {
2484
+ const varName = envVarMatch[1];
2485
+ const envValue = env[varName];
2486
+ if (envValue !== void 0) {
2487
+ if (envValue.trim().length === 0) {
2488
+ throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
2489
+ }
2490
+ resolved.push(envValue);
2491
+ continue;
2553
2492
  }
2554
- resolved.push(envValue);
2555
- } else {
2556
- resolved.push(trimmed);
2493
+ throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
2557
2494
  }
2495
+ resolved.push(trimmed);
2558
2496
  }
2559
2497
  return resolved.length > 0 ? resolved : void 0;
2560
2498
  }
2561
2499
 
2562
2500
  // src/evaluation/providers/vscode.ts
2563
- var import_promises4 = require("fs/promises");
2564
2501
  var import_node_path6 = __toESM(require("path"), 1);
2565
2502
  var import_subagent = require("subagent");
2566
2503
  var VSCodeProvider = class {
@@ -2604,7 +2541,7 @@ var VSCodeProvider = class {
2604
2541
  }
2605
2542
  };
2606
2543
  }
2607
- const responseText = await (0, import_promises4.readFile)(session.responseFile, "utf8");
2544
+ const responseText = await readTextFile(session.responseFile);
2608
2545
  return {
2609
2546
  text: responseText,
2610
2547
  raw: {
@@ -2658,7 +2595,7 @@ var VSCodeProvider = class {
2658
2595
  }
2659
2596
  const responses = [];
2660
2597
  for (const [index, responseFile] of session.responseFiles.entries()) {
2661
- const responseText = await (0, import_promises4.readFile)(responseFile, "utf8");
2598
+ const responseText = await readTextFile(responseFile);
2662
2599
  responses.push({
2663
2600
  text: responseText,
2664
2601
  raw: {
@@ -2808,12 +2745,20 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
2808
2745
 
2809
2746
  // src/evaluation/providers/targets-file.ts
2810
2747
  var import_node_fs4 = require("fs");
2811
- var import_promises5 = require("fs/promises");
2748
+ var import_promises4 = require("fs/promises");
2812
2749
  var import_node_path7 = __toESM(require("path"), 1);
2813
2750
  var import_yaml2 = require("yaml");
2814
2751
 
2815
2752
  // src/evaluation/providers/types.ts
2816
- var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
2753
+ var AGENT_PROVIDER_KINDS = [
2754
+ "codex",
2755
+ "vscode",
2756
+ "vscode-insiders"
2757
+ ];
2758
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
2759
+ function isAgentProvider(provider) {
2760
+ return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
2761
+ }
2817
2762
 
2818
2763
  // src/evaluation/providers/targets-file.ts
2819
2764
  function isRecord(value) {
@@ -2870,7 +2815,7 @@ function assertTargetDefinition(value, index, filePath) {
2870
2815
  }
2871
2816
  async function fileExists3(filePath) {
2872
2817
  try {
2873
- await (0, import_promises5.access)(filePath, import_node_fs4.constants.F_OK);
2818
+ await (0, import_promises4.access)(filePath, import_node_fs4.constants.F_OK);
2874
2819
  return true;
2875
2820
  } catch {
2876
2821
  return false;
@@ -2881,7 +2826,7 @@ async function readTargetDefinitions(filePath) {
2881
2826
  if (!await fileExists3(absolutePath)) {
2882
2827
  throw new Error(`targets.yaml not found at ${absolutePath}`);
2883
2828
  }
2884
- const raw = await (0, import_promises5.readFile)(absolutePath, "utf8");
2829
+ const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
2885
2830
  const parsed = (0, import_yaml2.parse)(raw);
2886
2831
  if (!isRecord(parsed)) {
2887
2832
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -3127,7 +3072,6 @@ var CodeEvaluator = class {
3127
3072
  expected_outcome: context.evalCase.expected_outcome,
3128
3073
  reference_answer: context.evalCase.reference_answer,
3129
3074
  candidate_answer: context.candidate,
3130
- system_message: context.promptInputs.systemMessage ?? "",
3131
3075
  guideline_paths: context.evalCase.guideline_paths,
3132
3076
  input_files: context.evalCase.file_paths,
3133
3077
  input_segments: context.evalCase.input_segments
@@ -3227,7 +3171,7 @@ function substituteVariables(template, variables) {
3227
3171
 
3228
3172
  // src/evaluation/orchestrator.ts
3229
3173
  var import_node_crypto3 = require("crypto");
3230
- var import_promises6 = require("fs/promises");
3174
+ var import_promises5 = require("fs/promises");
3231
3175
  var import_node_path8 = __toESM(require("path"), 1);
3232
3176
 
3233
3177
  // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3369,7 +3313,7 @@ function validateConcurrency(concurrency) {
3369
3313
  // src/evaluation/orchestrator.ts
3370
3314
  async function runEvaluation(options) {
3371
3315
  const {
3372
- testFilePath,
3316
+ testFilePath: evalFilePath,
3373
3317
  repoRoot,
3374
3318
  target,
3375
3319
  targets,
@@ -3388,11 +3332,11 @@ async function runEvaluation(options) {
3388
3332
  onProgress
3389
3333
  } = options;
3390
3334
  const load = loadEvalCases;
3391
- const evalCases = await load(testFilePath, repoRoot, { verbose });
3335
+ const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
3392
3336
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
3393
3337
  if (filteredEvalCases.length === 0) {
3394
3338
  if (evalId) {
3395
- throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
3339
+ throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
3396
3340
  }
3397
3341
  return [];
3398
3342
  }
@@ -3546,7 +3490,8 @@ async function runEvaluation(options) {
3546
3490
  target.name,
3547
3491
  (now ?? (() => /* @__PURE__ */ new Date()))(),
3548
3492
  outcome.reason,
3549
- promptInputs
3493
+ promptInputs,
3494
+ primaryProvider
3550
3495
  );
3551
3496
  results.push(errorResult);
3552
3497
  if (onResult) {
@@ -3630,7 +3575,7 @@ async function runBatchEvaluation(options) {
3630
3575
  agentTimeoutMs
3631
3576
  });
3632
3577
  } catch (error) {
3633
- const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
3578
+ const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
3634
3579
  results.push(errorResult);
3635
3580
  if (onResult) {
3636
3581
  await onResult(errorResult);
@@ -3707,7 +3652,7 @@ async function runEvalCase(options) {
3707
3652
  attempt += 1;
3708
3653
  continue;
3709
3654
  }
3710
- return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
3655
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
3711
3656
  }
3712
3657
  }
3713
3658
  if (!providerResponse) {
@@ -3716,7 +3661,8 @@ async function runEvalCase(options) {
3716
3661
  target.name,
3717
3662
  nowFn(),
3718
3663
  lastError ?? new Error("Provider did not return a response"),
3719
- promptInputs
3664
+ promptInputs,
3665
+ provider
3720
3666
  );
3721
3667
  }
3722
3668
  if (cacheKey && cache && !cachedResponse) {
@@ -3736,7 +3682,7 @@ async function runEvalCase(options) {
3736
3682
  agentTimeoutMs
3737
3683
  });
3738
3684
  } catch (error) {
3739
- return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
3685
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
3740
3686
  }
3741
3687
  }
3742
3688
  async function evaluateCandidate(options) {
@@ -3768,9 +3714,8 @@ async function evaluateCandidate(options) {
3768
3714
  const completedAt = nowFn();
3769
3715
  const rawRequest = {
3770
3716
  question: promptInputs.question,
3771
- guidelines: promptInputs.guidelines,
3772
- guideline_paths: evalCase.guideline_paths,
3773
- system_message: promptInputs.systemMessage ?? ""
3717
+ ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
3718
+ guideline_paths: evalCase.guideline_paths
3774
3719
  };
3775
3720
  return {
3776
3721
  eval_id: evalCase.id,
@@ -3986,14 +3931,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
3986
3931
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
3987
3932
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
3988
3933
  const filePath = import_node_path8.default.resolve(directory, filename);
3989
- await (0, import_promises6.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
3934
+ await (0, import_promises5.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
3990
3935
  const payload = {
3991
3936
  eval_id: evalCase.id,
3992
3937
  question: promptInputs.question,
3993
3938
  guidelines: promptInputs.guidelines,
3994
3939
  guideline_paths: evalCase.guideline_paths
3995
3940
  };
3996
- await (0, import_promises6.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
3941
+ await (0, import_promises5.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
3997
3942
  }
3998
3943
  function sanitizeFilename(value) {
3999
3944
  if (!value) {
@@ -4028,13 +3973,12 @@ async function invokeProvider(provider, options) {
4028
3973
  }
4029
3974
  }
4030
3975
  }
4031
- function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
3976
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
4032
3977
  const message = error instanceof Error ? error.message : String(error);
4033
3978
  const rawRequest = {
4034
3979
  question: promptInputs.question,
4035
- guidelines: promptInputs.guidelines,
3980
+ ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
4036
3981
  guideline_paths: evalCase.guideline_paths,
4037
- system_message: promptInputs.systemMessage ?? "",
4038
3982
  error: message
4039
3983
  };
4040
3984
  return {