agentv 3.5.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-EFR4JHPL.js
304
+ // ../../packages/core/dist/chunk-2IZOTQ25.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-EFR4JHPL.js
422
+ // ../../packages/core/dist/chunk-2IZOTQ25.js
423
423
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
424
424
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
425
425
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -14036,12 +14036,10 @@ function computeTraceSummary(messages) {
14036
14036
  }
14037
14037
  }
14038
14038
  }
14039
- const toolNames = Object.keys(toolCallCounts).sort();
14040
14039
  return {
14041
14040
  trace: {
14042
14041
  eventCount: totalToolCalls,
14043
- toolNames,
14044
- toolCallsByName: toolCallCounts,
14042
+ toolCalls: toolCallCounts,
14045
14043
  errorCount: 0,
14046
14044
  llmCallCount,
14047
14045
  ...hasAnyDuration ? { toolDurations } : {}
@@ -14065,7 +14063,7 @@ var DEFAULT_EXPLORATION_TOOLS = [
14065
14063
  function explorationRatio(summary, explorationTools = DEFAULT_EXPLORATION_TOOLS) {
14066
14064
  if (summary.eventCount === 0) return void 0;
14067
14065
  const explorationCalls = explorationTools.reduce(
14068
- (sum, tool2) => sum + (summary.toolCallsByName[tool2] ?? 0),
14066
+ (sum, tool2) => sum + (summary.toolCalls[tool2] ?? 0),
14069
14067
  0
14070
14068
  );
14071
14069
  return explorationCalls / summary.eventCount;
@@ -14655,14 +14653,8 @@ function logWarning(message) {
14655
14653
  console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
14656
14654
  }
14657
14655
  var TEMPLATE_VARIABLES = {
14658
- /** @deprecated Use OUTPUT_TEXT instead */
14659
- ANSWER: "answer",
14660
14656
  EXPECTED_OUTPUT: "expected_output",
14661
- /** @deprecated Use INPUT_TEXT instead */
14662
- QUESTION: "question",
14663
14657
  CRITERIA: "criteria",
14664
- /** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
14665
- REFERENCE_ANSWER: "reference_answer",
14666
14658
  INPUT: "input",
14667
14659
  OUTPUT: "output",
14668
14660
  FILE_CHANGES: "file_changes",
@@ -14672,9 +14664,8 @@ var TEMPLATE_VARIABLES = {
14672
14664
  };
14673
14665
  var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
14674
14666
  var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
14675
- TEMPLATE_VARIABLES.ANSWER,
14676
- TEMPLATE_VARIABLES.EXPECTED_OUTPUT,
14677
- TEMPLATE_VARIABLES.OUTPUT_TEXT
14667
+ TEMPLATE_VARIABLES.OUTPUT_TEXT,
14668
+ TEMPLATE_VARIABLES.EXPECTED_OUTPUT
14678
14669
  ]);
14679
14670
  var ANSI_YELLOW3 = "\x1B[33m";
14680
14671
  var ANSI_RESET4 = "\x1B[0m";
@@ -14695,13 +14686,13 @@ function validateTemplateVariables(content, source) {
14695
14686
  }
14696
14687
  match = variablePattern.exec(content);
14697
14688
  }
14698
- const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
14689
+ const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
14699
14690
  const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
14700
14691
  const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
14701
14692
  if (!hasRequiredFields) {
14702
14693
  throw new Error(
14703
14694
  `Missing required fields. Must include at least one of:
14704
- - {{ ${TEMPLATE_VARIABLES.ANSWER} }} or {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
14695
+ - {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
14705
14696
  - {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
14706
14697
  );
14707
14698
  }
@@ -17843,6 +17834,8 @@ async function invokeModel(options) {
17843
17834
  const { model, request, defaults, retryConfig, providerOptions } = options;
17844
17835
  const chatPrompt = buildChatPrompt(request);
17845
17836
  const { temperature, maxOutputTokens } = resolveModelSettings(request, defaults);
17837
+ const startTime = (/* @__PURE__ */ new Date()).toISOString();
17838
+ const startMs = Date.now();
17846
17839
  const result = await withRetry(
17847
17840
  () => generateText({
17848
17841
  model,
@@ -17856,9 +17849,11 @@ async function invokeModel(options) {
17856
17849
  retryConfig,
17857
17850
  request.signal
17858
17851
  );
17859
- return mapResponse(result);
17852
+ const endTime = (/* @__PURE__ */ new Date()).toISOString();
17853
+ const durationMs = Date.now() - startMs;
17854
+ return mapResponse(result, { durationMs, startTime, endTime });
17860
17855
  }
17861
- function mapResponse(result) {
17856
+ function mapResponse(result, timing) {
17862
17857
  const content = result.text ?? "";
17863
17858
  const rawUsage = result.totalUsage ?? result.usage;
17864
17859
  const reasoning = rawUsage?.outputTokenDetails?.reasoningTokens ?? void 0;
@@ -17873,7 +17868,10 @@ function mapResponse(result) {
17873
17868
  raw: result,
17874
17869
  usage: toJsonObject(rawUsage),
17875
17870
  output: [{ role: "assistant", content }],
17876
- tokenUsage
17871
+ tokenUsage,
17872
+ durationMs: timing?.durationMs,
17873
+ startTime: timing?.startTime,
17874
+ endTime: timing?.endTime
17877
17875
  };
17878
17876
  }
17879
17877
  function toJsonObject(value) {
@@ -18731,10 +18729,12 @@ var ClaudeSdkProvider = class {
18731
18729
  if (usage) {
18732
18730
  const inputTokens = (usage.input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0);
18733
18731
  const outputTokens = usage.output_tokens ?? 0;
18732
+ const reasoningTokens = usage.reasoning_tokens ?? void 0;
18734
18733
  tokenUsage = {
18735
18734
  input: inputTokens,
18736
18735
  output: outputTokens,
18737
- cached: usage.cache_read_input_tokens ?? void 0
18736
+ cached: usage.cache_read_input_tokens ?? void 0,
18737
+ reasoning: reasoningTokens
18738
18738
  };
18739
18739
  request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? "claude", tokenUsage);
18740
18740
  }
@@ -19730,7 +19730,8 @@ ${basePrompt}` : basePrompt;
19730
19730
  onUsage({
19731
19731
  input: usage.input_tokens ?? 0,
19732
19732
  output: usage.output_tokens ?? 0,
19733
- cached: usage.cached_input_tokens ?? void 0
19733
+ cached: usage.cached_input_tokens ?? void 0,
19734
+ reasoning: usage.reasoning_tokens ?? void 0
19734
19735
  });
19735
19736
  }
19736
19737
  }
@@ -21698,10 +21699,12 @@ function extractTokenUsage(events) {
21698
21699
  output: output ?? 0
21699
21700
  };
21700
21701
  const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
21701
- if (cached !== void 0) {
21702
- return { ...result, cached };
21703
- }
21704
- return result;
21702
+ const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
21703
+ return {
21704
+ ...result,
21705
+ ...cached !== void 0 ? { cached } : {},
21706
+ ...reasoning !== void 0 ? { reasoning } : {}
21707
+ };
21705
21708
  }
21706
21709
  }
21707
21710
  const messages = record.messages;
@@ -23927,11 +23930,9 @@ var CodeEvaluator = class {
23927
23930
  }
23928
23931
  }
23929
23932
  const payload = {
23930
- question: context2.evalCase.question,
23931
23933
  criteria: context2.evalCase.criteria,
23932
23934
  expectedOutput: context2.evalCase.expected_output,
23933
- referenceAnswer: context2.evalCase.reference_answer,
23934
- answer: context2.candidate,
23935
+ outputText: context2.candidate,
23935
23936
  output: outputForPayload,
23936
23937
  outputPath,
23937
23938
  guidelineFiles: context2.evalCase.guideline_paths,
@@ -23948,9 +23949,7 @@ var CodeEvaluator = class {
23948
23949
  fileChanges: context2.fileChanges ?? null,
23949
23950
  workspacePath: context2.workspacePath ?? null,
23950
23951
  config: this.config ?? null,
23951
- // Text convenience accessors (new names, always strings)
23952
23952
  inputText: context2.evalCase.question,
23953
- outputText: context2.candidate,
23954
23953
  expectedOutputText: context2.evalCase.reference_answer ?? ""
23955
23954
  };
23956
23955
  const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
@@ -24109,13 +24108,13 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
24109
24108
  {{${TEMPLATE_VARIABLES.CRITERIA}}}
24110
24109
 
24111
24110
  [[ ## question ## ]]
24112
- {{${TEMPLATE_VARIABLES.QUESTION}}}
24111
+ {{${TEMPLATE_VARIABLES.INPUT_TEXT}}}
24113
24112
 
24114
24113
  [[ ## reference_answer ## ]]
24115
- {{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
24114
+ {{${TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT}}}
24116
24115
 
24117
24116
  [[ ## answer ## ]]
24118
- {{${TEMPLATE_VARIABLES.ANSWER}}}`;
24117
+ {{${TEMPLATE_VARIABLES.OUTPUT_TEXT}}}`;
24119
24118
  var freeformEvaluationSchema = external_exports2.object({
24120
24119
  score: external_exports2.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
24121
24120
  assertions: external_exports2.array(
@@ -24193,12 +24192,8 @@ var LlmGraderEvaluator = class {
24193
24192
  2
24194
24193
  ),
24195
24194
  [TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify(context2.output ?? [], null, 2),
24196
- [TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
24197
- [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
24198
24195
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
24199
- [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
24200
24196
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
24201
- // Text convenience accessors (new names, always strings)
24202
24197
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
24203
24198
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
24204
24199
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
@@ -24503,10 +24498,10 @@ ${context2.fileChanges}`;
24503
24498
  buildAgentUserPrompt(context2) {
24504
24499
  const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
24505
24500
  const variables = {
24506
- [TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
24507
- [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
24508
24501
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
24509
- [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
24502
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
24503
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
24504
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
24510
24505
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
24511
24506
  };
24512
24507
  if (this.evaluatorTemplate) {
@@ -24559,10 +24554,10 @@ ${context2.fileChanges}`;
24559
24554
  const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
24560
24555
  if (this.evaluatorTemplate) {
24561
24556
  const variables = {
24562
- [TEMPLATE_VARIABLES.ANSWER]: context2.candidate.trim(),
24563
- [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
24564
24557
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
24565
- [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
24558
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
24559
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
24560
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim(),
24566
24561
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
24567
24562
  };
24568
24563
  const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
@@ -25978,7 +25973,10 @@ var COPILOT_MATCHER = {
25978
25973
  skillTools: ["Skill", "skill"],
25979
25974
  skillInputField: "skill",
25980
25975
  readTools: ["Read File", "readFile", "Read", "readTextFile"],
25981
- readInputField: "file_path"
25976
+ readInputField: "file_path",
25977
+ skillToolPrefixes: ["Using skill: "],
25978
+ readToolPrefixes: ["Viewing "],
25979
+ readInputFields: ["file_path", "path"]
25982
25980
  };
25983
25981
  var PROVIDER_TOOL_SEMANTICS = {
25984
25982
  claude: CLAUDE_MATCHER,
@@ -26020,12 +26018,22 @@ var SkillTriggerEvaluator = class {
26020
26018
  triggered = true;
26021
26019
  evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
26022
26020
  }
26021
+ } else if (matcher.skillToolPrefixes?.some(
26022
+ (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
26023
+ )) {
26024
+ triggered = true;
26025
+ evidence = `Skill tool invoked via tool name "${firstTool.tool}"`;
26023
26026
  } else if (matcher.readTools.includes(firstTool.tool)) {
26024
- const filePath = String(input[matcher.readInputField] ?? "");
26027
+ const filePath = this.readPathFromInput(input, matcher);
26025
26028
  if (filePath.includes(skillName)) {
26026
26029
  triggered = true;
26027
26030
  evidence = `Read tool loaded skill file: ${filePath}`;
26028
26031
  }
26032
+ } else if (matcher.readToolPrefixes?.some(
26033
+ (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
26034
+ )) {
26035
+ triggered = true;
26036
+ evidence = `Read tool loaded skill file via tool name "${firstTool.tool}"`;
26029
26037
  }
26030
26038
  }
26031
26039
  const pass = triggered === shouldTrigger;
@@ -26054,6 +26062,16 @@ var SkillTriggerEvaluator = class {
26054
26062
  expectedAspectCount: 1
26055
26063
  };
26056
26064
  }
26065
+ readPathFromInput(input, matcher) {
26066
+ const fields = matcher.readInputFields ?? [matcher.readInputField];
26067
+ for (const field of fields) {
26068
+ const value = input[field];
26069
+ if (value !== void 0 && value !== null) {
26070
+ return String(value);
26071
+ }
26072
+ }
26073
+ return "";
26074
+ }
26057
26075
  };
26058
26076
  function assembleLlmGraderPrompt(input) {
26059
26077
  const {
@@ -26086,12 +26104,8 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
26086
26104
  [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
26087
26105
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
26088
26106
  [TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
26089
- [TEMPLATE_VARIABLES.ANSWER]: candidate.trim(),
26090
- [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
26091
26107
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
26092
- [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
26093
26108
  [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
26094
- // Text convenience accessors (new names, always strings)
26095
26109
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
26096
26110
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
26097
26111
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
@@ -26419,11 +26433,9 @@ var ToolTrajectoryEvaluator = class {
26419
26433
  for (const call of toolCalls) {
26420
26434
  toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
26421
26435
  }
26422
- const toolNames = Object.keys(toolCallsByName).sort();
26423
26436
  return {
26424
26437
  eventCount: toolCalls.length,
26425
- toolNames,
26426
- toolCallsByName,
26438
+ toolCalls: toolCallsByName,
26427
26439
  errorCount: 0
26428
26440
  };
26429
26441
  }
@@ -26441,7 +26453,7 @@ var ToolTrajectoryEvaluator = class {
26441
26453
  const assertions = [];
26442
26454
  for (const toolName of toolNames) {
26443
26455
  const required = minimums[toolName];
26444
- const actual = summary.toolCallsByName[toolName] ?? 0;
26456
+ const actual = summary.toolCalls[toolName] ?? 0;
26445
26457
  if (actual >= required) {
26446
26458
  assertions.push({
26447
26459
  text: `${toolName}: called ${actual} times (required >=${required})`,
@@ -27125,11 +27137,9 @@ async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
27125
27137
  }
27126
27138
  async function executePromptTemplate(script, context2, config, timeoutMs) {
27127
27139
  const payload = {
27128
- question: context2.evalCase.question,
27129
27140
  criteria: context2.evalCase.criteria,
27130
27141
  expectedOutput: context2.evalCase.expected_output,
27131
- referenceAnswer: context2.evalCase.reference_answer,
27132
- answer: context2.candidate,
27142
+ outputText: context2.candidate,
27133
27143
  output: context2.output ?? null,
27134
27144
  guidelineFiles: context2.evalCase.guideline_paths,
27135
27145
  inputFiles: context2.evalCase.file_paths.filter(
@@ -27140,9 +27150,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
27140
27150
  fileChanges: context2.fileChanges ?? null,
27141
27151
  workspacePath: context2.workspacePath ?? null,
27142
27152
  config: config ?? context2.config ?? null,
27143
- // Text convenience accessors (new names, always strings)
27144
27153
  inputText: context2.evalCase.question,
27145
- outputText: context2.candidate,
27146
27154
  expectedOutputText: context2.evalCase.reference_answer ?? ""
27147
27155
  };
27148
27156
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
@@ -28762,7 +28770,7 @@ async function runEvaluation(options) {
28762
28770
  dataset: evalCase.dataset,
28763
28771
  score: 0,
28764
28772
  assertions: [],
28765
- answer: "",
28773
+ outputText: "",
28766
28774
  target: target.name,
28767
28775
  error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
28768
28776
  budgetExceeded: true,
@@ -28798,7 +28806,7 @@ async function runEvaluation(options) {
28798
28806
  dataset: evalCase.dataset,
28799
28807
  score: 0,
28800
28808
  assertions: [],
28801
- answer: "",
28809
+ outputText: "",
28802
28810
  target: target.name,
28803
28811
  error: errorMsg,
28804
28812
  executionStatus: "execution_error",
@@ -29063,7 +29071,7 @@ async function runBatchEvaluation(options) {
29063
29071
  const providerResponse = batchResponse[i];
29064
29072
  const output = providerResponse.output;
29065
29073
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
29066
- const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolNames: [], toolCallsByName: {}, errorCount: 0 } } : void 0;
29074
+ const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
29067
29075
  const merged = computed ? mergeExecutionMetrics(computed, {
29068
29076
  tokenUsage: providerResponse.tokenUsage,
29069
29077
  costUsd: providerResponse.costUsd,
@@ -29460,7 +29468,7 @@ async function runEvalCase(options) {
29460
29468
  }
29461
29469
  const output = providerResponse.output;
29462
29470
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
29463
- const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolNames: [], toolCallsByName: {}, errorCount: 0 } } : void 0;
29471
+ const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
29464
29472
  const merged = computed ? mergeExecutionMetrics(computed, {
29465
29473
  tokenUsage: providerResponse.tokenUsage,
29466
29474
  costUsd: providerResponse.costUsd,
@@ -29765,7 +29773,7 @@ async function evaluateCandidate(options) {
29765
29773
  conversationId: evalCase.conversation_id,
29766
29774
  score: score.score,
29767
29775
  assertions: score.assertions,
29768
- answer: candidate,
29776
+ outputText: candidate,
29769
29777
  target: target.name,
29770
29778
  tokenUsage,
29771
29779
  costUsd,
@@ -30121,7 +30129,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
30121
30129
  conversationId: evalCase.conversation_id,
30122
30130
  score: 0,
30123
30131
  assertions: [{ text: `Error: ${message}`, passed: false }],
30124
- answer: `Error occurred: ${message}`,
30132
+ outputText: `Error occurred: ${message}`,
30125
30133
  target: targetName,
30126
30134
  requests,
30127
30135
  input,
@@ -30638,7 +30646,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
30638
30646
  return false;
30639
30647
  }
30640
30648
  var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
30641
- "answer",
30649
+ "outputText",
30642
30650
  "requests",
30643
30651
  "trace",
30644
30652
  "workspacePath",
@@ -30810,14 +30818,17 @@ var OtelTraceExporter = class {
30810
30818
  rootSpan.setAttribute("agentv.target", result.target);
30811
30819
  if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
30812
30820
  rootSpan.setAttribute("agentv.score", result.score);
30813
- if (captureContent) rootSpan.setAttribute("agentv.answer", result.answer);
30821
+ if (captureContent) rootSpan.setAttribute("agentv.output_text", result.outputText);
30814
30822
  if (result.durationMs != null)
30815
30823
  rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
30816
30824
  if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
30817
30825
  if (result.trace) {
30818
30826
  const t = result.trace;
30819
30827
  rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
30820
- rootSpan.setAttribute("agentv.trace.tool_names", t.toolNames.join(","));
30828
+ rootSpan.setAttribute(
30829
+ "agentv.trace.tool_names",
30830
+ Object.keys(t.toolCalls).sort().join(",")
30831
+ );
30821
30832
  if (t.llmCallCount != null)
30822
30833
  rootSpan.setAttribute("agentv.trace.llm_call_count", t.llmCallCount);
30823
30834
  }
@@ -31237,4 +31248,4 @@ export {
31237
31248
  OtelStreamingObserver,
31238
31249
  createAgentKernel
31239
31250
  };
31240
- //# sourceMappingURL=chunk-D6G4N2H2.js.map
31251
+ //# sourceMappingURL=chunk-XGG64VIY.js.map