agentv 3.2.4 → 3.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-5SQK5FXC.js
304
+ // ../../packages/core/dist/chunk-DVFNM65P.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-5SQK5FXC.js
422
+ // ../../packages/core/dist/chunk-DVFNM65P.js
423
423
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
424
424
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
425
425
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -17856,7 +17856,14 @@ async function invokeModel(options) {
17856
17856
  function mapResponse(result) {
17857
17857
  const content = result.text ?? "";
17858
17858
  const rawUsage = result.totalUsage ?? result.usage;
17859
- const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
17859
+ const reasoning = rawUsage?.outputTokenDetails?.reasoningTokens ?? void 0;
17860
+ const cached = rawUsage?.inputTokenDetails?.cacheReadTokens ?? void 0;
17861
+ const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? {
17862
+ input: rawUsage.inputTokens,
17863
+ output: rawUsage.outputTokens,
17864
+ ...reasoning != null ? { reasoning } : {},
17865
+ ...cached != null ? { cached } : {}
17866
+ } : void 0;
17860
17867
  return {
17861
17868
  raw: result,
17862
17869
  usage: toJsonObject(rawUsage),
@@ -18191,10 +18198,12 @@ var ClaudeCliProvider = class {
18191
18198
  if (usage) {
18192
18199
  const inputTokens = (usage.input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0);
18193
18200
  const outputTokens = usage.output_tokens ?? 0;
18201
+ const reasoningTokens = usage.reasoning_tokens ?? void 0;
18194
18202
  tokenUsage = {
18195
18203
  input: inputTokens,
18196
18204
  output: outputTokens,
18197
- cached: usage.cache_read_input_tokens ?? void 0
18205
+ cached: usage.cache_read_input_tokens ?? void 0,
18206
+ reasoning: reasoningTokens
18198
18207
  };
18199
18208
  request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? "claude", tokenUsage);
18200
18209
  }
@@ -29279,6 +29288,7 @@ async function runEvalCase(options) {
29279
29288
  } catch {
29280
29289
  }
29281
29290
  }
29291
+ const caseStartMs = Date.now();
29282
29292
  const attemptBudget = (maxRetries ?? 0) + 1;
29283
29293
  let attempt = 0;
29284
29294
  let providerResponse = cachedResponse;
@@ -29427,9 +29437,22 @@ async function runEvalCase(options) {
29427
29437
  fileChanges,
29428
29438
  workspacePath
29429
29439
  });
29440
+ const totalDurationMs = Date.now() - caseStartMs;
29441
+ const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
29442
+ const evalRunTokenUsage = tokenUsage || graderTokens ? {
29443
+ input: (tokenUsage?.input ?? 0) + (graderTokens?.input ?? 0),
29444
+ output: (tokenUsage?.output ?? 0) + (graderTokens?.output ?? 0),
29445
+ ...tokenUsage?.reasoning != null || graderTokens?.reasoning != null ? { reasoning: (tokenUsage?.reasoning ?? 0) + (graderTokens?.reasoning ?? 0) } : {},
29446
+ ...tokenUsage?.cached != null || graderTokens?.cached != null ? { cached: (tokenUsage?.cached ?? 0) + (graderTokens?.cached ?? 0) } : {}
29447
+ } : void 0;
29448
+ const evalRun = {
29449
+ durationMs: totalDurationMs,
29450
+ ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
29451
+ };
29430
29452
  const executionStatus = providerError ? "execution_error" : classifyQualityStatus(result.score);
29431
29453
  const finalResult = providerError ? {
29432
29454
  ...result,
29455
+ evalRun,
29433
29456
  error: providerError,
29434
29457
  executionStatus,
29435
29458
  failureStage: "agent",
@@ -29438,7 +29461,7 @@ async function runEvalCase(options) {
29438
29461
  beforeAllOutput,
29439
29462
  beforeEachOutput,
29440
29463
  afterEachOutput
29441
- } : { ...result, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
29464
+ } : { ...result, evalRun, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
29442
29465
  const isFailure = !!finalResult.error || finalResult.score < 0.5;
29443
29466
  if (workspacePath && !isSharedWorkspace) {
29444
29467
  if (forceCleanup) {
@@ -29458,6 +29481,7 @@ async function runEvalCase(options) {
29458
29481
  }
29459
29482
  return finalResult;
29460
29483
  } catch (error) {
29484
+ const evalRun = { durationMs: Date.now() - caseStartMs };
29461
29485
  const errorResult = buildErrorResult(
29462
29486
  evalCase,
29463
29487
  target.name,
@@ -29473,10 +29497,10 @@ async function runEvalCase(options) {
29473
29497
  await cleanupWorkspace(workspacePath).catch(() => {
29474
29498
  });
29475
29499
  } else {
29476
- return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
29500
+ return { ...errorResult, evalRun, workspacePath, beforeEachOutput, afterEachOutput };
29477
29501
  }
29478
29502
  }
29479
- return { ...errorResult, beforeEachOutput, afterEachOutput };
29503
+ return { ...errorResult, evalRun, beforeEachOutput, afterEachOutput };
29480
29504
  }
29481
29505
  }
29482
29506
  async function runEvalCaseWithTrials(options, trialsConfig) {
@@ -30051,6 +30075,44 @@ function buildResultInput(promptInputs) {
30051
30075
  }
30052
30076
  return promptInputs.question;
30053
30077
  }
30078
/**
 * Sums the token usage reported by evaluator scores, walking nested child
 * scores as well, into a single usage record.
 *
 * @param {Array<object>|null|undefined} scores - Evaluator results. Each entry
 *   may carry a `tokenUsage` object and/or a nested `scores` array.
 * @returns {{input: number, output: number, reasoning?: number, cached?: number}|undefined}
 *   Combined totals, or `undefined` when no entry in the tree reported usage.
 *   `reasoning` and `cached` are included only when at least one entry
 *   reported them, so "not reported" stays distinguishable from zero.
 */
function aggregateEvaluatorTokenUsage(scores) {
  if (!Array.isArray(scores) || scores.length === 0) return void 0;
  let hasAny = false;
  let hasReasoning = false;
  let hasCached = false;
  const totals = { input: 0, output: 0, reasoning: 0, cached: 0 };
  const visit = (items) => {
    for (const item of items) {
      // Tolerate holes / nullish entries instead of throwing on property access.
      if (item == null) continue;
      const usage = item.tokenUsage;
      if (usage) {
        hasAny = true;
        // A partially populated usage record must not poison the sums with NaN.
        totals.input += usage.input ?? 0;
        totals.output += usage.output ?? 0;
        if (usage.reasoning != null) {
          hasReasoning = true;
          totals.reasoning += usage.reasoning;
        }
        if (usage.cached != null) {
          hasCached = true;
          totals.cached += usage.cached;
        }
      }
      // Only recurse into real arrays; anything else is not iterable score data.
      if (Array.isArray(item.scores)) {
        visit(item.scores);
      }
    }
  };
  visit(scores);
  if (!hasAny) return void 0;
  return {
    input: totals.input,
    output: totals.output,
    ...(hasReasoning ? { reasoning: totals.reasoning } : {}),
    ...(hasCached ? { cached: totals.cached } : {})
  };
}
30054
30116
  function isTimeoutLike(error) {
30055
30117
  if (!error) {
30056
30118
  return false;
@@ -31086,4 +31148,4 @@ export {
31086
31148
  OtelStreamingObserver,
31087
31149
  createAgentKernel
31088
31150
  };
31089
- //# sourceMappingURL=chunk-VBGYESW7.js.map
31151
+ //# sourceMappingURL=chunk-6XTYVCMN.js.map