@agentv/core 4.19.0 → 4.20.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1448,6 +1448,7 @@ var init_template_variables = __esm({
1448
1448
  INPUT: "input",
1449
1449
  OUTPUT: "output",
1450
1450
  FILE_CHANGES: "file_changes",
1451
+ TOOL_CALLS: "tool_calls",
1451
1452
  /** @deprecated Use INPUT instead — resolves to the same text value. */
1452
1453
  INPUT_TEXT: "input_text",
1453
1454
  /** @deprecated Use OUTPUT instead — resolves to the same text value. */
@@ -5832,6 +5833,7 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
5832
5833
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
5833
5834
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
5834
5835
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
5836
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
5835
5837
  // Deprecated aliases — same values as the primary variables above
5836
5838
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
5837
5839
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -5846,6 +5848,12 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
5846
5848
 
5847
5849
  [[ ## file_changes ## ]]
5848
5850
  ${context2.fileChanges}`;
5851
+ }
5852
+ if (context2.toolCalls && !context2.graderTemplateOverride && !this.graderTemplate) {
5853
+ userPrompt += `
5854
+
5855
+ [[ ## tool_calls ## ]]
5856
+ ${context2.toolCalls}`;
5849
5857
  }
5850
5858
  const graderRawRequest = {
5851
5859
  userPrompt,
@@ -6167,6 +6175,7 @@ ${context2.fileChanges}`;
6167
6175
  [TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
6168
6176
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
6169
6177
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
6178
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
6170
6179
  // Deprecated aliases
6171
6180
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
6172
6181
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -6195,6 +6204,9 @@ ${context2.fileChanges}`;
6195
6204
  if (context2.fileChanges) {
6196
6205
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
6197
6206
  }
6207
+ if (context2.toolCalls) {
6208
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
6209
+ }
6198
6210
  if (rubrics && rubrics.length > 0) {
6199
6211
  parts.push("[[ ## rubrics ## ]]");
6200
6212
  for (const rubric of rubrics) {
@@ -6228,6 +6240,7 @@ ${context2.fileChanges}`;
6228
6240
  [TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
6229
6241
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
6230
6242
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
6243
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
6231
6244
  // Deprecated aliases
6232
6245
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
6233
6246
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -6257,6 +6270,9 @@ ${outputSchema}`;
6257
6270
  if (context2.fileChanges) {
6258
6271
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
6259
6272
  }
6273
+ if (context2.toolCalls) {
6274
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
6275
+ }
6260
6276
  if (rubrics && rubrics.length > 0) {
6261
6277
  parts.push("[[ ## rubrics ## ]]");
6262
6278
  for (const rubric of rubrics) {
@@ -6349,6 +6365,9 @@ ${outputSchema}`;
6349
6365
  if (context2.fileChanges) {
6350
6366
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
6351
6367
  }
6368
+ if (context2.toolCalls) {
6369
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
6370
+ }
6352
6371
  parts.push("[[ ## scoring_criteria ## ]]");
6353
6372
  for (const rubric of rubrics) {
6354
6373
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
@@ -6391,6 +6410,9 @@ ${outputSchema}`;
6391
6410
  if (context2.fileChanges) {
6392
6411
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
6393
6412
  }
6413
+ if (context2.toolCalls) {
6414
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
6415
+ }
6394
6416
  parts.push("[[ ## rubrics ## ]]");
6395
6417
  for (const rubric of rubrics) {
6396
6418
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
@@ -7518,6 +7540,61 @@ var init_latency = __esm({
7518
7540
  }
7519
7541
  });
7520
7542
 
7543
+ // src/evaluation/graders/format-tool-calls.ts
7544
+ function formatToolCalls(output) {
7545
+ if (!output) return "";
7546
+ const lines = [];
7547
+ for (const message of output) {
7548
+ if (!message.toolCalls) continue;
7549
+ for (const call of message.toolCalls) {
7550
+ const toolName = call.tool ?? "unknown";
7551
+ const detail = extractKeyDetail(toolName, call.input);
7552
+ lines.push(detail ? `- ${toolName}: ${detail}` : `- ${toolName}`);
7553
+ }
7554
+ }
7555
+ return lines.length > 0 ? lines.join("\n") : "";
7556
+ }
7557
/**
 * Choose one short string from a tool call's input to display beside the
 * tool name.
 *
 * Lookup order:
 *   1. the fields registered for `toolName` in KEY_INPUT_FIELDS, in order —
 *      the first non-empty string wins and is passed through `truncate`;
 *   2. otherwise the first non-empty string value of the input whose length
 *      does not exceed MAX_FALLBACK_LENGTH.
 *
 * @param toolName Name of the tool that was called.
 * @param input The call's input; anything that is not a non-null object
 *   yields "".
 * @returns The selected detail, or "" when nothing suitable is found.
 */
function extractKeyDetail(toolName, input) {
  if (!input || typeof input !== "object") {
    return "";
  }
  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;

  // Fields registered for this specific tool take priority.
  for (const key of KEY_INPUT_FIELDS.get(toolName) || []) {
    const candidate = input[key];
    if (isNonEmptyString(candidate)) {
      return truncate(candidate);
    }
  }

  // Fallback: any short string value, in the object's own enumeration order.
  const fallback = Object.values(input).find(
    (candidate) => isNonEmptyString(candidate) && candidate.length <= MAX_FALLBACK_LENGTH
  );
  return fallback === undefined ? "" : truncate(fallback);
}
7576
/**
 * Shorten a string to at most `maxLen` UTF-16 code units, appending an
 * ellipsis (U+2026) when anything was cut off.
 *
 * @param value The string to shorten.
 * @param maxLen Maximum number of code units kept from `value` (default 120).
 * @returns `value` unchanged when it already fits, otherwise the kept prefix
 *   followed by "\u2026".
 */
function truncate(value, maxLen = 120) {
  if (value.length <= maxLen) return value;
  let end = maxLen;
  // If the cut falls between the two halves of a surrogate pair, the prefix
  // would end in a lone high surrogate (an ill-formed string). Drop that
  // dangling half so the result stays well-formed.
  if (end > 0) {
    const lastUnit = value.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }
  }
  return `${value.slice(0, end)}\u2026`;
}
7580
// Module-level lookup data for the tool-call formatter. Both variables are
// assigned when init_format_tool_calls() runs.
var KEY_INPUT_FIELDS, MAX_FALLBACK_LENGTH;
// NOTE(review): `__esm` looks like the bundler's lazy module-initialiser
// wrapper — confirm against its definition elsewhere in the bundle.
var init_format_tool_calls = __esm({
  "src/evaluation/graders/format-tool-calls.ts"() {
    "use strict";
    init_cjs_shims();
    // Longest string value accepted by the generic fallback lookup.
    MAX_FALLBACK_LENGTH = 120;
    // Tool name -> input fields to try, in order, when summarising a call.
    KEY_INPUT_FIELDS = /* @__PURE__ */ new Map([
      ["Skill", ["skill"]],
      ...["Read", "Write", "Edit"].map((tool) => [tool, ["file_path"]]),
      ["Bash", ["command"]],
      ...["Grep", "Glob"].map((tool) => [tool, ["pattern"]])
    ]);
  }
});
7597
+
7521
7598
  // src/evaluation/graders/skill-trigger.ts
7522
7599
  var SkillTriggerGrader;
7523
7600
  var init_skill_trigger = __esm({
@@ -7601,19 +7678,27 @@ function assembleLlmGraderPrompt(input) {
7601
7678
  promptInputs,
7602
7679
  evaluatorConfig,
7603
7680
  fileChanges,
7681
+ toolCalls,
7604
7682
  graderTemplateOverride
7605
7683
  } = input;
7606
7684
  const rubrics = evaluatorConfig?.rubrics;
7607
7685
  if (rubrics && rubrics.length > 0) {
7608
7686
  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
7609
7687
  if (hasScoreRanges) {
7610
- return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges);
7688
+ return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
7611
7689
  }
7612
- return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges);
7690
+ return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
7613
7691
  }
7614
- return assembleFreeform(evalCase, candidate, promptInputs, fileChanges, graderTemplateOverride);
7692
+ return assembleFreeform(
7693
+ evalCase,
7694
+ candidate,
7695
+ promptInputs,
7696
+ fileChanges,
7697
+ toolCalls,
7698
+ graderTemplateOverride
7699
+ );
7615
7700
  }
7616
- function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, graderTemplateOverride) {
7701
+ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, toolCalls, graderTemplateOverride) {
7617
7702
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
7618
7703
  const variables = {
7619
7704
  [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
@@ -7621,6 +7706,7 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, grader
7621
7706
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
7622
7707
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
7623
7708
  [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
7709
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? "",
7624
7710
  // Deprecated aliases
7625
7711
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
7626
7712
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
@@ -7634,6 +7720,12 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, grader
7634
7720
 
7635
7721
  [[ ## file_changes ## ]]
7636
7722
  ${fileChanges}`;
7723
+ }
7724
+ if (toolCalls && !graderTemplateOverride) {
7725
+ userPrompt += `
7726
+
7727
+ [[ ## tool_calls ## ]]
7728
+ ${toolCalls}`;
7637
7729
  }
7638
7730
  return {
7639
7731
  systemPrompt,
@@ -7642,7 +7734,7 @@ ${fileChanges}`;
7642
7734
  mode: "freeform"
7643
7735
  };
7644
7736
  }
7645
- function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges) {
7737
+ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
7646
7738
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
7647
7739
  const parts = [
7648
7740
  "You are an expert grader. Evaluate the candidate answer against each rubric item below.",
@@ -7661,6 +7753,9 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
7661
7753
  if (fileChanges) {
7662
7754
  parts.push("[[ ## file_changes ## ]]", fileChanges, "");
7663
7755
  }
7756
+ if (toolCalls) {
7757
+ parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
7758
+ }
7664
7759
  parts.push("[[ ## rubrics ## ]]");
7665
7760
  for (const rubric of rubrics) {
7666
7761
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
@@ -7677,7 +7772,7 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
7677
7772
  mode: "checklist"
7678
7773
  };
7679
7774
  }
7680
- function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges) {
7775
+ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
7681
7776
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
7682
7777
  const parts = [
7683
7778
  "You are an expert grader. Score the candidate answer on each criterion below using the provided score ranges.",
@@ -7697,6 +7792,9 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
7697
7792
  if (fileChanges) {
7698
7793
  parts.push("[[ ## file_changes ## ]]", fileChanges, "");
7699
7794
  }
7795
+ if (toolCalls) {
7796
+ parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
7797
+ }
7700
7798
  parts.push("[[ ## scoring_criteria ## ]]");
7701
7799
  for (const rubric of rubrics) {
7702
7800
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
@@ -8470,6 +8568,7 @@ var init_graders = __esm({
8470
8568
  init_field_accuracy();
8471
8569
  init_latency();
8472
8570
  init_llm_grader();
8571
+ init_format_tool_calls();
8473
8572
  init_skill_trigger();
8474
8573
  init_llm_grader_prompt();
8475
8574
  init_token_usage();
@@ -19200,6 +19299,7 @@ async function runEvaluation(options) {
19200
19299
  trials,
19201
19300
  streamCallbacks,
19202
19301
  budgetUsd,
19302
+ runBudgetTracker,
19203
19303
  failOnError,
19204
19304
  poolWorkspaces,
19205
19305
  poolMaxSlots: configPoolMaxSlots,
@@ -19534,8 +19634,14 @@ async function runEvaluation(options) {
19534
19634
  }
19535
19635
  }
19536
19636
  return { ok: allPassed, depResults };
19637
+ }, extractEvaluationCostUsd2 = function(result) {
19638
+ if (result.trials && result.trials.length > 0) {
19639
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
19640
+ return trialCostSum > 0 ? trialCostSum : void 0;
19641
+ }
19642
+ return result.costUsd;
19537
19643
  };
19538
- var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2;
19644
+ var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2, extractEvaluationCostUsd = extractEvaluationCostUsd2;
19539
19645
  if (suiteWorkspaceFile && sharedWorkspacePath) {
19540
19646
  const copiedWorkspaceFile = import_node_path47.default.join(sharedWorkspacePath, import_node_path47.default.basename(suiteWorkspaceFile));
19541
19647
  try {
@@ -19728,6 +19834,42 @@ async function runEvaluation(options) {
19728
19834
  async function dispatchTest(evalCase, depResults) {
19729
19835
  const workerId = nextWorkerId++;
19730
19836
  workerIdByEvalId.set(evalCase.id, workerId);
19837
+ if (runBudgetTracker?.isExceeded()) {
19838
+ const budgetResult = {
19839
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
19840
+ testId: evalCase.id,
19841
+ suite: evalCase.suite,
19842
+ category: evalCase.category,
19843
+ score: 0,
19844
+ assertions: [],
19845
+ output: [],
19846
+ target: target.name,
19847
+ error: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
19848
+ budgetExceeded: true,
19849
+ executionStatus: "execution_error",
19850
+ failureStage: "setup",
19851
+ failureReasonCode: "budget_exceeded",
19852
+ executionError: {
19853
+ message: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
19854
+ stage: "setup"
19855
+ }
19856
+ };
19857
+ if (onProgress) {
19858
+ await onProgress({
19859
+ workerId,
19860
+ testId: evalCase.id,
19861
+ status: "failed",
19862
+ completedAt: Date.now(),
19863
+ error: budgetResult.error,
19864
+ score: budgetResult.score,
19865
+ executionStatus: budgetResult.executionStatus
19866
+ });
19867
+ }
19868
+ if (onResult) {
19869
+ await onResult(budgetResult);
19870
+ }
19871
+ return budgetResult;
19872
+ }
19731
19873
  if (budgetUsd !== void 0 && budgetExhausted) {
19732
19874
  const budgetResult = {
19733
19875
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
@@ -19841,22 +19983,17 @@ async function runEvaluation(options) {
19841
19983
  ...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
19842
19984
  };
19843
19985
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
19844
- if (budgetUsd !== void 0) {
19845
- let caseCost;
19846
- if (result.trials && result.trials.length > 0) {
19847
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
19848
- if (trialCostSum > 0) {
19849
- caseCost = trialCostSum;
19850
- }
19851
- } else {
19852
- caseCost = result.costUsd;
19853
- }
19854
- if (caseCost !== void 0) {
19986
+ const caseCost = extractEvaluationCostUsd2(result);
19987
+ if (caseCost !== void 0) {
19988
+ if (budgetUsd !== void 0) {
19855
19989
  cumulativeBudgetCost += caseCost;
19856
19990
  if (cumulativeBudgetCost >= budgetUsd) {
19857
19991
  budgetExhausted = true;
19858
19992
  }
19859
19993
  }
19994
+ if (runBudgetTracker) {
19995
+ runBudgetTracker.add(caseCost);
19996
+ }
19860
19997
  }
19861
19998
  if (failOnError === true && result.executionStatus === "execution_error") {
19862
19999
  failOnErrorTriggered = true;
@@ -20664,6 +20801,7 @@ async function runEvalCase(options) {
20664
20801
  fileChanges = fileChanges ? `${fileChanges}
20665
20802
  ${providerFileChanges}` : providerFileChanges;
20666
20803
  }
20804
+ const toolCalls = formatToolCalls(output);
20667
20805
  const providerError = extractProviderError(providerResponse);
20668
20806
  const targetAfterEachHook = options.targetHooks?.after_each;
20669
20807
  if (workspacePath && hasHookCommand(targetAfterEachHook)) {
@@ -20747,6 +20885,7 @@ ${providerFileChanges}` : providerFileChanges;
20747
20885
  targetResolver,
20748
20886
  availableTargets,
20749
20887
  fileChanges,
20888
+ toolCalls,
20750
20889
  workspacePath,
20751
20890
  dockerConfig: evalCase.workspace?.docker,
20752
20891
  verbose,
@@ -20944,6 +21083,7 @@ async function evaluateCandidate(options) {
20944
21083
  targetResolver,
20945
21084
  availableTargets,
20946
21085
  fileChanges,
21086
+ toolCalls,
20947
21087
  workspacePath,
20948
21088
  dockerConfig,
20949
21089
  threshold: evalThreshold,
@@ -20972,6 +21112,7 @@ async function evaluateCandidate(options) {
20972
21112
  targetResolver,
20973
21113
  availableTargets,
20974
21114
  fileChanges,
21115
+ toolCalls,
20975
21116
  workspacePath,
20976
21117
  dockerConfig,
20977
21118
  threshold: evalThreshold,
@@ -21049,6 +21190,7 @@ async function runEvaluatorsForCase(options) {
21049
21190
  targetResolver,
21050
21191
  availableTargets,
21051
21192
  fileChanges,
21193
+ toolCalls,
21052
21194
  workspacePath,
21053
21195
  dockerConfig,
21054
21196
  threshold,
@@ -21078,6 +21220,7 @@ async function runEvaluatorsForCase(options) {
21078
21220
  targetResolver,
21079
21221
  availableTargets,
21080
21222
  fileChanges,
21223
+ toolCalls,
21081
21224
  workspacePath,
21082
21225
  dockerConfig,
21083
21226
  threshold,
@@ -21109,6 +21252,7 @@ async function runEvaluatorsForCase(options) {
21109
21252
  targetResolver,
21110
21253
  availableTargets,
21111
21254
  fileChanges,
21255
+ toolCalls,
21112
21256
  workspacePath,
21113
21257
  dockerConfig,
21114
21258
  dependencyResults,
@@ -21150,6 +21294,7 @@ async function runEvaluatorList(options) {
21150
21294
  targetResolver,
21151
21295
  availableTargets,
21152
21296
  fileChanges,
21297
+ toolCalls,
21153
21298
  workspacePath,
21154
21299
  dockerConfig,
21155
21300
  dependencyResults
@@ -21175,6 +21320,7 @@ async function runEvaluatorList(options) {
21175
21320
  targetResolver,
21176
21321
  availableTargets,
21177
21322
  fileChanges,
21323
+ toolCalls,
21178
21324
  workspacePath,
21179
21325
  dockerConfig,
21180
21326
  dependencyResults
@@ -24109,6 +24255,7 @@ __export(index_exports, {
24109
24255
  ProviderRegistry: () => ProviderRegistry,
24110
24256
  RepoManager: () => RepoManager,
24111
24257
  ResponseCache: () => ResponseCache,
24258
+ RunBudgetTracker: () => RunBudgetTracker,
24112
24259
  SkillTriggerGrader: () => SkillTriggerGrader,
24113
24260
  TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
24114
24261
  TemplateNotDirectoryError: () => TemplateNotDirectoryError,
@@ -24180,6 +24327,7 @@ __export(index_exports, {
24180
24327
  extractWorkersFromSuite: () => extractWorkersFromSuite,
24181
24328
  fileExists: () => fileExists2,
24182
24329
  findGitRoot: () => findGitRoot,
24330
+ formatToolCalls: () => formatToolCalls,
24183
24331
  freeformEvaluationSchema: () => freeformEvaluationSchema,
24184
24332
  generateRubrics: () => generateRubrics,
24185
24333
  getAgentvConfigDir: () => getAgentvConfigDir,
@@ -25857,6 +26005,31 @@ init_assertion_discovery();
25857
26005
  init_assertions();
25858
26006
  init_grader_discovery();
25859
26007
 
26008
+ // src/evaluation/run-budget-tracker.ts
26009
+ init_cjs_shims();
26010
/**
 * Accumulates cost in USD and reports when the running total has reached a
 * configured cap.
 */
var RunBudgetTracker = class {
  /**
   * @param capUsd The spending cap in USD that `isExceeded()` compares against.
   */
  constructor(capUsd) {
    this.capUsd = capUsd;
  }
  /** Running total of all accepted costs. */
  cumulative = 0;
  /** The configured cap; assigned in the constructor. */
  capUsd;
  /**
   * Accumulate cost from a completed test or file.
   *
   * Only positive numbers are added. NaN would otherwise poison the total
   * (every later `>=` comparison is false, silently disabling the cap), and
   * negative or non-number values would corrupt it, so they are ignored.
   * Infinity is still accepted and trips the cap.
   */
  add(costUsd) {
    if (typeof costUsd !== "number" || !(costUsd > 0)) return;
    this.cumulative += costUsd;
  }
  /** True when cumulative cost meets or exceeds the cap. */
  isExceeded() {
    return this.cumulative >= this.capUsd;
  }
  /** Current accumulated cost. */
  get currentCostUsd() {
    return this.cumulative;
  }
  /** The configured cap. */
  get budgetCapUsd() {
    return this.capUsd;
  }
};
26032
+
25860
26033
  // src/import/index.ts
25861
26034
  init_cjs_shims();
25862
26035
 
@@ -26525,6 +26698,7 @@ function createAgentKernel() {
26525
26698
  ProviderRegistry,
26526
26699
  RepoManager,
26527
26700
  ResponseCache,
26701
+ RunBudgetTracker,
26528
26702
  SkillTriggerGrader,
26529
26703
  TEST_MESSAGE_ROLES,
26530
26704
  TemplateNotDirectoryError,
@@ -26596,6 +26770,7 @@ function createAgentKernel() {
26596
26770
  extractWorkersFromSuite,
26597
26771
  fileExists,
26598
26772
  findGitRoot,
26773
+ formatToolCalls,
26599
26774
  freeformEvaluationSchema,
26600
26775
  generateRubrics,
26601
26776
  getAgentvConfigDir,