agentv 4.19.0 → 4.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8225,7 +8225,7 @@ var _a20;
8225
8225
  _a20 = symbol20;
8226
8226
  var defaultDownload2 = createDownload();
8227
8227
 
8228
- // ../../packages/core/dist/chunk-QXX3IBYV.js
8228
+ // ../../packages/core/dist/chunk-ELF6SQAK.js
8229
8229
  import path46 from "node:path";
8230
8230
  import { pathToFileURL as pathToFileURL2 } from "node:url";
8231
8231
  import { existsSync as existsSync6 } from "node:fs";
@@ -12915,7 +12915,7 @@ var openrouter = createOpenRouter({
12915
12915
  // strict for OpenRouter API
12916
12916
  });
12917
12917
 
12918
- // ../../packages/core/dist/chunk-QXX3IBYV.js
12918
+ // ../../packages/core/dist/chunk-ELF6SQAK.js
12919
12919
  import { spawn } from "node:child_process";
12920
12920
  import { randomUUID } from "node:crypto";
12921
12921
  import { createWriteStream } from "node:fs";
@@ -14419,7 +14419,7 @@ var RequestError = class _RequestError extends Error {
14419
14419
  }
14420
14420
  };
14421
14421
 
14422
- // ../../packages/core/dist/chunk-QXX3IBYV.js
14422
+ // ../../packages/core/dist/chunk-ELF6SQAK.js
14423
14423
  import { exec as execCallback } from "node:child_process";
14424
14424
  import { readdirSync, statSync } from "node:fs";
14425
14425
  import { readFile as readFile22, readdir, stat } from "node:fs/promises";
@@ -15461,6 +15461,7 @@ var TEMPLATE_VARIABLES = {
15461
15461
  INPUT: "input",
15462
15462
  OUTPUT: "output",
15463
15463
  FILE_CHANGES: "file_changes",
15464
+ TOOL_CALLS: "tool_calls",
15464
15465
  /** @deprecated Use INPUT instead — resolves to the same text value. */
15465
15466
  INPUT_TEXT: "input_text",
15466
15467
  /** @deprecated Use OUTPUT instead — resolves to the same text value. */
@@ -15637,6 +15638,7 @@ var LlmGrader = class {
15637
15638
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
15638
15639
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
15639
15640
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
15641
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
15640
15642
  // Deprecated aliases — same values as the primary variables above
15641
15643
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
15642
15644
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -15651,6 +15653,12 @@ var LlmGrader = class {
15651
15653
 
15652
15654
  [[ ## file_changes ## ]]
15653
15655
  ${context2.fileChanges}`;
15656
+ }
15657
+ if (context2.toolCalls && !context2.graderTemplateOverride && !this.graderTemplate) {
15658
+ userPrompt += `
15659
+
15660
+ [[ ## tool_calls ## ]]
15661
+ ${context2.toolCalls}`;
15654
15662
  }
15655
15663
  const graderRawRequest = {
15656
15664
  userPrompt,
@@ -15972,6 +15980,7 @@ ${context2.fileChanges}`;
15972
15980
  [TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
15973
15981
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
15974
15982
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
15983
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
15975
15984
  // Deprecated aliases
15976
15985
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
15977
15986
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -16000,6 +16009,9 @@ ${context2.fileChanges}`;
16000
16009
  if (context2.fileChanges) {
16001
16010
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
16002
16011
  }
16012
+ if (context2.toolCalls) {
16013
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
16014
+ }
16003
16015
  if (rubrics && rubrics.length > 0) {
16004
16016
  parts.push("[[ ## rubrics ## ]]");
16005
16017
  for (const rubric of rubrics) {
@@ -16033,6 +16045,7 @@ ${context2.fileChanges}`;
16033
16045
  [TEMPLATE_VARIABLES.OUTPUT]: context2.candidate.trim(),
16034
16046
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context2.evalCase.reference_answer ?? "").trim(),
16035
16047
  [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
16048
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context2.toolCalls ?? "",
16036
16049
  // Deprecated aliases
16037
16050
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
16038
16051
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
@@ -16062,6 +16075,9 @@ ${outputSchema2}`;
16062
16075
  if (context2.fileChanges) {
16063
16076
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
16064
16077
  }
16078
+ if (context2.toolCalls) {
16079
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
16080
+ }
16065
16081
  if (rubrics && rubrics.length > 0) {
16066
16082
  parts.push("[[ ## rubrics ## ]]");
16067
16083
  for (const rubric of rubrics) {
@@ -16154,6 +16170,9 @@ ${outputSchema2}`;
16154
16170
  if (context2.fileChanges) {
16155
16171
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
16156
16172
  }
16173
+ if (context2.toolCalls) {
16174
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
16175
+ }
16157
16176
  parts.push("[[ ## scoring_criteria ## ]]");
16158
16177
  for (const rubric of rubrics) {
16159
16178
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
@@ -16196,6 +16215,9 @@ ${outputSchema2}`;
16196
16215
  if (context2.fileChanges) {
16197
16216
  parts.push("[[ ## file_changes ## ]]", context2.fileChanges, "");
16198
16217
  }
16218
+ if (context2.toolCalls) {
16219
+ parts.push("[[ ## tool_calls ## ]]", context2.toolCalls, "");
16220
+ }
16199
16221
  parts.push("[[ ## rubrics ## ]]");
16200
16222
  for (const rubric of rubrics) {
16201
16223
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
@@ -17694,6 +17716,52 @@ var LatencyGrader = class {
17694
17716
  };
17695
17717
  }
17696
17718
  };
17719
+ var KEY_INPUT_FIELDS = /* @__PURE__ */ new Map([
17720
+ ["Skill", ["skill"]],
17721
+ ["Read", ["file_path"]],
17722
+ ["Write", ["file_path"]],
17723
+ ["Edit", ["file_path"]],
17724
+ ["Bash", ["command"]],
17725
+ ["Grep", ["pattern"]],
17726
+ ["Glob", ["pattern"]]
17727
+ ]);
17728
+ var MAX_FALLBACK_LENGTH = 120;
17729
+ function formatToolCalls(output) {
17730
+ if (!output) return "";
17731
+ const lines = [];
17732
+ for (const message of output) {
17733
+ if (!message.toolCalls) continue;
17734
+ for (const call of message.toolCalls) {
17735
+ const toolName = call.tool ?? "unknown";
17736
+ const detail = extractKeyDetail(toolName, call.input);
17737
+ lines.push(detail ? `- ${toolName}: ${detail}` : `- ${toolName}`);
17738
+ }
17739
+ }
17740
+ return lines.length > 0 ? lines.join("\n") : "";
17741
+ }
17742
+ function extractKeyDetail(toolName, input) {
17743
+ if (!input || typeof input !== "object") return "";
17744
+ const record = input;
17745
+ const knownFields = KEY_INPUT_FIELDS.get(toolName);
17746
+ if (knownFields) {
17747
+ for (const field of knownFields) {
17748
+ const value = record[field];
17749
+ if (typeof value === "string" && value.length > 0) {
17750
+ return truncate(value);
17751
+ }
17752
+ }
17753
+ }
17754
+ for (const value of Object.values(record)) {
17755
+ if (typeof value === "string" && value.length > 0 && value.length <= MAX_FALLBACK_LENGTH) {
17756
+ return truncate(value);
17757
+ }
17758
+ }
17759
+ return "";
17760
+ }
17761
+ function truncate(value, maxLen = 120) {
17762
+ if (value.length <= maxLen) return value;
17763
+ return `${value.slice(0, maxLen)}\u2026`;
17764
+ }
17697
17765
  var SkillTriggerGrader = class {
17698
17766
  kind = "skill-trigger";
17699
17767
  config;
@@ -17767,19 +17835,27 @@ function assembleLlmGraderPrompt(input) {
17767
17835
  promptInputs,
17768
17836
  evaluatorConfig,
17769
17837
  fileChanges,
17838
+ toolCalls,
17770
17839
  graderTemplateOverride
17771
17840
  } = input;
17772
17841
  const rubrics = evaluatorConfig?.rubrics;
17773
17842
  if (rubrics && rubrics.length > 0) {
17774
17843
  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
17775
17844
  if (hasScoreRanges) {
17776
- return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges);
17845
+ return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
17777
17846
  }
17778
- return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges);
17847
+ return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
17779
17848
  }
17780
- return assembleFreeform(evalCase, candidate, promptInputs, fileChanges, graderTemplateOverride);
17849
+ return assembleFreeform(
17850
+ evalCase,
17851
+ candidate,
17852
+ promptInputs,
17853
+ fileChanges,
17854
+ toolCalls,
17855
+ graderTemplateOverride
17856
+ );
17781
17857
  }
17782
- function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, graderTemplateOverride) {
17858
+ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, toolCalls, graderTemplateOverride) {
17783
17859
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
17784
17860
  const variables = {
17785
17861
  [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
@@ -17787,6 +17863,7 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, grader
17787
17863
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
17788
17864
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
17789
17865
  [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
17866
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? "",
17790
17867
  // Deprecated aliases
17791
17868
  [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
17792
17869
  [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
@@ -17800,6 +17877,12 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, grader
17800
17877
 
17801
17878
  [[ ## file_changes ## ]]
17802
17879
  ${fileChanges}`;
17880
+ }
17881
+ if (toolCalls && !graderTemplateOverride) {
17882
+ userPrompt += `
17883
+
17884
+ [[ ## tool_calls ## ]]
17885
+ ${toolCalls}`;
17803
17886
  }
17804
17887
  return {
17805
17888
  systemPrompt,
@@ -17808,7 +17891,7 @@ ${fileChanges}`;
17808
17891
  mode: "freeform"
17809
17892
  };
17810
17893
  }
17811
- function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges) {
17894
+ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
17812
17895
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
17813
17896
  const parts = [
17814
17897
  "You are an expert grader. Evaluate the candidate answer against each rubric item below.",
@@ -17827,6 +17910,9 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
17827
17910
  if (fileChanges) {
17828
17911
  parts.push("[[ ## file_changes ## ]]", fileChanges, "");
17829
17912
  }
17913
+ if (toolCalls) {
17914
+ parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
17915
+ }
17830
17916
  parts.push("[[ ## rubrics ## ]]");
17831
17917
  for (const rubric of rubrics) {
17832
17918
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
@@ -17843,7 +17929,7 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
17843
17929
  mode: "checklist"
17844
17930
  };
17845
17931
  }
17846
- function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges) {
17932
+ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
17847
17933
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
17848
17934
  const parts = [
17849
17935
  "You are an expert grader. Score the candidate answer on each criterion below using the provided score ranges.",
@@ -17863,6 +17949,9 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
17863
17949
  if (fileChanges) {
17864
17950
  parts.push("[[ ## file_changes ## ]]", fileChanges, "");
17865
17951
  }
17952
+ if (toolCalls) {
17953
+ parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
17954
+ }
17866
17955
  parts.push("[[ ## scoring_criteria ## ]]");
17867
17956
  for (const rubric of rubrics) {
17868
17957
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
@@ -30105,7 +30194,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
30105
30194
  return { tests: await loadTestsFromAgentSkills(evalFilePath) };
30106
30195
  }
30107
30196
  if (format === "typescript") {
30108
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-XFQ6S4DT-S7P2UUBX.js");
30197
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-32COE32J-TCT4RIRT.js");
30109
30198
  return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
30110
30199
  }
30111
30200
  const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -30140,7 +30229,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
30140
30229
  return loadTestsFromAgentSkills(evalFilePath);
30141
30230
  }
30142
30231
  if (format === "typescript") {
30143
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-XFQ6S4DT-S7P2UUBX.js");
30232
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-32COE32J-TCT4RIRT.js");
30144
30233
  const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
30145
30234
  return suite.tests;
30146
30235
  }
@@ -30743,6 +30832,7 @@ async function runEvaluation(options) {
30743
30832
  trials,
30744
30833
  streamCallbacks,
30745
30834
  budgetUsd,
30835
+ runBudgetTracker,
30746
30836
  failOnError,
30747
30837
  poolWorkspaces,
30748
30838
  poolMaxSlots: configPoolMaxSlots,
@@ -31077,8 +31167,14 @@ async function runEvaluation(options) {
31077
31167
  }
31078
31168
  }
31079
31169
  return { ok: allPassed, depResults };
31170
+ }, extractEvaluationCostUsd2 = function(result) {
31171
+ if (result.trials && result.trials.length > 0) {
31172
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
31173
+ return trialCostSum > 0 ? trialCostSum : void 0;
31174
+ }
31175
+ return result.costUsd;
31080
31176
  };
31081
- var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2;
31177
+ var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2, extractEvaluationCostUsd = extractEvaluationCostUsd2;
31082
31178
  if (suiteWorkspaceFile && sharedWorkspacePath) {
31083
31179
  const copiedWorkspaceFile = path44.join(sharedWorkspacePath, path44.basename(suiteWorkspaceFile));
31084
31180
  try {
@@ -31271,6 +31367,42 @@ async function runEvaluation(options) {
31271
31367
  async function dispatchTest(evalCase, depResults) {
31272
31368
  const workerId = nextWorkerId++;
31273
31369
  workerIdByEvalId.set(evalCase.id, workerId);
31370
+ if (runBudgetTracker?.isExceeded()) {
31371
+ const budgetResult = {
31372
+ timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
31373
+ testId: evalCase.id,
31374
+ suite: evalCase.suite,
31375
+ category: evalCase.category,
31376
+ score: 0,
31377
+ assertions: [],
31378
+ output: [],
31379
+ target: target.name,
31380
+ error: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
31381
+ budgetExceeded: true,
31382
+ executionStatus: "execution_error",
31383
+ failureStage: "setup",
31384
+ failureReasonCode: "budget_exceeded",
31385
+ executionError: {
31386
+ message: `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`,
31387
+ stage: "setup"
31388
+ }
31389
+ };
31390
+ if (onProgress) {
31391
+ await onProgress({
31392
+ workerId,
31393
+ testId: evalCase.id,
31394
+ status: "failed",
31395
+ completedAt: Date.now(),
31396
+ error: budgetResult.error,
31397
+ score: budgetResult.score,
31398
+ executionStatus: budgetResult.executionStatus
31399
+ });
31400
+ }
31401
+ if (onResult) {
31402
+ await onResult(budgetResult);
31403
+ }
31404
+ return budgetResult;
31405
+ }
31274
31406
  if (budgetUsd !== void 0 && budgetExhausted) {
31275
31407
  const budgetResult = {
31276
31408
  timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
@@ -31384,22 +31516,17 @@ async function runEvaluation(options) {
31384
31516
  ...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
31385
31517
  };
31386
31518
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
31387
- if (budgetUsd !== void 0) {
31388
- let caseCost;
31389
- if (result.trials && result.trials.length > 0) {
31390
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
31391
- if (trialCostSum > 0) {
31392
- caseCost = trialCostSum;
31393
- }
31394
- } else {
31395
- caseCost = result.costUsd;
31396
- }
31397
- if (caseCost !== void 0) {
31519
+ const caseCost = extractEvaluationCostUsd2(result);
31520
+ if (caseCost !== void 0) {
31521
+ if (budgetUsd !== void 0) {
31398
31522
  cumulativeBudgetCost += caseCost;
31399
31523
  if (cumulativeBudgetCost >= budgetUsd) {
31400
31524
  budgetExhausted = true;
31401
31525
  }
31402
31526
  }
31527
+ if (runBudgetTracker) {
31528
+ runBudgetTracker.add(caseCost);
31529
+ }
31403
31530
  }
31404
31531
  if (failOnError === true && result.executionStatus === "execution_error") {
31405
31532
  failOnErrorTriggered = true;
@@ -32207,6 +32334,7 @@ async function runEvalCase(options) {
32207
32334
  fileChanges = fileChanges ? `${fileChanges}
32208
32335
  ${providerFileChanges}` : providerFileChanges;
32209
32336
  }
32337
+ const toolCalls = formatToolCalls(output);
32210
32338
  const providerError = extractProviderError(providerResponse);
32211
32339
  const targetAfterEachHook = options.targetHooks?.after_each;
32212
32340
  if (workspacePath && hasHookCommand(targetAfterEachHook)) {
@@ -32290,6 +32418,7 @@ ${providerFileChanges}` : providerFileChanges;
32290
32418
  targetResolver,
32291
32419
  availableTargets,
32292
32420
  fileChanges,
32421
+ toolCalls,
32293
32422
  workspacePath,
32294
32423
  dockerConfig: evalCase.workspace?.docker,
32295
32424
  verbose,
@@ -32487,6 +32616,7 @@ async function evaluateCandidate(options) {
32487
32616
  targetResolver,
32488
32617
  availableTargets,
32489
32618
  fileChanges,
32619
+ toolCalls,
32490
32620
  workspacePath,
32491
32621
  dockerConfig,
32492
32622
  threshold: evalThreshold,
@@ -32515,6 +32645,7 @@ async function evaluateCandidate(options) {
32515
32645
  targetResolver,
32516
32646
  availableTargets,
32517
32647
  fileChanges,
32648
+ toolCalls,
32518
32649
  workspacePath,
32519
32650
  dockerConfig,
32520
32651
  threshold: evalThreshold,
@@ -32592,6 +32723,7 @@ async function runEvaluatorsForCase(options) {
32592
32723
  targetResolver,
32593
32724
  availableTargets,
32594
32725
  fileChanges,
32726
+ toolCalls,
32595
32727
  workspacePath,
32596
32728
  dockerConfig,
32597
32729
  threshold,
@@ -32621,6 +32753,7 @@ async function runEvaluatorsForCase(options) {
32621
32753
  targetResolver,
32622
32754
  availableTargets,
32623
32755
  fileChanges,
32756
+ toolCalls,
32624
32757
  workspacePath,
32625
32758
  dockerConfig,
32626
32759
  threshold,
@@ -32652,6 +32785,7 @@ async function runEvaluatorsForCase(options) {
32652
32785
  targetResolver,
32653
32786
  availableTargets,
32654
32787
  fileChanges,
32788
+ toolCalls,
32655
32789
  workspacePath,
32656
32790
  dockerConfig,
32657
32791
  dependencyResults,
@@ -32693,6 +32827,7 @@ async function runEvaluatorList(options) {
32693
32827
  targetResolver,
32694
32828
  availableTargets,
32695
32829
  fileChanges,
32830
+ toolCalls,
32696
32831
  workspacePath,
32697
32832
  dockerConfig,
32698
32833
  dependencyResults
@@ -32718,6 +32853,7 @@ async function runEvaluatorList(options) {
32718
32853
  targetResolver,
32719
32854
  availableTargets,
32720
32855
  fileChanges,
32856
+ toolCalls,
32721
32857
  workspacePath,
32722
32858
  dockerConfig,
32723
32859
  dependencyResults
@@ -33778,6 +33914,7 @@ export {
33778
33914
  ExecutionMetricsGrader,
33779
33915
  FieldAccuracyGrader,
33780
33916
  LatencyGrader,
33917
+ formatToolCalls,
33781
33918
  SkillTriggerGrader,
33782
33919
  assembleLlmGraderPrompt,
33783
33920
  TokenUsageGrader,
@@ -33864,4 +34001,4 @@ export {
33864
34001
  loadTsEvalFile,
33865
34002
  loadTsEvalSuite
33866
34003
  };
33867
- //# sourceMappingURL=chunk-PTYQS37Y.js.map
34004
+ //# sourceMappingURL=chunk-LP4Y5D2Z.js.map