@ls-stack/agent-eval 0.56.0 → 0.56.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CfSiAVmi.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-bB8IBDp1.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-Xa_7PteQ.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DQO2Fpt2.mjs";
2
+ import { t as runCli } from "./cli-Ck0mqxd-.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-6lrtj48K.mjs";
1
+ import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-BH7DlMXl.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-6lrtj48K.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BYaN2mzS.mjs";
1
+ import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-BH7DlMXl.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-C1Ex9QI-.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -2171,8 +2171,8 @@ async function commandApp(args) {
2171
2171
  const { serve } = await import("@hono/node-server");
2172
2172
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2173
2173
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2174
- const appModule = await import("./app-Bpe6Monh.mjs");
2175
- const runnerModule = await import("./runner-C3CiS2o7.mjs");
2174
+ const appModule = await import("./app-Db_x-Rit.mjs");
2175
+ const runnerModule = await import("./runner-DbVYcapC.mjs");
2176
2176
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2177
2177
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2178
2178
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -2660,9 +2660,9 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2660
2660
  error: "error";
2661
2661
  running: "running";
2662
2662
  cancelled: "cancelled";
2663
+ pending: "pending";
2663
2664
  pass: "pass";
2664
2665
  fail: "fail";
2665
- pending: "pending";
2666
2666
  }>;
2667
2667
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2668
2668
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2860,10 +2860,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2860
2860
  namespace: z$1.ZodString;
2861
2861
  key: z$1.ZodString;
2862
2862
  status: z$1.ZodEnum<{
2863
+ bypass: "bypass";
2864
+ refresh: "refresh";
2863
2865
  hit: "hit";
2864
2866
  miss: "miss";
2865
- refresh: "refresh";
2866
- bypass: "bypass";
2867
2867
  }>;
2868
2868
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2869
2869
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2884,9 +2884,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2884
2884
  error: "error";
2885
2885
  running: "running";
2886
2886
  cancelled: "cancelled";
2887
+ pending: "pending";
2887
2888
  pass: "pass";
2888
2889
  fail: "fail";
2889
- pending: "pending";
2890
2890
  }>;
2891
2891
  input: z$1.ZodUnknown;
2892
2892
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -3032,10 +3032,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3032
3032
  namespace: z$1.ZodString;
3033
3033
  key: z$1.ZodString;
3034
3034
  status: z$1.ZodEnum<{
3035
+ bypass: "bypass";
3036
+ refresh: "refresh";
3035
3037
  hit: "hit";
3036
3038
  miss: "miss";
3037
- refresh: "refresh";
3038
- bypass: "bypass";
3039
3039
  }>;
3040
3040
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3041
3041
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3138,10 +3138,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3138
3138
  namespace: z$1.ZodString;
3139
3139
  key: z$1.ZodString;
3140
3140
  status: z$1.ZodEnum<{
3141
+ bypass: "bypass";
3142
+ refresh: "refresh";
3141
3143
  hit: "hit";
3142
3144
  miss: "miss";
3143
- refresh: "refresh";
3144
- bypass: "bypass";
3145
3145
  }>;
3146
3146
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3147
3147
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3499,8 +3499,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3499
3499
  target: z$1.ZodObject<{
3500
3500
  mode: z$1.ZodEnum<{
3501
3501
  all: "all";
3502
- caseIds: "caseIds";
3503
3502
  evalIds: "evalIds";
3503
+ caseIds: "caseIds";
3504
3504
  }>;
3505
3505
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3506
3506
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3514,9 +3514,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3514
3514
  median: "median";
3515
3515
  }>>>;
3516
3516
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3517
- refresh: "refresh";
3518
- bypass: "bypass";
3519
3517
  use: "use";
3518
+ bypass: "bypass";
3519
+ refresh: "refresh";
3520
3520
  }>>;
3521
3521
  }, z$1.core.$strip>;
3522
3522
  /** Persisted lifecycle metadata for a single eval run. */
@@ -4436,9 +4436,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4436
4436
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4437
4437
  */
4438
4438
  declare const cacheModeSchema: z$1.ZodEnum<{
4439
- refresh: "refresh";
4440
- bypass: "bypass";
4441
4439
  use: "use";
4440
+ bypass: "bypass";
4441
+ refresh: "refresh";
4442
4442
  }>;
4443
4443
  /** Mode controlling how cached spans behave during a run. */
4444
4444
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4459,10 +4459,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4459
4459
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4460
4460
  /** Status of a cache lookup recorded on a span or case scope. */
4461
4461
  declare const cacheStatusSchema: z$1.ZodEnum<{
4462
+ bypass: "bypass";
4463
+ refresh: "refresh";
4462
4464
  hit: "hit";
4463
4465
  miss: "miss";
4464
- refresh: "refresh";
4465
- bypass: "bypass";
4466
4466
  }>;
4467
4467
  /** Status of a cache lookup recorded on a span or case scope. */
4468
4468
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4479,10 +4479,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4479
4479
  namespace: z$1.ZodString;
4480
4480
  key: z$1.ZodString;
4481
4481
  status: z$1.ZodEnum<{
4482
+ bypass: "bypass";
4483
+ refresh: "refresh";
4482
4484
  hit: "hit";
4483
4485
  miss: "miss";
4484
- refresh: "refresh";
4485
- bypass: "bypass";
4486
4486
  }>;
4487
4487
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4488
4488
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -5435,8 +5435,8 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5435
5435
  target: z$1.ZodObject<{
5436
5436
  mode: z$1.ZodEnum<{
5437
5437
  all: "all";
5438
- caseIds: "caseIds";
5439
5438
  evalIds: "evalIds";
5439
+ caseIds: "caseIds";
5440
5440
  }>;
5441
5441
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5442
5442
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5448,9 +5448,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5448
5448
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5449
5449
  cache: z$1.ZodOptional<z$1.ZodObject<{
5450
5450
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5451
- refresh: "refresh";
5452
- bypass: "bypass";
5453
5451
  use: "use";
5452
+ bypass: "bypass";
5453
+ refresh: "refresh";
5454
5454
  }>>;
5455
5455
  }, z$1.core.$strip>>;
5456
5456
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-6lrtj48K.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DQO2Fpt2.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-DCGrFAmO.mjs";
1
+ import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-BH7DlMXl.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Ck0mqxd-.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-B3iq-tuv.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BYaN2mzS.mjs";
1
+ import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C1Ex9QI-.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -6181,6 +6181,10 @@ function sumNullable(values) {
6181
6181
  }
6182
6182
  return hasValue ? total : void 0;
6183
6183
  }
6184
/**
 * Returns the largest per-call turn count among the given LLM calls.
 *
 * Each call contributes `stepCount` when it is set and at least 1; a call with
 * a missing (`null`/`undefined`) or sub-1 `stepCount` counts as a single turn.
 *
 * @param {ReadonlyArray<{ stepCount?: number | null }>} calls LLM calls to inspect.
 * @returns {number | undefined} The maximum turn count, or `undefined` when
 *   `calls` is empty.
 */
function getMaxLlmTurns(calls) {
	if (calls.length === 0) return void 0;
	// Fold in a single pass instead of `Math.max(...calls.map(...))`: spreading
	// passes every element as a call argument, which throws a RangeError once
	// the array is large enough to exceed the engine's argument limit.
	// Starting at 1 applies the "at least one turn per call" floor.
	let maxTurns = 1;
	for (const call of calls) maxTurns = Math.max(maxTurns, call.stepCount ?? 1);
	return maxTurns;
}
6184
6188
  function assignIfMissing(params) {
6185
6189
  if (!params.activeKeys.has(params.key)) return;
6186
6190
  if (params.key in params.outputs) return;
@@ -6202,7 +6206,7 @@ function addDefaultOutputs(params) {
6202
6206
  assignIfMissing({
6203
6207
  outputs: params.outputs,
6204
6208
  key: "llmTurns",
6205
- value: calls.length,
6209
+ value: getMaxLlmTurns(calls),
6206
6210
  activeKeys
6207
6211
  });
6208
6212
  assignIfMissing({
@@ -1,4 +1,4 @@
1
- import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
1
+ import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
2
2
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
3
3
  import { dirname, join } from "node:path";
4
4
  import { existsSync } from "node:fs";
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-DQO2Fpt2.mjs";
2
- import "./src-DCGrFAmO.mjs";
1
+ import { n as createRunner } from "./cli-Ck0mqxd-.mjs";
2
+ import "./src-B3iq-tuv.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-DYlwuAT3.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-B3hEOT_I.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-6lrtj48K.mjs";
2
- import "./cli-DQO2Fpt2.mjs";
1
+ import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-BH7DlMXl.mjs";
2
+ import "./cli-Ck0mqxd-.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.56.0",
3
+ "version": "0.56.1",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -33,8 +33,8 @@
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
35
  "@agent-evals/runner": "0.0.1",
36
- "@agent-evals/sdk": "0.0.1",
37
- "@agent-evals/shared": "0.0.1"
36
+ "@agent-evals/shared": "0.0.1",
37
+ "@agent-evals/sdk": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
@@ -393,7 +393,9 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
393
393
  tokens, and output tokens separately and use `dedupeConsecutiveValues: true`
394
394
  to skip repeated adjacent chart values. `totalTokens` is input + output only;
395
395
  cache read/write tokens stay separate and affect `costUsd` at their own
396
- rates.
396
+ rates. `llmTurns` is the maximum per-call turn count in the case run, using
397
+ configured steps when available and otherwise one turn per matched LLM call
398
+ span.
397
399
  Derived base input cost uses `inputTokens - cachedInputTokens -
398
400
  cacheCreationInputTokens` so cache details are not double-counted.
399
401
  `cacheCreationInputTokens` is the total cache-write count; optional