@ls-stack/agent-eval 0.56.0 → 0.57.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-6lrtj48K.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DQO2Fpt2.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-DCGrFAmO.mjs";
1
+ import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-BH7DlMXl.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Ck0mqxd-.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-B3iq-tuv.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BYaN2mzS.mjs";
1
+ import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C1Ex9QI-.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -6181,6 +6181,10 @@ function sumNullable(values) {
6181
6181
  }
6182
6182
  return hasValue ? total : void 0;
6183
6183
  }
6184
+ function getMaxLlmTurns(calls) {
6185
+ if (calls.length === 0) return void 0;
6186
+ return Math.max(...calls.map((call) => Math.max(call.stepCount ?? 1, 1)));
6187
+ }
6184
6188
  function assignIfMissing(params) {
6185
6189
  if (!params.activeKeys.has(params.key)) return;
6186
6190
  if (params.key in params.outputs) return;
@@ -6202,7 +6206,7 @@ function addDefaultOutputs(params) {
6202
6206
  assignIfMissing({
6203
6207
  outputs: params.outputs,
6204
6208
  key: "llmTurns",
6205
- value: calls.length,
6209
+ value: getMaxLlmTurns(calls),
6206
6210
  activeKeys
6207
6211
  });
6208
6212
  assignIfMissing({
@@ -1,4 +1,4 @@
1
- import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
1
+ import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
2
2
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
3
3
  import { dirname, join } from "node:path";
4
4
  import { existsSync } from "node:fs";
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-DQO2Fpt2.mjs";
2
- import "./src-DCGrFAmO.mjs";
1
+ import { n as createRunner } from "./cli-Ck0mqxd-.mjs";
2
+ import "./src-B3iq-tuv.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-DYlwuAT3.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-B3hEOT_I.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-6lrtj48K.mjs";
2
- import "./cli-DQO2Fpt2.mjs";
1
+ import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-BH7DlMXl.mjs";
2
+ import "./cli-Ck0mqxd-.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.56.0",
3
+ "version": "0.57.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -393,7 +393,9 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
393
393
  tokens, and output tokens separately and use `dedupeConsecutiveValues: true`
394
394
  to skip repeated adjacent chart values. `totalTokens` is input + output only;
395
395
  cache read/write tokens stay separate and affect `costUsd` at their own
396
- rates.
396
+ rates. `llmTurns` is the maximum per-call turn count in the case run, using
397
+ configured steps when available and otherwise one turn per matched LLM call
398
+ span.
397
399
  Derived base input cost uses `inputTokens - cachedInputTokens -
398
400
  cacheCreationInputTokens` so cache details are not double-counted.
399
401
  `cacheCreationInputTokens` is the total cache-write count; optional
@@ -568,6 +570,8 @@ Targeted evals in `run.json` are recorded by exact `evalKeys`
568
570
  unambiguous in saved history.
569
571
  Temporary runs use the same directory layout, but are removed before the next
570
572
  run of any kind starts.
573
+ When a saved case needs to be handed to another agent, the app can copy the
574
+ saved case detail path or the saved run folder path directly.
571
575
 
572
576
  Use `agent-evals show-runs` when you need stable file
573
577
  paths before reading saved output: