@ls-stack/agent-eval 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3995,6 +3995,99 @@ function computeTotalCost({ inputTokens, inputCostUsd, outputTokens, outputCostU
3995
3995
  if (hasCost) return total;
3996
3996
  return hasReportedTokens ? 0 : null;
3997
3997
  }
3998
/**
 * Price a recorded LLM call under a what-if billing scenario.
 *
 * - `actual`: the cost fields already stored on `entry` are passed through
 *   unchanged (`entry.costUsd` is reported as `totalCostUsd`).
 * - any other scenario: the input-side token counts are re-projected with
 *   `simulateTokenAllocation`, and every cost component is re-priced with
 *   `computeTokenCost` using the rates of the pricing entry selected by
 *   `pickPricingEntry` for the call's model/provider.
 *
 * Output and reasoning tokens are always priced from the recorded counts;
 * only the input side (base input / cache read / cache write) moves between
 * rows. Cache writes use the 1h cache-creation rate for the
 * `withExtendedCachingWrite` scenario and the standard cache-creation rate
 * otherwise.
 *
 * When no pricing entry is found, every rate handed to `computeTokenCost` is
 * `undefined`; the resulting component values are whatever that helper yields
 * for a missing rate.
 *
 * @param {object} args
 * @param {object} args.entry - Recorded call: token counts, stored costs, `model`, `provider`.
 * @param {object} args.pricing - Pricing registry forwarded to `pickPricingEntry`.
 * @param {string} args.scenario - Billing scenario name (`"actual"`, `"noCache"`, ...).
 * @returns {object} `{ inputCostUsd, outputCostUsd, cachedInputCostUsd,
 *   cacheCreationInputCostUsd, reasoningCostUsd, totalCostUsd }`.
 */
function simulateLlmCallCost({ entry, pricing, scenario }) {
  // Nothing to simulate: report what was actually recorded for the call.
  if (scenario === "actual") {
    const {
      inputCostUsd,
      outputCostUsd,
      cachedInputCostUsd,
      cacheCreationInputCostUsd,
      reasoningCostUsd,
      costUsd,
    } = entry;
    return {
      inputCostUsd,
      outputCostUsd,
      cachedInputCostUsd,
      cacheCreationInputCostUsd,
      reasoningCostUsd,
      totalCostUsd: costUsd,
    };
  }

  const { model, provider, outputTokens, reasoningTokens } = entry;

  // A missing pricing entry degrades to "every rate is undefined".
  const rates = pickPricingEntry({ pricing, model, provider }) ?? {};

  // Output-side components do not depend on the cache scenario.
  const outputCostUsd = computeTokenCost(outputTokens, rates.outputUsdPerMillion);
  const reasoningCostUsd = computeTokenCost(reasoningTokens, rates.reasoningUsdPerMillion);

  // Input-side token counts as they would be billed under the scenario.
  const tokens = simulateTokenAllocation({ entry, scenario });

  // Only the extended-write scenario is billed at the 1h cache-creation rate.
  const cacheWriteRateKey =
    scenario === "withExtendedCachingWrite"
      ? "cacheCreationInput1hUsdPerMillion"
      : "cacheCreationInputUsdPerMillion";
  const cacheWriteRate = rates[cacheWriteRateKey];

  const inputCostUsd = computeTokenCost(tokens.baseInputTokens, rates.inputUsdPerMillion);
  const cachedInputCostUsd = computeTokenCost(tokens.cachedInputTokens, rates.cachedInputUsdPerMillion);
  const cacheCreationInputCostUsd = computeTokenCost(tokens.cacheCreationInputTokens, cacheWriteRate);

  // The total is derived from the same simulated counts and component costs
  // that are returned, so the breakdown and the total cannot disagree.
  const totalCostUsd = computeTotalCost({
    inputTokens: tokens.baseInputTokens,
    inputCostUsd,
    outputTokens,
    outputCostUsd,
    cachedInputTokens: tokens.cachedInputTokens,
    cachedInputCostUsd,
    cacheCreationInputTokens: tokens.cacheCreationInputTokens,
    cacheCreationInputCostUsd,
    reasoningTokens,
    reasoningCostUsd,
  });

  return {
    inputCostUsd,
    outputCostUsd,
    cachedInputCostUsd,
    cacheCreationInputCostUsd,
    reasoningCostUsd,
    totalCostUsd,
  };
}
4052
/**
 * Project a call's recorded input-token split onto a hypothetical billing
 * scenario, returning the counts that each row (base input, cache read,
 * cache write) would carry under that scenario.
 *
 * Scenarios:
 * - `actual`, or no recorded `inputTokens` (`null`/`undefined`): the recorded
 *   cache read/write counts are returned as-is, together with the base input
 *   count from `computeBaseInputTokens`.
 * - `noCache`: the whole of `entry.inputTokens` is billed as base input and
 *   both cache rows are zero. NOTE(review): this presumes `inputTokens` is the
 *   total prompt size including cache reads/writes — confirm against the
 *   extractor that fills `entry`.
 * - `withBaseCaching`: every cacheable token (recorded reads + writes) is
 *   billed as a cache read.
 * - any other scenario (the cache-write scenarios): every cacheable token is
 *   billed as a cache write. Unrecognised scenario names also land here.
 *
 * If the call recorded no cacheable tokens at all, the caching scenarios treat
 * the entire input as cacheable and leave base input at zero.
 *
 * @param {object} args
 * @param {object} args.entry - Recorded call with `inputTokens`,
 *   `cachedInputTokens` and `cacheCreationInputTokens` (each may be `null`).
 * @param {string} args.scenario - Billing scenario name.
 * @returns {{ baseInputTokens: (number|null), cachedInputTokens: (number|null), cacheCreationInputTokens: (number|null) }}
 *   Simulated token counts per row.
 */
function simulateTokenAllocation({ entry, scenario }) {
  const { inputTokens, cachedInputTokens, cacheCreationInputTokens } = entry;
  const baseInputTokens = computeBaseInputTokens({
    inputTokens,
    cachedInputTokens,
    cacheCreationInputTokens,
  });

  // `== null` on purpose: an absent (`undefined`) token count is just as
  // unsimulatable as an explicit `null`, and must not leak `undefined` into
  // the simulated rows below.
  if (scenario === "actual" || inputTokens == null) {
    return { baseInputTokens, cachedInputTokens, cacheCreationInputTokens };
  }

  if (scenario === "noCache") {
    return {
      baseInputTokens: inputTokens,
      cachedInputTokens: 0,
      cacheCreationInputTokens: 0,
    };
  }

  const cacheableTokens = (cachedInputTokens ?? 0) + (cacheCreationInputTokens ?? 0);
  const hasCacheable = cacheableTokens > 0;

  // With no recorded cache activity, the whole input is treated as cacheable.
  const uncachedTokens = hasCacheable ? baseInputTokens : 0;
  const shiftedTokens = hasCacheable ? cacheableTokens : inputTokens;

  if (scenario === "withBaseCaching") {
    // Warmed cache: all cacheable tokens are reads.
    return {
      baseInputTokens: uncachedTokens,
      cachedInputTokens: shiftedTokens,
      cacheCreationInputTokens: 0,
    };
  }

  // First-call write scenarios: all cacheable tokens are writes.
  return {
    baseInputTokens: uncachedTokens,
    cachedInputTokens: 0,
    cacheCreationInputTokens: shiftedTokens,
  };
}
3998
4091
  function computeDurationMs$1(span) {
3999
4092
  if (span.endedAt === null) return null;
4000
4093
  const started = Date.parse(span.startedAt);
@@ -7218,4 +7311,4 @@ function toLastRunStatus(status) {
7218
7311
  return status === "pending" ? null : status;
7219
7312
  }
7220
7313
  //#endregion
7221
- export { defaultConfigKeySchema as $, getEvalStartTime as $n, cacheEntrySchema as $t, createRunRequestSchema as A, z$1 as An, runLogLocationSchema as At, getEvalDisplayStatus as B, serializeCacheValue as Bn, manualInputSelectOptionSchema as Bt, loadConfig as C, columnFormatSchema as Cn, evalFreshnessStatusSchema as Ct, createFsCacheStore as D, numberDisplayOptionsSchema as Dn, evalSummarySchema as Dt, validateCharts as E, jsonCellSchema as En, evalStatsConfigSchema as Et, extractApiCalls as F, hashCacheKey as Fn, manualInputFieldDescriptorSchema as Ft, runSummarySchema as G, EvalAssertionError as Gn, evalChartColorSchema as Gt, deriveStatusFromCaseRows as H, manualInputFileValueSchema as Hn, evalChartAggregateSchema as Ht, extractLlmCalls as I, hashCacheKeySync as In, manualInputJsonFieldSchema as It, agentEvalsConfigSchema as J, configureEvalRunLogs as Jn, evalChartTooltipExtraSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, advanceEvalTime as Kn, evalChartConfigSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheRecording as Ln, manualInputMultilineFieldSchema as Lt, sseEnvelopeSchema as M, captureEvalSpanError as Mn, scoreTraceSchema as Mt, extractCacheEntries as N, evalSpan as Nn, manualInputBooleanFieldSchema as Nt, configReloadStateSchema as O, repoFileRefSchema as On, runLogEntrySchema as Ot, extractCacheHits as P, evalTracer as Pn, manualInputDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalCaseInput as Qn, cacheDebugKeyFileSchema as Qt, getNestedAttribute as R, deserializeCacheValue as Rn, manualInputNumberFieldSchema as Rt, resolveEvalDefaultConfig as S, columnDefSchema as Sn, discoveryIssueSchema as St, normalizeScoreDef as T, fileRefSchema as Tn, evalStatItemSchema as Tt, deriveStatusFromChildStatuses as U, readManualInputFile as Un, evalChartAxisSchema as Ut, deriveScopedSummaryFromCases as V, repoFile as Vn, manualInputTextFieldSchema as Vt, runManifestSchema as W, evalExpect as Wn, evalChartBuiltinMetricSchema as Wt, 
apiCallMetricPlacementSchema as X, evalLog as Xn, evalChartsConfigSchema as Xt, apiCallMetricFormatSchema as Y, evalAssert as Yn, evalChartTypeSchema as Yt, apiCallMetricSchema as Z, getCurrentScope as Zn, cacheDebugKeyEntrySchema as Zt, buildManualInputDescriptor as _, traceSpanErrorSchema as _n, getCaseRowCaseKey as _t, getLastRunStatuses as a, cacheRecordingOpSchema as an, runInEvalScope as ar, llmCallMetricPlacementSchema as at, loadEvalModule as b, traceSpanWarningSchema as bn, caseDetailSchema as bt, loadPersistedRunSnapshots as c, serializedCacheSpanSchema as cn, setScopeCacheContext as cr, llmCallPricingSchema as ct, persistRunState as d, traceAttributeDisplayFormatSchema as dn, getEvalRegistry as dr, resolveApiCallsConfig as dt, cacheEntryWithDebugKeySchema as en, incrementEvalOutput as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayInputSchema as fn, resolveLlmCallsConfig as ft, resolveArtifactPath as g, traceDisplayInputConfigSchema as gn, buildEvalKey as gt, resolveTracePresentation as h, traceDisplayConfigSchema as hn, buildCaseKey as ht, generateRunId as i, cacheOperationTypeSchema as in, runInEvalRuntimeScope as ir, llmCallMetricFormatSchema as it, updateManualScoreRequestSchema as j, buildTraceTree as jn, runLogPhaseSchema as jt, configReloadStatusSchema as k, runArtifactRefSchema as kn, runLogLevelSchema as kt, nextShortIdFromSnapshots as l, spanCacheOptionsSchema as ln, startEvalBackgroundJob as lr, llmCallsConfigSchema as lt, runTouchesEval as m, traceAttributeDisplaySchema as mn, trialSelectionModeSchema as mt, getTargetEvalKeys as n, cacheListItemSchema as nn, mergeEvalOutput as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheRecordingSchema as on, runInExistingEvalScope as or, llmCallMetricSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayPlacementSchema as pn, runLogsConfigSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, appendToEvalOutput as qn, evalChartMetricSchema 
as qt, getTargetEvals as r, cacheModeSchema as rn, nextEvalId as rr, llmCallCostCurrencySchema as rt, loadPersistedRunSnapshot as s, cacheStatusSchema as sn, setEvalOutput as sr, llmCallPricingRateSchema as st, executeRun as t, cacheFileSchema as tn, isInEvalScope as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceCacheRefSchema as un, defineEval as ur, removeDefaultConfigSchema as ut, parseManualInputValues as v, traceSpanKindSchema as vn, getCaseRowEvalKey as vt, buildDeclaredColumnDefs as w, columnKindSchema as wn, evalStatAggregateSchema as wt, parseEvalDiscovery as x, cellValueSchema as xn, caseRowSchema as xt, deriveEvalFreshness as y, traceSpanSchema as yn, assertionFailureSchema as yt, getEvalTitle as z, serializeCacheRecording as zn, manualInputSelectFieldSchema as zt };
7314
+ export { apiCallMetricSchema as $, getCurrentScope as $n, cacheDebugKeyEntrySchema as $t, createRunRequestSchema as A, repoFileRefSchema as An, runLogEntrySchema as At, getNestedAttribute as B, deserializeCacheValue as Bn, manualInputNumberFieldSchema as Bt, loadConfig as C, cellValueSchema as Cn, caseRowSchema as Ct, createFsCacheStore as D, fileRefSchema as Dn, evalStatItemSchema as Dt, validateCharts as E, columnKindSchema as En, evalStatAggregateSchema as Et, extractApiCalls as F, evalSpan as Fn, manualInputBooleanFieldSchema as Ft, deriveStatusFromChildStatuses as G, readManualInputFile as Gn, evalChartAxisSchema as Gt, getEvalDisplayStatus as H, serializeCacheValue as Hn, manualInputSelectOptionSchema as Ht, extractLlmCalls as I, evalTracer as In, manualInputDescriptorSchema as It, DEFAULT_API_CALLS_CONFIG as J, advanceEvalTime as Jn, evalChartConfigSchema as Jt, runManifestSchema as K, evalExpect as Kn, evalChartBuiltinMetricSchema as Kt, simulateLlmCallCost as L, hashCacheKey as Ln, manualInputFieldDescriptorSchema as Lt, sseEnvelopeSchema as M, z$1 as Mn, runLogLocationSchema as Mt, extractCacheEntries as N, buildTraceTree as Nn, runLogPhaseSchema as Nt, configReloadStateSchema as O, jsonCellSchema as On, evalStatsConfigSchema as Ot, extractCacheHits as P, captureEvalSpanError as Pn, scoreTraceSchema as Pt, apiCallMetricPlacementSchema as Q, evalLog as Qn, evalChartsConfigSchema as Qt, simulateTokenAllocation as R, hashCacheKeySync as Rn, manualInputJsonFieldSchema as Rt, resolveEvalDefaultConfig as S, traceSpanWarningSchema as Sn, caseDetailSchema as St, normalizeScoreDef as T, columnFormatSchema as Tn, evalFreshnessStatusSchema as Tt, deriveScopedSummaryFromCases as U, repoFile as Un, manualInputTextFieldSchema as Ut, getEvalTitle as V, serializeCacheRecording as Vn, manualInputSelectFieldSchema as Vt, deriveStatusFromCaseRows as W, manualInputFileValueSchema as Wn, evalChartAggregateSchema as Wt, agentEvalsConfigSchema as X, configureEvalRunLogs as 
Xn, evalChartTooltipExtraSchema as Xt, DEFAULT_LLM_CALLS_CONFIG as Y, appendToEvalOutput as Yn, evalChartMetricSchema as Yt, apiCallMetricFormatSchema as Z, evalAssert as Zn, evalChartTypeSchema as Zt, buildManualInputDescriptor as _, traceDisplayConfigSchema as _n, buildCaseKey as _t, getLastRunStatuses as a, cacheModeSchema as an, nextEvalId as ar, llmCallCostCurrencySchema as at, loadEvalModule as b, traceSpanKindSchema as bn, getCaseRowEvalKey as bt, loadPersistedRunSnapshots as c, cacheRecordingSchema as cn, runInExistingEvalScope as cr, llmCallMetricSchema as ct, persistRunState as d, spanCacheOptionsSchema as dn, startEvalBackgroundJob as dr, llmCallsConfigSchema as dt, cacheDebugKeyFileSchema as en, getEvalCaseInput as er, apiCallsConfigSchema as et, recomputeEvalStatusesInRuns as f, traceCacheRefSchema as fn, defineEval as fr, removeDefaultConfigSchema as ft, resolveArtifactPath as g, traceAttributeDisplaySchema as gn, trialSelectionModeSchema as gt, resolveTracePresentation as h, traceAttributeDisplayPlacementSchema as hn, runLogsConfigSchema as ht, generateRunId as i, cacheListItemSchema as in, mergeEvalOutput as ir, evalDeriveConfigSchema as it, updateManualScoreRequestSchema as j, runArtifactRefSchema as jn, runLogLevelSchema as jt, configReloadStatusSchema as k, numberDisplayOptionsSchema as kn, evalSummarySchema as kt, nextShortIdFromSnapshots as l, cacheStatusSchema as ln, setEvalOutput as lr, llmCallPricingRateSchema as lt, runTouchesEval as m, traceAttributeDisplayInputSchema as mn, resolveLlmCallsConfig as mt, getTargetEvalKeys as n, cacheEntryWithDebugKeySchema as nn, incrementEvalOutput as nr, evalColumnOverrideSchema as nt, getLatestRunInfos as o, cacheOperationTypeSchema as on, runInEvalRuntimeScope as or, llmCallMetricFormatSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayFormatSchema as pn, getEvalRegistry as pr, resolveApiCallsConfig as pt, runSummarySchema as q, EvalAssertionError as qn, evalChartColorSchema as qt, 
getTargetEvals as r, cacheFileSchema as rn, isInEvalScope as rr, evalColumnsSchema as rt, loadPersistedRunSnapshot as s, cacheRecordingOpSchema as sn, runInEvalScope as sr, llmCallMetricPlacementSchema as st, executeRun as t, cacheEntrySchema as tn, getEvalStartTime as tr, defaultConfigKeySchema as tt, persistCaseDetail as u, serializedCacheSpanSchema as un, setScopeCacheContext as ur, llmCallPricingSchema as ut, parseManualInputValues as v, traceDisplayInputConfigSchema as vn, buildEvalKey as vt, buildDeclaredColumnDefs as w, columnDefSchema as wn, discoveryIssueSchema as wt, parseEvalDiscovery as x, traceSpanSchema as xn, assertionFailureSchema as xt, deriveEvalFreshness as y, traceSpanErrorSchema as yn, getCaseRowCaseKey as yt, applyDerivedCallAttributes as z, deserializeCacheRecording as zn, manualInputMultilineFieldSchema as zt };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-Dkol2ukD.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CMPmuY7W.mjs";
2
- import "./src-gZm9nyTp.mjs";
1
+ import { n as createRunner } from "./cli-huuJbDNb.mjs";
2
+ import "./src-1Qvuh0NH.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-ZpN7xty_.mjs";
2
+ import "./cli-huuJbDNb.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.31.0",
3
+ "version": "0.32.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -31,8 +31,8 @@
31
31
  "devDependencies": {
32
32
  "@types/node": "^24.7.2",
33
33
  "typescript": "^5.9.2",
34
- "@agent-evals/runner": "0.0.1",
35
34
  "@agent-evals/sdk": "0.0.1",
35
+ "@agent-evals/runner": "0.0.1",
36
36
  "@agent-evals/shared": "0.0.1"
37
37
  },
38
38
  "scripts": {