@ls-stack/agent-eval 0.31.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CPcVyFRP.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-ClPR-tfN.css">
28
+ <script type="module" crossorigin src="/assets/index-BV_DM8fZ.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BPMMRktE.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-CMPmuY7W.mjs";
2
+ import { t as runCli } from "./cli-huuJbDNb.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { B as getEvalDisplayStatus, C as loadConfig, D as createFsCacheStore, E as validateCharts, G as runSummarySchema, L as applyDerivedCallAttributes, S as resolveEvalDefaultConfig, T as normalizeScoreDef, V as deriveScopedSummaryFromCases, _ as buildManualInputDescriptor, _t as getCaseRowCaseKey, a as getLastRunStatuses, b as loadEvalModule, c as loadPersistedRunSnapshots, d as persistRunState, dr as getEvalRegistry, dt as resolveApiCallsConfig, f as recomputeEvalStatusesInRuns, ft as resolveLlmCallsConfig, g as resolveArtifactPath, gt as buildEvalKey, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as getCaseRowEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, z as getEvalTitle } from "./runOrchestration-CAyVXPFz.mjs";
1
+ import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-ZpN7xty_.mjs";
2
2
  import { createHash, randomUUID } from "node:crypto";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -1940,8 +1940,8 @@ async function commandApp(args) {
1940
1940
  const { serve } = await import("@hono/node-server");
1941
1941
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1942
1942
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1943
- const appModule = await import("./app-BrSMRTpy.mjs");
1944
- const runnerModule = await import("./runner-CmpWwCe1.mjs");
1943
+ const appModule = await import("./app-Dc6vvHRL.mjs");
1944
+ const runnerModule = await import("./runner-BPXPvinB.mjs");
1945
1945
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1946
1946
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1947
1947
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -5371,6 +5371,84 @@ type LlmCallEntry = {
5371
5371
  warnings: EvalTraceSpanWarning[];
5372
5372
  error: EvalTraceSpanError | null;
5373
5373
  };
5374
+ /**
5375
+ * Cost-simulation scenarios available in the LLM calls breakdown table.
5376
+ *
5377
+ * - `actual` — Real billed cost recorded on the span.
5378
+ * - `noCache` — Bill every input token at the base input rate, ignoring all
5379
+ * cache reads and cache writes. Worst case for any prompt that could be
5380
+ * cached.
5381
+ * - `withBaseCaching` — Steady-state cost on a fully warmed cache: cache
5382
+ * writes are treated as already paid (free), cache reads keep the cache-read
5383
+ * discount, and base input keeps the base rate. When the call has no
5384
+ * caching at all, every input token is billed at the cache-read rate, as if
5385
+ * the prompt had been warmed by an earlier run. Cache-read pricing is the
5386
+ * same on the base (5-minute) and extended (1-hour) tiers, so this scenario
5387
+ * covers the warmed case for both TTLs.
5388
+ * - `withBaseCachingWrite` — First-call cost paying the 5-minute cache write
5389
+ * premium. When the call already uses caching, every cache write token is
5390
+ * billed at the 5-minute rate (any extended-cache split is folded into the
5391
+ * 5-minute rate). When the call has no caching at all, every input token is
5392
+ * billed at the 5-minute cache write rate, as if this were the first call
5393
+ * warming up the base cache.
5394
+ * - `withExtendedCachingWrite` — First-call cost paying the extended (e.g.
5395
+ * 1-hour) cache write premium. When the call already uses caching, every
5396
+ * cache write token is billed at the extended rate. When the call has no
5397
+ * caching at all, every input token is billed at the extended cache write
5398
+ * rate, as if this were the first call warming up the extended cache.
5399
+ */
5400
+ type LlmCostScenario = 'actual' | 'noCache' | 'withBaseCaching' | 'withBaseCachingWrite' | 'withExtendedCachingWrite';
5401
+ /** Per-row cost values returned by {@link simulateLlmCallCost}. */
5402
+ type LlmCallCostBreakdown = {
5403
+ inputCostUsd: number | null;
5404
+ outputCostUsd: number | null;
5405
+ cachedInputCostUsd: number | null;
5406
+ cacheCreationInputCostUsd: number | null;
5407
+ reasoningCostUsd: number | null;
5408
+ totalCostUsd: number | null;
5409
+ };
5410
+ /**
5411
+ * Recompute the LLM-call cost breakdown for a hypothetical billing scenario,
5412
+ * using the call's recorded token counts and the resolved pricing registry.
5413
+ *
5414
+ * The `actual` scenario returns the costs already stored on `entry`. Other
5415
+ * scenarios re-derive each cost component from `pricing` so users can compare
5416
+ * what the same usage would have cost under different cache strategies. When
5417
+ * pricing is missing for the model/provider, simulated cost components fall
5418
+ * back to `null` exactly like the original extractor.
5419
+ */
5420
+ declare function simulateLlmCallCost({
5421
+ entry,
5422
+ pricing,
5423
+ scenario
5424
+ }: {
5425
+ entry: LlmCallEntry;
5426
+ pricing: ResolvedLlmCallPricing[];
5427
+ scenario: LlmCostScenario;
5428
+ }): LlmCallCostBreakdown;
5429
+ /** Per-row simulated token counts shown in the LLM call breakdown table. */
5430
+ type LlmCallSimulatedTokens = {
5431
+ /** Tokens shown on the `Input` row — base input only (cached + creation are subtracted). */baseInputTokens: number | null; /** Tokens shown on the `Cache read` row. */
5432
+ cachedInputTokens: number | null; /** Tokens shown on the `Cache write` row. */
5433
+ cacheCreationInputTokens: number | null;
5434
+ };
5435
+ /**
5436
+ * Project the call's recorded token allocation onto a hypothetical billing
5437
+ * scenario. Cacheable tokens shift between rows so the breakdown reflects the
5438
+ * simulated billing model: `noCache` folds reads/writes into base input,
5439
+ * `withBaseCaching` (warmed) treats every cacheable token as a cache read, and
5440
+ * the first-call write scenarios treat every cacheable token as a cache write.
5441
+ *
5442
+ * The returned counts are what the UI renders on each row and what
5443
+ * {@link simulateLlmCallCost} prices, so display and totals never drift.
5444
+ */
5445
+ declare function simulateTokenAllocation({
5446
+ entry,
5447
+ scenario
5448
+ }: {
5449
+ entry: LlmCallEntry;
5450
+ scenario: LlmCostScenario;
5451
+ }): LlmCallSimulatedTokens;
5374
5452
  /**
5375
5453
  * Filter `spans` down to LLM calls and project each one to the structured
5376
5454
  * shape consumed by the LLM calls tab.
@@ -6283,4 +6361,4 @@ type ManualInputDescriptor = z$1.infer<typeof manualInputDescriptorSchema>; //#e
6283
6361
  */
6284
6362
  declare function runCli(argv: string[]): Promise<void>;
6285
6363
  //#endregion
6286
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type 
EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, 
cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, 
mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
6364
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type 
EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, 
buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, 
manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as defaultConfigKeySchema, $n as getEvalStartTime, $t as cacheEntrySchema, A as createRunRequestSchema, An as z, At as runLogLocationSchema, B as getEvalDisplayStatus, Bn as serializeCacheValue, Bt as manualInputSelectOptionSchema, Cn as columnFormatSchema, Ct as evalFreshnessStatusSchema, Dn as numberDisplayOptionsSchema, Dt as evalSummarySchema, En as jsonCellSchema, Et as evalStatsConfigSchema, F as extractApiCalls, Fn as hashCacheKey, Ft as manualInputFieldDescriptorSchema, G as runSummarySchema, Gn as EvalAssertionError, Gt as evalChartColorSchema, H as deriveStatusFromCaseRows, Hn as manualInputFileValueSchema, Ht as evalChartAggregateSchema, I as extractLlmCalls, In as hashCacheKeySync, It as manualInputJsonFieldSchema, J as agentEvalsConfigSchema, Jt as evalChartTooltipExtraSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as advanceEvalTime, Kt as evalChartConfigSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheRecording, Lt as manualInputMultilineFieldSchema, M as sseEnvelopeSchema, Mn as captureEvalSpanError, Mt as scoreTraceSchema, N as extractCacheEntries, Nn as evalSpan, Nt as manualInputBooleanFieldSchema, O as configReloadStateSchema, On as repoFileRefSchema, Ot as runLogEntrySchema, P as extractCacheHits, Pn as evalTracer, Pt as manualInputDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalCaseInput, Qt as cacheDebugKeyFileSchema, R as getNestedAttribute, Rn as deserializeCacheValue, Rt as manualInputNumberFieldSchema, Sn as columnDefSchema, St as discoveryIssueSchema, Tn as fileRefSchema, Tt as evalStatItemSchema, U as deriveStatusFromChildStatuses, Un as readManualInputFile, Ut as evalChartAxisSchema, V as deriveScopedSummaryFromCases, Vn as repoFile, Vt as manualInputTextFieldSchema, W as runManifestSchema, Wn as evalExpect, Wt as evalChartBuiltinMetricSchema, X as apiCallMetricPlacementSchema, Xn as evalLog, Xt as evalChartsConfigSchema, Y as apiCallMetricFormatSchema, Yn as evalAssert, Yt as evalChartTypeSchema, Z as 
apiCallMetricSchema, Zn as getCurrentScope, Zt as cacheDebugKeyEntrySchema, _n as traceSpanErrorSchema, _t as getCaseRowCaseKey, an as cacheRecordingOpSchema, ar as runInEvalScope, at as llmCallMetricPlacementSchema, bn as traceSpanWarningSchema, bt as caseDetailSchema, cn as serializedCacheSpanSchema, cr as setScopeCacheContext, ct as llmCallPricingSchema, dn as traceAttributeDisplayFormatSchema, dr as getEvalRegistry, dt as resolveApiCallsConfig, en as cacheEntryWithDebugKeySchema, er as incrementEvalOutput, et as evalColumnOverrideSchema, fn as traceAttributeDisplayInputSchema, ft as resolveLlmCallsConfig, gn as traceDisplayInputConfigSchema, gt as buildEvalKey, hn as traceDisplayConfigSchema, ht as buildCaseKey, in as cacheOperationTypeSchema, ir as runInEvalRuntimeScope, it as llmCallMetricFormatSchema, j as updateManualScoreRequestSchema, jn as buildTraceTree, jt as runLogPhaseSchema, k as configReloadStatusSchema, kn as runArtifactRefSchema, kt as runLogLevelSchema, ln as spanCacheOptionsSchema, lr as startEvalBackgroundJob, lt as llmCallsConfigSchema, mn as traceAttributeDisplaySchema, mt as trialSelectionModeSchema, nn as cacheListItemSchema, nr as mergeEvalOutput, nt as evalDeriveConfigSchema, on as cacheRecordingSchema, or as runInExistingEvalScope, ot as llmCallMetricSchema, pn as traceAttributeDisplayPlacementSchema, pt as runLogsConfigSchema, q as DEFAULT_LLM_CALLS_CONFIG, qn as appendToEvalOutput, qt as evalChartMetricSchema, rn as cacheModeSchema, rr as nextEvalId, rt as llmCallCostCurrencySchema, sn as cacheStatusSchema, sr as setEvalOutput, st as llmCallPricingRateSchema, tn as cacheFileSchema, tr as isInEvalScope, tt as evalColumnsSchema, un as traceCacheRefSchema, ur as defineEval, ut as removeDefaultConfigSchema, vn as traceSpanKindSchema, vt as getCaseRowEvalKey, wn as columnKindSchema, wt as evalStatAggregateSchema, xn as cellValueSchema, xt as caseRowSchema, yn as traceSpanSchema, yt as assertionFailureSchema, z as getEvalTitle, zn as 
serializeCacheRecording, zt as manualInputSelectFieldSchema } from "./runOrchestration-CAyVXPFz.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CMPmuY7W.mjs";
3
- import "./src-gZm9nyTp.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as 
apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as 
traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
3
+ import "./src-1Qvuh0NH.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, Et as evalStatsConfigSchema, G as runSummarySchema, Jn as configureEvalRunLogs, Pt as manualInputDescriptorSchema, Sn as columnDefSchema, W as runManifestSchema, Xt as evalChartsConfigSchema, gt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery } from "./runOrchestration-CAyVXPFz.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-ZpN7xty_.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -3995,6 +3995,99 @@ function computeTotalCost({ inputTokens, inputCostUsd, outputTokens, outputCostU
3995
3995
  if (hasCost) return total;
3996
3996
  return hasReportedTokens ? 0 : null;
3997
3997
  }
3998
/**
 * Price a single LLM call under a chosen billing scenario.
 *
 * For the `"actual"` scenario the cost fields already present on `entry` are
 * returned untouched (with `entry.costUsd` exposed as `totalCostUsd`). For any
 * other scenario each component is re-priced: the pricing entry is looked up
 * via `pickPricingEntry` using the call's model/provider, token counts are
 * projected with `simulateTokenAllocation`, and every component is priced
 * through `computeTokenCost` before being summed by `computeTotalCost`.
 *
 * NOTE(review): when no pricing entry is found, the rates passed to
 * `computeTokenCost` are `undefined`; presumably that yields `null` cost
 * components — confirm against `computeTokenCost`.
 *
 * @param {object} args
 * @param {object} args.entry - Extracted LLM call (token counts, recorded costs, model, provider).
 * @param {*} args.pricing - Pricing registry forwarded to `pickPricingEntry`.
 * @param {string} args.scenario - Billing scenario; `"actual"` short-circuits, and
 *   `"withExtendedCachingWrite"` selects the 1h cache-creation rate.
 * @returns {{inputCostUsd: *, outputCostUsd: *, cachedInputCostUsd: *, cacheCreationInputCostUsd: *, reasoningCostUsd: *, totalCostUsd: *}}
 *   Cost breakdown for the scenario.
 */
function simulateLlmCallCost({ entry, pricing, scenario }) {
	if (scenario === "actual") {
		// Nothing to simulate: echo the costs recorded on the entry.
		const {
			inputCostUsd: recordedInput,
			outputCostUsd: recordedOutput,
			cachedInputCostUsd: recordedCachedInput,
			cacheCreationInputCostUsd: recordedCacheCreation,
			reasoningCostUsd: recordedReasoning,
			costUsd: recordedTotal
		} = entry;
		return {
			inputCostUsd: recordedInput,
			outputCostUsd: recordedOutput,
			cachedInputCostUsd: recordedCachedInput,
			cacheCreationInputCostUsd: recordedCacheCreation,
			reasoningCostUsd: recordedReasoning,
			totalCostUsd: recordedTotal
		};
	}

	const rates = pickPricingEntry({
		pricing,
		model: entry.model,
		provider: entry.provider
	});

	// Output and reasoning tokens are priced from the recorded counts; only
	// the input-side rows are re-allocated by the scenario.
	const outputCostUsd = computeTokenCost(entry.outputTokens, rates?.outputUsdPerMillion);
	const reasoningCostUsd = computeTokenCost(entry.reasoningTokens, rates?.reasoningUsdPerMillion);

	const allocation = simulateTokenAllocation({
		entry,
		scenario
	});

	// The extended-write scenario bills cache creation at the 1h rate; every
	// other simulated scenario uses the regular cache-creation rate.
	let cacheWriteRate;
	if (scenario === "withExtendedCachingWrite") {
		cacheWriteRate = rates?.cacheCreationInput1hUsdPerMillion;
	} else {
		cacheWriteRate = rates?.cacheCreationInputUsdPerMillion;
	}

	const inputCostUsd = computeTokenCost(allocation.baseInputTokens, rates?.inputUsdPerMillion);
	const cachedInputCostUsd = computeTokenCost(allocation.cachedInputTokens, rates?.cachedInputUsdPerMillion);
	const cacheCreationInputCostUsd = computeTokenCost(allocation.cacheCreationInputTokens, cacheWriteRate);

	const components = {
		inputCostUsd,
		outputCostUsd,
		cachedInputCostUsd,
		cacheCreationInputCostUsd,
		reasoningCostUsd
	};
	const totalCostUsd = computeTotalCost({
		inputTokens: allocation.baseInputTokens,
		inputCostUsd,
		outputTokens: entry.outputTokens,
		outputCostUsd,
		cachedInputTokens: allocation.cachedInputTokens,
		cachedInputCostUsd,
		cacheCreationInputTokens: allocation.cacheCreationInputTokens,
		cacheCreationInputCostUsd,
		reasoningTokens: entry.reasoningTokens,
		reasoningCostUsd
	});
	return {
		...components,
		totalCostUsd
	};
}
4052
/**
 * Re-distribute a call's recorded input tokens across the base / cache-read /
 * cache-write rows for a hypothetical billing scenario.
 *
 * - `"actual"`, or an entry whose `inputTokens` is `null`: the recorded
 *   allocation is returned as-is (base input comes from
 *   `computeBaseInputTokens`).
 * - `"noCache"`: all of `entry.inputTokens` is billed as base input.
 * - `"withBaseCaching"`: recorded cache reads + writes are all counted as
 *   cache reads; if the call recorded no cacheable tokens, the whole input is
 *   counted as a cache read and base input becomes 0.
 * - any other scenario: same shift, but onto the cache-write row.
 *
 * @param {object} args
 * @param {object} args.entry - Extracted LLM call; reads `inputTokens`,
 *   `cachedInputTokens` and `cacheCreationInputTokens`.
 * @param {string} args.scenario - Billing scenario name.
 * @returns {{baseInputTokens: *, cachedInputTokens: *, cacheCreationInputTokens: *}}
 *   Token counts per row for the scenario.
 */
function simulateTokenAllocation({ entry, scenario }) {
	const { inputTokens, cachedInputTokens, cacheCreationInputTokens } = entry;
	const baseInputTokens = computeBaseInputTokens({
		inputTokens,
		cachedInputTokens,
		cacheCreationInputTokens
	});

	// Without a recorded input count there is nothing to re-allocate.
	if (scenario === "actual" || inputTokens === null) {
		return {
			baseInputTokens,
			cachedInputTokens,
			cacheCreationInputTokens
		};
	}

	if (scenario === "noCache") {
		return {
			baseInputTokens: inputTokens,
			cachedInputTokens: 0,
			cacheCreationInputTokens: 0
		};
	}

	const cacheableTotal = (cachedInputTokens ?? 0) + (cacheCreationInputTokens ?? 0);
	const anyCacheable = cacheableTotal > 0;
	// With no recorded cacheable tokens the whole input moves to the cache
	// row and nothing is left as base input.
	const remainingBase = anyCacheable ? baseInputTokens : 0;
	const movedToCache = anyCacheable ? cacheableTotal : inputTokens;

	if (scenario === "withBaseCaching") {
		return {
			baseInputTokens: remainingBase,
			cachedInputTokens: movedToCache,
			cacheCreationInputTokens: 0
		};
	}

	// Remaining scenarios bill the cacheable tokens as cache writes.
	return {
		baseInputTokens: remainingBase,
		cachedInputTokens: 0,
		cacheCreationInputTokens: movedToCache
	};
}
3998
4091
  function computeDurationMs$1(span) {
3999
4092
  if (span.endedAt === null) return null;
4000
4093
  const started = Date.parse(span.startedAt);
@@ -7218,4 +7311,4 @@ function toLastRunStatus(status) {
7218
7311
  return status === "pending" ? null : status;
7219
7312
  }
7220
7313
  //#endregion
7221
- export { defaultConfigKeySchema as $, getEvalStartTime as $n, cacheEntrySchema as $t, createRunRequestSchema as A, z$1 as An, runLogLocationSchema as At, getEvalDisplayStatus as B, serializeCacheValue as Bn, manualInputSelectOptionSchema as Bt, loadConfig as C, columnFormatSchema as Cn, evalFreshnessStatusSchema as Ct, createFsCacheStore as D, numberDisplayOptionsSchema as Dn, evalSummarySchema as Dt, validateCharts as E, jsonCellSchema as En, evalStatsConfigSchema as Et, extractApiCalls as F, hashCacheKey as Fn, manualInputFieldDescriptorSchema as Ft, runSummarySchema as G, EvalAssertionError as Gn, evalChartColorSchema as Gt, deriveStatusFromCaseRows as H, manualInputFileValueSchema as Hn, evalChartAggregateSchema as Ht, extractLlmCalls as I, hashCacheKeySync as In, manualInputJsonFieldSchema as It, agentEvalsConfigSchema as J, configureEvalRunLogs as Jn, evalChartTooltipExtraSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, advanceEvalTime as Kn, evalChartConfigSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheRecording as Ln, manualInputMultilineFieldSchema as Lt, sseEnvelopeSchema as M, captureEvalSpanError as Mn, scoreTraceSchema as Mt, extractCacheEntries as N, evalSpan as Nn, manualInputBooleanFieldSchema as Nt, configReloadStateSchema as O, repoFileRefSchema as On, runLogEntrySchema as Ot, extractCacheHits as P, evalTracer as Pn, manualInputDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalCaseInput as Qn, cacheDebugKeyFileSchema as Qt, getNestedAttribute as R, deserializeCacheValue as Rn, manualInputNumberFieldSchema as Rt, resolveEvalDefaultConfig as S, columnDefSchema as Sn, discoveryIssueSchema as St, normalizeScoreDef as T, fileRefSchema as Tn, evalStatItemSchema as Tt, deriveStatusFromChildStatuses as U, readManualInputFile as Un, evalChartAxisSchema as Ut, deriveScopedSummaryFromCases as V, repoFile as Vn, manualInputTextFieldSchema as Vt, runManifestSchema as W, evalExpect as Wn, evalChartBuiltinMetricSchema as Wt, 
apiCallMetricPlacementSchema as X, evalLog as Xn, evalChartsConfigSchema as Xt, apiCallMetricFormatSchema as Y, evalAssert as Yn, evalChartTypeSchema as Yt, apiCallMetricSchema as Z, getCurrentScope as Zn, cacheDebugKeyEntrySchema as Zt, buildManualInputDescriptor as _, traceSpanErrorSchema as _n, getCaseRowCaseKey as _t, getLastRunStatuses as a, cacheRecordingOpSchema as an, runInEvalScope as ar, llmCallMetricPlacementSchema as at, loadEvalModule as b, traceSpanWarningSchema as bn, caseDetailSchema as bt, loadPersistedRunSnapshots as c, serializedCacheSpanSchema as cn, setScopeCacheContext as cr, llmCallPricingSchema as ct, persistRunState as d, traceAttributeDisplayFormatSchema as dn, getEvalRegistry as dr, resolveApiCallsConfig as dt, cacheEntryWithDebugKeySchema as en, incrementEvalOutput as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayInputSchema as fn, resolveLlmCallsConfig as ft, resolveArtifactPath as g, traceDisplayInputConfigSchema as gn, buildEvalKey as gt, resolveTracePresentation as h, traceDisplayConfigSchema as hn, buildCaseKey as ht, generateRunId as i, cacheOperationTypeSchema as in, runInEvalRuntimeScope as ir, llmCallMetricFormatSchema as it, updateManualScoreRequestSchema as j, buildTraceTree as jn, runLogPhaseSchema as jt, configReloadStatusSchema as k, runArtifactRefSchema as kn, runLogLevelSchema as kt, nextShortIdFromSnapshots as l, spanCacheOptionsSchema as ln, startEvalBackgroundJob as lr, llmCallsConfigSchema as lt, runTouchesEval as m, traceAttributeDisplaySchema as mn, trialSelectionModeSchema as mt, getTargetEvalKeys as n, cacheListItemSchema as nn, mergeEvalOutput as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheRecordingSchema as on, runInExistingEvalScope as or, llmCallMetricSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayPlacementSchema as pn, runLogsConfigSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, appendToEvalOutput as qn, evalChartMetricSchema 
as qt, getTargetEvals as r, cacheModeSchema as rn, nextEvalId as rr, llmCallCostCurrencySchema as rt, loadPersistedRunSnapshot as s, cacheStatusSchema as sn, setEvalOutput as sr, llmCallPricingRateSchema as st, executeRun as t, cacheFileSchema as tn, isInEvalScope as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceCacheRefSchema as un, defineEval as ur, removeDefaultConfigSchema as ut, parseManualInputValues as v, traceSpanKindSchema as vn, getCaseRowEvalKey as vt, buildDeclaredColumnDefs as w, columnKindSchema as wn, evalStatAggregateSchema as wt, parseEvalDiscovery as x, cellValueSchema as xn, caseRowSchema as xt, deriveEvalFreshness as y, traceSpanSchema as yn, assertionFailureSchema as yt, getEvalTitle as z, serializeCacheRecording as zn, manualInputSelectFieldSchema as zt };
7314
+ export { apiCallMetricSchema as $, getCurrentScope as $n, cacheDebugKeyEntrySchema as $t, createRunRequestSchema as A, repoFileRefSchema as An, runLogEntrySchema as At, getNestedAttribute as B, deserializeCacheValue as Bn, manualInputNumberFieldSchema as Bt, loadConfig as C, cellValueSchema as Cn, caseRowSchema as Ct, createFsCacheStore as D, fileRefSchema as Dn, evalStatItemSchema as Dt, validateCharts as E, columnKindSchema as En, evalStatAggregateSchema as Et, extractApiCalls as F, evalSpan as Fn, manualInputBooleanFieldSchema as Ft, deriveStatusFromChildStatuses as G, readManualInputFile as Gn, evalChartAxisSchema as Gt, getEvalDisplayStatus as H, serializeCacheValue as Hn, manualInputSelectOptionSchema as Ht, extractLlmCalls as I, evalTracer as In, manualInputDescriptorSchema as It, DEFAULT_API_CALLS_CONFIG as J, advanceEvalTime as Jn, evalChartConfigSchema as Jt, runManifestSchema as K, evalExpect as Kn, evalChartBuiltinMetricSchema as Kt, simulateLlmCallCost as L, hashCacheKey as Ln, manualInputFieldDescriptorSchema as Lt, sseEnvelopeSchema as M, z$1 as Mn, runLogLocationSchema as Mt, extractCacheEntries as N, buildTraceTree as Nn, runLogPhaseSchema as Nt, configReloadStateSchema as O, jsonCellSchema as On, evalStatsConfigSchema as Ot, extractCacheHits as P, captureEvalSpanError as Pn, scoreTraceSchema as Pt, apiCallMetricPlacementSchema as Q, evalLog as Qn, evalChartsConfigSchema as Qt, simulateTokenAllocation as R, hashCacheKeySync as Rn, manualInputJsonFieldSchema as Rt, resolveEvalDefaultConfig as S, traceSpanWarningSchema as Sn, caseDetailSchema as St, normalizeScoreDef as T, columnFormatSchema as Tn, evalFreshnessStatusSchema as Tt, deriveScopedSummaryFromCases as U, repoFile as Un, manualInputTextFieldSchema as Ut, getEvalTitle as V, serializeCacheRecording as Vn, manualInputSelectFieldSchema as Vt, deriveStatusFromCaseRows as W, manualInputFileValueSchema as Wn, evalChartAggregateSchema as Wt, agentEvalsConfigSchema as X, configureEvalRunLogs as 
Xn, evalChartTooltipExtraSchema as Xt, DEFAULT_LLM_CALLS_CONFIG as Y, appendToEvalOutput as Yn, evalChartMetricSchema as Yt, apiCallMetricFormatSchema as Z, evalAssert as Zn, evalChartTypeSchema as Zt, buildManualInputDescriptor as _, traceDisplayConfigSchema as _n, buildCaseKey as _t, getLastRunStatuses as a, cacheModeSchema as an, nextEvalId as ar, llmCallCostCurrencySchema as at, loadEvalModule as b, traceSpanKindSchema as bn, getCaseRowEvalKey as bt, loadPersistedRunSnapshots as c, cacheRecordingSchema as cn, runInExistingEvalScope as cr, llmCallMetricSchema as ct, persistRunState as d, spanCacheOptionsSchema as dn, startEvalBackgroundJob as dr, llmCallsConfigSchema as dt, cacheDebugKeyFileSchema as en, getEvalCaseInput as er, apiCallsConfigSchema as et, recomputeEvalStatusesInRuns as f, traceCacheRefSchema as fn, defineEval as fr, removeDefaultConfigSchema as ft, resolveArtifactPath as g, traceAttributeDisplaySchema as gn, trialSelectionModeSchema as gt, resolveTracePresentation as h, traceAttributeDisplayPlacementSchema as hn, runLogsConfigSchema as ht, generateRunId as i, cacheListItemSchema as in, mergeEvalOutput as ir, evalDeriveConfigSchema as it, updateManualScoreRequestSchema as j, runArtifactRefSchema as jn, runLogLevelSchema as jt, configReloadStatusSchema as k, numberDisplayOptionsSchema as kn, evalSummarySchema as kt, nextShortIdFromSnapshots as l, cacheStatusSchema as ln, setEvalOutput as lr, llmCallPricingRateSchema as lt, runTouchesEval as m, traceAttributeDisplayInputSchema as mn, resolveLlmCallsConfig as mt, getTargetEvalKeys as n, cacheEntryWithDebugKeySchema as nn, incrementEvalOutput as nr, evalColumnOverrideSchema as nt, getLatestRunInfos as o, cacheOperationTypeSchema as on, runInEvalRuntimeScope as or, llmCallMetricFormatSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayFormatSchema as pn, getEvalRegistry as pr, resolveApiCallsConfig as pt, runSummarySchema as q, EvalAssertionError as qn, evalChartColorSchema as qt, 
getTargetEvals as r, cacheFileSchema as rn, isInEvalScope as rr, evalColumnsSchema as rt, loadPersistedRunSnapshot as s, cacheRecordingOpSchema as sn, runInEvalScope as sr, llmCallMetricPlacementSchema as st, executeRun as t, cacheEntrySchema as tn, getEvalStartTime as tr, defaultConfigKeySchema as tt, persistCaseDetail as u, serializedCacheSpanSchema as un, setScopeCacheContext as ur, llmCallPricingSchema as ut, parseManualInputValues as v, traceDisplayInputConfigSchema as vn, buildEvalKey as vt, buildDeclaredColumnDefs as w, columnDefSchema as wn, discoveryIssueSchema as wt, parseEvalDiscovery as x, traceSpanSchema as xn, assertionFailureSchema as xt, deriveEvalFreshness as y, traceSpanErrorSchema as yn, getCaseRowCaseKey as yt, applyDerivedCallAttributes as z, deserializeCacheRecording as zn, manualInputMultilineFieldSchema as zt };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-Dkol2ukD.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CMPmuY7W.mjs";
2
- import "./src-gZm9nyTp.mjs";
1
+ import { n as createRunner } from "./cli-huuJbDNb.mjs";
2
+ import "./src-1Qvuh0NH.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-ZpN7xty_.mjs";
2
+ import "./cli-huuJbDNb.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.31.0",
3
+ "version": "0.33.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"