@ls-stack/agent-eval 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as defaultConfigKeySchema, $n as incrementEvalOutput, $t as cacheEntryWithDebugKeySchema, A as createRunRequestSchema, An as buildTraceTree, At as runLogPhaseSchema, B as getEvalDisplayStatus, Bn as repoFile, Bt as manualInputTextFieldSchema, Cn as columnKindSchema, Ct as evalStatAggregateSchema, Dn as repoFileRefSchema, Dt as runLogEntrySchema, En as numberDisplayOptionsSchema, Et as evalSummarySchema, F as extractApiCalls, Fn as hashCacheKeySync, Ft as manualInputJsonFieldSchema, G as runSummarySchema, Gn as advanceEvalTime, Gt as evalChartConfigSchema, H as deriveStatusFromCaseRows, Hn as readManualInputFile, Ht as evalChartAxisSchema, I as extractLlmCalls, In as deserializeCacheRecording, It as manualInputMultilineFieldSchema, J as agentEvalsConfigSchema, Jn as evalAssert, Jt as evalChartTypeSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as appendToEvalOutput, Kt as evalChartMetricSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheValue, Lt as manualInputNumberFieldSchema, M as sseEnvelopeSchema, Mn as evalSpan, Mt as manualInputBooleanFieldSchema, N as extractCacheEntries, Nn as evalTracer, Nt as manualInputDescriptorSchema, O as configReloadStateSchema, On as runArtifactRefSchema, Ot as runLogLevelSchema, P as extractCacheHits, Pn as hashCacheKey, Pt as manualInputFieldDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalStartTime, Qt as cacheEntrySchema, R as getNestedAttribute, Rn as serializeCacheRecording, Rt as manualInputSelectFieldSchema, Sn as columnFormatSchema, St as evalFreshnessStatusSchema, Tn as jsonCellSchema, Tt as evalStatsConfigSchema, U as deriveStatusFromChildStatuses, Un as evalExpect, Ut as evalChartBuiltinMetricSchema, V as deriveScopedSummaryFromCases, Vn as manualInputFileValueSchema, Vt as evalChartAggregateSchema, W as runManifestSchema, Wn as EvalAssertionError, Wt as evalChartColorSchema, X as apiCallMetricPlacementSchema, Xn as getCurrentScope, Xt as cacheDebugKeyEntrySchema, Y as apiCallMetricFormatSchema, Yn as evalLog, Yt as evalChartsConfigSchema, Z as apiCallMetricSchema, Zn as getEvalCaseInput, Zt as cacheDebugKeyFileSchema, _n as traceSpanKindSchema, _t as getCaseRowEvalKey, an as cacheRecordingSchema, ar as runInExistingEvalScope, at as llmCallMetricSchema, bn as cellValueSchema, bt as caseRowSchema, cn as spanCacheOptionsSchema, cr as startEvalBackgroundJob, ct as llmCallsConfigSchema, dn as traceAttributeDisplayInputSchema, dt as resolveLlmCallsConfig, en as cacheFileSchema, er as isInEvalScope, et as evalColumnOverrideSchema, fn as traceAttributeDisplayPlacementSchema, ft as runLogsConfigSchema, gn as traceSpanErrorSchema, gt as getCaseRowCaseKey, hn as traceDisplayInputConfigSchema, ht as buildEvalKey, in as cacheRecordingOpSchema, ir as runInEvalScope, it as llmCallMetricPlacementSchema, j as updateManualScoreRequestSchema, jn as captureEvalSpanError, jt as scoreTraceSchema, k as configReloadStatusSchema, kn as z, kt as runLogLocationSchema, ln as traceCacheRefSchema, lr as defineEval, lt as removeDefaultConfigSchema, mn as traceDisplayConfigSchema, mt as buildCaseKey, nn as cacheModeSchema, nr as nextEvalId, nt as evalDeriveConfigSchema, on as cacheStatusSchema, or as setEvalOutput, ot as llmCallPricingRateSchema, pn as traceAttributeDisplaySchema, pt as trialSelectionModeSchema, q as DEFAULT_LLM_CALLS_CONFIG, qt as evalChartTooltipExtraSchema, rn as cacheOperationTypeSchema, rr as runInEvalRuntimeScope, rt as llmCallMetricFormatSchema, sn as serializedCacheSpanSchema, sr as setScopeCacheContext, st as llmCallPricingSchema, tn as cacheListItemSchema, tr as mergeEvalOutput, tt as evalColumnsSchema, un as traceAttributeDisplayFormatSchema, ur as getEvalRegistry, ut as resolveApiCallsConfig, vn as traceSpanSchema, vt as assertionFailureSchema, wn as fileRefSchema, wt as evalStatItemSchema, xn as columnDefSchema, xt as discoveryIssueSchema, yn as traceSpanWarningSchema, yt as caseDetailSchema, z as getEvalTitle, zn as serializeCacheValue, zt as manualInputSelectOptionSchema } from "./runOrchestration-CO3Vf0cQ.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CiFOqMwS.mjs";
3
- import "./src-BiPLv9ya.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
3
+ import "./src-1Qvuh0NH.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema, Nt as manualInputDescriptorSchema, Tt as evalStatsConfigSchema, W as runManifestSchema, Yt as evalChartsConfigSchema, ht as buildEvalKey, qn as configureEvalRunLogs, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery, xn as columnDefSchema } from "./runOrchestration-CO3Vf0cQ.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-ZpN7xty_.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -3316,6 +3316,20 @@ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
3316
3316
  */
3317
3317
  providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
3318
3318
  });
3319
+ /**
3320
+ * Schema for extra currencies displayed in the LLM calls breakdown table.
3321
+ * Costs are still derived in USD, then multiplied by `usdToCurrencyRate`.
3322
+ */
3323
+ const llmCallCostCurrencySchema = z.object({
3324
+ /** Currency code or short display token, such as `BRL` or `EUR`. */
3325
+ code: z.string().min(1),
3326
+ /** Optional display label for tooltips and future UI surfaces. */
3327
+ label: z.string().min(1).optional(),
3328
+ /** Multiplier used to convert one USD to this currency. */
3329
+ usdToCurrencyRate: z.number().nonnegative(),
3330
+ /** Number presentation options for the converted value. */
3331
+ numberFormat: numberDisplayOptionsSchema.optional()
3332
+ });
3319
3333
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
3320
3334
  const llmCallsConfigSchema = z.object({
3321
3335
  /** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3357,6 +3371,11 @@ const llmCallsConfigSchema = z.object({
3357
3371
  * counts. Built-in LLM cost fields are only derived from this registry.
3358
3372
  */
3359
3373
  pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
3374
+ /**
3375
+ * Additional currencies shown as columns in the LLM calls breakdown table.
3376
+ * These do not change persisted `costUsd` outputs, stats, or charts.
3377
+ */
3378
+ costCurrencies: z.array(llmCallCostCurrencySchema).optional(),
3360
3379
  /** Custom user-defined metrics surfaced on each LLM call. */
3361
3380
  metrics: z.array(llmCallMetricSchema).optional()
3362
3381
  });
@@ -3422,7 +3441,8 @@ const DEFAULT_LLM_CALLS_CONFIG = {
3422
3441
  },
3423
3442
  derivedAttributes: [],
3424
3443
  metrics: [],
3425
- pricing: []
3444
+ pricing: [],
3445
+ costCurrencies: []
3426
3446
  };
3427
3447
  /** Default API-calls config the UI uses before the workspace fetch resolves. */
3428
3448
  const DEFAULT_API_CALLS_CONFIG = {
@@ -3502,6 +3522,14 @@ function resolveLlmCallPricingEntries(model, pricing) {
3502
3522
  });
3503
3523
  return entries;
3504
3524
  }
3525
+ function resolveLlmCallCostCurrency(currency) {
3526
+ return {
3527
+ code: currency.code,
3528
+ label: currency.label,
3529
+ usdToCurrencyRate: currency.usdToCurrencyRate,
3530
+ numberFormat: currency.numberFormat
3531
+ };
3532
+ }
3505
3533
  /**
3506
3534
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
3507
3535
  * by the UI to derive the LLM calls tab.
@@ -3513,6 +3541,8 @@ function resolveLlmCallPricingEntries(model, pricing) {
3513
3541
  * - Missing `metrics[].placements` defaults to `['body']`.
3514
3542
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
3515
3543
  * derived from configured model-keyed pricing and token counts.
3544
+ * - Missing `costCurrencies` defaults to an empty list; extra currencies only
3545
+ * affect the expanded LLM calls breakdown table.
3516
3546
  */
3517
3547
  function resolveLlmCallsConfig(input) {
3518
3548
  return {
@@ -3523,7 +3553,8 @@ function resolveLlmCallsConfig(input) {
3523
3553
  },
3524
3554
  derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3525
3555
  metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
3526
- pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
3556
+ pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing)),
3557
+ costCurrencies: (input?.costCurrencies ?? []).map(resolveLlmCallCostCurrency)
3527
3558
  };
3528
3559
  }
3529
3560
  /**
@@ -3964,6 +3995,99 @@ function computeTotalCost({ inputTokens, inputCostUsd, outputTokens, outputCostU
3964
3995
  if (hasCost) return total;
3965
3996
  return hasReportedTokens ? 0 : null;
3966
3997
  }
3998
+ /**
3999
+ * Recompute the LLM-call cost breakdown for a hypothetical billing scenario,
4000
+ * using the call's recorded token counts and the resolved pricing registry.
4001
+ *
4002
+ * The `actual` scenario returns the costs already stored on `entry`. Other
4003
+ * scenarios re-derive each cost component from `pricing` so users can compare
4004
+ * what the same usage would have cost under different cache strategies. When
4005
+ * pricing is missing for the model/provider, simulated cost components fall
4006
+ * back to `null` exactly like the original extractor.
4007
+ */
4008
+ function simulateLlmCallCost({ entry, pricing, scenario }) {
4009
+ if (scenario === "actual") return {
4010
+ inputCostUsd: entry.inputCostUsd,
4011
+ outputCostUsd: entry.outputCostUsd,
4012
+ cachedInputCostUsd: entry.cachedInputCostUsd,
4013
+ cacheCreationInputCostUsd: entry.cacheCreationInputCostUsd,
4014
+ reasoningCostUsd: entry.reasoningCostUsd,
4015
+ totalCostUsd: entry.costUsd
4016
+ };
4017
+ const pricingEntry = pickPricingEntry({
4018
+ pricing,
4019
+ model: entry.model,
4020
+ provider: entry.provider
4021
+ });
4022
+ const outputCostUsd = computeTokenCost(entry.outputTokens, pricingEntry?.outputUsdPerMillion);
4023
+ const reasoningCostUsd = computeTokenCost(entry.reasoningTokens, pricingEntry?.reasoningUsdPerMillion);
4024
+ const simulatedTokens = simulateTokenAllocation({
4025
+ entry,
4026
+ scenario
4027
+ });
4028
+ const writeRate = scenario === "withExtendedCachingWrite" ? pricingEntry?.cacheCreationInput1hUsdPerMillion : pricingEntry?.cacheCreationInputUsdPerMillion;
4029
+ const inputCostUsd = computeTokenCost(simulatedTokens.baseInputTokens, pricingEntry?.inputUsdPerMillion);
4030
+ const cachedInputCostUsd = computeTokenCost(simulatedTokens.cachedInputTokens, pricingEntry?.cachedInputUsdPerMillion);
4031
+ const cacheCreationInputCostUsd = computeTokenCost(simulatedTokens.cacheCreationInputTokens, writeRate);
4032
+ return {
4033
+ inputCostUsd,
4034
+ outputCostUsd,
4035
+ cachedInputCostUsd,
4036
+ cacheCreationInputCostUsd,
4037
+ reasoningCostUsd,
4038
+ totalCostUsd: computeTotalCost({
4039
+ inputTokens: simulatedTokens.baseInputTokens,
4040
+ inputCostUsd,
4041
+ outputTokens: entry.outputTokens,
4042
+ outputCostUsd,
4043
+ cachedInputTokens: simulatedTokens.cachedInputTokens,
4044
+ cachedInputCostUsd,
4045
+ cacheCreationInputTokens: simulatedTokens.cacheCreationInputTokens,
4046
+ cacheCreationInputCostUsd,
4047
+ reasoningTokens: entry.reasoningTokens,
4048
+ reasoningCostUsd
4049
+ })
4050
+ };
4051
+ }
4052
+ /**
4053
+ * Project the call's recorded token allocation onto a hypothetical billing
4054
+ * scenario. Cacheable tokens shift between rows so the breakdown reflects the
4055
+ * simulated billing model: `noCache` folds reads/writes into base input,
4056
+ * `withBaseCaching` (warmed) treats every cacheable token as a cache read, and
4057
+ * the first-call write scenarios treat every cacheable token as a cache write.
4058
+ *
4059
+ * The returned counts are what the UI renders on each row and what
4060
+ * {@link simulateLlmCallCost} prices, so display and totals never drift.
4061
+ */
4062
+ function simulateTokenAllocation({ entry, scenario }) {
4063
+ const baseInputTokens = computeBaseInputTokens({
4064
+ inputTokens: entry.inputTokens,
4065
+ cachedInputTokens: entry.cachedInputTokens,
4066
+ cacheCreationInputTokens: entry.cacheCreationInputTokens
4067
+ });
4068
+ if (scenario === "actual" || entry.inputTokens === null) return {
4069
+ baseInputTokens,
4070
+ cachedInputTokens: entry.cachedInputTokens,
4071
+ cacheCreationInputTokens: entry.cacheCreationInputTokens
4072
+ };
4073
+ const cacheableTokens = (entry.cachedInputTokens ?? 0) + (entry.cacheCreationInputTokens ?? 0);
4074
+ const hasCacheable = cacheableTokens > 0;
4075
+ if (scenario === "noCache") return {
4076
+ baseInputTokens: entry.inputTokens,
4077
+ cachedInputTokens: 0,
4078
+ cacheCreationInputTokens: 0
4079
+ };
4080
+ if (scenario === "withBaseCaching") return {
4081
+ baseInputTokens: hasCacheable ? baseInputTokens : 0,
4082
+ cachedInputTokens: hasCacheable ? cacheableTokens : entry.inputTokens,
4083
+ cacheCreationInputTokens: 0
4084
+ };
4085
+ return {
4086
+ baseInputTokens: hasCacheable ? baseInputTokens : 0,
4087
+ cachedInputTokens: 0,
4088
+ cacheCreationInputTokens: hasCacheable ? cacheableTokens : entry.inputTokens
4089
+ };
4090
+ }
3967
4091
  function computeDurationMs$1(span) {
3968
4092
  if (span.endedAt === null) return null;
3969
4093
  const started = Date.parse(span.startedAt);
@@ -7187,4 +7311,4 @@ function toLastRunStatus(status) {
7187
7311
  return status === "pending" ? null : status;
7188
7312
  }
7189
7313
  //#endregion
7190
- export { defaultConfigKeySchema as $, incrementEvalOutput as $n, cacheEntryWithDebugKeySchema as $t, createRunRequestSchema as A, buildTraceTree as An, runLogPhaseSchema as At, getEvalDisplayStatus as B, repoFile as Bn, manualInputTextFieldSchema as Bt, loadConfig as C, columnKindSchema as Cn, evalStatAggregateSchema as Ct, createFsCacheStore as D, repoFileRefSchema as Dn, runLogEntrySchema as Dt, validateCharts as E, numberDisplayOptionsSchema as En, evalSummarySchema as Et, extractApiCalls as F, hashCacheKeySync as Fn, manualInputJsonFieldSchema as Ft, runSummarySchema as G, advanceEvalTime as Gn, evalChartConfigSchema as Gt, deriveStatusFromCaseRows as H, readManualInputFile as Hn, evalChartAxisSchema as Ht, extractLlmCalls as I, deserializeCacheRecording as In, manualInputMultilineFieldSchema as It, agentEvalsConfigSchema as J, evalAssert as Jn, evalChartTypeSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, appendToEvalOutput as Kn, evalChartMetricSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheValue as Ln, manualInputNumberFieldSchema as Lt, sseEnvelopeSchema as M, evalSpan as Mn, manualInputBooleanFieldSchema as Mt, extractCacheEntries as N, evalTracer as Nn, manualInputDescriptorSchema as Nt, configReloadStateSchema as O, runArtifactRefSchema as On, runLogLevelSchema as Ot, extractCacheHits as P, hashCacheKey as Pn, manualInputFieldDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalStartTime as Qn, cacheEntrySchema as Qt, getNestedAttribute as R, serializeCacheRecording as Rn, manualInputSelectFieldSchema as Rt, resolveEvalDefaultConfig as S, columnFormatSchema as Sn, evalFreshnessStatusSchema as St, normalizeScoreDef as T, jsonCellSchema as Tn, evalStatsConfigSchema as Tt, deriveStatusFromChildStatuses as U, evalExpect as Un, evalChartBuiltinMetricSchema as Ut, deriveScopedSummaryFromCases as V, manualInputFileValueSchema as Vn, evalChartAggregateSchema as Vt, runManifestSchema as W, EvalAssertionError as Wn, evalChartColorSchema as Wt, apiCallMetricPlacementSchema as X, getCurrentScope as Xn, cacheDebugKeyEntrySchema as Xt, apiCallMetricFormatSchema as Y, evalLog as Yn, evalChartsConfigSchema as Yt, apiCallMetricSchema as Z, getEvalCaseInput as Zn, cacheDebugKeyFileSchema as Zt, buildManualInputDescriptor as _, traceSpanKindSchema as _n, getCaseRowEvalKey as _t, getLastRunStatuses as a, cacheRecordingSchema as an, runInExistingEvalScope as ar, llmCallMetricSchema as at, loadEvalModule as b, cellValueSchema as bn, caseRowSchema as bt, loadPersistedRunSnapshots as c, spanCacheOptionsSchema as cn, startEvalBackgroundJob as cr, llmCallsConfigSchema as ct, persistRunState as d, traceAttributeDisplayInputSchema as dn, resolveLlmCallsConfig as dt, cacheFileSchema as en, isInEvalScope as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayPlacementSchema as fn, runLogsConfigSchema as ft, resolveArtifactPath as g, traceSpanErrorSchema as gn, getCaseRowCaseKey as gt, resolveTracePresentation as h, traceDisplayInputConfigSchema as hn, buildEvalKey as ht, generateRunId as i, cacheRecordingOpSchema as in, runInEvalScope as ir, llmCallMetricPlacementSchema as it, updateManualScoreRequestSchema as j, captureEvalSpanError as jn, scoreTraceSchema as jt, configReloadStatusSchema as k, z$1 as kn, runLogLocationSchema as kt, nextShortIdFromSnapshots as l, traceCacheRefSchema as ln, defineEval as lr, removeDefaultConfigSchema as lt, runTouchesEval as m, traceDisplayConfigSchema as mn, buildCaseKey as mt, getTargetEvalKeys as n, cacheModeSchema as nn, nextEvalId as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheStatusSchema as on, setEvalOutput as or, llmCallPricingRateSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplaySchema as pn, trialSelectionModeSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, configureEvalRunLogs as qn, evalChartTooltipExtraSchema as qt, getTargetEvals as r, cacheOperationTypeSchema as rn, runInEvalRuntimeScope as rr, llmCallMetricFormatSchema as rt, loadPersistedRunSnapshot as s, serializedCacheSpanSchema as sn, setScopeCacheContext as sr, llmCallPricingSchema as st, executeRun as t, cacheListItemSchema as tn, mergeEvalOutput as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceAttributeDisplayFormatSchema as un, getEvalRegistry as ur, resolveApiCallsConfig as ut, parseManualInputValues as v, traceSpanSchema as vn, assertionFailureSchema as vt, buildDeclaredColumnDefs as w, fileRefSchema as wn, evalStatItemSchema as wt, parseEvalDiscovery as x, columnDefSchema as xn, discoveryIssueSchema as xt, deriveEvalFreshness as y, traceSpanWarningSchema as yn, caseDetailSchema as yt, getEvalTitle as z, serializeCacheValue as zn, manualInputSelectOptionSchema as zt };
7314
+ export { apiCallMetricSchema as $, getCurrentScope as $n, cacheDebugKeyEntrySchema as $t, createRunRequestSchema as A, repoFileRefSchema as An, runLogEntrySchema as At, getNestedAttribute as B, deserializeCacheValue as Bn, manualInputNumberFieldSchema as Bt, loadConfig as C, cellValueSchema as Cn, caseRowSchema as Ct, createFsCacheStore as D, fileRefSchema as Dn, evalStatItemSchema as Dt, validateCharts as E, columnKindSchema as En, evalStatAggregateSchema as Et, extractApiCalls as F, evalSpan as Fn, manualInputBooleanFieldSchema as Ft, deriveStatusFromChildStatuses as G, readManualInputFile as Gn, evalChartAxisSchema as Gt, getEvalDisplayStatus as H, serializeCacheValue as Hn, manualInputSelectOptionSchema as Ht, extractLlmCalls as I, evalTracer as In, manualInputDescriptorSchema as It, DEFAULT_API_CALLS_CONFIG as J, advanceEvalTime as Jn, evalChartConfigSchema as Jt, runManifestSchema as K, evalExpect as Kn, evalChartBuiltinMetricSchema as Kt, simulateLlmCallCost as L, hashCacheKey as Ln, manualInputFieldDescriptorSchema as Lt, sseEnvelopeSchema as M, z$1 as Mn, runLogLocationSchema as Mt, extractCacheEntries as N, buildTraceTree as Nn, runLogPhaseSchema as Nt, configReloadStateSchema as O, jsonCellSchema as On, evalStatsConfigSchema as Ot, extractCacheHits as P, captureEvalSpanError as Pn, scoreTraceSchema as Pt, apiCallMetricPlacementSchema as Q, evalLog as Qn, evalChartsConfigSchema as Qt, simulateTokenAllocation as R, hashCacheKeySync as Rn, manualInputJsonFieldSchema as Rt, resolveEvalDefaultConfig as S, traceSpanWarningSchema as Sn, caseDetailSchema as St, normalizeScoreDef as T, columnFormatSchema as Tn, evalFreshnessStatusSchema as Tt, deriveScopedSummaryFromCases as U, repoFile as Un, manualInputTextFieldSchema as Ut, getEvalTitle as V, serializeCacheRecording as Vn, manualInputSelectFieldSchema as Vt, deriveStatusFromCaseRows as W, manualInputFileValueSchema as Wn, evalChartAggregateSchema as Wt, agentEvalsConfigSchema as X, configureEvalRunLogs as Xn, evalChartTooltipExtraSchema as Xt, DEFAULT_LLM_CALLS_CONFIG as Y, appendToEvalOutput as Yn, evalChartMetricSchema as Yt, apiCallMetricFormatSchema as Z, evalAssert as Zn, evalChartTypeSchema as Zt, buildManualInputDescriptor as _, traceDisplayConfigSchema as _n, buildCaseKey as _t, getLastRunStatuses as a, cacheModeSchema as an, nextEvalId as ar, llmCallCostCurrencySchema as at, loadEvalModule as b, traceSpanKindSchema as bn, getCaseRowEvalKey as bt, loadPersistedRunSnapshots as c, cacheRecordingSchema as cn, runInExistingEvalScope as cr, llmCallMetricSchema as ct, persistRunState as d, spanCacheOptionsSchema as dn, startEvalBackgroundJob as dr, llmCallsConfigSchema as dt, cacheDebugKeyFileSchema as en, getEvalCaseInput as er, apiCallsConfigSchema as et, recomputeEvalStatusesInRuns as f, traceCacheRefSchema as fn, defineEval as fr, removeDefaultConfigSchema as ft, resolveArtifactPath as g, traceAttributeDisplaySchema as gn, trialSelectionModeSchema as gt, resolveTracePresentation as h, traceAttributeDisplayPlacementSchema as hn, runLogsConfigSchema as ht, generateRunId as i, cacheListItemSchema as in, mergeEvalOutput as ir, evalDeriveConfigSchema as it, updateManualScoreRequestSchema as j, runArtifactRefSchema as jn, runLogLevelSchema as jt, configReloadStatusSchema as k, numberDisplayOptionsSchema as kn, evalSummarySchema as kt, nextShortIdFromSnapshots as l, cacheStatusSchema as ln, setEvalOutput as lr, llmCallPricingRateSchema as lt, runTouchesEval as m, traceAttributeDisplayInputSchema as mn, resolveLlmCallsConfig as mt, getTargetEvalKeys as n, cacheEntryWithDebugKeySchema as nn, incrementEvalOutput as nr, evalColumnOverrideSchema as nt, getLatestRunInfos as o, cacheOperationTypeSchema as on, runInEvalRuntimeScope as or, llmCallMetricFormatSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayFormatSchema as pn, getEvalRegistry as pr, resolveApiCallsConfig as pt, runSummarySchema as q, EvalAssertionError as qn, evalChartColorSchema as qt, getTargetEvals as r, cacheFileSchema as rn, isInEvalScope as rr, evalColumnsSchema as rt, loadPersistedRunSnapshot as s, cacheRecordingOpSchema as sn, runInEvalScope as sr, llmCallMetricPlacementSchema as st, executeRun as t, cacheEntrySchema as tn, getEvalStartTime as tr, defaultConfigKeySchema as tt, persistCaseDetail as u, serializedCacheSpanSchema as un, setScopeCacheContext as ur, llmCallPricingSchema as ut, parseManualInputValues as v, traceDisplayInputConfigSchema as vn, buildEvalKey as vt, buildDeclaredColumnDefs as w, columnDefSchema as wn, discoveryIssueSchema as wt, parseEvalDiscovery as x, traceSpanSchema as xn, assertionFailureSchema as xt, deriveEvalFreshness as y, traceSpanErrorSchema as yn, getCaseRowCaseKey as yt, applyDerivedCallAttributes as z, deserializeCacheRecording as zn, manualInputMultilineFieldSchema as zt };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-CXHkf7ih.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-Dkol2ukD.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CiFOqMwS.mjs";
2
- import "./src-BiPLv9ya.mjs";
1
+ import { n as createRunner } from "./cli-huuJbDNb.mjs";
2
+ import "./src-1Qvuh0NH.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-ZpN7xty_.mjs";
2
+ import "./cli-huuJbDNb.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.30.0",
3
+ "version": "0.32.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -31,8 +31,8 @@
31
31
  "devDependencies": {
32
32
  "@types/node": "^24.7.2",
33
33
  "typescript": "^5.9.2",
34
- "@agent-evals/runner": "0.0.1",
35
34
  "@agent-evals/sdk": "0.0.1",
35
+ "@agent-evals/runner": "0.0.1",
36
36
  "@agent-evals/shared": "0.0.1"
37
37
  },
38
38
  "scripts": {
@@ -341,9 +341,10 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
341
341
  tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
342
342
  override `attributes.<field>` for non-default primitive span shapes, configure
343
343
  model-keyed `pricing` to derive USD costs from token counts, with nested
344
- `providers` entries for provider-specific rates, add `derivedAttributes` to
345
- persist computed values back onto matching LLM spans before trace consumers
346
- run, and add entries to `metrics` to surface arbitrary user metrics
344
+ `providers` entries for provider-specific rates, add `costCurrencies` to show
345
+ converted cost columns in the expanded breakdown table only, add
346
+ `derivedAttributes` to persist computed values back onto matching LLM spans
347
+ before trace consumers run, and add entries to `metrics` to surface arbitrary user metrics
347
348
  (`format: 'string' | 'number' | 'duration' | 'json' | 'boolean'`,
348
349
  `placements: ['header' | 'body']`). `derivedAttributes` can be a keyed map
349
350
  for one-off fields or one callback that returns multiple path/value pairs.