@ls-stack/agent-eval 0.31.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BrSMRTpy.mjs → app-Dc6vvHRL.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BNQnbfi0.js +118 -0
- package/dist/apps/web/dist/assets/index-BPMMRktE.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CMPmuY7W.mjs → cli-huuJbDNb.mjs} +3 -3
- package/dist/index.d.mts +119 -41
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CAyVXPFz.mjs → runOrchestration-ZpN7xty_.mjs} +94 -1
- package/dist/{runner-CmpWwCe1.mjs → runner-BPXPvinB.mjs} +1 -1
- package/dist/{runner-Bnm1nz0U.mjs → runner-Dkol2ukD.mjs} +2 -2
- package/dist/src-1Qvuh0NH.mjs +3 -0
- package/package.json +2 -2
- package/dist/apps/web/dist/assets/index-CPcVyFRP.js +0 -118
- package/dist/apps/web/dist/assets/index-ClPR-tfN.css +0 -1
- package/dist/src-gZm9nyTp.mjs +0 -3
|
@@ -3995,6 +3995,99 @@ function computeTotalCost({ inputTokens, inputCostUsd, outputTokens, outputCostU
|
|
|
3995
3995
|
if (hasCost) return total;
|
|
3996
3996
|
return hasReportedTokens ? 0 : null;
|
|
3997
3997
|
}
|
|
3998
|
+
/**
|
|
3999
|
+
* Recompute the LLM-call cost breakdown for a hypothetical billing scenario,
|
|
4000
|
+
* using the call's recorded token counts and the resolved pricing registry.
|
|
4001
|
+
*
|
|
4002
|
+
* The `actual` scenario returns the costs already stored on `entry`. Other
|
|
4003
|
+
* scenarios re-derive each cost component from `pricing` so users can compare
|
|
4004
|
+
* what the same usage would have cost under different cache strategies. When
|
|
4005
|
+
* pricing is missing for the model/provider, simulated cost components fall
|
|
4006
|
+
* back to `null` exactly like the original extractor.
|
|
4007
|
+
*/
|
|
4008
|
+
function simulateLlmCallCost({ entry, pricing, scenario }) {
|
|
4009
|
+
if (scenario === "actual") return {
|
|
4010
|
+
inputCostUsd: entry.inputCostUsd,
|
|
4011
|
+
outputCostUsd: entry.outputCostUsd,
|
|
4012
|
+
cachedInputCostUsd: entry.cachedInputCostUsd,
|
|
4013
|
+
cacheCreationInputCostUsd: entry.cacheCreationInputCostUsd,
|
|
4014
|
+
reasoningCostUsd: entry.reasoningCostUsd,
|
|
4015
|
+
totalCostUsd: entry.costUsd
|
|
4016
|
+
};
|
|
4017
|
+
const pricingEntry = pickPricingEntry({
|
|
4018
|
+
pricing,
|
|
4019
|
+
model: entry.model,
|
|
4020
|
+
provider: entry.provider
|
|
4021
|
+
});
|
|
4022
|
+
const outputCostUsd = computeTokenCost(entry.outputTokens, pricingEntry?.outputUsdPerMillion);
|
|
4023
|
+
const reasoningCostUsd = computeTokenCost(entry.reasoningTokens, pricingEntry?.reasoningUsdPerMillion);
|
|
4024
|
+
const simulatedTokens = simulateTokenAllocation({
|
|
4025
|
+
entry,
|
|
4026
|
+
scenario
|
|
4027
|
+
});
|
|
4028
|
+
const writeRate = scenario === "withExtendedCachingWrite" ? pricingEntry?.cacheCreationInput1hUsdPerMillion : pricingEntry?.cacheCreationInputUsdPerMillion;
|
|
4029
|
+
const inputCostUsd = computeTokenCost(simulatedTokens.baseInputTokens, pricingEntry?.inputUsdPerMillion);
|
|
4030
|
+
const cachedInputCostUsd = computeTokenCost(simulatedTokens.cachedInputTokens, pricingEntry?.cachedInputUsdPerMillion);
|
|
4031
|
+
const cacheCreationInputCostUsd = computeTokenCost(simulatedTokens.cacheCreationInputTokens, writeRate);
|
|
4032
|
+
return {
|
|
4033
|
+
inputCostUsd,
|
|
4034
|
+
outputCostUsd,
|
|
4035
|
+
cachedInputCostUsd,
|
|
4036
|
+
cacheCreationInputCostUsd,
|
|
4037
|
+
reasoningCostUsd,
|
|
4038
|
+
totalCostUsd: computeTotalCost({
|
|
4039
|
+
inputTokens: simulatedTokens.baseInputTokens,
|
|
4040
|
+
inputCostUsd,
|
|
4041
|
+
outputTokens: entry.outputTokens,
|
|
4042
|
+
outputCostUsd,
|
|
4043
|
+
cachedInputTokens: simulatedTokens.cachedInputTokens,
|
|
4044
|
+
cachedInputCostUsd,
|
|
4045
|
+
cacheCreationInputTokens: simulatedTokens.cacheCreationInputTokens,
|
|
4046
|
+
cacheCreationInputCostUsd,
|
|
4047
|
+
reasoningTokens: entry.reasoningTokens,
|
|
4048
|
+
reasoningCostUsd
|
|
4049
|
+
})
|
|
4050
|
+
};
|
|
4051
|
+
}
|
|
4052
|
+
/**
|
|
4053
|
+
* Project the call's recorded token allocation onto a hypothetical billing
|
|
4054
|
+
* scenario. Cacheable tokens shift between rows so the breakdown reflects the
|
|
4055
|
+
* simulated billing model: `noCache` folds reads/writes into base input,
|
|
4056
|
+
* `withBaseCaching` (warmed) treats every cacheable token as a cache read, and
|
|
4057
|
+
* the first-call write scenarios treat every cacheable token as a cache write.
|
|
4058
|
+
*
|
|
4059
|
+
* The returned counts are what the UI renders on each row and what
|
|
4060
|
+
* {@link simulateLlmCallCost} prices, so display and totals never drift.
|
|
4061
|
+
*/
|
|
4062
|
+
function simulateTokenAllocation({ entry, scenario }) {
|
|
4063
|
+
const baseInputTokens = computeBaseInputTokens({
|
|
4064
|
+
inputTokens: entry.inputTokens,
|
|
4065
|
+
cachedInputTokens: entry.cachedInputTokens,
|
|
4066
|
+
cacheCreationInputTokens: entry.cacheCreationInputTokens
|
|
4067
|
+
});
|
|
4068
|
+
if (scenario === "actual" || entry.inputTokens === null) return {
|
|
4069
|
+
baseInputTokens,
|
|
4070
|
+
cachedInputTokens: entry.cachedInputTokens,
|
|
4071
|
+
cacheCreationInputTokens: entry.cacheCreationInputTokens
|
|
4072
|
+
};
|
|
4073
|
+
const cacheableTokens = (entry.cachedInputTokens ?? 0) + (entry.cacheCreationInputTokens ?? 0);
|
|
4074
|
+
const hasCacheable = cacheableTokens > 0;
|
|
4075
|
+
if (scenario === "noCache") return {
|
|
4076
|
+
baseInputTokens: entry.inputTokens,
|
|
4077
|
+
cachedInputTokens: 0,
|
|
4078
|
+
cacheCreationInputTokens: 0
|
|
4079
|
+
};
|
|
4080
|
+
if (scenario === "withBaseCaching") return {
|
|
4081
|
+
baseInputTokens: hasCacheable ? baseInputTokens : 0,
|
|
4082
|
+
cachedInputTokens: hasCacheable ? cacheableTokens : entry.inputTokens,
|
|
4083
|
+
cacheCreationInputTokens: 0
|
|
4084
|
+
};
|
|
4085
|
+
return {
|
|
4086
|
+
baseInputTokens: hasCacheable ? baseInputTokens : 0,
|
|
4087
|
+
cachedInputTokens: 0,
|
|
4088
|
+
cacheCreationInputTokens: hasCacheable ? cacheableTokens : entry.inputTokens
|
|
4089
|
+
};
|
|
4090
|
+
}
|
|
3998
4091
|
function computeDurationMs$1(span) {
|
|
3999
4092
|
if (span.endedAt === null) return null;
|
|
4000
4093
|
const started = Date.parse(span.startedAt);
|
|
@@ -7218,4 +7311,4 @@ function toLastRunStatus(status) {
|
|
|
7218
7311
|
return status === "pending" ? null : status;
|
|
7219
7312
|
}
|
|
7220
7313
|
//#endregion
|
|
7221
|
-
export {
|
|
7314
|
+
export { apiCallMetricSchema as $, getCurrentScope as $n, cacheDebugKeyEntrySchema as $t, createRunRequestSchema as A, repoFileRefSchema as An, runLogEntrySchema as At, getNestedAttribute as B, deserializeCacheValue as Bn, manualInputNumberFieldSchema as Bt, loadConfig as C, cellValueSchema as Cn, caseRowSchema as Ct, createFsCacheStore as D, fileRefSchema as Dn, evalStatItemSchema as Dt, validateCharts as E, columnKindSchema as En, evalStatAggregateSchema as Et, extractApiCalls as F, evalSpan as Fn, manualInputBooleanFieldSchema as Ft, deriveStatusFromChildStatuses as G, readManualInputFile as Gn, evalChartAxisSchema as Gt, getEvalDisplayStatus as H, serializeCacheValue as Hn, manualInputSelectOptionSchema as Ht, extractLlmCalls as I, evalTracer as In, manualInputDescriptorSchema as It, DEFAULT_API_CALLS_CONFIG as J, advanceEvalTime as Jn, evalChartConfigSchema as Jt, runManifestSchema as K, evalExpect as Kn, evalChartBuiltinMetricSchema as Kt, simulateLlmCallCost as L, hashCacheKey as Ln, manualInputFieldDescriptorSchema as Lt, sseEnvelopeSchema as M, z$1 as Mn, runLogLocationSchema as Mt, extractCacheEntries as N, buildTraceTree as Nn, runLogPhaseSchema as Nt, configReloadStateSchema as O, jsonCellSchema as On, evalStatsConfigSchema as Ot, extractCacheHits as P, captureEvalSpanError as Pn, scoreTraceSchema as Pt, apiCallMetricPlacementSchema as Q, evalLog as Qn, evalChartsConfigSchema as Qt, simulateTokenAllocation as R, hashCacheKeySync as Rn, manualInputJsonFieldSchema as Rt, resolveEvalDefaultConfig as S, traceSpanWarningSchema as Sn, caseDetailSchema as St, normalizeScoreDef as T, columnFormatSchema as Tn, evalFreshnessStatusSchema as Tt, deriveScopedSummaryFromCases as U, repoFile as Un, manualInputTextFieldSchema as Ut, getEvalTitle as V, serializeCacheRecording as Vn, manualInputSelectFieldSchema as Vt, deriveStatusFromCaseRows as W, manualInputFileValueSchema as Wn, evalChartAggregateSchema as Wt, agentEvalsConfigSchema as X, configureEvalRunLogs as Xn, 
evalChartTooltipExtraSchema as Xt, DEFAULT_LLM_CALLS_CONFIG as Y, appendToEvalOutput as Yn, evalChartMetricSchema as Yt, apiCallMetricFormatSchema as Z, evalAssert as Zn, evalChartTypeSchema as Zt, buildManualInputDescriptor as _, traceDisplayConfigSchema as _n, buildCaseKey as _t, getLastRunStatuses as a, cacheModeSchema as an, nextEvalId as ar, llmCallCostCurrencySchema as at, loadEvalModule as b, traceSpanKindSchema as bn, getCaseRowEvalKey as bt, loadPersistedRunSnapshots as c, cacheRecordingSchema as cn, runInExistingEvalScope as cr, llmCallMetricSchema as ct, persistRunState as d, spanCacheOptionsSchema as dn, startEvalBackgroundJob as dr, llmCallsConfigSchema as dt, cacheDebugKeyFileSchema as en, getEvalCaseInput as er, apiCallsConfigSchema as et, recomputeEvalStatusesInRuns as f, traceCacheRefSchema as fn, defineEval as fr, removeDefaultConfigSchema as ft, resolveArtifactPath as g, traceAttributeDisplaySchema as gn, trialSelectionModeSchema as gt, resolveTracePresentation as h, traceAttributeDisplayPlacementSchema as hn, runLogsConfigSchema as ht, generateRunId as i, cacheListItemSchema as in, mergeEvalOutput as ir, evalDeriveConfigSchema as it, updateManualScoreRequestSchema as j, runArtifactRefSchema as jn, runLogLevelSchema as jt, configReloadStatusSchema as k, numberDisplayOptionsSchema as kn, evalSummarySchema as kt, nextShortIdFromSnapshots as l, cacheStatusSchema as ln, setEvalOutput as lr, llmCallPricingRateSchema as lt, runTouchesEval as m, traceAttributeDisplayInputSchema as mn, resolveLlmCallsConfig as mt, getTargetEvalKeys as n, cacheEntryWithDebugKeySchema as nn, incrementEvalOutput as nr, evalColumnOverrideSchema as nt, getLatestRunInfos as o, cacheOperationTypeSchema as on, runInEvalRuntimeScope as or, llmCallMetricFormatSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayFormatSchema as pn, getEvalRegistry as pr, resolveApiCallsConfig as pt, runSummarySchema as q, EvalAssertionError as qn, evalChartColorSchema as qt, 
getTargetEvals as r, cacheFileSchema as rn, isInEvalScope as rr, evalColumnsSchema as rt, loadPersistedRunSnapshot as s, cacheRecordingOpSchema as sn, runInEvalScope as sr, llmCallMetricPlacementSchema as st, executeRun as t, cacheEntrySchema as tn, getEvalStartTime as tr, defaultConfigKeySchema as tt, persistCaseDetail as u, serializedCacheSpanSchema as un, setScopeCacheContext as ur, llmCallPricingSchema as ut, parseManualInputValues as v, traceDisplayInputConfigSchema as vn, buildEvalKey as vt, buildDeclaredColumnDefs as w, columnDefSchema as wn, discoveryIssueSchema as wt, parseEvalDiscovery as x, traceSpanSchema as xn, assertionFailureSchema as xt, deriveEvalFreshness as y, traceSpanErrorSchema as yn, getCaseRowCaseKey as yt, applyDerivedCallAttributes as z, deserializeCacheRecording as zn, manualInputMultilineFieldSchema as zt };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-Dkol2ukD.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-CMPmuY7W.mjs";
|
|
2
|
-
import "./src-gZm9nyTp.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-huuJbDNb.mjs";
|
|
2
|
+
import "./src-1Qvuh0NH.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.31.0",
|
|
3
|
+
"version": "0.32.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"@types/node": "^24.7.2",
|
|
33
33
|
"typescript": "^5.9.2",
|
|
34
|
-
"@agent-evals/runner": "0.0.1",
|
|
35
34
|
"@agent-evals/sdk": "0.0.1",
|
|
35
|
+
"@agent-evals/runner": "0.0.1",
|
|
36
36
|
"@agent-evals/shared": "0.0.1"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|