@inbrowser/agent 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/retrieval.d.ts.map +1 -1
- package/dist/retrieval.js +4 -0
- package/dist/retrieval.js.map +1 -1
- package/dist/sandbox/index.d.ts +9 -0
- package/dist/sandbox/index.d.ts.map +1 -0
- package/dist/sandbox/index.js +43 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/session.d.ts.map +1 -1
- package/dist/session.js +18 -2
- package/dist/session.js.map +1 -1
- package/dist/strategy.d.ts.map +1 -1
- package/dist/strategy.js +10 -0
- package/dist/strategy.js.map +1 -1
- package/dist/tools.d.ts +2 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +11 -0
- package/dist/tools.js.map +1 -1
- package/dist/types/session.d.ts +7 -7
- package/dist/types/session.d.ts.map +1 -1
- package/dist/types/tools.d.ts +4 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/dist/types/trace.d.ts +2 -0
- package/dist/types/trace.d.ts.map +1 -1
- package/dist/usage/index.d.ts +309 -0
- package/dist/usage/index.d.ts.map +1 -0
- package/dist/usage/index.js +1062 -0
- package/dist/usage/index.js.map +1 -0
- package/package.json +15 -3
|
@@ -0,0 +1,1062 @@
|
|
|
1
|
+
/** History size, in estimated characters, up to which history is left uncompacted unless compaction is forced. */
export const MODEL_CONTEXT_COMPACTION_THRESHOLD_CHARS = 80000;

/** Default number of most recent user turns kept verbatim when history is compacted. */
export const MODEL_CONTEXT_RECENT_USER_TURNS = 2;

/** Display metadata (id, label, color) for each row of the context-window breakdown. */
const BREAKDOWN_META = {
  system: {
    id: 'system',
    label: 'System prompt',
    color: '#a4d4a8',
  },
  history: {
    id: 'history',
    label: 'Conversation',
    color: '#8bb7ff',
  },
  'tool-results': {
    id: 'tool-results',
    label: 'Tool results',
    color: '#f0c36a',
  },
  'tool-schemas': {
    id: 'tool-schemas',
    label: 'Tool schemas',
    color: '#c9a7ff',
  },
  draft: {
    id: 'draft',
    label: 'Current draft',
    color: '#f08a8a',
  },
};

/** Compaction stats with every counter at zero and `compacted` false. */
const EMPTY_COMPACTION = {
  compacted: false,
  originalChars: 0,
  compactedChars: 0,
  bytesSaved: 0,
  turnsCompacted: 0,
  messagesCompacted: 0,
};

/** Human-readable list of what compaction keeps. */
const COMPACTION_RETAINS = [
  'the most recent 2 user turns verbatim',
  'older user prompts and assistant outcomes as deterministic memory',
  'older tool names, one-line summaries, write paths, and validation summaries',
];

/** Human-readable list of what compaction drops. */
const COMPACTION_LOSES = [
  'verbatim older assistant text and reasoning',
  'full older tool arguments and bulky tool results',
  'old assistant/tool-call pairing details beyond the retained recent turns',
];

// NOTE(review): presumably size caps for the compaction memory message; their use is outside this view.
const MAX_MEMORY_CHARS = 16000;
const MAX_MEMORY_TOOL_LINES = 80;
|
|
30
|
+
/**
 * Estimates how a request's input tokens split across system text, conversation
 * history, re-sent tool results, the current prompt, and tool schemas.
 *
 * @param {object} request - Request with optional `messages`, `systemPrompt`, and `tools` / `toolDeclarations`.
 * @param {object} [opts]
 * @param {Function} [opts.estimateTokens] - Token estimator; falls back to `defaultTokenEstimate`.
 * @returns {{system: number, history: number, resentToolResults: number, currentPrompt: number, toolSchemas: number}}
 */
export function estimateRequestInputComposition(request, opts = {}) {
  const estimateTokens = opts.estimateTokens ?? defaultTokenEstimate;
  const messages = request.messages ?? [];
  const systemPrompt = request.systemPrompt ?? '';

  // The last user-role message is treated as the prompt currently being answered.
  const userMessages = messages.filter((candidate) => candidate.role === 'user');
  const currentPromptMessage = userMessages[userMessages.length - 1];

  const totals = {
    system: tokenEstimate(systemPrompt, estimateTokens),
    history: 0,
    resentToolResults: 0,
  };
  const currentPrompt = tokenEstimate(currentPromptMessage?.text, estimateTokens);

  for (const message of messages) {
    const textTokens = tokenEstimate(message.text, estimateTokens);
    const callTokens = tokenEstimate(safeStringify(message.toolCalls), estimateTokens);
    const resultTokens = tokenEstimate(message.resultJson, estimateTokens);

    if (message.role === 'system') {
      // A system message that merely repeats `systemPrompt` was already counted above.
      const repeatsSystemPrompt = Boolean(systemPrompt) && message.text === systemPrompt;
      if (!repeatsSystemPrompt) {
        totals.system += textTokens;
      }
      continue;
    }
    if (message.role === 'tool') {
      totals.resentToolResults += textTokens + resultTokens;
      continue;
    }
    if (message === currentPromptMessage) {
      // Already accounted for as `currentPrompt`.
      continue;
    }
    totals.history += textTokens + callTokens + resultTokens;
  }

  const toolSchemas = tokenEstimate(
    safeStringify(request.tools ?? request.toolDeclarations ?? []),
    estimateTokens,
  );
  return {
    system: totals.system,
    history: totals.history,
    resentToolResults: totals.resentToolResults,
    currentPrompt,
    toolSchemas,
  };
}
|
|
65
|
+
/**
 * Sums the five slices of a request input composition.
 *
 * @param {{system: number, history: number, resentToolResults: number, currentPrompt: number, toolSchemas: number}} composition
 * @returns {number} Total estimated input tokens.
 */
export function requestInputCompositionTotal(composition) {
  const { system, history, resentToolResults, currentPrompt, toolSchemas } = composition;
  return system + history + resentToolResults + currentPrompt + toolSchemas;
}
|
|
72
|
+
/**
 * Collects the distinct tool names a request mentions, both in its tool
 * declarations and in tool calls carried by its messages.
 *
 * @param {object} request - Request with optional `tools` / `toolDeclarations` and `messages`.
 * @returns {string[]} Unique names in default `Array.prototype.sort` order.
 */
export function collectRequestToolNames(request) {
  const declared = request.tools ?? request.toolDeclarations ?? [];
  const seen = new Set();

  for (const tool of declared) {
    if (tool.name) {
      seen.add(tool.name);
    }
  }

  for (const message of request.messages ?? []) {
    if (!Array.isArray(message.toolCalls)) {
      continue;
    }
    for (const call of message.toolCalls) {
      // Only object entries with a string name count as tool calls.
      if (call && typeof call === 'object' && typeof call.name === 'string') {
        seen.add(call.name);
      }
    }
  }

  return Array.from(seen).sort();
}
|
|
90
|
+
/**
 * Returns a new traces-by-turn map with one trace event folded in.
 *
 * `turn_dispatch_complete` events, and events whose turn cannot be resolved,
 * return the input map unchanged. `llm_request` events are upserted into the
 * turn's `requests`; any other event is treated as a response and upserted
 * into `responses`. The input map is never mutated.
 *
 * @param {object} tracesByTurn - Existing traces keyed by turn id.
 * @param {{kind: string, data: object}} event - Trace event to apply.
 * @param {object} [hostCtx] - Host context used only when the turn has none recorded yet.
 * @returns {object} Updated traces keyed by turn id.
 */
export function appendContextWindowTraceEvent(tracesByTurn, event, hostCtx) {
  switch (event.kind) {
    case 'turn_dispatch_complete':
      return tracesByTurn;

    case 'llm_request': {
      const request = event.data;
      const prior = tracesByTurn[request.turnId];
      const updated = {
        turnId: request.turnId,
        requests: upsertByRequestId(prior?.requests ?? [], request),
        responses: prior?.responses ?? [],
        hostCtx: prior?.hostCtx ?? hostCtx,
      };
      return { ...tracesByTurn, [request.turnId]: updated };
    }

    default: {
      const response = event.data;
      const turnId = turnIdForResponse(response, tracesByTurn);
      if (!turnId) {
        return tracesByTurn;
      }
      const prior = tracesByTurn[turnId];
      const updated = {
        turnId,
        requests: prior?.requests ?? [],
        responses: upsertByRequestId(prior?.responses ?? [], response),
        hostCtx: prior?.hostCtx ?? hostCtx,
      };
      return { ...tracesByTurn, [turnId]: updated };
    }
  }
}
|
|
117
|
+
/**
 * Replays a sequence of trace events into a traces-by-turn map.
 *
 * @param {Iterable<{kind: string, data: object}>} events - Events in the order they should be applied.
 * @param {object|Function} [hostCtxByTurn] - Host contexts keyed by turn id, or a resolver called with `(event, turnId)`.
 * @returns {object} Traces keyed by turn id.
 */
export function contextWindowTraceEventsToTraces(events, hostCtxByTurn) {
  let traces = {};
  for (const event of events) {
    const turnId = turnIdForEvent(event, traces);
    let hostCtx;
    if (turnId) {
      hostCtx = resolveTraceHostContext(hostCtxByTurn, event, turnId);
    }
    traces = appendContextWindowTraceEvent(traces, event, hostCtx);
  }
  return traces;
}
|
|
126
|
+
/**
 * Returns a copy of `rows` with `row` replacing the first entry that shares
 * its `requestId`, or appended when there is no such entry (or no id).
 *
 * @param {object[]} rows - Existing rows; not mutated.
 * @param {object} row - Row to insert or replace.
 * @returns {object[]} New array.
 */
function upsertByRequestId(rows, row) {
  const targetId = row.requestId;
  let position = -1;
  if (targetId) {
    position = rows.findIndex((candidate) => candidate.requestId === targetId);
  }
  if (position < 0) {
    return [...rows, row];
  }
  const updated = rows.slice();
  updated[position] = row;
  return updated;
}
|
|
136
|
+
/**
 * Resolves the turn id an event belongs to.
 *
 * Request and dispatch-complete events carry `turnId` in their data; any other
 * event is resolved through `turnIdForResponse`.
 *
 * @param {{kind: string, data: object}} event
 * @param {object} tracesByTurn - Traces keyed by turn id.
 * @returns {string|undefined}
 */
function turnIdForEvent(event, tracesByTurn) {
  const carriesTurnId = event.kind === 'llm_request' || event.kind === 'turn_dispatch_complete';
  return carriesTurnId ? event.data.turnId : turnIdForResponse(event.data, tracesByTurn);
}
|
|
143
|
+
/**
 * Finds the turn a response belongs to.
 *
 * Prefers the trace that already holds a request with the same `requestId`.
 * Otherwise falls back to the part of the id before its last `#`, when that
 * part is non-empty.
 *
 * @param {{requestId?: string}} response
 * @param {object} tracesByTurn - Traces keyed by turn id.
 * @returns {string|undefined}
 */
function turnIdForResponse(response, tracesByTurn) {
  const { requestId } = response;
  if (!requestId) {
    return undefined;
  }

  const owner = Object.values(tracesByTurn).find((trace) =>
    trace.requests.some((request) => request.requestId === requestId),
  );
  if (owner) {
    return owner.turnId;
  }

  const separatorAt = requestId.lastIndexOf('#');
  if (separatorAt <= 0) {
    return undefined;
  }
  return requestId.slice(0, separatorAt);
}
|
|
154
|
+
/**
 * Looks up the host context for a turn.
 *
 * @param {object|Function|undefined} hostCtxByTurn - Map keyed by turn id, or a resolver called with `(event, turnId)`.
 * @param {object} event - Event being processed; forwarded to a resolver function.
 * @param {string} turnId
 * @returns {*} The resolved host context, or `undefined` when no source was given.
 */
function resolveTraceHostContext(hostCtxByTurn, event, turnId) {
  if (typeof hostCtxByTurn === 'function') {
    return hostCtxByTurn(event, turnId);
  }
  return hostCtxByTurn ? hostCtxByTurn[turnId] : undefined;
}
|
|
159
|
+
/**
 * Builds an estimated context-window snapshot for the next request.
 *
 * Token rows are estimated three times: for the raw history, for the history
 * after regular compaction, and for the history after forced compaction. The
 * regular-compaction rows become the snapshot; the other two feed the
 * compaction preview and pricing.
 *
 * @param {object} opts - Snapshot inputs (`systemPrompt`, `messages`, `tools`, `currentPrompt`, `limitTokens`, ...).
 * @param {Function} [opts.compactHistory] - Compaction function; defaults to `compactHistoryForModel`.
 * @param {Function} [opts.estimateTokens] - Token estimator; defaults to `defaultTokenEstimate`.
 * @returns {object} The snapshot produced by `finalizeSnapshot`.
 */
export function buildContextWindowSnapshot(opts) {
  const prompt = opts.currentPrompt?.trim() ?? '';
  const compact = opts.compactHistory ?? compactHistoryForModel;
  const estimateTokens = opts.estimateTokens ?? defaultTokenEstimate;

  // Every estimate shares the same system prompt, tools, and draft; only the history differs.
  const rowListFor = (messages) =>
    Object.values(
      rowsFromParts({
        systemPrompt: opts.systemPrompt,
        messages,
        tools: opts.tools,
        prompt,
        estimated: true,
        estimateTokens,
      }),
    );

  const rawRowList = rowListFor(opts.messages);

  const compacted = compact(opts.messages, opts.compactionOptions);
  const rowList = rowListFor(compacted.messages);

  const forceCompacted = compact(opts.messages, { ...opts.compactionOptions, force: true });
  const forceRowList = rowListFor(forceCompacted.messages);

  const rawTokens = totalTokens(rawRowList);
  const usedTokens = totalTokens(rowList);
  const forceTokens = totalTokens(forceRowList);

  const compactionPreview = buildCompactionPreview({
    rawTokens,
    currentTokens: usedTokens,
    compactedTokens: forceTokens,
    stats: forceCompacted.stats,
  });
  const pricing = buildPricing({
    providerId: opts.providerId,
    modelId: opts.modelId,
    estimatePromptInputCost: opts.estimatePromptInputCost,
    currentTokens: usedTokens,
    compactedTokens: forceTokens,
  });
  const toolCount = opts.tools.length;
  const sessionUsage = sessionUsageFromOptions(opts, usedTokens, estimateTokens);

  return finalizeSnapshot({
    rows: rowList,
    limitTokens: opts.limitTokens,
    compaction: compacted.stats,
    compactionPreview,
    pricing,
    toolCount,
    sessionUsage,
  });
}
|
|
216
|
+
/**
 * Formats a token count for compact display.
 *
 * - below 1000 (after rounding): the rounded integer, clamped at 0
 * - below 100 000: thousands with at most one decimal, e.g. `1.5k`, `12k`
 * - otherwise: whole thousands, e.g. `128k`
 *
 * The sub-thousand check is applied to the rounded value, so 999.6 renders as
 * `1k` rather than `1000`. Values that are not a number (NaN, `undefined`)
 * render as `0` instead of `NaNk`.
 *
 * @param {number} n - Token count.
 * @returns {string} Display string.
 */
export function formatContextTokens(n) {
  const value = Number(n);
  if (Number.isNaN(value)) {
    return '0';
  }

  const whole = Math.round(value);
  if (whole < 1000) {
    return String(Math.max(0, whole));
  }

  if (value < 100_000) {
    const thousands = (value / 1000).toFixed(1);
    // Drop a trailing ".0" so round thousands read as "12k", not "12.0k".
    return `${thousands.endsWith('.0') ? thousands.slice(0, -2) : thousands}k`;
  }

  return `${Math.round(value / 1000)}k`;
}
|
|
226
|
+
/**
 * Formats used tokens, and the limit when one is set, as display text such as
 * `12k / 128k tokens used` or `12k tokens used`.
 *
 * @param {{usedTokens: number, limitTokens?: number}} snapshot
 * @returns {string}
 */
export function formatContextRatio(snapshot) {
  const parts = [formatContextTokens(snapshot.usedTokens)];
  if (snapshot.limitTokens) {
    parts.push(formatContextTokens(snapshot.limitTokens));
  }
  return `${parts.join(' / ')} tokens used`;
}
|
|
232
|
+
/**
 * Formats how full the context window is as a whole-number percentage.
 *
 * @param {{percentFull?: number}} snapshot - `percentFull` is a fraction (1 means 100%).
 * @returns {string} e.g. `26% full`, or `limit unknown` when `percentFull` is undefined.
 */
export function formatContextPercent(snapshot) {
  const { percentFull } = snapshot;
  return percentFull === undefined
    ? 'limit unknown'
    : `${Math.round(percentFull * 100)}% full`;
}
|
|
237
|
+
/**
 * Compacts older conversation history into a single memory message while
 * keeping the most recent user turns verbatim.
 *
 * History is returned unchanged (as a shallow copy) when it is at or below the
 * size threshold and `force` is not set, or when there are not more user turns
 * than the number to keep.
 *
 * @param {object[]} messages - Conversation history, oldest first; not mutated.
 * @param {object} [opts]
 * @param {number} [opts.thresholdChars] - Size threshold; defaults to `MODEL_CONTEXT_COMPACTION_THRESHOLD_CHARS`.
 * @param {number} [opts.keepRecentUserTurns] - User turns to keep verbatim (minimum 1); fractional values
 *   are floored and NaN falls back to `MODEL_CONTEXT_RECENT_USER_TURNS`.
 * @param {boolean} [opts.force] - Compact even when below the threshold.
 * @returns {{messages: object[], stats: object}} Resulting history and compaction stats.
 */
export function compactHistoryForModel(messages, opts = {}) {
  const thresholdChars = opts.thresholdChars ?? MODEL_CONTEXT_COMPACTION_THRESHOLD_CHARS;

  // keepRecent is used to index into userIndexes, so it must be a whole number.
  // A fractional or NaN value would make keepStart undefined and cause the
  // entire history to be both summarized and re-sent verbatim.
  const requestedRecent = Number(opts.keepRecentUserTurns ?? MODEL_CONTEXT_RECENT_USER_TURNS);
  const keepRecent = Math.max(
    1,
    Number.isNaN(requestedRecent) ? MODEL_CONTEXT_RECENT_USER_TURNS : Math.floor(requestedRecent),
  );

  const originalChars = estimateHistoryChars(messages);
  const unchanged = () => ({
    messages: messages.slice(),
    stats: unchangedStats(originalChars),
  });

  if (!opts.force && originalChars <= thresholdChars) {
    return unchanged();
  }

  const userIndexes = messages.map((m, i) => (m.role === 'user' ? i : -1)).filter((i) => i >= 0);
  if (userIndexes.length <= keepRecent) {
    return unchanged();
  }

  // keepRecent is an integer in [1, userIndexes.length - 1], so keepStart is the
  // index of a user message other than the first one; `older` is never empty.
  const keepStart = userIndexes[userIndexes.length - keepRecent];
  const older = messages.slice(0, keepStart);
  const recent = messages.slice(keepStart);

  const memory = buildMemoryMessage(older, opts);
  const compacted = [memory, ...recent];
  const compactedChars = estimateHistoryChars(compacted);
  return {
    messages: compacted,
    stats: {
      compacted: true,
      originalChars,
      compactedChars,
      bytesSaved: Math.max(0, originalChars - compactedChars),
      turnsCompacted: countUserTurns(older),
      messagesCompacted: older.length,
    },
  };
}
|
|
278
|
+
/**
 * Estimates the size of a message history in characters.
 *
 * Each message contributes the lengths of its `id`, `role`, `text`, and
 * optional `thinking`, plus a fixed 32-character overhead. Each tool call
 * contributes its `id`, `name`, `argsJson`, optional `resultJson`, `summary`,
 * and `thinkingUpToHere`, plus the same overhead.
 *
 * @param {Iterable<object>} messages
 * @returns {number} Estimated character count.
 */
export function estimateHistoryChars(messages) {
  const ENTRY_OVERHEAD = 32;
  const optionalLength = (value) => value?.length ?? 0;

  let chars = 0;
  for (const { id, role, text, thinking, toolCalls } of messages) {
    chars += id.length + role.length + text.length + optionalLength(thinking) + ENTRY_OVERHEAD;

    for (const call of toolCalls ?? []) {
      chars +=
        call.id.length +
        call.name.length +
        call.argsJson.length +
        optionalLength(call.resultJson) +
        optionalLength(call.summary) +
        optionalLength(call.thinkingUpToHere) +
        ENTRY_OVERHEAD;
    }
  }
  return chars;
}
|
|
300
|
+
/**
 * Creates an accumulator that sums per-iteration turn metrics.
 *
 * @returns {{add: Function, totals: Function, reset: Function}}
 *   `add(metrics)` folds one iteration in and returns the new totals,
 *   `totals()` returns the current totals, and `reset()` restores the zero state.
 */
export function createTurnMetricsAccumulator() {
  let running = zeroAggregatedTurnMetrics();
  const plus = (current, delta) => current + (delta ?? 0);

  return {
    add(metrics) {
      // The latest defined isByok wins; otherwise keep whatever was recorded before.
      const isByok = metrics.isByok !== undefined ? metrics.isByok : running.isByok;
      running = {
        tokensIn: plus(running.tokensIn, metrics.tokensIn),
        tokensOut: plus(running.tokensOut, metrics.tokensOut),
        tokensCached: plus(running.tokensCached, metrics.tokensCached),
        tokensReasoning: plus(running.tokensReasoning, metrics.tokensReasoning),
        costUsd: plus(running.costUsd, metrics.costUsd),
        // Once any iteration's cost is an estimate, the total is too.
        costEstimated: running.costEstimated || metrics.costEstimated === true,
        ...(isByok !== undefined ? { isByok } : {}),
        iterations: running.iterations + 1,
      };
      return running;
    },
    totals() {
      return running;
    },
    reset() {
      running = zeroAggregatedTurnMetrics();
    },
  };
}
|
|
328
|
+
/**
 * Builds the token breakdown rows for one candidate request.
 *
 * @param {object} parts
 * @param {string} parts.systemPrompt
 * @param {Iterable<object>} parts.messages - History; text and thinking count as history, tool calls as tool results.
 * @param {*} parts.tools - Tool declarations, passed to `toolSchemaTokens`.
 * @param {string} parts.prompt - Current draft prompt.
 * @param {boolean} parts.estimated - Flag copied onto every row.
 * @param {Function} parts.estimateTokens - Token estimator.
 * @returns {object} Rows keyed by breakdown id.
 */
function rowsFromParts({ systemPrompt, messages, tools, prompt, estimated, estimateTokens, }) {
  const rows = emptyRows(estimated);
  const estimate = (text) => tokenEstimate(text, estimateTokens);

  rows.system.tokens = estimate(systemPrompt);
  for (const { text, thinking, toolCalls } of messages) {
    rows.history.tokens += estimate(text) + estimate(thinking);
    rows['tool-results'].tokens += toolCallTokens(toolCalls, estimateTokens);
  }
  rows['tool-schemas'].tokens = toolSchemaTokens(tools, estimateTokens);
  rows.draft.tokens = estimate(prompt);

  return rows;
}
|
|
340
|
+
/**
 * Creates the five breakdown rows with zero tokens.
 *
 * @param {boolean} estimated - Flag copied onto every row.
 * @returns {object} Rows keyed by breakdown id, each carrying its `BREAKDOWN_META` fields.
 */
function emptyRows(estimated) {
  const rowIds = ['system', 'history', 'tool-results', 'tool-schemas', 'draft'];
  const rows = {};
  for (const id of rowIds) {
    rows[id] = { ...BREAKDOWN_META[id], tokens: 0, estimated };
  }
  return rows;
}
|
|
349
|
+
/**
 * Assembles the snapshot object from already computed parts.
 *
 * Rows with zero tokens are dropped from the breakdown. `percentFull` is only
 * set when a positive limit is known, and `sessionUsage` only when provided.
 *
 * @param {object} parts
 * @param {object[]} parts.rows - Breakdown rows with a numeric `tokens` field.
 * @param {number} [parts.limitTokens] - Context window limit.
 * @returns {object} Snapshot with basis `'estimated-next-send'`.
 */
function finalizeSnapshot({ rows, limitTokens, compaction, compactionPreview, pricing, toolCount, sessionUsage, }) {
  const breakdown = rows.filter((row) => row.tokens > 0);

  let usedTokens = 0;
  breakdown.forEach((row) => {
    usedTokens += row.tokens;
  });

  const percentFull = limitTokens > 0 ? usedTokens / limitTokens : undefined;

  const snapshot = { basis: 'estimated-next-send', usedTokens };
  if (limitTokens !== undefined) {
    snapshot.limitTokens = limitTokens;
  }
  if (percentFull !== undefined) {
    snapshot.percentFull = percentFull;
  }
  snapshot.status = statusFor(percentFull);
  snapshot.breakdown = breakdown;
  snapshot.compaction = compaction;
  snapshot.compactionPreview = compactionPreview;
  snapshot.pricing = pricing;
  snapshot.toolCount = toolCount;
  if (sessionUsage) {
    snapshot.sessionUsage = sessionUsage;
  }
  return snapshot;
}
|
|
367
|
+
/**
 * Sums the `tokens` field of every row.
 *
 * @param {{tokens: number}[]} rows
 * @returns {number}
 */
function totalTokens(rows) {
  let sum = 0;
  rows.forEach((row) => {
    sum += row.tokens;
  });
  return sum;
}
|
|
370
|
+
/**
 * Describes what compaction has saved automatically and what a forced
 * compaction would save on top of that.
 *
 * @param {object} parts
 * @param {number} parts.rawTokens - Tokens with uncompacted history.
 * @param {number} parts.currentTokens - Tokens with regular compaction applied.
 * @param {number} parts.compactedTokens - Tokens with forced compaction applied.
 * @param {object} parts.stats - Stats of the forced compaction.
 * @returns {object} Preview with savings (never negative) and the retains/loses lists.
 */
function buildCompactionPreview({ rawTokens, currentTokens, compactedTokens, stats, }) {
  const savedBetween = (before, after) => Math.max(0, before - after);
  const automaticSavedTokens = savedBetween(rawTokens, currentTokens);
  const manualSavedTokens = savedBetween(currentTokens, compactedTokens);

  return {
    rawTokens,
    currentTokens,
    compactedTokens,
    automaticSavedTokens,
    manualSavedTokens,
    // `savedTokens` mirrors the manual saving.
    savedTokens: manualSavedTokens,
    stats,
    retains: COMPACTION_RETAINS,
    loses: COMPACTION_LOSES,
  };
}
|
|
385
|
+
/**
 * Builds the session-usage section of a snapshot.
 *
 * Host-supplied totals in `opts` take precedence over totals derived from the
 * per-turn rows. Returns `undefined` when the host supplied none of the turn,
 * total, input, or output figures and no turn or request rows could be derived.
 *
 * @param {object} opts - Snapshot options (`messages`, `tracesByTurn`, and optional `session*` totals).
 * @param {number} currentContextTokens - Estimated tokens of the next request.
 * @param {Function} estimateTokens - Token estimator.
 * @returns {object|undefined} Session usage summary.
 */
function sessionUsageFromOptions(opts, currentContextTokens, estimateTokens) {
  const turnRows = sessionTurnRowsFromMessages(opts.messages, opts.tracesByTurn, currentContextTokens);
  const requestRows = sessionRequestRowsFromTraces(opts.tracesByTurn, opts.messages, estimateTokens);

  const hostTotals = [
    opts.sessionTurns,
    opts.sessionTokensTotal,
    opts.sessionInputTokens,
    opts.sessionOutputTokens,
  ];
  const nothingToReport =
    hostTotals.every((value) => value === undefined) &&
    turnRows.length === 0 &&
    requestRows.length === 0;
  if (nothingToReport) {
    return undefined;
  }

  const derived = sumSessionTurnRows(turnRows);
  const atLeastZero = (value) => Math.max(0, value);

  const inputTokens = atLeastZero(opts.sessionInputTokens ?? derived.inputTokens);
  const outputTokens = atLeastZero(opts.sessionOutputTokens ?? derived.outputTokens);
  const tokensTotal = atLeastZero(opts.sessionTokensTotal ?? inputTokens + outputTokens);
  const cachedInputTokens = atLeastZero(opts.sessionCachedInputTokens ?? derived.cachedInputTokens);
  const reasoningTokens = atLeastZero(opts.sessionReasoningTokens ?? derived.reasoningTokens);
  const costUsdTotal = opts.sessionCostUsdTotal ?? derived.costUsdTotal;

  // undefined -> derive from turn rows; null is passed through as-is; numbers are clamped at 0.
  let requests;
  if (opts.sessionRequests === undefined) {
    requests = derived.requests;
  } else if (opts.sessionRequests === null) {
    requests = null;
  } else {
    requests = atLeastZero(opts.sessionRequests);
  }

  const usage = {
    turns: opts.sessionTurns ?? turnRows.length,
    requests,
    tokensTotal,
    inputTokens,
    outputTokens,
    cachedInputTokens,
    reasoningTokens,
  };
  if (costUsdTotal !== undefined) {
    usage.costUsdTotal = costUsdTotal;
  }
  if (currentContextTokens > 0 && tokensTotal > 0) {
    usage.workMultiplier = tokensTotal / currentContextTokens;
  }
  if (typeof requests === 'number' && requests > 0) {
    usage.averageRequestTokens = tokensTotal / requests;
  }
  usage.turnRows = turnRows;
  usage.requestRows = requestRows;
  usage.categoryDetails = buildSessionCategoryDetails({
    inputTokens,
    outputTokens,
    cachedInputTokens,
    reasoningTokens,
    tokensTotal,
    requestRows,
  });
  usage.teachingNotes = {
    providerUsage: 'Provider usage totals are authoritative for input, cache, output, and reasoning when the provider reports them.',
    estimatedComposition: 'Source-level input slices are estimated from saved provider-visible request traces using the configured token estimator.',
    contextVsSpend: 'The context window is one next request; session spend is every provider request already made.',
  };
  return usage;
}
|
|
438
|
+
/**
 * Builds one usage row per provider request recorded in the traces.
 *
 * For each request the matching response is looked up by request id, falling
 * back to the response at the same position. Provider-reported usage is
 * preferred; missing figures are estimated from the request composition and
 * the response text. Rows are sorted by timestamp (missing timestamps count
 * as 0) and then by request id.
 *
 * @param {object|undefined} tracesByTurn - Traces keyed by turn id.
 * @param {object[]} messages - Conversation messages, used to index tool calls.
 * @param {Function} estimateTokens - Token estimator.
 * @returns {object[]} Request rows; empty when there are no traces.
 */
function sessionRequestRowsFromTraces(tracesByTurn, messages, estimateTokens) {
  if (!tracesByTurn) {
    return [];
  }
  const toolIndex = buildToolCallIndex(messages);
  const rows = [];

  for (const trace of Object.values(tracesByTurn)) {
    const responsesById = new Map();
    for (const response of trace.responses) {
      if (response.requestId) {
        responsesById.set(response.requestId, response);
      }
    }

    // Composition of the preceding request in this trace. Carried across
    // iterations so each request is estimated once instead of twice.
    let previousComposition;

    for (const [index, request] of trace.requests.entries()) {
      const requestId = request.requestId ?? `${trace.turnId}#${request.iteration ?? index}`;
      const response = responsesById.get(requestId) ?? trace.responses[index];
      const composition = estimateRequestInputComposition(request, { estimateTokens });
      const usage = response?.usage;
      const estimatedInput = requestInputCompositionTotal(composition);

      // NOTE(review): nonNegative presumably yields a nullish value for unusable input,
      // which is what triggers each `??` fallback — confirm against its definition.
      const reasoningTokens = Math.max(0, nonNegative(usage?.reasoningTokens) ?? tokenEstimate(response?.thinking, estimateTokens));
      const inputTokens = Math.max(0, nonNegative(usage?.promptTokens) ?? estimatedInput);
      const outputTokens = Math.max(0, nonNegative(usage?.outputTokens) ??
        tokenEstimate(response?.text, estimateTokens) + reasoningTokens);
      // Cached input cannot exceed total input; reasoning cannot exceed total output.
      const cachedInputTokens = Math.min(inputTokens, Math.max(0, nonNegative(usage?.cachedTokens) ?? 0));
      const boundedReasoningTokens = Math.min(outputTokens, reasoningTokens);

      const iteration = request.iteration ?? index;
      const toolSchemaNames = toolNamesFromDeclarations(request);
      const emittedToolCalls = toolRefsFromResponseToolCalls(response, toolIndex, trace.turnId, estimateTokens);
      const resentToolResults = toolRefsFromToolResultMessages(request, toolIndex, trace.turnId, estimateTokens);
      const freshInputTokens = Math.max(0, inputTokens - cachedInputTokens);

      rows.push({
        id: requestId,
        requestId,
        turnId: request.turnId ?? trace.turnId,
        iteration,
        ...(typeof request.ts === 'number' ? { ts: request.ts } : {}),
        ...(trace.hostCtx?.providerId ? { providerId: trace.hostCtx.providerId } : {}),
        ...(trace.hostCtx?.providerLabel ? { providerLabel: trace.hostCtx.providerLabel } : {}),
        ...(trace.hostCtx?.modelLabel ? { modelLabel: trace.hostCtx.modelLabel } : {}),
        ...(trace.hostCtx?.strategy ? { strategy: trace.hostCtx.strategy } : {}),
        ...(trace.hostCtx?.strategySource ? { strategySource: trace.hostCtx.strategySource } : {}),
        inputTokens,
        outputTokens,
        cachedInputTokens,
        reasoningTokens: boundedReasoningTokens,
        freshInputTokens,
        visibleOutputTokens: Math.max(0, outputTokens - boundedReasoningTokens),
        tokensTotal: inputTokens + outputTokens,
        ...(typeof usage?.costUsd === 'number' ? { costUsd: usage.costUsd } : {}),
        usageSource: usage ? 'provider' : 'estimate',
        composition,
        messageCount: request.messages?.length ?? 0,
        toolResultMessageCount: request.messages?.filter((message) => message.role === 'tool').length ?? 0,
        toolNames: collectRequestToolNames(request),
        toolSchemaNames,
        emittedToolCalls,
        resentToolResults,
        cacheInsight: buildRequestCacheInsight({
          providerId: trace.hostCtx?.providerId,
          modelLabel: trace.hostCtx?.modelLabel,
          iteration,
          inputTokens,
          cachedInputTokens,
          freshInputTokens,
          composition,
          previousComposition,
        }),
      });

      previousComposition = composition;
    }
  }

  rows.sort((a, b) => (a.ts ?? 0) - (b.ts ?? 0) || a.requestId.localeCompare(b.requestId));
  return rows;
}
|
|
513
|
+
/**
 * Indexes the tool calls made by assistant messages, keyed by turn id and call id.
 *
 * Messages that are not from the assistant, or whose `turnId` is not a string,
 * are skipped.
 *
 * @param {Iterable<object>} messages
 * @returns {Map<string, {name: string, callId: string, messageId: string}>}
 */
function buildToolCallIndex(messages) {
  const byTurnAndCall = new Map();
  for (const message of messages) {
    const { role, turnId, id: messageId } = message;
    if (role !== 'assistant' || typeof turnId !== 'string') {
      continue;
    }
    for (const { id: callId, name } of message.toolCalls ?? []) {
      byTurnAndCall.set(toolIndexKey(turnId, callId), { name, callId, messageId });
    }
  }
  return byTurnAndCall;
}
|
|
531
|
+
/**
 * Builds the tool-call index key: turn id and call id joined by `:`.
 *
 * @param {string} turnId
 * @param {string} callId
 * @returns {string}
 */
function toolIndexKey(turnId, callId) {
  return [turnId, callId].map(String).join(':');
}
|
|
534
|
+
/**
 * Lists the distinct, truthy tool names declared on a request.
 *
 * @param {object} req - Request with optional `tools` / `toolDeclarations`.
 * @returns {string[]} Unique names in default `Array.prototype.sort` order.
 */
function toolNamesFromDeclarations(req) {
  const declarations = req.tools ?? req.toolDeclarations ?? [];
  const unique = new Set();
  for (const { name } of declarations) {
    if (name) {
      unique.add(name);
    }
  }
  return Array.from(unique).sort();
}
|
|
542
|
+
/**
 * Describes the tool calls a response emitted.
 *
 * Each reference carries the call name, the call id and originating message id
 * when known, and an estimated token size for the name plus serialized args.
 *
 * @param {object|undefined} response - Response whose `toolCalls` are inspected.
 * @param {Map<string, object>} index - Tool-call index keyed via `toolIndexKey`.
 * @param {string} turnId - Turn the response belongs to.
 * @param {Function} estimateTokens - Token estimator.
 * @returns {object[]} One reference per tool call; empty when `toolCalls` is not an array.
 */
function toolRefsFromResponseToolCalls(response, index, turnId, estimateTokens) {
  if (!Array.isArray(response?.toolCalls)) {
    return [];
  }

  return response.toolCalls.map((raw) => {
    // Non-object entries are treated as an empty call.
    const call = raw && typeof raw === 'object' ? raw : {};

    let callId;
    if (typeof call.callId === 'string') {
      callId = call.callId;
    } else if (typeof call.id === 'string') {
      callId = call.id;
    }

    const hit = callId ? index.get(toolIndexKey(turnId, callId)) : undefined;
    const name = typeof call.name === 'string' ? call.name : (hit?.name ?? 'tool_call');

    const ref = { name };
    if (callId) {
      ref.callId = callId;
    }
    if (hit?.messageId) {
      ref.messageId = hit.messageId;
    }
    ref.tokensEstimated =
      tokenEstimate(name, estimateTokens) +
      tokenEstimate(safeStringify(call.args), estimateTokens);
    return ref;
  });
}
|
|
564
|
+
/**
 * Describes the tool-result messages a request re-sends.
 *
 * @param {object} req - Request whose `tool`-role messages are inspected.
 * @param {Map<string, object>} index - Tool-call index keyed via `toolIndexKey`.
 * @param {string} turnId - Turn the request belongs to.
 * @param {Function} estimateTokens - Token estimator.
 * @returns {object[]} One reference per tool-result message, with estimated token size.
 */
function toolRefsFromToolResultMessages(req, index, turnId, estimateTokens) {
  const results = [];
  for (const message of req.messages ?? []) {
    if (message.role === 'tool') {
      const callId = message.callId ?? message.toolCallId;
      const hit = callId ? index.get(toolIndexKey(turnId, callId)) : undefined;

      const ref = { name: message.name ?? hit?.name ?? 'tool_result' };
      if (callId) {
        ref.callId = callId;
      }
      if (hit?.messageId) {
        ref.messageId = hit.messageId;
      }
      ref.tokensEstimated =
        tokenEstimate(message.text, estimateTokens) +
        tokenEstimate(message.resultJson, estimateTokens);
      results.push(ref);
    }
  }
  return results;
}
|
|
581
|
+
/**
 * Summarizes prompt-cache behavior for one request.
 *
 * @param {object} parts
 * @param {string} [parts.providerId]
 * @param {string} [parts.modelLabel]
 * @param {number} parts.iteration - Request iteration within the turn.
 * @param {number} parts.inputTokens
 * @param {number} parts.cachedInputTokens
 * @param {number} parts.freshInputTokens
 * @param {object} parts.composition - Estimated composition of this request.
 * @param {object} [parts.previousComposition] - Composition of the preceding request, if any.
 * @returns {object} Hit rate, token counts, known-minimum info, provider mode, and an explanation.
 */
function buildRequestCacheInsight({ providerId, modelLabel, iteration, inputTokens, cachedInputTokens, freshInputTokens, composition, previousComposition, }) {
  const knownMinimumTokens = knownCacheMinimumTokens(providerId, modelLabel);
  const hasKnownMinimum = knownMinimumTokens !== undefined;
  const meetsKnownMinimum = hasKnownMinimum ? inputTokens >= knownMinimumTokens : undefined;

  // Without a previous request there is nothing to compare against.
  let likelyStablePrefixTokens = 0;
  if (previousComposition) {
    likelyStablePrefixTokens = Math.min(
      inputTokens,
      repeatedInputShapeTokens(composition, previousComposition),
    );
  }

  const insight = {
    hitRate: inputTokens > 0 ? cachedInputTokens / inputTokens : 0,
    cachedTokens: cachedInputTokens,
    freshTokens: freshInputTokens,
  };
  if (hasKnownMinimum) {
    insight.knownMinimumTokens = knownMinimumTokens;
    insight.meetsKnownMinimum = meetsKnownMinimum;
  }
  insight.likelyStablePrefixTokens = likelyStablePrefixTokens;
  insight.providerMode = cacheProviderMode(providerId);
  insight.explanation = cacheExplanation({
    cachedInputTokens,
    freshInputTokens,
    iteration,
    meetsKnownMinimum,
    likelyStablePrefixTokens,
  });
  return insight;
}
|
|
605
|
+
/**
 * Estimates how many input tokens two consecutive requests have in common by
 * taking, per slice, the smaller of the two counts. The current prompt slice
 * is not included.
 *
 * @param {object} current - Composition of the current request.
 * @param {object} previous - Composition of the preceding request.
 * @returns {number}
 */
function repeatedInputShapeTokens(current, previous) {
  const shared = (slice) => Math.min(current[slice], previous[slice]);
  return (
    shared('system') +
    shared('history') +
    shared('toolSchemas') +
    shared('resentToolResults')
  );
}
|
|
611
|
+
/**
 * Returns the known minimum prompt size for caching, when one is recorded for
 * the provider/model pair.
 *
 * Only `gemini` has an entry: 4096 when the model label is missing, is exactly
 * "flash", or contains "3.5 flash" (optionally prefixed by "gemini"),
 * case-insensitively.
 * NOTE(review): the 4096 figure is hard-coded here; confirm it against the provider's current documentation.
 *
 * @param {string} [providerId]
 * @param {string} [modelLabel]
 * @returns {number|undefined}
 */
function knownCacheMinimumTokens(providerId, modelLabel) {
  if (providerId !== 'gemini') {
    return undefined;
  }
  const isFlashModel =
    !modelLabel ||
    /(^|\s)(gemini\s*)?3\.5\s+flash/i.test(modelLabel) ||
    /^flash$/i.test(modelLabel);
  return isFlashModel ? 4096 : undefined;
}
|
|
621
|
+
/**
 * Returns the display label for a provider's cache reporting mode.
 *
 * @param {string} [providerId]
 * @returns {string} Provider-specific label, or a generic one for other providers.
 */
function cacheProviderMode(providerId) {
  if (providerId === 'gemini') {
    return 'Gemini implicit caching';
  }
  if (providerId === 'openrouter') {
    return 'OpenRouter provider cache telemetry';
  }
  if (providerId === 'claude') {
    return 'Claude prompt caching telemetry';
  }
  return 'Provider cache telemetry';
}
|
|
633
|
+
/**
 * Picks a human-readable explanation for a request's cache outcome.
 *
 * Cache hits (partial or full) are explained first; for misses the reasons are
 * checked in order: first request of the turn, below the known minimum, and
 * finally a generic explanation.
 *
 * @param {object} parts
 * @param {number} parts.cachedInputTokens
 * @param {number} parts.freshInputTokens
 * @param {number} parts.iteration - Request iteration within the turn (0 is the first).
 * @param {boolean} [parts.meetsKnownMinimum]
 * @param {number} parts.likelyStablePrefixTokens
 * @returns {string}
 */
function cacheExplanation({ cachedInputTokens, freshInputTokens, iteration, meetsKnownMinimum, likelyStablePrefixTokens, }) {
  if (cachedInputTokens > 0) {
    if (freshInputTokens > 0) {
      return 'Partial cache hit: stable prefix hit cache; new tool results/current prompt were fresh.';
    }
    if (likelyStablePrefixTokens > 0) {
      return 'Cache hit: large repeated prefix detected; provider reported cached tokens.';
    }
    return 'Cache hit: provider reported cached tokens for this request.';
  }

  if (iteration === 0) {
    return 'No provider-reported cached tokens; this is the first request in the turn, so there may be no warm prompt prefix yet.';
  }
  if (meetsKnownMinimum === false) {
    return 'No provider-reported cached tokens; this request is below the known prompt-caching threshold.';
  }
  return 'No provider-reported cached tokens; likely changed prefix, expired cache, different route, or provider chose not to serve a cache hit.';
}
|
|
650
|
+
/**
 * Builds the per-category detail records for a session's token usage.
 *
 * The result is keyed by category id ('fresh-input', 'cached-input',
 * 'visible-output', 'reasoning-output', 'reported-total'). Each record holds
 * `id`, `label`, `source`, `note` and `rows`.
 *
 * Cached input is capped at `inputTokens` and reasoning at `outputTokens`
 * before the split is computed. Whatever part of `tokensTotal` is not covered
 * by the four classified buckets is reported as the 'reported-total' remainder.
 *
 * @param {object} params
 * @param {number} params.inputTokens
 * @param {number} params.outputTokens
 * @param {number} params.cachedInputTokens
 * @param {number} params.reasoningTokens
 * @param {number} params.tokensTotal
 * @param {Array<object>} params.requestRows - Request rows; when non-empty their
 *   compositions are summed and scaled to distribute the fresh-input total.
 * @returns {object} Category details keyed by category id.
 */
function buildSessionCategoryDetails({ inputTokens, outputTokens, cachedInputTokens, reasoningTokens, tokensTotal, requestRows, }) {
    const cacheReadTokens = Math.min(inputTokens, cachedInputTokens);
    const freshInputTokens = Math.max(0, inputTokens - cacheReadTokens);
    const boundedReasoning = Math.min(outputTokens, reasoningTokens);
    const visibleOutputTokens = Math.max(0, outputTokens - boundedReasoning);
    const classified = freshInputTokens + cacheReadTokens + visibleOutputTokens + boundedReasoning;
    const remainder = Math.max(0, tokensTotal - classified);
    const hasRequestRows = requestRows.length > 0;

    // Single detail row whose token count comes straight from provider usage.
    const providerRow = (id, label, tokens, color, description) => ({
        id,
        label,
        tokens,
        color,
        estimated: false,
        source: 'provider-reported',
        description,
    });
    // Category that carries exactly one provider-reported row when present, none otherwise.
    const providerCategory = ({ id, label, present, presentNote, absentNote, row }) => ({
        id,
        label,
        source: present ? 'provider-reported' : 'unavailable',
        note: present ? presentNote : absentNote,
        rows: present ? [row] : [],
    });

    return {
        'fresh-input': {
            id: 'fresh-input',
            label: 'Fresh input',
            source: hasRequestRows ? 'mixed' : 'unavailable',
            note: hasRequestRows
                ? 'Provider totals say how much fresh input was billed; the source slices below distribute that total by estimated request composition.'
                : 'Source-level input composition needs saved provider-visible request traces. This restored session only has high-level token metrics.',
            rows: hasRequestRows
                ? scaledCompositionRows(sumRequestCompositions(requestRows), freshInputTokens)
                : [],
        },
        'cached-input': providerCategory({
            id: 'cached-input',
            label: 'Cached input',
            // Presence is keyed on the raw cached count; the row shows the capped value.
            present: cachedInputTokens > 0,
            presentNote: 'Cache-read tokens are provider-reported. They still appear in usage and are usually cheaper when the provider exposes cache pricing.',
            absentNote: 'No provider-reported cached input has appeared in this session yet.',
            row: providerRow('cache-read', 'Cache-read input', cacheReadTokens, '#a4d4a8', 'Repeated prompt/context served from provider cache when reported.'),
        }),
        'visible-output': providerCategory({
            id: 'visible-output',
            label: 'Visible output',
            present: visibleOutputTokens > 0,
            presentNote: 'Visible output is provider-reported completion text after hidden reasoning is removed when available.',
            absentNote: 'No visible output tokens have been reported yet.',
            row: providerRow('assistant-output', 'Assistant-visible output', visibleOutputTokens, '#f0c36a', 'Generated app, rules, code, repairs, tests, and final assistant text before local preview renders.'),
        }),
        'reasoning-output': providerCategory({
            id: 'reasoning-output',
            label: 'Reasoning output',
            present: boundedReasoning > 0,
            presentNote: 'Reasoning output is separated only when provider telemetry exposes it.',
            absentNote: 'No reasoning output was reported for this session.',
            row: providerRow('reasoning', 'Hidden reasoning output', boundedReasoning, '#c9a7ff', 'Hidden model reasoning tokens reported separately from visible completion text.'),
        }),
        'reported-total': providerCategory({
            id: 'reported-total',
            label: 'Reported total',
            present: remainder > 0,
            presentNote: 'These tokens were included in the provider total but did not fit the available input/output/cache/reasoning split.',
            absentNote: 'No unclassified reported-total tokens remain after the available usage split.',
            row: providerRow('reported-total', 'Unclassified provider total', remainder, '#8aa0b8', 'Provider total remainder when detailed usage fields are missing.'),
        }),
    };
}
|
|
757
|
+
/**
 * Distributes `targetTokens` across the input-composition components in
 * proportion to each component's share of the composition total.
 *
 * Returns an empty list when `targetTokens` or the composition total is not
 * positive. Rows whose rounded token count is not positive are dropped.
 *
 * @param {object} composition - Object with `system`, `history`, `toolSchemas`,
 *   `resentToolResults` and `currentPrompt` token counts.
 * @param {number} targetTokens - Token total to distribute.
 * @returns {Array<object>} Estimated rows, in fixed component order.
 */
function scaledCompositionRows(composition, targetTokens) {
    if (targetTokens <= 0) {
        return [];
    }
    const total = requestInputCompositionTotal(composition);
    if (total <= 0) {
        return [];
    }
    // [id, label, component tokens, color, description] in display order.
    const componentSpecs = [
        ['system', 'System prompt', composition.system, '#a4d4a8', 'Agent instructions and workspace references sent with model requests.'],
        ['history', 'Conversation/history', composition.history, '#8bb7ff', 'Prior user, assistant, and tool-call context re-sent across requests.'],
        ['tool-schemas', 'Tool schemas', composition.toolSchemas, '#c9a7ff', 'Function/tool declarations available to the model for these requests.'],
        ['resent-tool-results', 'Resent tool results', composition.resentToolResults, '#f0c36a', 'Earlier tool results that appeared again in later provider-visible messages.'],
        ['current-prompt', 'Current prompt', composition.currentPrompt, '#f08a8a', 'The active user prompt portion of each request.'],
    ];
    const scaledRows = [];
    for (const [id, label, componentTokens, color, description] of componentSpecs) {
        const row = compositionRow(id, label, componentTokens, total, targetTokens, color, description);
        if (row.tokens > 0) {
            scaledRows.push(row);
        }
    }
    return scaledRows;
}
|
|
771
|
+
/**
 * Builds one estimated detail row by scaling a component's share of the
 * composition total onto `targetTokens` (rounded to the nearest integer).
 *
 * @param {string} id - Row id.
 * @param {string} label - Display label.
 * @param {number} componentTokens - Tokens attributed to this component.
 * @param {number} compositionTotal - Sum of all components; this function does not guard against 0.
 * @param {number} targetTokens - Total being distributed.
 * @param {string} color - Display color.
 * @param {string} description - Row description.
 * @returns {object} Row marked `estimated: true` with source 'estimated-from-traces'.
 */
function compositionRow(id, label, componentTokens, compositionTotal, targetTokens, color, description) {
    const share = componentTokens / compositionTotal;
    const tokens = Math.round(targetTokens * share);
    return { id, label, tokens, color, estimated: true, source: 'estimated-from-traces', description };
}
|
|
782
|
+
/**
 * Adds up the `composition` of every request row, field by field.
 *
 * @param {Array<{composition: object}>} requestRows - Rows carrying a
 *   `composition` with `system`, `history`, `resentToolResults`,
 *   `currentPrompt` and `toolSchemas` counts.
 * @returns {object} Totals for the five composition fields (all 0 for an empty list).
 */
function sumRequestCompositions(requestRows) {
    const totals = {
        system: 0,
        history: 0,
        resentToolResults: 0,
        currentPrompt: 0,
        toolSchemas: 0,
    };
    requestRows.forEach(({ composition }) => {
        totals.system += composition.system;
        totals.history += composition.history;
        totals.resentToolResults += composition.resentToolResults;
        totals.currentPrompt += composition.currentPrompt;
        totals.toolSchemas += composition.toolSchemas;
    });
    return totals;
}
|
|
797
|
+
/**
 * Converts assistant messages that carry `metrics` into per-turn usage rows.
 *
 * Messages with another role, or without metrics, are skipped and do not
 * consume a turn number. Cached tokens are capped at the input count and
 * reasoning tokens at the output count. `tokensTotal` falls back to
 * input + output when the metrics do not provide it.
 *
 * @param {Iterable<object>} messages - Session messages.
 * @param {object} [tracesByTurn] - Traces keyed by turn id; `requests.length` supplies the request count.
 * @param {number} currentContextTokens - Current context size; when positive it is the
 *   divisor for `multiplierContribution`.
 * @returns {Array<object>} One row per qualifying assistant message, labelled "Turn N".
 */
function sessionTurnRowsFromMessages(messages, tracesByTurn, currentContextTokens) {
    const atLeastZero = (value) => Math.max(0, value ?? 0);
    const rows = [];
    for (const message of messages) {
        const { metrics } = message;
        if (message.role === 'assistant' && metrics) {
            const inputTokens = atLeastZero(metrics.tokensIn);
            const outputTokens = atLeastZero(metrics.tokensOut);
            // Both spellings of the cached/reasoning fields are accepted.
            const cachedInputTokens = Math.min(inputTokens, atLeastZero(metrics.tokensCached ?? metrics.cachedTokens));
            const reasoningTokens = Math.min(outputTokens, atLeastZero(metrics.tokensReasoning ?? metrics.reasoningTokens));
            const tokensTotal = Math.max(0, metrics.tokensTotal ?? inputTokens + outputTokens);
            const { turnId } = message;
            const row = {
                id: message.id,
                label: `Turn ${rows.length + 1}`,
                // null means the request count is unknown (no string turn id or no trace).
                requestCount: typeof turnId === 'string' ? (tracesByTurn?.[turnId]?.requests.length ?? null) : null,
                inputTokens,
                outputTokens,
                cachedInputTokens,
                reasoningTokens,
                freshInputTokens: Math.max(0, inputTokens - cachedInputTokens),
                visibleOutputTokens: Math.max(0, outputTokens - reasoningTokens),
                tokensTotal,
            };
            if (typeof metrics.costUsd === 'number') {
                row.costUsd = metrics.costUsd;
            }
            if (currentContextTokens > 0 && tokensTotal > 0) {
                row.multiplierContribution = tokensTotal / currentContextTokens;
            }
            rows.push(row);
        }
    }
    return rows;
}
|
|
828
|
+
/**
 * Totals the per-turn usage rows of a session.
 *
 * `requests` becomes `null` as soon as any row has an unknown (`null`)
 * request count and stays `null` afterwards. `costUsdTotal` is only present
 * when at least one row carries a numeric `costUsd`.
 *
 * @param {Iterable<object>} rows - Per-turn rows with `requestCount`, token counts and optional `costUsd`.
 * @returns {object} Aggregated `requests`, `inputTokens`, `outputTokens`,
 *   `cachedInputTokens`, `reasoningTokens` and optionally `costUsdTotal`.
 */
function sumSessionTurnRows(rows) {
    const totals = {
        requests: 0,
        inputTokens: 0,
        outputTokens: 0,
        cachedInputTokens: 0,
        reasoningTokens: 0,
    };
    // Stays undefined until the first row with a numeric cost is seen.
    let runningCost;
    for (const row of rows) {
        if (row.requestCount === null) {
            totals.requests = null;
        }
        else if (totals.requests !== null) {
            totals.requests += row.requestCount;
        }
        totals.inputTokens += row.inputTokens;
        totals.outputTokens += row.outputTokens;
        totals.cachedInputTokens += row.cachedInputTokens;
        totals.reasoningTokens += row.reasoningTokens;
        if (typeof row.costUsd === 'number') {
            runningCost = (runningCost ?? 0) + row.costUsd;
        }
    }
    if (runningCost !== undefined) {
        totals.costUsdTotal = runningCost;
    }
    return totals;
}
|
|
861
|
+
/**
 * Estimates prompt-input cost for the current and the compacted context and
 * the amount saved by compaction.
 *
 * Both estimates are requested with `cachedTokens: 0`. When the provider id,
 * model id or estimator is missing, every field is `null`.
 *
 * @param {object} params
 * @param {string} [params.providerId]
 * @param {string} [params.modelId]
 * @param {Function} [params.estimatePromptInputCost] - Called with
 *   `{ providerId, modelId, promptTokens, cachedTokens }`; the result's `costUsd` is read.
 * @param {number} params.currentTokens - Token count of the current context.
 * @param {number} params.compactedTokens - Token count after compaction.
 * @returns {{current: object|null, compacted: object|null, savedCostUsd: number|null}}
 */
function buildPricing({ providerId, modelId, estimatePromptInputCost, currentTokens, compactedTokens, }) {
    const canEstimate = Boolean(providerId && modelId && estimatePromptInputCost);
    if (!canEstimate) {
        return { current: null, compacted: null, savedCostUsd: null };
    }
    const estimateFor = (promptTokens) => estimatePromptInputCost({
        providerId,
        modelId,
        promptTokens,
        cachedTokens: 0,
    });
    const current = estimateFor(currentTokens);
    const compacted = estimateFor(compactedTokens);
    let savedCostUsd = null;
    if (current && compacted) {
        // Never report a negative saving.
        savedCostUsd = Math.max(0, current.costUsd - compacted.costUsd);
    }
    return { current, compacted, savedCostUsd };
}
|
|
883
|
+
/**
 * Builds the compaction-stats record for a context that was left untouched.
 *
 * @param {number} chars - Character count of the context.
 * @returns {object} Stats with `compacted: false`, both char counts equal to
 *   `chars`, and all savings/counters at 0.
 */
function unchangedStats(chars) {
    const stats = {
        compacted: false,
        originalChars: chars,
        compactedChars: chars,
        bytesSaved: 0,
        turnsCompacted: 0,
        messagesCompacted: 0,
    };
    return stats;
}
|
|
893
|
+
/**
 * Collapses older conversation messages into one compact "memory" message.
 *
 * The text lists each older turn with previews of the user text, system
 * messages, the latest non-empty assistant text, and one line per tool call.
 * At most MAX_MEMORY_TOOL_LINES tool lines are emitted across all turns, and
 * the full text is capped at MAX_MEMORY_CHARS.
 *
 * The "additional older tool calls omitted" notice is added only when at
 * least one tool call was actually dropped. Previously it also appeared when
 * the number of tool calls merely equalled the cap, which wrongly claimed
 * that content had been omitted.
 *
 * @param {Array<object>} older - Older messages to summarise, in order.
 * @param {object} opts
 * @param {Function} [opts.summarizeToolCall] - Overrides the default tool-call summariser.
 * @param {Function} [opts.createMemoryMessage] - Receives `(input, older)` and returns the final message.
 * @returns {object} The memory message, or whatever `opts.createMemoryMessage` returns.
 */
function buildMemoryMessage(older, opts) {
    const describeToolCall = opts.summarizeToolCall ?? summarizeToolCall;
    const lines = [
        'Prior conversation memory (deterministically compacted for model context only; the full visible transcript is preserved in the UI/session):',
    ];
    let toolLines = 0;
    let omittedToolCalls = 0;
    groupOlderTurns(older).forEach((turn, index) => {
        lines.push('');
        lines.push(`Turn ${index + 1}:`);
        if (turn.user) {
            lines.push(`User: ${preview(turn.user.text, 360)}`);
        }
        for (const system of turn.system) {
            lines.push(`System: ${preview(system.text, 220)}`);
        }
        const assistantOutcome = latestNonEmptyAssistantText(turn.assistant);
        if (assistantOutcome) {
            lines.push(`Assistant outcome: ${preview(assistantOutcome, 420)}`);
        }
        for (const call of turn.assistant.flatMap((m) => m.toolCalls ?? [])) {
            if (toolLines >= MAX_MEMORY_TOOL_LINES) {
                // Keep counting so the notice below reflects a real omission.
                omittedToolCalls += 1;
                continue;
            }
            lines.push(`Tool: ${describeToolCall(call)}`);
            toolLines += 1;
        }
    });
    if (omittedToolCalls > 0) {
        lines.push(`Tool: additional older tool calls omitted after ${MAX_MEMORY_TOOL_LINES} compact rows`);
    }
    const text = limitChars(lines.join('\n'), MAX_MEMORY_CHARS);
    const first = older[0];
    const last = older[older.length - 1];
    const input = {
        // The id depends only on the ids of the summarised messages.
        id: `context-memory-${stableHash(older.map((m) => m.id).join('|'))}`,
        role: 'assistant',
        text,
        createdAt: first?.createdAt ?? last?.createdAt ?? 0,
    };
    return opts.createMemoryMessage ? opts.createMemoryMessage(input, older) : input;
}
|
|
931
|
+
/**
 * Groups a flat message list into turns.
 *
 * Every 'user' message opens a new turn. 'assistant' and 'system' messages
 * are attached to the turn that is currently open. Messages that arrive
 * before any user message go into a leading turn with `user: null`.
 * Messages with any other role are not stored, although one seen before
 * any user message still opens that leading turn.
 *
 * @param {Iterable<object>} messages - Messages in conversation order.
 * @returns {Array<{user: object|null, assistant: object[], system: object[]}>}
 */
function groupOlderTurns(messages) {
    const turns = [];
    const openTurn = (user) => {
        const turn = { user, assistant: [], system: [] };
        turns.push(turn);
        return turn;
    };
    let active = null;
    for (const message of messages) {
        const { role } = message;
        if (role === 'user') {
            active = openTurn(message);
            continue;
        }
        // Non-user message before any user message: start an anonymous turn.
        active ??= openTurn(null);
        switch (role) {
            case 'assistant':
                active.assistant.push(message);
                break;
            case 'system':
                active.system.push(message);
                break;
            default:
                break;
        }
    }
    return turns;
}
|
|
951
|
+
/**
 * Counts the messages whose role is 'user'.
 *
 * @param {Array<object>} messages - Messages to inspect.
 * @returns {number} Number of user messages.
 */
function countUserTurns(messages) {
    return messages.reduce((count, { role }) => (role === 'user' ? count + 1 : count), 0);
}
|
|
954
|
+
/**
 * Finds the trimmed text of the last message that has non-blank text.
 *
 * @param {Array<{text: string}>} messages - Messages scanned from last to first.
 * @returns {string} Trimmed text of the latest non-blank message, or '' if none.
 */
function latestNonEmptyAssistantText(messages) {
    let cursor = messages.length - 1;
    while (cursor >= 0) {
        const trimmed = messages[cursor]?.text.trim();
        if (trimmed) {
            return trimmed;
        }
        cursor -= 1;
    }
    return '';
}
|
|
962
|
+
/**
 * Renders a tool call as one compact line: `name | path=... | summary=...`.
 *
 * The path comes from the parsed arguments, falling back to the parsed
 * result's `data`. The summary comes from `call.summary`, falling back to a
 * string `summary` in the parsed result. Empty parts are left out.
 *
 * @param {object} call - Tool call with `name`, `argsJson`, and optional `resultJson` / `summary`.
 * @returns {string} Parts joined with ' | '.
 */
function summarizeToolCall(call) {
    const parsedArgs = safeParse(call.argsJson);
    const parsedResult = safeParse(call.resultJson ?? '');
    const path = pathFrom(parsedArgs) ?? pathFrom(parsedResult?.data);
    const resultSummary = typeof parsedResult?.summary === 'string' ? parsedResult.summary : '';
    const summary = call.summary ?? resultSummary;
    const parts = [];
    if (call.name) {
        parts.push(call.name);
    }
    if (path) {
        parts.push(`path=${path}`);
    }
    if (summary) {
        parts.push(`summary=${preview(summary, 220)}`);
    }
    return parts.join(' | ');
}
|
|
971
|
+
/**
 * Reads a string `path` property from an object-like value.
 *
 * @param {*} value - Candidate value.
 * @returns {string|null} `value.path` when `value` is a non-null object and
 *   `path` is a string; otherwise `null`.
 */
function pathFrom(value) {
    const isObjectLike = Boolean(value) && typeof value === 'object';
    if (!isObjectLike) {
        return null;
    }
    const { path } = value;
    return typeof path === 'string' ? path : null;
}
|
|
977
|
+
/**
 * Parses JSON text without throwing and keeps only object results.
 *
 * @param {string} [json] - JSON text; empty or missing input yields `null`.
 * @returns {object|null} The parsed object or array, or `null` for invalid
 *   JSON, primitives and `null`.
 */
function safeParse(json) {
    if (!json) {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(json);
    }
    catch {
        // Invalid JSON is treated as "no data" on purpose.
        return null;
    }
    if (parsed && typeof parsed === 'object') {
        return parsed;
    }
    return null;
}
|
|
988
|
+
/**
 * Produces a single-line preview of `text`: whitespace runs collapse to one
 * space, the ends are trimmed, and the result is passed through `limitChars`.
 *
 * @param {string} text - Source text.
 * @param {number} max - Character limit handed to `limitChars`.
 * @returns {string} The shortened single-line preview.
 */
function preview(text, max) {
    const singleLine = text.replace(/\s+/g, ' ').trim();
    return limitChars(singleLine, max);
}
|
|
991
|
+
/**
 * Truncates `text` to at most `max` UTF-16 code units, ending with "..."
 * when something was cut.
 *
 * Fixes relative to the previous version:
 * - When `max` is smaller than the ellipsis, the ellipsis itself is
 *   shortened so the result never exceeds `max` (it used to return "...").
 * - The cut never lands between the two halves of a surrogate pair, so no
 *   lone surrogate is left in front of the ellipsis.
 *
 * @param {string} text - Text to limit.
 * @param {number} max - Maximum length of the result in UTF-16 code units.
 * @returns {string} `text` unchanged when it already fits, else the truncated form.
 */
function limitChars(text, max) {
    if (text.length <= max) {
        return text;
    }
    const ellipsis = '...';
    if (max < ellipsis.length) {
        return ellipsis.slice(0, Math.max(0, max));
    }
    let keep = max - ellipsis.length;
    if (keep > 0) {
        const lastKept = text.charCodeAt(keep - 1);
        // 0xD800-0xDBFF are high surrogates; drop one rather than split the pair.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            keep -= 1;
        }
    }
    return `${text.slice(0, keep)}${ellipsis}`;
}
|
|
994
|
+
/**
 * Computes a deterministic 32-bit hash of a string as base-36 text.
 *
 * Each UTF-16 code unit is XOR-ed into the hash, which is then multiplied by
 * 16777619 with 32-bit wraparound, starting from 2166136261 (the FNV-1a
 * constants and step order).
 *
 * @param {string} input - Text to hash.
 * @returns {string} Unsigned 32-bit hash in base 36.
 */
function stableHash(input) {
    const PRIME = 16777619;
    let state = 2166136261;
    let position = 0;
    while (position < input.length) {
        // Math.imul keeps the multiplication within 32 bits.
        state = Math.imul(state ^ input.charCodeAt(position), PRIME);
        position += 1;
    }
    // >>> 0 reinterprets the signed 32-bit state as unsigned before formatting.
    return (state >>> 0).toString(36);
}
|
|
1002
|
+
/**
 * Classifies a usage fraction into a status band.
 *
 * @param {number} [percent] - Usage as a fraction (the thresholds are 0.5, 0.8 and 0.95).
 * @returns {'unknown'|'critical'|'high'|'medium'|'low'} 'unknown' for
 *   `undefined`, otherwise the highest band whose threshold is reached.
 */
function statusFor(percent) {
    if (percent === undefined) {
        return 'unknown';
    }
    // Ordered from the highest threshold down; the first match wins.
    const bands = [
        [0.95, 'critical'],
        [0.8, 'high'],
        [0.5, 'medium'],
    ];
    for (const [threshold, status] of bands) {
        if (percent >= threshold) {
            return status;
        }
    }
    return 'low';
}
|
|
1013
|
+
/**
 * Estimates the tokens taken by a list of tool calls by adding the estimates
 * for each call's `argsJson`, `resultJson` and `summary`.
 *
 * @param {Iterable<object>} [calls] - Tool calls; a missing list counts as 0.
 * @param {Function} [estimateTokens] - Estimator forwarded to `tokenEstimate`.
 * @returns {number} Estimated token total.
 */
function toolCallTokens(calls, estimateTokens) {
    if (!calls) {
        return 0;
    }
    let total = 0;
    for (const call of calls) {
        const parts = [call.argsJson, call.resultJson, call.summary];
        for (const part of parts) {
            total += tokenEstimate(part, estimateTokens);
        }
    }
    return total;
}
|
|
1024
|
+
/**
 * Estimates the tokens taken by the tool declarations: the `name`,
 * `description` and `parameters` of every tool are serialised together and
 * the resulting text is passed to `tokenEstimate`.
 *
 * @param {Array<object>} tools - Tool definitions.
 * @param {Function} [estimateTokens] - Estimator forwarded to `tokenEstimate`.
 * @returns {number} Estimated token count of the serialised declarations.
 */
function toolSchemaTokens(tools, estimateTokens) {
    const declarations = tools.map(({ name, description, parameters }) => ({ name, description, parameters }));
    const serialized = safeStringify(declarations);
    return tokenEstimate(serialized, estimateTokens);
}
|
|
1032
|
+
/**
 * Estimates the token count of `value` with the given estimator.
 *
 * @param {string} [value] - Text to estimate.
 * @param {Function} [estimateTokens] - Estimator; defaults to `defaultTokenEstimate`.
 * @returns {number} The estimator's result, or 0 when it returns `null`/`undefined`.
 */
export function tokenEstimate(value, estimateTokens = defaultTokenEstimate) {
    const estimate = estimateTokens(value);
    return estimate ?? 0;
}
|
|
1035
|
+
/**
 * Serialises a value to JSON text without throwing.
 *
 * @param {*} value - Value to serialise.
 * @returns {string} The JSON text, or '' when `value` is `undefined`, when
 *   `JSON.stringify` yields no text, or when serialisation throws.
 */
export function safeStringify(value) {
    if (value === undefined) {
        return '';
    }
    let serialized;
    try {
        serialized = JSON.stringify(value);
    }
    catch {
        // Values that cannot be serialised are reported as empty text.
        return '';
    }
    return serialized ?? '';
}
|
|
1045
|
+
/**
 * Default token estimator: one token per four characters, rounded up.
 *
 * @param {string} [value] - Text to estimate.
 * @returns {number} `ceil(length / 4)`, or 0 for empty/missing input.
 */
function defaultTokenEstimate(value) {
    if (!value) {
        return 0;
    }
    return Math.ceil(value.length / 4);
}
|
|
1048
|
+
/**
 * Normalises a value to a non-negative finite number.
 *
 * @param {*} value - Candidate value.
 * @returns {number|undefined} `value` clamped to at least 0 when it is a
 *   finite number; otherwise `undefined`.
 */
function nonNegative(value) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
        return undefined;
    }
    return Math.max(0, value);
}
|
|
1051
|
+
/**
 * Creates a fresh aggregated-turn-metrics record with every counter at 0
 * and `costEstimated` set to `false`.
 *
 * @returns {object} A new object on every call.
 */
function zeroAggregatedTurnMetrics() {
    const metrics = {
        tokensIn: 0,
        tokensOut: 0,
        tokensCached: 0,
        tokensReasoning: 0,
        costUsd: 0,
        costEstimated: false,
        iterations: 0,
    };
    return metrics;
}
|
|
1062
|
+
//# sourceMappingURL=index.js.map
|