@link-assistant/hive-mind 1.64.0 → 1.64.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/package.json +1 -1
- package/src/agent-commander.lib.mjs +47 -5
- package/src/agent-token-usage.lib.mjs +15 -1
- package/src/claude.budget-stats.lib.mjs +72 -27
- package/src/codex.lib.mjs +10 -0
- package/src/context-fill.lib.mjs +71 -0
- package/src/gemini.lib.mjs +22 -7
- package/src/qwen.lib.mjs +191 -9
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,17 @@
 # @link-assistant/hive-mind
 
+## 1.64.2
+
+### Patch Changes
+
+- 320ca42: Fix budget stats sub-agent context-fill calculation so cumulative-only rows (e.g. Claude Haiku 4.5 sub-agent calls that never appear in the parent JSONL) use `input + cache_creation` instead of `input + cache_creation + cache_read`. The previous formula double-counted the cached prefix replayed across calls and produced impossible percentages such as `1.2M / 200K (583%)`.
+
+## 1.64.1
+
+### Patch Changes
+
+- 51a8721: Add a separate `konard/hive-mind-dind` Docker image for nested Docker testing.
+
 ## 1.64.0
 
 ### Minor Changes
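
The arithmetic behind the 1.64.2 entry, as a small worked example. The numbers below are hypothetical; they are chosen only to reproduce the kind of impossible percentage the changelog entry describes.

```js
// Cumulative usage for a hypothetical Haiku 4.5 sub-agent tool loop; cacheReadTokens
// counts the same cached prefix once for every call that replayed it.
const usage = { inputTokens: 30_000, cacheCreationTokens: 70_000, cacheReadTokens: 1_066_000 };
const contextLimit = 200_000;

// Pre-1.64.2 formula: input + cache_creation + cache_read.
const oldFill = usage.inputTokens + usage.cacheCreationTokens + usage.cacheReadTokens;
console.log(`${oldFill} / ${contextLimit} (${Math.round((oldFill / contextLimit) * 100)}%)`);
// "1166000 / 200000 (583%)", more than the whole context window, which is impossible

// 1.64.2 formula for cumulative-only rows: input + cache_creation.
const newFill = usage.inputTokens + usage.cacheCreationTokens;
console.log(`${newFill} / ${contextLimit} (${Math.round((newFill / contextLimit) * 100)}%)`);
// "100000 / 200000 (50%)"
```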
package/package.json
CHANGED

package/src/agent-commander.lib.mjs
CHANGED

@@ -11,6 +11,7 @@ import { resolveCodexReasoningEffort } from './codex.options.lib.mjs';
 import { mapModelForTool } from './models/index.mjs';
 import { buildCodexDisable1mContextConfigArgs, buildCodexSubSessionSizeConfigArgs, parseSubSessionSize } from './sub-session-size.lib.mjs';
 import { detectUsageLimit } from './usage-limit.lib.mjs';
+import { getCacheReadTokenCount, getCumulativeContextInputTokens, getOutputTokenCount } from './context-fill.lib.mjs';
 
 export const AGENT_COMMANDER_TOOLS = new Set(['claude', 'codex', 'opencode', 'agent', 'qwen', 'gemini']);
 
@@ -222,10 +223,45 @@ const extractResultSummary = (messages, plainOutput) => {
 
 const hasErrorMessage = messages => messages.some(message => message?.is_error === true || message?.type === 'error' || message?.type === 'step_error' || message?.error);
 
+const normalizeAgentCommanderTokenUsage = usage => {
+  if (!usage || typeof usage !== 'object') return null;
+  const normalized = {
+    ...usage,
+    contextFillInputTokens: usage.contextFillInputTokens ?? getCumulativeContextInputTokens(usage),
+  };
+  const cacheReadTokens = getCacheReadTokenCount(normalized);
+  const hasTokenCounts = getCumulativeContextInputTokens(normalized) > 0 || getOutputTokenCount(normalized) > 0 || cacheReadTokens > 0;
+  if (!hasTokenCounts) return null;
+  if (!normalized.stepCount) normalized.stepCount = 1;
+  return normalized;
+};
+
+const enrichPricingInfoWithTokenUsage = ({ pricingInfo = null, usage = null, tool = null, publicPricingEstimate = null }) => {
+  const tokenUsage = normalizeAgentCommanderTokenUsage(pricingInfo?.tokenUsage || usage);
+  if (!tokenUsage) return pricingInfo || null;
+
+  return {
+    source: 'agent-commander',
+    ...(pricingInfo || {}),
+    provider: pricingInfo?.provider || tool || 'agent-commander',
+    modelId: pricingInfo?.modelId || tokenUsage.respondedModelId || tokenUsage.requestedModelId || null,
+    modelName: pricingInfo?.modelName || tokenUsage.respondedModelId || tokenUsage.requestedModelId || null,
+    totalCostUSD: pricingInfo?.totalCostUSD ?? publicPricingEstimate ?? null,
+    tokenUsage,
+  };
+};
+
 export const summarizeAgentCommanderResult = ({ result, tool }) => {
   const plainOutput = result?.output?.plain || '';
   if (result?.metadata && typeof result.metadata === 'object') {
     const metadata = result.metadata;
+    const streamTokenUsage = metadata.streamTokenUsage || result.usage || null;
+    const pricingInfo = enrichPricingInfoWithTokenUsage({
+      pricingInfo: metadata.pricingInfo || null,
+      usage: streamTokenUsage,
+      tool,
+      publicPricingEstimate: metadata.publicPricingEstimate ?? metadata.pricingInfo?.totalCostUSD ?? null,
+    });
     return {
       success: metadata.success === true,
       sessionId: metadata.sessionId || result.sessionId || null,
@@ -233,11 +269,11 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
       limitResetTime: metadata.limitResetTime || null,
       limitTimezone: metadata.limitTimezone || null,
       anthropicTotalCostUSD: metadata.anthropicTotalCostUSD ?? null,
-      publicPricingEstimate: metadata.publicPricingEstimate ??
-      pricingInfo
+      publicPricingEstimate: metadata.publicPricingEstimate ?? pricingInfo?.totalCostUSD ?? null,
+      pricingInfo,
       resultSummary: metadata.resultSummary || null,
       resultModelUsage: metadata.resultModelUsage || null,
-      streamTokenUsage
+      streamTokenUsage,
       subAgentCalls: metadata.subAgentCalls || null,
       errorDuringExecution: metadata.errorDuringExecution === true || result?.exitCode !== 0,
       result: plainOutput,
@@ -250,6 +286,12 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
   const resultMessage = [...messages].reverse().find(message => message?.type === 'result') || null;
   const totalCost = typeof resultMessage?.total_cost_usd === 'number' ? resultMessage.total_cost_usd : null;
   const publicPricingEstimate = tool === 'agent' && typeof usage?.totalCost === 'number' ? usage.totalCost : null;
+  const pricingInfo = enrichPricingInfoWithTokenUsage({
+    pricingInfo: publicPricingEstimate !== null ? { totalCostUSD: publicPricingEstimate, source: 'agent-commander' } : null,
+    usage,
+    tool,
+    publicPricingEstimate,
+  });
 
   return {
     success: result?.exitCode === 0 && !usageLimit.isUsageLimit && !hasErrorMessage(messages),
@@ -258,8 +300,8 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
     limitResetTime: usageLimit.resetTime,
     limitTimezone: usageLimit.timezone,
     anthropicTotalCostUSD: tool === 'claude' ? totalCost : null,
-    publicPricingEstimate,
-    pricingInfo
+    publicPricingEstimate: publicPricingEstimate ?? pricingInfo?.totalCostUSD ?? null,
+    pricingInfo,
     resultSummary: extractResultSummary(messages, plainOutput),
     resultModelUsage: null,
     streamTokenUsage: usage,

package/src/agent-token-usage.lib.mjs
CHANGED

@@ -2,6 +2,7 @@
 
 import Decimal from 'decimal.js-light';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
+import { getCumulativeContextInputTokens, getRestoredContextInputTokens } from './context-fill.lib.mjs';
 
 export const createTokenFieldAvailability = () => ({
   inputTokens: false,
@@ -23,6 +24,7 @@ export const createAgentTokenUsage = () => ({
   respondedModelId: null,
   contextLimit: null,
   outputLimit: null,
+  contextFillInputTokens: 0,
   peakContextUsage: 0,
   tokenFieldAvailability: createTokenFieldAvailability(),
 });
@@ -61,10 +63,22 @@ export const accumulateAgentStepFinishUsage = (usage, data) => {
     if (data.part.model.respondedModelID) usage.respondedModelId = data.part.model.respondedModelID;
   }
 
+  const stepContextFill = getCumulativeContextInputTokens({
+    inputTokens: getTokenCount(tokens.input),
+    cacheWriteTokens: getTokenCount(tokens.cache?.write),
+  });
+  if (stepContextFill > (usage.contextFillInputTokens || 0)) {
+    usage.contextFillInputTokens = stepContextFill;
+  }
+
   if (data.part.context) {
     if (data.part.context.contextLimit) usage.contextLimit = data.part.context.contextLimit;
     if (data.part.context.outputLimit) usage.outputLimit = data.part.context.outputLimit;
-    const stepContextUsage =
+    const stepContextUsage = getRestoredContextInputTokens({
+      inputTokens: getTokenCount(tokens.input),
+      cacheWriteTokens: getTokenCount(tokens.cache?.write),
+      cacheReadTokens: getTokenCount(tokens.cache?.read),
+    });
     if (stepContextUsage > (usage.peakContextUsage || 0)) {
       usage.peakContextUsage = stepContextUsage;
     }

package/src/claude.budget-stats.lib.mjs
CHANGED

@@ -4,6 +4,9 @@
 
 import { formatNumber } from './claude.lib.mjs';
 import Decimal from 'decimal.js-light';
+import { getCacheReadTokenCount, getCacheWriteTokenCount, getCumulativeContextInputTokens, getDisplayContextInputTokens, getExplicitContextFillInputTokens, getInputTokenCount, getOutputTokenCount, getRestoredContextInputTokens } from './context-fill.lib.mjs';
+
+export { getCumulativeContextInputTokens, getRestoredContextInputTokens };
 
 /**
  * Helper: creates a fresh sub-session usage object for tracking tokens between compactification events
@@ -19,9 +22,9 @@ export const createEmptySubSessionUsage = () => ({
   peakOutputUsage: 0,
 });
 
-export const getRawRequestInputTokens = usage => (usage
+export const getRawRequestInputTokens = usage => getRestoredContextInputTokens(usage);
 
-export const getUsageInputTokens = usage => (usage
+export const getUsageInputTokens = usage => getRestoredContextInputTokens(usage);
 
 /**
  * Helper: accumulates token usage from a JSONL entry into a model usage map
@@ -184,6 +187,7 @@ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
   const reads = usage.cacheReadTokens || 0;
   const inputs = usage.inputTokens || 0;
   const outputs = usage.outputTokens || 0;
+  const explicitContextFill = getExplicitContextFillInputTokens(usage);
   const webSearches = usage.webSearchRequests || 0;
   const subSessionCount = (tokenUsage?.subSessions || []).length;
   const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
@@ -194,6 +198,14 @@
   // buckets split for cost and accounting review.
   await log(` peak input: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest request input + cache_creation + cache_read)`, { verbose: true });
   await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
+  // Issue #1741: when peak is 0 (sub-agent only seen via result event), the
+  // detail row falls back to the cumulative-context proxy `input + cache_write`
+  // (cache_read is excluded because it represents the same cached prefix replayed
+  // across calls and would inflate the percentage past 100%).
+  if (explicitContextFill !== null || peak === 0) {
+    const contextFill = explicitContextFill ?? getCumulativeContextInputTokens(usage);
+    await log(` context fill: ${formatNumber(contextFill)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (input + cache_write; cache_read excluded — issue #1741)`, { verbose: true });
+  }
   // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
   // still surfaces the implied dollar cost so the residual remains debuggable
   // from the saved log even if a future model lacks pricing data.
@@ -305,17 +317,25 @@ export const mergeResultModelUsage = (modelUsage, resultModelUsage) => {
   if (!resultModelUsage || typeof resultModelUsage !== 'object') return;
   for (const [modelId, resultUsage] of Object.entries(resultModelUsage)) {
     if (modelId.startsWith('<') && modelId.endsWith('>')) continue;
+    const inputTokens = getInputTokenCount(resultUsage);
+    const cacheCreationTokens = getCacheWriteTokenCount(resultUsage);
+    const cacheReadTokens = getCacheReadTokenCount(resultUsage);
+    const outputTokens = getOutputTokenCount(resultUsage);
+    const explicitContextFill = getExplicitContextFillInputTokens(resultUsage);
     if (!modelUsage[modelId]) {
       modelUsage[modelId] = {
-        inputTokens
-        cacheCreationTokens
+        inputTokens,
+        cacheCreationTokens,
         cacheCreation5mTokens: 0,
         cacheCreation1hTokens: 0,
-        cacheReadTokens
-        outputTokens
+        cacheReadTokens,
+        outputTokens,
         webSearchRequests: resultUsage.webSearchRequests || 0,
         _sourceResultJson: true,
       };
+      if (explicitContextFill !== null) {
+        modelUsage[modelId].contextFillInputTokens = explicitContextFill;
+      }
       if (resultUsage.costUSD != null) {
        modelUsage[modelId]._resultCostUSD = resultUsage.costUSD;
       }
@@ -331,13 +351,16 @@ export const mergeResultModelUsage = (modelUsage, resultModelUsage) => {
     } else {
       const jsonlUsage = modelUsage[modelId];
       const jsonlTotal = jsonlUsage.inputTokens + jsonlUsage.cacheCreationTokens + jsonlUsage.cacheReadTokens + jsonlUsage.outputTokens;
-      const resultTotal =
+      const resultTotal = inputTokens + cacheCreationTokens + cacheReadTokens + outputTokens;
       if (resultTotal > jsonlTotal) {
-        jsonlUsage.inputTokens =
-        jsonlUsage.cacheCreationTokens =
-        jsonlUsage.cacheReadTokens =
-        jsonlUsage.outputTokens =
+        jsonlUsage.inputTokens = inputTokens;
+        jsonlUsage.cacheCreationTokens = cacheCreationTokens;
+        jsonlUsage.cacheReadTokens = cacheReadTokens;
+        jsonlUsage.outputTokens = outputTokens;
         jsonlUsage._sourceResultJson = true;
+        if (explicitContextFill !== null) {
+          jsonlUsage.contextFillInputTokens = explicitContextFill;
+        }
       }
       if (resultUsage.costUSD != null) {
         jsonlUsage._resultCostUSD = resultUsage.costUSD;
@@ -573,7 +596,7 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
     }
 
-    const peakContext = usage
+    const peakContext = getDisplayContextInputTokens(usage);
 
     if (showSubSessions) {
       // Issue #1600: Unified format — no "Context window:" prefix, same format as sub-agent calls
@@ -587,10 +610,14 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       // so peakContext stays at 0; without this fallback the rendered comment loses
       // the sub-agent's input-token information entirely. The detail line is
       // deliberately simple; the Total line below keeps the cache split.
+      // Issue #1741: For result-event-only rows we have cumulative totals, not a
+      // per-request peak, so the detail-line numerator must exclude cache_reads
+      // (which are the same cached prefix replayed across calls and would inflate
+      // the percentage past 100%). The Total line keeps the full split.
       const parts = [];
      const isResultSingleCall = usage._sourceResultJson || callCount > 0;
      const inputPart = isResultSingleCall
-        ? formatInputContextPart(
+        ? formatInputContextPart(getCumulativeContextInputTokens(usage), contextLimit, formatTokensCompact)
         : buildCumulativeInputPhrase({
           input: usage.inputTokens || 0,
           cacheWrites: usage.cacheCreationTokens || 0,
@@ -636,7 +663,12 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       for (let i = 0; i < matchingCalls.length; i++) {
         const call = matchingCalls[i];
         const cu = call.usage || {};
-
+        // Issue #1741: per-call usage is itself cumulative across the
+        // sub-agent's internal API requests (each Anthropic Agent call
+        // can run a tool loop), so cache_reads grow with the loop length
+        // and would push the displayed fill past 100%. Use the same
+        // input + cache_creation proxy as the result-event-only fallback.
+        const callInput = getCumulativeContextInputTokens(cu);
         const callOutput = cu.outputTokens || 0;
         const parts = [];
         if (contextLimit) {
@@ -655,9 +687,13 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
         }
       } else {
         // Estimated per-call breakdown when sub-agent stream tracking did not capture
-        // per-call usage.
-        //
-
+        // per-call usage. Issue #1741: cumulative cache_read tokens grow without
+        // bound across calls (the same cached prefix is replayed on every call),
+        // so we mustn't add them when projecting an average per-call fill —
+        // doing so would routinely exceed 100% of the context window. The
+        // estimate uses input + cache_creation (cache reads stay in the Total
+        // line below).
+        const aggregateInput = getCumulativeContextInputTokens(usage);
         const avgInput = Math.round(aggregateInput / callCount);
         const avgOutput = Math.round(usage.outputTokens / callCount);
         for (let i = 0; i < matchingCalls.length; i++) {
@@ -696,7 +732,14 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  * @returns {Object|null} Budget stats data compatible with buildBudgetStatsString, or null if no data
  */
 export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
-  if (!tokenUsage
+  if (!tokenUsage) return null;
+
+  const inputTokens = getInputTokenCount(tokenUsage);
+  const cacheWriteTokens = getCacheWriteTokenCount(tokenUsage);
+  const cacheReadTokens = getCacheReadTokenCount(tokenUsage);
+  const outputTokens = getOutputTokenCount(tokenUsage);
+  const hasTokens = inputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0 || outputTokens > 0;
+  if ((tokenUsage.stepCount || 0) === 0 && !hasTokens) return null;
 
   const modelName = pricingInfo?.modelName || tokenUsage.respondedModelId || tokenUsage.requestedModelId || 'Unknown';
   const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId || pricingInfo?.modelId || 'unknown';
@@ -704,14 +747,16 @@ export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
   // Use context limits from step_finish events if available, otherwise from pricing model info
   const contextLimit = tokenUsage.contextLimit || pricingInfo?.modelInfo?.limit?.context || null;
   const outputLimit = tokenUsage.outputLimit || pricingInfo?.modelInfo?.limit?.output || null;
+  const contextFillInputTokens = getExplicitContextFillInputTokens(tokenUsage) ?? getCumulativeContextInputTokens({ inputTokens, cacheWriteTokens });
 
   const modelUsageEntry = {
-    inputTokens
-    cacheCreationTokens:
-    cacheReadTokens
-    outputTokens
+    inputTokens,
+    cacheCreationTokens: cacheWriteTokens,
+    cacheReadTokens,
+    outputTokens,
     modelName,
     modelInfo: contextLimit || outputLimit ? { limit: { context: contextLimit, output: outputLimit } } : null,
+    contextFillInputTokens,
     peakContextUsage: tokenUsage.peakContextUsage || 0,
     costUSD: pricingInfo?.totalCostUSD ?? null,
   };
@@ -719,11 +764,11 @@ export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
   return {
     modelUsage: { [modelId]: modelUsageEntry },
     subSessions: [],
-    inputTokens
-    cacheCreationTokens:
-    cacheReadTokens
-    outputTokens
-    totalTokens:
+    inputTokens,
+    cacheCreationTokens: cacheWriteTokens,
+    cacheReadTokens,
+    outputTokens,
+    totalTokens: inputTokens + cacheWriteTokens + outputTokens,
   };
 };
 
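
A condensed sketch of the detail-row fallback that the issue #1741 comments in this file describe. It is a simplification for illustration, not the package's actual rendering code; the field names follow the usage objects shown in the hunks above.

```js
// Simplified sketch (not the real rendering code) of which numerator the
// budget-stats detail row uses, per the issue #1741 comments above.
const detailNumerator = usage =>
  (usage.peakContextUsage || 0) > 0
    ? usage.peakContextUsage // per-request peak captured from the JSONL / stream
    : (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0); // cumulative-only row: input + cache_write; cache_read is left to the Total line

// Sub-agent seen only via the result event (hypothetical cumulative totals):
console.log(detailNumerator({ inputTokens: 30_000, cacheCreationTokens: 70_000, cacheReadTokens: 1_066_000, peakContextUsage: 0 })); // 100000
// Row with a tracked per-request peak:
console.log(detailNumerator({ inputTokens: 2_000, cacheCreationTokens: 5_000, cacheReadTokens: 140_000, peakContextUsage: 147_000 })); // 147000
```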
package/src/codex.lib.mjs
CHANGED

@@ -26,6 +26,7 @@ import { fetchModelInfo } from './model-info.lib.mjs';
 import { defaultModels } from './models/index.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
+import { getCumulativeContextInputTokens } from './context-fill.lib.mjs';
 import Decimal from 'decimal.js-light';
 
 const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -85,6 +86,7 @@ export const createCodexTokenUsage = requestedModelId => ({
   respondedModelId: requestedModelId || null,
   contextLimit: null,
   outputLimit: null,
+  contextFillInputTokens: 0,
   peakContextUsage: 0,
   tokenFieldAvailability: createCodexTokenFieldAvailability(),
 });
@@ -346,6 +348,13 @@ export const parseCodexExecJsonOutput = (output, state = {}, requestedModelId =
     if (turnContextUsage > (nextState.tokenUsage.peakContextUsage || 0)) {
       nextState.tokenUsage.peakContextUsage = turnContextUsage;
     }
+    const turnContextFill = getCumulativeContextInputTokens({
+      inputTokens: nonCachedInputTokens,
+      cacheWriteTokens,
+    });
+    if (turnContextFill > (nextState.tokenUsage.contextFillInputTokens || 0)) {
+      nextState.tokenUsage.contextFillInputTokens = turnContextFill;
+    }
 
     const usageFieldSet = CODEX_USAGE_FIELD_NAMES.filter(fieldName => hasOwnPath(data.usage, fieldName));
     if (usageFieldSet.length > 0) nextState.observedUsageFieldSets.push(usageFieldSet);
@@ -407,6 +416,7 @@ export const buildCodexResultModelUsage = (modelId, tokenUsage, pricingInfo = nu
       outputTokens: tokenUsage.outputTokens || 0,
       modelName: pricingInfo?.modelName || modelId,
       modelInfo: pricingInfo?.modelInfo || null,
+      contextFillInputTokens: tokenUsage.contextFillInputTokens || getCumulativeContextInputTokens(tokenUsage),
       peakContextUsage: tokenUsage.peakContextUsage || 0,
       costUSD: pricingInfo?.totalCostUSD ?? null,
     },

package/src/context-fill.lib.mjs
ADDED

@@ -0,0 +1,71 @@
+// Shared context-window fill helpers.
+
+const TOKEN_FIELD_ALIASES = {
+  input: ['inputTokens', 'input_tokens', 'input', 'promptTokens', 'prompt_tokens', 'prompt'],
+  output: ['outputTokens', 'output_tokens', 'output', 'completionTokens', 'completion_tokens', 'completion'],
+  cacheWrite: ['cacheCreationTokens', 'cacheWriteTokens', 'cacheCreationInputTokens', 'cache_creation_input_tokens', 'cache_write_tokens', 'cacheWrite'],
+  cacheRead: ['cacheReadTokens', 'cacheReadInputTokens', 'cache_read_input_tokens', 'cache_read_tokens', 'cachedInputTokens', 'cached_input_tokens', 'cacheRead'],
+};
+
+export const toTokenCount = value => {
+  if (Number.isFinite(value)) return Math.max(0, value);
+  if (typeof value === 'string' && value.trim()) {
+    const parsed = Number(value);
+    if (Number.isFinite(parsed)) return Math.max(0, parsed);
+  }
+  return 0;
+};
+
+const getFirstTokenField = (usage, fieldNames) => {
+  if (!usage || typeof usage !== 'object') return 0;
+  for (const fieldName of fieldNames) {
+    if (Object.hasOwn(usage, fieldName)) return toTokenCount(usage[fieldName]);
+  }
+  return 0;
+};
+
+export const getInputTokenCount = usage => getFirstTokenField(usage, TOKEN_FIELD_ALIASES.input);
+
+export const getOutputTokenCount = usage => getFirstTokenField(usage, TOKEN_FIELD_ALIASES.output);
+
+export const getCacheWriteTokenCount = usage => {
+  const direct = getFirstTokenField(usage, TOKEN_FIELD_ALIASES.cacheWrite);
+  if (direct > 0 || !usage?.cache || typeof usage.cache !== 'object') return direct;
+  return toTokenCount(usage.cache.write);
+};
+
+export const getCacheReadTokenCount = usage => {
+  const direct = getFirstTokenField(usage, TOKEN_FIELD_ALIASES.cacheRead);
+  if (direct > 0 || !usage?.cache || typeof usage.cache !== 'object') return direct;
+  return toTokenCount(usage.cache.read);
+};
+
+/**
+ * Issue #1741: context-fill from cumulative/session usage.
+ *
+ * Cache reads are intentionally excluded. They are the same cached prefix replayed
+ * across requests, so summing them in a cumulative row can exceed the model's
+ * context window even though no single sub-session filled that much context.
+ */
+export const getCumulativeContextInputTokens = usage => getInputTokenCount(usage) + getCacheWriteTokenCount(usage);
+
+/**
+ * Issue #1737: restored prompt size for one concrete request/turn.
+ *
+ * Use this only when the source row is a single request or a tool-specific
+ * per-turn value. For cumulative model rows, use getCumulativeContextInputTokens.
+ */
+export const getRestoredContextInputTokens = usage => getInputTokenCount(usage) + getCacheWriteTokenCount(usage) + getCacheReadTokenCount(usage);
+
+export const getExplicitContextFillInputTokens = usage => {
+  if (!usage || typeof usage !== 'object') return null;
+  if (Object.hasOwn(usage, 'contextFillInputTokens')) return toTokenCount(usage.contextFillInputTokens);
+  if (Object.hasOwn(usage, 'cumulativeContextInputTokens')) return toTokenCount(usage.cumulativeContextInputTokens);
+  return null;
+};
+
+export const getDisplayContextInputTokens = usage => {
+  const explicitContextFill = getExplicitContextFillInputTokens(usage);
+  if (explicitContextFill !== null) return explicitContextFill;
+  return toTokenCount(usage?.peakContextUsage);
+};
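
A minimal usage sketch of the helpers added above. The import path assumes the new file lands at src/context-fill.lib.mjs as in this diff, and the usage objects are illustrative.

```js
// Minimal usage sketch (illustrative values) of the helpers defined above.
import { getCumulativeContextInputTokens, getRestoredContextInputTokens } from './context-fill.lib.mjs';

// The alias table lets one helper read Anthropic-style snake_case usage objects...
const anthropicUsage = { input_tokens: 1_200, cache_creation_input_tokens: 8_000, cache_read_input_tokens: 150_000 };
// ...as well as camelCase usage objects coming from the stream trackers.
const streamUsage = { inputTokens: 1_200, cacheWriteTokens: 8_000, cacheReadTokens: 150_000 };

console.log(getCumulativeContextInputTokens(anthropicUsage)); // 9200: input + cache_write, for cumulative model rows
console.log(getRestoredContextInputTokens(streamUsage)); // 159200: also adds cache_read, for a single request/turn
```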
package/src/gemini.lib.mjs
CHANGED

@@ -20,6 +20,7 @@ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { defaultModels, geminiModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability } from './playwright-mcp.lib.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
+import { getCumulativeContextInputTokens, toTokenCount } from './context-fill.lib.mjs';
 
 const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
 
@@ -46,20 +47,34 @@ const extractGeminiTextContent = value => {
   return '';
 };
 
-const
+const pickTokenValue = (...values) => {
+  for (const value of values) {
+    if (value !== undefined && value !== null) return toTokenCount(value);
+  }
+  return 0;
+};
+
+export const buildGeminiResultModelUsage = (modelId, stats = null) => {
   const modelStats = stats?.models && typeof stats.models === 'object' ? stats.models : null;
   if (modelStats) {
     const usage = {};
     for (const [id, data] of Object.entries(modelStats)) {
       const tokens = data?.tokens || {};
+      const inputTokens = pickTokenValue(tokens.input, tokens.prompt);
+      const cacheCreationTokens = pickTokenValue(tokens.cacheWrite, tokens.cache_write, tokens.cacheCreationTokens);
+      const cacheReadTokens = pickTokenValue(tokens.cacheRead, tokens.cache_read, tokens.cacheReadTokens);
+      const outputTokens = pickTokenValue(tokens.output, tokens.completion);
+      const contextLimit = pickTokenValue(tokens.contextLimit, tokens.context_limit, data?.contextLimit, data?.limit?.context);
+      const outputLimit = pickTokenValue(tokens.outputLimit, tokens.output_limit, data?.outputLimit, data?.limit?.output);
       usage[id] = {
-        inputTokens
-        cacheCreationTokens
-        cacheReadTokens
-        outputTokens
+        inputTokens,
+        cacheCreationTokens,
+        cacheReadTokens,
+        outputTokens,
         modelName: data?.name || id,
-        modelInfo: null,
-
+        modelInfo: contextLimit || outputLimit ? { limit: { context: contextLimit || null, output: outputLimit || null } } : null,
+        contextFillInputTokens: getCumulativeContextInputTokens({ inputTokens, cacheCreationTokens }),
+        peakContextUsage: pickTokenValue(tokens.total),
         costUSD: null,
       };
     }
package/src/qwen.lib.mjs
CHANGED

@@ -20,6 +20,7 @@ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { qwenModels, defaultModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability } from './playwright-mcp.lib.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
+import { getCumulativeContextInputTokens, getRestoredContextInputTokens, toTokenCount } from './context-fill.lib.mjs';
 
 export const mapModelToId = model => qwenModels[model] || model;
 
@@ -63,6 +64,59 @@ const findFirstValue = (object, paths) => {
   return null;
 };
 
+const createQwenTokenFieldAvailability = () => ({
+  inputTokens: false,
+  outputTokens: false,
+  reasoningTokens: false,
+  cacheReadTokens: false,
+  cacheWriteTokens: false,
+});
+
+const createQwenTokenUsage = modelId => ({
+  inputTokens: 0,
+  outputTokens: 0,
+  reasoningTokens: 0,
+  cacheReadTokens: 0,
+  cacheWriteTokens: 0,
+  totalTokens: 0,
+  stepCount: 0,
+  requestedModelId: modelId || null,
+  respondedModelId: modelId || null,
+  contextLimit: null,
+  outputLimit: null,
+  contextFillInputTokens: 0,
+  peakContextUsage: 0,
+  tokenFieldAvailability: createQwenTokenFieldAvailability(),
+});
+
+const cloneQwenTokenUsage = usage => {
+  if (!usage) return createQwenTokenUsage();
+  return {
+    ...usage,
+    tokenFieldAvailability: {
+      ...createQwenTokenFieldAvailability(),
+      ...(usage.tokenFieldAvailability || {}),
+    },
+  };
+};
+
+const getQwenUsageField = (usage, paths) => {
+  const value = findFirstValue(usage, paths);
+  if (value === null) return { observed: false, value: 0 };
+  return { observed: true, value: toTokenCount(value) };
+};
+
+const QWEN_USAGE_PATHS = {
+  input: [['inputTokens'], ['input_tokens'], ['input'], ['promptTokens'], ['prompt_tokens'], ['prompt']],
+  output: [['outputTokens'], ['output_tokens'], ['output'], ['completionTokens'], ['completion_tokens'], ['completion']],
+  reasoning: [['reasoningTokens'], ['reasoning_tokens'], ['thoughtsTokens'], ['thoughts_tokens']],
+  cacheRead: [['cacheReadTokens'], ['cache_read_tokens'], ['cache_read_input_tokens'], ['cachedInputTokens'], ['cached_input_tokens'], ['prompt_tokens_details', 'cached_tokens'], ['cache', 'read']],
+  cacheWrite: [['cacheWriteTokens'], ['cache_write_tokens'], ['cache_creation_input_tokens'], ['cacheCreationTokens'], ['cacheCreationInputTokens'], ['cache', 'write']],
+  contextLimit: [['contextLimit'], ['context_limit'], ['limit', 'context'], ['limits', 'context']],
+  outputLimit: [['outputLimit'], ['output_limit'], ['limit', 'output'], ['limits', 'output']],
+  model: [['model'], ['model_id'], ['modelId'], ['name']],
+};
+
 const extractTextFragments = value => {
   if (typeof value === 'string') return [value];
   if (!value || typeof value !== 'object') return [];
@@ -89,8 +143,139 @@ const createQwenParserState = state => ({
   errors: Array.isArray(state?.errors) ? [...state.errors] : [],
   sessionId: state?.sessionId || null,
   lastTextContent: state?.lastTextContent || '',
+  tokenUsage: cloneQwenTokenUsage(state?.tokenUsage),
+  resultModelUsage: state?.resultModelUsage ? { ...state.resultModelUsage } : null,
 });
 
+const buildQwenResultModelUsage = tokenUsage => {
+  if (!tokenUsage || tokenUsage.stepCount === 0) return null;
+  const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId || 'qwen';
+  const modelInfo = tokenUsage.contextLimit || tokenUsage.outputLimit ? { limit: { context: tokenUsage.contextLimit || null, output: tokenUsage.outputLimit || null } } : null;
+  return {
+    [modelId]: {
+      inputTokens: tokenUsage.inputTokens,
+      cacheCreationTokens: tokenUsage.cacheWriteTokens,
+      cacheReadTokens: tokenUsage.cacheReadTokens,
+      outputTokens: tokenUsage.outputTokens,
+      modelName: modelId,
+      modelInfo,
+      contextFillInputTokens: tokenUsage.contextFillInputTokens,
+      peakContextUsage: tokenUsage.peakContextUsage,
+      costUSD: null,
+    },
+  };
+};
+
+const applyQwenUsageObject = (state, rawUsage, fallbackModelId = null) => {
+  if (!rawUsage || typeof rawUsage !== 'object') return;
+
+  const model = findFirstValue(rawUsage, QWEN_USAGE_PATHS.model) || fallbackModelId;
+  if (model) {
+    state.tokenUsage.requestedModelId ||= String(model);
+    state.tokenUsage.respondedModelId = String(model);
+  }
+
+  const input = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.input);
+  const output = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.output);
+  const reasoning = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.reasoning);
+  const cacheRead = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.cacheRead);
+  const cacheWrite = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.cacheWrite);
+  const contextLimit = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.contextLimit);
+  const outputLimit = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.outputLimit);
+
+  const observedTokenField = input.observed || output.observed || reasoning.observed || cacheRead.observed || cacheWrite.observed;
+  if (!observedTokenField) return;
+
+  state.tokenUsage.stepCount += 1;
+  if (input.observed) {
+    state.tokenUsage.tokenFieldAvailability.inputTokens = true;
+    state.tokenUsage.inputTokens += input.value;
+  }
+  if (output.observed) {
+    state.tokenUsage.tokenFieldAvailability.outputTokens = true;
+    state.tokenUsage.outputTokens += output.value;
+  }
+  if (reasoning.observed) {
+    state.tokenUsage.tokenFieldAvailability.reasoningTokens = true;
+    state.tokenUsage.reasoningTokens += reasoning.value;
+  }
+  if (cacheRead.observed) {
+    state.tokenUsage.tokenFieldAvailability.cacheReadTokens = true;
+    state.tokenUsage.cacheReadTokens += cacheRead.value;
+  }
+  if (cacheWrite.observed) {
+    state.tokenUsage.tokenFieldAvailability.cacheWriteTokens = true;
+    state.tokenUsage.cacheWriteTokens += cacheWrite.value;
+  }
+  if (contextLimit.observed) state.tokenUsage.contextLimit = contextLimit.value;
+  if (outputLimit.observed) state.tokenUsage.outputLimit = outputLimit.value;
+
+  const stepContextFill = getCumulativeContextInputTokens({
+    inputTokens: input.value,
+    cacheWriteTokens: cacheWrite.value,
+  });
+  if (stepContextFill > (state.tokenUsage.contextFillInputTokens || 0)) {
+    state.tokenUsage.contextFillInputTokens = stepContextFill;
+  }
+
+  const stepRestoredContext = getRestoredContextInputTokens({
+    inputTokens: input.value,
+    cacheWriteTokens: cacheWrite.value,
+    cacheReadTokens: cacheRead.value,
+  });
+  if (stepRestoredContext > (state.tokenUsage.peakContextUsage || 0)) {
+    state.tokenUsage.peakContextUsage = stepRestoredContext;
+  }
+
+  state.tokenUsage.totalTokens = state.tokenUsage.inputTokens + state.tokenUsage.cacheReadTokens + state.tokenUsage.cacheWriteTokens + state.tokenUsage.outputTokens;
+  state.resultModelUsage = buildQwenResultModelUsage(state.tokenUsage);
+};
+
+const applyQwenUsageToState = (state, event) => {
+  const rawUsage = event?.usage || event?.stats || event?.tokenUsage || null;
+  if (!rawUsage || typeof rawUsage !== 'object') return;
+
+  const modelStats = rawUsage.models && typeof rawUsage.models === 'object' ? rawUsage.models : null;
+  if (modelStats) {
+    for (const [modelId, data] of Object.entries(modelStats)) {
+      applyQwenUsageObject(state, data?.tokens || data?.usage || data, modelId);
+    }
+    return;
+  }
+
+  applyQwenUsageObject(state, rawUsage, findFirstValue(event, QWEN_USAGE_PATHS.model));
+};
+
+const buildQwenPricingInfo = (state, mappedModel) => {
+  const tokenUsage = cloneQwenTokenUsage(state?.tokenUsage);
+  if (!tokenUsage || tokenUsage.stepCount === 0) {
+    return {
+      pricingInfo: null,
+      publicPricingEstimate: null,
+      tokenUsage: null,
+      resultModelUsage: null,
+    };
+  }
+
+  tokenUsage.requestedModelId ||= mappedModel || 'qwen';
+  tokenUsage.respondedModelId ||= tokenUsage.requestedModelId;
+  const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId;
+
+  return {
+    pricingInfo: {
+      provider: 'Qwen Code',
+      modelId,
+      modelName: modelId,
+      totalCostUSD: null,
+      source: 'qwen-stream-json',
+      tokenUsage,
+    },
+    publicPricingEstimate: null,
+    tokenUsage,
+    resultModelUsage: buildQwenResultModelUsage(tokenUsage),
+  };
+};
+
 const addQwenEventToState = (state, rawEvent) => {
   const event = sanitizeObjectStrings(rawEvent);
   state.parsedEvents.push(event);
@@ -118,6 +303,8 @@ const addQwenEventToState = (state, rawEvent) => {
       isAuthError: isQwenAuthError(errorMessage),
     });
   }
+
+  applyQwenUsageToState(state, event);
 };
 
 export const parseQwenStreamJsonOutput = (output, state = {}) => {
@@ -377,6 +564,7 @@ export const executeQwenCommand = async params => {
      .join('\n');
    const combinedErrorText = `${allOutput}\n${errorMessage}`.trim();
    const limitInfo = detectUsageLimit(combinedErrorText);
+    const usageResult = buildQwenPricingInfo(qwenState, mappedModel);
 
    if (limitInfo.isUsageLimit) {
      const messageLines = formatUsageLimitMessage({
@@ -394,9 +582,7 @@ export const executeQwenCommand = async params => {
        sessionId,
        limitReached: true,
        limitResetTime: limitInfo.resetTime,
-
-        publicPricingEstimate: null,
-        tokenUsage: null,
+        ...usageResult,
        resultSummary,
      };
    }
@@ -444,9 +630,7 @@ export const executeQwenCommand = async params => {
        sessionId,
        limitReached: false,
        limitResetTime: null,
-
-        publicPricingEstimate: null,
-        tokenUsage: null,
+        ...usageResult,
        resultSummary,
      };
    }
@@ -461,9 +645,7 @@ export const executeQwenCommand = async params => {
      sessionId,
      limitReached: false,
      limitResetTime: null,
-
-      publicPricingEstimate: null,
-      tokenUsage: null,
+      ...usageResult,
      resultSummary,
    };
  } catch (error) {