@link-assistant/hive-mind 1.64.0 → 1.64.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/package.json +1 -1
- package/src/agent-commander.lib.mjs +47 -5
- package/src/agent-token-usage.lib.mjs +15 -1
- package/src/claude.budget-stats.lib.mjs +72 -27
- package/src/codex.lib.mjs +10 -0
- package/src/context-fill.lib.mjs +71 -0
- package/src/gemini.lib.mjs +22 -7
- package/src/qwen.lib.mjs +191 -9
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,17 @@
 # @link-assistant/hive-mind
 
+## 1.64.2
+
+### Patch Changes
+
+- 320ca42: Fix budget stats sub-agent context-fill calculation so cumulative-only rows (e.g. Claude Haiku 4.5 sub-agent calls that never appear in the parent JSONL) use `input + cache_creation` instead of `input + cache_creation + cache_read`. The previous formula double-counted the cached prefix replayed across calls and produced impossible percentages such as `1.2M / 200K (583%)`.
+
+## 1.64.1
+
+### Patch Changes
+
+- 51a8721: Add a separate `konard/hive-mind-dind` Docker image for nested Docker testing.
+
 ## 1.64.0
 
 ### Minor Changes
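
The arithmetic behind the 1.64.2 entry, as a small worked example. The numbers below are hypothetical; they are chosen only to reproduce the kind of impossible percentage the changelog entry describes.

```js
// Cumulative usage for a hypothetical Haiku 4.5 sub-agent tool loop; cacheReadTokens
// counts the same cached prefix once for every call that replayed it.
const usage = { inputTokens: 30_000, cacheCreationTokens: 70_000, cacheReadTokens: 1_066_000 };
const contextLimit = 200_000;

// Pre-1.64.2 formula: input + cache_creation + cache_read.
const oldFill = usage.inputTokens + usage.cacheCreationTokens + usage.cacheReadTokens;
console.log(`${oldFill} / ${contextLimit} (${Math.round((oldFill / contextLimit) * 100)}%)`);
// "1166000 / 200000 (583%)", more than the whole context window, which is impossible

// 1.64.2 formula for cumulative-only rows: input + cache_creation.
const newFill = usage.inputTokens + usage.cacheCreationTokens;
console.log(`${newFill} / ${contextLimit} (${Math.round((newFill / contextLimit) * 100)}%)`);
// "100000 / 200000 (50%)"
```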
package/package.json
CHANGED

package/src/agent-commander.lib.mjs
CHANGED

@@ -11,6 +11,7 @@ import { resolveCodexReasoningEffort } from './codex.options.lib.mjs';
 import { mapModelForTool } from './models/index.mjs';
 import { buildCodexDisable1mContextConfigArgs, buildCodexSubSessionSizeConfigArgs, parseSubSessionSize } from './sub-session-size.lib.mjs';
 import { detectUsageLimit } from './usage-limit.lib.mjs';
+import { getCacheReadTokenCount, getCumulativeContextInputTokens, getOutputTokenCount } from './context-fill.lib.mjs';
 
 export const AGENT_COMMANDER_TOOLS = new Set(['claude', 'codex', 'opencode', 'agent', 'qwen', 'gemini']);
 
@@ -222,10 +223,45 @@ const extractResultSummary = (messages, plainOutput) => {
 
 const hasErrorMessage = messages => messages.some(message => message?.is_error === true || message?.type === 'error' || message?.type === 'step_error' || message?.error);
 
+const normalizeAgentCommanderTokenUsage = usage => {
+  if (!usage || typeof usage !== 'object') return null;
+  const normalized = {
+    ...usage,
+    contextFillInputTokens: usage.contextFillInputTokens ?? getCumulativeContextInputTokens(usage),
+  };
+  const cacheReadTokens = getCacheReadTokenCount(normalized);
+  const hasTokenCounts = getCumulativeContextInputTokens(normalized) > 0 || getOutputTokenCount(normalized) > 0 || cacheReadTokens > 0;
+  if (!hasTokenCounts) return null;
+  if (!normalized.stepCount) normalized.stepCount = 1;
+  return normalized;
+};
+
+const enrichPricingInfoWithTokenUsage = ({ pricingInfo = null, usage = null, tool = null, publicPricingEstimate = null }) => {
+  const tokenUsage = normalizeAgentCommanderTokenUsage(pricingInfo?.tokenUsage || usage);
+  if (!tokenUsage) return pricingInfo || null;
+
+  return {
+    source: 'agent-commander',
+    ...(pricingInfo || {}),
+    provider: pricingInfo?.provider || tool || 'agent-commander',
+    modelId: pricingInfo?.modelId || tokenUsage.respondedModelId || tokenUsage.requestedModelId || null,
+    modelName: pricingInfo?.modelName || tokenUsage.respondedModelId || tokenUsage.requestedModelId || null,
+    totalCostUSD: pricingInfo?.totalCostUSD ?? publicPricingEstimate ?? null,
+    tokenUsage,
+  };
+};
+
 export const summarizeAgentCommanderResult = ({ result, tool }) => {
   const plainOutput = result?.output?.plain || '';
   if (result?.metadata && typeof result.metadata === 'object') {
     const metadata = result.metadata;
+    const streamTokenUsage = metadata.streamTokenUsage || result.usage || null;
+    const pricingInfo = enrichPricingInfoWithTokenUsage({
+      pricingInfo: metadata.pricingInfo || null,
+      usage: streamTokenUsage,
+      tool,
+      publicPricingEstimate: metadata.publicPricingEstimate ?? metadata.pricingInfo?.totalCostUSD ?? null,
+    });
     return {
       success: metadata.success === true,
       sessionId: metadata.sessionId || result.sessionId || null,
@@ -233,11 +269,11 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
       limitResetTime: metadata.limitResetTime || null,
       limitTimezone: metadata.limitTimezone || null,
       anthropicTotalCostUSD: metadata.anthropicTotalCostUSD ?? null,
-      publicPricingEstimate: metadata.publicPricingEstimate ??
-      pricingInfo
+      publicPricingEstimate: metadata.publicPricingEstimate ?? pricingInfo?.totalCostUSD ?? null,
+      pricingInfo,
       resultSummary: metadata.resultSummary || null,
       resultModelUsage: metadata.resultModelUsage || null,
-      streamTokenUsage
+      streamTokenUsage,
       subAgentCalls: metadata.subAgentCalls || null,
       errorDuringExecution: metadata.errorDuringExecution === true || result?.exitCode !== 0,
       result: plainOutput,
@@ -250,6 +286,12 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
   const resultMessage = [...messages].reverse().find(message => message?.type === 'result') || null;
   const totalCost = typeof resultMessage?.total_cost_usd === 'number' ? resultMessage.total_cost_usd : null;
   const publicPricingEstimate = tool === 'agent' && typeof usage?.totalCost === 'number' ? usage.totalCost : null;
+  const pricingInfo = enrichPricingInfoWithTokenUsage({
+    pricingInfo: publicPricingEstimate !== null ? { totalCostUSD: publicPricingEstimate, source: 'agent-commander' } : null,
+    usage,
+    tool,
+    publicPricingEstimate,
+  });
 
   return {
     success: result?.exitCode === 0 && !usageLimit.isUsageLimit && !hasErrorMessage(messages),
@@ -258,8 +300,8 @@ export const summarizeAgentCommanderResult = ({ result, tool }) => {
     limitResetTime: usageLimit.resetTime,
     limitTimezone: usageLimit.timezone,
     anthropicTotalCostUSD: tool === 'claude' ? totalCost : null,
-    publicPricingEstimate,
-    pricingInfo
+    publicPricingEstimate: publicPricingEstimate ?? pricingInfo?.totalCostUSD ?? null,
+    pricingInfo,
     resultSummary: extractResultSummary(messages, plainOutput),
     resultModelUsage: null,
     streamTokenUsage: usage,

package/src/agent-token-usage.lib.mjs
CHANGED

@@ -2,6 +2,7 @@
 
 import Decimal from 'decimal.js-light';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
+import { getCumulativeContextInputTokens, getRestoredContextInputTokens } from './context-fill.lib.mjs';
 
 export const createTokenFieldAvailability = () => ({
   inputTokens: false,
@@ -23,6 +24,7 @@ export const createAgentTokenUsage = () => ({
   respondedModelId: null,
   contextLimit: null,
   outputLimit: null,
+  contextFillInputTokens: 0,
   peakContextUsage: 0,
   tokenFieldAvailability: createTokenFieldAvailability(),
 });
@@ -61,10 +63,22 @@ export const accumulateAgentStepFinishUsage = (usage, data) => {
     if (data.part.model.respondedModelID) usage.respondedModelId = data.part.model.respondedModelID;
   }
 
+  const stepContextFill = getCumulativeContextInputTokens({
+    inputTokens: getTokenCount(tokens.input),
+    cacheWriteTokens: getTokenCount(tokens.cache?.write),
+  });
+  if (stepContextFill > (usage.contextFillInputTokens || 0)) {
+    usage.contextFillInputTokens = stepContextFill;
+  }
+
   if (data.part.context) {
     if (data.part.context.contextLimit) usage.contextLimit = data.part.context.contextLimit;
     if (data.part.context.outputLimit) usage.outputLimit = data.part.context.outputLimit;
-    const stepContextUsage =
+    const stepContextUsage = getRestoredContextInputTokens({
+      inputTokens: getTokenCount(tokens.input),
+      cacheWriteTokens: getTokenCount(tokens.cache?.write),
+      cacheReadTokens: getTokenCount(tokens.cache?.read),
+    });
     if (stepContextUsage > (usage.peakContextUsage || 0)) {
       usage.peakContextUsage = stepContextUsage;
     }

package/src/claude.budget-stats.lib.mjs
CHANGED

@@ -4,6 +4,9 @@
 
 import { formatNumber } from './claude.lib.mjs';
 import Decimal from 'decimal.js-light';
+import { getCacheReadTokenCount, getCacheWriteTokenCount, getCumulativeContextInputTokens, getDisplayContextInputTokens, getExplicitContextFillInputTokens, getInputTokenCount, getOutputTokenCount, getRestoredContextInputTokens } from './context-fill.lib.mjs';
+
+export { getCumulativeContextInputTokens, getRestoredContextInputTokens };
 
 /**
  * Helper: creates a fresh sub-session usage object for tracking tokens between compactification events
@@ -19,9 +22,9 @@ export const createEmptySubSessionUsage = () => ({
   peakOutputUsage: 0,
 });
 
-export const getRawRequestInputTokens = usage => (usage
+export const getRawRequestInputTokens = usage => getRestoredContextInputTokens(usage);
 
-export const getUsageInputTokens = usage => (usage
+export const getUsageInputTokens = usage => getRestoredContextInputTokens(usage);
 
 /**
  * Helper: accumulates token usage from a JSONL entry into a model usage map
@@ -184,6 +187,7 @@ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
   const reads = usage.cacheReadTokens || 0;
   const inputs = usage.inputTokens || 0;
   const outputs = usage.outputTokens || 0;
+  const explicitContextFill = getExplicitContextFillInputTokens(usage);
   const webSearches = usage.webSearchRequests || 0;
   const subSessionCount = (tokenUsage?.subSessions || []).length;
   const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
@@ -194,6 +198,14 @@
   // buckets split for cost and accounting review.
   await log(` peak input: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest request input + cache_creation + cache_read)`, { verbose: true });
   await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
+  // Issue #1741: when peak is 0 (sub-agent only seen via result event), the
+  // detail row falls back to the cumulative-context proxy `input + cache_write`
+  // (cache_read is excluded because it represents the same cached prefix replayed
+  // across calls and would inflate the percentage past 100%).
+  if (explicitContextFill !== null || peak === 0) {
+    const contextFill = explicitContextFill ?? getCumulativeContextInputTokens(usage);
+    await log(` context fill: ${formatNumber(contextFill)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (input + cache_write; cache_read excluded — issue #1741)`, { verbose: true });
+  }
   // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
   // still surfaces the implied dollar cost so the residual remains debuggable
   // from the saved log even if a future model lacks pricing data.
@@ -305,17 +317,25 @@ export const mergeResultModelUsage = (modelUsage, resultModelUsage) => {
   if (!resultModelUsage || typeof resultModelUsage !== 'object') return;
   for (const [modelId, resultUsage] of Object.entries(resultModelUsage)) {
     if (modelId.startsWith('<') && modelId.endsWith('>')) continue;
+    const inputTokens = getInputTokenCount(resultUsage);
+    const cacheCreationTokens = getCacheWriteTokenCount(resultUsage);
+    const cacheReadTokens = getCacheReadTokenCount(resultUsage);
+    const outputTokens = getOutputTokenCount(resultUsage);
+    const explicitContextFill = getExplicitContextFillInputTokens(resultUsage);
     if (!modelUsage[modelId]) {
       modelUsage[modelId] = {
-        inputTokens
-        cacheCreationTokens
+        inputTokens,
+        cacheCreationTokens,
         cacheCreation5mTokens: 0,
         cacheCreation1hTokens: 0,
-        cacheReadTokens
-        outputTokens
+        cacheReadTokens,
+        outputTokens,
         webSearchRequests: resultUsage.webSearchRequests || 0,
         _sourceResultJson: true,
       };
+      if (explicitContextFill !== null) {
+        modelUsage[modelId].contextFillInputTokens = explicitContextFill;
+      }
       if (resultUsage.costUSD != null) {
        modelUsage[modelId]._resultCostUSD = resultUsage.costUSD;
       }
@@ -331,13 +351,16 @@ export const mergeResultModelUsage = (modelUsage, resultModelUsage) => {
     } else {
       const jsonlUsage = modelUsage[modelId];
       const jsonlTotal = jsonlUsage.inputTokens + jsonlUsage.cacheCreationTokens + jsonlUsage.cacheReadTokens + jsonlUsage.outputTokens;
-      const resultTotal =
+      const resultTotal = inputTokens + cacheCreationTokens + cacheReadTokens + outputTokens;
       if (resultTotal > jsonlTotal) {
-        jsonlUsage.inputTokens =
-        jsonlUsage.cacheCreationTokens =
-        jsonlUsage.cacheReadTokens =
-        jsonlUsage.outputTokens =
+        jsonlUsage.inputTokens = inputTokens;
+        jsonlUsage.cacheCreationTokens = cacheCreationTokens;
+        jsonlUsage.cacheReadTokens = cacheReadTokens;
+        jsonlUsage.outputTokens = outputTokens;
         jsonlUsage._sourceResultJson = true;
+        if (explicitContextFill !== null) {
+          jsonlUsage.contextFillInputTokens = explicitContextFill;
+        }
       }
       if (resultUsage.costUSD != null) {
         jsonlUsage._resultCostUSD = resultUsage.costUSD;
@@ -573,7 +596,7 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
     }
 
-    const peakContext = usage
+    const peakContext = getDisplayContextInputTokens(usage);
 
     if (showSubSessions) {
       // Issue #1600: Unified format — no "Context window:" prefix, same format as sub-agent calls
@@ -587,10 +610,14 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       // so peakContext stays at 0; without this fallback the rendered comment loses
       // the sub-agent's input-token information entirely. The detail line is
       // deliberately simple; the Total line below keeps the cache split.
+      // Issue #1741: For result-event-only rows we have cumulative totals, not a
+      // per-request peak, so the detail-line numerator must exclude cache_reads
+      // (which are the same cached prefix replayed across calls and would inflate
+      // the percentage past 100%). The Total line keeps the full split.
       const parts = [];
      const isResultSingleCall = usage._sourceResultJson || callCount > 0;
      const inputPart = isResultSingleCall
-        ? formatInputContextPart(
+        ? formatInputContextPart(getCumulativeContextInputTokens(usage), contextLimit, formatTokensCompact)
         : buildCumulativeInputPhrase({
           input: usage.inputTokens || 0,
           cacheWrites: usage.cacheCreationTokens || 0,
@@ -636,7 +663,12 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
       for (let i = 0; i < matchingCalls.length; i++) {
         const call = matchingCalls[i];
         const cu = call.usage || {};
-
+        // Issue #1741: per-call usage is itself cumulative across the
+        // sub-agent's internal API requests (each Anthropic Agent call
+        // can run a tool loop), so cache_reads grow with the loop length
+        // and would push the displayed fill past 100%. Use the same
+        // input + cache_creation proxy as the result-event-only fallback.
+        const callInput = getCumulativeContextInputTokens(cu);
         const callOutput = cu.outputTokens || 0;
         const parts = [];
         if (contextLimit) {
@@ -655,9 +687,13 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
         }
       } else {
         // Estimated per-call breakdown when sub-agent stream tracking did not capture
-        // per-call usage.
-        //
-
+        // per-call usage. Issue #1741: cumulative cache_read tokens grow without
+        // bound across calls (the same cached prefix is replayed on every call),
+        // so we mustn't add them when projecting an average per-call fill —
+        // doing so would routinely exceed 100% of the context window. The
+        // estimate uses input + cache_creation (cache reads stay in the Total
+        // line below).
+        const aggregateInput = getCumulativeContextInputTokens(usage);
         const avgInput = Math.round(aggregateInput / callCount);
         const avgOutput = Math.round(usage.outputTokens / callCount);
         for (let i = 0; i < matchingCalls.length; i++) {
@@ -696,7 +732,14 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  * @returns {Object|null} Budget stats data compatible with buildBudgetStatsString, or null if no data
  */
 export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
-  if (!tokenUsage
+  if (!tokenUsage) return null;
+
+  const inputTokens = getInputTokenCount(tokenUsage);
+  const cacheWriteTokens = getCacheWriteTokenCount(tokenUsage);
+  const cacheReadTokens = getCacheReadTokenCount(tokenUsage);
+  const outputTokens = getOutputTokenCount(tokenUsage);
+  const hasTokens = inputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0 || outputTokens > 0;
+  if ((tokenUsage.stepCount || 0) === 0 && !hasTokens) return null;
 
   const modelName = pricingInfo?.modelName || tokenUsage.respondedModelId || tokenUsage.requestedModelId || 'Unknown';
   const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId || pricingInfo?.modelId || 'unknown';
@@ -704,14 +747,16 @@ export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
   // Use context limits from step_finish events if available, otherwise from pricing model info
   const contextLimit = tokenUsage.contextLimit || pricingInfo?.modelInfo?.limit?.context || null;
   const outputLimit = tokenUsage.outputLimit || pricingInfo?.modelInfo?.limit?.output || null;
+  const contextFillInputTokens = getExplicitContextFillInputTokens(tokenUsage) ?? getCumulativeContextInputTokens({ inputTokens, cacheWriteTokens });
 
   const modelUsageEntry = {
-    inputTokens
-    cacheCreationTokens:
-    cacheReadTokens
-    outputTokens
+    inputTokens,
+    cacheCreationTokens: cacheWriteTokens,
+    cacheReadTokens,
+    outputTokens,
     modelName,
     modelInfo: contextLimit || outputLimit ? { limit: { context: contextLimit, output: outputLimit } } : null,
+    contextFillInputTokens,
     peakContextUsage: tokenUsage.peakContextUsage || 0,
     costUSD: pricingInfo?.totalCostUSD ?? null,
   };
@@ -719,11 +764,11 @@ export const buildAgentBudgetStats = (tokenUsage, pricingInfo) => {
   return {
     modelUsage: { [modelId]: modelUsageEntry },
     subSessions: [],
-    inputTokens
-    cacheCreationTokens:
-    cacheReadTokens
-    outputTokens
-    totalTokens:
+    inputTokens,
+    cacheCreationTokens: cacheWriteTokens,
+    cacheReadTokens,
+    outputTokens,
+    totalTokens: inputTokens + cacheWriteTokens + outputTokens,
   };
 };
 
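
A condensed sketch of the detail-row fallback that the issue #1741 comments in this file describe. It is a simplification for illustration, not the package's actual rendering code; the field names follow the usage objects shown in the hunks above.

```js
// Simplified sketch (not the real rendering code) of which numerator the
// budget-stats detail row uses, per the issue #1741 comments above.
const detailNumerator = usage =>
  (usage.peakContextUsage || 0) > 0
    ? usage.peakContextUsage // per-request peak captured from the JSONL / stream
    : (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0); // cumulative-only row: input + cache_write; cache_read is left to the Total line

// Sub-agent seen only via the result event (hypothetical cumulative totals):
console.log(detailNumerator({ inputTokens: 30_000, cacheCreationTokens: 70_000, cacheReadTokens: 1_066_000, peakContextUsage: 0 })); // 100000
// Row with a tracked per-request peak:
console.log(detailNumerator({ inputTokens: 2_000, cacheCreationTokens: 5_000, cacheReadTokens: 140_000, peakContextUsage: 147_000 })); // 147000
```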
package/src/codex.lib.mjs
CHANGED

@@ -26,6 +26,7 @@ import { fetchModelInfo } from './model-info.lib.mjs';
 import { defaultModels } from './models/index.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
 import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
+import { getCumulativeContextInputTokens } from './context-fill.lib.mjs';
 import Decimal from 'decimal.js-light';
 
 const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -85,6 +86,7 @@ export const createCodexTokenUsage = requestedModelId => ({
   respondedModelId: requestedModelId || null,
   contextLimit: null,
   outputLimit: null,
+  contextFillInputTokens: 0,
   peakContextUsage: 0,
   tokenFieldAvailability: createCodexTokenFieldAvailability(),
 });
@@ -346,6 +348,13 @@ export const parseCodexExecJsonOutput = (output, state = {}, requestedModelId =
     if (turnContextUsage > (nextState.tokenUsage.peakContextUsage || 0)) {
       nextState.tokenUsage.peakContextUsage = turnContextUsage;
     }
+    const turnContextFill = getCumulativeContextInputTokens({
+      inputTokens: nonCachedInputTokens,
+      cacheWriteTokens,
+    });
+    if (turnContextFill > (nextState.tokenUsage.contextFillInputTokens || 0)) {
+      nextState.tokenUsage.contextFillInputTokens = turnContextFill;
+    }
 
     const usageFieldSet = CODEX_USAGE_FIELD_NAMES.filter(fieldName => hasOwnPath(data.usage, fieldName));
     if (usageFieldSet.length > 0) nextState.observedUsageFieldSets.push(usageFieldSet);
@@ -407,6 +416,7 @@ export const buildCodexResultModelUsage = (modelId, tokenUsage, pricingInfo = nu
       outputTokens: tokenUsage.outputTokens || 0,
       modelName: pricingInfo?.modelName || modelId,
       modelInfo: pricingInfo?.modelInfo || null,
+      contextFillInputTokens: tokenUsage.contextFillInputTokens || getCumulativeContextInputTokens(tokenUsage),
       peakContextUsage: tokenUsage.peakContextUsage || 0,
       costUSD: pricingInfo?.totalCostUSD ?? null,
     },

package/src/context-fill.lib.mjs
ADDED

@@ -0,0 +1,71 @@
+// Shared context-window fill helpers.
+
+const TOKEN_FIELD_ALIASES = {
+  input: ['inputTokens', 'input_tokens', 'input', 'promptTokens', 'prompt_tokens', 'prompt'],
+  output: ['outputTokens', 'output_tokens', 'output', 'completionTokens', 'completion_tokens', 'completion'],
+  cacheWrite: ['cacheCreationTokens', 'cacheWriteTokens', 'cacheCreationInputTokens', 'cache_creation_input_tokens', 'cache_write_tokens', 'cacheWrite'],
+  cacheRead: ['cacheReadTokens', 'cacheReadInputTokens', 'cache_read_input_tokens', 'cache_read_tokens', 'cachedInputTokens', 'cached_input_tokens', 'cacheRead'],
+};
+
+export const toTokenCount = value => {
+  if (Number.isFinite(value)) return Math.max(0, value);
+  if (typeof value === 'string' && value.trim()) {
+    const parsed = Number(value);
+    if (Number.isFinite(parsed)) return Math.max(0, parsed);
+  }
+  return 0;
+};
+
+const getFirstTokenField = (usage, fieldNames) => {
+  if (!usage || typeof usage !== 'object') return 0;
+  for (const fieldName of fieldNames) {
+    if (Object.hasOwn(usage, fieldName)) return toTokenCount(usage[fieldName]);
+  }
+  return 0;
+};
+
+export const getInputTokenCount = usage => getFirstTokenField(usage, TOKEN_FIELD_ALIASES.input);
+
+export const getOutputTokenCount = usage => getFirstTokenField(usage, TOKEN_FIELD_ALIASES.output);
+
+export const getCacheWriteTokenCount = usage => {
+  const direct = getFirstTokenField(usage, TOKEN_FIELD_ALIASES.cacheWrite);
+  if (direct > 0 || !usage?.cache || typeof usage.cache !== 'object') return direct;
+  return toTokenCount(usage.cache.write);
+};
+
+export const getCacheReadTokenCount = usage => {
+  const direct = getFirstTokenField(usage, TOKEN_FIELD_ALIASES.cacheRead);
+  if (direct > 0 || !usage?.cache || typeof usage.cache !== 'object') return direct;
+  return toTokenCount(usage.cache.read);
+};
+
+/**
+ * Issue #1741: context-fill from cumulative/session usage.
+ *
+ * Cache reads are intentionally excluded. They are the same cached prefix replayed
+ * across requests, so summing them in a cumulative row can exceed the model's
+ * context window even though no single sub-session filled that much context.
+ */
+export const getCumulativeContextInputTokens = usage => getInputTokenCount(usage) + getCacheWriteTokenCount(usage);
+
+/**
+ * Issue #1737: restored prompt size for one concrete request/turn.
+ *
+ * Use this only when the source row is a single request or a tool-specific
+ * per-turn value. For cumulative model rows, use getCumulativeContextInputTokens.
+ */
+export const getRestoredContextInputTokens = usage => getInputTokenCount(usage) + getCacheWriteTokenCount(usage) + getCacheReadTokenCount(usage);
+
+export const getExplicitContextFillInputTokens = usage => {
+  if (!usage || typeof usage !== 'object') return null;
+  if (Object.hasOwn(usage, 'contextFillInputTokens')) return toTokenCount(usage.contextFillInputTokens);
+  if (Object.hasOwn(usage, 'cumulativeContextInputTokens')) return toTokenCount(usage.cumulativeContextInputTokens);
+  return null;
+};
+
+export const getDisplayContextInputTokens = usage => {
+  const explicitContextFill = getExplicitContextFillInputTokens(usage);
+  if (explicitContextFill !== null) return explicitContextFill;
+  return toTokenCount(usage?.peakContextUsage);
+};
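
A minimal usage sketch of the helpers added above. The import path assumes the new file lands at src/context-fill.lib.mjs as in this diff, and the usage objects are illustrative.

```js
// Minimal usage sketch (illustrative values) of the helpers defined above.
import { getCumulativeContextInputTokens, getRestoredContextInputTokens } from './context-fill.lib.mjs';

// The alias table lets one helper read Anthropic-style snake_case usage objects...
const anthropicUsage = { input_tokens: 1_200, cache_creation_input_tokens: 8_000, cache_read_input_tokens: 150_000 };
// ...as well as camelCase usage objects coming from the stream trackers.
const streamUsage = { inputTokens: 1_200, cacheWriteTokens: 8_000, cacheReadTokens: 150_000 };

console.log(getCumulativeContextInputTokens(anthropicUsage)); // 9200: input + cache_write, for cumulative model rows
console.log(getRestoredContextInputTokens(streamUsage)); // 159200: also adds cache_read, for a single request/turn
```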
package/src/gemini.lib.mjs
CHANGED

@@ -20,6 +20,7 @@ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { defaultModels, geminiModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability } from './playwright-mcp.lib.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
+import { getCumulativeContextInputTokens, toTokenCount } from './context-fill.lib.mjs';
 
 const shellQuote = value => `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
 
@@ -46,20 +47,34 @@ const extractGeminiTextContent = value => {
   return '';
 };
 
-const
+const pickTokenValue = (...values) => {
+  for (const value of values) {
+    if (value !== undefined && value !== null) return toTokenCount(value);
+  }
+  return 0;
+};
+
+export const buildGeminiResultModelUsage = (modelId, stats = null) => {
   const modelStats = stats?.models && typeof stats.models === 'object' ? stats.models : null;
   if (modelStats) {
     const usage = {};
     for (const [id, data] of Object.entries(modelStats)) {
       const tokens = data?.tokens || {};
+      const inputTokens = pickTokenValue(tokens.input, tokens.prompt);
+      const cacheCreationTokens = pickTokenValue(tokens.cacheWrite, tokens.cache_write, tokens.cacheCreationTokens);
+      const cacheReadTokens = pickTokenValue(tokens.cacheRead, tokens.cache_read, tokens.cacheReadTokens);
+      const outputTokens = pickTokenValue(tokens.output, tokens.completion);
+      const contextLimit = pickTokenValue(tokens.contextLimit, tokens.context_limit, data?.contextLimit, data?.limit?.context);
+      const outputLimit = pickTokenValue(tokens.outputLimit, tokens.output_limit, data?.outputLimit, data?.limit?.output);
       usage[id] = {
-        inputTokens
-        cacheCreationTokens
-        cacheReadTokens
-        outputTokens
+        inputTokens,
+        cacheCreationTokens,
+        cacheReadTokens,
+        outputTokens,
         modelName: data?.name || id,
-        modelInfo: null,
-
+        modelInfo: contextLimit || outputLimit ? { limit: { context: contextLimit || null, output: outputLimit || null } } : null,
+        contextFillInputTokens: getCumulativeContextInputTokens({ inputTokens, cacheCreationTokens }),
+        peakContextUsage: pickTokenValue(tokens.total),
         costUSD: null,
       };
     }
package/src/qwen.lib.mjs
CHANGED

@@ -20,6 +20,7 @@ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { qwenModels, defaultModels } from './models/index.mjs';
 import { checkPlaywrightMcpPackageAvailability } from './playwright-mcp.lib.mjs';
 import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
+import { getCumulativeContextInputTokens, getRestoredContextInputTokens, toTokenCount } from './context-fill.lib.mjs';
 
 export const mapModelToId = model => qwenModels[model] || model;
 
@@ -63,6 +64,59 @@ const findFirstValue = (object, paths) => {
   return null;
 };
 
+const createQwenTokenFieldAvailability = () => ({
+  inputTokens: false,
+  outputTokens: false,
+  reasoningTokens: false,
+  cacheReadTokens: false,
+  cacheWriteTokens: false,
+});
+
+const createQwenTokenUsage = modelId => ({
+  inputTokens: 0,
+  outputTokens: 0,
+  reasoningTokens: 0,
+  cacheReadTokens: 0,
+  cacheWriteTokens: 0,
+  totalTokens: 0,
+  stepCount: 0,
+  requestedModelId: modelId || null,
+  respondedModelId: modelId || null,
+  contextLimit: null,
+  outputLimit: null,
+  contextFillInputTokens: 0,
+  peakContextUsage: 0,
+  tokenFieldAvailability: createQwenTokenFieldAvailability(),
+});
+
+const cloneQwenTokenUsage = usage => {
+  if (!usage) return createQwenTokenUsage();
+  return {
+    ...usage,
+    tokenFieldAvailability: {
+      ...createQwenTokenFieldAvailability(),
+      ...(usage.tokenFieldAvailability || {}),
+    },
+  };
+};
+
+const getQwenUsageField = (usage, paths) => {
+  const value = findFirstValue(usage, paths);
+  if (value === null) return { observed: false, value: 0 };
+  return { observed: true, value: toTokenCount(value) };
+};
+
+const QWEN_USAGE_PATHS = {
+  input: [['inputTokens'], ['input_tokens'], ['input'], ['promptTokens'], ['prompt_tokens'], ['prompt']],
+  output: [['outputTokens'], ['output_tokens'], ['output'], ['completionTokens'], ['completion_tokens'], ['completion']],
+  reasoning: [['reasoningTokens'], ['reasoning_tokens'], ['thoughtsTokens'], ['thoughts_tokens']],
+  cacheRead: [['cacheReadTokens'], ['cache_read_tokens'], ['cache_read_input_tokens'], ['cachedInputTokens'], ['cached_input_tokens'], ['prompt_tokens_details', 'cached_tokens'], ['cache', 'read']],
+  cacheWrite: [['cacheWriteTokens'], ['cache_write_tokens'], ['cache_creation_input_tokens'], ['cacheCreationTokens'], ['cacheCreationInputTokens'], ['cache', 'write']],
+  contextLimit: [['contextLimit'], ['context_limit'], ['limit', 'context'], ['limits', 'context']],
+  outputLimit: [['outputLimit'], ['output_limit'], ['limit', 'output'], ['limits', 'output']],
+  model: [['model'], ['model_id'], ['modelId'], ['name']],
+};
+
 const extractTextFragments = value => {
   if (typeof value === 'string') return [value];
   if (!value || typeof value !== 'object') return [];
@@ -89,8 +143,139 @@ const createQwenParserState = state => ({
   errors: Array.isArray(state?.errors) ? [...state.errors] : [],
   sessionId: state?.sessionId || null,
   lastTextContent: state?.lastTextContent || '',
+  tokenUsage: cloneQwenTokenUsage(state?.tokenUsage),
+  resultModelUsage: state?.resultModelUsage ? { ...state.resultModelUsage } : null,
 });
 
+const buildQwenResultModelUsage = tokenUsage => {
+  if (!tokenUsage || tokenUsage.stepCount === 0) return null;
+  const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId || 'qwen';
+  const modelInfo = tokenUsage.contextLimit || tokenUsage.outputLimit ? { limit: { context: tokenUsage.contextLimit || null, output: tokenUsage.outputLimit || null } } : null;
+  return {
+    [modelId]: {
+      inputTokens: tokenUsage.inputTokens,
+      cacheCreationTokens: tokenUsage.cacheWriteTokens,
+      cacheReadTokens: tokenUsage.cacheReadTokens,
+      outputTokens: tokenUsage.outputTokens,
+      modelName: modelId,
+      modelInfo,
+      contextFillInputTokens: tokenUsage.contextFillInputTokens,
+      peakContextUsage: tokenUsage.peakContextUsage,
+      costUSD: null,
+    },
+  };
+};
+
+const applyQwenUsageObject = (state, rawUsage, fallbackModelId = null) => {
+  if (!rawUsage || typeof rawUsage !== 'object') return;
+
+  const model = findFirstValue(rawUsage, QWEN_USAGE_PATHS.model) || fallbackModelId;
+  if (model) {
+    state.tokenUsage.requestedModelId ||= String(model);
+    state.tokenUsage.respondedModelId = String(model);
+  }
+
+  const input = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.input);
+  const output = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.output);
+  const reasoning = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.reasoning);
+  const cacheRead = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.cacheRead);
+  const cacheWrite = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.cacheWrite);
+  const contextLimit = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.contextLimit);
+  const outputLimit = getQwenUsageField(rawUsage, QWEN_USAGE_PATHS.outputLimit);
+
+  const observedTokenField = input.observed || output.observed || reasoning.observed || cacheRead.observed || cacheWrite.observed;
+  if (!observedTokenField) return;
+
+  state.tokenUsage.stepCount += 1;
+  if (input.observed) {
+    state.tokenUsage.tokenFieldAvailability.inputTokens = true;
+    state.tokenUsage.inputTokens += input.value;
+  }
+  if (output.observed) {
+    state.tokenUsage.tokenFieldAvailability.outputTokens = true;
+    state.tokenUsage.outputTokens += output.value;
+  }
+  if (reasoning.observed) {
+    state.tokenUsage.tokenFieldAvailability.reasoningTokens = true;
+    state.tokenUsage.reasoningTokens += reasoning.value;
+  }
+  if (cacheRead.observed) {
+    state.tokenUsage.tokenFieldAvailability.cacheReadTokens = true;
+    state.tokenUsage.cacheReadTokens += cacheRead.value;
+  }
+  if (cacheWrite.observed) {
+    state.tokenUsage.tokenFieldAvailability.cacheWriteTokens = true;
+    state.tokenUsage.cacheWriteTokens += cacheWrite.value;
+  }
+  if (contextLimit.observed) state.tokenUsage.contextLimit = contextLimit.value;
+  if (outputLimit.observed) state.tokenUsage.outputLimit = outputLimit.value;
+
+  const stepContextFill = getCumulativeContextInputTokens({
+    inputTokens: input.value,
+    cacheWriteTokens: cacheWrite.value,
+  });
+  if (stepContextFill > (state.tokenUsage.contextFillInputTokens || 0)) {
+    state.tokenUsage.contextFillInputTokens = stepContextFill;
+  }
+
+  const stepRestoredContext = getRestoredContextInputTokens({
+    inputTokens: input.value,
+    cacheWriteTokens: cacheWrite.value,
+    cacheReadTokens: cacheRead.value,
+  });
+  if (stepRestoredContext > (state.tokenUsage.peakContextUsage || 0)) {
+    state.tokenUsage.peakContextUsage = stepRestoredContext;
+  }
+
+  state.tokenUsage.totalTokens = state.tokenUsage.inputTokens + state.tokenUsage.cacheReadTokens + state.tokenUsage.cacheWriteTokens + state.tokenUsage.outputTokens;
+  state.resultModelUsage = buildQwenResultModelUsage(state.tokenUsage);
+};
+
+const applyQwenUsageToState = (state, event) => {
+  const rawUsage = event?.usage || event?.stats || event?.tokenUsage || null;
+  if (!rawUsage || typeof rawUsage !== 'object') return;
+
+  const modelStats = rawUsage.models && typeof rawUsage.models === 'object' ? rawUsage.models : null;
+  if (modelStats) {
+    for (const [modelId, data] of Object.entries(modelStats)) {
+      applyQwenUsageObject(state, data?.tokens || data?.usage || data, modelId);
+    }
+    return;
+  }
+
+  applyQwenUsageObject(state, rawUsage, findFirstValue(event, QWEN_USAGE_PATHS.model));
+};
+
+const buildQwenPricingInfo = (state, mappedModel) => {
+  const tokenUsage = cloneQwenTokenUsage(state?.tokenUsage);
+  if (!tokenUsage || tokenUsage.stepCount === 0) {
+    return {
+      pricingInfo: null,
+      publicPricingEstimate: null,
+      tokenUsage: null,
+      resultModelUsage: null,
+    };
+  }
+
+  tokenUsage.requestedModelId ||= mappedModel || 'qwen';
+  tokenUsage.respondedModelId ||= tokenUsage.requestedModelId;
+  const modelId = tokenUsage.respondedModelId || tokenUsage.requestedModelId;
+
+  return {
+    pricingInfo: {
+      provider: 'Qwen Code',
+      modelId,
+      modelName: modelId,
+      totalCostUSD: null,
+      source: 'qwen-stream-json',
+      tokenUsage,
+    },
+    publicPricingEstimate: null,
+    tokenUsage,
+    resultModelUsage: buildQwenResultModelUsage(tokenUsage),
+  };
+};
+
 const addQwenEventToState = (state, rawEvent) => {
   const event = sanitizeObjectStrings(rawEvent);
   state.parsedEvents.push(event);
@@ -118,6 +303,8 @@ const addQwenEventToState = (state, rawEvent) => {
       isAuthError: isQwenAuthError(errorMessage),
     });
   }
+
+  applyQwenUsageToState(state, event);
 };
 
 export const parseQwenStreamJsonOutput = (output, state = {}) => {
@@ -377,6 +564,7 @@ export const executeQwenCommand = async params => {
      .join('\n');
    const combinedErrorText = `${allOutput}\n${errorMessage}`.trim();
    const limitInfo = detectUsageLimit(combinedErrorText);
+    const usageResult = buildQwenPricingInfo(qwenState, mappedModel);
 
    if (limitInfo.isUsageLimit) {
      const messageLines = formatUsageLimitMessage({
@@ -394,9 +582,7 @@ export const executeQwenCommand = async params => {
        sessionId,
        limitReached: true,
        limitResetTime: limitInfo.resetTime,
-
-        publicPricingEstimate: null,
-        tokenUsage: null,
+        ...usageResult,
        resultSummary,
      };
    }
@@ -444,9 +630,7 @@ export const executeQwenCommand = async params => {
        sessionId,
        limitReached: false,
        limitResetTime: null,
-
-        publicPricingEstimate: null,
-        tokenUsage: null,
+        ...usageResult,
        resultSummary,
      };
    }
@@ -461,9 +645,7 @@ export const executeQwenCommand = async params => {
      sessionId,
      limitReached: false,
      limitResetTime: null,
-
-      publicPricingEstimate: null,
-      tokenUsage: null,
+      ...usageResult,
      resultSummary,
    };
  } catch (error) {