@link-assistant/hive-mind 1.62.0 → 1.62.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  # @link-assistant/hive-mind

+ ## 1.62.1
+
+ ### Patch Changes
+
+ - a683edf: Fix budget stats restored-context input accounting so sub-session lines include cache reads, use `sub-sessions` wording, and no longer render the obsolete `peak request:` label.
+
  ## 1.62.0

  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@link-assistant/hive-mind",
- "version": "1.62.0",
+ "version": "1.62.1",
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
  "main": "src/hive.mjs",
  "type": "module",
package/src/claude.budget-stats.lib.mjs CHANGED
@@ -19,6 +19,10 @@ export const createEmptySubSessionUsage = () => ({
  peakOutputUsage: 0,
  });

+ export const getRawRequestInputTokens = usage => (usage?.input_tokens || 0) + (usage?.cache_creation_input_tokens || 0) + (usage?.cache_read_input_tokens || 0);
+
+ export const getUsageInputTokens = usage => (usage?.inputTokens || 0) + (usage?.cacheCreationTokens || 0) + (usage?.cacheReadTokens || 0);
+
  /**
  * Helper: accumulates token usage from a JSONL entry into a model usage map
  * @param {Object} modelUsageMap - Map of model ID to usage data
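For orientation, a minimal sketch of what the two new helpers compute (the sample objects and numbers below are invented): `getRawRequestInputTokens` reads the snake_case fields of a raw per-request Anthropic usage record, while `getUsageInputTokens` reads the camelCase fields of the accumulated per-model shape; both sum the same three input buckets, cache reads included.

```js
import { getRawRequestInputTokens, getUsageInputTokens } from './claude.budget-stats.lib.mjs';

// Raw per-request usage as it appears in a session JSONL entry (snake_case):
const rawUsage = { input_tokens: 1200, cache_creation_input_tokens: 300, cache_read_input_tokens: 45000 };

// Accumulated per-model usage shape used elsewhere in this module (camelCase):
const accumulated = { inputTokens: 5400, cacheCreationTokens: 900, cacheReadTokens: 180000 };

console.log(getRawRequestInputTokens(rawUsage)); // 46500 — cache reads now count toward the request's input footprint
console.log(getUsageInputTokens(accumulated));   // 186300 — same three buckets over the accumulated shape
```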
@@ -185,9 +189,10 @@ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
  const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';

  await log(`\n šŸ“Š [budget-trace] ${modelName}`, { verbose: true });
- // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
- // tracked separately on the cumulative line).
- await log(` peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
+ // Issue #1737: peak input is the largest request's total input footprint:
+ // input + cache_creation + cache_read. The cumulative line still keeps those
+ // buckets split for cost and accounting review.
+ await log(` peak input: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest request input + cache_creation + cache_read)`, { verbose: true });
  await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
  // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
  // still surfaces the implied dollar cost so the residual remains debuggable
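With invented figures and an invented model name, the reworked trace would render along these lines — the peak line folds all three buckets into one number, while the cumulative line keeps them split:

```
 šŸ“Š [budget-trace] claude-sonnet-4-5
 peak input: 46,500 / 200,000 context (largest request input + cache_creation + cache_read)
 cumulative: input 5,400, cache_write 900 (5m 600 / 1h 300), cache_read 180,000, output 12,300
```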
@@ -234,17 +239,15 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {

  if (hasMultipleSubSessions) {
  // Issue #1600: Unified format — numbered list without "Context window:" prefix.
- // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
- // are tracked separately on the Total line), and the bullet is now
- // labelled "peak request:" so a reader does not try to reconcile it with
- // the cumulative Total figure.
+ // Issue #1737: Show peak input pressure per sub-session without the
+ // confusing "peak request:" label.
  for (let i = 0; i < subSessions.length; i++) {
  const sub = subSessions[i];
  const subPeak = sub.peakContextUsage || 0;
  const parts = [];
  if (contextLimit && subPeak > 0) {
  const pct = ((subPeak / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -258,7 +261,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  const parts = [];
  if (contextLimit) {
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -361,6 +364,17 @@ const formatTokensCompact = tokens => {
  return tokens.toLocaleString();
  };

+ const formatInputContextPart = (inputTokens, contextLimit, format) => {
+ if (contextLimit && inputTokens > 0) {
+ const pct = ((inputTokens / contextLimit) * 100).toFixed(0);
+ return `${format(inputTokens)} / ${format(contextLimit)} (${pct}%) input tokens`;
+ }
+ if (inputTokens > 0) {
+ return `${format(inputTokens)} input tokens`;
+ }
+ return null;
+ };
+
  /**
  * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
  * lines, splitting cache writes and cache reads so neither category is ever
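A quick illustration of the new helper's three branches, using a stand-in formatter and invented numbers (real call sites pass `formatTokensCompact`, which may abbreviate large values differently):

```js
const fmt = n => n.toLocaleString('en-US'); // stand-in for formatTokensCompact

formatInputContextPart(46500, 200000, fmt); // "46,500 / 200,000 (23%) input tokens"
formatInputContextPart(46500, null, fmt);   // "46,500 input tokens" — no known context limit
formatInputContextPart(0, 200000, fmt);     // null — callers skip the input part entirely
```

Returning `null` for zero input lets call sites guard with `if (inputPart)` before pushing into their `parts` arrays, so no empty segment ever reaches the joined line.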
@@ -422,10 +436,10 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {

  /**
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
- * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
- * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
- * from the cumulative Total line.
- * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
+ * Issue #1737: The input figure is the peak restored-context input for the
+ * sub-session/request (`input + cache_creation + cache_read`), without the old
+ * "peak request:" label.
+ * @param {number} peakContext - Peak input usage (0 if unknown — context display skipped)
  * @param {number} contextLimit - Context window limit (null if unknown)
  * @param {number} outputTokens - Output tokens used
  * @param {number} outputLimit - Output token limit (null if unknown)
@@ -434,9 +448,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
  */
  const formatContextOutputLine = (peakContext, contextLimit, outputTokens, outputLimit, prefix = '- ') => {
  const parts = [];
- if (contextLimit && peakContext > 0) {
- const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
+ const inputPart = formatInputContextPart(peakContext, contextLimit, formatTokensCompact);
+ if (inputPart) {
+ parts.push(inputPart);
  }
  if (outputLimit) {
  const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -547,16 +561,16 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {

  if (isMultiModel) {
  // Issue #1590: Show sub-agent call count alongside model name
- // Issue #1600: Show session segment count for primary model
+ // Issue #1737: Use "sub-sessions" for compactification-bounded sections.
  if (callCount > 1) {
  stats += `\n\n**${modelName}:** (${callCount} sub-agent calls)`;
  } else if (showSubSessions) {
- stats += `\n\n**${modelName}:** (${subSessions.length} session segments)`;
+ stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
  } else {
  stats += `\n\n**${modelName}:**`;
  }
  } else if (showSubSessions) {
- stats += `\n\n**${modelName}:** (${subSessions.length} session segments)`;
+ stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
  }

  const peakContext = usage.peakContextUsage || 0;
@@ -568,20 +582,25 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
  } else if (outputLimit && callCount <= 1) {
  // Issue #1600: Sub-agent single sessions previously showed only an output line.
- // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
+ // Issue #1737: Always surface total input information too — sub-agent
  // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
  // so peakContext stays at 0; without this fallback the rendered comment loses
- // the sub-agent's input-token information entirely. Cache writes / reads are
- // split via the same helper used for the Total line so the two lines stay
- // arithmetically consistent.
- const inputPhrase = buildCumulativeInputPhrase({
- input: usage.inputTokens || 0,
- cacheWrites: usage.cacheCreationTokens || 0,
- cacheReads: usage.cacheReadTokens || 0,
- format: formatTokensCompact,
- });
+ // the sub-agent's input-token information entirely. The detail line is
+ // deliberately simple; the Total line below keeps the cache split.
+ const parts = [];
+ const isResultSingleCall = usage._sourceResultJson || callCount > 0;
+ const inputPart = isResultSingleCall
+ ? formatInputContextPart(getUsageInputTokens(usage), contextLimit, formatTokensCompact)
+ : buildCumulativeInputPhrase({
+ input: usage.inputTokens || 0,
+ cacheWrites: usage.cacheCreationTokens || 0,
+ cacheReads: usage.cacheReadTokens || 0,
+ format: formatTokensCompact,
+ });
+ if (inputPart) parts.push(inputPart);
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
- stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
+ parts.push(`${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`);
+ stats += `\n- ${parts.join(', ')}`;
  }

  // Cumulative totals per model: input tokens + cached shown separately.
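To make the branch concrete, here is a hypothetical sub-agent usage object (values invented, limits assumed to be 200k context / 64k output) and the line it would produce:

```js
// Sub-agent (e.g. Haiku) usage merged from a result event: sub-agent models
// never answer in the parent JSONL, so peakContext stays 0 and this fallback
// branch is the only place their input tokens surface.
const usage = {
  _sourceResultJson: true,
  inputTokens: 800,
  cacheCreationTokens: 200,
  cacheReadTokens: 19000,
  outputTokens: 4100,
};

getUsageInputTokens(usage); // 20000 — isResultSingleCall is true, so the simple combined figure is used
// Rendered roughly as (exact form depends on how formatTokensCompact abbreviates):
// "- 20,000 / 200,000 (10%) input tokens, 4,100 / 64,000 (6%) output tokens"
```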
@@ -15,7 +15,7 @@ import { setupBidirectionalHandler, finalizeBidirectionalHandler, validateBidire
  import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
  import Decimal from 'decimal.js-light';
- import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage } from './claude.budget-stats.lib.mjs';
+ import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage, getRawRequestInputTokens } from './claude.budget-stats.lib.mjs';
  import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
  import { SESSION_FORCE_KILLED_MARKER, postTrackedComment } from './tool-comments.lib.mjs'; // Issue #1625
  import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
@@ -394,9 +394,10 @@ export const checkModelVisionCapability = async modelId => {
  // this file under the 1500-line repo cap (see check-file-line-limits CI job).
  import { calculateModelCost } from './claude.cost.lib.mjs';
  export { calculateModelCost };
- export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
+ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null, options = {}) => {
  const os = (await use('os')).default;
- const homeDir = os.homedir();
+ const homeDir = options.homeDir || os.homedir();
+ const fetchModelInfoForUsage = options.fetchModelInfo || fetchModelInfo;
  // Construct the path to the session JSONL file
  // Format: ~/.claude/projects/<project-dir>/<session-id>.jsonl
  // The project directory name is the full path with slashes replaced by dashes
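The new `options` bag reads as a dependency-injection seam, presumably for tests; a hypothetical call that redirects the home directory and stubs the model lookup might look like this (the fixture path and the stub's return shape are assumptions — only `context` is shown because the trace above reads `limit.context`):

```js
const tokenUsage = await calculateSessionTokens(sessionId, tempDir, null, {
  // Resolve session JSONL files under a fixture tree instead of the real ~/.claude.
  homeDir: '/tmp/fixtures/fake-home',
  // Skip the live model-info lookup; return just the fields the caller reads.
  fetchModelInfo: async modelId => ({ context: 200000 }),
});
```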
@@ -454,15 +455,12 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
  seenMessageIds.add(msgId);
  }
  accumulateModelUsage(modelUsage, entry);
- // Issue #1501: Track peak context usage per single API request
- // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
- // per-request peaks should reflect *new* input the model received,
- // not cached prompt context. Cache reads remain visible in the
- // cumulative Total line as `(X + Y cached)`. This makes the
- // peak-request value reconcilable with the cumulative non-cached
- // input figure (instead of mixing semantics across the two lines).
+ // Issue #1737: Track peak restored-context input per request.
+ // Anthropic splits a request's input into input_tokens,
+ // cache_creation_input_tokens, and cache_read_input_tokens; all three
+ // count toward "how much context will be restored if I resume here".
  const usage = entry.message.usage;
- const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
+ const requestContext = getRawRequestInputTokens(usage);
  const model = entry.message.model;
  if (requestContext > (peakContextByModel[model] || 0)) {
  peakContextByModel[model] = requestContext;
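A reduced sketch of the tracking rule with invented JSONL-style entries, showing why including cache reads changes which request wins the per-model peak:

```js
const peakContextByModel = {};

// Invented per-request usage records (model name is hypothetical):
const requests = [
  { model: 'claude-sonnet-4-5', usage: { input_tokens: 900, cache_creation_input_tokens: 100, cache_read_input_tokens: 0 } },
  { model: 'claude-sonnet-4-5', usage: { input_tokens: 50, cache_creation_input_tokens: 0, cache_read_input_tokens: 45000 } },
];

for (const { model, usage } of requests) {
  // Peak is a max over each request's combined input footprint.
  const requestContext = getRawRequestInputTokens(usage);
  if (requestContext > (peakContextByModel[model] || 0)) {
    peakContextByModel[model] = requestContext;
  }
}

// Under the old input + cache_creation definition the peak would be 1,000;
// counting cache reads, the second request dominates.
console.log(peakContextByModel); // { 'claude-sonnet-4-5': 45050 }
```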
@@ -500,7 +498,7 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
  }
  // Fetch model information for each model
  const modelInfoPromises = Object.keys(modelUsage).map(async modelId => {
- const modelInfo = await fetchModelInfo(modelId);
+ const modelInfo = await fetchModelInfoForUsage(modelId);
  return { modelId, modelInfo };
  });
  const modelInfoResults = await Promise.all(modelInfoPromises);
@@ -1295,9 +1293,7 @@ export const executeClaudeCommand = async params => {
  await log(`\nāš ļø JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
  }
  if (tokenUsage.peakContextUsage > 0) {
- // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
- // excluding cache_read). Cache reads are still visible separately on the Total line.
- await log(`šŸ“Š Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
+ await log(`šŸ“Š Peak restored-context input: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
  }
  await log('\nšŸ’° Token Usage Summary:');
  // Display per-model breakdown