@link-assistant/hive-mind 1.62.0 → 1.62.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  # @link-assistant/hive-mind

+ ## 1.62.1
+
+ ### Patch Changes
+
+ - a683edf: Fix budget stats restored-context input accounting so sub-session lines include cache reads, use `sub-sessions` wording, and no longer render the obsolete `peak request:` label.
+
  ## 1.62.0

  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@link-assistant/hive-mind",
- "version": "1.62.0",
+ "version": "1.62.1",
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
  "main": "src/hive.mjs",
  "type": "module",
package/src/claude.budget-stats.lib.mjs CHANGED
@@ -19,6 +19,10 @@ export const createEmptySubSessionUsage = () => ({
  peakOutputUsage: 0,
  });

+ export const getRawRequestInputTokens = usage => (usage?.input_tokens || 0) + (usage?.cache_creation_input_tokens || 0) + (usage?.cache_read_input_tokens || 0);
+
+ export const getUsageInputTokens = usage => (usage?.inputTokens || 0) + (usage?.cacheCreationTokens || 0) + (usage?.cacheReadTokens || 0);
+
  /**
  * Helper: accumulates token usage from a JSONL entry into a model usage map
  * @param {Object} modelUsageMap - Map of model ID to usage data
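For orientation, a minimal sketch of what the two new helpers compute (the sample objects and numbers below are invented): `getRawRequestInputTokens` reads the snake_case fields of a raw per-request Anthropic usage record, while `getUsageInputTokens` reads the camelCase fields of the accumulated per-model shape; both sum the same three input buckets, cache reads included.

```js
import { getRawRequestInputTokens, getUsageInputTokens } from './claude.budget-stats.lib.mjs';

// Raw per-request usage as it appears in a session JSONL entry (snake_case):
const rawUsage = { input_tokens: 1200, cache_creation_input_tokens: 300, cache_read_input_tokens: 45000 };

// Accumulated per-model usage shape used elsewhere in this module (camelCase):
const accumulated = { inputTokens: 5400, cacheCreationTokens: 900, cacheReadTokens: 180000 };

console.log(getRawRequestInputTokens(rawUsage)); // 46500 — cache reads now count toward the request's input footprint
console.log(getUsageInputTokens(accumulated));   // 186300 — same three buckets over the accumulated shape
```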
@@ -185,9 +189,10 @@ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
  const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';

  await log(`\n šŸ“Š [budget-trace] ${modelName}`, { verbose: true });
- // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
- // tracked separately on the cumulative line).
- await log(` peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
+ // Issue #1737: peak input is the largest request's total input footprint:
+ // input + cache_creation + cache_read. The cumulative line still keeps those
+ // buckets split for cost and accounting review.
+ await log(` peak input: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest request input + cache_creation + cache_read)`, { verbose: true });
  await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
  // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
  // still surfaces the implied dollar cost so the residual remains debuggable
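With invented figures and an invented model name, the reworked trace would render along these lines — the peak line folds all three buckets into one number, while the cumulative line keeps them split:

```
 šŸ“Š [budget-trace] claude-sonnet-4-5
 peak input: 46,500 / 200,000 context (largest request input + cache_creation + cache_read)
 cumulative: input 5,400, cache_write 900 (5m 600 / 1h 300), cache_read 180,000, output 12,300
```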
@@ -234,17 +239,15 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {

  if (hasMultipleSubSessions) {
  // Issue #1600: Unified format — numbered list without "Context window:" prefix.
- // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
- // are tracked separately on the Total line), and the bullet is now
- // labelled "peak request:" so a reader does not try to reconcile it with
- // the cumulative Total figure.
+ // Issue #1737: Show peak input pressure per sub-session without the
+ // confusing "peak request:" label.
  for (let i = 0; i < subSessions.length; i++) {
  const sub = subSessions[i];
  const subPeak = sub.peakContextUsage || 0;
  const parts = [];
  if (contextLimit && subPeak > 0) {
  const pct = ((subPeak / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -258,7 +261,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  const parts = [];
  if (contextLimit) {
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -361,6 +364,17 @@ const formatTokensCompact = tokens => {
  return tokens.toLocaleString();
  };

+ const formatInputContextPart = (inputTokens, contextLimit, format) => {
+ if (contextLimit && inputTokens > 0) {
+ const pct = ((inputTokens / contextLimit) * 100).toFixed(0);
+ return `${format(inputTokens)} / ${format(contextLimit)} (${pct}%) input tokens`;
+ }
+ if (inputTokens > 0) {
+ return `${format(inputTokens)} input tokens`;
+ }
+ return null;
+ };
+
  /**
  * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
  * lines, splitting cache writes and cache reads so neither category is ever
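A quick illustration of the new helper's three branches, using a stand-in formatter and invented numbers (real call sites pass `formatTokensCompact`, which may abbreviate large values differently):

```js
const fmt = n => n.toLocaleString('en-US'); // stand-in for formatTokensCompact

formatInputContextPart(46500, 200000, fmt); // "46,500 / 200,000 (23%) input tokens"
formatInputContextPart(46500, null, fmt);   // "46,500 input tokens" — no known context limit
formatInputContextPart(0, 200000, fmt);     // null — callers skip the input part entirely
```

Returning `null` for zero input lets call sites guard with `if (inputPart)` before pushing into their `parts` arrays, so no empty segment ever reaches the joined line.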
@@ -422,10 +436,10 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {

  /**
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
- * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
- * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
- * from the cumulative Total line.
- * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
+ * Issue #1737: The input figure is the peak restored-context input for the
+ * sub-session/request (`input + cache_creation + cache_read`), without the old
+ * "peak request:" label.
+ * @param {number} peakContext - Peak input usage (0 if unknown — context display skipped)
  * @param {number} contextLimit - Context window limit (null if unknown)
  * @param {number} outputTokens - Output tokens used
  * @param {number} outputLimit - Output token limit (null if unknown)
@@ -434,9 +448,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
  */
  const formatContextOutputLine = (peakContext, contextLimit, outputTokens, outputLimit, prefix = '- ') => {
  const parts = [];
- if (contextLimit && peakContext > 0) {
- const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
+ const inputPart = formatInputContextPart(peakContext, contextLimit, formatTokensCompact);
+ if (inputPart) {
+ parts.push(inputPart);
  }
  if (outputLimit) {
  const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -547,16 +561,16 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {

  if (isMultiModel) {
  // Issue #1590: Show sub-agent call count alongside model name
- // Issue #1600: Show session segment count for primary model
+ // Issue #1737: Use "sub-sessions" for compactification-bounded sections.
  if (callCount > 1) {
  stats += `\n\n**${modelName}:** (${callCount} sub-agent calls)`;
  } else if (showSubSessions) {
- stats += `\n\n**${modelName}:** (${subSessions.length} session segments)`;
+ stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
  } else {
  stats += `\n\n**${modelName}:**`;
  }
  } else if (showSubSessions) {
- stats += `\n\n**${modelName}:** (${subSessions.length} session segments)`;
+ stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
  }

  const peakContext = usage.peakContextUsage || 0;
@@ -568,20 +582,25 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
  } else if (outputLimit && callCount <= 1) {
  // Issue #1600: Sub-agent single sessions previously showed only an output line.
- // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
+ // Issue #1737: Always surface total input information too — sub-agent
  // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
  // so peakContext stays at 0; without this fallback the rendered comment loses
- // the sub-agent's input-token information entirely. Cache writes / reads are
- // split via the same helper used for the Total line so the two lines stay
- // arithmetically consistent.
- const inputPhrase = buildCumulativeInputPhrase({
- input: usage.inputTokens || 0,
- cacheWrites: usage.cacheCreationTokens || 0,
- cacheReads: usage.cacheReadTokens || 0,
- format: formatTokensCompact,
- });
+ // the sub-agent's input-token information entirely. The detail line is
+ // deliberately simple; the Total line below keeps the cache split.
+ const parts = [];
+ const isResultSingleCall = usage._sourceResultJson || callCount > 0;
+ const inputPart = isResultSingleCall
+ ? formatInputContextPart(getUsageInputTokens(usage), contextLimit, formatTokensCompact)
+ : buildCumulativeInputPhrase({
+ input: usage.inputTokens || 0,
+ cacheWrites: usage.cacheCreationTokens || 0,
+ cacheReads: usage.cacheReadTokens || 0,
+ format: formatTokensCompact,
+ });
+ if (inputPart) parts.push(inputPart);
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
- stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
+ parts.push(`${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`);
+ stats += `\n- ${parts.join(', ')}`;
  }

  // Cumulative totals per model: input tokens + cached shown separately.
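To make the branch concrete, here is a hypothetical sub-agent usage object (values invented, limits assumed to be 200k context / 64k output) and the line it would produce:

```js
// Sub-agent (e.g. Haiku) usage merged from a result event: sub-agent models
// never answer in the parent JSONL, so peakContext stays 0 and this fallback
// branch is the only place their input tokens surface.
const usage = {
  _sourceResultJson: true,
  inputTokens: 800,
  cacheCreationTokens: 200,
  cacheReadTokens: 19000,
  outputTokens: 4100,
};

getUsageInputTokens(usage); // 20000 — isResultSingleCall is true, so the simple combined figure is used
// Rendered roughly as (exact form depends on how formatTokensCompact abbreviates):
// "- 20,000 / 200,000 (10%) input tokens, 4,100 / 64,000 (6%) output tokens"
```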
@@ -15,7 +15,7 @@ import { setupBidirectionalHandler, finalizeBidirectionalHandler, validateBidire
  import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
  import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
  import Decimal from 'decimal.js-light';
- import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage } from './claude.budget-stats.lib.mjs';
+ import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage, getRawRequestInputTokens } from './claude.budget-stats.lib.mjs';
  import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
  import { SESSION_FORCE_KILLED_MARKER, postTrackedComment } from './tool-comments.lib.mjs'; // Issue #1625
  import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
@@ -394,9 +394,10 @@ export const checkModelVisionCapability = async modelId => {
  // this file under the 1500-line repo cap (see check-file-line-limits CI job).
  import { calculateModelCost } from './claude.cost.lib.mjs';
  export { calculateModelCost };
- export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
+ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null, options = {}) => {
  const os = (await use('os')).default;
- const homeDir = os.homedir();
+ const homeDir = options.homeDir || os.homedir();
+ const fetchModelInfoForUsage = options.fetchModelInfo || fetchModelInfo;
  // Construct the path to the session JSONL file
  // Format: ~/.claude/projects/<project-dir>/<session-id>.jsonl
  // The project directory name is the full path with slashes replaced by dashes
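The new `options` bag reads as a dependency-injection seam, presumably for tests; a hypothetical call that redirects the home directory and stubs the model lookup might look like this (the fixture path and the stub's return shape are assumptions — only `context` is shown because the trace above reads `limit.context`):

```js
const tokenUsage = await calculateSessionTokens(sessionId, tempDir, null, {
  // Resolve session JSONL files under a fixture tree instead of the real ~/.claude.
  homeDir: '/tmp/fixtures/fake-home',
  // Skip the live model-info lookup; return just the fields the caller reads.
  fetchModelInfo: async modelId => ({ context: 200000 }),
});
```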
@@ -454,15 +455,12 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
  seenMessageIds.add(msgId);
  }
  accumulateModelUsage(modelUsage, entry);
- // Issue #1501: Track peak context usage per single API request
- // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
- // per-request peaks should reflect *new* input the model received,
- // not cached prompt context. Cache reads remain visible in the
- // cumulative Total line as `(X + Y cached)`. This makes the
- // peak-request value reconcilable with the cumulative non-cached
- // input figure (instead of mixing semantics across the two lines).
+ // Issue #1737: Track peak restored-context input per request.
+ // Anthropic splits a request's input into input_tokens,
+ // cache_creation_input_tokens, and cache_read_input_tokens; all three
+ // count toward "how much context will be restored if I resume here".
  const usage = entry.message.usage;
- const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
+ const requestContext = getRawRequestInputTokens(usage);
  const model = entry.message.model;
  if (requestContext > (peakContextByModel[model] || 0)) {
  peakContextByModel[model] = requestContext;
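A reduced sketch of the tracking rule with invented JSONL-style entries, showing why including cache reads changes which request wins the per-model peak:

```js
const peakContextByModel = {};

// Invented per-request usage records (model name is hypothetical):
const requests = [
  { model: 'claude-sonnet-4-5', usage: { input_tokens: 900, cache_creation_input_tokens: 100, cache_read_input_tokens: 0 } },
  { model: 'claude-sonnet-4-5', usage: { input_tokens: 50, cache_creation_input_tokens: 0, cache_read_input_tokens: 45000 } },
];

for (const { model, usage } of requests) {
  // Peak is a max over each request's combined input footprint.
  const requestContext = getRawRequestInputTokens(usage);
  if (requestContext > (peakContextByModel[model] || 0)) {
    peakContextByModel[model] = requestContext;
  }
}

// Under the old input + cache_creation definition the peak would be 1,000;
// counting cache reads, the second request dominates.
console.log(peakContextByModel); // { 'claude-sonnet-4-5': 45050 }
```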
@@ -500,7 +498,7 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
  }
  // Fetch model information for each model
  const modelInfoPromises = Object.keys(modelUsage).map(async modelId => {
- const modelInfo = await fetchModelInfo(modelId);
+ const modelInfo = await fetchModelInfoForUsage(modelId);
  return { modelId, modelInfo };
  });
  const modelInfoResults = await Promise.all(modelInfoPromises);
@@ -1295,9 +1293,7 @@ export const executeClaudeCommand = async params => {
  await log(`\nāš ļø JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
  }
  if (tokenUsage.peakContextUsage > 0) {
- // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
- // excluding cache_read). Cache reads are still visible separately on the Total line.
- await log(`šŸ“Š Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
+ await log(`šŸ“Š Peak restored-context input: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
  }
  await log('\nšŸ’° Token Usage Summary:');
  // Display per-model breakdown