@link-assistant/hive-mind 1.62.0 → 1.62.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/package.json +1 -1
- package/src/claude.budget-stats.lib.mjs +49 -30
- package/src/claude.lib.mjs +11 -15
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.62.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- a683edf: Fix budget stats restored-context input accounting so sub-session lines include cache reads, use `sub-sessions` wording, and no longer render the obsolete `peak request:` label.
|
|
8
|
+
|
|
3
9
|
## 1.62.0
|
|
4
10
|
|
|
5
11
|
### Minor Changes
|
package/package.json
CHANGED
|
@@ -19,6 +19,10 @@ export const createEmptySubSessionUsage = () => ({
|
|
|
19
19
|
peakOutputUsage: 0,
|
|
20
20
|
});
|
|
21
21
|
|
|
22
|
+
export const getRawRequestInputTokens = usage => (usage?.input_tokens || 0) + (usage?.cache_creation_input_tokens || 0) + (usage?.cache_read_input_tokens || 0);
|
|
23
|
+
|
|
24
|
+
export const getUsageInputTokens = usage => (usage?.inputTokens || 0) + (usage?.cacheCreationTokens || 0) + (usage?.cacheReadTokens || 0);
|
|
25
|
+
|
|
22
26
|
/**
|
|
23
27
|
* Helper: accumulates token usage from a JSONL entry into a model usage map
|
|
24
28
|
* @param {Object} modelUsageMap - Map of model ID to usage data
|
|
@@ -185,9 +189,10 @@ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
|
|
|
185
189
|
const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
|
|
186
190
|
|
|
187
191
|
await log(`\n 📊 [budget-trace] ${modelName}`, { verbose: true });
|
|
188
|
-
// Issue #
|
|
189
|
-
//
|
|
190
|
-
|
|
192
|
+
// Issue #1737: peak input is the largest request's total input footprint:
|
|
193
|
+
// input + cache_creation + cache_read. The cumulative line still keeps those
|
|
194
|
+
// buckets split for cost and accounting review.
|
|
195
|
+
await log(` peak input: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest request input + cache_creation + cache_read)`, { verbose: true });
|
|
191
196
|
await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
|
|
192
197
|
// Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
|
|
193
198
|
// still surfaces the implied dollar cost so the residual remains debuggable
|
|
@@ -234,17 +239,15 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
234
239
|
|
|
235
240
|
if (hasMultipleSubSessions) {
|
|
236
241
|
// Issue #1600: Unified format — numbered list without "Context window:" prefix.
|
|
237
|
-
// Issue #
|
|
238
|
-
//
|
|
239
|
-
// labelled "peak request:" so a reader does not try to reconcile it with
|
|
240
|
-
// the cumulative Total figure.
|
|
242
|
+
// Issue #1737: Show peak input pressure per sub-session without the
|
|
243
|
+
// confusing "peak request:" label.
|
|
241
244
|
for (let i = 0; i < subSessions.length; i++) {
|
|
242
245
|
const sub = subSessions[i];
|
|
243
246
|
const subPeak = sub.peakContextUsage || 0;
|
|
244
247
|
const parts = [];
|
|
245
248
|
if (contextLimit && subPeak > 0) {
|
|
246
249
|
const pct = ((subPeak / contextLimit) * 100).toFixed(0);
|
|
247
|
-
parts.push(
|
|
250
|
+
parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
|
|
248
251
|
}
|
|
249
252
|
if (outputLimit) {
|
|
250
253
|
const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -258,7 +261,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
258
261
|
const parts = [];
|
|
259
262
|
if (contextLimit) {
|
|
260
263
|
const pct = ((peakContext / contextLimit) * 100).toFixed(0);
|
|
261
|
-
parts.push(
|
|
264
|
+
parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
|
|
262
265
|
}
|
|
263
266
|
if (outputLimit) {
|
|
264
267
|
const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -361,6 +364,17 @@ const formatTokensCompact = tokens => {
|
|
|
361
364
|
return tokens.toLocaleString();
|
|
362
365
|
};
|
|
363
366
|
|
|
367
|
+
const formatInputContextPart = (inputTokens, contextLimit, format) => {
|
|
368
|
+
if (contextLimit && inputTokens > 0) {
|
|
369
|
+
const pct = ((inputTokens / contextLimit) * 100).toFixed(0);
|
|
370
|
+
return `${format(inputTokens)} / ${format(contextLimit)} (${pct}%) input tokens`;
|
|
371
|
+
}
|
|
372
|
+
if (inputTokens > 0) {
|
|
373
|
+
return `${format(inputTokens)} input tokens`;
|
|
374
|
+
}
|
|
375
|
+
return null;
|
|
376
|
+
};
|
|
377
|
+
|
|
364
378
|
/**
|
|
365
379
|
* Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
|
|
366
380
|
* lines, splitting cache writes and cache reads so neither category is ever
|
|
@@ -422,10 +436,10 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
|
|
|
422
436
|
|
|
423
437
|
/**
|
|
424
438
|
* Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
|
|
425
|
-
* Issue #
|
|
426
|
-
* (
|
|
427
|
-
*
|
|
428
|
-
* @param {number} peakContext - Peak
|
|
439
|
+
* Issue #1737: The input figure is the peak restored-context input for the
|
|
440
|
+
* sub-session/request (`input + cache_creation + cache_read`), without the old
|
|
441
|
+
* "peak request:" label.
|
|
442
|
+
* @param {number} peakContext - Peak input usage (0 if unknown ā context display skipped)
|
|
429
443
|
* @param {number} contextLimit - Context window limit (null if unknown)
|
|
430
444
|
* @param {number} outputTokens - Output tokens used
|
|
431
445
|
* @param {number} outputLimit - Output token limit (null if unknown)
|
|
@@ -434,9 +448,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
|
|
|
434
448
|
*/
|
|
435
449
|
const formatContextOutputLine = (peakContext, contextLimit, outputTokens, outputLimit, prefix = '- ') => {
|
|
436
450
|
const parts = [];
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
parts.push(
|
|
451
|
+
const inputPart = formatInputContextPart(peakContext, contextLimit, formatTokensCompact);
|
|
452
|
+
if (inputPart) {
|
|
453
|
+
parts.push(inputPart);
|
|
440
454
|
}
|
|
441
455
|
if (outputLimit) {
|
|
442
456
|
const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
|
|
@@ -547,16 +561,16 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
|
|
|
547
561
|
|
|
548
562
|
if (isMultiModel) {
|
|
549
563
|
// Issue #1590: Show sub-agent call count alongside model name
|
|
550
|
-
// Issue #
|
|
564
|
+
// Issue #1737: Use "sub-sessions" for compactification-bounded sections.
|
|
551
565
|
if (callCount > 1) {
|
|
552
566
|
stats += `\n\n**${modelName}:** (${callCount} sub-agent calls)`;
|
|
553
567
|
} else if (showSubSessions) {
|
|
554
|
-
stats += `\n\n**${modelName}:** (${subSessions.length}
|
|
568
|
+
stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
|
|
555
569
|
} else {
|
|
556
570
|
stats += `\n\n**${modelName}:**`;
|
|
557
571
|
}
|
|
558
572
|
} else if (showSubSessions) {
|
|
559
|
-
stats += `\n\n**${modelName}:** (${subSessions.length}
|
|
573
|
+
stats += `\n\n**${modelName}:** (${subSessions.length} sub-sessions)`;
|
|
560
574
|
}
|
|
561
575
|
|
|
562
576
|
const peakContext = usage.peakContextUsage || 0;
|
|
@@ -568,20 +582,25 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
|
|
|
568
582
|
stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
|
|
569
583
|
} else if (outputLimit && callCount <= 1) {
|
|
570
584
|
// Issue #1600: Sub-agent single sessions previously showed only an output line.
|
|
571
|
-
// Issue #
|
|
585
|
+
// Issue #1737: Always surface total input information too — sub-agent
|
|
572
586
|
// models (e.g. Haiku) never appear as the responding model in the parent JSONL,
|
|
573
587
|
// so peakContext stays at 0; without this fallback the rendered comment loses
|
|
574
|
-
// the sub-agent's input-token information entirely.
|
|
575
|
-
//
|
|
576
|
-
|
|
577
|
-
const
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
588
|
+
// the sub-agent's input-token information entirely. The detail line is
|
|
589
|
+
// deliberately simple; the Total line below keeps the cache split.
|
|
590
|
+
const parts = [];
|
|
591
|
+
const isResultSingleCall = usage._sourceResultJson || callCount > 0;
|
|
592
|
+
const inputPart = isResultSingleCall
|
|
593
|
+
? formatInputContextPart(getUsageInputTokens(usage), contextLimit, formatTokensCompact)
|
|
594
|
+
: buildCumulativeInputPhrase({
|
|
595
|
+
input: usage.inputTokens || 0,
|
|
596
|
+
cacheWrites: usage.cacheCreationTokens || 0,
|
|
597
|
+
cacheReads: usage.cacheReadTokens || 0,
|
|
598
|
+
format: formatTokensCompact,
|
|
599
|
+
});
|
|
600
|
+
if (inputPart) parts.push(inputPart);
|
|
583
601
|
const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
|
|
584
|
-
|
|
602
|
+
parts.push(`${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`);
|
|
603
|
+
stats += `\n- ${parts.join(', ')}`;
|
|
585
604
|
}
|
|
586
605
|
|
|
587
606
|
// Cumulative totals per model: input tokens + cached shown separately.
|
package/src/claude.lib.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { setupBidirectionalHandler, finalizeBidirectionalHandler, validateBidire
|
|
|
15
15
|
import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
|
|
16
16
|
import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
|
|
17
17
|
import Decimal from 'decimal.js-light';
|
|
18
|
-
import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage } from './claude.budget-stats.lib.mjs';
|
|
18
|
+
import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage, getRawRequestInputTokens } from './claude.budget-stats.lib.mjs';
|
|
19
19
|
import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
|
|
20
20
|
import { SESSION_FORCE_KILLED_MARKER, postTrackedComment } from './tool-comments.lib.mjs'; // Issue #1625
|
|
21
21
|
import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
|
|
@@ -394,9 +394,10 @@ export const checkModelVisionCapability = async modelId => {
|
|
|
394
394
|
// this file under the 1500-line repo cap (see check-file-line-limits CI job).
|
|
395
395
|
import { calculateModelCost } from './claude.cost.lib.mjs';
|
|
396
396
|
export { calculateModelCost };
|
|
397
|
-
export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
|
|
397
|
+
export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null, options = {}) => {
|
|
398
398
|
const os = (await use('os')).default;
|
|
399
|
-
const homeDir = os.homedir();
|
|
399
|
+
const homeDir = options.homeDir || os.homedir();
|
|
400
|
+
const fetchModelInfoForUsage = options.fetchModelInfo || fetchModelInfo;
|
|
400
401
|
// Construct the path to the session JSONL file
|
|
401
402
|
// Format: ~/.claude/projects/<project-dir>/<session-id>.jsonl
|
|
402
403
|
// The project directory name is the full path with slashes replaced by dashes
|
|
@@ -454,15 +455,12 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
|
|
|
454
455
|
seenMessageIds.add(msgId);
|
|
455
456
|
}
|
|
456
457
|
accumulateModelUsage(modelUsage, entry);
|
|
457
|
-
// Issue #
|
|
458
|
-
//
|
|
459
|
-
//
|
|
460
|
-
//
|
|
461
|
-
// cumulative Total line as `(X + Y cached)`. This makes the
|
|
462
|
-
// peak-request value reconcilable with the cumulative non-cached
|
|
463
|
-
// input figure (instead of mixing semantics across the two lines).
|
|
458
|
+
// Issue #1737: Track peak restored-context input per request.
|
|
459
|
+
// Anthropic splits a request's input into input_tokens,
|
|
460
|
+
// cache_creation_input_tokens, and cache_read_input_tokens; all three
|
|
461
|
+
// count toward "how much context will be restored if I resume here".
|
|
464
462
|
const usage = entry.message.usage;
|
|
465
|
-
const requestContext = (usage
|
|
463
|
+
const requestContext = getRawRequestInputTokens(usage);
|
|
466
464
|
const model = entry.message.model;
|
|
467
465
|
if (requestContext > (peakContextByModel[model] || 0)) {
|
|
468
466
|
peakContextByModel[model] = requestContext;
|
|
@@ -500,7 +498,7 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
|
|
|
500
498
|
}
|
|
501
499
|
// Fetch model information for each model
|
|
502
500
|
const modelInfoPromises = Object.keys(modelUsage).map(async modelId => {
|
|
503
|
-
const modelInfo = await
|
|
501
|
+
const modelInfo = await fetchModelInfoForUsage(modelId);
|
|
504
502
|
return { modelId, modelInfo };
|
|
505
503
|
});
|
|
506
504
|
const modelInfoResults = await Promise.all(modelInfoPromises);
|
|
@@ -1295,9 +1293,7 @@ export const executeClaudeCommand = async params => {
|
|
|
1295
1293
|
await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
|
|
1296
1294
|
}
|
|
1297
1295
|
if (tokenUsage.peakContextUsage > 0) {
|
|
1298
|
-
|
|
1299
|
-
// excluding cache_read). Cache reads are still visible separately on the Total line.
|
|
1300
|
-
await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
|
|
1296
|
+
await log(`📊 Peak restored-context input: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
|
|
1301
1297
|
}
|
|
1302
1298
|
await log('\n💰 Token Usage Summary:');
|
|
1303
1299
|
// Display per-model breakdown
|