@link-assistant/hive-mind 1.38.0 → 1.38.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
 # @link-assistant/hive-mind
 
+## 1.38.2
+
+### Patch Changes
+
+- 290139f: fix: correct cost and token/context budget calculations (#1501)
+  - Deduplicate JSONL session entries by message ID to fix inflated token counts caused by upstream anthropics/claude-code#6805
+  - Show peak context window usage (max single-request fill) instead of cumulative sum which produced nonsensical percentages like 7516%
+  - Add "Total tokens processed" as a separate cumulative metric for session throughput visibility
+  - Add verbose logging for JSONL deduplication stats and peak context values
+
+## 1.38.1
+
+### Patch Changes
+
+- 1525ecb: fix: prevent 'Failed to send formatted message' Telegram error by adding safeReply helper and escaping unescaped Markdown in bot messages
+
 ## 1.38.0
 
 ### Minor Changes
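To see why the peak-context change in 1.38.2 matters, compare a cumulative sum of per-request input tokens with the per-request peak: each JSONL usage entry reports the full context sent with that request, so summing entries counts the same conversation prefix repeatedly. A minimal sketch with invented numbers (not code from the package):

```js
// Sketch with invented numbers: why cumulative context percentages exceed 100%.
const entries = [
  { input_tokens: 50000, cache_read_input_tokens: 0 },
  { input_tokens: 1000, cache_read_input_tokens: 60000 },
  { input_tokens: 1200, cache_read_input_tokens: 95000 },
];
const contextLimit = 200000; // per-request limit, not a session budget

const perRequest = entries.map(u => (u.input_tokens || 0) + (u.cache_read_input_tokens || 0));
const cumulative = perRequest.reduce((a, b) => a + b, 0); // 207200
const peak = Math.max(...perRequest); // 96200

console.log(((cumulative / contextLimit) * 100).toFixed(0) + '%'); // "104%" (nonsensical)
console.log(((peak / contextLimit) * 100).toFixed(0) + '%'); // "48%" (actual max fill)
```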
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.38.0",
+  "version": "1.38.2",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",
@@ -14,6 +14,8 @@ export const createEmptySubSessionUsage = () => ({
   cacheReadTokens: 0,
   outputTokens: 0,
   messageCount: 0,
+  peakContextUsage: 0,
+  peakOutputUsage: 0,
 });
 
 /**
@@ -136,173 +138,161 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
 /**
  * Display token budget statistics (context window usage and ratios)
  * @param {Object} usage - Usage data for a model
+ * @param {Object} tokenUsage - Full token usage data (with subSessions)
  * @param {Function} log - Logging function
  */
-export const displayBudgetStats = async (usage, log) => {
+export const displayBudgetStats = async (usage, tokenUsage, log) => {
   const modelInfo = usage.modelInfo;
   if (!modelInfo?.limit) {
     await log('\n ⚠️ Budget stats not available (no model limits found)');
     return;
   }
 
-  await log('\n 📊 Token Budget Statistics:');
+  await log('\n 📊 Context and tokens usage:');
 
-  // Context window usage
-  if (modelInfo.limit.context) {
-    const contextLimit = modelInfo.limit.context;
-    // Input tokens include regular input + cache creation + cache read
-    const totalInputUsed = usage.inputTokens + usage.cacheCreationTokens + usage.cacheReadTokens;
-    const contextUsageRatio = totalInputUsed / contextLimit;
-    const contextUsagePercent = (contextUsageRatio * 100).toFixed(2);
+  const contextLimit = modelInfo.limit.context;
+  const outputLimit = modelInfo.limit.output;
+  const subSessions = tokenUsage?.subSessions || [];
+  const hasMultipleSubSessions = subSessions.length > 1;
 
-    await log(' Context window:');
-    await log(` Used: ${formatNumber(totalInputUsed)} tokens`);
-    await log(` Limit: ${formatNumber(contextLimit)} tokens`);
-    await log(` Ratio: ${contextUsageRatio.toFixed(4)} (${contextUsagePercent}%)`);
-  }
-
-  // Output tokens usage
-  if (modelInfo.limit.output) {
-    const outputLimit = modelInfo.limit.output;
-    const outputUsageRatio = usage.outputTokens / outputLimit;
-    const outputUsagePercent = (outputUsageRatio * 100).toFixed(2);
-
-    await log(' Output tokens:');
-    await log(` Used: ${formatNumber(usage.outputTokens)} tokens`);
-    await log(` Limit: ${formatNumber(outputLimit)} tokens`);
-    await log(` Ratio: ${outputUsageRatio.toFixed(4)} (${outputUsagePercent}%)`);
-  }
-
-  // Total session tokens (input + cache_creation + output)
-  const totalSessionTokens = usage.inputTokens + usage.cacheCreationTokens + usage.outputTokens;
-  await log(` Total session tokens: ${formatNumber(totalSessionTokens)}`);
-};
-
-/**
- * Display sub-session breakdown when compactification events occurred (Issue #1491)
- * @param {Object} tokenUsage - Token usage data with subSessions and compactifications
- * @param {Object} modelInfo - Model info with context/output limits
- * @param {Function} log - Logging function
- */
-export const displaySubSessionStats = async (tokenUsage, modelInfo, log) => {
-  if (!tokenUsage.subSessions || !tokenUsage.compactifications) return;
-
-  const contextLimit = modelInfo?.limit?.context;
-  await log(`\n 🔄 Compactification events: ${tokenUsage.compactifications.length}`);
-
-  for (let i = 0; i < tokenUsage.subSessions.length; i++) {
-    const sub = tokenUsage.subSessions[i];
-    const totalInput = sub.inputTokens + sub.cacheCreationTokens + sub.cacheReadTokens;
-    const label = i === 0 ? 'Initial session' : `After compactification #${i}`;
-
-    await log(` Sub-session ${i + 1} (${label}):`);
-    await log(` Messages: ${sub.messageCount}`);
-    await log(` Context used: ${formatNumber(totalInput)} tokens`);
+  if (hasMultipleSubSessions) {
+    await log(' Sub sessions (between compact events):');
+    for (let i = 0; i < subSessions.length; i++) {
+      const sub = subSessions[i];
+      const subPeak = sub.peakContextUsage || 0;
+      let line = ` ${i + 1}. `;
+      if (contextLimit && subPeak > 0) {
+        const pct = ((subPeak / contextLimit) * 100).toFixed(0);
+        line += `${formatNumber(subPeak)} / ${formatNumber(contextLimit)} input tokens (${pct}%)`;
+      } else {
+        const subTotal = sub.inputTokens + sub.cacheCreationTokens + sub.cacheReadTokens;
+        line += `${formatNumber(subTotal)} input tokens`;
+      }
+      if (outputLimit) {
+        const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
+        line += `; ${formatNumber(sub.outputTokens)} / ${formatNumber(outputLimit)} output tokens (${outPct}%)`;
+      } else {
+        line += `; ${formatNumber(sub.outputTokens)} output tokens`;
+      }
+      await log(line);
+    }
+  } else {
+    // Single sub-session: simplified format
+    const peakContext = usage.peakContextUsage || 0;
     if (contextLimit) {
-      const pct = ((totalInput / contextLimit) * 100).toFixed(2);
-      await log(` Context usage: ${pct}% of ${formatNumber(contextLimit)}`);
+      if (peakContext > 0) {
+        const pct = ((peakContext / contextLimit) * 100).toFixed(0);
+        await log(` Max context window: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} input tokens (${pct}%)`);
+      }
+    }
+    if (outputLimit) {
+      const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
+      await log(` Max output tokens: ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} output tokens (${outPct}%)`);
    }
-    await log(` Output: ${formatNumber(sub.outputTokens)} tokens`);
  }
 
-  // Show compactification details
-  for (let i = 0; i < tokenUsage.compactifications.length; i++) {
-    const comp = tokenUsage.compactifications[i];
-    let detail = ` Compactification #${i + 1}: trigger=${comp.trigger}`;
-    if (comp.preTokens) detail += `, pre-compaction tokens=${formatNumber(comp.preTokens)}`;
-    await log(detail);
-  }
+  // Cumulative totals
+  const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
+  const cachedTokens = usage.cacheReadTokens;
+  let totalLine = ` Total input tokens: ${formatNumber(totalInputNonCached)}`;
+  if (cachedTokens > 0) totalLine += ` + ${formatNumber(cachedTokens)} cached`;
+  await log(totalLine);
+  await log(` Total output tokens: ${formatNumber(usage.outputTokens)}`);
 };
 
 /**
- * Display stream vs JSONL token comparison (Issue #1491)
- * Shows independent calculation from stream events vs JSONL session file
- * @param {Object} streamTokenUsage - Token usage accumulated from stream JSON events
- * @param {Object} jsonlTokenUsage - Token usage calculated from JSONL session file
- * @param {Function} log - Logging function
+ * Format a token count with K/M suffix for compact display
+ * @param {number} tokens - Token count
+ * @returns {string} Formatted string like "850K" or "1.5M"
  */
-export const displayTokenComparison = async (streamTokenUsage, jsonlTokenUsage, log) => {
-  if (!streamTokenUsage || !jsonlTokenUsage) return;
-
-  const streamTotal = streamTokenUsage.inputTokens + streamTokenUsage.cacheCreationTokens + streamTokenUsage.outputTokens;
-  const jsonlTotal = jsonlTokenUsage.inputTokens + jsonlTokenUsage.cacheCreationTokens + jsonlTokenUsage.outputTokens;
-
-  await log('\n 🔍 Token calculation comparison:');
-  await log(` Stream JSON events: ${formatNumber(streamTotal)} tokens (${streamTokenUsage.eventCount} events)`);
-  await log(` JSONL session file: ${formatNumber(jsonlTotal)} tokens`);
-
-  if (streamTotal !== jsonlTotal) {
-    const diff = jsonlTotal - streamTotal;
-    const pct = streamTotal > 0 ? ((diff / streamTotal) * 100).toFixed(2) : 'N/A';
-    await log(` Difference: ${formatNumber(Math.abs(diff))} tokens (${diff > 0 ? '+' : ''}${pct}%)`);
-  } else {
-    await log(' Match: calculations are consistent');
-  }
+const formatTokensCompact = tokens => {
+  if (tokens >= 1000000) return `${(tokens / 1000000).toFixed(tokens % 1000000 === 0 ? 0 : 1)}M`;
+  if (tokens >= 1000) return `${(tokens / 1000).toFixed(tokens % 1000 === 0 ? 0 : 1)}K`;
+  return tokens.toLocaleString();
 };
 
 /**
- * Build budget stats string for GitHub PR comments (Issue #1491)
- * Similar to buildCostInfoString but for token budget statistics
+ * Build budget stats string for GitHub PR comments (Issue #1491, #1501)
+ * Format requested by user: sub-sessions between compactification events,
+ * per-model breakdown, cumulative totals with cached tokens shown separately.
  * @param {Object} tokenUsage - Token usage data from calculateSessionTokens
- * @param {Object|null} streamTokenUsage - Token usage from stream JSON events
+ * @param {Object|null} streamTokenUsage - Token usage from stream JSON events (used for comparison, not displayed)
  * @returns {string} Formatted markdown string for PR comment
  */
-export const buildBudgetStatsString = (tokenUsage, streamTokenUsage) => {
+export const buildBudgetStatsString = tokenUsage => {
   if (!tokenUsage) return '';
 
-  let stats = '\n\n### 📊 **Token budget statistics:**';
+  let stats = '\n\n### 📊 **Context and tokens usage:**';
 
   // Per-model breakdown
   if (tokenUsage.modelUsage) {
     const modelIds = Object.keys(tokenUsage.modelUsage);
+    const isMultiModel = modelIds.length > 1;
+
     for (const modelId of modelIds) {
       const usage = tokenUsage.modelUsage[modelId];
       const modelName = usage.modelName || modelId;
       const contextLimit = usage.modelInfo?.limit?.context;
       const outputLimit = usage.modelInfo?.limit?.output;
-      const totalInput = usage.inputTokens + usage.cacheCreationTokens + usage.cacheReadTokens;
 
-      if (modelIds.length > 1) stats += `\n- **${modelName}**:`;
+      if (isMultiModel) stats += `\n\n**${modelName}:**`;
 
-      if (contextLimit) {
-        const contextPct = ((totalInput / contextLimit) * 100).toFixed(2);
-        stats += `\n- Context window: ${totalInput.toLocaleString()} / ${contextLimit.toLocaleString()} tokens (${contextPct}%)`;
-      } else {
-        stats += `\n- Context tokens used: ${totalInput.toLocaleString()}`;
-      }
+      // Sub-session display (Issue #1501: show per sub-session stats)
+      const subSessions = tokenUsage.subSessions || [];
+      const hasMultipleSubSessions = subSessions.length > 1;
 
-      if (outputLimit) {
-        const outputPct = ((usage.outputTokens / outputLimit) * 100).toFixed(2);
-        stats += `\n- Output tokens: ${usage.outputTokens.toLocaleString()} / ${outputLimit.toLocaleString()} tokens (${outputPct}%)`;
+      if (hasMultipleSubSessions) {
+        // Multiple sub-sessions: show numbered list
+        stats += '\n\nSub sessions (between compact events):';
+        for (let i = 0; i < subSessions.length; i++) {
+          const sub = subSessions[i];
+          const subPeakContext = sub.peakContextUsage || 0;
+          const subTotalInput = sub.inputTokens + sub.cacheCreationTokens + sub.cacheReadTokens;
+          let line = `\n${i + 1}. `;
+          if (contextLimit && subPeakContext > 0) {
+            const pct = ((subPeakContext / contextLimit) * 100).toFixed(0);
+            line += `${formatTokensCompact(subPeakContext)} / ${formatTokensCompact(contextLimit)} input tokens (${pct}%)`;
+          } else {
+            line += `${formatTokensCompact(subTotalInput)} input tokens`;
+          }
+          if (outputLimit) {
+            const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
+            line += `; ${formatTokensCompact(sub.outputTokens)} / ${formatTokensCompact(outputLimit)} output tokens (${outPct}%)`;
+          } else {
+            line += `; ${formatTokensCompact(sub.outputTokens)} output tokens`;
+          }
+          stats += line;
+        }
       } else {
-        stats += `\n- Output tokens: ${usage.outputTokens.toLocaleString()}`;
+        // Single sub-session (or no sub-sessions): simplified format
+        const peakContext = usage.peakContextUsage || 0;
+        if (contextLimit) {
+          if (peakContext > 0) {
+            const pct = ((peakContext / contextLimit) * 100).toFixed(0);
+            stats += `\n- Max context window: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} input tokens (${pct}%)`;
+          } else {
+            const totalInput = usage.inputTokens + usage.cacheCreationTokens + usage.cacheReadTokens;
+            const pct = ((totalInput / contextLimit) * 100).toFixed(0);
+            stats += `\n- Context window: ${formatTokensCompact(totalInput)} / ${formatTokensCompact(contextLimit)} tokens (${pct}%)`;
+          }
+        }
+        if (outputLimit) {
+          const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
+          stats += `\n- Max output tokens: ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} output tokens (${outPct}%)`;
        }
      }
-    }
-  }
 
-  // Sub-session breakdown if compactification occurred
-  if (tokenUsage.subSessions && tokenUsage.compactifications) {
-    stats += `\n- Compactifications: ${tokenUsage.compactifications.length}`;
-    for (let i = 0; i < tokenUsage.subSessions.length; i++) {
-      const sub = tokenUsage.subSessions[i];
-      const totalInput = sub.inputTokens + sub.cacheCreationTokens + sub.cacheReadTokens;
-      const label = i === 0 ? 'initial' : `after compactification #${i}`;
-      stats += `\n - Sub-session ${i + 1} (${label}): ${totalInput.toLocaleString()} context, ${sub.outputTokens.toLocaleString()} output, ${sub.messageCount} messages`;
+      // Cumulative totals: input tokens + cached shown separately
+      const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
+      const cachedTokens = usage.cacheReadTokens;
+      stats += `\n\nTotal input tokens: ${formatTokensCompact(totalInputNonCached)}`;
+      if (cachedTokens > 0) stats += ` + ${formatTokensCompact(cachedTokens)} cached`;
+      stats += `\nTotal output tokens: ${formatTokensCompact(usage.outputTokens)} output`;
     }
   }
 
-  // Stream vs JSONL comparison
-  if (streamTokenUsage) {
-    const streamTotal = streamTokenUsage.inputTokens + streamTokenUsage.cacheCreationTokens + streamTokenUsage.outputTokens;
-    const jsonlTotal = tokenUsage.inputTokens + tokenUsage.cacheCreationTokens + tokenUsage.outputTokens;
-    stats += `\n- Own calculation (stream): ${streamTotal.toLocaleString()} tokens (${streamTokenUsage.eventCount} events)`;
-    stats += `\n- JSONL calculation: ${jsonlTotal.toLocaleString()} tokens`;
-    if (streamTotal !== jsonlTotal) {
-      const diff = jsonlTotal - streamTotal;
-      const pct = streamTotal > 0 ? ((diff / streamTotal) * 100).toFixed(2) : 'N/A';
-      stats += ` (diff: ${diff > 0 ? '+' : ''}${pct}%)`;
-    }
-  }
+  // Stream vs JSONL comparison — kept for internal diagnostics only in verbose/debug mode
+  // Not shown to users per feedback (Issue #1501 PR comment)
 
   return stats;
 };
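The new formatTokensCompact helper drives the compact numbers in the PR comment. Its expected behavior, read directly off the implementation above (the sample calls here are illustrative, not part of the package):

```js
// Behavior of formatTokensCompact as defined above (sample calls for illustration):
formatTokensCompact(850000);  // "850K" (exact multiple of 1K drops the decimal)
formatTokensCompact(1500000); // "1.5M"
formatTokensCompact(1000000); // "1M" (exact multiple of 1M drops the decimal)
formatTokensCompact(96200);   // "96.2K"
formatTokensCompact(950);     // "950" (below 1K falls through to toLocaleString())
```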
@@ -12,7 +12,7 @@ import { timeouts, retryLimits, claudeCode, getClaudeEnv, getThinkingLevelToToke
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { createInteractiveHandler } from './interactive-mode.lib.mjs';
 import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
-import { displayBudgetStats, displaySubSessionStats, displayTokenComparison, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison } from './claude.budget-stats.lib.mjs';
+import { displayBudgetStats, createEmptySubSessionUsage, accumulateModelUsage, displayModelUsage, displayCostComparison } from './claude.budget-stats.lib.mjs';
 import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
 import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
 import { CLAUDE_MODELS as availableModels } from './models/index.mjs'; // Issue #1221
@@ -497,6 +497,15 @@ export const calculateSessionTokens = async (sessionId, tempDir) => {
   }
   // Initialize per-model usage tracking
   const modelUsage = {};
+  // Issue #1501: Deduplicate JSONL entries by message ID (upstream: anthropics/claude-code#6805)
+  // Claude Code's stream-json mode splits single API responses with multiple content blocks
+  // into separate JSONL entries, each with the same message ID and identical usage stats.
+  const seenMessageIds = new Set();
+  let duplicateCount = 0;
+  // Issue #1501: Track peak context usage per request (not cumulative)
+  // The context window limit is per-request, so we track the max single-request fill.
+  const peakContextByModel = {};
+  let globalPeakContext = 0;
   // Issue #1491: Track sub-sessions between compactification events
   const subSessions = [];
   let currentSubSession = createEmptySubSessionUsage();
@@ -524,14 +533,39 @@ export const calculateSessionTokens = async (sessionId, tempDir) => {
         continue;
       }
       if (entry.message && entry.message.usage && entry.message.model) {
+        // Issue #1501: Skip duplicate JSONL entries (same message ID = same API response)
+        const msgId = entry.message.id;
+        if (msgId) {
+          if (seenMessageIds.has(msgId)) {
+            duplicateCount++;
+            continue; // Skip — already counted this message's usage
+          }
+          seenMessageIds.add(msgId);
+        }
         accumulateModelUsage(modelUsage, entry);
-        // Issue #1491: Also track per-sub-session usage
+        // Issue #1501: Track peak context usage per single API request
         const usage = entry.message.usage;
+        const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0) + (usage.cache_read_input_tokens || 0);
+        const model = entry.message.model;
+        if (requestContext > (peakContextByModel[model] || 0)) {
+          peakContextByModel[model] = requestContext;
+        }
+        if (requestContext > globalPeakContext) {
+          globalPeakContext = requestContext;
+        }
+        // Issue #1491: Also track per-sub-session usage
         if (usage.input_tokens) currentSubSession.inputTokens += usage.input_tokens;
         if (usage.cache_creation_input_tokens) currentSubSession.cacheCreationTokens += usage.cache_creation_input_tokens;
         if (usage.cache_read_input_tokens) currentSubSession.cacheReadTokens += usage.cache_read_input_tokens;
         if (usage.output_tokens) currentSubSession.outputTokens += usage.output_tokens;
         currentSubSession.messageCount++;
+        // Issue #1501: Track peak context and output per sub-session
+        if (requestContext > currentSubSession.peakContextUsage) {
+          currentSubSession.peakContextUsage = requestContext;
+        }
+        if ((usage.output_tokens || 0) > currentSubSession.peakOutputUsage) {
+          currentSubSession.peakOutputUsage = usage.output_tokens || 0;
+        }
       }
     } catch {
       // Skip lines that aren't valid JSON
@@ -561,6 +595,8 @@ export const calculateSessionTokens = async (sessionId, tempDir) => {
   // Calculate cost for each model and store all characteristics
   for (const [modelId, usage] of Object.entries(modelUsage)) {
     const modelInfo = modelInfoMap[modelId];
+    // Issue #1501: Attach peak context usage per model
+    usage.peakContextUsage = peakContextByModel[modelId] || 0;
     // Calculate cost using pricing API
     if (modelInfo) {
       const costData = calculateModelCost(usage, modelInfo, true);
@@ -604,8 +640,11 @@ export const calculateSessionTokens = async (sessionId, tempDir) => {
     outputTokens: totalOutputTokens,
     totalTokens,
     totalCostUSD: hasCostData ? totalCostUSD : null,
-    // Issue #1491: Sub-session and compactification data
-    subSessions: subSessions.length > 1 ? subSessions : null, // Only include if compactification occurred
+    // Issue #1501: Peak context usage (max single-request fill) and dedup stats
+    peakContextUsage: globalPeakContext,
+    duplicateEntriesSkipped: duplicateCount,
+    // Issue #1491/#1501: Sub-session and compactification data (always include for display)
+    subSessions,
     compactifications: compactifications.length > 0 ? compactifications : null,
   };
 } catch (readError) {
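The dedup guard above targets upstream anthropics/claude-code#6805: one API response with several content blocks is written as several JSONL entries sharing a message.id and carrying identical usage. A minimal standalone sketch of the pattern (the JSONL lines are invented for illustration):

```js
// Sketch: two JSONL lines written for ONE API response (same message.id),
// the duplication described in anthropics/claude-code#6805.
const lines = [
  '{"message":{"id":"msg_01","model":"m","usage":{"input_tokens":1000,"output_tokens":50}}}',
  '{"message":{"id":"msg_01","model":"m","usage":{"input_tokens":1000,"output_tokens":50}}}',
  '{"message":{"id":"msg_02","model":"m","usage":{"input_tokens":1100,"output_tokens":80}}}',
];

const seen = new Set();
let inputTokens = 0;
for (const line of lines) {
  const entry = JSON.parse(line);
  const id = entry.message?.id;
  if (id && seen.has(id)) continue; // count each API response once
  if (id) seen.add(id);
  inputTokens += entry.message.usage.input_tokens;
}
console.log(inputTokens); // 2100 after dedup; 3100 if both copies were counted
```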
@@ -1248,6 +1287,13 @@ export const executeClaudeCommand = async params => {
   try {
     const tokenUsage = await calculateSessionTokens(sessionId, tempDir);
     if (tokenUsage) {
+      // Issue #1501: Log deduplication stats in verbose mode
+      if (tokenUsage.duplicateEntriesSkipped > 0) {
+        await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
+      }
+      if (tokenUsage.peakContextUsage > 0) {
+        await log(`📊 Peak single-request context: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
+      }
       await log('\n💰 Token Usage Summary:');
       // Display per-model breakdown
       if (tokenUsage.modelUsage) {
@@ -1258,18 +1304,9 @@ export const executeClaudeCommand = async params => {
         await displayModelUsage(usage, log);
         // Display budget stats if flag is enabled
         if (argv.tokensBudgetStats && usage.modelInfo?.limit) {
-          await displayBudgetStats(usage, log);
+          await displayBudgetStats(usage, tokenUsage, log);
         }
       }
-      // Issue #1491: Display sub-session breakdown if compactification occurred
-      if (argv.tokensBudgetStats && tokenUsage.subSessions) {
-        const primaryModelInfo = Object.values(tokenUsage.modelUsage).find(u => u.modelInfo?.limit)?.modelInfo;
-        await displaySubSessionStats(tokenUsage, primaryModelInfo, log);
-      }
-      // Issue #1491: Display stream vs JSONL token comparison
-      if (argv.tokensBudgetStats && streamTokenUsage.eventCount > 0) {
-        await displayTokenComparison(streamTokenUsage, tokenUsage, log);
-      }
       // Show totals if multiple models were used
       if (modelIds.length > 1) {
         await log('\n 📈 Total across all models:');
@@ -368,7 +368,7 @@ export async function attachLogToGitHub(options) {
     resultModelUsage = null, // Issue #1454
     budgetStatsData = null, // Issue #1491: budget stats for comment
   } = options;
-  const budgetStats = budgetStatsData ? buildBudgetStatsString(budgetStatsData.tokenUsage, budgetStatsData.streamTokenUsage) : '';
+  const budgetStats = budgetStatsData ? buildBudgetStatsString(budgetStatsData.tokenUsage) : '';
   const targetName = targetType === 'pr' ? 'Pull Request' : 'Issue';
   const ghCommand = targetType === 'pr' ? 'pr' : 'issue';
   try {
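Assuming a single model and one compaction event, the budgetStats string built by the new buildBudgetStatsString would render in the PR comment roughly like this (the numbers are invented for illustration; the layout follows the code quoted above):

```text
### 📊 **Context and tokens usage:**

Sub sessions (between compact events):
1. 96.2K / 200K input tokens (48%); 2.1K / 8K output tokens (26%)
2. 110.5K / 200K input tokens (55%); 1.9K / 8K output tokens (24%)

Total input tokens: 12.3K + 180K cached
Total output tokens: 3.9K output
```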
@@ -558,25 +558,26 @@ function validateGitHubUrl(args, options = {}) {
   return { valid: true, parsed, normalizedUrl: url };
 }
 
-/**
- * Escape special characters for Telegram's legacy Markdown parser.
- * In Telegram's Markdown, these characters need escaping: _ * [ ] ( ) ~ ` > # + - = | { } . !
- * However, for plain text (not inside markup), we primarily need to escape _ and *
- * to prevent them from being interpreted as formatting.
- *
- * @param {string} text - Text to escape
- * @returns {string} Escaped text safe for Markdown parse_mode
- */
-/**
- * Execute a start-screen command and update the initial message with the result.
- * Used by both /solve and /hive commands to reduce code duplication.
- *
- * @param {Object} ctx - Telegram context
- * @param {Object} startingMessage - The initial message to update
- * @param {string} commandName - Command name (e.g., 'solve' or 'hive')
- * @param {string[]} args - Command arguments
- * @param {string} infoBlock - Info block with request details
- */
+// Issue #1460/#1497: safeReply - try Markdown first, fall back to plain text on parsing errors
+async function safeReply(ctx, text, options = {}) {
+  try {
+    return await ctx.reply(text, { parse_mode: 'Markdown', ...options });
+  } catch (error) {
+    const isParsingError = error.message && (error.message.includes("can't parse entities") || error.message.includes("Can't parse entities") || error.message.includes("can't find end of") || (error.message.includes('Bad Request') && error.message.includes('400')));
+    if (!isParsingError) throw error;
+    console.error(`[telegram-bot] safeReply: Markdown parsing failed: ${error.message}`);
+    console.error(`[telegram-bot] safeReply: Failing message (${Buffer.byteLength(text, 'utf-8')} bytes): ${text}`);
+    const plainText = text
+      .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1 ($2)')
+      .replace(/\\_/g, '_')
+      .replace(/\\\*/g, '*')
+      .replace(/\*([^*]+)\*/g, '$1')
+      .replace(/`([^`]+)`/g, '$1');
+    return await ctx.reply(plainText, { ...options, parse_mode: undefined });
+  }
+}
+
+// Execute a start-screen command and update the initial message with the result
 async function executeAndUpdateMessage(ctx, startingMessage, commandName, args, infoBlock) {
   const result = await executeStartScreen(commandName, args);
   const { chat, message_id } = startingMessage;
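A usage sketch for the new safeReply helper (the message text is invented): a reply whose Markdown fails Telegram's parser, such as an unmatched backtick pulled in from user input, is delivered as stripped plain text instead of throwing the 'Failed to send formatted message' error fixed in 1.38.1.

```js
// Sketch: Telegram rejects the unmatched backtick ("can't find end of the entity"),
// safeReply catches the parse error and resends the message with Markdown stripped.
await safeReply(ctx, 'Branch `feature/foo was rejected', {
  reply_to_message_id: ctx.message.message_id,
});
```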
@@ -914,8 +915,7 @@ async function handleSolveCommand(ctx) {
       if (VERBOSE) {
         console.log('[VERBOSE] Multiple GitHub URLs found in replied message');
       }
-      await ctx.reply(`❌ ${extraction.error}`, {
-        parse_mode: 'Markdown',
+      await safeReply(ctx, `❌ ${escapeMarkdown(extraction.error)}`, {
         reply_to_message_id: ctx.message.message_id,
       });
       return;
@@ -931,7 +931,7 @@ async function handleSolveCommand(ctx) {
       if (VERBOSE) {
         console.log('[VERBOSE] No GitHub URL found in replied message');
       }
-      await ctx.reply('❌ No GitHub issue/PR link found in the replied message.\n\nExample: Reply to a message containing a GitHub issue link with `/solve`\n\nOr with options: `/solve --model opus`', { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+      await safeReply(ctx, '❌ No GitHub issue/PR link found in the replied message.\n\nExample: Reply to a message containing a GitHub issue link with `/solve`\n\nOr with options: `/solve --model opus`', { reply_to_message_id: ctx.message.message_id });
       return;
     }
   }
@@ -943,7 +943,7 @@ async function handleSolveCommand(ctx) {
       errorMsg += `\n\n💡 Did you mean: \`${validation.suggestion}\``;
     }
     errorMsg += '\n\nExample: `/solve https://github.com/owner/repo/issues/123`\n\nOr reply to a message containing a GitHub link with `/solve`';
-    await ctx.reply(errorMsg, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, errorMsg, { reply_to_message_id: ctx.message.message_id });
     return;
   }
 
@@ -963,19 +963,19 @@ async function handleSolveCommand(ctx) {
   // Validate model name with helpful error message (before yargs validation)
   const modelError = validateModelInArgs(args, solveTool);
   if (modelError) {
-    await ctx.reply(`❌ ${modelError}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ ${escapeMarkdown(modelError)}`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
   // Issue #1482: Validate --base-branch early to reject URLs and invalid branch names
   const branchError = validateBranchInArgs(args);
   if (branchError) {
-    await ctx.reply(`❌ ${branchError}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ ${escapeMarkdown(branchError)}`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
   // Issue #1092: Detect malformed flag patterns like "-- model" (space after --)
   const { malformed, errors: malformedErrors } = detectMalformedFlags(args);
   if (malformed.length > 0) {
-    await ctx.reply(`❌ ${malformedErrors.join('\n')}\n\nPlease check your option syntax.`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ ${escapeMarkdown(malformedErrors.join('\n'))}\n\nPlease check your option syntax.`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
   // Validate merged arguments using solve's yargs config
@@ -994,8 +994,7 @@ async function handleSolveCommand(ctx) {
 
     testYargs.parse(args);
   } catch (error) {
-    await ctx.reply(`❌ Invalid options: ${error.message || String(error)}\n\nUse /help to see available options`, {
-      parse_mode: 'Markdown',
+    await safeReply(ctx, `❌ Invalid options: ${escapeMarkdown(error.message || String(error))}\n\nUse /help to see available options`, {
       reply_to_message_id: ctx.message.message_id,
     });
     return;
@@ -1019,7 +1018,7 @@ async function handleSolveCommand(ctx) {
   const existingItem = solveQueue.findByUrl(normalizedUrl);
   if (existingItem) {
     const statusText = existingItem.status === 'starting' || existingItem.status === 'started' ? 'being processed' : 'already in the queue';
-    await ctx.reply(`❌ This URL is ${statusText}.\n\nURL: ${escapeMarkdown(normalizedUrl)}\nStatus: ${existingItem.status}\n\n💡 Use /solve_queue to check the queue status.`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ This URL is ${statusText}.\n\nURL: ${escapeMarkdown(normalizedUrl)}\nStatus: ${existingItem.status}\n\n💡 Use /solve\\_queue to check the queue status.`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
 
@@ -1031,18 +1030,18 @@ async function handleSolveCommand(ctx) {
   // their command cannot be processed (e.g., disk full, server maintenance pending).
   // See: https://github.com/link-assistant/hive-mind/issues/1267
   if (check.rejected) {
-    await ctx.reply(`❌ Solve command rejected.\n\n${infoBlock}\n\n🚫 Reason: ${check.rejectReason}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ Solve command rejected.\n\n${infoBlock}\n\n🚫 Reason: ${escapeMarkdown(check.rejectReason || 'Unknown')}`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
 
   if (check.canStart && queueStats.queued === 0) {
-    const startingMessage = await ctx.reply(`🚀 Starting solve command...\n\n${infoBlock}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    const startingMessage = await safeReply(ctx, `🚀 Starting solve command...\n\n${infoBlock}`, { reply_to_message_id: ctx.message.message_id });
     await executeAndUpdateMessage(ctx, startingMessage, 'solve', args, infoBlock);
   } else {
     const queueItem = solveQueue.enqueue({ url: normalizedUrl, args, ctx, requester, infoBlock, tool: solveTool });
     let queueMessage = `📋 Solve command queued (position #${queueStats.queued + 1})\n\n${infoBlock}`;
-    if (check.reason) queueMessage += `\n\n⏳ Waiting: ${check.reason}`;
-    const queuedMessage = await ctx.reply(queueMessage, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    if (check.reason) queueMessage += `\n\n⏳ Waiting: ${escapeMarkdown(check.reason)}`;
+    const queuedMessage = await safeReply(ctx, queueMessage, { reply_to_message_id: ctx.message.message_id });
     queueItem.messageInfo = { chatId: queuedMessage.chat.id, messageId: queuedMessage.message_id };
     if (!solveQueue.executeCallback) solveQueue.executeCallback = createQueueExecuteCallback(executeStartScreen);
   }
@@ -1122,7 +1121,7 @@ async function handleHiveCommand(ctx) {
     let errorMsg = `❌ ${validation.error}`;
     if (validation.suggestion) errorMsg += `\n\n💡 Did you mean: \`${escapeMarkdown(validation.suggestion)}\``;
     errorMsg += '\n\nExample: `/hive https://github.com/owner/repo`';
-    await ctx.reply(errorMsg, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, errorMsg, { reply_to_message_id: ctx.message.message_id });
     return;
   }
   // Normalize issues_list/pulls_list to base repo URL, or use cleaned URL
@@ -1149,13 +1148,13 @@ async function handleHiveCommand(ctx) {
   // Validate model name with helpful error message (before yargs validation)
   const hiveModelError = validateModelInArgs(args, hiveTool);
   if (hiveModelError) {
-    await ctx.reply(`❌ ${hiveModelError}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ ${escapeMarkdown(hiveModelError)}`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
   // Issue #1482: Validate branch flags early to reject URLs and invalid branch names
   const hiveBranchError = validateBranchInArgs(args);
   if (hiveBranchError) {
-    await ctx.reply(`❌ ${hiveBranchError}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+    await safeReply(ctx, `❌ ${escapeMarkdown(hiveBranchError)}`, { reply_to_message_id: ctx.message.message_id });
     return;
   }
 
@@ -1175,8 +1174,7 @@ async function handleHiveCommand(ctx) {
 
     testYargs.parse(args);
   } catch (error) {
-    await ctx.reply(`❌ Invalid options: ${error.message || String(error)}\n\nUse /help to see available options`, {
-      parse_mode: 'Markdown',
+    await safeReply(ctx, `❌ Invalid options: ${escapeMarkdown(error.message || String(error))}\n\nUse /help to see available options`, {
       reply_to_message_id: ctx.message.message_id,
     });
     return;
@@ -1193,7 +1191,7 @@ async function handleHiveCommand(ctx) {
     infoBlock += `${userOptionsRaw ? '\n' : '\n\n'}🔒 Locked options: ${escapeMarkdown(hiveOverrides.join(' '))}`;
   }
 
-  const startingMessage = await ctx.reply(`🚀 Starting hive command...\n\n${infoBlock}`, { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+  const startingMessage = await safeReply(ctx, `🚀 Starting hive command...\n\n${infoBlock}`, { reply_to_message_id: ctx.message.message_id });
   await executeAndUpdateMessage(ctx, startingMessage, 'hive', args, infoBlock);
 }
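One helper these hunks rely on but do not show is escapeMarkdown. Based on the removed doc comment earlier in the diff (escape _ and * in plain text so Telegram's legacy Markdown parser does not read them as formatting), a hypothetical minimal version could look like the sketch below; the package's actual implementation is not shown in this diff and may differ.

```js
// Hypothetical sketch only: the real escapeMarkdown is not part of this diff.
// Inferred from the removed doc comment: escape _ and * so user-provided text
// is not interpreted as formatting under Telegram's Markdown parse mode.
const escapeMarkdown = text => String(text).replace(/([_*])/g, '\\$1');

escapeMarkdown('solve_queue is *busy*'); // -> "solve\_queue is \*busy\*"
```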