@link-assistant/hive-mind 1.58.0 → 1.59.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,6 +47,11 @@ export const accumulateModelUsage = (modelUsageMap, entry) => {
  }
  if (usage.cache_read_input_tokens) modelUsageMap[model].cacheReadTokens += usage.cache_read_input_tokens;
  if (usage.output_tokens) modelUsageMap[model].outputTokens += usage.output_tokens;
+ // Issue #1710: track Anthropic server-tool usage from per-request JSONL entries
+ // so the public-pricing estimate can bill them at the documented per-request rate.
+ if (usage.server_tool_use?.web_search_requests) {
+   modelUsageMap[model].webSearchRequests += usage.server_tool_use.web_search_requests;
+ }
  };

  /**
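
For orientation, a hedged sketch of the per-request JSONL usage entry this accumulator consumes. The `entry.message.usage` / `entry.message.model` shape is taken from the `calculateSessionTokens` hunk further down; the model name, token counts, and the assumption that `modelUsageMap[model]` starts with zeroed counters are illustrative only.

// Illustrative only: numbers and model name are invented.
const entry = {
  message: {
    model: 'claude-sonnet-4-5',
    usage: {
      input_tokens: 1200,
      cache_creation_input_tokens: 800,
      cache_read_input_tokens: 56000,
      output_tokens: 350,
      server_tool_use: { web_search_requests: 2 },
    },
  },
};
accumulateModelUsage(modelUsageMap, entry);
// modelUsageMap['claude-sonnet-4-5'].webSearchRequests increases by 2; the token counters accumulate as before.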
@@ -109,6 +114,11 @@ export const displayModelUsage = async (usage, log) => {
  await log(` ${label}: ${formatNumber(breakdown[key].tokens)} tokens × $${breakdown[key].costPerMillion}/M = $${new Decimal(breakdown[key].cost).toFixed(6)}`);
  }
  }
+ // Issue #1710: itemise server-tool charges so the residual that puzzled
+ // readers in PR #1707 ($0.04 web_search) is visible in the breakdown.
+ if (breakdown.webSearch && breakdown.webSearch.requests > 0) {
+   await log(` Web search: ${breakdown.webSearch.requests} requests × $${breakdown.webSearch.costPerRequest}/req = $${new Decimal(breakdown.webSearch.cost).toFixed(6)}`);
+ }
  await log(' ─────────────────────────────────');
  await log(` Total: $${new Decimal(usage.costUSD).toFixed(6)}`);
  } else if (usage.modelInfo === null) {
@@ -147,6 +157,52 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
  }
  };

+ /**
+  * Issue #1710: Emit a verbose, machine-friendly trace of every input that
+  * feeds the budget-stats renderer for a single model. Hidden behind
+  * `{ verbose: true }` so it never pollutes the default log, but always
+  * captured when --verbose is set. The trace is what we wished we had had
+  * available *before* filing #1710 — it shows peak vs. cumulative side by
+  * side, splits cache writes from cache reads, and surfaces server-tool
+  * usage (web search) that the public-pricing estimator currently ignores.
+  *
+  * @param {Object} usage - Per-model usage entry from `tokenUsage.modelUsage`.
+  * @param {Object} tokenUsage - Full token usage object (used only for sub-session count).
+  * @param {Function} log - Async logger (must accept a `{verbose}` options arg).
+  */
+ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
+   const modelName = usage.modelName || usage.modelInfo?.name || 'unknown';
+   const limit = usage.modelInfo?.limit || {};
+   const peak = usage.peakContextUsage || 0;
+   const writes5m = usage.cacheCreation5mTokens || 0;
+   const writes1h = usage.cacheCreation1hTokens || 0;
+   const writes = usage.cacheCreationTokens || 0;
+   const reads = usage.cacheReadTokens || 0;
+   const inputs = usage.inputTokens || 0;
+   const outputs = usage.outputTokens || 0;
+   const webSearches = usage.webSearchRequests || 0;
+   const subSessionCount = (tokenUsage?.subSessions || []).length;
+   const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
+
+   await log(`\n 📊 [budget-trace] ${modelName}`, { verbose: true });
+   // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
+   // tracked separately on the cumulative line).
+   await log(` peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
+   await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
+   // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
+   // still surfaces the implied dollar cost so the residual remains debuggable
+   // from the saved log even if a future model lacks pricing data.
+   await log(` server tools: web_search ${webSearches}${webSearches > 0 ? ` (= $${(webSearches * 0.01).toFixed(6)} at $10 / 1k searches)` : ''}`, { verbose: true });
+   if (usage.costUSD !== null && usage.costUSD !== undefined) {
+     await log(` cost (public): $${new Decimal(usage.costUSD).toFixed(6)}`, { verbose: true });
+   }
+   if (usage._resultCostUSD !== null && usage._resultCostUSD !== undefined) {
+     await log(` cost (anthropic result-event): $${new Decimal(usage._resultCostUSD).toFixed(6)}`, { verbose: true });
+   }
+   await log(` sub-session count: ${subSessionCount}`, { verbose: true });
+   await log(` data source: ${source}`, { verbose: true });
+ };
+
  /**
  * Display token budget statistics (context window usage and ratios)
  * @param {Object} usage - Usage data for a model
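
With --verbose set, the new trace renders one block per model along the following lines (the numbers, number formatting, and exact spacing here are illustrative; the template literals above are the source of truth):

 📊 [budget-trace] claude-opus-4-1
 peak request: 78,412 / 200,000 context (largest single-request input + cache_creation, excludes cache_read)
 cumulative: input 12,034, cache_write 57,600 (5m 57,600 / 1h 0), cache_read 1,204,512, output 8,902
 server tools: web_search 4 (= $0.040000 at $10 / 1k searches)
 cost (public): $1.234567
 sub-session count: 2
 data source: jsonl + result-event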
@@ -155,6 +211,10 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
  */
  /**
  * Issue #1526: Updated to use single-line context+output format.
+  * Issue #1710: After the standard rendering, emit a verbose trace of the
+  * raw inputs that fed the renderer (gated behind --verbose),
+  * so future calculation-correctness reports can be triaged
+  * without re-running the session.
  */
  export const displayBudgetStats = async (usage, tokenUsage, log) => {
  const modelInfo = usage.modelInfo;
@@ -173,14 +233,18 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  const peakContext = usage.peakContextUsage || 0;

  if (hasMultipleSubSessions) {
- // Issue #1600: Unified format — numbered list without "Context window:" prefix
+ // Issue #1600: Unified format — numbered list without "Context window:" prefix.
+ // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
+ // are tracked separately on the Total line), and the bullet is now
+ // labelled "peak request:" so a reader does not try to reconcile it with
+ // the cumulative Total figure.
  for (let i = 0; i < subSessions.length; i++) {
  const sub = subSessions[i];
  const subPeak = sub.peakContextUsage || 0;
  const parts = [];
  if (contextLimit && subPeak > 0) {
  const pct = ((subPeak / contextLimit) * 100).toFixed(0);
- parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -194,7 +258,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  const parts = [];
  if (contextLimit) {
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -205,16 +269,16 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  }
  }

- // Cumulative totals — single line
- // Issue #1547: Parenthesized cached format and consistent output format
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
- const cachedTokens = usage.cacheReadTokens;
- let totalLine;
- if (cachedTokens > 0) {
-   totalLine = `(${formatNumber(totalInputNonCached)} + ${formatNumber(cachedTokens)} cached) input tokens`;
- } else {
-   totalLine = `${formatNumber(totalInputNonCached)} input tokens`;
- }
+ // Cumulative totals — single line.
+ // Issue #1547: Parenthesized cached format and consistent output format.
+ // Issue #1710 R4: When cache writes are present, render them as a separate
+ // category instead of folding them into the input figure.
+ let totalLine = buildCumulativeInputPhrase({
+   input: usage.inputTokens || 0,
+   cacheWrites: usage.cacheCreationTokens || 0,
+   cacheReads: usage.cacheReadTokens || 0,
+   format: formatNumber,
+ });
  if (peakContext === 0 && outputLimit) {
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
  totalLine += `, ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} (${outPct}%) output tokens`;
@@ -222,6 +286,9 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
  totalLine += `, ${formatNumber(usage.outputTokens)} output tokens`;
  }
  await log(` Total: ${totalLine}`);
+
+ // Issue #1710: verbose-only, never affects default output.
+ await dumpBudgetTrace(usage, tokenUsage, log);
  };

  /**
@@ -294,6 +361,44 @@ const formatTokensCompact = tokens => {
  return tokens.toLocaleString();
  };

+ /**
+  * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
+  * lines, splitting cache writes and cache reads so neither category is ever
+  * silently fused with raw input tokens.
+  *
+  * Forms (in priority order):
+  * - reads > 0 && writes > 0 → "(X new + W cache writes + Y cache reads) input tokens"
+  * - reads > 0 && writes = 0 → "(X + Y cached) input tokens" (back-compat shape)
+  * - reads = 0 && writes > 0 → "(X new + W cache writes) input tokens"
+  * - reads = 0 && writes = 0 → "X input tokens"
+  *
+  * The legacy `(X + Y cached)` shape is preserved when only cache reads exist
+  * so we don't churn output for the common Opus-only case. The new explicit
+  * forms only appear when cache writes are non-zero (issue #1710 R4).
+  *
+  * @param {Object} opts
+  * @param {number} opts.input - non-cached input tokens (excludes cache writes/reads)
+  * @param {number} opts.cacheWrites - cache_creation_input_tokens (cumulative)
+  * @param {number} opts.cacheReads - cache_read_input_tokens (cumulative)
+  * @param {(n: number) => string} opts.format - formatter (compact or full)
+  * @returns {string} the cumulative input phrase, e.g. "(78K new + 57.6K cache writes) input tokens"
+  */
+ export const buildCumulativeInputPhrase = ({ input, cacheWrites, cacheReads, format }) => {
+   const w = Math.max(0, cacheWrites || 0);
+   const r = Math.max(0, cacheReads || 0);
+   const i = Math.max(0, input || 0);
+   if (w > 0 && r > 0) {
+     return `(${format(i)} new + ${format(w)} cache writes + ${format(r)} cache reads) input tokens`;
+   }
+   if (w > 0) {
+     return `(${format(i)} new + ${format(w)} cache writes) input tokens`;
+   }
+   if (r > 0) {
+     return `(${format(i)} + ${format(r)} cached) input tokens`;
+   }
+   return `${format(i)} input tokens`;
+ };
+
  /**
  * Format sub-sessions list for budget stats display
  * @param {Array} subSessions - Array of sub-session usage objects
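
A quick usage sketch of the four output shapes, with a plain String formatter standing in for formatNumber / formatTokensCompact (token counts invented):

const fmt = n => String(n);
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 57600, cacheReads: 120000, format: fmt });
// → "(78000 new + 57600 cache writes + 120000 cache reads) input tokens"
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 57600, cacheReads: 0, format: fmt });
// → "(78000 new + 57600 cache writes) input tokens"
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 0, cacheReads: 120000, format: fmt });
// → "(78000 + 120000 cached) input tokens" (legacy shape, unchanged)
buildCumulativeInputPhrase({ input: 78000, cacheWrites: 0, cacheReads: 0, format: fmt });
// → "78000 input tokens"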
@@ -317,6 +422,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {

  /**
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
+  * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
+  * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
+  * from the cumulative Total line.
  * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
  * @param {number} contextLimit - Context window limit (null if unknown)
  * @param {number} outputTokens - Output tokens used
@@ -328,7 +436,7 @@ const formatContextOutputLine = (peakContext, contextLimit, outputTokens, output
  const parts = [];
  if (contextLimit && peakContext > 0) {
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
- parts.push(`${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
+ parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
  }
  if (outputLimit) {
  const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -459,20 +567,33 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  } else if (peakContext > 0) {
  stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
  } else if (outputLimit && callCount <= 1) {
- // Issue #1600: Show output-only detalization for sub-agent single sessions
+ // Issue #1600: Sub-agent single sessions previously showed only an output line.
+ // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
+ // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
+ // so peakContext stays at 0; without this fallback the rendered comment loses
+ // the sub-agent's input-token information entirely. Cache writes / reads are
+ // split via the same helper used for the Total line so the two lines stay
+ // arithmetically consistent.
+ const inputPhrase = buildCumulativeInputPhrase({
+   input: usage.inputTokens || 0,
+   cacheWrites: usage.cacheCreationTokens || 0,
+   cacheReads: usage.cacheReadTokens || 0,
+   format: formatTokensCompact,
+ });
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
- stats += `\n- ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
+ stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
  }

- // Cumulative totals per model: input tokens + cached shown separately
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
- const cachedTokens = usage.cacheReadTokens;
- let totalLine;
- if (cachedTokens > 0) {
-   totalLine = `(${formatTokensCompact(totalInputNonCached)} + ${formatTokensCompact(cachedTokens)} cached) input tokens`;
- } else {
-   totalLine = `${formatTokensCompact(totalInputNonCached)} input tokens`;
- }
+ // Cumulative totals per model: input tokens + cached shown separately.
+ // Issue #1710 R4: Cache writes are now their own category (so the displayed
+ // "input tokens" figure never silently fuses 1.25× / 2× cache-write tokens
+ // with regular 1× input tokens — see issue #1710 root cause D).
+ let totalLine = buildCumulativeInputPhrase({
+   input: usage.inputTokens || 0,
+   cacheWrites: usage.cacheCreationTokens || 0,
+   cacheReads: usage.cacheReadTokens || 0,
+   format: formatTokensCompact,
+ });

  // Issue #1600: Output tokens on Total line — skip percentage if already shown above or aggregated
  if (callCount > 1) {
@@ -514,7 +635,11 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
  stats += `\n${i + 1}. ${parts.join(', ')}`;
  }
  } else {
- const avgInput = Math.round((totalInputNonCached + cachedTokens) / callCount);
+ // Estimated per-call breakdown when sub-agent stream tracking did not capture
+ // per-call usage. Includes everything the model actually saw:
+ // input + cache_creation (writes) + cache_read.
+ const aggregateInput = (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0) + (usage.cacheReadTokens || 0);
+ const avgInput = Math.round(aggregateInput / callCount);
  const avgOutput = Math.round(usage.outputTokens / callCount);
  for (let i = 0; i < matchingCalls.length; i++) {
  const parts = [];
@@ -0,0 +1,88 @@
+ #!/usr/bin/env node
+
+ /**
+  * Issue #1710: Per-model cost calculation extracted from claude.lib.mjs to
+  * keep that file under the 1500-line repo cap. Behaviour is unchanged from
+  * the previous in-place implementation.
+  */
+ import Decimal from 'decimal.js-light';
+ import { SERVER_TOOL_PRICING_USD } from './anthropic-server-tool-pricing.lib.mjs';
+
+ /**
+  * Calculate USD cost for a model's usage with optional detailed breakdown.
+  *
+  * Cost components (Issue #1600 uses Decimal for precision):
+  * - input × cost.input / 1M
+  * - cacheWrite × cost.cache_write / 1M
+  * - cacheRead × cost.cache_read / 1M
+  * - output × cost.output / 1M
+  * - webSearch × $0.01 / request (Issue #1710 — see SERVER_TOOL_PRICING_USD)
+  *
+  * @param {Object} usage - per-model usage entry
+  * @param {Object|null} modelInfo - model-info shape (includes `cost` map)
+  * @param {boolean} [includeBreakdown=false] - return `{ total, breakdown }` when true
+  * @returns {number|{total: number, breakdown: Object}}
+  */
+ export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
+   if (!modelInfo || !modelInfo.cost) {
+     return includeBreakdown ? { total: 0, breakdown: null } : 0;
+   }
+   const cost = modelInfo.cost;
+   const million = new Decimal(1000000);
+   const breakdown = {
+     input: { tokens: 0, costPerMillion: 0, cost: 0 },
+     cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
+     cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
+     output: { tokens: 0, costPerMillion: 0, cost: 0 },
+     // Issue #1710: server-side tool usage (web_search) is billed per-request,
+     // independent of token cost. Without this entry the public-pricing total
+     // diverges from Anthropic's reported total by exactly the per-request
+     // rate times the request count — the residual quoted in issue #1710.
+     webSearch: { requests: 0, costPerRequest: 0, cost: 0 },
+   };
+   if (usage.inputTokens && cost.input) {
+     breakdown.input = {
+       tokens: usage.inputTokens,
+       costPerMillion: cost.input,
+       cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
+     };
+   }
+   if (usage.cacheCreationTokens && cost.cache_write) {
+     breakdown.cacheWrite = {
+       tokens: usage.cacheCreationTokens,
+       costPerMillion: cost.cache_write,
+       cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
+     };
+   }
+   if (usage.cacheReadTokens && cost.cache_read) {
+     breakdown.cacheRead = {
+       tokens: usage.cacheReadTokens,
+       costPerMillion: cost.cache_read,
+       cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
+     };
+   }
+   if (usage.outputTokens && cost.output) {
+     breakdown.output = {
+       tokens: usage.outputTokens,
+       costPerMillion: cost.output,
+       cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
+     };
+   }
+   // Issue #1710: bill web_search requests at the documented per-request rate.
+   if (usage.webSearchRequests && SERVER_TOOL_PRICING_USD.web_search.costPerRequest > 0) {
+     const perReq = SERVER_TOOL_PRICING_USD.web_search.costPerRequest;
+     breakdown.webSearch = {
+       requests: usage.webSearchRequests,
+       costPerRequest: perReq,
+       cost: new Decimal(usage.webSearchRequests).mul(new Decimal(perReq)).toNumber(),
+     };
+   }
+   const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).plus(breakdown.webSearch.cost).toNumber();
+   if (includeBreakdown) {
+     return {
+       total: totalCost,
+       breakdown,
+     };
+   }
+   return totalCost;
+ };
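
A hedged usage sketch of the extracted function. The `cost` map fields follow what the function reads; the rates and token counts below are invented, and the web-search contribution depends on the SERVER_TOOL_PRICING_USD constant imported above.

// Illustrative only: pricing numbers are made up, not Anthropic's published rates.
const usage = { inputTokens: 1000000, cacheCreationTokens: 0, cacheReadTokens: 2000000, outputTokens: 100000, webSearchRequests: 4 };
const modelInfo = { cost: { input: 3, cache_write: 3.75, cache_read: 0.3, output: 15 } };
const { total, breakdown } = calculateModelCost(usage, modelInfo, true);
// total = 3.00 (input) + 0.60 (cache reads) + 1.50 (output)
//         + 4 × SERVER_TOOL_PRICING_USD.web_search.costPerRequest (web search)
// breakdown.webSearch is populated only when that per-request rate is > 0.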
@@ -374,6 +374,9 @@ export const executeClaude = async params => {
  owner,
  repo,
  prNumber,
+ // Issue #1708: forwarded so the bidirectional handler can poll
+ // issue title/body changes and uncommitted changes during the session.
+ issueNumber,
  });
  };
  /** Check if a model supports vision (image input) using models.dev API @returns {Promise<boolean>} */
@@ -387,56 +390,10 @@ export const checkModelVisionCapability = async modelId => {
  return false;
  }
  };
- /** Calculate USD cost for a model's usage with detailed breakdown (Issue #1600: uses Decimal for precision) */
- export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
-   if (!modelInfo || !modelInfo.cost) {
-     return includeBreakdown ? { total: 0, breakdown: null } : 0;
-   }
-   const cost = modelInfo.cost;
-   const million = new Decimal(1000000);
-   const breakdown = {
-     input: { tokens: 0, costPerMillion: 0, cost: 0 },
-     cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
-     cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
-     output: { tokens: 0, costPerMillion: 0, cost: 0 },
-   };
-   if (usage.inputTokens && cost.input) {
-     breakdown.input = {
-       tokens: usage.inputTokens,
-       costPerMillion: cost.input,
-       cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
-     };
-   }
-   if (usage.cacheCreationTokens && cost.cache_write) {
-     breakdown.cacheWrite = {
-       tokens: usage.cacheCreationTokens,
-       costPerMillion: cost.cache_write,
-       cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
-     };
-   }
-   if (usage.cacheReadTokens && cost.cache_read) {
-     breakdown.cacheRead = {
-       tokens: usage.cacheReadTokens,
-       costPerMillion: cost.cache_read,
-       cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
-     };
-   }
-   if (usage.outputTokens && cost.output) {
-     breakdown.output = {
-       tokens: usage.outputTokens,
-       costPerMillion: cost.output,
-       cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
-     };
-   }
-   const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).toNumber();
-   if (includeBreakdown) {
-     return {
-       total: totalCost,
-       breakdown,
-     };
-   }
-   return totalCost;
- };
+ // Issue #1710: calculateModelCost extracted to ./claude.cost.lib.mjs to keep
+ // this file under the 1500-line repo cap (see check-file-line-limits CI job).
+ import { calculateModelCost } from './claude.cost.lib.mjs';
+ export { calculateModelCost };
  export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
  const os = (await use('os')).default;
  const homeDir = os.homedir();
@@ -498,8 +455,14 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
  }
  accumulateModelUsage(modelUsage, entry);
  // Issue #1501: Track peak context usage per single API request
+ // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
+ // per-request peaks should reflect *new* input the model received,
+ // not cached prompt context. Cache reads remain visible in the
+ // cumulative Total line as `(X + Y cached)`. This makes the
+ // peak-request value reconcilable with the cumulative non-cached
+ // input figure (instead of mixing semantics across the two lines).
  const usage = entry.message.usage;
- const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0) + (usage.cache_read_input_tokens || 0);
+ const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
  const model = entry.message.model;
  if (requestContext > (peakContextByModel[model] || 0)) {
  peakContextByModel[model] = requestContext;
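
To make the semantic change concrete, a small worked example (token counts invented):

// 1.58.0: peak candidate = input + cache_creation + cache_read
//   1,200 + 800 + 56,000 = 58,000, dominated by cached prompt context.
// 1.59.1: peak candidate = input + cache_creation
//   1,200 + 800 = 2,000, only the new tokens this request actually sent.
const usage = { input_tokens: 1200, cache_creation_input_tokens: 800, cache_read_input_tokens: 56000 };
const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0); // 2000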
@@ -634,6 +597,9 @@ export const executeClaudeCommand = async params => {
  owner,
  repo,
  prNumber,
+ // Issue #1708: enables status streaming (CI/uncommitted/PR-metadata)
+ // and issue body/title polling in setupBidirectionalHandler.
+ issueNumber,
  } = params;
  // Issue #817: Apply bidirectional-mode composition and tool-support validation before running.
  // This may enable argv.interactiveMode, argv.acceptIncommingCommentsAsInput, and
@@ -722,9 +688,11 @@ export const executeClaudeCommand = async params => {
  } else if (argv.interactiveMode) {
  await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
  }
- // Issue #817: Set up bidirectional handler when --accept-incomming-comments-as-input
- // (or composite --bidirectional-interactive-mode) is enabled. Returns null when inactive.
- const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, $, log });
+ // Issue #817 / #1708: Set up bidirectional handler when --accept-incomming-comments-as-input
+ // (or composite --bidirectional-interactive-mode / --auto-input-until-mergeable) is enabled.
+ // Returns null when inactive. issueNumber + tempDir are forwarded so the handler can
+ // poll issue title/body changes and uncommitted changes during the session (Issue #1708).
+ const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, issueNumber, tempDir, $, log });
  const progressMonitor = await initProgressMonitoring(argv, { owner, repo, prNumber, $, log }); // works with or without --interactive-mode
  let execCommand;
  const mappedModel = mapModelToId(argv.model);
@@ -925,6 +893,18 @@ export const executeClaudeCommand = async params => {
  }
  if (data.type === 'message') messageCount++;
  else if (data.type === 'tool_use') toolUseCount++;
+ // Issue #1708: signal busy/idle to the bidirectional handler so
+ // queue-comments-to-input mode can hold frames until the AI is
+ // idle. Any assistant/tool_use/system event means the AI is
+ // actively processing; a result event means the turn is done
+ // and queued frames can flush.
+ if (bidirectionalHandler) {
+   if (data.type === 'assistant' || data.type === 'tool_use' || data.type === 'tool_result') {
+     if (typeof bidirectionalHandler.markAiBusy === 'function') {
+       bidirectionalHandler.markAiBusy();
+     }
+   }
+ }
  if (progressMonitor) await progressMonitor.processStreamEvent(data).catch(e => log(`⚠️ Progress: ${e.message}`, { verbose: true }));
  if (data.type === 'result') {
  if (!resultEventReceived) {
@@ -932,6 +912,15 @@ export const executeClaudeCommand = async params => {
  await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
  resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
  }
+ // Issue #1708: result event = AI is idle and waiting for next
+ // user input. Flush any frames queued by --queue-comments-to-input.
+ if (bidirectionalHandler && typeof bidirectionalHandler.markAiIdle === 'function') {
+   try {
+     await bidirectionalHandler.markAiIdle();
+   } catch (idleErr) {
+     if (argv.verbose) await log(`⚠️ Bidirectional mode: markAiIdle error: ${idleErr.message}`, { verbose: true });
+   }
+ }
  if (data.subtype === 'success') resultSuccessReceived = true;
  if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
  anthropicTotalCostUSD = data.total_cost_usd;
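
The handler side of this busy/idle contract is not part of this diff (it lives in setupBidirectionalHandler). Purely to illustrate the protocol the stream loop drives, a hypothetical minimal queue might look like this:

// Hypothetical sketch, not the actual setupBidirectionalHandler implementation.
const makeIdleQueue = sendFrame => {
  let busy = false;
  const queued = [];
  return {
    markAiBusy() { busy = true; },
    async markAiIdle() {
      busy = false;
      while (queued.length > 0) await sendFrame(queued.shift()); // flush frames held while busy
    },
    async enqueue(frame) {
      if (busy) queued.push(frame);
      else await sendFrame(frame);
    },
  };
};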
@@ -1306,7 +1295,9 @@ export const executeClaudeCommand = async params => {
  await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
  }
  if (tokenUsage.peakContextUsage > 0) {
- await log(`📊 Peak single-request context: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
+ // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
+ // excluding cache_read). Cache reads are still visible separately on the Total line.
+ await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
  }
  await log('\n💰 Token Usage Summary:');
  // Display per-model breakdown
@@ -18,7 +18,11 @@ if (typeof globalThis.use === 'undefined') {
  }
  }

- const getenvModule = await use('getenv');
+ // Issue #1710: use-m occasionally hands back a truncated/corrupt global package
+ // (npm install -g flake on hosted CI). useWithRetry deletes the broken install
+ // dir and re-fetches when the failure is a SyntaxError mid-import.
+ const { useWithRetry } = await import('./use-with-retry.lib.mjs');
+ const getenvModule = await useWithRetry(globalThis.use, 'getenv');
  // Node 24 CJS/ESM interop may return the whole module object instead of the function directly
  const getenv = typeof getenvModule === 'function' ? getenvModule : getenvModule.default || getenvModule;

@@ -91,6 +91,60 @@ export async function getPRCommitShas(owner, repo, prNumber, verbose = false) {
  }
  }

+ /**
+  * Issue #1712: Collect every active (in_progress / pending / queued / waiting / requested)
+  * workflow run on the PR branch — across ALL commits, not only the head SHA.
+  *
+  * Why this exists: when the user watches `/merge`, the GitHub Actions tab shows yellow
+  * dots for every commit that ever had a run, including older commits whose runs were
+  * automatically cancelled by GitHub's concurrency group. The verbose log used to list
+  * only the head-SHA runs, so a user comparing the log to the GitHub UI would see
+  * "1 workflow run" in the log but two yellow dots on screen — looking like a bug.
+  *
+  * Returns runs grouped by SHA, deduplicated by run.id (a single run can be associated
+  * with one SHA, but the same workflow file can produce runs on multiple SHAs).
+  *
+  * @param {string} owner - Repository owner
+  * @param {string} repo - Repository name
+  * @param {number} prNumber - Pull request number
+  * @param {string} headSha - The PR head SHA (used to mark which group is "current")
+  * @param {boolean} verbose - Whether to log verbose output
+  * @param {Function} getWorkflowRunsForSha - Function to get workflow runs for a SHA
+  * @returns {Promise<{groups: Array<{sha: string, isHead: boolean, runs: Array}>, totalActive: number, headActive: number, otherActive: number}>}
+  */
+ export async function getActivePRWorkflowRuns(owner, repo, prNumber, headSha, verbose, getWorkflowRunsForSha) {
+   const shas = await getPRCommitShas(owner, repo, prNumber, false);
+   if (shas.length === 0) {
+     return { groups: [], totalActive: 0, headActive: 0, otherActive: 0 };
+   }
+
+   const ACTIVE_STATUSES = new Set(['in_progress', 'pending', 'queued', 'waiting', 'requested']);
+   const groups = [];
+   const seenRunIds = new Set();
+   let totalActive = 0;
+   let headActive = 0;
+   let otherActive = 0;
+
+   for (const sha of shas) {
+     const runs = await getWorkflowRunsForSha(owner, repo, sha, false);
+     const activeRuns = runs.filter(r => ACTIVE_STATUSES.has(r.status) && !seenRunIds.has(r.id));
+     for (const r of activeRuns) seenRunIds.add(r.id);
+     if (activeRuns.length === 0) continue;
+
+     const isHead = sha === headSha;
+     groups.push({ sha, isHead, runs: activeRuns });
+     totalActive += activeRuns.length;
+     if (isHead) headActive += activeRuns.length;
+     else otherActive += activeRuns.length;
+   }
+
+   if (verbose && totalActive > 0) {
+     console.log(`[VERBOSE] pr-commits: ${totalActive} active workflow run(s) across ${groups.length} commit(s) on PR #${prNumber} (${headActive} on HEAD, ${otherActive} on older commits)`);
+   }
+
+   return { groups, totalActive, headActive, otherActive };
+ }
+
  /**
  * Check that workflow runs for ALL commits on the PR branch have completed.
  * @param {string} owner - Repository owner
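
A hedged usage sketch with a stubbed run fetcher, showing how active runs on older commits are counted separately from the head SHA. It assumes getPRCommitShas resolves to both SHAs for the PR; the SHAs, run ids, and statuses are invented.

// Illustrative only: the stub replaces the real getWorkflowRunsForSha helper.
const runsBySha = {
  abc111: [{ id: 1, status: 'in_progress' }], // older commit, run not yet cancelled
  def222: [{ id: 2, status: 'queued' }, { id: 3, status: 'completed' }], // head SHA
};
const getRuns = async (owner, repo, sha) => runsBySha[sha] || [];
const result = await getActivePRWorkflowRuns('owner', 'repo', 123, 'def222', true, getRuns);
// With shas = ['abc111', 'def222']: totalActive === 2, headActive === 1, otherActive === 1.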