@link-assistant/hive-mind 1.57.3 → 1.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,6 +47,11 @@ export const accumulateModelUsage = (modelUsageMap, entry) => {
47
47
  }
48
48
  if (usage.cache_read_input_tokens) modelUsageMap[model].cacheReadTokens += usage.cache_read_input_tokens;
49
49
  if (usage.output_tokens) modelUsageMap[model].outputTokens += usage.output_tokens;
50
+ // Issue #1710: track Anthropic server-tool usage from per-request JSONL entries
51
+ // so the public-pricing estimate can bill them at the documented per-request rate.
52
+ if (usage.server_tool_use?.web_search_requests) {
53
+ modelUsageMap[model].webSearchRequests += usage.server_tool_use.web_search_requests;
54
+ }
50
55
  };
51
56
 
52
57
  /**
@@ -109,6 +114,11 @@ export const displayModelUsage = async (usage, log) => {
109
114
  await log(` ${label}: ${formatNumber(breakdown[key].tokens)} tokens × $${breakdown[key].costPerMillion}/M = $${new Decimal(breakdown[key].cost).toFixed(6)}`);
110
115
  }
111
116
  }
117
+ // Issue #1710: itemise server-tool charges so the residual that puzzled
118
+ // readers in PR #1707 ($0.04 web_search) is visible in the breakdown.
119
+ if (breakdown.webSearch && breakdown.webSearch.requests > 0) {
120
+ await log(` Web search: ${breakdown.webSearch.requests} requests × $${breakdown.webSearch.costPerRequest}/req = $${new Decimal(breakdown.webSearch.cost).toFixed(6)}`);
121
+ }
112
122
  await log(' ─────────────────────────────────');
113
123
  await log(` Total: $${new Decimal(usage.costUSD).toFixed(6)}`);
114
124
  } else if (usage.modelInfo === null) {
@@ -129,7 +139,9 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
129
139
  const hasAnthropic = anthropicCost !== null && anthropicCost !== undefined;
130
140
  const publicDec = hasPublic ? new Decimal(publicCost) : null;
131
141
  const anthropicDec = hasAnthropic ? new Decimal(anthropicCost) : null;
132
- if (publicDec && anthropicDec && publicDec.toFixed(6) === anthropicDec.toFixed(6)) {
142
+ // Issue #1703: also collapse to the short form when the rounded difference is below display precision,
143
+ // so reports like "Difference: $-0.000000 (-0.00%)" no longer waste two extra lines.
144
+ if (publicDec && anthropicDec && anthropicDec.minus(publicDec).abs().toFixed(6) === '0.000000') {
133
145
  await log(`\n 💰 Cost: $${anthropicDec.toFixed(6)}`);
134
146
  return;
135
147
  }
@@ -145,6 +157,52 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
145
157
  }
146
158
  };
147
159
 
160
+ /**
161
+ * Issue #1710: Emit a verbose, machine-friendly trace of every input that
162
+ * feeds the budget-stats renderer for a single model. Hidden behind
163
+ * `{ verbose: true }` so it never pollutes the default log, but always
164
+ * captured when --verbose is set. The trace is what we wished we had had
165
+ * available *before* filing #1710 — it shows peak vs. cumulative side by
166
+ * side, splits cache writes from cache reads, and surfaces server-tool
167
+ * usage (web search) that the public-pricing estimator currently ignores.
168
+ *
169
+ * @param {Object} usage - Per-model usage entry from `tokenUsage.modelUsage`.
170
+ * @param {Object} tokenUsage - Full token usage object (used only for sub-session count).
171
+ * @param {Function} log - Async logger (must accept a `{verbose}` options arg).
172
+ */
173
+ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
174
+ const modelName = usage.modelName || usage.modelInfo?.name || 'unknown';
175
+ const limit = usage.modelInfo?.limit || {};
176
+ const peak = usage.peakContextUsage || 0;
177
+ const writes5m = usage.cacheCreation5mTokens || 0;
178
+ const writes1h = usage.cacheCreation1hTokens || 0;
179
+ const writes = usage.cacheCreationTokens || 0;
180
+ const reads = usage.cacheReadTokens || 0;
181
+ const inputs = usage.inputTokens || 0;
182
+ const outputs = usage.outputTokens || 0;
183
+ const webSearches = usage.webSearchRequests || 0;
184
+ const subSessionCount = (tokenUsage?.subSessions || []).length;
185
+ const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
186
+
187
+ await log(`\n 📊 [budget-trace] ${modelName}`, { verbose: true });
188
+ // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
189
+ // tracked separately on the cumulative line).
190
+ await log(` peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
191
+ await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
192
+ // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
193
+ // still surfaces the implied dollar cost so the residual remains debuggable
194
+ // from the saved log even if a future model lacks pricing data.
195
+ await log(` server tools: web_search ${webSearches}${webSearches > 0 ? ` (= $${(webSearches * 0.01).toFixed(6)} at $10 / 1k searches)` : ''}`, { verbose: true });
196
+ if (usage.costUSD !== null && usage.costUSD !== undefined) {
197
+ await log(` cost (public): $${new Decimal(usage.costUSD).toFixed(6)}`, { verbose: true });
198
+ }
199
+ if (usage._resultCostUSD !== null && usage._resultCostUSD !== undefined) {
200
+ await log(` cost (anthropic result-event): $${new Decimal(usage._resultCostUSD).toFixed(6)}`, { verbose: true });
201
+ }
202
+ await log(` sub-session count: ${subSessionCount}`, { verbose: true });
203
+ await log(` data source: ${source}`, { verbose: true });
204
+ };
205
+
148
206
  /**
149
207
  * Display token budget statistics (context window usage and ratios)
150
208
  * @param {Object} usage - Usage data for a model
@@ -153,6 +211,10 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
153
211
  */
154
212
  /**
155
213
  * Issue #1526: Updated to use single-line context+output format.
214
+ * Issue #1710: After the standard rendering, emit a verbose trace of the
215
+ * raw inputs that fed the renderer (gated behind --verbose),
216
+ * so future calculation-correctness reports can be triaged
217
+ * without re-running the session.
156
218
  */
157
219
  export const displayBudgetStats = async (usage, tokenUsage, log) => {
158
220
  const modelInfo = usage.modelInfo;
@@ -171,14 +233,18 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
171
233
  const peakContext = usage.peakContextUsage || 0;
172
234
 
173
235
  if (hasMultipleSubSessions) {
174
- // Issue #1600: Unified format — numbered list without "Context window:" prefix
236
+ // Issue #1600: Unified format — numbered list without "Context window:" prefix.
237
+ // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
238
+ // are tracked separately on the Total line), and the bullet is now
239
+ // labelled "peak request:" so a reader does not try to reconcile it with
240
+ // the cumulative Total figure.
175
241
  for (let i = 0; i < subSessions.length; i++) {
176
242
  const sub = subSessions[i];
177
243
  const subPeak = sub.peakContextUsage || 0;
178
244
  const parts = [];
179
245
  if (contextLimit && subPeak > 0) {
180
246
  const pct = ((subPeak / contextLimit) * 100).toFixed(0);
181
- parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
247
+ parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
182
248
  }
183
249
  if (outputLimit) {
184
250
  const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -192,7 +258,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
192
258
  const parts = [];
193
259
  if (contextLimit) {
194
260
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
195
- parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
261
+ parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
196
262
  }
197
263
  if (outputLimit) {
198
264
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -203,16 +269,16 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
203
269
  }
204
270
  }
205
271
 
206
- // Cumulative totals — single line
207
- // Issue #1547: Parenthesized cached format and consistent output format
208
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
209
- const cachedTokens = usage.cacheReadTokens;
210
- let totalLine;
211
- if (cachedTokens > 0) {
212
- totalLine = `(${formatNumber(totalInputNonCached)} + ${formatNumber(cachedTokens)} cached) input tokens`;
213
- } else {
214
- totalLine = `${formatNumber(totalInputNonCached)} input tokens`;
215
- }
272
+ // Cumulative totals — single line.
273
+ // Issue #1547: Parenthesized cached format and consistent output format.
274
+ // Issue #1710 R4: When cache writes are present, render them as a separate
275
+ // category instead of folding them into the input figure.
276
+ let totalLine = buildCumulativeInputPhrase({
277
+ input: usage.inputTokens || 0,
278
+ cacheWrites: usage.cacheCreationTokens || 0,
279
+ cacheReads: usage.cacheReadTokens || 0,
280
+ format: formatNumber,
281
+ });
216
282
  if (peakContext === 0 && outputLimit) {
217
283
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
218
284
  totalLine += `, ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} (${outPct}%) output tokens`;
@@ -220,6 +286,9 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
220
286
  totalLine += `, ${formatNumber(usage.outputTokens)} output tokens`;
221
287
  }
222
288
  await log(` Total: ${totalLine}`);
289
+
290
+ // Issue #1710: verbose-only, never affects default output.
291
+ await dumpBudgetTrace(usage, tokenUsage, log);
223
292
  };
224
293
 
225
294
  /**
@@ -292,6 +361,44 @@ const formatTokensCompact = tokens => {
292
361
  return tokens.toLocaleString();
293
362
  };
294
363
 
364
/**
 * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
 * lines, splitting cache writes and cache reads so neither category is ever
 * silently fused with raw input tokens.
 *
 * Forms (in priority order):
 * - reads > 0 && writes > 0 → "(X new + W cache writes + Y cache reads) input tokens"
 * - reads > 0 && writes = 0 → "(X + Y cached) input tokens" (back-compat shape)
 * - reads = 0 && writes > 0 → "(X new + W cache writes) input tokens"
 * - reads = 0 && writes = 0 → "X input tokens"
 *
 * The legacy `(X + Y cached)` shape is preserved when only cache reads exist
 * so we don't churn output for the common Opus-only case. The new explicit
 * forms only appear when cache writes are non-zero (issue #1710 R4).
 *
 * Negative or nullish counts are clamped to 0 so malformed JSONL entries can
 * never render a negative token figure.
 *
 * @param {Object} opts
 * @param {number} opts.input - non-cached input tokens (excludes cache writes/reads)
 * @param {number} opts.cacheWrites - cache_creation_input_tokens (cumulative)
 * @param {number} opts.cacheReads - cache_read_input_tokens (cumulative)
 * @param {(n: number) => string} [opts.format=String] - formatter (compact or full);
 *   defaults to plain `String` so the helper is safe to call without one
 * @returns {string} the cumulative input phrase, e.g. "(78K new + 57.6K cache writes) input tokens"
 */
export const buildCumulativeInputPhrase = ({ input, cacheWrites, cacheReads, format = n => String(n) }) => {
  const w = Math.max(0, cacheWrites || 0);
  const r = Math.max(0, cacheReads || 0);
  const i = Math.max(0, input || 0);
  if (w > 0 && r > 0) {
    return `(${format(i)} new + ${format(w)} cache writes + ${format(r)} cache reads) input tokens`;
  }
  if (w > 0) {
    return `(${format(i)} new + ${format(w)} cache writes) input tokens`;
  }
  if (r > 0) {
    return `(${format(i)} + ${format(r)} cached) input tokens`;
  }
  return `${format(i)} input tokens`;
};
401
+
295
402
  /**
296
403
  * Format sub-sessions list for budget stats display
297
404
  * @param {Array} subSessions - Array of sub-session usage objects
@@ -315,6 +422,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
315
422
 
316
423
  /**
317
424
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
425
+ * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
426
+ * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
427
+ * from the cumulative Total line.
318
428
  * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
319
429
  * @param {number} contextLimit - Context window limit (null if unknown)
320
430
  * @param {number} outputTokens - Output tokens used
@@ -326,7 +436,7 @@ const formatContextOutputLine = (peakContext, contextLimit, outputTokens, output
326
436
  const parts = [];
327
437
  if (contextLimit && peakContext > 0) {
328
438
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
329
- parts.push(`${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
439
+ parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
330
440
  }
331
441
  if (outputLimit) {
332
442
  const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -457,20 +567,33 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
457
567
  } else if (peakContext > 0) {
458
568
  stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
459
569
  } else if (outputLimit && callCount <= 1) {
460
- // Issue #1600: Show output-only detalization for sub-agent single sessions
570
+ // Issue #1600: Sub-agent single sessions previously showed only an output line.
571
+ // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
572
+ // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
573
+ // so peakContext stays at 0; without this fallback the rendered comment loses
574
+ // the sub-agent's input-token information entirely. Cache writes / reads are
575
+ // split via the same helper used for the Total line so the two lines stay
576
+ // arithmetically consistent.
577
+ const inputPhrase = buildCumulativeInputPhrase({
578
+ input: usage.inputTokens || 0,
579
+ cacheWrites: usage.cacheCreationTokens || 0,
580
+ cacheReads: usage.cacheReadTokens || 0,
581
+ format: formatTokensCompact,
582
+ });
461
583
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
462
- stats += `\n- ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
584
+ stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
463
585
  }
464
586
 
465
- // Cumulative totals per model: input tokens + cached shown separately
466
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
467
- const cachedTokens = usage.cacheReadTokens;
468
- let totalLine;
469
- if (cachedTokens > 0) {
470
- totalLine = `(${formatTokensCompact(totalInputNonCached)} + ${formatTokensCompact(cachedTokens)} cached) input tokens`;
471
- } else {
472
- totalLine = `${formatTokensCompact(totalInputNonCached)} input tokens`;
473
- }
587
+ // Cumulative totals per model: input tokens + cached shown separately.
588
+ // Issue #1710 R4: Cache writes are now their own category (so the displayed
589
+ // "input tokens" figure never silently fuses 1.25× / 2× cache-write tokens
590
+ // with regular 1× input tokens — see issue #1710 root cause D).
591
+ let totalLine = buildCumulativeInputPhrase({
592
+ input: usage.inputTokens || 0,
593
+ cacheWrites: usage.cacheCreationTokens || 0,
594
+ cacheReads: usage.cacheReadTokens || 0,
595
+ format: formatTokensCompact,
596
+ });
474
597
 
475
598
  // Issue #1600: Output tokens on Total line — skip percentage if already shown above or aggregated
476
599
  if (callCount > 1) {
@@ -512,7 +635,11 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
512
635
  stats += `\n${i + 1}. ${parts.join(', ')}`;
513
636
  }
514
637
  } else {
515
- const avgInput = Math.round((totalInputNonCached + cachedTokens) / callCount);
638
+ // Estimated per-call breakdown when sub-agent stream tracking did not capture
639
+ // per-call usage. Includes everything the model actually saw:
640
+ // input + cache_creation (writes) + cache_read.
641
+ const aggregateInput = (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0) + (usage.cacheReadTokens || 0);
642
+ const avgInput = Math.round(aggregateInput / callCount);
516
643
  const avgOutput = Math.round(usage.outputTokens / callCount);
517
644
  for (let i = 0; i < matchingCalls.length; i++) {
518
645
  const parts = [];
@@ -0,0 +1,88 @@
1
#!/usr/bin/env node

/**
 * Issue #1710: Per-model cost calculation extracted from claude.lib.mjs to
 * keep that file under the 1500-line repo cap. Behaviour is unchanged from
 * the previous in-place implementation.
 */
import Decimal from 'decimal.js-light';
import { SERVER_TOOL_PRICING_USD } from './anthropic-server-tool-pricing.lib.mjs';

/**
 * Calculate USD cost for a model's usage with optional detailed breakdown.
 *
 * Cost components (Issue #1600 uses Decimal for precision):
 * - input × cost.input / 1M
 * - cacheWrite × cost.cache_write / 1M
 * - cacheRead × cost.cache_read / 1M
 * - output × cost.output / 1M
 * - webSearch × $0.01 / request (Issue #1710 — see SERVER_TOOL_PRICING_USD)
 *
 * A category contributes only when both its token count and its rate are
 * truthy; otherwise its zeroed placeholder entry is kept in the breakdown.
 *
 * @param {Object} usage - per-model usage entry
 * @param {Object|null} modelInfo - model-info shape (includes `cost` map)
 * @param {boolean} [includeBreakdown=false] - return `{ total, breakdown }` when true
 * @returns {number|{total: number, breakdown: Object}}
 */
export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
  if (!modelInfo || !modelInfo.cost) {
    return includeBreakdown ? { total: 0, breakdown: null } : 0;
  }
  const pricing = modelInfo.cost;
  const MILLION = new Decimal(1000000);
  // Decimal-based per-category cost: tokens / 1M × $/1M rate.
  const tokenCost = (tokens, ratePerMillion) => new Decimal(tokens).div(MILLION).mul(new Decimal(ratePerMillion)).toNumber();
  const breakdown = {
    input: { tokens: 0, costPerMillion: 0, cost: 0 },
    cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
    cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
    output: { tokens: 0, costPerMillion: 0, cost: 0 },
    // Issue #1710: server-side tool usage (web_search) is billed per-request,
    // independent of token cost. Without this entry the public-pricing total
    // diverges from Anthropic's reported total by exactly the per-request
    // rate times the request count — the residual quoted in issue #1710.
    webSearch: { requests: 0, costPerRequest: 0, cost: 0 },
  };
  // Token-priced categories: [breakdown key, token count, $/1M rate].
  const categories = [
    ['input', usage.inputTokens, pricing.input],
    ['cacheWrite', usage.cacheCreationTokens, pricing.cache_write],
    ['cacheRead', usage.cacheReadTokens, pricing.cache_read],
    ['output', usage.outputTokens, pricing.output],
  ];
  for (const [key, tokens, rate] of categories) {
    if (tokens && rate) {
      breakdown[key] = { tokens, costPerMillion: rate, cost: tokenCost(tokens, rate) };
    }
  }
  // Issue #1710: bill web_search requests at the documented per-request rate.
  const perRequest = SERVER_TOOL_PRICING_USD.web_search.costPerRequest;
  if (usage.webSearchRequests && perRequest > 0) {
    breakdown.webSearch = {
      requests: usage.webSearchRequests,
      costPerRequest: perRequest,
      cost: new Decimal(usage.webSearchRequests).mul(new Decimal(perRequest)).toNumber(),
    };
  }
  // Sum all five components with Decimal so the total never picks up
  // binary-float rounding error from intermediate additions.
  const total = [breakdown.input, breakdown.cacheWrite, breakdown.cacheRead, breakdown.output, breakdown.webSearch]
    .reduce((sum, part) => sum.plus(part.cost), new Decimal(0))
    .toNumber();
  if (includeBreakdown) {
    return { total, breakdown };
  }
  return total;
};
@@ -25,6 +25,7 @@ import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
25
25
  import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
26
26
  import { fetchModelInfo } from './model-info.lib.mjs';
27
27
  import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
28
+ import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
28
29
  export { availableModels }; // Re-export for backward compatibility
29
30
  export { fetchModelInfo };
30
31
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -373,6 +374,9 @@ export const executeClaude = async params => {
373
374
  owner,
374
375
  repo,
375
376
  prNumber,
377
+ // Issue #1708: forwarded so the bidirectional handler can poll
378
+ // issue title/body changes and uncommitted changes during the session.
379
+ issueNumber,
376
380
  });
377
381
  };
378
382
  /** Check if a model supports vision (image input) using models.dev API @returns {Promise<boolean>} */
@@ -386,56 +390,10 @@ export const checkModelVisionCapability = async modelId => {
386
390
  return false;
387
391
  }
388
392
  };
389
- /** Calculate USD cost for a model's usage with detailed breakdown (Issue #1600: uses Decimal for precision) */
390
- export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
391
- if (!modelInfo || !modelInfo.cost) {
392
- return includeBreakdown ? { total: 0, breakdown: null } : 0;
393
- }
394
- const cost = modelInfo.cost;
395
- const million = new Decimal(1000000);
396
- const breakdown = {
397
- input: { tokens: 0, costPerMillion: 0, cost: 0 },
398
- cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
399
- cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
400
- output: { tokens: 0, costPerMillion: 0, cost: 0 },
401
- };
402
- if (usage.inputTokens && cost.input) {
403
- breakdown.input = {
404
- tokens: usage.inputTokens,
405
- costPerMillion: cost.input,
406
- cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
407
- };
408
- }
409
- if (usage.cacheCreationTokens && cost.cache_write) {
410
- breakdown.cacheWrite = {
411
- tokens: usage.cacheCreationTokens,
412
- costPerMillion: cost.cache_write,
413
- cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
414
- };
415
- }
416
- if (usage.cacheReadTokens && cost.cache_read) {
417
- breakdown.cacheRead = {
418
- tokens: usage.cacheReadTokens,
419
- costPerMillion: cost.cache_read,
420
- cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
421
- };
422
- }
423
- if (usage.outputTokens && cost.output) {
424
- breakdown.output = {
425
- tokens: usage.outputTokens,
426
- costPerMillion: cost.output,
427
- cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
428
- };
429
- }
430
- const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).toNumber();
431
- if (includeBreakdown) {
432
- return {
433
- total: totalCost,
434
- breakdown,
435
- };
436
- }
437
- return totalCost;
438
- };
393
+ // Issue #1710: calculateModelCost extracted to ./claude.cost.lib.mjs to keep
394
+ // this file under the 1500-line repo cap (see check-file-line-limits CI job).
395
+ import { calculateModelCost } from './claude.cost.lib.mjs';
396
+ export { calculateModelCost };
439
397
  export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
440
398
  const os = (await use('os')).default;
441
399
  const homeDir = os.homedir();
@@ -497,8 +455,14 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
497
455
  }
498
456
  accumulateModelUsage(modelUsage, entry);
499
457
  // Issue #1501: Track peak context usage per single API request
458
+ // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
459
+ // per-request peaks should reflect *new* input the model received,
460
+ // not cached prompt context. Cache reads remain visible in the
461
+ // cumulative Total line as `(X + Y cached)`. This makes the
462
+ // peak-request value reconcilable with the cumulative non-cached
463
+ // input figure (instead of mixing semantics across the two lines).
500
464
  const usage = entry.message.usage;
501
- const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0) + (usage.cache_read_input_tokens || 0);
465
+ const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
502
466
  const model = entry.message.model;
503
467
  if (requestContext > (peakContextByModel[model] || 0)) {
504
468
  peakContextByModel[model] = requestContext;
@@ -633,6 +597,9 @@ export const executeClaudeCommand = async params => {
633
597
  owner,
634
598
  repo,
635
599
  prNumber,
600
+ // Issue #1708: enables status streaming (CI/uncommitted/PR-metadata)
601
+ // and issue body/title polling in setupBidirectionalHandler.
602
+ issueNumber,
636
603
  } = params;
637
604
  // Issue #817: Apply bidirectional-mode composition and tool-support validation before running.
638
605
  // This may enable argv.interactiveMode, argv.acceptIncommingCommentsAsInput, and
@@ -721,9 +688,11 @@ export const executeClaudeCommand = async params => {
721
688
  } else if (argv.interactiveMode) {
722
689
  await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
723
690
  }
724
- // Issue #817: Set up bidirectional handler when --accept-incomming-comments-as-input
725
- // (or composite --bidirectional-interactive-mode) is enabled. Returns null when inactive.
726
- const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, $, log });
691
+ // Issue #817 / #1708: Set up bidirectional handler when --accept-incomming-comments-as-input
692
+ // (or composite --bidirectional-interactive-mode / --auto-input-until-mergeable) is enabled.
693
+ // Returns null when inactive. issueNumber + tempDir are forwarded so the handler can
694
+ // poll issue title/body changes and uncommitted changes during the session (Issue #1708).
695
+ const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, issueNumber, tempDir, $, log });
727
696
  const progressMonitor = await initProgressMonitoring(argv, { owner, repo, prNumber, $, log }); // works with or without --interactive-mode
728
697
  let execCommand;
729
698
  const mappedModel = mapModelToId(argv.model);
@@ -761,9 +730,10 @@ export const executeClaudeCommand = async params => {
761
730
  }
762
731
  try {
763
732
  const { thinkingBudget: resolvedThinkingBudget, thinkLevel, isNewVersion, maxBudget } = await resolveThinkingSettings(argv, log);
764
- // Issue #817: Streaming mode sets exitAfterStopDelayMs=60000 so the
765
- // headless Claude process stays alive between NDJSON turns.
766
- const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: effectiveModel, thinkLevel, maxBudget, planModel: resolvedPlanModel, executionModel: resolvedExecutionModel, showThinkingContent: argv.showThinkingContent, exitAfterStopDelayMs: streamingInput ? 60_000 : undefined });
733
+ // Issue #1706: --sub-session-size + --disable-1m-context. Resolve here, then pass into getClaudeEnv along with the rest.
734
+ const { parsed: parsedSubSessionSize, contextWindowTokens } = await resolveSubSessionSize({ rawValue: argv.subSessionSize, tool: 'claude', modelId: effectiveModel, fetchModelInfo, log });
735
+ // Issue #817: streaming mode sets exitAfterStopDelayMs=60000 so the headless Claude process stays alive between NDJSON turns.
736
+ const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: effectiveModel, thinkLevel, maxBudget, planModel: resolvedPlanModel, executionModel: resolvedExecutionModel, showThinkingContent: argv.showThinkingContent, exitAfterStopDelayMs: streamingInput ? 60_000 : undefined, disable1mContext: !!argv.disable1mContext, subSessionSize: parsedSubSessionSize, contextWindowTokens });
767
737
  if (argv.verbose) claudeEnv.ANTHROPIC_LOG = 'debug';
768
738
  const modelMaxOutputTokens = getMaxOutputTokensForModel(effectiveModel);
769
739
  if (argv.verbose) {
@@ -772,6 +742,9 @@ export const executeClaudeCommand = async params => {
772
742
  if (resolvedThinkingBudget !== undefined) await log(`📊 MAX_THINKING_TOKENS: ${resolvedThinkingBudget}`, { verbose: true });
773
743
  if (claudeEnv.CLAUDE_CODE_EFFORT_LEVEL) await log(`📊 CLAUDE_CODE_EFFORT_LEVEL: ${claudeEnv.CLAUDE_CODE_EFFORT_LEVEL}`, { verbose: true });
774
744
  if (claudeEnv.CLAUDE_CODE_SHOW_THINKING) await log(`📊 CLAUDE_CODE_SHOW_THINKING: ${claudeEnv.CLAUDE_CODE_SHOW_THINKING}`, { verbose: true });
745
+ // Issue #1706: log applied env vars (--disable-1m-context, --sub-session-size).
746
+ const sub1706 = ['CLAUDE_CODE_DISABLE_1M_CONTEXT', 'CLAUDE_CODE_AUTO_COMPACT_WINDOW', 'CLAUDE_AUTOCOMPACT_PCT_OVERRIDE'].filter(k => claudeEnv[k]).map(k => `${k}=${claudeEnv[k]}`);
747
+ if (sub1706.length) await log(`📊 ${sub1706.join(', ')}`, { verbose: true });
775
748
  if (!isNewVersion && thinkLevel) await log(`📊 Thinking level (via keywords): ${thinkLevel}`, { verbose: true });
776
749
  }
777
750
  const simpleEscapedSystem = systemPrompt.replace(/"/g, '\\"');
@@ -920,6 +893,18 @@ export const executeClaudeCommand = async params => {
920
893
  }
921
894
  if (data.type === 'message') messageCount++;
922
895
  else if (data.type === 'tool_use') toolUseCount++;
896
+ // Issue #1708: signal busy/idle to the bidirectional handler so
897
+ // queue-comments-to-input mode can hold frames until the AI is
898
+ // idle. Any assistant/tool_use/system event means the AI is
899
+ // actively processing; a result event means the turn is done
900
+ // and queued frames can flush.
901
+ if (bidirectionalHandler) {
902
+ if (data.type === 'assistant' || data.type === 'tool_use' || data.type === 'tool_result') {
903
+ if (typeof bidirectionalHandler.markAiBusy === 'function') {
904
+ bidirectionalHandler.markAiBusy();
905
+ }
906
+ }
907
+ }
923
908
  if (progressMonitor) await progressMonitor.processStreamEvent(data).catch(e => log(`⚠️ Progress: ${e.message}`, { verbose: true }));
924
909
  if (data.type === 'result') {
925
910
  if (!resultEventReceived) {
@@ -927,6 +912,15 @@ export const executeClaudeCommand = async params => {
927
912
  await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
928
913
  resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
929
914
  }
915
+ // Issue #1708: result event = AI is idle and waiting for next
916
+ // user input. Flush any frames queued by --queue-comments-to-input.
917
+ if (bidirectionalHandler && typeof bidirectionalHandler.markAiIdle === 'function') {
918
+ try {
919
+ await bidirectionalHandler.markAiIdle();
920
+ } catch (idleErr) {
921
+ if (argv.verbose) await log(`⚠️ Bidirectional mode: markAiIdle error: ${idleErr.message}`, { verbose: true });
922
+ }
923
+ }
930
924
  if (data.subtype === 'success') resultSuccessReceived = true;
931
925
  if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
932
926
  anthropicTotalCostUSD = data.total_cost_usd;
@@ -1301,7 +1295,9 @@ export const executeClaudeCommand = async params => {
1301
1295
  await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
1302
1296
  }
1303
1297
  if (tokenUsage.peakContextUsage > 0) {
1304
- await log(`📊 Peak single-request context: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
1298
+ // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
1299
+ // excluding cache_read). Cache reads are still visible separately on the Total line.
1300
+ await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
1305
1301
  }
1306
1302
  await log('\n💰 Token Usage Summary:');
1307
1303
  // Display per-model breakdown
package/src/codex.lib.mjs CHANGED
@@ -25,6 +25,7 @@ import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs
25
25
  import { fetchModelInfo } from './model-info.lib.mjs';
26
26
  import { defaultModels } from './models/index.mjs';
27
27
  import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
28
+ import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
28
29
  import Decimal from 'decimal.js-light';
29
30
 
30
31
  const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -741,6 +742,36 @@ export const executeCodexCommand = async params => {
741
742
  }
742
743
  codexArgs += ` --json --skip-git-repo-check -o ${shellQuote(lastMessageFile)} -c ${shellQuote(`model_reasoning_effort=${reasoningEffort}`)} -c ${shellQuote('model_reasoning_summary=auto')} --dangerously-bypass-approvals-and-sandbox`;
743
744
 
745
+ // Issue #1706: Append --disable-1m-context and --sub-session-size as Codex -c overrides.
746
+ let parsedSubSessionSize;
747
+ try {
748
+ parsedSubSessionSize = parseSubSessionSize(argv.subSessionSize);
749
+ } catch (parseError) {
750
+ await log(`⚠️ ${parseError.message}`, { level: 'warn' });
751
+ parsedSubSessionSize = { kind: 'default', tokens: null, percent: null, raw: '' };
752
+ }
753
+ let codexContextWindowTokens = null;
754
+ if (parsedSubSessionSize.kind === 'percent') {
755
+ try {
756
+ const codexModelMeta = await fetchModelInfo(mappedModel, { preferredProviderIds: ['openai'] });
757
+ codexContextWindowTokens = codexModelMeta?.limit?.context || null;
758
+ } catch {
759
+ codexContextWindowTokens = null;
760
+ }
761
+ }
762
+ const disable1mArgs = buildCodexDisable1mContextConfigArgs(!!argv.disable1mContext);
763
+ for (const arg of disable1mArgs) {
764
+ codexArgs += ` ${shellQuote(arg)}`;
765
+ }
766
+ const subSessionSizeArgs = buildCodexSubSessionSizeConfigArgs(parsedSubSessionSize, { contextWindow: codexContextWindowTokens });
767
+ for (const arg of subSessionSizeArgs) {
768
+ codexArgs += ` ${shellQuote(arg)}`;
769
+ }
770
+ if (argv.verbose) {
771
+ if (disable1mArgs.length) await log(`📊 Codex --disable-1m-context: ${disable1mArgs.join(' ')}`, { verbose: true });
772
+ if (subSessionSizeArgs.length) await log(`📊 Codex --sub-session-size: ${subSessionSizeArgs.join(' ')}`, { verbose: true });
773
+ }
774
+
744
775
  const fullCommand = `(cd ${shellQuote(tempDir)} && cat ${shellQuote(promptFile)} | ${codexPath} ${codexArgs})`;
745
776
 
746
777
  await log(`\n${formatAligned('📝', 'Raw command:', '')}`);