@link-assistant/hive-mind 1.57.3 → 1.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,6 +47,11 @@ export const accumulateModelUsage = (modelUsageMap, entry) => {
47
47
  }
48
48
  if (usage.cache_read_input_tokens) modelUsageMap[model].cacheReadTokens += usage.cache_read_input_tokens;
49
49
  if (usage.output_tokens) modelUsageMap[model].outputTokens += usage.output_tokens;
50
+ // Issue #1710: track Anthropic server-tool usage from per-request JSONL entries
51
+ // so the public-pricing estimate can bill them at the documented per-request rate.
52
+ if (usage.server_tool_use?.web_search_requests) {
53
+ modelUsageMap[model].webSearchRequests += usage.server_tool_use.web_search_requests;
54
+ }
50
55
  };
51
56
 
52
57
  /**
@@ -109,6 +114,11 @@ export const displayModelUsage = async (usage, log) => {
109
114
  await log(` ${label}: ${formatNumber(breakdown[key].tokens)} tokens × $${breakdown[key].costPerMillion}/M = $${new Decimal(breakdown[key].cost).toFixed(6)}`);
110
115
  }
111
116
  }
117
+ // Issue #1710: itemise server-tool charges so the residual that puzzled
118
+ // readers in PR #1707 ($0.04 web_search) is visible in the breakdown.
119
+ if (breakdown.webSearch && breakdown.webSearch.requests > 0) {
120
+ await log(` Web search: ${breakdown.webSearch.requests} requests × $${breakdown.webSearch.costPerRequest}/req = $${new Decimal(breakdown.webSearch.cost).toFixed(6)}`);
121
+ }
112
122
  await log(' ─────────────────────────────────');
113
123
  await log(` Total: $${new Decimal(usage.costUSD).toFixed(6)}`);
114
124
  } else if (usage.modelInfo === null) {
@@ -129,7 +139,9 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
129
139
  const hasAnthropic = anthropicCost !== null && anthropicCost !== undefined;
130
140
  const publicDec = hasPublic ? new Decimal(publicCost) : null;
131
141
  const anthropicDec = hasAnthropic ? new Decimal(anthropicCost) : null;
132
- if (publicDec && anthropicDec && publicDec.toFixed(6) === anthropicDec.toFixed(6)) {
142
+ // Issue #1703: also collapse to the short form when the rounded difference is below display precision,
143
+ // so reports like "Difference: $-0.000000 (-0.00%)" no longer waste two extra lines.
144
+ if (publicDec && anthropicDec && anthropicDec.minus(publicDec).abs().toFixed(6) === '0.000000') {
133
145
  await log(`\n 💰 Cost: $${anthropicDec.toFixed(6)}`);
134
146
  return;
135
147
  }
@@ -145,6 +157,52 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
145
157
  }
146
158
  };
147
159
 
160
+ /**
161
+ * Issue #1710: Emit a verbose, machine-friendly trace of every input that
162
+ * feeds the budget-stats renderer for a single model. Hidden behind
163
+ * `{ verbose: true }` so it never pollutes the default log, but always
164
+ * captured when --verbose is set. The trace is what we wished we had had
165
+ * available *before* filing #1710 — it shows peak vs. cumulative side by
166
+ * side, splits cache writes from cache reads, and surfaces server-tool
167
+ * usage (web search) that the public-pricing estimator currently ignores.
168
+ *
169
+ * @param {Object} usage - Per-model usage entry from `tokenUsage.modelUsage`.
170
+ * @param {Object} tokenUsage - Full token usage object (used only for sub-session count).
171
+ * @param {Function} log - Async logger (must accept a `{verbose}` options arg).
172
+ */
173
+ export const dumpBudgetTrace = async (usage, tokenUsage, log) => {
174
+ const modelName = usage.modelName || usage.modelInfo?.name || 'unknown';
175
+ const limit = usage.modelInfo?.limit || {};
176
+ const peak = usage.peakContextUsage || 0;
177
+ const writes5m = usage.cacheCreation5mTokens || 0;
178
+ const writes1h = usage.cacheCreation1hTokens || 0;
179
+ const writes = usage.cacheCreationTokens || 0;
180
+ const reads = usage.cacheReadTokens || 0;
181
+ const inputs = usage.inputTokens || 0;
182
+ const outputs = usage.outputTokens || 0;
183
+ const webSearches = usage.webSearchRequests || 0;
184
+ const subSessionCount = (tokenUsage?.subSessions || []).length;
185
+ const source = usage._sourceResultJson ? 'jsonl + result-event' : 'jsonl';
186
+
187
+ await log(`\n 📊 [budget-trace] ${modelName}`, { verbose: true });
188
+ // Issue #1710 R5: peak request is `input + cache_creation` (cache reads
189
+ // tracked separately on the cumulative line).
190
+ await log(` peak request: ${formatNumber(peak)}${limit.context ? ` / ${formatNumber(limit.context)} context` : ''} (largest single-request input + cache_creation, excludes cache_read)`, { verbose: true });
191
+ await log(` cumulative: input ${formatNumber(inputs)}, cache_write ${formatNumber(writes)} (5m ${formatNumber(writes5m)} / 1h ${formatNumber(writes1h)}), cache_read ${formatNumber(reads)}, output ${formatNumber(outputs)}`, { verbose: true });
192
+ // Issue #1710 R1: web_search is now billed in calculateModelCost. The trace
193
+ // still surfaces the implied dollar cost so the residual remains debuggable
194
+ // from the saved log even if a future model lacks pricing data.
195
+ await log(` server tools: web_search ${webSearches}${webSearches > 0 ? ` (= $${(webSearches * 0.01).toFixed(6)} at $10 / 1k searches)` : ''}`, { verbose: true });
196
+ if (usage.costUSD !== null && usage.costUSD !== undefined) {
197
+ await log(` cost (public): $${new Decimal(usage.costUSD).toFixed(6)}`, { verbose: true });
198
+ }
199
+ if (usage._resultCostUSD !== null && usage._resultCostUSD !== undefined) {
200
+ await log(` cost (anthropic result-event): $${new Decimal(usage._resultCostUSD).toFixed(6)}`, { verbose: true });
201
+ }
202
+ await log(` sub-session count: ${subSessionCount}`, { verbose: true });
203
+ await log(` data source: ${source}`, { verbose: true });
204
+ };
205
+
148
206
  /**
149
207
  * Display token budget statistics (context window usage and ratios)
150
208
  * @param {Object} usage - Usage data for a model
@@ -153,6 +211,10 @@ export const displayCostComparison = async (publicCost, anthropicCost, log) => {
153
211
  */
154
212
  /**
155
213
  * Issue #1526: Updated to use single-line context+output format.
214
+ * Issue #1710: After the standard rendering, emit a verbose trace of the
215
+ * raw inputs that fed the renderer (gated behind --verbose),
216
+ * so future calculation-correctness reports can be triaged
217
+ * without re-running the session.
156
218
  */
157
219
  export const displayBudgetStats = async (usage, tokenUsage, log) => {
158
220
  const modelInfo = usage.modelInfo;
@@ -171,14 +233,18 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
171
233
  const peakContext = usage.peakContextUsage || 0;
172
234
 
173
235
  if (hasMultipleSubSessions) {
174
- // Issue #1600: Unified format — numbered list without "Context window:" prefix
236
+ // Issue #1600: Unified format — numbered list without "Context window:" prefix.
237
+ // Issue #1710 R3/R5: Peak input is `input + cache_creation` (cache reads
238
+ // are tracked separately on the Total line), and the bullet is now
239
+ // labelled "peak request:" so a reader does not try to reconcile it with
240
+ // the cumulative Total figure.
175
241
  for (let i = 0; i < subSessions.length; i++) {
176
242
  const sub = subSessions[i];
177
243
  const subPeak = sub.peakContextUsage || 0;
178
244
  const parts = [];
179
245
  if (contextLimit && subPeak > 0) {
180
246
  const pct = ((subPeak / contextLimit) * 100).toFixed(0);
181
- parts.push(`${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
247
+ parts.push(`peak request: ${formatNumber(subPeak)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
182
248
  }
183
249
  if (outputLimit) {
184
250
  const outPct = ((sub.outputTokens / outputLimit) * 100).toFixed(0);
@@ -192,7 +258,7 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
192
258
  const parts = [];
193
259
  if (contextLimit) {
194
260
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
195
- parts.push(`${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
261
+ parts.push(`peak request: ${formatNumber(peakContext)} / ${formatNumber(contextLimit)} (${pct}%) input tokens`);
196
262
  }
197
263
  if (outputLimit) {
198
264
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
@@ -203,16 +269,16 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
203
269
  }
204
270
  }
205
271
 
206
- // Cumulative totals — single line
207
- // Issue #1547: Parenthesized cached format and consistent output format
208
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
209
- const cachedTokens = usage.cacheReadTokens;
210
- let totalLine;
211
- if (cachedTokens > 0) {
212
- totalLine = `(${formatNumber(totalInputNonCached)} + ${formatNumber(cachedTokens)} cached) input tokens`;
213
- } else {
214
- totalLine = `${formatNumber(totalInputNonCached)} input tokens`;
215
- }
272
+ // Cumulative totals — single line.
273
+ // Issue #1547: Parenthesized cached format and consistent output format.
274
+ // Issue #1710 R4: When cache writes are present, render them as a separate
275
+ // category instead of folding them into the input figure.
276
+ let totalLine = buildCumulativeInputPhrase({
277
+ input: usage.inputTokens || 0,
278
+ cacheWrites: usage.cacheCreationTokens || 0,
279
+ cacheReads: usage.cacheReadTokens || 0,
280
+ format: formatNumber,
281
+ });
216
282
  if (peakContext === 0 && outputLimit) {
217
283
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
218
284
  totalLine += `, ${formatNumber(usage.outputTokens)} / ${formatNumber(outputLimit)} (${outPct}%) output tokens`;
@@ -220,6 +286,9 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
220
286
  totalLine += `, ${formatNumber(usage.outputTokens)} output tokens`;
221
287
  }
222
288
  await log(` Total: ${totalLine}`);
289
+
290
+ // Issue #1710: verbose-only, never affects default output.
291
+ await dumpBudgetTrace(usage, tokenUsage, log);
223
292
  };
224
293
 
225
294
  /**
@@ -292,6 +361,44 @@ const formatTokensCompact = tokens => {
292
361
  return tokens.toLocaleString();
293
362
  };
294
363
 
364
/**
 * Issue #1710: Build the cumulative input-tokens phrase for the Total / fallback
 * lines, splitting cache writes and cache reads so neither category is ever
 * silently fused with raw input tokens.
 *
 * Forms (in priority order):
 * - reads > 0 && writes > 0 → "(X new + W cache writes + Y cache reads) input tokens"
 * - reads > 0 && writes = 0 → "(X + Y cached) input tokens" (back-compat shape)
 * - reads = 0 && writes > 0 → "(X new + W cache writes) input tokens"
 * - reads = 0 && writes = 0 → "X input tokens"
 *
 * The legacy `(X + Y cached)` shape is preserved when only cache reads exist
 * so we don't churn output for the common Opus-only case. The new explicit
 * forms only appear when cache writes are non-zero (issue #1710 R4).
 *
 * Negative or nullish counts are clamped to 0 so malformed JSONL entries can
 * never render a negative token figure.
 *
 * @param {Object} opts
 * @param {number} opts.input - non-cached input tokens (excludes cache writes/reads)
 * @param {number} opts.cacheWrites - cache_creation_input_tokens (cumulative)
 * @param {number} opts.cacheReads - cache_read_input_tokens (cumulative)
 * @param {(n: number) => string} [opts.format=String] - formatter (compact or full);
 *   defaults to plain `String` so the helper is safe to call without one
 * @returns {string} the cumulative input phrase, e.g. "(78K new + 57.6K cache writes) input tokens"
 */
export const buildCumulativeInputPhrase = ({ input, cacheWrites, cacheReads, format = n => String(n) }) => {
  const w = Math.max(0, cacheWrites || 0);
  const r = Math.max(0, cacheReads || 0);
  const i = Math.max(0, input || 0);
  if (w > 0 && r > 0) {
    return `(${format(i)} new + ${format(w)} cache writes + ${format(r)} cache reads) input tokens`;
  }
  if (w > 0) {
    return `(${format(i)} new + ${format(w)} cache writes) input tokens`;
  }
  if (r > 0) {
    return `(${format(i)} + ${format(r)} cached) input tokens`;
  }
  return `${format(i)} input tokens`;
};
401
+
295
402
  /**
296
403
  * Format sub-sessions list for budget stats display
297
404
  * @param {Array} subSessions - Array of sub-session usage objects
@@ -315,6 +422,9 @@ const formatSubSessionsList = (subSessions, contextLimit, outputLimit) => {
315
422
 
316
423
  /**
317
424
  * Issue #1600: Build a single-line context + output tokens string (unified format, no "Context window:" prefix).
425
+ * Issue #1710 R3/R5: The input figure is the peak per-request `input + cache_creation`
426
+ * (cache reads excluded). Labelling it "peak request:" lets readers tell it apart
427
+ * from the cumulative Total line.
318
428
  * @param {number} peakContext - Peak context usage (0 if unknown — context display skipped)
319
429
  * @param {number} contextLimit - Context window limit (null if unknown)
320
430
  * @param {number} outputTokens - Output tokens used
@@ -326,7 +436,7 @@ const formatContextOutputLine = (peakContext, contextLimit, outputTokens, output
326
436
  const parts = [];
327
437
  if (contextLimit && peakContext > 0) {
328
438
  const pct = ((peakContext / contextLimit) * 100).toFixed(0);
329
- parts.push(`${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
439
+ parts.push(`peak request: ${formatTokensCompact(peakContext)} / ${formatTokensCompact(contextLimit)} (${pct}%) input tokens`);
330
440
  }
331
441
  if (outputLimit) {
332
442
  const outPct = ((outputTokens / outputLimit) * 100).toFixed(0);
@@ -457,20 +567,33 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
457
567
  } else if (peakContext > 0) {
458
568
  stats += formatContextOutputLine(peakContext, contextLimit, usage.outputTokens, outputLimit, '- ');
459
569
  } else if (outputLimit && callCount <= 1) {
460
- // Issue #1600: Show output-only detalization for sub-agent single sessions
570
+ // Issue #1600: Sub-agent single sessions previously showed only an output line.
571
+ // Issue #1710 R2: Always surface the cumulative input information too — sub-agent
572
+ // models (e.g. Haiku) never appear as the responding model in the parent JSONL,
573
+ // so peakContext stays at 0; without this fallback the rendered comment loses
574
+ // the sub-agent's input-token information entirely. Cache writes / reads are
575
+ // split via the same helper used for the Total line so the two lines stay
576
+ // arithmetically consistent.
577
+ const inputPhrase = buildCumulativeInputPhrase({
578
+ input: usage.inputTokens || 0,
579
+ cacheWrites: usage.cacheCreationTokens || 0,
580
+ cacheReads: usage.cacheReadTokens || 0,
581
+ format: formatTokensCompact,
582
+ });
461
583
  const outPct = ((usage.outputTokens / outputLimit) * 100).toFixed(0);
462
- stats += `\n- ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
584
+ stats += `\n- ${inputPhrase}, ${formatTokensCompact(usage.outputTokens)} / ${formatTokensCompact(outputLimit)} (${outPct}%) output tokens`;
463
585
  }
464
586
 
465
- // Cumulative totals per model: input tokens + cached shown separately
466
- const totalInputNonCached = usage.inputTokens + usage.cacheCreationTokens;
467
- const cachedTokens = usage.cacheReadTokens;
468
- let totalLine;
469
- if (cachedTokens > 0) {
470
- totalLine = `(${formatTokensCompact(totalInputNonCached)} + ${formatTokensCompact(cachedTokens)} cached) input tokens`;
471
- } else {
472
- totalLine = `${formatTokensCompact(totalInputNonCached)} input tokens`;
473
- }
587
+ // Cumulative totals per model: input tokens + cached shown separately.
588
+ // Issue #1710 R4: Cache writes are now their own category (so the displayed
589
+ // "input tokens" figure never silently fuses 1.25× / 2× cache-write tokens
590
+ // with regular 1× input tokens — see issue #1710 root cause D).
591
+ let totalLine = buildCumulativeInputPhrase({
592
+ input: usage.inputTokens || 0,
593
+ cacheWrites: usage.cacheCreationTokens || 0,
594
+ cacheReads: usage.cacheReadTokens || 0,
595
+ format: formatTokensCompact,
596
+ });
474
597
 
475
598
  // Issue #1600: Output tokens on Total line — skip percentage if already shown above or aggregated
476
599
  if (callCount > 1) {
@@ -512,7 +635,11 @@ export const buildBudgetStatsString = (tokenUsage, subAgentCalls = null) => {
512
635
  stats += `\n${i + 1}. ${parts.join(', ')}`;
513
636
  }
514
637
  } else {
515
- const avgInput = Math.round((totalInputNonCached + cachedTokens) / callCount);
638
+ // Estimated per-call breakdown when sub-agent stream tracking did not capture
639
+ // per-call usage. Includes everything the model actually saw:
640
+ // input + cache_creation (writes) + cache_read.
641
+ const aggregateInput = (usage.inputTokens || 0) + (usage.cacheCreationTokens || 0) + (usage.cacheReadTokens || 0);
642
+ const avgInput = Math.round(aggregateInput / callCount);
516
643
  const avgOutput = Math.round(usage.outputTokens / callCount);
517
644
  for (let i = 0; i < matchingCalls.length; i++) {
518
645
  const parts = [];
@@ -0,0 +1,88 @@
1
#!/usr/bin/env node

/**
 * Issue #1710: Per-model cost calculation extracted from claude.lib.mjs to
 * keep that file under the 1500-line repo cap. Behaviour is unchanged from
 * the previous in-place implementation.
 */
import Decimal from 'decimal.js-light';
import { SERVER_TOOL_PRICING_USD } from './anthropic-server-tool-pricing.lib.mjs';

/**
 * Calculate USD cost for a model's usage with optional detailed breakdown.
 *
 * Cost components (Issue #1600 uses Decimal for precision):
 * - input × cost.input / 1M
 * - cacheWrite × cost.cache_write / 1M
 * - cacheRead × cost.cache_read / 1M
 * - output × cost.output / 1M
 * - webSearch × $0.01 / request (Issue #1710 — see SERVER_TOOL_PRICING_USD)
 *
 * A category contributes only when both its token count and its rate are
 * truthy; otherwise its zeroed placeholder entry is kept in the breakdown.
 *
 * @param {Object} usage - per-model usage entry
 * @param {Object|null} modelInfo - model-info shape (includes `cost` map)
 * @param {boolean} [includeBreakdown=false] - return `{ total, breakdown }` when true
 * @returns {number|{total: number, breakdown: Object}}
 */
export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
  if (!modelInfo || !modelInfo.cost) {
    return includeBreakdown ? { total: 0, breakdown: null } : 0;
  }
  const pricing = modelInfo.cost;
  const MILLION = new Decimal(1000000);
  // Decimal-based per-category cost: tokens / 1M × $/1M rate.
  const tokenCost = (tokens, ratePerMillion) => new Decimal(tokens).div(MILLION).mul(new Decimal(ratePerMillion)).toNumber();
  const breakdown = {
    input: { tokens: 0, costPerMillion: 0, cost: 0 },
    cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
    cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
    output: { tokens: 0, costPerMillion: 0, cost: 0 },
    // Issue #1710: server-side tool usage (web_search) is billed per-request,
    // independent of token cost. Without this entry the public-pricing total
    // diverges from Anthropic's reported total by exactly the per-request
    // rate times the request count — the residual quoted in issue #1710.
    webSearch: { requests: 0, costPerRequest: 0, cost: 0 },
  };
  // Token-priced categories: [breakdown key, token count, $/1M rate].
  const categories = [
    ['input', usage.inputTokens, pricing.input],
    ['cacheWrite', usage.cacheCreationTokens, pricing.cache_write],
    ['cacheRead', usage.cacheReadTokens, pricing.cache_read],
    ['output', usage.outputTokens, pricing.output],
  ];
  for (const [key, tokens, rate] of categories) {
    if (tokens && rate) {
      breakdown[key] = { tokens, costPerMillion: rate, cost: tokenCost(tokens, rate) };
    }
  }
  // Issue #1710: bill web_search requests at the documented per-request rate.
  const perRequest = SERVER_TOOL_PRICING_USD.web_search.costPerRequest;
  if (usage.webSearchRequests && perRequest > 0) {
    breakdown.webSearch = {
      requests: usage.webSearchRequests,
      costPerRequest: perRequest,
      cost: new Decimal(usage.webSearchRequests).mul(new Decimal(perRequest)).toNumber(),
    };
  }
  // Sum all five components with Decimal so the total never picks up
  // binary-float rounding error from intermediate additions.
  const total = [breakdown.input, breakdown.cacheWrite, breakdown.cacheRead, breakdown.output, breakdown.webSearch]
    .reduce((sum, part) => sum.plus(part.cost), new Decimal(0))
    .toNumber();
  if (includeBreakdown) {
    return { total, breakdown };
  }
  return total;
};
@@ -25,6 +25,7 @@ import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
25
25
  import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
26
26
  import { fetchModelInfo } from './model-info.lib.mjs';
27
27
  import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
28
+ import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
28
29
  export { availableModels }; // Re-export for backward compatibility
29
30
  export { fetchModelInfo };
30
31
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
@@ -373,6 +374,9 @@ export const executeClaude = async params => {
373
374
  owner,
374
375
  repo,
375
376
  prNumber,
377
+ // Issue #1708: forwarded so the bidirectional handler can poll
378
+ // issue title/body changes and uncommitted changes during the session.
379
+ issueNumber,
376
380
  });
377
381
  };
378
382
  /** Check if a model supports vision (image input) using models.dev API @returns {Promise<boolean>} */
@@ -386,56 +390,10 @@ export const checkModelVisionCapability = async modelId => {
386
390
  return false;
387
391
  }
388
392
  };
389
- /** Calculate USD cost for a model's usage with detailed breakdown (Issue #1600: uses Decimal for precision) */
390
- export const calculateModelCost = (usage, modelInfo, includeBreakdown = false) => {
391
- if (!modelInfo || !modelInfo.cost) {
392
- return includeBreakdown ? { total: 0, breakdown: null } : 0;
393
- }
394
- const cost = modelInfo.cost;
395
- const million = new Decimal(1000000);
396
- const breakdown = {
397
- input: { tokens: 0, costPerMillion: 0, cost: 0 },
398
- cacheWrite: { tokens: 0, costPerMillion: 0, cost: 0 },
399
- cacheRead: { tokens: 0, costPerMillion: 0, cost: 0 },
400
- output: { tokens: 0, costPerMillion: 0, cost: 0 },
401
- };
402
- if (usage.inputTokens && cost.input) {
403
- breakdown.input = {
404
- tokens: usage.inputTokens,
405
- costPerMillion: cost.input,
406
- cost: new Decimal(usage.inputTokens).div(million).mul(new Decimal(cost.input)).toNumber(),
407
- };
408
- }
409
- if (usage.cacheCreationTokens && cost.cache_write) {
410
- breakdown.cacheWrite = {
411
- tokens: usage.cacheCreationTokens,
412
- costPerMillion: cost.cache_write,
413
- cost: new Decimal(usage.cacheCreationTokens).div(million).mul(new Decimal(cost.cache_write)).toNumber(),
414
- };
415
- }
416
- if (usage.cacheReadTokens && cost.cache_read) {
417
- breakdown.cacheRead = {
418
- tokens: usage.cacheReadTokens,
419
- costPerMillion: cost.cache_read,
420
- cost: new Decimal(usage.cacheReadTokens).div(million).mul(new Decimal(cost.cache_read)).toNumber(),
421
- };
422
- }
423
- if (usage.outputTokens && cost.output) {
424
- breakdown.output = {
425
- tokens: usage.outputTokens,
426
- costPerMillion: cost.output,
427
- cost: new Decimal(usage.outputTokens).div(million).mul(new Decimal(cost.output)).toNumber(),
428
- };
429
- }
430
- const totalCost = new Decimal(breakdown.input.cost).plus(breakdown.cacheWrite.cost).plus(breakdown.cacheRead.cost).plus(breakdown.output.cost).toNumber();
431
- if (includeBreakdown) {
432
- return {
433
- total: totalCost,
434
- breakdown,
435
- };
436
- }
437
- return totalCost;
438
- };
393
+ // Issue #1710: calculateModelCost extracted to ./claude.cost.lib.mjs to keep
394
+ // this file under the 1500-line repo cap (see check-file-line-limits CI job).
395
+ import { calculateModelCost } from './claude.cost.lib.mjs';
396
+ export { calculateModelCost };
439
397
  export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsage = null) => {
440
398
  const os = (await use('os')).default;
441
399
  const homeDir = os.homedir();
@@ -497,8 +455,14 @@ export const calculateSessionTokens = async (sessionId, tempDir, resultModelUsag
497
455
  }
498
456
  accumulateModelUsage(modelUsage, entry);
499
457
  // Issue #1501: Track peak context usage per single API request
458
+ // Issue #1710: Exclude cache_read_input_tokens — sub-sessions and
459
+ // per-request peaks should reflect *new* input the model received,
460
+ // not cached prompt context. Cache reads remain visible in the
461
+ // cumulative Total line as `(X + Y cached)`. This makes the
462
+ // peak-request value reconcilable with the cumulative non-cached
463
+ // input figure (instead of mixing semantics across the two lines).
500
464
  const usage = entry.message.usage;
501
- const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0) + (usage.cache_read_input_tokens || 0);
465
+ const requestContext = (usage.input_tokens || 0) + (usage.cache_creation_input_tokens || 0);
502
466
  const model = entry.message.model;
503
467
  if (requestContext > (peakContextByModel[model] || 0)) {
504
468
  peakContextByModel[model] = requestContext;
@@ -633,6 +597,9 @@ export const executeClaudeCommand = async params => {
633
597
  owner,
634
598
  repo,
635
599
  prNumber,
600
+ // Issue #1708: enables status streaming (CI/uncommitted/PR-metadata)
601
+ // and issue body/title polling in setupBidirectionalHandler.
602
+ issueNumber,
636
603
  } = params;
637
604
  // Issue #817: Apply bidirectional-mode composition and tool-support validation before running.
638
605
  // This may enable argv.interactiveMode, argv.acceptIncommingCommentsAsInput, and
@@ -721,9 +688,11 @@ export const executeClaudeCommand = async params => {
721
688
  } else if (argv.interactiveMode) {
722
689
  await log('⚠️ Interactive mode: Disabled - missing PR info (owner/repo/prNumber)', { verbose: true });
723
690
  }
724
- // Issue #817: Set up bidirectional handler when --accept-incomming-comments-as-input
725
- // (or composite --bidirectional-interactive-mode) is enabled. Returns null when inactive.
726
- const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, $, log });
691
+ // Issue #817 / #1708: Set up bidirectional handler when --accept-incomming-comments-as-input
692
+ // (or composite --bidirectional-interactive-mode / --auto-input-until-mergeable) is enabled.
693
+ // Returns null when inactive. issueNumber + tempDir are forwarded so the handler can
694
+ // poll issue title/body changes and uncommitted changes during the session (Issue #1708).
695
+ const bidirectionalHandler = await setupBidirectionalHandler({ argv, owner, repo, prNumber, issueNumber, tempDir, $, log });
727
696
  const progressMonitor = await initProgressMonitoring(argv, { owner, repo, prNumber, $, log }); // works with or without --interactive-mode
728
697
  let execCommand;
729
698
  const mappedModel = mapModelToId(argv.model);
@@ -761,9 +730,10 @@ export const executeClaudeCommand = async params => {
761
730
  }
762
731
  try {
763
732
  const { thinkingBudget: resolvedThinkingBudget, thinkLevel, isNewVersion, maxBudget } = await resolveThinkingSettings(argv, log);
764
- // Issue #817: Streaming mode sets exitAfterStopDelayMs=60000 so the
765
- // headless Claude process stays alive between NDJSON turns.
766
- const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: effectiveModel, thinkLevel, maxBudget, planModel: resolvedPlanModel, executionModel: resolvedExecutionModel, showThinkingContent: argv.showThinkingContent, exitAfterStopDelayMs: streamingInput ? 60_000 : undefined });
733
+ // Issue #1706: --sub-session-size + --disable-1m-context. Resolve here, then pass into getClaudeEnv along with the rest.
734
+ const { parsed: parsedSubSessionSize, contextWindowTokens } = await resolveSubSessionSize({ rawValue: argv.subSessionSize, tool: 'claude', modelId: effectiveModel, fetchModelInfo, log });
735
+ // Issue #817: streaming mode sets exitAfterStopDelayMs=60000 so the headless Claude process stays alive between NDJSON turns.
736
+ const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: effectiveModel, thinkLevel, maxBudget, planModel: resolvedPlanModel, executionModel: resolvedExecutionModel, showThinkingContent: argv.showThinkingContent, exitAfterStopDelayMs: streamingInput ? 60_000 : undefined, disable1mContext: !!argv.disable1mContext, subSessionSize: parsedSubSessionSize, contextWindowTokens });
767
737
  if (argv.verbose) claudeEnv.ANTHROPIC_LOG = 'debug';
768
738
  const modelMaxOutputTokens = getMaxOutputTokensForModel(effectiveModel);
769
739
  if (argv.verbose) {
@@ -772,6 +742,9 @@ export const executeClaudeCommand = async params => {
772
742
  if (resolvedThinkingBudget !== undefined) await log(`📊 MAX_THINKING_TOKENS: ${resolvedThinkingBudget}`, { verbose: true });
773
743
  if (claudeEnv.CLAUDE_CODE_EFFORT_LEVEL) await log(`📊 CLAUDE_CODE_EFFORT_LEVEL: ${claudeEnv.CLAUDE_CODE_EFFORT_LEVEL}`, { verbose: true });
774
744
  if (claudeEnv.CLAUDE_CODE_SHOW_THINKING) await log(`📊 CLAUDE_CODE_SHOW_THINKING: ${claudeEnv.CLAUDE_CODE_SHOW_THINKING}`, { verbose: true });
745
+ // Issue #1706: log applied env vars (--disable-1m-context, --sub-session-size).
746
+ const sub1706 = ['CLAUDE_CODE_DISABLE_1M_CONTEXT', 'CLAUDE_CODE_AUTO_COMPACT_WINDOW', 'CLAUDE_AUTOCOMPACT_PCT_OVERRIDE'].filter(k => claudeEnv[k]).map(k => `${k}=${claudeEnv[k]}`);
747
+ if (sub1706.length) await log(`📊 ${sub1706.join(', ')}`, { verbose: true });
775
748
  if (!isNewVersion && thinkLevel) await log(`📊 Thinking level (via keywords): ${thinkLevel}`, { verbose: true });
776
749
  }
777
750
  const simpleEscapedSystem = systemPrompt.replace(/"/g, '\\"');
@@ -920,6 +893,18 @@ export const executeClaudeCommand = async params => {
920
893
  }
921
894
  if (data.type === 'message') messageCount++;
922
895
  else if (data.type === 'tool_use') toolUseCount++;
896
+ // Issue #1708: signal busy/idle to the bidirectional handler so
897
+ // queue-comments-to-input mode can hold frames until the AI is
898
+ // idle. Any assistant/tool_use/system event means the AI is
899
+ // actively processing; a result event means the turn is done
900
+ // and queued frames can flush.
901
+ if (bidirectionalHandler) {
902
+ if (data.type === 'assistant' || data.type === 'tool_use' || data.type === 'tool_result') {
903
+ if (typeof bidirectionalHandler.markAiBusy === 'function') {
904
+ bidirectionalHandler.markAiBusy();
905
+ }
906
+ }
907
+ }
923
908
  if (progressMonitor) await progressMonitor.processStreamEvent(data).catch(e => log(`⚠️ Progress: ${e.message}`, { verbose: true }));
924
909
  if (data.type === 'result') {
925
910
  if (!resultEventReceived) {
@@ -927,6 +912,15 @@ export const executeClaudeCommand = async params => {
927
912
  await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
928
913
  resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
929
914
  }
915
+ // Issue #1708: result event = AI is idle and waiting for next
916
+ // user input. Flush any frames queued by --queue-comments-to-input.
917
+ if (bidirectionalHandler && typeof bidirectionalHandler.markAiIdle === 'function') {
918
+ try {
919
+ await bidirectionalHandler.markAiIdle();
920
+ } catch (idleErr) {
921
+ if (argv.verbose) await log(`⚠️ Bidirectional mode: markAiIdle error: ${idleErr.message}`, { verbose: true });
922
+ }
923
+ }
930
924
  if (data.subtype === 'success') resultSuccessReceived = true;
931
925
  if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
932
926
  anthropicTotalCostUSD = data.total_cost_usd;
@@ -1301,7 +1295,9 @@ export const executeClaudeCommand = async params => {
1301
1295
  await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
1302
1296
  }
1303
1297
  if (tokenUsage.peakContextUsage > 0) {
1304
- await log(`📊 Peak single-request context: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
1298
+ // Issue #1710: rename so the metric matches the new definition (input + cache_creation,
1299
+ // excluding cache_read). Cache reads are still visible separately on the Total line.
1300
+ await log(`📊 Peak single-request input (excl. cache reads): ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
1305
1301
  }
1306
1302
  await log('\n💰 Token Usage Summary:');
1307
1303
  // Display per-model breakdown
package/src/codex.lib.mjs CHANGED
@@ -25,6 +25,7 @@ import { getCodexPlaywrightMcpDisableConfigArgs } from './playwright-mcp.lib.mjs
25
25
  import { fetchModelInfo } from './model-info.lib.mjs';
26
26
  import { defaultModels } from './models/index.mjs';
27
27
  import { classifyRetryableError, getRetryDelayMs, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
28
+ import { parseSubSessionSize, buildCodexSubSessionSizeConfigArgs, buildCodexDisable1mContextConfigArgs } from './sub-session-size.lib.mjs'; // Issue #1706
28
29
  import Decimal from 'decimal.js-light';
29
30
 
30
31
  const CODEX_USAGE_FIELD_NAMES = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'cache_write_tokens', 'cache_creation_input_tokens', 'reasoning_tokens', 'input_tokens_details.cached_tokens', 'input_tokens_details.cache_read_tokens', 'input_tokens_details.cache_write_tokens', 'input_tokens_details.cache_creation_tokens', 'input_tokens_details.cache_creation_input_tokens', 'output_tokens_details.reasoning_tokens'];
@@ -741,6 +742,36 @@ export const executeCodexCommand = async params => {
741
742
  }
742
743
  codexArgs += ` --json --skip-git-repo-check -o ${shellQuote(lastMessageFile)} -c ${shellQuote(`model_reasoning_effort=${reasoningEffort}`)} -c ${shellQuote('model_reasoning_summary=auto')} --dangerously-bypass-approvals-and-sandbox`;
743
744
 
745
+ // Issue #1706: Append --disable-1m-context and --sub-session-size as Codex -c overrides.
746
+ let parsedSubSessionSize;
747
+ try {
748
+ parsedSubSessionSize = parseSubSessionSize(argv.subSessionSize);
749
+ } catch (parseError) {
750
+ await log(`⚠️ ${parseError.message}`, { level: 'warn' });
751
+ parsedSubSessionSize = { kind: 'default', tokens: null, percent: null, raw: '' };
752
+ }
753
+ let codexContextWindowTokens = null;
754
+ if (parsedSubSessionSize.kind === 'percent') {
755
+ try {
756
+ const codexModelMeta = await fetchModelInfo(mappedModel, { preferredProviderIds: ['openai'] });
757
+ codexContextWindowTokens = codexModelMeta?.limit?.context || null;
758
+ } catch {
759
+ codexContextWindowTokens = null;
760
+ }
761
+ }
762
+ const disable1mArgs = buildCodexDisable1mContextConfigArgs(!!argv.disable1mContext);
763
+ for (const arg of disable1mArgs) {
764
+ codexArgs += ` ${shellQuote(arg)}`;
765
+ }
766
+ const subSessionSizeArgs = buildCodexSubSessionSizeConfigArgs(parsedSubSessionSize, { contextWindow: codexContextWindowTokens });
767
+ for (const arg of subSessionSizeArgs) {
768
+ codexArgs += ` ${shellQuote(arg)}`;
769
+ }
770
+ if (argv.verbose) {
771
+ if (disable1mArgs.length) await log(`📊 Codex --disable-1m-context: ${disable1mArgs.join(' ')}`, { verbose: true });
772
+ if (subSessionSizeArgs.length) await log(`📊 Codex --sub-session-size: ${subSessionSizeArgs.join(' ')}`, { verbose: true });
773
+ }
774
+
744
775
  const fullCommand = `(cd ${shellQuote(tempDir)} && cat ${shellQuote(promptFile)} | ${codexPath} ${codexArgs})`;
745
776
 
746
777
  await log(`\n${formatAligned('📝', 'Raw command:', '')}`);