npm - claude-code-cache-fix - Versions diffs - 2.0.0-beta.1 → 2.0.0-beta.2 - Mend

claude-code-cache-fix 2.0.0-beta.1 → 2.0.0-beta.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -4,7 +4,9 @@
 English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
-Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.107.
+Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
+> **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
 ## Security model
@@ -308,7 +310,7 @@ When the server downgrades your TTL to 5m (Layer 2 — quota-aware downgrade at
 ## Image stripping
-Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn in carry-forward.
+Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
 Enable image stripping to remove old images from tool results:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-cache-fix",
-  "version": "2.0.0-beta.1",
+  "version": "2.0.0-beta.2",
   "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
   "type": "module",
   "exports": "./preload.mjs",

package/preload.mjs CHANGED Viewed

@@ -726,6 +726,7 @@ const _STATS_SCHEMA = {
   identity: { applied: 0, skipped: 0, lastApplied: null },
   git_status: { applied: 0, skipped: 0, lastApplied: null },
   cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
 };
 function _createEmptyStats() {
@@ -1347,6 +1348,62 @@ globalThis.fetch = async function (url, options) {
         }
       }
+      // Optimization: normalize smooshed dynamic system-reminders in tool_result content
+      // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
+      // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
+      // output_token_usage, todo_reminder) change every turn, causing mid-history cache
+      // busts even without resume or attachment scatter.
+      // Bug: anthropics/claude-code#49585 (deafsquad)
+      // Opt-in via CACHE_FIX_NORMALIZE_SMOOSH=1.
+      if (process.env.CACHE_FIX_NORMALIZE_SMOOSH === "1" && shouldApplyFix("smoosh_normalize") && payload.messages) {
+        let smooshNormalized = 0;
+        const smooshPatterns = [
+          // Token usage: 12345/50000; 37655 remaining
+          /(<system-reminder>\nToken usage: )\d+\/\d+; \d+ remaining/g,
+          // USD budget: $1.23/$10.00; $8.77 remaining
+          /(<system-reminder>\nUSD budget: )\$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining/g,
+          // Output tokens — turn: 1,234 / 5,000 · session: 12,345
+          /(<system-reminder>\nOutput tokens \u2014 turn: )[\d,./\s]+ \u00b7 session: [\d,]+/g,
+          // TodoWrite reminder with variable todo list content
+          /(<system-reminder>\nThe TodoWrite tool hasn't been used recently\..*?)(\n\nHere are the existing contents of your todo list:\n\n\[[\s\S]*?\])?(\n<\/system-reminder>)/g,
+        ];
+        const smooshReplacements = [
+          "$1[normalized]/[normalized]; [normalized] remaining",
+          "$1$[normalized]/$[normalized]; $[normalized] remaining",
+          "$1[normalized] \u00b7 session: [normalized]",
+          "$1$3",  // strip the variable todo list, keep the static reminder text
+        ];
+        for (const msg of payload.messages) {
+          if (msg.role !== "user") continue;
+          // Handle both string content (smooshed tool_result) and array content
+          if (Array.isArray(msg.content)) {
+            for (let i = 0; i < msg.content.length; i++) {
+              const block = msg.content[i];
+              // Smooshed tool_result with string content
+              if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<system-reminder>")) {
+                let newContent = block.content;
+                for (let p = 0; p < smooshPatterns.length; p++) {
+                  smooshPatterns[p].lastIndex = 0; // reset regex state
+                  newContent = newContent.replace(smooshPatterns[p], smooshReplacements[p]);
+                }
+                if (newContent !== block.content) {
+                  msg.content[i] = { ...block, content: newContent };
+                  smooshNormalized++;
+                }
+              }
+            }
+          }
+        }
+        if (smooshNormalized > 0) {
+          modified = true;
+          debugLog(`APPLIED: smoosh-normalized ${smooshNormalized} tool_result block(s) with dynamic system-reminders`);
+          recordFixResult("smoosh_normalize", "applied");
+        } else {
+          recordFixResult("smoosh_normalize", "skipped");
+        }
+      }
       // Bug 5: TTL enforcement (configurable per request type)
       // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
       // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".