claude-code-cache-fix 2.0.0-beta.1 → 2.0.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +4 -2
  2. package/package.json +1 -1
  3. package/preload.mjs +69 -0
package/README.md CHANGED
@@ -4,7 +4,9 @@
4
4
 
5
5
  English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
6
6
 
7
- Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.107.
7
+ Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
8
+
9
+ > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors contribute: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in the usage response). Workaround: set `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 because its high-res image support increases image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for the full analysis.
8
10
 
9
11
  ## Security model
10
12
 
@@ -308,7 +310,7 @@ When the server downgrades your TTL to 5m (Layer 2 — quota-aware downgrade at
308
310
 
309
311
  ## Image stripping
310
312
 
311
- Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn in carry-forward.
313
+ Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
312
314
 
313
315
  Enable image stripping to remove old images from tool results:
314
316
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "2.0.0-beta.1",
3
+ "version": "2.0.0-beta.3",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -726,6 +726,7 @@ const _STATS_SCHEMA = {
726
726
  identity: { applied: 0, skipped: 0, lastApplied: null },
727
727
  git_status: { applied: 0, skipped: 0, lastApplied: null },
728
728
  cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
729
+ smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
729
730
  };
730
731
 
731
732
  function _createEmptyStats() {
@@ -1347,6 +1348,74 @@ globalThis.fetch = async function (url, options) {
1347
1348
  }
1348
1349
  }
1349
1350
 
1351
+ // Optimization: normalize dynamic system-reminders smooshed into tool_result content (and in standalone text blocks)
1352
+ // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
1353
+ // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
1354
+ // output_token_usage, todo_reminder) change every turn, causing mid-history cache
1355
+ // busts even without resume or attachment scatter.
1356
+ // Bug: anthropics/claude-code#49585 (deafsquad)
1357
+ // Opt-in via CACHE_FIX_NORMALIZE_SMOOSH=1; also requires shouldApplyFix to allow smoosh_normalize and payload.messages to be present.
1358
+ if (process.env.CACHE_FIX_NORMALIZE_SMOOSH === "1" && shouldApplyFix("smoosh_normalize") && payload.messages) {
1359
+ let smooshNormalized = 0;
1360
+ const smooshPatterns = [
1361
+ // Token usage: 12345/50000; 37655 remaining (group 1 = static prefix, re-emitted by the replacement)
1362
+ /(<system-reminder>\nToken usage: )\d+\/\d+; \d+ remaining/g,
1363
+ // USD budget: $1.23/$10.00; $8.77 remaining (the paired replacement emits literal dollar signs, since $[ is not a substitution pattern)
1364
+ /(<system-reminder>\nUSD budget: )\$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining/g,
1365
+ // Output tokens — turn: 1,234 / 5,000 · session: 12,345 (both the turn figures and the session total are replaced)
1366
+ /(<system-reminder>\nOutput tokens \u2014 turn: )[\d,./\s]+ \u00b7 session: [\d,]+/g,
1367
+ // TodoWrite reminder with variable todo list content (group 1 = static reminder text, group 2 = optional todo list, group 3 = closing tag)
1368
+ /(<system-reminder>\nThe TodoWrite tool hasn't been used recently\..*?)(\n\nHere are the existing contents of your todo list:\n\n\[[\s\S]*?\])?(\n<\/system-reminder>)/g,
1369
+ ];
1370
+ const smooshReplacements = [
1371
+ "$1[normalized]/[normalized]; [normalized] remaining",
1372
+ "$1$[normalized]/$[normalized]; $[normalized] remaining",
1373
+ "$1[normalized] \u00b7 session: [normalized]",
1374
+ "$1$3", // drop group 2 (the variable todo list), keep the static reminder text and the closing tag
1375
+ ];
1376
+
1377
+ for (const msg of payload.messages) {
1378
+ if (msg.role !== "user") continue;
1379
+ // Only array-form message content is scanned; a message whose content is a plain string is left untouched
1380
+ if (Array.isArray(msg.content)) {
1381
+ for (let i = 0; i < msg.content.length; i++) {
1382
+ const block = msg.content[i];
1383
+ // Smooshed tool_result: content must be a string containing <system-reminder> anywhere; on change the array slot is replaced with a shallow copy of the block
1384
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<system-reminder>")) {
1385
+ let newContent = block.content;
1386
+ for (let p = 0; p < smooshPatterns.length; p++) {
1387
+ smooshPatterns[p].lastIndex = 0;
1388
+ newContent = newContent.replace(smooshPatterns[p], smooshReplacements[p]);
1389
+ }
1390
+ if (newContent !== block.content) {
1391
+ msg.content[i] = { ...block, content: newContent };
1392
+ smooshNormalized++;
1393
+ }
1394
+ }
1395
+ // Unsmooshed standalone text blocks: only text that starts with <system-reminder> is considered; rewritten text blocks increment the same smooshNormalized counter
1396
+ if (block.type === "text" && typeof block.text === "string" && block.text.startsWith("<system-reminder>")) {
1397
+ let newText = block.text;
1398
+ for (let p = 0; p < smooshPatterns.length; p++) {
1399
+ smooshPatterns[p].lastIndex = 0;
1400
+ newText = newText.replace(smooshPatterns[p], smooshReplacements[p]);
1401
+ }
1402
+ if (newText !== block.text) {
1403
+ msg.content[i] = { ...block, text: newText };
1404
+ smooshNormalized++;
1405
+ }
1406
+ }
1407
+ }
1408
+ }
1409
+ }
1410
+ if (smooshNormalized > 0) {
1411
+ modified = true;
1412
+ debugLog(`APPLIED: smoosh-normalized ${smooshNormalized} tool_result block(s) with dynamic system-reminders`);
1413
+ recordFixResult("smoosh_normalize", "applied");
1414
+ } else {
1415
+ recordFixResult("smoosh_normalize", "skipped");
1416
+ }
1417
+ }
1418
+
1350
1419
  // Bug 5: TTL enforcement (configurable per request type)
1351
1420
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
1352
1421
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".