claude-code-cache-fix 1.11.0 → 2.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +4 -2
  2. package/package.json +1 -1
  3. package/preload.mjs +100 -0
package/README.md CHANGED
@@ -4,7 +4,9 @@
4
4
 
5
5
  English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
6
6
 
7
- Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.107.
7
+ Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
8
+
9
+ > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
8
10
 
9
11
  ## Security model
10
12
 
@@ -308,7 +310,7 @@ When the server downgrades your TTL to 5m (Layer 2 — quota-aware downgrade at
308
310
 
309
311
  ## Image stripping
310
312
 
311
- Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn in carry-forward.
313
+ Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
312
314
 
313
315
  Enable image stripping to remove old images from tool results:
314
316
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.11.0",
3
+ "version": "2.0.0-beta.2",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -680,6 +680,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
680
680
  const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
681
681
  const NORMALIZE_IDENTITY = process.env.CACHE_FIX_NORMALIZE_IDENTITY === "1";
682
682
  const STRIP_GIT_STATUS = process.env.CACHE_FIX_STRIP_GIT_STATUS === "1";
683
+ const NORMALIZE_CWD = process.env.CACHE_FIX_NORMALIZE_CWD === "1";
683
684
  const TTL_MAIN = (process.env.CACHE_FIX_TTL_MAIN || "1h").toLowerCase();
684
685
  const TTL_SUBAGENT = (process.env.CACHE_FIX_TTL_SUBAGENT || "1h").toLowerCase();
685
686
  const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
@@ -724,6 +725,8 @@ const _STATS_SCHEMA = {
724
725
  ttl: { applied: 0, skipped: 0, lastApplied: null },
725
726
  identity: { applied: 0, skipped: 0, lastApplied: null },
726
727
  git_status: { applied: 0, skipped: 0, lastApplied: null },
728
+ cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
729
+ smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
727
730
  };
728
731
 
729
732
  function _createEmptyStats() {
@@ -1304,6 +1307,103 @@ globalThis.fetch = async function (url, options) {
1304
1307
  }
1305
1308
  }
1306
1309
 
1310
+ // Optimization: normalize CWD and path references in system prompt
1311
+ // CC injects the full working directory path, additional directories, and
1312
+ // path references into system text blocks. These change per project/worktree,
1313
+ // busting the prefix cache across different working directories.
1314
+ // Opt-in via CACHE_FIX_NORMALIZE_CWD=1.
1315
+ // The model can still discover paths via Bash (pwd, ls) when needed.
1316
+ if (NORMALIZE_CWD && shouldApplyFix("cwd_normalize") && payload.system && Array.isArray(payload.system)) {
1317
+ let normalized = 0;
1318
+ payload.system = payload.system.map((block) => {
1319
+ if (block?.type !== "text" || typeof block.text !== "string") return block;
1320
+ let newText = block.text;
1321
+ // Normalize "Primary working directory: /path/to/project"
1322
+ newText = newText.replace(
1323
+ /( - Primary working directory: ).+/g,
1324
+ "$1[normalized by cache-fix]"
1325
+ );
1326
+ // Normalize "Additional working directories:" section
1327
+ newText = newText.replace(
1328
+ /( - Additional working directories:\n)((?: - .+\n)*)/g,
1329
+ "$1 - [normalized by cache-fix]\n"
1330
+ );
1331
+ // Normalize "Contents of /path/to/..." in claudeMd/memory references
1332
+ newText = newText.replace(
1333
+ /Contents of \/[^\s(]+/g,
1334
+ "Contents of [path normalized by cache-fix]"
1335
+ );
1336
+ if (newText !== block.text) {
1337
+ normalized++;
1338
+ return { ...block, text: newText };
1339
+ }
1340
+ return block;
1341
+ });
1342
+ if (normalized > 0) {
1343
+ modified = true;
1344
+ debugLog(`APPLIED: CWD/paths normalized in ${normalized} system block(s)`);
1345
+ recordFixResult("cwd_normalize", "applied");
1346
+ } else {
1347
+ recordFixResult("cwd_normalize", "skipped");
1348
+ }
1349
+ }
1350
+
1351
+ // Optimization: normalize smooshed dynamic system-reminders in tool_result content
1352
+ // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
1353
+ // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
1354
+ // output_token_usage, todo_reminder) change every turn, causing mid-history cache
1355
+ // busts even without resume or attachment scatter.
1356
+ // Bug: anthropics/claude-code#49585 (deafsquad)
1357
+ // Opt-in via CACHE_FIX_NORMALIZE_SMOOSH=1.
1358
+ if (process.env.CACHE_FIX_NORMALIZE_SMOOSH === "1" && shouldApplyFix("smoosh_normalize") && payload.messages) {
1359
+ let smooshNormalized = 0;
1360
+ const smooshPatterns = [
1361
+ // Token usage: 12345/50000; 37655 remaining
1362
+ /(<system-reminder>\nToken usage: )\d+\/\d+; \d+ remaining/g,
1363
+ // USD budget: $1.23/$10.00; $8.77 remaining
1364
+ /(<system-reminder>\nUSD budget: )\$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining/g,
1365
+ // Output tokens — turn: 1,234 / 5,000 · session: 12,345
1366
+ /(<system-reminder>\nOutput tokens \u2014 turn: )[\d,./\s]+ \u00b7 session: [\d,]+/g,
1367
+ // TodoWrite reminder with variable todo list content
1368
+ /(<system-reminder>\nThe TodoWrite tool hasn't been used recently\..*?)(\n\nHere are the existing contents of your todo list:\n\n\[[\s\S]*?\])?(\n<\/system-reminder>)/g,
1369
+ ];
1370
+ const smooshReplacements = [
1371
+ "$1[normalized]/[normalized]; [normalized] remaining",
1372
+ "$1$[normalized]/$[normalized]; $[normalized] remaining",
1373
+ "$1[normalized] \u00b7 session: [normalized]",
1374
+ "$1$3", // strip the variable todo list, keep the static reminder text
1375
+ ];
1376
+
1377
+ for (const msg of payload.messages) {
1378
+ if (msg.role !== "user") continue;
1379
+ // Only array-form msg.content is scanned; within it, tool_result blocks whose content is a string (the smooshed form) are normalized
1380
+ if (Array.isArray(msg.content)) {
1381
+ for (let i = 0; i < msg.content.length; i++) {
1382
+ const block = msg.content[i];
1383
+ // Smooshed tool_result with string content
1384
+ if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<system-reminder>")) {
1385
+ let newContent = block.content;
1386
+ for (let p = 0; p < smooshPatterns.length; p++) {
1387
+ smooshPatterns[p].lastIndex = 0; // reset regex state
1388
+ newContent = newContent.replace(smooshPatterns[p], smooshReplacements[p]);
1389
+ }
1390
+ if (newContent !== block.content) {
1391
+ msg.content[i] = { ...block, content: newContent };
1392
+ smooshNormalized++;
1393
+ }
1394
+ }
1395
+ }
1396
+ }
1397
+ }
1398
+ if (smooshNormalized > 0) {
1399
+ modified = true;
1400
+ debugLog(`APPLIED: smoosh-normalized ${smooshNormalized} tool_result block(s) with dynamic system-reminders`);
1401
+ recordFixResult("smoosh_normalize", "applied");
1402
+ } else {
1403
+ recordFixResult("smoosh_normalize", "skipped");
1404
+ }
1405
+ }
1406
+
1307
1407
  // Bug 5: TTL enforcement (configurable per request type)
1308
1408
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
1309
1409
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".