claude-code-cache-fix 1.11.0 → 2.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/package.json +1 -1
- package/preload.mjs +100 -0
package/README.md
CHANGED
|
@@ -4,7 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
|
|
6
6
|
|
|
7
|
-
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.
|
|
7
|
+
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
|
|
8
|
+
|
|
9
|
+
> **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
|
|
8
10
|
|
|
9
11
|
## Security model
|
|
10
12
|
|
|
@@ -308,7 +310,7 @@ When the server downgrades your TTL to 5m (Layer 2 — quota-aware downgrade at
|
|
|
308
310
|
|
|
309
311
|
## Image stripping
|
|
310
312
|
|
|
311
|
-
Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn
|
|
313
|
+
Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
|
|
312
314
|
|
|
313
315
|
Enable image stripping to remove old images from tool results:
|
|
314
316
|
|
package/package.json
CHANGED
package/preload.mjs
CHANGED
|
@@ -680,6 +680,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
|
|
|
680
680
|
const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
|
|
681
681
|
const NORMALIZE_IDENTITY = process.env.CACHE_FIX_NORMALIZE_IDENTITY === "1";
|
|
682
682
|
const STRIP_GIT_STATUS = process.env.CACHE_FIX_STRIP_GIT_STATUS === "1";
|
|
683
|
+
const NORMALIZE_CWD = process.env.CACHE_FIX_NORMALIZE_CWD === "1";
|
|
683
684
|
const TTL_MAIN = (process.env.CACHE_FIX_TTL_MAIN || "1h").toLowerCase();
|
|
684
685
|
const TTL_SUBAGENT = (process.env.CACHE_FIX_TTL_SUBAGENT || "1h").toLowerCase();
|
|
685
686
|
const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
|
|
@@ -724,6 +725,8 @@ const _STATS_SCHEMA = {
|
|
|
724
725
|
ttl: { applied: 0, skipped: 0, lastApplied: null },
|
|
725
726
|
identity: { applied: 0, skipped: 0, lastApplied: null },
|
|
726
727
|
git_status: { applied: 0, skipped: 0, lastApplied: null },
|
|
728
|
+
cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
|
|
729
|
+
smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
|
|
727
730
|
};
|
|
728
731
|
|
|
729
732
|
function _createEmptyStats() {
|
|
@@ -1304,6 +1307,103 @@ globalThis.fetch = async function (url, options) {
|
|
|
1304
1307
|
}
|
|
1305
1308
|
}
|
|
1306
1309
|
|
|
1310
|
+
// Optimization: normalize CWD and path references in system prompt
|
|
1311
|
+
// CC injects the full working directory path, additional directories, and
|
|
1312
|
+
// path references into system text blocks. These change per project/worktree,
|
|
1313
|
+
// busting the prefix cache across different working directories.
|
|
1314
|
+
// Opt-in via CACHE_FIX_NORMALIZE_CWD=1.
|
|
1315
|
+
// The model can still discover paths via Bash (pwd, ls) when needed.
|
|
1316
|
+
if (NORMALIZE_CWD && shouldApplyFix("cwd_normalize") && payload.system && Array.isArray(payload.system)) {
|
|
1317
|
+
let normalized = 0;
|
|
1318
|
+
payload.system = payload.system.map((block) => {
|
|
1319
|
+
if (block?.type !== "text" || typeof block.text !== "string") return block;
|
|
1320
|
+
let newText = block.text;
|
|
1321
|
+
// Normalize "Primary working directory: /path/to/project"
|
|
1322
|
+
newText = newText.replace(
|
|
1323
|
+
/( - Primary working directory: ).+/g,
|
|
1324
|
+
"$1[normalized by cache-fix]"
|
|
1325
|
+
);
|
|
1326
|
+
// Normalize "Additional working directories:" section
|
|
1327
|
+
newText = newText.replace(
|
|
1328
|
+
/( - Additional working directories:\n)((?: - .+\n)*)/g,
|
|
1329
|
+
"$1 - [normalized by cache-fix]\n"
|
|
1330
|
+
);
|
|
1331
|
+
// Normalize "Contents of /path/to/..." in claudeMd/memory references
|
|
1332
|
+
newText = newText.replace(
|
|
1333
|
+
/Contents of \/[^\s(]+/g,
|
|
1334
|
+
"Contents of [path normalized by cache-fix]"
|
|
1335
|
+
);
|
|
1336
|
+
if (newText !== block.text) {
|
|
1337
|
+
normalized++;
|
|
1338
|
+
return { ...block, text: newText };
|
|
1339
|
+
}
|
|
1340
|
+
return block;
|
|
1341
|
+
});
|
|
1342
|
+
if (normalized > 0) {
|
|
1343
|
+
modified = true;
|
|
1344
|
+
debugLog(`APPLIED: CWD/paths normalized in ${normalized} system block(s)`);
|
|
1345
|
+
recordFixResult("cwd_normalize", "applied");
|
|
1346
|
+
} else {
|
|
1347
|
+
recordFixResult("cwd_normalize", "skipped");
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
// Optimization: normalize smooshed dynamic system-reminders in tool_result content
|
|
1352
|
+
// CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
|
|
1353
|
+
// blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
|
|
1354
|
+
// output_token_usage, todo_reminder) change every turn, causing mid-history cache
|
|
1355
|
+
// busts even without resume or attachment scatter.
|
|
1356
|
+
// Bug: anthropics/claude-code#49585 (deafsquad)
|
|
1357
|
+
// Opt-in via CACHE_FIX_NORMALIZE_SMOOSH=1.
|
|
1358
|
+
if (process.env.CACHE_FIX_NORMALIZE_SMOOSH === "1" && shouldApplyFix("smoosh_normalize") && payload.messages) {
|
|
1359
|
+
let smooshNormalized = 0;
|
|
1360
|
+
const smooshPatterns = [
|
|
1361
|
+
// Token usage: 12345/50000; 37655 remaining
|
|
1362
|
+
/(<system-reminder>\nToken usage: )\d+\/\d+; \d+ remaining/g,
|
|
1363
|
+
// USD budget: $1.23/$10.00; $8.77 remaining
|
|
1364
|
+
/(<system-reminder>\nUSD budget: )\$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining/g,
|
|
1365
|
+
// Output tokens — turn: 1,234 / 5,000 · session: 12,345
|
|
1366
|
+
/(<system-reminder>\nOutput tokens \u2014 turn: )[\d,./\s]+ \u00b7 session: [\d,]+/g,
|
|
1367
|
+
// TodoWrite reminder with variable todo list content
|
|
1368
|
+
/(<system-reminder>\nThe TodoWrite tool hasn't been used recently\..*?)(\n\nHere are the existing contents of your todo list:\n\n\[[\s\S]*?\])?(\n<\/system-reminder>)/g,
|
|
1369
|
+
];
|
|
1370
|
+
const smooshReplacements = [
|
|
1371
|
+
"$1[normalized]/[normalized]; [normalized] remaining",
|
|
1372
|
+
"$1$[normalized]/$[normalized]; $[normalized] remaining",
|
|
1373
|
+
"$1[normalized] \u00b7 session: [normalized]",
|
|
1374
|
+
"$1$3", // strip the variable todo list, keep the static reminder text
|
|
1375
|
+
];
|
|
1376
|
+
|
|
1377
|
+
for (const msg of payload.messages) {
|
|
1378
|
+
if (msg.role !== "user") continue;
|
|
1379
|
+
// Only array-form message content is scanned; a smooshed tool_result inside it carries its content as a string
|
|
1380
|
+
if (Array.isArray(msg.content)) {
|
|
1381
|
+
for (let i = 0; i < msg.content.length; i++) {
|
|
1382
|
+
const block = msg.content[i];
|
|
1383
|
+
// Smooshed tool_result with string content
|
|
1384
|
+
if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<system-reminder>")) {
|
|
1385
|
+
let newContent = block.content;
|
|
1386
|
+
for (let p = 0; p < smooshPatterns.length; p++) {
|
|
1387
|
+
smooshPatterns[p].lastIndex = 0; // reset regex state
|
|
1388
|
+
newContent = newContent.replace(smooshPatterns[p], smooshReplacements[p]);
|
|
1389
|
+
}
|
|
1390
|
+
if (newContent !== block.content) {
|
|
1391
|
+
msg.content[i] = { ...block, content: newContent };
|
|
1392
|
+
smooshNormalized++;
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
}
|
|
1397
|
+
}
|
|
1398
|
+
if (smooshNormalized > 0) {
|
|
1399
|
+
modified = true;
|
|
1400
|
+
debugLog(`APPLIED: smoosh-normalized ${smooshNormalized} tool_result block(s) with dynamic system-reminders`);
|
|
1401
|
+
recordFixResult("smoosh_normalize", "applied");
|
|
1402
|
+
} else {
|
|
1403
|
+
recordFixResult("smoosh_normalize", "skipped");
|
|
1404
|
+
}
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1307
1407
|
// Bug 5: TTL enforcement (configurable per request type)
|
|
1308
1408
|
// The client gates 1h cache TTL behind a GrowthBook allowlist that checks
|
|
1309
1409
|
// querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
|