claude-code-cache-fix 2.0.0-beta.1 → 2.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/package.json +1 -1
- package/preload.mjs +57 -0
package/README.md
CHANGED
|
@@ -4,7 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
|
|
6
6
|
|
|
7
|
-
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.
|
|
7
|
+
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
|
|
8
|
+
|
|
9
|
+
> **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
|
|
8
10
|
|
|
9
11
|
## Security model
|
|
10
12
|
|
|
@@ -308,7 +310,7 @@ When the server downgrades your TTL to 5m (Layer 2 — quota-aware downgrade at
|
|
|
308
310
|
|
|
309
311
|
## Image stripping
|
|
310
312
|
|
|
311
|
-
Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn
|
|
313
|
+
Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
|
|
312
314
|
|
|
313
315
|
Enable image stripping to remove old images from tool results:
|
|
314
316
|
|
package/package.json
CHANGED
package/preload.mjs
CHANGED
|
@@ -726,6 +726,7 @@ const _STATS_SCHEMA = {
|
|
|
726
726
|
identity: { applied: 0, skipped: 0, lastApplied: null },
|
|
727
727
|
git_status: { applied: 0, skipped: 0, lastApplied: null },
|
|
728
728
|
cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
|
|
729
|
+
smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
|
|
729
730
|
};
|
|
730
731
|
|
|
731
732
|
function _createEmptyStats() {
|
|
@@ -1347,6 +1348,62 @@ globalThis.fetch = async function (url, options) {
|
|
|
1347
1348
|
}
|
|
1348
1349
|
}
|
|
1349
1350
|
|
|
1351
|
+
// Optimization: normalize smooshed dynamic system-reminders in tool_result content
|
|
1352
|
+
// CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
|
|
1353
|
+
// blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
|
|
1354
|
+
// output_token_usage, todo_reminder) change every turn, causing mid-history cache
|
|
1355
|
+
// busts even without resume or attachment scatter.
|
|
1356
|
+
// Bug: anthropics/claude-code#49585 (deafsquad)
|
|
1357
|
+
// Opt-in via CACHE_FIX_NORMALIZE_SMOOSH=1.
|
|
1358
|
+
if (process.env.CACHE_FIX_NORMALIZE_SMOOSH === "1" && shouldApplyFix("smoosh_normalize") && payload.messages) {
|
|
1359
|
+
let smooshNormalized = 0;
|
|
1360
|
+
const smooshPatterns = [
|
|
1361
|
+
// Token usage: 12345/50000; 37655 remaining
|
|
1362
|
+
/(<system-reminder>\nToken usage: )\d+\/\d+; \d+ remaining/g,
|
|
1363
|
+
// USD budget: $1.23/$10.00; $8.77 remaining
|
|
1364
|
+
/(<system-reminder>\nUSD budget: )\$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining/g,
|
|
1365
|
+
// Output tokens — turn: 1,234 / 5,000 · session: 12,345
|
|
1366
|
+
/(<system-reminder>\nOutput tokens \u2014 turn: )[\d,./\s]+ \u00b7 session: [\d,]+/g,
|
|
1367
|
+
// TodoWrite reminder with variable todo list content
|
|
1368
|
+
/(<system-reminder>\nThe TodoWrite tool hasn't been used recently\..*?)(\n\nHere are the existing contents of your todo list:\n\n\[[\s\S]*?\])?(\n<\/system-reminder>)/g,
|
|
1369
|
+
];
|
|
1370
|
+
const smooshReplacements = [
|
|
1371
|
+
"$1[normalized]/[normalized]; [normalized] remaining",
|
|
1372
|
+
"$1$[normalized]/$[normalized]; $[normalized] remaining",
|
|
1373
|
+
"$1[normalized] \u00b7 session: [normalized]",
|
|
1374
|
+
"$1$3", // strip the variable todo list, keep the static reminder text
|
|
1375
|
+
];
|
|
1376
|
+
|
|
1377
|
+
for (const msg of payload.messages) {
|
|
1378
|
+
if (msg.role !== "user") continue;
|
|
1379
|
+
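// Only array-form message content is scanned; within it, tool_result blocks whose content is a string (the smooshed form) are normalized. String-valued msg.content and array-valued tool_result content are left untouched.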
|
|
1380
|
+
if (Array.isArray(msg.content)) {
|
|
1381
|
+
for (let i = 0; i < msg.content.length; i++) {
|
|
1382
|
+
const block = msg.content[i];
|
|
1383
|
+
// Smooshed tool_result with string content
|
|
1384
|
+
if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<system-reminder>")) {
|
|
1385
|
+
let newContent = block.content;
|
|
1386
|
+
for (let p = 0; p < smooshPatterns.length; p++) {
|
|
1387
|
+
smooshPatterns[p].lastIndex = 0; // reset regex state
|
|
1388
|
+
newContent = newContent.replace(smooshPatterns[p], smooshReplacements[p]);
|
|
1389
|
+
}
|
|
1390
|
+
if (newContent !== block.content) {
|
|
1391
|
+
msg.content[i] = { ...block, content: newContent };
|
|
1392
|
+
smooshNormalized++;
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
}
|
|
1397
|
+
}
|
|
1398
|
+
if (smooshNormalized > 0) {
|
|
1399
|
+
modified = true;
|
|
1400
|
+
debugLog(`APPLIED: smoosh-normalized ${smooshNormalized} tool_result block(s) with dynamic system-reminders`);
|
|
1401
|
+
recordFixResult("smoosh_normalize", "applied");
|
|
1402
|
+
} else {
|
|
1403
|
+
recordFixResult("smoosh_normalize", "skipped");
|
|
1404
|
+
}
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1350
1407
|
// Bug 5: TTL enforcement (configurable per request type)
|
|
1351
1408
|
// The client gates 1h cache TTL behind a GrowthBook allowlist that checks
|
|
1352
1409
|
// querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
|