claude-code-cache-fix 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +2 -24
  2. package/package.json +1 -1
  3. package/preload.mjs +144 -171
package/README.md CHANGED
@@ -92,27 +92,6 @@ This keeps images in the last 3 user messages and replaces older ones with a tex
92
92
 
93
93
  Set to `0` (default) to disable.
94
94
 
95
- ## Prefix lock (resume cache hit)
96
-
97
- Even with the block relocation fix, the first API call after `--resume` triggers a full cache rebuild because CC reassembles messages with different system-reminder blocks, changing the prefix bytes. On a 300k token context at Opus rates, that's ~$2.80 per resume.
98
-
99
- The prefix lock eliminates this by saving the exact `messages[0]` content after all fixes are applied, then replaying it on the next resume to produce a byte-identical prefix.
100
-
101
- ```bash
102
- export CACHE_FIX_PREFIX_LOCK=1
103
- ```
104
-
105
- Safety guards — the lock only fires when ALL of these match:
106
- - System prompt hash (same project, no CLAUDE.md changes)
107
- - Tools hash (no MCP/plugin changes)
108
- - User message text (same conversation)
109
- - User content hash (no substantive context changes)
110
- - Not a post-compaction conversation
111
-
112
- If any guard fails, the lock skips and falls back to normal behavior. The worst case is a skip — the lock cannot increase costs or cause context loss.
113
-
114
- Set to `0` (default) to disable.
115
-
116
95
  ## Monitoring
117
96
 
118
97
  The interceptor includes monitoring for several additional issues identified by the community:
@@ -152,8 +131,8 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
152
131
  - `BUDGET WARNING: tool result chars at N / 200,000 threshold` — approaching budget cap
153
132
  - `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
154
133
  - `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
155
- - `PREFIX LOCK: APPLIED — replayed saved messages[0]` — resume cache hit achieved
156
- - `PREFIX LOCK: skipped — <reason>` — guard prevented lock (expected, safe)
134
+ - `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
135
+ - `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
157
136
  - `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
158
137
 
159
138
  ### Prefix diff mode
@@ -173,7 +152,6 @@ Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are gen
173
152
  | `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
174
153
  | `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
175
154
  | `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
176
- | `CACHE_FIX_PREFIX_LOCK` | `0` | Replay saved messages[0] on resume for cache hit (0 = disabled) |
177
155
 
178
156
  ## Limitations
179
157
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.2.0",
3
+ "version": "1.4.0",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -365,156 +365,6 @@ function stripOldToolResultImages(messages, keepLast) {
365
365
  return { messages: strippedCount > 0 ? result : messages, stats };
366
366
  }
367
367
 
368
- // --------------------------------------------------------------------------
369
- // Prefix lock — replay saved messages[0] on resume for cache hit
370
- // --------------------------------------------------------------------------
371
-
372
- // CACHE_FIX_PREFIX_LOCK=1 — save messages[0] on every call and replay it on
373
- // resume to avoid a cache rebuild. Disabled by default.
374
- //
375
- // On resume, CC reassembles messages with blocks in different positions and
376
- // injects fresh system-reminders, changing the prefix bytes. Even after our
377
- // relocation fix corrects the blocks, the prefix differs from what the server
378
- // cached on the last pre-exit call, causing a full cache rebuild.
379
- //
380
- // This feature saves the exact messages[0] content after all fixes are applied.
381
- // On the first call of a new process (resume), if system prompt hash and tools
382
- // hash match the saved snapshot, and the real user message text matches, we
383
- // replay the saved messages[0] to produce a byte-identical prefix → cache hit.
384
-
385
- const PREFIX_LOCK = process.env.CACHE_FIX_PREFIX_LOCK === "1";
386
- const PREFIX_LOCK_FILE = join(homedir(), ".claude", "cache-fix-prefix-lock.json");
387
-
388
- let _prefixLockFirstCall = true;
389
-
390
- /**
391
- * Compute hashes for prefix lock comparison.
392
- */
393
- function computePrefixHashes(system, tools) {
394
- const sysHash = system
395
- ? createHash("sha256").update(JSON.stringify(system)).digest("hex").slice(0, 16)
396
- : "none";
397
- const toolHash = tools
398
- ? createHash("sha256").update(JSON.stringify(tools.map(t => t.name).sort())).digest("hex").slice(0, 16)
399
- : "none";
400
- return { sysHash, toolHash };
401
- }
402
-
403
- /**
404
- * Extract the real user message text from messages[0] (skipping system-reminders).
405
- */
406
- function extractUserTextFromFirstMsg(msg) {
407
- if (!msg || !Array.isArray(msg.content)) return "";
408
- for (const block of msg.content) {
409
- if (block.type === "text" && typeof block.text === "string" &&
410
- !block.text.startsWith("<system-reminder>") &&
411
- !block.text.startsWith("<local-command")) {
412
- return block.text.slice(0, 200); // enough to identify, not too much to compare
413
- }
414
- }
415
- return "";
416
- }
417
-
418
- /**
419
- * Hash all non-system-reminder user content in messages[0] to detect
420
- * substantive changes that the userText check (first 200 chars) might miss.
421
- */
422
- function hashUserContent(msg) {
423
- if (!msg || !Array.isArray(msg.content)) return "empty";
424
- const userBlocks = msg.content.filter(b =>
425
- b.type === "text" && typeof b.text === "string" &&
426
- !b.text.startsWith("<system-reminder>") &&
427
- !b.text.startsWith("<local-command")
428
- );
429
- if (userBlocks.length === 0) return "empty";
430
- return createHash("sha256")
431
- .update(userBlocks.map(b => b.text).join("\n"))
432
- .digest("hex").slice(0, 16);
433
- }
434
-
435
- /**
436
- * On resume: try to replay saved messages[0] for cache hit.
437
- * Returns the locked messages array or the original if lock doesn't apply.
438
- */
439
- function applyPrefixLock(messages, system, tools) {
440
- if (!PREFIX_LOCK || !Array.isArray(messages) || messages.length < 2) return messages;
441
-
442
- const firstUserIdx = messages.findIndex(m => m.role === "user");
443
- if (firstUserIdx === -1) return messages;
444
-
445
- const { sysHash, toolHash } = computePrefixHashes(system, tools);
446
- const currentUserText = extractUserTextFromFirstMsg(messages[firstUserIdx]);
447
- const currentContentHash = hashUserContent(messages[firstUserIdx]);
448
-
449
- // Skip if this looks like a compacted conversation (system-reminder as first block
450
- // with compaction summary markers)
451
- const firstBlock = messages[firstUserIdx]?.content?.[0];
452
- if (firstBlock?.text?.includes("CompactBoundary") || firstBlock?.text?.includes("compacted")) {
453
- debugLog("PREFIX LOCK: skipped — compacted conversation detected");
454
- return messages;
455
- }
456
-
457
- if (_prefixLockFirstCall) {
458
- _prefixLockFirstCall = false;
459
-
460
- // Try to load and apply saved prefix
461
- try {
462
- const saved = JSON.parse(readFileSync(PREFIX_LOCK_FILE, "utf8"));
463
-
464
- if (saved.sysHash !== sysHash) {
465
- debugLog("PREFIX LOCK: skipped — system prompt changed");
466
- } else if (saved.toolHash !== toolHash) {
467
- debugLog("PREFIX LOCK: skipped — tools changed");
468
- } else if (saved.userText !== currentUserText) {
469
- debugLog("PREFIX LOCK: skipped — user message text changed");
470
- } else if (saved.contentHash && saved.contentHash !== currentContentHash) {
471
- debugLog("PREFIX LOCK: skipped — user content hash changed (substantive context change)");
472
- } else if (!saved.content || !Array.isArray(saved.content)) {
473
- debugLog("PREFIX LOCK: skipped — saved content invalid");
474
- } else {
475
- // Apply the saved messages[0] content
476
- const result = [...messages];
477
- result[firstUserIdx] = { ...result[firstUserIdx], content: saved.content };
478
- debugLog(`PREFIX LOCK: APPLIED — replayed saved messages[0] (${saved.content.length} blocks)`);
479
- return result;
480
- }
481
- } catch {
482
- debugLog("PREFIX LOCK: no saved prefix found (first run or file missing)");
483
- }
484
- }
485
-
486
- return messages;
487
- }
488
-
489
- /**
490
- * Save current messages[0] content for future resume replay.
491
- * Called after all fixes are applied, before the request is sent.
492
- */
493
- function savePrefixLock(messages, system, tools) {
494
- if (!PREFIX_LOCK || !Array.isArray(messages)) return;
495
-
496
- const firstUserIdx = messages.findIndex(m => m.role === "user");
497
- if (firstUserIdx === -1) return;
498
-
499
- const { sysHash, toolHash } = computePrefixHashes(system, tools);
500
- const userText = extractUserTextFromFirstMsg(messages[firstUserIdx]);
501
- const contentHash = hashUserContent(messages[firstUserIdx]);
502
- const content = messages[firstUserIdx].content;
503
-
504
- try {
505
- writeFileSync(PREFIX_LOCK_FILE, JSON.stringify({
506
- timestamp: new Date().toISOString(),
507
- sysHash,
508
- toolHash,
509
- userText,
510
- contentHash,
511
- content,
512
- }));
513
- } catch (e) {
514
- debugLog("PREFIX LOCK: failed to save:", e?.message);
515
- }
516
- }
517
-
518
368
  // --------------------------------------------------------------------------
519
369
  // Tool schema stabilization (Bug 2 secondary cause)
520
370
  // --------------------------------------------------------------------------
@@ -844,15 +694,6 @@ globalThis.fetch = async function (url, options) {
844
694
  }
845
695
  }
846
696
 
847
- // Prefix lock: replay saved messages[0] on resume for cache hit
848
- if (payload.messages && payload.system) {
849
- const locked = applyPrefixLock(payload.messages, payload.system, payload.tools);
850
- if (locked !== payload.messages) {
851
- payload.messages = locked;
852
- modified = true;
853
- }
854
- }
855
-
856
697
  // Bug 2a: Stabilize tool ordering
857
698
  if (payload.tools) {
858
699
  const sorted = stabilizeToolOrder(payload.tools);
@@ -885,16 +726,42 @@ globalThis.fetch = async function (url, options) {
885
726
  debugLog("Request body rewritten");
886
727
  }
887
728
 
888
- // Save prefix lock after all fixes applied
889
- if (payload.messages && payload.system) {
890
- savePrefixLock(payload.messages, payload.system, payload.tools);
891
- }
892
-
893
729
  // Monitor for microcompact / budget enforcement degradation
894
730
  if (payload.messages) {
895
731
  monitorContextDegradation(payload.messages);
896
732
  }
897
733
 
734
+ // Prompt size measurement — log system prompt, tools, and injected block sizes
735
+ if (DEBUG && payload.system && payload.tools && payload.messages) {
736
+ const sysChars = JSON.stringify(payload.system).length;
737
+ const toolsChars = JSON.stringify(payload.tools).length;
738
+ const firstUserIdx = payload.messages.findIndex(m => m.role === "user");
739
+ if (firstUserIdx !== -1) {
740
+ const msg0 = payload.messages[firstUserIdx];
741
+ if (Array.isArray(msg0.content)) {
742
+ let skillsChars = 0;
743
+ let mcpChars = 0;
744
+ let deferredChars = 0;
745
+ let hooksChars = 0;
746
+ for (const block of msg0.content) {
747
+ const text = block.text || "";
748
+ if (isSkillsBlock(text)) skillsChars += text.length;
749
+ else if (isMcpBlock(text)) mcpChars += text.length;
750
+ else if (isDeferredToolsBlock(text)) deferredChars += text.length;
751
+ else if (isHooksBlock(text)) hooksChars += text.length;
752
+ }
753
+ const injectedTotal = skillsChars + mcpChars + deferredChars + hooksChars;
754
+ if (injectedTotal > 0) {
755
+ debugLog(
756
+ `PROMPT SIZE: system=${sysChars} tools=${toolsChars}`,
757
+ `injected=${injectedTotal} (skills=${skillsChars} mcp=${mcpChars}`,
758
+ `deferred=${deferredChars} hooks=${hooksChars})`
759
+ );
760
+ }
761
+ }
762
+ }
763
+ }
764
+
898
765
  // Capture prefix snapshot for cross-process diff analysis
899
766
  snapshotPrefix(payload);
900
767
 
@@ -917,20 +784,126 @@ globalThis.fetch = async function (url, options) {
917
784
  const overage = response.headers.get("anthropic-ratelimit-unified-overage-status");
918
785
 
919
786
  if (h5 || h7d) {
920
- const quota = {
921
- timestamp: new Date().toISOString(),
922
- five_hour: h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : null,
923
- seven_day: h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : null,
924
- status: status || null,
925
- overage_status: overage || null,
926
- };
927
787
  const quotaFile = join(homedir(), ".claude", "quota-status.json");
788
+ let quota = {};
789
+ try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
790
+ quota.timestamp = new Date().toISOString();
791
+ quota.five_hour = h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : quota.five_hour;
792
+ quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
793
+ quota.status = status || null;
794
+ quota.overage_status = overage || null;
928
795
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
929
796
  }
930
797
  } catch {
931
798
  // Non-critical — don't break the response
932
799
  }
800
+
801
+ // Clone response to extract TTL tier from usage (SSE stream)
802
+ try {
803
+ const clone = response.clone();
804
+ drainTTLFromClone(clone).catch(() => {});
805
+ } catch {
806
+ // clone() failure is non-fatal
807
+ }
933
808
  }
934
809
 
935
810
  return response;
936
811
  };
812
+
813
+ // --------------------------------------------------------------------------
814
+ // TTL tier extraction from SSE response stream
815
+ // --------------------------------------------------------------------------
816
+
817
+ /**
818
+ * Drain a cloned SSE response to extract cache TTL tier from the usage object.
819
+ * The message_start event contains usage.cache_creation with ephemeral_1h and
820
+ * ephemeral_5m token counts, revealing which TTL tier the server applied.
821
+ *
822
+ * Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
823
+ * and logs to debug log.
824
+ */
825
+ async function drainTTLFromClone(clone) {
826
+ if (!clone.body) return;
827
+
828
+ const reader = clone.body.getReader();
829
+ const decoder = new TextDecoder();
830
+ let buffer = "";
831
+
832
+ try {
833
+ while (true) {
834
+ const { done, value } = await reader.read();
835
+ if (done) break;
836
+ buffer += decoder.decode(value, { stream: true });
837
+
838
+ let newlineIdx;
839
+ while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
840
+ const line = buffer.slice(0, newlineIdx).trim();
841
+ buffer = buffer.slice(newlineIdx + 1);
842
+
843
+ if (!line.startsWith("data: ") || line === "data: [DONE]") continue;
844
+
845
+ try {
846
+ const event = JSON.parse(line.slice(6));
847
+
848
+ if (event.type === "message_start" && event.message?.usage) {
849
+ const u = event.message.usage;
850
+ const cc = u.cache_creation || {};
851
+ const e1h = cc.ephemeral_1h_input_tokens ?? 0;
852
+ const e5m = cc.ephemeral_5m_input_tokens ?? 0;
853
+ const cacheCreate = u.cache_creation_input_tokens ?? 0;
854
+ const cacheRead = u.cache_read_input_tokens ?? 0;
855
+
856
+ // Determine TTL tier from which ephemeral bucket got tokens
857
+ // When cache is fully warm (no creation), infer tier from previous
858
+ let ttlTier = "unknown";
859
+ if (e1h > 0 && e5m === 0) ttlTier = "1h";
860
+ else if (e5m > 0 && e1h === 0) ttlTier = "5m";
861
+ else if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
862
+ // Fully cached — no creation to determine tier. Preserve previous.
863
+ try {
864
+ const prev = JSON.parse(readFileSync(join(homedir(), ".claude", "quota-status.json"), "utf8"));
865
+ ttlTier = prev.cache?.ttl_tier || "1h";
866
+ } catch { ttlTier = "1h"; }
867
+ }
868
+ else if (e1h > 0 && e5m > 0) ttlTier = "mixed";
869
+
870
+ const hitRate = (cacheRead + cacheCreate) > 0
871
+ ? (cacheRead / (cacheRead + cacheCreate) * 100).toFixed(1)
872
+ : "N/A";
873
+
874
+ debugLog(
875
+ `CACHE TTL: tier=${ttlTier}`,
876
+ `create=${cacheCreate} read=${cacheRead} hit=${hitRate}%`,
877
+ `(1h=${e1h} 5m=${e5m})`
878
+ );
879
+
880
+ // Merge TTL data into quota-status.json
881
+ try {
882
+ const quotaFile = join(homedir(), ".claude", "quota-status.json");
883
+ let quota = {};
884
+ try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
885
+ quota.cache = {
886
+ ttl_tier: ttlTier,
887
+ cache_creation: cacheCreate,
888
+ cache_read: cacheRead,
889
+ ephemeral_1h: e1h,
890
+ ephemeral_5m: e5m,
891
+ hit_rate: hitRate,
892
+ timestamp: new Date().toISOString(),
893
+ };
894
+ writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
895
+ } catch {}
896
+
897
+ // Got what we need — stop reading
898
+ reader.cancel();
899
+ return;
900
+ }
901
+ } catch {
902
+ // Skip malformed SSE lines
903
+ }
904
+ }
905
+ }
906
+ } finally {
907
+ try { reader.releaseLock(); } catch {}
908
+ }
909
+ }