claude-code-cache-fix 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/preload.mjs +297 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -727,6 +727,266 @@ function normalizeToolUseInputsInBody(body) {
727
727
  return modified;
728
728
  }
729
729
 
730
+ // --------------------------------------------------------------------------
731
+ // cache_control_sticky — preserve historical marker positions across turns
732
+ // --------------------------------------------------------------------------
733
+ //
734
+ // Covers a cache-miss class that cache_control_normalize can't reach by
735
+ // itself. CC maintains at most one user-side cache_control marker at a time:
736
+ // as conversation grows, CC moves the marker from the tail of one user turn
737
+ // to the tail of the next, DROPPING it from the previous position. The
738
+ // dropped position's block loses the ~43 bytes of `"cache_control":{"type":
739
+ // "ephemeral","ttl":"1h"}` framing — a tail-of-message byte diff that
740
+ // invalidates every downstream cached block (~600K tokens' worth on a
741
+ // long-running session).
742
+ //
743
+ // Observed instance: at 16:27:13 UTC today, a 1284-message session emitted
744
+ // cw=804,428 (hit=2.3%). Diff of main-session bodies 585 → 587 showed ONE
745
+ // message diverged — msg[1281] — which lost its cache_control marker (43
746
+ // bytes) because CC had moved the marker to the new last user msg[1283].
747
+ //
748
+ // cache_control_normalize places exactly ONE canonical marker at the last
749
+ // block of the last user message on every outbound body. That solves the
750
+ // current-marker-drift class but cannot preserve historical markers — CC
751
+ // has already dropped them by the time the payload reaches this extension.
752
+ //
753
+ // This sticky extension maintains per-session state tracking where markers
754
+ // have appeared in prior turns, and reinstates them on future turns as
755
+ // additive preservation. Up to 2 historical message-level markers are
756
+ // tracked (Anthropic's hard limit is 4 cache_control markers total — 1 for
757
+ // system[2] + 1 canonical from cache_control_normalize + 2 historical from
758
+ // sticky = 4). When a historical position
759
+ // would exceed the cap, the oldest tracked entry is dropped (LRU).
760
+ //
761
+ // Messages are identified by a stable hash so that compaction rewrites /
762
+ // index shifts don't confuse the tracker:
763
+ // - If the message has a tool_use or tool_result block with an `id` or
764
+ // `tool_use_id`, hash `role|tool_use|id` or `role|tool_result|tool_use_id`.
765
+ // - Otherwise hash `role|text|firstTextContent.slice(0, 256)`.
766
+ //
767
+ // Pipeline order: runs AFTER cache_control_normalize (when it's present) so
768
+ // normalize first pins the canonical marker at the last user msg, then
769
+ // sticky re-adds historical markers on their hashed messages. Skips any
770
+ // message already carrying a marker (fast no-op when sticky fires first).
771
+ //
772
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
773
+ // --------------------------------------------------------------------------
774
+
775
/** Directory that holds one persisted sticky-state JSON file per project key. */
const CACHE_CONTROL_STICKY_DIR = join(homedir(), ".claude", "cache-fix-state");

/**
 * Maximum number of marker positions the sticky tracker keeps.
 *
 * Anthropic hard limit: 4 cache_control markers total per request.
 * CC uses 1 on system[2] + cache_control_normalize places 1 on last user msg = 2 reserved.
 * Sticky can use at most 2 historical positions to stay within the 4-marker cap.
 */
const CACHE_CONTROL_STICKY_MAX_POSITIONS = 2;

/**
 * Marker substituted when a persisted entry carries no usable marker object.
 * Frozen because it is a shared, exported constant: consumers are expected to
 * copy it (`{ ...CACHE_CONTROL_STICKY_DEFAULT_MARKER }`) rather than mutate it.
 */
const CACHE_CONTROL_STICKY_DEFAULT_MARKER = Object.freeze({ type: "ephemeral", ttl: "1h" });
781
+
782
/**
 * Derive the absolute path of the sticky-state file for a project key.
 *
 * The key is stringified, SHA-1 hashed, and the first 16 hex characters of
 * the digest become part of the file name inside CACHE_CONTROL_STICKY_DIR.
 * Exported so tests can check path derivation without re-implementing it.
 *
 * @param {*} key - Project key (coerced with `String()` before hashing).
 * @returns {string} Path of the form `<dir>/cache-control-sticky-<hash16>.json`.
 */
function cacheControlStickyStatePath(key) {
  const hasher = createHash("sha1");
  hasher.update(String(key));
  const shortDigest = hasher.digest("hex").slice(0, 16);
  const fileName = `cache-control-sticky-${shortDigest}.json`;
  return join(CACHE_CONTROL_STICKY_DIR, fileName);
}
790
+
791
/**
 * Compute a short, stable identifier for a message so it can be recognised
 * again after index shifts or extra content blocks being inserted.
 *
 * Selection order:
 *   1. The first block (in content order) that is either a `tool_use` with a
 *      non-empty string `id` or a `tool_result` with a non-empty string
 *      `tool_use_id`; the hash input is `role|tool_use|id` or
 *      `role|tool_result|tool_use_id`.
 *   2. Otherwise the first `text` block with a string `text`; the hash input
 *      is `role|text|` followed by the first 256 UTF-16 code units of the text.
 *
 * Pure; exported for unit tests.
 *
 * @param {object} msg - Message with `role` and an array `content`.
 * @returns {string|null} First 16 hex chars of the SHA-1 digest, or null when
 *   the message is not an object, has no non-empty content array, or has no
 *   block usable as an anchor.
 */
function computeStickyMessageHash(msg) {
  if (!msg || typeof msg !== "object") return null;

  const { content } = msg;
  if (!Array.isArray(content) || content.length === 0) return null;

  // A non-string role hashes as the empty string.
  const role = typeof msg.role === "string" ? msg.role : "";
  const shortSha1 = (kind, value) =>
    createHash("sha1").update(`${role}|${kind}|${value}`).digest("hex").slice(0, 16);

  const blocks = content.filter((block) => block && typeof block === "object");

  // Tool identifiers are preferred over text: they are the most stable anchors.
  for (const block of blocks) {
    switch (block.type) {
      case "tool_use":
        if (typeof block.id === "string" && block.id) {
          return shortSha1("tool_use", block.id);
        }
        break;
      case "tool_result":
        if (typeof block.tool_use_id === "string" && block.tool_use_id) {
          return shortSha1("tool_result", block.tool_use_id);
        }
        break;
      default:
        break;
    }
  }

  // Fallback: prefix of the first text block.
  const firstText = blocks.find(
    (block) => block.type === "text" && typeof block.text === "string"
  );
  return firstText ? shortSha1("text", firstText.text.slice(0, 256)) : null;
}
823
+
824
/**
 * Read the persisted sticky state for a project key.
 *
 * Never throws: a missing or unreadable file, invalid JSON, or a payload
 * whose `positions` is not an array all yield a fresh empty state. Entries
 * that are not objects or lack a non-empty string `msg_hash` are dropped.
 * An entry whose `marker` is not an object with a string `type` gets a copy
 * of CACHE_CONTROL_STICKY_DEFAULT_MARKER instead.
 *
 * @param {*} key - Project key, passed to cacheControlStickyStatePath.
 * @returns {{version: 1, positions: Array<{msg_hash: string, position_hint: "last_block", marker: object}>}}
 */
function readCacheControlStickyState(key) {
  const path = cacheControlStickyStatePath(key);
  let raw;
  try {
    raw = readFileSync(path, "utf-8");
  } catch {
    // No state yet (or unreadable): behave as if nothing has been tracked.
    return { version: 1, positions: [] };
  }
  try {
    const parsed = JSON.parse(raw);
    if (!parsed || typeof parsed !== "object" || !Array.isArray(parsed.positions)) {
      debugLog("cache_control_sticky: state file malformed shape — resetting");
      return { version: 1, positions: [] };
    }
    const positions = [];
    for (const p of parsed.positions) {
      if (!p || typeof p !== "object") continue;
      if (typeof p.msg_hash !== "string" || !p.msg_hash) continue;
      const markerIsUsable =
        p.marker && typeof p.marker === "object" && typeof p.marker.type === "string";
      positions.push({
        msg_hash: p.msg_hash,
        // "last_block" is the only hint in use; whatever was stored is
        // normalised to it (the previous conditional had identical branches).
        position_hint: "last_block",
        // Always hand out a copy so callers cannot mutate shared objects.
        marker: markerIsUsable ? { ...p.marker } : { ...CACHE_CONTROL_STICKY_DEFAULT_MARKER },
      });
    }
    return { version: 1, positions };
  } catch (e) {
    debugLog(`cache_control_sticky: state JSON parse error (${e?.message}) — resetting`);
    return { version: 1, positions: [] };
  }
}
862
+
863
/**
 * Persist sticky state for a project key using write-to-temp + rename.
 *
 * Best-effort: any I/O error is reported through debugLog and swallowed so a
 * failed state write never breaks the outgoing request.
 *
 * @param {*} key - Project key, passed to cacheControlStickyStatePath.
 * @param {object} state - State object; serialised with JSON.stringify.
 * @returns {void}
 */
function writeCacheControlStickyState(key, state) {
  const path = cacheControlStickyStatePath(key);
  try {
    mkdirSync(CACHE_CONTROL_STICKY_DIR, { recursive: true });
    // The state file is shared by every process using the same project key,
    // so the temp name includes the pid: two processes writing at the same
    // time must not truncate or rename each other's temp file.
    const tmp = `${path}.${process.pid}.tmp`;
    writeFileSync(tmp, JSON.stringify(state, null, 2), "utf-8");
    renameSync(tmp, path);
  } catch (e) {
    debugLog(`cache_control_sticky: state write error (${e?.message})`);
  }
}
877
+
878
/**
 * Pure core of the sticky extension: from a request body and the previously
 * persisted state, compute the next state and the marker mutations the
 * caller should apply. Performs no I/O and does not modify `body`.
 *
 * Algorithm:
 *   1. Seed a recency list (oldest first) with the prior positions. Prior
 *      entries without a non-empty string `msg_hash` are ignored.
 *   2. Walk user-role messages in order. For each message that hashes to a
 *      non-null value, remember the first message index seen for that hash.
 *      If any block of the message carries an object `cache_control`, the
 *      hash is (re)inserted at the most-recent end of the list with a copy
 *      of the first such marker. Re-observed hashes therefore MOVE to the
 *      end instead of keeping their old slot, so eviction is a true LRU.
 *   3. Keep only the newest CACHE_CONTROL_STICKY_MAX_POSITIONS entries.
 *   4. For every kept entry whose message is present in this body and has
 *      no marker on any block, emit a mutation targeting its last block.
 *
 * @param {object} body - Request body; only `body.messages` is read.
 * @param {object} priorState - Previously persisted state (`{positions: [...]}`);
 *   anything without an array `positions` is treated as empty.
 * @returns {{newState: {version: 1, positions: Array<object>},
 *            mutations: Array<{msgIdx: number, blockIdx: number, marker: object}>}}
 *   Empty state and no mutations when `body.messages` is not an array.
 */
function updateCacheControlStickyState(body, priorState) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) {
    return { newState: { version: 1, positions: [] }, mutations: [] };
  }

  const carriesMarker = (block) =>
    Boolean(
      block && typeof block === "object" && block.cache_control && typeof block.cache_control === "object"
    );

  // Recency list keyed by msg_hash. A Map iterates in insertion order, so
  // delete + set moves an entry to the most-recent end.
  const tracked = new Map();
  const priorPositions =
    priorState && Array.isArray(priorState.positions) ? priorState.positions : [];
  for (const entry of priorPositions) {
    if (!entry || typeof entry !== "object") continue;
    if (typeof entry.msg_hash !== "string" || !entry.msg_hash) continue;
    tracked.set(entry.msg_hash, entry);
  }

  // hash -> index of the first user message in this body with that hash.
  const hashToMsgIdx = new Map();
  for (let m = 0; m < body.messages.length; m++) {
    const msg = body.messages[m];
    if (!msg || msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length === 0) continue;
    const hash = computeStickyMessageHash(msg);
    if (!hash) continue;
    if (!hashToMsgIdx.has(hash)) hashToMsgIdx.set(hash, m);

    // Observe the first marker on this message (any block).
    const markedBlock = msg.content.find(carriesMarker);
    if (!markedBlock) continue;
    tracked.delete(hash);
    tracked.set(hash, {
      msg_hash: hash,
      position_hint: "last_block",
      marker: { ...markedBlock.cache_control },
    });
  }

  // Cap: keep the NEWEST entries (end of the recency list).
  const ordered = [...tracked.values()];
  const capped =
    ordered.length > CACHE_CONTROL_STICKY_MAX_POSITIONS
      ? ordered.slice(ordered.length - CACHE_CONTROL_STICKY_MAX_POSITIONS)
      : ordered;

  // Reinstate markers on tracked messages that are present but unmarked.
  const mutations = [];
  for (const pos of capped) {
    const msgIdx = hashToMsgIdx.get(pos.msg_hash);
    if (msgIdx === undefined) continue;
    const msg = body.messages[msgIdx];
    if (!msg || !Array.isArray(msg.content) || msg.content.length === 0) continue;
    if (msg.content.some(carriesMarker)) continue;
    mutations.push({
      msgIdx,
      blockIdx: msg.content.length - 1,
      marker: { ...pos.marker },
    });
  }

  return { newState: { version: 1, positions: capped }, mutations };
}
966
+
967
/**
 * Wrapper around updateCacheControlStickyState: reads the persisted state
 * for `key`, computes the marker mutations, applies them to `body` in place,
 * and writes the next state back.
 *
 * Mutated messages and their content arrays are replaced with shallow copies
 * (the objects previously held in `body.messages` are not modified). A
 * mutation whose message or target block is missing or not an object is
 * skipped and NOT counted, so the return value reflects markers that were
 * really added.
 *
 * @param {object} body - Request body; `body.messages` entries may be replaced.
 * @param {*} key - Project key used to locate the state file.
 * @returns {number} Number of markers actually reinstated (0 when `body` has
 *   no `messages` array).
 */
function applyCacheControlSticky(body, key) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) return 0;

  const prior = readCacheControlStickyState(key);
  const { newState, mutations } = updateCacheControlStickyState(body, prior);

  let appliedCount = 0;
  for (const { msgIdx, blockIdx, marker } of mutations) {
    const msg = body.messages[msgIdx];
    if (!msg || !Array.isArray(msg.content)) continue;
    const target = msg.content[blockIdx];
    if (!target || typeof target !== "object") continue;

    const newContent = msg.content.slice();
    newContent[blockIdx] = { ...target, cache_control: { ...marker } };
    body.messages[msgIdx] = { ...msg, content: newContent };
    appliedCount += 1;
  }

  // State is persisted even when nothing was reinstated: it also records
  // markers observed on this body for use on later turns.
  writeCacheControlStickyState(key, newState);
  return appliedCount;
}
989
+
730
990
  /**
731
991
  * Core fix: on EVERY call, scan the entire message array for the LATEST
732
992
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -1131,6 +1391,7 @@ const _STATS_SCHEMA = {
1131
1391
  reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
1132
1392
  cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
1133
1393
  tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
1394
+ cache_control_sticky: { applied: 0, skipped: 0, lastApplied: null },
1134
1395
  };
1135
1396
 
1136
1397
  function _createEmptyStats() {
@@ -2091,6 +2352,34 @@ globalThis.fetch = async function (url, options) {
2091
2352
  }
2092
2353
  }
2093
2354
 
2355
+ // Extension: cache_control_sticky — reinstate historical cache_control
2356
+ // markers on messages whose position CC has moved past. CC maintains
2357
+ // at most one user-side marker at a time; as it moves the marker to
2358
+ // the tail of each new user turn, the previous position loses the ~43
2359
+ // bytes of cache_control framing — a tail-of-message byte drift that
2360
+ // breaks every downstream cached block. This extension tracks marker
2361
+ // positions by stable message-hash across turns (up to 2) and re-adds
2362
+ // them on future bodies. Runs AFTER cache_control_normalize (when
2363
+ // present) so normalize pins the canonical tail-marker first and
2364
+ // sticky re-adds the historical ones. State file is per-project at
2365
+ // ~/.claude/cache-fix-state/cache-control-sticky-<sha1(cwd)>.json.
2366
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
2367
+ if (shouldApplyFix("cache_control_sticky") && payload.messages) {
2368
+ try {
2369
+ const stickyApplied = applyCacheControlSticky(payload, process.cwd());
2370
+ if (stickyApplied > 0) {
2371
+ modified = true;
2372
+ debugLog(`APPLIED: cache_control_sticky reinstated ${stickyApplied} historical marker(s)`);
2373
+ recordFixResult("cache_control_sticky", "applied");
2374
+ } else {
2375
+ recordFixResult("cache_control_sticky", "skipped");
2376
+ }
2377
+ } catch (e) {
2378
+ debugLog(`cache_control_sticky: error (${e?.message}) — skipped`);
2379
+ recordFixResult("cache_control_sticky", "skipped");
2380
+ }
2381
+ }
2382
+
2094
2383
  // Bug 5: TTL enforcement (configurable per request type)
2095
2384
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
2096
2385
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -2528,5 +2817,13 @@ export {
2528
2817
  countUserCacheControlMarkers,
2529
2818
  CACHE_CONTROL_CANONICAL_MARKER,
2530
2819
  normalizeToolUseInputsInBody,
2820
+ computeStickyMessageHash,
2821
+ cacheControlStickyStatePath,
2822
+ updateCacheControlStickyState,
2823
+ applyCacheControlSticky,
2824
+ readCacheControlStickyState,
2825
+ writeCacheControlStickyState,
2826
+ CACHE_CONTROL_STICKY_MAX_POSITIONS,
2827
+ CACHE_CONTROL_STICKY_DEFAULT_MARKER,
2531
2828
  _pinnedBlocks, // exported so tests can reset between runs
2532
2829
  };