claude-code-cache-fix 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/preload.mjs +293 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "2.0.0",
3
+ "version": "2.0.1",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -727,6 +727,262 @@ function normalizeToolUseInputsInBody(body) {
727
727
  return modified;
728
728
  }
729
729
 
730
+ // --------------------------------------------------------------------------
731
+ // cache_control_sticky — preserve historical marker positions across turns
732
+ // --------------------------------------------------------------------------
733
+ //
734
+ // Covers a cache-miss class that cache_control_normalize can't reach by
735
+ // itself. CC maintains at most one user-side cache_control marker at a time:
736
+ // as conversation grows, CC moves the marker from the tail of one user turn
737
+ // to the tail of the next, DROPPING it from the previous position. The
738
+ // dropped position's block loses the ~43 bytes of `"cache_control":{"type":
739
+ // "ephemeral","ttl":"1h"}` framing — a tail-of-message byte diff that
740
+ // invalidates every downstream cached block (~600K tokens' worth on a
741
+ // long-running session).
742
+ //
743
+ // Observed instance: at 16:27:13 UTC today, a 1284-message session emitted
744
+ // cw=804,428 (hit=2.3%). Diff of main-session bodies 585 → 587 showed ONE
745
+ // message diverged — msg[1281] — which lost its cache_control marker (43
746
+ // bytes) because CC had moved the marker to the new last user msg[1283].
747
+ //
748
+ // cache_control_normalize places exactly ONE canonical marker at the last
749
+ // block of the last user message on every outbound body. That solves the
750
+ // current-marker-drift class but cannot preserve historical markers — CC
751
+ // has already dropped them by the time the payload reaches this extension.
752
+ //
753
+ // This sticky extension maintains per-session state tracking where markers
754
+ // have appeared in prior turns, and reinstates them on future turns as
755
+ // additive preservation. Up to 3 message-level marker positions are tracked,
756
+ // the current one included (Anthropic's hard limit is 4 cache_control markers
757
+ // total — 1 for system[2] + 3 for message-level breakpoints). When a new
758
+ // position would exceed the cap, the oldest tracked entry is dropped.
759
+ //
760
+ // Messages are identified by a stable hash so that compaction rewrites /
761
+ // index shifts don't confuse the tracker:
762
+ // - If the message has a tool_use or tool_result block with an `id` or
763
+ // `tool_use_id`, hash `role|tool_use|id` or `role|tool_result|tool_use_id`.
764
+ // - Otherwise hash `role|text|firstTextContent.slice(0, 256)`.
765
+ //
766
+ // Pipeline order: runs AFTER cache_control_normalize (when it's present) so
767
+ // normalize first pins the canonical marker at the last user msg, then
768
+ // sticky re-adds historical markers on their hashed messages. Skips any
769
+ // message already carrying a marker (fast no-op when sticky fires first).
770
+ //
771
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
772
+ // --------------------------------------------------------------------------
773
+
774
// Directory that holds the per-project sticky state files.
const CACHE_CONTROL_STICKY_DIR = join(homedir(), ".claude", "cache-fix-state");
// Maximum number of message-level marker positions kept in the tracked state.
const CACHE_CONTROL_STICKY_MAX_POSITIONS = 3;
// Fallback marker for persisted entries that carry no usable marker of their
// own. Frozen because it is shared, exported module state; consumers copy it
// with object spread instead of mutating it.
const CACHE_CONTROL_STICKY_DEFAULT_MARKER = Object.freeze({ type: "ephemeral", ttl: "1h" });
777
+
778
/**
 * Derive the state-file location for a project key.
 *
 * The key is coerced with String(), hashed with SHA-1, and the first 16 hex
 * characters of the digest are embedded in a file name inside
 * CACHE_CONTROL_STICKY_DIR. Exported so tests can check path derivation
 * without re-implementing the hashing.
 *
 * @param {*} key - Project key (any value; stringified before hashing).
 * @returns {string} `<CACHE_CONTROL_STICKY_DIR>/cache-control-sticky-<hash>.json`
 */
function cacheControlStickyStatePath(key) {
  const hasher = createHash("sha1");
  hasher.update(String(key));
  const shortDigest = hasher.digest("hex").slice(0, 16);
  const fileName = `cache-control-sticky-${shortDigest}.json`;
  return join(CACHE_CONTROL_STICKY_DIR, fileName);
}
786
+
787
/**
 * SHA-1 the given text and keep the first 16 hex characters of the digest.
 * Private helper for computeStickyMessageHash.
 */
function _stickyShortDigest(text) {
  return createHash("sha1").update(text).digest("hex").slice(0, 16);
}

/**
 * Compute a stable identifier for a message that does not depend on the
 * message's index in the conversation or on blocks appended after the
 * anchor block.
 *
 * Anchor selection, in priority order:
 *   1. The first tool_use block with a non-empty string `id`, or the first
 *      tool_result block with a non-empty string `tool_use_id` (whichever
 *      comes first in `content`): hashes `role|tool_use|<id>` or
 *      `role|tool_result|<tool_use_id>`.
 *   2. Otherwise the first text block: hashes `role|text|<prefix>`, where
 *      the prefix is the first 256 UTF-16 code units of the text (what
 *      String.prototype.slice counts — not bytes).
 *
 * Two messages with the same role and the same anchor produce the same
 * hash. A non-string `role` is treated as the empty string.
 *
 * Pure; exported for unit tests.
 *
 * @param {object} msg - Message with a `role` and an array `content`.
 * @returns {string|null} 16-hex-character hash, or null when `msg` is not an
 *   object, `content` is not a non-empty array, or no anchor block exists.
 */
function computeStickyMessageHash(msg) {
  if (!msg || typeof msg !== "object") return null;
  const { content } = msg;
  if (!Array.isArray(content) || content.length === 0) return null;
  const role = typeof msg.role === "string" ? msg.role : "";
  const blocks = content.filter((b) => b && typeof b === "object");

  // Tool identifiers are preferred: they are assigned once and never edited.
  for (const b of blocks) {
    if (b.type === "tool_use" && typeof b.id === "string" && b.id) {
      return _stickyShortDigest(`${role}|tool_use|${b.id}`);
    }
    if (b.type === "tool_result" && typeof b.tool_use_id === "string" && b.tool_use_id) {
      return _stickyShortDigest(`${role}|tool_result|${b.tool_use_id}`);
    }
  }

  // Fallback: prefix of the first text block (an empty string still counts).
  const firstText = blocks.find((b) => b.type === "text" && typeof b.text === "string");
  if (!firstText) return null;
  return _stickyShortDigest(`${role}|text|${firstText.text.slice(0, 256)}`);
}
819
+
820
/**
 * Read the persisted sticky state for a project key.
 *
 * Returns a fresh empty state when the file is missing, unreadable, not
 * valid JSON, or not shaped like `{ positions: [...] }`. A missing file
 * (ENOENT) is the expected first-run case and is silent; every other
 * failure emits a debugLog line before falling back.
 *
 * Entries are sanitised on the way in:
 *   - entries that are not objects or lack a non-empty string `msg_hash`
 *     are dropped;
 *   - `position_hint` is always normalised to "last_block";
 *   - `marker` is shallow-copied when it is an object with a string `type`,
 *     otherwise replaced by a copy of CACHE_CONTROL_STICKY_DEFAULT_MARKER.
 *
 * @param {*} key - Project key, forwarded to cacheControlStickyStatePath.
 * @returns {{version: 1, positions: Array<{msg_hash: string,
 *   position_hint: "last_block", marker: object}>}}
 */
function readCacheControlStickyState(key) {
  const emptyState = () => ({ version: 1, positions: [] });
  const path = cacheControlStickyStatePath(key);

  let raw;
  try {
    raw = readFileSync(path, "utf-8");
  } catch (e) {
    // No file yet is normal; permission or other I/O problems deserve a trace.
    if (e?.code !== "ENOENT") {
      debugLog(`cache_control_sticky: state read error (${e?.message}) — resetting`);
    }
    return emptyState();
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    debugLog(`cache_control_sticky: state JSON parse error (${e?.message}) — resetting`);
    return emptyState();
  }

  if (!parsed || typeof parsed !== "object" || !Array.isArray(parsed.positions)) {
    debugLog("cache_control_sticky: state file malformed shape — resetting");
    return emptyState();
  }

  const positions = [];
  for (const p of parsed.positions) {
    if (!p || typeof p !== "object") continue;
    if (typeof p.msg_hash !== "string" || !p.msg_hash) continue;
    const markerUsable = p.marker && typeof p.marker === "object" && typeof p.marker.type === "string";
    positions.push({
      msg_hash: p.msg_hash,
      // "last_block" is the only placement this module supports, so whatever
      // was persisted is normalised to it.
      position_hint: "last_block",
      marker: markerUsable ? { ...p.marker } : { ...CACHE_CONTROL_STICKY_DEFAULT_MARKER },
    });
  }
  return { version: 1, positions };
}
858
+
859
/**
 * Persist sticky state for a project key via write-to-temp + rename.
 *
 * The temp file name includes the current process id. The state file is
 * keyed by project, so several processes working in the same project write
 * to the same target; a shared temp name would let one process truncate the
 * temp file another is still writing. With per-process temp names each
 * writer renames only its own complete file.
 *
 * Best-effort: any error from mkdir/write/rename is reported through
 * debugLog and otherwise ignored.
 *
 * @param {*} key - Project key, forwarded to cacheControlStickyStatePath.
 * @param {object} state - State object; serialised with JSON.stringify.
 * @returns {void}
 */
function writeCacheControlStickyState(key, state) {
  const path = cacheControlStickyStatePath(key);
  const tmp = `${path}.${process.pid}.tmp`;
  try {
    mkdirSync(CACHE_CONTROL_STICKY_DIR, { recursive: true });
    writeFileSync(tmp, JSON.stringify(state, null, 2), "utf-8");
    renameSync(tmp, path);
  } catch (e) {
    debugLog(`cache_control_sticky: state write error (${e?.message})`);
  }
}
873
+
874
/**
 * True when `block` is an object carrying an object-valued `cache_control`.
 * Private helper for updateCacheControlStickyState.
 */
function _stickyBlockHasMarker(block) {
  return Boolean(
    block && typeof block === "object" && block.cache_control && typeof block.cache_control === "object"
  );
}

/**
 * Count the cache_control markers already present on the top-level blocks of
 * `body.system`, `body.tools` and every message's `content` array.
 * Private helper for updateCacheControlStickyState.
 */
function _stickyCountBodyMarkers(body) {
  const countIn = (blocks) => (Array.isArray(blocks) ? blocks.filter(_stickyBlockHasMarker).length : 0);
  let total = countIn(body.system) + countIn(body.tools);
  for (const msg of body.messages) {
    if (msg && typeof msg === "object") total += countIn(msg.content);
  }
  return total;
}

/**
 * Pure core: given a request body and the previously persisted state,
 * compute the next state and the marker mutations to apply to the body.
 * Performs no I/O and does not mutate `body` or `priorState`.
 *
 * Algorithm:
 *   1. Start from the prior positions in their stored order (oldest first).
 *      Entries without a non-empty string `msg_hash` are ignored; a repeated
 *      hash keeps only its last occurrence.
 *   2. Walk user-role messages with non-empty array content. Index each
 *      message hash to the FIRST message producing it. When a message has a
 *      block carrying cache_control, record a copy of the first such marker
 *      and move that hash to the most-recent end of the tracked list (so a
 *      position observed in this body is never the next one evicted).
 *   3. Cap the list at CACHE_CONTROL_STICKY_MAX_POSITIONS, dropping the
 *      least recently observed entries.
 *   4. For each remaining entry whose message is present in this body and
 *      carries no marker on any block, emit a mutation targeting the
 *      message's last block (skipped when that block is not an object and
 *      therefore cannot hold a marker).
 *   5. Never emit more mutations than fit under the 4-marker-per-request
 *      limit, counting markers already on system, tools and message blocks.
 *      When the budget is short, the most recently observed positions win.
 *
 * Exported for unit tests.
 *
 * @param {object} body - Request payload; must have an array `messages`.
 * @param {object|null|undefined} priorState - `{positions: [...]}` as
 *   returned by readCacheControlStickyState, or anything else for "none".
 * @returns {{newState: {version: 1, positions: Array},
 *   mutations: Array<{msgIdx: number, blockIdx: number, marker: object}>}}
 *   Mutations are ordered from least to most recently observed position.
 *   For a body without an array `messages`, both lists are empty.
 */
function updateCacheControlStickyState(body, priorState) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) {
    return { newState: { version: 1, positions: [] }, mutations: [] };
  }

  // Request-wide ceiling on cache_control markers (system + tools + messages).
  const TOTAL_MARKER_LIMIT = 4;

  // msg_hash → entry. Map iteration order doubles as recency order
  // (oldest first); delete-then-set moves a key to the newest end.
  const tracked = new Map();
  const priorPositions = priorState && Array.isArray(priorState.positions) ? priorState.positions : [];
  for (const p of priorPositions) {
    if (!p || typeof p !== "object" || typeof p.msg_hash !== "string" || !p.msg_hash) continue;
    tracked.delete(p.msg_hash);
    tracked.set(p.msg_hash, p);
  }

  // Index this body's user messages and fold observed markers into `tracked`.
  const hashToMsgIdx = new Map();
  body.messages.forEach((msg, m) => {
    if (!msg || msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length === 0) return;
    const h = computeStickyMessageHash(msg);
    if (!h) return;
    if (!hashToMsgIdx.has(h)) hashToMsgIdx.set(h, m);
    const marked = msg.content.find(_stickyBlockHasMarker);
    if (!marked) return;
    tracked.delete(h);
    tracked.set(h, { msg_hash: h, position_hint: "last_block", marker: { ...marked.cache_control } });
  });

  // Keep only the newest MAX_POSITIONS entries.
  const all = [...tracked.values()];
  const capped =
    all.length > CACHE_CONTROL_STICKY_MAX_POSITIONS
      ? all.slice(all.length - CACHE_CONTROL_STICKY_MAX_POSITIONS)
      : all;

  // Choose mutations newest-first so a short budget favours recent
  // positions, then restore oldest-first order for the caller.
  let budget = TOTAL_MARKER_LIMIT - _stickyCountBodyMarkers(body);
  const mutations = [];
  for (let i = capped.length - 1; i >= 0 && budget > 0; i--) {
    const pos = capped[i];
    const msgIdx = hashToMsgIdx.get(pos.msg_hash);
    if (msgIdx === undefined) continue;
    const msg = body.messages[msgIdx];
    if (!msg || !Array.isArray(msg.content) || msg.content.length === 0) continue;
    if (msg.content.some(_stickyBlockHasMarker)) continue;
    const blockIdx = msg.content.length - 1;
    const target = msg.content[blockIdx];
    if (!target || typeof target !== "object") continue;
    mutations.push({ msgIdx, blockIdx, marker: { ...pos.marker } });
    budget -= 1;
  }
  mutations.reverse();

  return { newState: { version: 1, positions: capped }, mutations };
}
962
+
963
/**
 * Wrapper around updateCacheControlStickyState: read the persisted state for
 * `key`, compute the mutations for `body`, apply them, and persist the next
 * state.
 *
 * Each applied mutation replaces `body.messages[msgIdx]` with a shallow copy
 * whose `content` array is copied and whose target block is copied with a
 * `cache_control` property set to a copy of the marker. The original message
 * and block objects are left untouched; `body.messages` itself is updated in
 * place.
 *
 * A mutation is skipped when its message is missing, has no array content,
 * or its target block is not an object. Skipped mutations are NOT counted,
 * so a return value of 0 reliably means the body was left as it was.
 *
 * The next state is written on every call that gets past the body check,
 * whether or not any mutation was applied.
 *
 * @param {object} body - Request payload with an array `messages`.
 * @param {*} key - Project key used to locate the state file.
 * @returns {number} Number of markers actually added to `body`; 0 when
 *   `body` is not an object with an array `messages`.
 */
function applyCacheControlSticky(body, key) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) return 0;
  const prior = readCacheControlStickyState(key);
  const { newState, mutations } = updateCacheControlStickyState(body, prior);

  let applied = 0;
  for (const { msgIdx, blockIdx, marker } of mutations) {
    const msg = body.messages[msgIdx];
    if (!msg || !Array.isArray(msg.content)) continue;
    const target = msg.content[blockIdx];
    if (!target || typeof target !== "object") continue;
    // Copy-on-write so objects shared with the caller's history stay intact.
    const content = msg.content.slice();
    content[blockIdx] = { ...target, cache_control: { ...marker } };
    body.messages[msgIdx] = { ...msg, content };
    applied += 1;
  }

  writeCacheControlStickyState(key, newState);
  return applied;
}
985
+
730
986
  /**
731
987
  * Core fix: on EVERY call, scan the entire message array for the LATEST
732
988
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -1131,6 +1387,7 @@ const _STATS_SCHEMA = {
1131
1387
  reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
1132
1388
  cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
1133
1389
  tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
1390
+ cache_control_sticky: { applied: 0, skipped: 0, lastApplied: null },
1134
1391
  };
1135
1392
 
1136
1393
  function _createEmptyStats() {
@@ -2091,6 +2348,34 @@ globalThis.fetch = async function (url, options) {
2091
2348
  }
2092
2349
  }
2093
2350
 
2351
+ // Extension: cache_control_sticky — reinstate historical cache_control
2352
+ // markers on messages whose position CC has moved past. CC maintains
2353
+ // at most one user-side marker at a time; as it moves the marker to
2354
+ // the tail of each new user turn, the previous position loses the ~43
2355
+ // bytes of cache_control framing — a tail-of-message byte drift that
2356
+ // breaks every downstream cached block. This extension tracks marker
2357
+ // positions by stable message-hash across turns (up to 3) and re-adds
2358
+ // them on future bodies. Runs AFTER cache_control_normalize (when
2359
+ // present) so normalize pins the canonical tail-marker first and
2360
+ // sticky re-adds the historical ones. State file is per-project at
2361
+ // ~/.claude/cache-fix-state/cache-control-sticky-<sha1(cwd)>.json.
2362
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
2363
+ if (shouldApplyFix("cache_control_sticky") && payload.messages) {
2364
+ try {
2365
+ const stickyApplied = applyCacheControlSticky(payload, process.cwd());
2366
+ if (stickyApplied > 0) {
2367
+ modified = true;
2368
+ debugLog(`APPLIED: cache_control_sticky reinstated ${stickyApplied} historical marker(s)`);
2369
+ recordFixResult("cache_control_sticky", "applied");
2370
+ } else {
2371
+ recordFixResult("cache_control_sticky", "skipped");
2372
+ }
2373
+ } catch (e) {
2374
+ debugLog(`cache_control_sticky: error (${e?.message}) — skipped`);
2375
+ recordFixResult("cache_control_sticky", "skipped");
2376
+ }
2377
+ }
2378
+
2094
2379
  // Bug 5: TTL enforcement (configurable per request type)
2095
2380
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
2096
2381
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -2528,5 +2813,13 @@ export {
2528
2813
  countUserCacheControlMarkers,
2529
2814
  CACHE_CONTROL_CANONICAL_MARKER,
2530
2815
  normalizeToolUseInputsInBody,
2816
+ computeStickyMessageHash,
2817
+ cacheControlStickyStatePath,
2818
+ updateCacheControlStickyState,
2819
+ applyCacheControlSticky,
2820
+ readCacheControlStickyState,
2821
+ writeCacheControlStickyState,
2822
+ CACHE_CONTROL_STICKY_MAX_POSITIONS,
2823
+ CACHE_CONTROL_STICKY_DEFAULT_MARKER,
2531
2824
  _pinnedBlocks, // exported so tests can reset between runs
2532
2825
  };