npm - claude-code-cache-fix - Versions diffs - 2.0.0-beta.4 → 2.0.1 - Mend

claude-code-cache-fix 2.0.0-beta.4 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -4,7 +4,7 @@
 English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
-Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
+Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.112. Opus 4.7 compatible.
 > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
@@ -607,6 +607,7 @@ measurable signature of cache-efficiency degradation.
 - **[@JEONG-JIWOO](https://github.com/JEONG-JIWOO)** — VS Code extension investigation: discovered `claudeCode.claudeProcessWrapper` as the working integration path, wrote the C wrapper for Windows (#16)
 - **[@X-15](https://github.com/X-15)** — VS Code extension validation, per-fix health status analysis confirming safety check behavior on v2.1.105 (#16)
 - **[@ArkNill](https://github.com/ArkNill)** — Fingerprint verification fix for CC v2.1.108+ (`isMeta` filter change, PR #21), Korean README (PR #22), original [claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis) research
+- **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery
 If you contributed to the community effort on these issues and aren't listed here, please open an issue or PR — we want to credit everyone properly.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-cache-fix",
-  "version": "2.0.0-beta.4",
+  "version": "2.0.1",
   "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
   "type": "module",
   "exports": "./preload.mjs",

package/preload.mjs CHANGED Viewed

@@ -330,6 +330,659 @@ function stripSessionKnowledge(text) {
   );
 }
+// --------------------------------------------------------------------------
+// SessionStart:resume → :startup rewrite (Bug: anthropics/claude-code#43657)
+// --------------------------------------------------------------------------
+//
+// On `claude --continue`, CC fires processSessionStartHooks('resume', …) at
+// src/utils/sessionStart.ts:35. The resulting attachment text wraps the
+// hook's stdout in `<system-reminder>\nSessionStart:resume hook success: …`.
+// The original (pre-resume) session sent the same block as
+// `SessionStart:startup hook success: …`. Byte difference at msg[0] content[N]
+// → whole message prefix re-caches → full-session-cost miss.
+//
+// Some SessionStart hooks additionally embed `<session-id>` tags or
+// `Last active: <timestamp>` lines inside the reminder body, both of which
+// carry UUID/date volatility on top of the event-name flip.
+//
+// This helper rewrites the outbound text to match the originally-cached
+// form. Runs on both standalone text blocks and tool_result.content strings
+// (covers the case where the SessionStart reminder got smooshed by CC's
+// smooshSystemReminderSiblings pass before we see it).
+//
+// Agent behavior is unaffected — CC does not condition behavior on the
+// event-name text, and session-id / timestamps are ephemeral runtime
+// metadata, not semantic inputs.
+// --------------------------------------------------------------------------
+const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;
+const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;
+const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;
+/**
+ * Remove SessionStart-resume volatility from one text payload (a text
+ * block's .text or a tool_result's string .content).
+ *
+ * Three rewrites run in a fixed order: the `resume` event name becomes
+ * `startup`, `<session-id>` tags are dropped, and `Last active:` lines are
+ * dropped. Non-strings and strings without "SessionStart:" are returned
+ * untouched.
+ *
+ * @param {*} text candidate payload
+ * @returns {[*, number]} `[newText, count]`, where count is the number of
+ *   rewrite CATEGORIES that changed the text (0-3), not the number of
+ *   individual matches.
+ */
+function normalizeSessionStartText(text) {
+  if (typeof text !== "string" || !text.includes("SessionStart:")) return [text, 0];
+  // [pattern, replacement] pairs, applied top to bottom.
+  const rewrites = [
+    [SESSION_START_RESUME_MARKER, "SessionStart:startup hook success:"],
+    [SESSION_START_ID_TAG, ""],
+    [SESSION_START_LAST_ACTIVE_LINE, ""],
+  ];
+  let result = text;
+  let categoriesApplied = 0;
+  for (const [pattern, replacement] of rewrites) {
+    // String.prototype.replace with a /g regex always scans from index 0,
+    // so no lastIndex bookkeeping is needed. Every replacement changes the
+    // text, so "string differs" is equivalent to "pattern matched".
+    const rewritten = result.replace(pattern, replacement);
+    if (rewritten !== result) {
+      result = rewritten;
+      categoriesApplied++;
+    }
+  }
+  return [result, categoriesApplied];
+}
+// --------------------------------------------------------------------------
+// Continue-trailer strip (Bug: anthropics/claude-code#12 / resume UX)
+// --------------------------------------------------------------------------
+//
+// On `claude --continue`, CC appends a text block whose text is EXACTLY
+// "Continue from where you left off." to the last user message before
+// firing the first post-resume request. The pre-exit body did not carry
+// that block, so its presence in the resumed body creates a tail-of-last-
+// user-message drift (~40 bytes plus JSON framing) that breaks cache at
+// that position.
+//
+// The trailer is a semantic no-op — the agent already has the full prior
+// conversation as context. Removing it makes the post-resume body byte-
+// match what the pre-exit body cached at the tail.
+//
+// Match is intentionally narrow (exact string equality on the block's
+// .text) so mentions of the phrase inside a longer user sentence don't
+// get caught.
+// --------------------------------------------------------------------------
+const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";
+/**
+ * Predicate for the Continue-trailer block: a `{type: "text"}` block whose
+ * `.text` equals CONTINUE_TRAILER_TEXT exactly. Any other fields on the
+ * block (e.g. cache_control) are not inspected.
+ *
+ * @param {*} block candidate content block
+ * @returns {boolean} true only for an exact-match trailer block
+ */
+function isContinueTrailerBlock(block) {
+  // typeof null is "object", so null has to be excluded explicitly.
+  if (block === null || typeof block !== "object") return false;
+  const { type, text } = block;
+  return type === "text" && text === CONTINUE_TRAILER_TEXT;
+}
+// --------------------------------------------------------------------------
+// Deferred-tools restore (MCP reconnect race)
+// --------------------------------------------------------------------------
+//
+// Observed empirically: on `claude --continue`, if MCP servers haven't
+// finished reconnecting by the time CC fires the first post-resume
+// request, the `<system-reminder>The following deferred tools are now
+// available via ToolSearch…` block at msg[0] (or wherever the attachment
+// lands post-compaction) shrinks dramatically. A full list of ~40 tools
+// collapses to a handful of CC built-ins (AskUserQuestion, EnterPlanMode,
+// ExitPlanMode, PushNotification) and CC injects a trailing
+// `The following deferred tools are no longer available (their MCP server
+// disconnected). Do not search for them — ToolSearch will return no match:`
+// notice.
+//
+// That block change at the root of the message array breaks cache at the
+// very top — the entire ~940K prompt re-caches. By the time the second
+// post-resume request fires, MCPs are usually reconnected and the block is
+// full again, but the cache is already committed to the shrunk version
+// for this session.
+//
+// This extension snapshots the block to
+// `~/.claude/cache-fix-state/deferred-tools-<sha1(key)>.txt` every time
+// it's sent in its full form (no UNAVAILABLE marker), keyed by a caller-
+// supplied project key (default: cwd). On a subsequent request where the
+// block is shorter AND contains the UNAVAILABLE marker, the persisted
+// full bytes are substituted so the on-wire body matches the server's
+// cached prefix.
+//
+// Trade-off: the restored block may reference MCP tools that haven't
+// actually reconnected yet. Agent calls ToolSearch → no match → one retry.
+// Tiny cost versus a full-prompt cache miss on every resume.
+// --------------------------------------------------------------------------
+const DEFERRED_TOOLS_AVAILABLE_MARKER =
+  "The following deferred tools are now available via ToolSearch";
+const DEFERRED_TOOLS_UNAVAILABLE_MARKER =
+  "The following deferred tools are no longer available";
+const DEFERRED_TOOLS_SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-state");
+/**
+ * Derive the absolute snapshot file path for a project key.
+ *
+ * The key is stringified, SHA-1 hashed, and the first 16 hex characters
+ * of the digest become part of the file name inside
+ * DEFERRED_TOOLS_SNAPSHOT_DIR.
+ *
+ * @param {*} key project key (coerced with String())
+ * @returns {string} `<snapshot dir>/deferred-tools-<hash16>.txt`
+ */
+function deferredToolsSnapshotPath(key) {
+  const digest = createHash("sha1").update(String(key)).digest("hex");
+  const fileName = `deferred-tools-${digest.slice(0, 16)}.txt`;
+  return join(DEFERRED_TOOLS_SNAPSHOT_DIR, fileName);
+}
+/**
+ * Find the first deferred-tools reminder block in `body.messages`.
+ *
+ * Only user-role messages with array content are scanned; within those,
+ * the first text block whose text contains DEFERRED_TOOLS_AVAILABLE_MARKER
+ * wins. Assistant messages are never inspected, so an assistant turn that
+ * quotes the marker phrase is not mistaken for the real block.
+ *
+ * @param {*} body request body; anything without a `messages` array yields null
+ * @returns {{msgIdx: number, blockIdx: number, text: string} | null}
+ */
+function findDeferredToolsBlockInBody(body) {
+  const messages = body?.messages;
+  if (!Array.isArray(messages)) return null;
+  for (const [msgIdx, message] of messages.entries()) {
+    if (message?.role !== "user" || !Array.isArray(message.content)) continue;
+    const blockIdx = message.content.findIndex(
+      (block) =>
+        block?.type === "text" &&
+        typeof block.text === "string" &&
+        block.text.includes(DEFERRED_TOOLS_AVAILABLE_MARKER)
+    );
+    if (blockIdx !== -1) {
+      return { msgIdx, blockIdx, text: message.content[blockIdx].text };
+    }
+  }
+  return null;
+}
+// --------------------------------------------------------------------------
+// Bookkeeping-reminder strip
+// --------------------------------------------------------------------------
+//
+// Complements `smoosh_normalize` / `smoosh_split`: where normalize stabilizes
+// bytes in-place and split peels smooshed reminders back into standalone
+// text blocks, this pass REMOVES purely-bookkeeping reminder blocks entirely
+// from the outbound body. Zero model visibility, zero drift.
+//
+// Targeted patterns (all CC-internal, per-turn values the agent doesn't need
+// to condition behavior on):
+//   - `Token usage: <N>/<M>; <K> remaining`
+//   - `Output tokens — turn: <X> · session: <Y>`
+//   - `USD budget: $<X>/$<Y>; $<Z> remaining`
+//   - `The task tools haven't been used recently. …`
+//   - `The TodoWrite tool hasn't been used recently. …`
+//   - `Remaining conversation turns: <N>`
+//   - `Messages until auto-compact: <N>`
+//
+// Hook-injected reminders (thinking-enrichment, action-tracker,
+// PreToolUse/PostToolUse blocking errors, UserPromptSubmit additional
+// context, custom user hooks) are deliberately NOT stripped here — the
+// agent needs that feedback visible in the turn it fires, and attempting a
+// history-only filter creates per-turn drift of its own (the "last user
+// message" shifts each turn, so a reminder preserved at turn N gets
+// stripped at N+1 when its host message falls into history). Leaving hook
+// reminders untouched is the safer choice; their residual drift is small
+// compared to bookkeeping churn.
+// --------------------------------------------------------------------------
+const REMINDER_WRAP_REGEX =
+  /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;
+const BOOKKEEPING_REMINDER_PATTERNS = [
+  /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
+  /^Output tokens \u2014 turn: [^\n]+ \u00b7 session: [^\n]+\s*$/,
+  /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
+  /^The task tools haven't been used recently\./,
+  /^The TodoWrite tool hasn't been used recently\./,
+  /^Remaining conversation turns: /,
+  /^Messages? until auto-compact: /,
+];
+/**
+ * Decide whether `text` is exactly ONE `<system-reminder>`-wrapped block
+ * whose inner content matches a bookkeeping pattern.
+ *
+ * @param {*} text candidate block text
+ * @returns {boolean} true only for a single bookkeeping reminder; false
+ *   for non-strings, unwrapped text, non-bookkeeping reminders, and text
+ *   that contains more than one reminder.
+ */
+function isBookkeepingReminder(text) {
+  if (typeof text !== "string") return false;
+  const wrapped = REMINDER_WRAP_REGEX.exec(text);
+  if (wrapped === null) return false;
+  const inner = wrapped[1];
+  // The wrapper regex is anchored at the END of the text, so when several
+  // reminders are concatenated the lazy capture stretches across all of
+  // them. Several patterns only anchor at the start, which would classify
+  // the whole text as bookkeeping and take a non-bookkeeping reminder with
+  // it. A closing tag inside the capture means "more than one reminder":
+  // never treat that as strippable.
+  if (inner.includes("</system-reminder>")) return false;
+  return BOOKKEEPING_REMINDER_PATTERNS.some((pattern) => pattern.test(inner));
+}
+// --------------------------------------------------------------------------
+// cache_control marker position-normalizer
+// --------------------------------------------------------------------------
+//
+// Anthropic's prompt-cache uses `cache_control: {type: "ephemeral", ttl: ...}`
+// markers on content blocks as cache breakpoints. CC places this marker on
+// "the last block of the last user message" each turn — which shifts as new
+// turns arrive. When the marker moves, the PREVIOUS last-block's JSON loses
+// the cache_control field → that block's bytes differ from the server's
+// cached version → partial re-cache on top of the stable system-prompt
+// cache.
+//
+// Enforce a canonical position on every outbound body:
+//   1. Strip every existing cache_control marker from user-message content
+//      blocks.
+//   2. Place a single {type: "ephemeral", ttl: "1h"} marker on the LAST
+//      content block of the LAST user message.
+//
+// Fast path: if the canonical block already has the correct marker AND it's
+// the only user-side marker, the body is left untouched — ensures the pass
+// is a true no-op when nothing changed.
+//
+// System-side markers (e.g., on `system[2]` for the global prompt) are NOT
+// touched — they're CC's stable breakpoint for the system prompt and work
+// correctly.
+// --------------------------------------------------------------------------
+const CACHE_CONTROL_CANONICAL_MARKER = { type: "ephemeral", ttl: "1h" };
+/**
+ * Remove the `cache_control` field from every content block of a single
+ * user message.
+ *
+ * The message's content array is mutated in place: each marked block is
+ * replaced by a shallow copy without `cache_control`. Non-user messages
+ * and messages whose content is not an array are left alone.
+ *
+ * @param {*} msg message object
+ * @returns {number} how many blocks had a marker removed
+ */
+function stripCacheControlMarkers(msg) {
+  if (msg?.role !== "user" || !Array.isArray(msg.content)) return 0;
+  let stripped = 0;
+  msg.content.forEach((block, idx) => {
+    if (!block || typeof block !== "object" || !block.cache_control) return;
+    // Rest-destructuring yields a copy of the block minus the marker.
+    const { cache_control: _removed, ...withoutMarker } = block;
+    msg.content[idx] = withoutMarker;
+    stripped += 1;
+  });
+  return stripped;
+}
+/**
+ * Count content blocks carrying a truthy `cache_control` field across all
+ * user-role messages of `body`. Messages with other roles, and messages
+ * whose content is not an array, contribute nothing.
+ *
+ * @param {*} body request body; anything without a `messages` array counts as 0
+ * @returns {number} number of marked user-side blocks
+ */
+function countUserCacheControlMarkers(body) {
+  if (!Array.isArray(body?.messages)) return 0;
+  return body.messages
+    .filter((msg) => msg?.role === "user" && Array.isArray(msg.content))
+    .flatMap((msg) => msg.content)
+    .filter((block) => block && typeof block === "object" && block.cache_control)
+    .length;
+}
+// --------------------------------------------------------------------------
+// tool_use.input field-set normalization
+// --------------------------------------------------------------------------
+//
+// CC's serialization of `tool_use.input` can drift between turns when the
+// caller passes fields not declared in the tool's `input_schema.properties`.
+// Observed case: a SendMessage tool call where the caller passed
+// `{to, summary, message, type, recipient, content}`. Pre-miss body
+// serialized input as `{to, summary, message}` (3 schema-only keys).
+// Post-miss body (same tool_use_id, same turn position) serialized the
+// same block as `{to, summary, message, type, recipient, content}` (6 keys
+// — extras preserved). That byte drift at a mid-history assistant message
+// re-caches every block from that message forward → full-session-cost miss.
+//
+// Concrete instance: 2334-byte drift on ONE assistant-side tool_use block
+// caused a 619,722 `cache_creation_input_tokens` miss at 15:16:52 UTC on
+// msg[844] of a long-running session.
+//
+// This helper walks every assistant-role message's tool_use blocks, looks
+// up the tool's declared `input_schema.properties` from `body.tools`, and
+// rewrites `input` to contain ONLY the schema keys (in schema declaration
+// order). Tools with no schema in `body.tools` are left untouched — we
+// can't determine what's legitimate vs extra.
+//
+// Agent behavior is unaffected — extras weren't declared in the schema so
+// downstream consumers shouldn't rely on them. The point of this pass is
+// to pin the serialization to the schema's field set so CC's own drift
+// between turns can't break cache.
+// --------------------------------------------------------------------------
+/**
+ * Pin every assistant-side `tool_use.input` to its tool's declared schema
+ * field set.
+ *
+ * For each tool_use block in an assistant-role message whose `name` has an
+ * entry in `body.tools` with an object `input_schema.properties`, `input`
+ * is replaced by a new object holding ONLY the schema-declared keys that
+ * the original input owns, in schema declaration order. Blocks whose input
+ * already has exactly that shape and order are left untouched, as are
+ * blocks for tools without a usable schema and blocks whose input is not a
+ * plain (non-array) object.
+ *
+ * Mutates `body` in place (the affected `msg.content[i]` entries are
+ * replaced with shallow copies). Calling it again on the result changes
+ * nothing and returns 0.
+ *
+ * @param {*} body request body with `messages` and `tools` arrays
+ * @returns {number} count of tool_use blocks rewritten (0 when nothing
+ *   changed or when `messages` / `tools` are missing)
+ */
+function normalizeToolUseInputsInBody(body) {
+  if (!body || typeof body !== "object") return 0;
+  if (!Array.isArray(body.messages) || !Array.isArray(body.tools)) return 0;
+  // Tool name → { keys (declaration order), keySet (membership) }. Built
+  // once per call so the Set is not re-created for every tool_use block.
+  // Null-prototype object: tool names can never collide with Object.prototype.
+  const toolSchemas = Object.create(null);
+  for (const tool of body.tools) {
+    if (!tool || typeof tool !== "object") continue;
+    if (typeof tool.name !== "string") continue;
+    const props = tool.input_schema?.properties;
+    if (!props || typeof props !== "object") continue;
+    const keys = Object.keys(props);
+    toolSchemas[tool.name] = { keys, keySet: new Set(keys) };
+  }
+  let modified = 0;
+  for (const msg of body.messages) {
+    if (!msg || msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
+    for (let i = 0; i < msg.content.length; i++) {
+      const block = msg.content[i];
+      if (!block || block.type !== "tool_use") continue;
+      const input = block.input;
+      if (!input || typeof input !== "object" || Array.isArray(input)) continue;
+      const schema = toolSchemas[block.name];
+      if (!schema) continue; // unknown tool: cannot tell declared from extra
+      const currentKeys = Object.keys(input);
+      const keptInCurrentOrder = currentKeys.filter((k) => schema.keySet.has(k));
+      const keptInSchemaOrder = schema.keys.filter((k) =>
+        Object.prototype.hasOwnProperty.call(input, k)
+      );
+      const hasExtras = keptInCurrentOrder.length !== currentKeys.length;
+      // Key order matters as well as key set: serializers emit keys in
+      // object order, so a reordered input is a byte-level difference.
+      const orderDiffers =
+        keptInSchemaOrder.length !== keptInCurrentOrder.length ||
+        keptInSchemaOrder.some((k, j) => k !== keptInCurrentOrder[j]);
+      if (!hasExtras && !orderDiffers) continue; // already canonical
+      // Object.fromEntries defines OWN data properties. Plain assignment
+      // onto a `{}` literal would instead set the prototype when a declared
+      // key is "__proto__", silently dropping that field from the output.
+      const newInput = Object.fromEntries(keptInSchemaOrder.map((k) => [k, input[k]]));
+      msg.content[i] = { ...block, input: newInput };
+      modified++;
+    }
+  }
+  return modified;
+}
+// --------------------------------------------------------------------------
+// cache_control_sticky — preserve historical marker positions across turns
+// --------------------------------------------------------------------------
+//
+// Covers a cache-miss class that cache_control_normalize can't reach by
+// itself. CC maintains at most one user-side cache_control marker at a time:
+// as conversation grows, CC moves the marker from the tail of one user turn
+// to the tail of the next, DROPPING it from the previous position. The
+// dropped position's block loses the ~43 bytes of `"cache_control":{"type":
+// "ephemeral","ttl":"1h"}` framing — a tail-of-message byte diff that
+// invalidates every downstream cached block (~600K tokens' worth on a
+// long-running session).
+//
+// Observed instance: at 16:27:13 UTC on the day this fix was written, a
+// 1284-message session emitted cw=804,428 (hit=2.3%). Diffing main-session
+// bodies 585 and 587 showed that exactly ONE message diverged, msg[1281]:
+// it lost its cache_control marker (43 bytes) because CC had moved the
+// marker to the new last user message, msg[1283].
+//
+// cache_control_normalize places exactly ONE canonical marker at the last
+// block of the last user message on every outbound body. That solves the
+// current-marker-drift class but cannot preserve historical markers — CC
+// has already dropped them by the time the payload reaches this extension.
+//
+// This sticky extension maintains per-session state tracking where markers
+// have appeared in prior turns, and reinstates them on future turns as
+// additive preservation. Up to 3 historical message-level markers are
+// tracked (Anthropic's hard limit is 4 cache_control markers total — 1 for
+// system[2] + 3 for message-level breakpoints). When a historical position
+// would exceed the cap, the oldest tracked entry is dropped (LRU).
+//
+// Messages are identified by a stable hash so that compaction rewrites /
+// index shifts don't confuse the tracker:
+//   - If the message has a tool_use or tool_result block with an `id` or
+//     `tool_use_id`, hash `role|id`.
+//   - Otherwise hash `role|firstTextContent.slice(0, 256)`.
+//
+// Pipeline order: runs AFTER cache_control_normalize (when it's present) so
+// normalize first pins the canonical marker at the last user msg, then
+// sticky re-adds historical markers on their hashed messages. Skips any
+// message already carrying a marker (fast no-op when sticky fires first).
+//
+// Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
+// --------------------------------------------------------------------------
+const CACHE_CONTROL_STICKY_DIR = join(homedir(), ".claude", "cache-fix-state");
+const CACHE_CONTROL_STICKY_MAX_POSITIONS = 3;
+const CACHE_CONTROL_STICKY_DEFAULT_MARKER = { type: "ephemeral", ttl: "1h" };
+/**
+ * Derive the absolute sticky-state file path for a project key.
+ *
+ * The key is stringified, SHA-1 hashed, and the first 16 hex characters
+ * of the digest become part of the file name inside
+ * CACHE_CONTROL_STICKY_DIR.
+ *
+ * @param {*} key project key (coerced with String())
+ * @returns {string} `<state dir>/cache-control-sticky-<hash16>.json`
+ */
+function cacheControlStickyStatePath(key) {
+  const digest = createHash("sha1").update(String(key)).digest("hex");
+  const fileName = `cache-control-sticky-${digest.slice(0, 16)}.json`;
+  return join(CACHE_CONTROL_STICKY_DIR, fileName);
+}
+/**
+ * Compute a short, position-independent identifier for a message.
+ *
+ * Anchor selection, in priority order:
+ *   1. the first tool_use block with a non-empty string `id`, or the first
+ *      tool_result block with a non-empty string `tool_use_id`, whichever
+ *      comes first in the content array;
+ *   2. otherwise the first text block's first 256 UTF-16 code units.
+ * The anchor is hashed together with the role and the anchor kind
+ * (`role|kind|value`, SHA-1, first 16 hex characters).
+ *
+ * @param {*} msg message object
+ * @returns {string|null} 16-char hex id, or null when the message has no
+ *   array content, empty content, or no usable anchor
+ */
+function computeStickyMessageHash(msg) {
+  if (!msg || typeof msg !== "object") return null;
+  const { content } = msg;
+  if (!Array.isArray(content) || content.length === 0) return null;
+  const role = typeof msg.role === "string" ? msg.role : "";
+  const shortHash = (kind, value) =>
+    createHash("sha1").update(`${role}|${kind}|${value}`).digest("hex").slice(0, 16);
+  const blocks = content.filter((b) => b && typeof b === "object");
+  // Tool identifiers are preferred over text content as anchors.
+  for (const b of blocks) {
+    if (b.type === "tool_use" && typeof b.id === "string" && b.id) {
+      return shortHash("tool_use", b.id);
+    }
+    if (b.type === "tool_result" && typeof b.tool_use_id === "string" && b.tool_use_id) {
+      return shortHash("tool_result", b.tool_use_id);
+    }
+  }
+  const firstText = blocks.find((b) => b.type === "text" && typeof b.text === "string");
+  return firstText ? shortHash("text", firstText.text.slice(0, 256)) : null;
+}
+/**
+ * Load the persisted sticky state for a project key.
+ *
+ * Never throws: a missing or unreadable file, unparsable JSON, or a
+ * malformed top-level shape all yield a fresh empty state. Entries without
+ * a non-empty string `msg_hash` are dropped. Entries whose `marker` is not
+ * an object with a string `type` get a copy of
+ * CACHE_CONTROL_STICKY_DEFAULT_MARKER.
+ *
+ * @param {*} key project key, passed to cacheControlStickyStatePath
+ * @returns {{version: 1, positions: Array<{msg_hash: string, position_hint: "last_block", marker: object}>}}
+ */
+function readCacheControlStickyState(key) {
+  const path = cacheControlStickyStatePath(key);
+  let raw;
+  try {
+    raw = readFileSync(path, "utf-8");
+  } catch {
+    // No readable state file: start from empty state, no log.
+    return { version: 1, positions: [] };
+  }
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (e) {
+    debugLog(`cache_control_sticky: state JSON parse error (${e?.message}) — resetting`);
+    return { version: 1, positions: [] };
+  }
+  if (!parsed || typeof parsed !== "object" || !Array.isArray(parsed.positions)) {
+    debugLog("cache_control_sticky: state file malformed shape — resetting");
+    return { version: 1, positions: [] };
+  }
+  const positions = [];
+  for (const p of parsed.positions) {
+    if (!p || typeof p !== "object") continue;
+    if (typeof p.msg_hash !== "string" || !p.msg_hash) continue;
+    const markerIsUsable =
+      p.marker && typeof p.marker === "object" && typeof p.marker.type === "string";
+    positions.push({
+      msg_hash: p.msg_hash,
+      // "last_block" is the only hint this code produces, so whatever was
+      // stored is normalized to it.
+      position_hint: "last_block",
+      marker: markerIsUsable ? { ...p.marker } : { ...CACHE_CONTROL_STICKY_DEFAULT_MARKER },
+    });
+  }
+  return { version: 1, positions };
+}
+/**
+ * Persist sticky state for a project key. Best-effort.
+ *
+ * The state directory is created if needed, the JSON is written to a
+ * sibling `<path>.tmp` file, and that file is then renamed over the real
+ * path so readers do not see a partially written file. Any error along
+ * the way is reported via debugLog and otherwise ignored.
+ *
+ * @param {*} key project key, passed to cacheControlStickyStatePath
+ * @param {*} state value to serialize (pretty-printed, 2-space indent)
+ * @returns {void}
+ */
+function writeCacheControlStickyState(key, state) {
+  const finalPath = cacheControlStickyStatePath(key);
+  const tempPath = `${finalPath}.tmp`;
+  try {
+    mkdirSync(CACHE_CONTROL_STICKY_DIR, { recursive: true });
+    const serialized = JSON.stringify(state, null, 2);
+    writeFileSync(tempPath, serialized, "utf-8");
+    renameSync(tempPath, finalPath);
+  } catch (e) {
+    debugLog(`cache_control_sticky: state write error (${e?.message})`);
+  }
+}
+/**
+ * Core of the sticky pass: given a body and the persisted state, compute
+ * the next state and the marker mutations to apply. Performs no I/O and
+ * mutates neither `body` nor `priorState`; the caller applies the result.
+ *
+ * Algorithm:
+ *  1. Walk user-role messages. Index each message hash to the FIRST
+ *     message index carrying it, and record `{msg_hash, marker}` for every
+ *     message that has a block with an object `cache_control` (the first
+ *     such block per message supplies the marker).
+ *  2. Merge into the prior positions, which are ordered oldest → newest.
+ *     Every observed hash is moved to the newest end with a refreshed
+ *     marker, whether or not it was already tracked. Unobserved prior
+ *     entries keep their relative order.
+ *  3. Cap at CACHE_CONTROL_STICKY_MAX_POSITIONS by dropping from the
+ *     oldest end (least recently observed first).
+ *  4. For each surviving position whose message is present in this body
+ *     and carries no marker on any block, emit a mutation that sets the
+ *     marker on the message's last block.
+ *
+ * @param {*} body request body with a `messages` array
+ * @param {*} priorState `{positions: [...]}` from the previous turn (may be null)
+ * @returns {{newState: {version: 1, positions: Array}, mutations: Array<{msgIdx: number, blockIdx: number, marker: object}>}}
+ */
+function updateCacheControlStickyState(body, priorState) {
+  const empty = { newState: { version: 1, positions: [] }, mutations: [] };
+  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) return empty;
+  const priorPositions =
+    priorState && Array.isArray(priorState.positions) ? priorState.positions : [];
+  const carriesMarker = (b) =>
+    Boolean(b && typeof b === "object" && b.cache_control && typeof b.cache_control === "object");
+  // Step 1: hash → first msgIdx, plus the markers observed in this body.
+  const hashToMsgIdx = new Map();
+  const observed = []; // [{msg_hash, marker}] in message order
+  for (let m = 0; m < body.messages.length; m++) {
+    const msg = body.messages[m];
+    if (!msg || msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length === 0) continue;
+    const h = computeStickyMessageHash(msg);
+    if (!h) continue;
+    if (!hashToMsgIdx.has(h)) hashToMsgIdx.set(h, m);
+    const marked = msg.content.find(carriesMarker);
+    if (marked) observed.push({ msg_hash: h, marker: { ...marked.cache_control } });
+  }
+  // Step 2: a Map iterates in insertion order, so delete + set moves an
+  // entry to the newest end. Without the delete, a re-observed hash would
+  // keep its old slot and could be evicted as "oldest" on the very turn
+  // it was seen.
+  const tracked = new Map();
+  for (const p of priorPositions) {
+    if (p && typeof p.msg_hash === "string") tracked.set(p.msg_hash, p);
+  }
+  for (const ob of observed) {
+    tracked.delete(ob.msg_hash);
+    tracked.set(ob.msg_hash, {
+      msg_hash: ob.msg_hash,
+      position_hint: "last_block",
+      marker: ob.marker,
+    });
+  }
+  // Step 3: keep only the newest MAX_POSITIONS entries.
+  let capped = [...tracked.values()];
+  if (capped.length > CACHE_CONTROL_STICKY_MAX_POSITIONS) {
+    capped = capped.slice(capped.length - CACHE_CONTROL_STICKY_MAX_POSITIONS);
+  }
+  // Step 4: reinstate markers on tracked messages that currently have none.
+  const mutations = [];
+  for (const pos of capped) {
+    const msgIdx = hashToMsgIdx.get(pos.msg_hash);
+    if (msgIdx === undefined) continue;
+    const msg = body.messages[msgIdx];
+    if (!msg || !Array.isArray(msg.content) || msg.content.length === 0) continue;
+    if (msg.content.some(carriesMarker)) continue;
+    mutations.push({
+      msgIdx,
+      blockIdx: msg.content.length - 1,
+      marker: { ...pos.marker },
+    });
+  }
+  return { newState: { version: 1, positions: capped }, mutations };
+}
+/**
+ * Apply the sticky pass to `body`: read the persisted state, compute the
+ * next state and marker mutations with updateCacheControlStickyState, apply
+ * the mutations to `body` in place, then persist the next state.
+ *
+ * Each applied mutation replaces the target message with a shallow copy
+ * whose content array has the marked block swapped in. A mutation whose
+ * target message or block is not usable is skipped and NOT counted.
+ *
+ * @param {*} body request body with a `messages` array
+ * @param {*} key project key used to locate the state file
+ * @returns {number} number of markers actually written into `body`
+ */
+function applyCacheControlSticky(body, key) {
+  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) return 0;
+  const prior = readCacheControlStickyState(key);
+  const { newState, mutations } = updateCacheControlStickyState(body, prior);
+  let applied = 0;
+  for (const { msgIdx, blockIdx, marker } of mutations) {
+    const msg = body.messages[msgIdx];
+    if (!msg || !Array.isArray(msg.content)) continue;
+    const target = msg.content[blockIdx];
+    if (!target || typeof target !== "object") continue;
+    const newContent = msg.content.slice();
+    newContent[blockIdx] = { ...target, cache_control: { ...marker } };
+    body.messages[msgIdx] = { ...msg, content: newContent };
+    applied++;
+  }
+  // State is persisted even when nothing was applied, so positions
+  // observed this turn are remembered for the next one.
+  writeCacheControlStickyState(key, newState);
+  return applied;
+}
 /**
  * Core fix: on EVERY call, scan the entire message array for the LATEST
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -728,6 +1381,13 @@ const _STATS_SCHEMA = {
   cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
   smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
   smoosh_split: { applied: 0, skipped: 0, lastApplied: null },
+  session_start_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  continue_trailer_strip: { applied: 0, skipped: 0, lastApplied: null },
+  deferred_tools_restore: { applied: 0, skipped: 0, lastApplied: null },
+  reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
+  cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  cache_control_sticky: { applied: 0, skipped: 0, lastApplied: null },
 };
 function _createEmptyStats() {
@@ -1349,6 +2009,67 @@ globalThis.fetch = async function (url, options) {
         }
       }
+      // Extension: session_start_normalize — SessionStart:resume → :startup rewrite
+      // and ephemeral session-id / Last-active strip. Runs BEFORE smoosh_normalize
+      // so drift at msg[0] content[N] is stabilized before any subsequent pass
+      // reads from the same text. Applies to both standalone text blocks and
+      // tool_result.content strings (in case CC's smooshSystemReminderSiblings
+      // folded the reminder before we see it).
+      // Bug: anthropics/claude-code#43657
+      // Opt-out via CACHE_FIX_SKIP_SESSION_START_NORMALIZE=1 (defaults ON).
+      if (shouldApplyFix("session_start_normalize") && payload.messages) {
+        let ssnApplied = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          for (let i = 0; i < msg.content.length; i++) {
+            const block = msg.content[i];
+            if (block?.type === "text" && typeof block.text === "string") {
+              const [t, n] = normalizeSessionStartText(block.text);
+              if (n > 0) {
+                msg.content[i] = { ...block, text: t };
+                ssnApplied += n;
+              }
+            } else if (block?.type === "tool_result" && typeof block.content === "string") {
+              const [c, n] = normalizeSessionStartText(block.content);
+              if (n > 0) {
+                msg.content[i] = { ...block, content: c };
+                ssnApplied += n;
+              }
+            }
+          }
+        }
+        if (ssnApplied > 0) {
+          modified = true;
+          debugLog(`APPLIED: session-start-normalize rewrote ${ssnApplied} marker(s)`);
+          recordFixResult("session_start_normalize", "applied");
+        } else {
+          recordFixResult("session_start_normalize", "skipped");
+        }
+      }
+      // Extension: tool_use_input_normalize — strip tool_use.input keys not
+      // declared in body.tools[*].input_schema.properties. CC's serialization
+      // of tool_use.input can drift between turns when the caller passed
+      // extra fields; the pre-miss body may serialize only the schema keys
+      // while the post-miss body serializes the full caller-supplied set
+      // (or vice versa). That byte drift at a mid-history assistant message
+      // re-caches every block from that message forward.
+      //
+      // Runs AFTER session_start_normalize so mid-history drift is pinned
+      // before any downstream pass (smoosh_*, fingerprint, ttl) hashes the
+      // same block. Default ON, opt-out via
+      // CACHE_FIX_SKIP_TOOL_USE_INPUT_NORMALIZE=1.
+      if (shouldApplyFix("tool_use_input_normalize")) {
+        const tuinApplied = normalizeToolUseInputsInBody(payload);
+        if (tuinApplied > 0) {
+          modified = true;
+          debugLog(`APPLIED: tool-use-input-normalize rewrote ${tuinApplied} tool_use block(s)`);
+          recordFixResult("tool_use_input_normalize", "applied");
+        } else {
+          recordFixResult("tool_use_input_normalize", "skipped");
+        }
+      }
       // Optimization: normalize smooshed dynamic system-reminders in tool_result content
       // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
       // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
@@ -1474,6 +2195,187 @@ globalThis.fetch = async function (url, options) {
         }
       }
+      // Extension: continue_trailer_strip — remove the "Continue from where
+      // you left off." text block CC appends to the last user message on
+      // --continue. Pre-exit bodies didn't carry it, so its presence in the
+      // resumed body creates tail-of-last-msg drift that breaks cache.
+      // Exact-match string equality on `.text` — user sentences mentioning
+      // the phrase inside longer content are not touched.
+      // Bug: anthropics/claude-code#12 (resume UX), observed empirically.
+      // Opt-out via CACHE_FIX_SKIP_CONTINUE_TRAILER_STRIP=1 (defaults ON).
+      if (shouldApplyFix("continue_trailer_strip") && payload.messages) {
+        let trailerStripped = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          const kept = msg.content.filter((block) => {
+            if (isContinueTrailerBlock(block)) {
+              trailerStripped++;
+              return false;
+            }
+            return true;
+          });
+          if (kept.length !== msg.content.length) msg.content = kept;
+        }
+        if (trailerStripped > 0) {
+          modified = true;
+          debugLog(`APPLIED: continue-trailer-strip removed ${trailerStripped} trailer block(s)`);
+          recordFixResult("continue_trailer_strip", "applied");
+        } else {
+          recordFixResult("continue_trailer_strip", "skipped");
+        }
+      }
+      // Extension: deferred_tools_restore — persist-and-restore the
+      // deferred-tools attachment block across sessions so MCP reconnect
+      // race at resume-time doesn't shrink msg[0] and bust the whole cache.
+      // Snapshot key defaults to process.cwd() (one snapshot per project).
+      // Opt-out via CACHE_FIX_SKIP_DEFERRED_TOOLS_RESTORE=1 (defaults ON).
+      if (shouldApplyFix("deferred_tools_restore") && payload.messages) {
+        let dtrRestored = 0;
+        const found = findDeferredToolsBlockInBody(payload);
+        if (found) {
+          const hasUnavail = found.text.includes(DEFERRED_TOOLS_UNAVAILABLE_MARKER);
+          const snapshotPath = deferredToolsSnapshotPath(process.cwd());
+          if (!hasUnavail) {
+            // Clean baseline — persist it for future resumes. Silent on
+            // any I/O error; snapshot is best-effort.
+            try {
+              mkdirSync(DEFERRED_TOOLS_SNAPSHOT_DIR, { recursive: true });
+              writeFileSync(snapshotPath, found.text, "utf-8");
+            } catch {}
+          } else {
+            // Shrunk block with explicit "no longer available" signal →
+            // attempt restore. Only substitute if the persisted version is
+            // strictly longer (never downgrade to a stale shorter snapshot).
+            let snapshot = null;
+            try { snapshot = readFileSync(snapshotPath, "utf-8"); } catch {}
+            if (snapshot && snapshot.length > found.text.length) {
+              const targetMsg = payload.messages[found.msgIdx];
+              const newContent = targetMsg.content.slice();
+              newContent[found.blockIdx] = { ...newContent[found.blockIdx], text: snapshot };
+              payload.messages[found.msgIdx] = { ...targetMsg, content: newContent };
+              dtrRestored = 1;
+            }
+          }
+        }
+        if (dtrRestored > 0) {
+          modified = true;
+          debugLog(`APPLIED: deferred-tools-restore substituted full block at msg[${found.msgIdx}].content[${found.blockIdx}]`);
+          recordFixResult("deferred_tools_restore", "applied");
+        } else {
+          recordFixResult("deferred_tools_restore", "skipped");
+        }
+      }
+      // Extension: reminder_strip — remove bookkeeping system-reminder blocks
+      // (Token usage / USD budget / Output tokens / TodoWrite nudge / turn
+      // counters) entirely from user messages. Runs AFTER smoosh_split so
+      // blocks peeled out of tool_result.content are visible as standalone
+      // text and can be matched by isBookkeepingReminder.
+      // Zero model visibility, zero drift.
+      // Opt-out via CACHE_FIX_SKIP_REMINDER_STRIP=1 (defaults ON).
+      if (shouldApplyFix("reminder_strip") && payload.messages) {
+        let reminderStripped = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          const kept = msg.content.filter((block) => {
+            if (block?.type !== "text") return true;
+            if (isBookkeepingReminder(block.text)) {
+              reminderStripped++;
+              return false;
+            }
+            return true;
+          });
+          if (kept.length !== msg.content.length) msg.content = kept;
+        }
+        if (reminderStripped > 0) {
+          modified = true;
+          debugLog(`APPLIED: reminder-strip removed ${reminderStripped} bookkeeping reminder block(s)`);
+          recordFixResult("reminder_strip", "applied");
+        } else {
+          recordFixResult("reminder_strip", "skipped");
+        }
+      }
+      // Extension: cache_control_normalize — pin the cache_control marker at
+      // a canonical position (last block of last user message) on every
+      // outbound body. Prevents marker-shuffle drift between turns from
+      // invalidating the previous-last-block's cached bytes. Runs late —
+      // after smoosh_split and any other content-mutating pass, but before
+      // cache_control_sticky, which re-adds historical markers — so the
+      // canonical position is calculated against the final content array.
+      // Fast path: if canonical position already holds the correct marker
+      // and it's the only user-side marker, body passes through untouched.
+      // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_NORMALIZE=1 (defaults ON).
+      if (shouldApplyFix("cache_control_normalize") && payload.messages && payload.messages.length > 0) {
+        // Locate canonical position: last block of last user message with an
+        // array content. If no valid target, skip.
+        let targetMsgIdx = -1;
+        let targetBlockIdx = -1;
+        for (let i = payload.messages.length - 1; i >= 0; i--) {
+          const m = payload.messages[i];
+          if (m?.role !== "user") continue;
+          if (!Array.isArray(m.content) || m.content.length === 0) break;
+          targetMsgIdx = i;
+          targetBlockIdx = m.content.length - 1;
+          break;
+        }
+        let ccMutated = false;
+        if (targetMsgIdx !== -1) {
+          const targetBlock = payload.messages[targetMsgIdx].content[targetBlockIdx];
+          const existingCC = targetBlock?.cache_control;
+          const canonicalAlreadyCorrect =
+            existingCC &&
+            existingCC.type === CACHE_CONTROL_CANONICAL_MARKER.type &&
+            existingCC.ttl === CACHE_CONTROL_CANONICAL_MARKER.ttl;
+          if (!(canonicalAlreadyCorrect && countUserCacheControlMarkers(payload) === 1)) {
+            // Strip all markers from user messages, then place canonical.
+            for (const msg of payload.messages) stripCacheControlMarkers(msg);
+            const tm = payload.messages[targetMsgIdx];
+            const newContent = tm.content.slice();
+            newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...CACHE_CONTROL_CANONICAL_MARKER } };
+            payload.messages[targetMsgIdx] = { ...tm, content: newContent };
+            ccMutated = true;
+          }
+        }
+        if (ccMutated) {
+          modified = true;
+          debugLog(`APPLIED: cache_control_normalize pinned marker at msg[${targetMsgIdx}].content[${targetBlockIdx}]`);
+          recordFixResult("cache_control_normalize", "applied");
+        } else {
+          recordFixResult("cache_control_normalize", "skipped");
+        }
+      }
+      // Extension: cache_control_sticky — reinstate historical cache_control
+      // markers on messages whose position CC has moved past. CC maintains
+      // at most one user-side marker at a time; as it moves the marker to
+      // the tail of each new user turn, the previous position loses the ~43
+      // bytes of cache_control framing — a tail-of-message byte drift that
+      // breaks every downstream cached block. This extension tracks marker
+      // positions by stable message-hash across turns (up to 3) and re-adds
+      // them on future bodies. Runs AFTER cache_control_normalize (when
+      // present) so normalize pins the canonical tail-marker first and
+      // sticky re-adds the historical ones. State file is per-project at
+      // ~/.claude/cache-fix-state/cache-control-sticky-<sha1(cwd)>.json.
+      // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
+      if (shouldApplyFix("cache_control_sticky") && payload.messages) {
+        try {
+          const stickyApplied = applyCacheControlSticky(payload, process.cwd());
+          if (stickyApplied > 0) {
+            modified = true;
+            debugLog(`APPLIED: cache_control_sticky reinstated ${stickyApplied} historical marker(s)`);
+            recordFixResult("cache_control_sticky", "applied");
+          } else {
+            recordFixResult("cache_control_sticky", "skipped");
+          }
+        } catch (e) {
+          debugLog(`cache_control_sticky: error (${e?.message}) — skipped`);
+          recordFixResult("cache_control_sticky", "skipped");
+        }
+      }
       // Bug 5: TTL enforcement (configurable per request type)
       // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
       // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -1899,5 +2801,25 @@ export {
   isClearArtifact,
   rewriteOutputEfficiencyInstruction,
   normalizeOutputEfficiencyReplacement,
+  normalizeSessionStartText,
+  isContinueTrailerBlock,
+  CONTINUE_TRAILER_TEXT,
+  findDeferredToolsBlockInBody,
+  deferredToolsSnapshotPath,
+  DEFERRED_TOOLS_AVAILABLE_MARKER,
+  DEFERRED_TOOLS_UNAVAILABLE_MARKER,
+  isBookkeepingReminder,
+  stripCacheControlMarkers,
+  countUserCacheControlMarkers,
+  CACHE_CONTROL_CANONICAL_MARKER,
+  normalizeToolUseInputsInBody,
+  computeStickyMessageHash,
+  cacheControlStickyStatePath,
+  updateCacheControlStickyState,
+  applyCacheControlSticky,
+  readCacheControlStickyState,
+  writeCacheControlStickyState,
+  CACHE_CONTROL_STICKY_MAX_POSITIONS,
+  CACHE_CONTROL_STICKY_DEFAULT_MARKER,
   _pinnedBlocks,  // exported so tests can reset between runs
 };