npm - claude-code-cache-fix - Versions diffs - 2.0.0-beta.3 → 2.0.0 - Mend

claude-code-cache-fix 2.0.0-beta.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -4,7 +4,7 @@
 English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
-Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
+Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.112. Opus 4.7 compatible.
 > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
@@ -607,6 +607,7 @@ measurable signature of cache-efficiency degradation.
 - **[@JEONG-JIWOO](https://github.com/JEONG-JIWOO)** — VS Code extension investigation: discovered `claudeCode.claudeProcessWrapper` as the working integration path, wrote the C wrapper for Windows (#16)
 - **[@X-15](https://github.com/X-15)** — VS Code extension validation, per-fix health status analysis confirming safety check behavior on v2.1.105 (#16)
 - **[@ArkNill](https://github.com/ArkNill)** — Fingerprint verification fix for CC v2.1.108+ (`isMeta` filter change, PR #21), Korean README (PR #22), original [claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis) research
+- **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery
 If you contributed to the community effort on these issues and aren't listed here, please open an issue or PR — we want to credit everyone properly.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-cache-fix",
-  "version": "2.0.0-beta.3",
+  "version": "2.0.0",
   "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
   "type": "module",
   "exports": "./preload.mjs",

package/preload.mjs CHANGED Viewed

@@ -330,6 +330,403 @@ function stripSessionKnowledge(text) {
   );
 }
+// --------------------------------------------------------------------------
+// SessionStart:resume → :startup rewrite (Bug: anthropics/claude-code#43657)
+// --------------------------------------------------------------------------
+//
+// On `claude --continue`, CC fires processSessionStartHooks('resume', …) at
+// src/utils/sessionStart.ts:35. The resulting attachment text wraps the
+// hook's stdout in `<system-reminder>\nSessionStart:resume hook success: …`.
+// The original (pre-resume) session sent the same block as
+// `SessionStart:startup hook success: …`. Byte difference at msg[0] content[N]
+// → whole message prefix re-caches → full-session-cost miss.
+//
+// Some SessionStart hooks additionally embed `<session-id>` tags or
+// `Last active: <timestamp>` lines inside the reminder body, both of which
+// carry UUID/date volatility on top of the event-name flip.
+//
+// This helper rewrites the outbound text to match the originally-cached
+// form. Runs on both standalone text blocks and tool_result.content strings
+// (covers the case where the SessionStart reminder got smooshed by CC's
+// smooshSystemReminderSiblings pass before we see it).
+//
+// Agent behavior is unaffected — CC does not condition behavior on the
+// event-name text, and session-id / timestamps are ephemeral runtime
+// metadata, not semantic inputs.
+// --------------------------------------------------------------------------
+const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;
+const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;
+const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;
+/**
+ * Normalize a single text payload (a text block's .text or a tool_result's
+ * string .content) to remove SessionStart-resume volatility.
+ *
+ * Three rewrites are applied, in this order:
+ *   1. every `SessionStart:resume hook success:` becomes
+ *      `SessionStart:startup hook success:`;
+ *   2. every `<session-id>…</session-id>` tag is removed, together with one
+ *      directly preceding newline when present;
+ *   3. every newline followed by `Last active:` is removed up to (not
+ *      including) the next newline.
+ *
+ * The only gate is that the input is a string containing `SessionStart:`.
+ * Rewrites 2 and 3 then apply to the WHOLE string, not only to the
+ * reminder body that carries the marker.
+ *
+ * The three patterns are shared module-level regexes with the `g` flag.
+ * `test()` on such a regex advances `lastIndex`, so each branch rewinds it
+ * to 0 before calling `replace()`.
+ *
+ * Running the function on its own output changes nothing for ordinary
+ * input: the resume marker and the matched tags/lines are gone after the
+ * first run.
+ *
+ * @param {*} text - Candidate payload. Anything that is not a string, or a
+ *   string without `SessionStart:`, is returned untouched.
+ * @returns {[*, number]} `[newText, mutationCount]`. The count is the
+ *   number of pattern CLASSES that matched (0 to 3), not the number of
+ *   individual occurrences replaced. Callers only need the text; the count
+ *   is exposed for stats.
+ */
+function normalizeSessionStartText(text) {
+  if (typeof text !== "string" || !text.includes("SessionStart:")) return [text, 0];
+  let count = 0;
+  let out = text;
+  if (SESSION_START_RESUME_MARKER.test(out)) {
+    SESSION_START_RESUME_MARKER.lastIndex = 0; // rewind after the stateful test()
+    out = out.replace(SESSION_START_RESUME_MARKER, "SessionStart:startup hook success:");
+    count++; // one increment per pattern class, regardless of match count
+  }
+  if (SESSION_START_ID_TAG.test(out)) {
+    SESSION_START_ID_TAG.lastIndex = 0;
+    out = out.replace(SESSION_START_ID_TAG, "");
+    count++;
+  }
+  if (SESSION_START_LAST_ACTIVE_LINE.test(out)) {
+    SESSION_START_LAST_ACTIVE_LINE.lastIndex = 0;
+    out = out.replace(SESSION_START_LAST_ACTIVE_LINE, "");
+    count++;
+  }
+  return [out, count];
+}
+// --------------------------------------------------------------------------
+// Continue-trailer strip (Bug: anthropics/claude-code#12 / resume UX)
+// --------------------------------------------------------------------------
+//
+// On `claude --continue`, CC appends a text block whose text is EXACTLY
+// "Continue from where you left off." to the last user message before
+// firing the first post-resume request. The pre-exit body did not carry
+// that block, so its presence in the resumed body creates a tail-of-last-
+// user-message drift (~40 bytes plus JSON framing) that breaks cache at
+// that position.
+//
+// The trailer is a semantic no-op — the agent already has the full prior
+// conversation as context. Removing it makes the post-resume body byte-
+// match what the pre-exit body cached at the tail.
+//
+// Match is intentionally narrow (exact string equality on the block's
+// .text) so mentions of the phrase inside a longer user sentence don't
+// get caught.
+// --------------------------------------------------------------------------
+const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";
+/**
+ * Returns true iff the block is an exact-match Continue-trailer text block
+ * (a `{type: "text", text: "Continue from where you left off."}` shape).
+ *
+ * The comparison on `.text` is strict string equality, so a longer sentence
+ * that merely contains the phrase, or the phrase with extra whitespace,
+ * does not match. Only `type` and `text` are inspected; any other field on
+ * the block (for example `cache_control`) is ignored.
+ *
+ * Pure predicate with no side effects. NOTE(review): described upstream as
+ * exported for unit tests; the export statement is outside this hunk.
+ *
+ * @param {*} block - Candidate content block; null, undefined and
+ *   non-object values yield false.
+ * @returns {boolean} true only for an exact trailer text block.
+ */
+function isContinueTrailerBlock(block) {
+  return (
+    !!block &&
+    typeof block === "object" &&
+    block.type === "text" &&
+    block.text === CONTINUE_TRAILER_TEXT
+  );
+}
+// --------------------------------------------------------------------------
+// Deferred-tools restore (MCP reconnect race)
+// --------------------------------------------------------------------------
+//
+// Observed empirically: on `claude --continue`, if MCP servers haven't
+// finished reconnecting by the time CC fires the first post-resume
+// request, the `<system-reminder>The following deferred tools are now
+// available via ToolSearch…` block at msg[0] (or wherever the attachment
+// lands post-compaction) shrinks dramatically. A full list of ~40 tools
+// collapses to a handful of CC built-ins (AskUserQuestion, EnterPlanMode,
+// ExitPlanMode, PushNotification) and CC injects a trailing
+// `The following deferred tools are no longer available (their MCP server
+// disconnected). Do not search for them — ToolSearch will return no match:`
+// notice.
+//
+// That block change at the root of the message array breaks cache at the
+// very top — the entire ~940K prompt re-caches. By the time the second
+// post-resume request fires, MCPs are usually reconnected and the block is
+// full again, but the cache is already committed to the shrunk version
+// for this session.
+//
+// This extension snapshots the block to
+// `~/.claude/cache-fix-state/deferred-tools-<sha1(key)>.txt` every time
+// it's sent in its full form (no UNAVAILABLE marker), keyed by a caller-
+// supplied project key (default: cwd). On a subsequent request where the
+// block is shorter AND contains the UNAVAILABLE marker, the persisted
+// full bytes are substituted so the on-wire body matches the server's
+// cached prefix.
+//
+// Trade-off: the restored block may reference MCP tools that haven't
+// actually reconnected yet. Agent calls ToolSearch → no match → one retry.
+// Tiny cost versus a full-prompt cache miss on every resume.
+// --------------------------------------------------------------------------
+const DEFERRED_TOOLS_AVAILABLE_MARKER =
+  "The following deferred tools are now available via ToolSearch";
+const DEFERRED_TOOLS_UNAVAILABLE_MARKER =
+  "The following deferred tools are no longer available";
+const DEFERRED_TOOLS_SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-state");
+/**
+ * Build the absolute snapshot path for a given key.
+ *
+ * The key is coerced with `String(key)` and hashed with SHA-1. Only the
+ * first 16 hex characters of the digest are used, giving a file name of
+ * the form `deferred-tools-<16 hex chars>.txt` inside
+ * `DEFERRED_TOOLS_SNAPSHOT_DIR`. The function only computes a path; it
+ * does not touch the file system.
+ *
+ * `join`, `homedir` and `createHash` are imported outside this hunk
+ * (presumably from node:path, node:os and node:crypto; confirm against the
+ * top of the file).
+ *
+ * NOTE(review): described upstream as exported for tests so they can
+ * assert on path derivation without duplicating the hash logic; the export
+ * statement is outside this hunk.
+ *
+ * @param {*} key - Snapshot key; any value, stringified before hashing.
+ * @returns {string} Path of the snapshot file for that key.
+ */
+function deferredToolsSnapshotPath(key) {
+  const hash = createHash("sha1").update(String(key)).digest("hex").slice(0, 16);
+  return join(DEFERRED_TOOLS_SNAPSHOT_DIR, `deferred-tools-${hash}.txt`);
+}
+/**
+ * Locate the deferred-tools reminder block anywhere in `body.messages`.
+ * The block's position varies by session shape (pre-compaction it often
+ * sits at `msg[0].content[0]`; post-compaction it can land at
+ * `msg[1].content[N]` next to other attachments).
+ *
+ * Messages are scanned in array order and, within each message, content
+ * blocks in array order. The FIRST text block whose string `.text`
+ * contains DEFERRED_TOOLS_AVAILABLE_MARKER wins; later matches are never
+ * examined. Messages whose `content` is not an array (for example a plain
+ * string) are skipped, as are non-text blocks.
+ *
+ * Only `role === "user"` messages are considered, so that if the agent
+ * happens to mention the AVAILABLE_MARKER phrase verbatim in its own
+ * output, it is not misidentified as a real deferred-tools block.
+ *
+ * The body is not modified.
+ *
+ * @param {*} body - Request payload; expected to carry a `messages` array.
+ * @returns {{msgIdx: number, blockIdx: number, text: string} | null}
+ *   Indices into `body.messages` and that message's `content`, plus the
+ *   matched block text; null when `body.messages` is not an array or no
+ *   block matches.
+ */
+function findDeferredToolsBlockInBody(body) {
+  if (!body || !Array.isArray(body.messages)) return null;
+  for (let m = 0; m < body.messages.length; m++) {
+    const msg = body.messages[m];
+    if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+    for (let i = 0; i < msg.content.length; i++) {
+      const b = msg.content[i];
+      if (
+        b?.type === "text" &&
+        typeof b.text === "string" &&
+        b.text.includes(DEFERRED_TOOLS_AVAILABLE_MARKER)
+      ) {
+        return { msgIdx: m, blockIdx: i, text: b.text };
+      }
+    }
+  }
+  return null;
+}
+// --------------------------------------------------------------------------
+// Bookkeeping-reminder strip
+// --------------------------------------------------------------------------
+//
+// Complements `smoosh_normalize` / `smoosh_split`: where normalize stabilizes
+// bytes in-place and split peels smooshed reminders back into standalone
+// text blocks, this pass REMOVES purely-bookkeeping reminder blocks entirely
+// from the outbound body. Zero model visibility, zero drift.
+//
+// Targeted patterns (all CC-internal, per-turn values the agent doesn't need
+// to condition behavior on):
+//   - `Token usage: <N>/<M>; <K> remaining`
+//   - `Output tokens — turn: <X> · session: <Y>`
+//   - `USD budget: $<X>/$<Y>; $<Z> remaining`
+//   - `The task tools haven't been used recently. …`
+//   - `The TodoWrite tool hasn't been used recently. …`
+//   - `Remaining conversation turns: <N>`
+//   - `Messages until auto-compact: <N>`
+//
+// Hook-injected reminders (thinking-enrichment, action-tracker,
+// PreToolUse/PostToolUse blocking errors, UserPromptSubmit additional
+// context, custom user hooks) are deliberately NOT stripped here — the
+// agent needs that feedback visible in the turn it fires, and attempting a
+// history-only filter creates per-turn drift of its own (the "last user
+// message" shifts each turn, so a reminder preserved at turn N gets
+// stripped at N+1 when its host message falls into history). Leaving hook
+// reminders untouched is the safer choice; their residual drift is small
+// compared to bookkeeping churn.
+// --------------------------------------------------------------------------
+const REMINDER_WRAP_REGEX =
+  /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;
+const BOOKKEEPING_REMINDER_PATTERNS = [
+  /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
+  /^Output tokens \u2014 turn: [^\n]+ \u00b7 session: [^\n]+\s*$/,
+  /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
+  /^The task tools haven't been used recently\./,
+  /^The TodoWrite tool hasn't been used recently\./,
+  /^Remaining conversation turns: /,
+  /^Messages? until auto-compact: /,
+];
+/**
+ * Returns true iff the text is a `<system-reminder>`-wrapped block whose
+ * inner content matches a bookkeeping pattern.
+ *
+ * Step 1: the whole text must match REMINDER_WRAP_REGEX. It has to start
+ * with the opening tag plus a newline and end with a newline plus the
+ * closing tag, optionally followed by whitespace. Everything in between is
+ * captured as the inner content.
+ *
+ * Step 2: the inner content is tested against each entry of
+ * BOOKKEEPING_REMINDER_PATTERNS; the first hit returns true. None of the
+ * patterns uses the multiline flag, so `^` and `$` refer to the start and
+ * end of the entire inner content. The first three patterns are anchored
+ * at both ends; the remaining four only check how the inner content
+ * begins.
+ *
+ * NOTE(review): the inner capture is lazy but still bounded by the
+ * end-of-text anchor, so a text holding several reminder wrappers back to
+ * back is captured as one inner string. If it begins with one of the four
+ * prefix-only patterns the whole text is reported as bookkeeping, including
+ * any later reminder in the same block. Confirm such combined blocks
+ * cannot reach this predicate.
+ *
+ * Pure predicate. NOTE(review): described upstream as exported for unit
+ * tests; the export statement is outside this hunk.
+ *
+ * @param {*} text - Candidate block text; non-strings yield false.
+ * @returns {boolean} true when the text is a wrapped bookkeeping reminder.
+ */
+function isBookkeepingReminder(text) {
+  if (typeof text !== "string") return false;
+  const m = text.match(REMINDER_WRAP_REGEX);
+  if (!m) return false;
+  const inner = m[1];
+  for (const rx of BOOKKEEPING_REMINDER_PATTERNS) {
+    if (rx.test(inner)) return true;
+  }
+  return false;
+}
+// --------------------------------------------------------------------------
+// cache_control marker position-normalizer
+// --------------------------------------------------------------------------
+//
+// Anthropic's prompt-cache uses `cache_control: {type: "ephemeral", ttl: ...}`
+// markers on content blocks as cache breakpoints. CC places this marker on
+// "the last block of the last user message" each turn — which shifts as new
+// turns arrive. When the marker moves, the PREVIOUS last-block's JSON loses
+// the cache_control field → that block's bytes differ from the server's
+// cached version → partial re-cache on top of the stable system-prompt
+// cache.
+//
+// Enforce a canonical position on every outbound body:
+//   1. Strip every existing cache_control marker from user-message content
+//      blocks.
+//   2. Place a single {type: "ephemeral", ttl: "1h"} marker on the LAST
+//      content block of the LAST user message.
+//
+// Fast path: if the canonical block already has the correct marker AND it's
+// the only user-side marker, the body is left untouched — ensures the pass
+// is a true no-op when nothing changed.
+//
+// System-side markers (e.g., on `system[2]` for the global prompt) are NOT
+// touched — they're CC's stable breakpoint for the system prompt and work
+// correctly.
+// --------------------------------------------------------------------------
+const CACHE_CONTROL_CANONICAL_MARKER = { type: "ephemeral", ttl: "1h" }; // not referenced by the helpers visible in this hunk
+/**
+ * Strip every cache_control marker from a single user message's content
+ * blocks.
+ *
+ * A block counts as marked when it is an object with a TRUTHY
+ * `cache_control` property; a block carrying `cache_control: null` (or any
+ * other falsy value) is left as is and not counted. Each marked block is
+ * replaced, at the same index, by a shallow copy without the
+ * `cache_control` key. The original block object is not modified, but the
+ * message's `content` array is mutated in place.
+ *
+ * @param {*} msg - Message to clean. Anything that is not a
+ *   `role === "user"` message with an array `content` is ignored.
+ * @returns {number} Number of blocks that had a marker removed (0 when the
+ *   message is ignored or carries no marker).
+ */
+function stripCacheControlMarkers(msg) {
+  if (!msg || msg.role !== "user" || !Array.isArray(msg.content)) return 0;
+  let n = 0;
+  for (let i = 0; i < msg.content.length; i++) {
+    const block = msg.content[i];
+    if (block && typeof block === "object" && block.cache_control) {
+      const { cache_control, ...rest } = block; // cache_control is bound only to exclude it from rest
+      msg.content[i] = rest;
+      n++;
+    }
+  }
+  return n;
+}
+/**
+ * Count cache_control markers across all user-message content blocks.
+ *
+ * Uses the same test as stripCacheControlMarkers: a block is counted when
+ * it is an object with a truthy `cache_control` property. Only messages
+ * with `role === "user"` and an array `content` are inspected, so markers
+ * on assistant messages or on a top-level `system` array are never
+ * counted. The body is not modified.
+ *
+ * NOTE(review): described upstream as exported so the call-site fast-path
+ * check has a tested helper; neither the export nor that call site is
+ * visible in this hunk.
+ *
+ * @param {*} body - Request payload; expected to carry a `messages` array.
+ * @returns {number} Marker count; 0 when `body.messages` is not an array.
+ */
+function countUserCacheControlMarkers(body) {
+  if (!body || !Array.isArray(body.messages)) return 0;
+  let n = 0;
+  for (const msg of body.messages) {
+    if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+    for (const block of msg.content) {
+      if (block && typeof block === "object" && block.cache_control) n++;
+    }
+  }
+  return n;
+}
+// --------------------------------------------------------------------------
+// tool_use.input field-set normalization
+// --------------------------------------------------------------------------
+//
+// CC's serialization of `tool_use.input` can drift between turns when the
+// caller passes fields not declared in the tool's `input_schema.properties`.
+// Observed case: a SendMessage tool call where the caller passed
+// `{to, summary, message, type, recipient, content}`. Pre-miss body
+// serialized input as `{to, summary, message}` (3 schema-only keys).
+// Post-miss body (same tool_use_id, same turn position) serialized the
+// same block as `{to, summary, message, type, recipient, content}` (6 keys
+// — extras preserved). That byte drift at a mid-history assistant message
+// re-caches every block from that message forward → full-session-cost miss.
+//
+// Concrete instance: 2334-byte drift on ONE assistant-side tool_use block
+// caused a 619,722 `cache_creation_input_tokens` miss at 15:16:52 UTC on
+// msg[844] of a long-running session.
+//
+// This helper walks every assistant-role message's tool_use blocks, looks
+// up the tool's declared `input_schema.properties` from `body.tools`, and
+// rewrites `input` to contain ONLY the schema keys (in schema declaration
+// order). Tools with no schema in `body.tools` are left untouched — we
+// can't determine what's legitimate vs extra.
+//
+// Agent behavior is unaffected — extras weren't declared in the schema so
+// downstream consumers shouldn't rely on them. The point of this pass is
+// to pin the serialization to the schema's field set so CC's own drift
+// between turns can't break cache.
+// --------------------------------------------------------------------------
+/**
+ * Mutate `body` in place: for every assistant-role message's tool_use
+ * blocks whose tool name matches an entry in `body.tools` with a known
+ * `input_schema.properties`, replace `input` with a new object containing
+ * ONLY the schema-declared keys, preserved in schema declaration order.
+ *
+ * A block is rewritten only when its input carries at least one key the
+ * schema does not declare, or when its schema-declared keys appear in a
+ * different order than in the schema. A rewritten block is replaced by a
+ * shallow copy with the new `input`; values are carried over by reference.
+ * Schema keys the input does not own are NOT added.
+ *
+ * Left untouched: blocks whose `input` is missing, not an object, or an
+ * array; blocks whose tool name has no usable schema in `body.tools`.
+ * If several `body.tools` entries share a name, the last one wins.
+ * A tool that declares an empty `properties` object causes every input
+ * key of its tool_use blocks to be dropped.
+ *
+ * Not a pure function (it mutates `body.messages[*].content`), but it is
+ * idempotent: a second call on the same body finds nothing left to
+ * rewrite and returns 0.
+ *
+ * @param {*} body - Request payload with `messages` and `tools` arrays.
+ * @returns {number} Count of tool_use blocks modified (0 if nothing
+ *   changed or if `body.messages` / `body.tools` is not an array).
+ */
+function normalizeToolUseInputsInBody(body) {
+  if (!body || typeof body !== "object") return 0;
+  if (!Array.isArray(body.messages) || !Array.isArray(body.tools)) return 0;
+  // Build toolSchemas: { name: orderedKeys[] } from body.tools entries
+  // that declare input_schema.properties (prototype-less map, so any name is safe).
+  const toolSchemas = Object.create(null);
+  for (const tool of body.tools) {
+    if (!tool || typeof tool !== "object") continue;
+    const name = tool.name;
+    if (typeof name !== "string") continue;
+    const props = tool.input_schema && tool.input_schema.properties;
+    if (!props || typeof props !== "object") continue;
+    toolSchemas[name] = Object.keys(props);
+  }
+  let modified = 0;
+  for (const msg of body.messages) {
+    if (!msg || msg.role !== "assistant") continue;
+    if (!Array.isArray(msg.content)) continue;
+    for (let i = 0; i < msg.content.length; i++) {
+      const block = msg.content[i];
+      if (!block || block.type !== "tool_use") continue;
+      if (!block.input || typeof block.input !== "object" || Array.isArray(block.input)) continue;
+      const schemaKeys = toolSchemas[block.name];
+      if (!schemaKeys) continue; // unknown tool — skip
+      const currentKeys = Object.keys(block.input);
+      // Two independent reasons to rebuild the input object:
+      //   (1) it carries keys the schema does not declare (hasExtras), or
+      //   (2) its schema-declared keys appear in a different order than
+      //       the schema declares them (orderDiffers, computed below).
+      // If neither holds, the block is left untouched (see the continue).
+      const schemaKeySet = new Set(schemaKeys);
+      const hasExtras = currentKeys.some((k) => !schemaKeySet.has(k));
+      // presentSchemaKeys: declared keys the input owns, in schema order.
+      // currentInSchema: the same keys in the input's own order. Comparing
+      // them position by position detects an ordering difference.
+      const presentSchemaKeys = schemaKeys.filter((k) =>
+        Object.prototype.hasOwnProperty.call(block.input, k)
+      );
+      const currentInSchema = currentKeys.filter((k) => schemaKeySet.has(k));
+      let orderDiffers = presentSchemaKeys.length !== currentInSchema.length; // NOTE(review): lengths look always equal for plain JSON objects; confirm
+      if (!orderDiffers) {
+        for (let j = 0; j < presentSchemaKeys.length; j++) {
+          if (presentSchemaKeys[j] !== currentInSchema[j]) {
+            orderDiffers = true;
+            break;
+          }
+        }
+      }
+      if (!hasExtras && !orderDiffers) continue;
+      const newInput = {};
+      for (const k of presentSchemaKeys) {
+        newInput[k] = block.input[k];
+      }
+      msg.content[i] = { ...block, input: newInput }; // replace the block; the original object is not mutated
+      modified++;
+    }
+  }
+  return modified;
+}
 /**
  * Core fix: on EVERY call, scan the entire message array for the LATEST
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -727,6 +1124,13 @@ const _STATS_SCHEMA = {
   git_status: { applied: 0, skipped: 0, lastApplied: null },
   cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
   smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  smoosh_split: { applied: 0, skipped: 0, lastApplied: null },
+  session_start_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  continue_trailer_strip: { applied: 0, skipped: 0, lastApplied: null },
+  deferred_tools_restore: { applied: 0, skipped: 0, lastApplied: null },
+  reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
+  cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
+  tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
 };
 function _createEmptyStats() {
@@ -1348,6 +1752,67 @@ globalThis.fetch = async function (url, options) {
         }
       }
+      // Extension: session_start_normalize — SessionStart:resume → :startup rewrite
+      // and ephemeral session-id / Last-active strip. Runs BEFORE smoosh_normalize
+      // so drift at msg[0] content[N] is stabilized before any subsequent pass
+      // reads from the same text. Applies to both standalone text blocks and
+      // tool_result.content strings (in case CC's smooshSystemReminderSiblings
+      // folded the reminder before we see it).
+      // Bug: anthropics/claude-code#43657
+      // Opt-out via CACHE_FIX_SKIP_SESSION_START_NORMALIZE=1 (defaults ON).
+      if (shouldApplyFix("session_start_normalize") && payload.messages) {
+        let ssnApplied = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          for (let i = 0; i < msg.content.length; i++) {
+            const block = msg.content[i];
+            if (block?.type === "text" && typeof block.text === "string") {
+              const [t, n] = normalizeSessionStartText(block.text);
+              if (n > 0) {
+                msg.content[i] = { ...block, text: t };
+                ssnApplied += n;
+              }
+            } else if (block?.type === "tool_result" && typeof block.content === "string") {
+              const [c, n] = normalizeSessionStartText(block.content);
+              if (n > 0) {
+                msg.content[i] = { ...block, content: c };
+                ssnApplied += n;
+              }
+            }
+          }
+        }
+        if (ssnApplied > 0) {
+          modified = true;
+          debugLog(`APPLIED: session-start-normalize rewrote ${ssnApplied} marker(s)`);
+          recordFixResult("session_start_normalize", "applied");
+        } else {
+          recordFixResult("session_start_normalize", "skipped");
+        }
+      }
+      // Extension: tool_use_input_normalize — strip tool_use.input keys not
+      // declared in body.tools[*].input_schema.properties. CC's serialization
+      // of tool_use.input can drift between turns when the caller passed
+      // extra fields; the pre-miss body may serialize only the schema keys
+      // while the post-miss body serializes the full caller-supplied set
+      // (or vice versa). That byte drift at a mid-history assistant message
+      // re-caches every block from that message forward.
+      //
+      // Runs AFTER session_start_normalize so mid-history drift is pinned
+      // before any downstream pass (smoosh_*, fingerprint, ttl) hashes the
+      // same block. Default ON, opt-out via
+      // CACHE_FIX_SKIP_TOOL_USE_INPUT_NORMALIZE=1.
+      if (shouldApplyFix("tool_use_input_normalize")) {
+        const tuinApplied = normalizeToolUseInputsInBody(payload);
+        if (tuinApplied > 0) {
+          modified = true;
+          debugLog(`APPLIED: tool-use-input-normalize rewrote ${tuinApplied} tool_use block(s)`);
+          recordFixResult("tool_use_input_normalize", "applied");
+        } else {
+          recordFixResult("tool_use_input_normalize", "skipped");
+        }
+      }
       // Optimization: normalize smooshed dynamic system-reminders in tool_result content
       // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
       // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
@@ -1416,6 +1881,216 @@ globalThis.fetch = async function (url, options) {
         }
       }
+      // Extension: smoosh_split — universal un-smoosh, complements smoosh_normalize.
+      // CC's smooshSystemReminderSiblings (messages.ts:1835) folds any
+      // `<system-reminder>`-prefixed text block adjacent to a tool_result
+      // into that tool_result's content string with a leading `\n\n`.
+      // The existing smoosh_normalize above stabilizes bytes for 4 enumerated
+      // patterns (Token usage, USD budget, Output tokens, TodoWrite), but
+      // hook-injected reminders (thinking-enrichment, action-tracker, MCP
+      // deltas, custom user hooks) don't match those patterns and still drift.
+      // smoosh_split peels any trailing `\n\n<system-reminder>...\n</system-reminder>`
+      // off tool_result.content strings and restores it as a standalone text
+      // block — the pre-smoosh shape. Dynamic drift in the peeled reminder
+      // lives in a small block instead of a multi-KB tool_result string.
+      // Composed with smoosh_normalize: normalize stabilizes known patterns
+      // in-place; split peels any remainder. Full universal coverage.
+      // Bug: anthropics/claude-code#49585
+      // Opt-out via CACHE_FIX_SKIP_SMOOSH_SPLIT=1 (defaults ON).
+      if (shouldApplyFix("smoosh_split") && payload.messages) {
+        const TRAILING_SMOOSH_TAIL = /\n\n(<system-reminder>\n(?:(?!<\/system-reminder>)[\s\S])*?\n<\/system-reminder>)\s*$/;
+        let splitApplied = 0;
+        for (const msg of payload.messages) {
+          if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
+          const out = [];
+          let mutated = false;
+          const peeledReminders = [];
+          for (const block of msg.content) {
+            if (block?.type === "tool_result" && typeof block.content === "string") {
+              const reminders = [];
+              let s = block.content;
+              while (true) {
+                const m = s.match(TRAILING_SMOOSH_TAIL);
+                if (!m) break;
+                reminders.unshift(m[1]);
+                s = s.slice(0, m.index);
+              }
+              if (reminders.length > 0) {
+                out.push({ ...block, content: s });
+                for (const r of reminders) peeledReminders.push({ type: "text", text: r });
+                splitApplied += reminders.length;
+                mutated = true;
+                continue;
+              }
+            }
+            out.push(block);
+          }
+          // Peeled reminders go AFTER all other blocks so tool_results stay
+          // consecutive (avoids API 400 "tool use concurrency" errors).
+          if (mutated) msg.content = [...out, ...peeledReminders];
+        }
+        if (splitApplied > 0) {
+          modified = true;
+          debugLog(`APPLIED: smoosh-split peeled ${splitApplied} trailing system-reminder(s) from tool_result.content`);
+          recordFixResult("smoosh_split", "applied");
+        } else {
+          recordFixResult("smoosh_split", "skipped");
+        }
+      }
+      // Extension: continue_trailer_strip — remove the "Continue from where
+      // you left off." text block CC appends to the last user message on
+      // --continue. Pre-exit bodies didn't carry it, so its presence in the
+      // resumed body creates tail-of-last-msg drift that breaks cache.
+      // Exact-match string equality on `.text` — user sentences mentioning
+      // the phrase inside longer content are not touched.
+      // Bug: anthropics/claude-code#12 (resume UX), observed empirically.
+      // Opt-out via CACHE_FIX_SKIP_CONTINUE_TRAILER_STRIP=1 (defaults ON).
+      if (shouldApplyFix("continue_trailer_strip") && payload.messages) {
+        let trailerStripped = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          const kept = msg.content.filter((block) => {
+            if (isContinueTrailerBlock(block)) {
+              trailerStripped++;
+              return false;
+            }
+            return true;
+          });
+          if (kept.length !== msg.content.length) msg.content = kept;
+        }
+        if (trailerStripped > 0) {
+          modified = true;
+          debugLog(`APPLIED: continue-trailer-strip removed ${trailerStripped} trailer block(s)`);
+          recordFixResult("continue_trailer_strip", "applied");
+        } else {
+          recordFixResult("continue_trailer_strip", "skipped");
+        }
+      }
+      // Extension: deferred_tools_restore — persist-and-restore the
+      // deferred-tools attachment block across sessions so MCP reconnect
+      // race at resume-time doesn't shrink msg[0] and bust the whole cache.
+      // Snapshot key defaults to process.cwd() (one snapshot per project).
+      // Opt-out via CACHE_FIX_SKIP_DEFERRED_TOOLS_RESTORE=1 (defaults ON).
+      if (shouldApplyFix("deferred_tools_restore") && payload.messages) {
+        let dtrRestored = 0;
+        const found = findDeferredToolsBlockInBody(payload);
+        if (found) {
+          const hasUnavail = found.text.includes(DEFERRED_TOOLS_UNAVAILABLE_MARKER);
+          const snapshotPath = deferredToolsSnapshotPath(process.cwd());
+          if (!hasUnavail) {
+            // Clean baseline — persist it for future resumes. Silent on
+            // any I/O error; snapshot is best-effort.
+            try {
+              mkdirSync(DEFERRED_TOOLS_SNAPSHOT_DIR, { recursive: true });
+              writeFileSync(snapshotPath, found.text, "utf-8");
+            } catch {}
+          } else {
+            // Shrunk block with explicit "no longer available" signal →
+            // attempt restore. Only substitute if the persisted version is
+            // strictly longer (never downgrade to a stale shorter snapshot).
+            let snapshot = null;
+            try { snapshot = readFileSync(snapshotPath, "utf-8"); } catch {}
+            if (snapshot && snapshot.length > found.text.length) {
+              const targetMsg = payload.messages[found.msgIdx];
+              const newContent = targetMsg.content.slice();
+              newContent[found.blockIdx] = { ...newContent[found.blockIdx], text: snapshot };
+              payload.messages[found.msgIdx] = { ...targetMsg, content: newContent };
+              dtrRestored = 1;
+            }
+          }
+        }
+        if (dtrRestored > 0) {
+          modified = true;
+          debugLog(`APPLIED: deferred-tools-restore substituted full block at msg[${found.msgIdx}].content[${found.blockIdx}]`);
+          recordFixResult("deferred_tools_restore", "applied");
+        } else {
+          recordFixResult("deferred_tools_restore", "skipped");
+        }
+      }
+      // Extension: reminder_strip — remove bookkeeping system-reminder blocks
+      // (Token usage / USD budget / Output tokens / TodoWrite nudge / turn
+      // counters) entirely from user messages. Runs AFTER smoosh_split so
+      // blocks peeled out of tool_result.content are visible as standalone
+      // text and can be matched by isBookkeepingReminder.
+      // Zero model visibility, zero drift.
+      // Opt-out via CACHE_FIX_SKIP_REMINDER_STRIP=1 (defaults ON).
+      if (shouldApplyFix("reminder_strip") && payload.messages) {
+        let reminderStripped = 0;
+        for (const msg of payload.messages) {
+          if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
+          const kept = msg.content.filter((block) => {
+            if (block?.type !== "text") return true;
+            if (isBookkeepingReminder(block.text)) {
+              reminderStripped++;
+              return false;
+            }
+            return true;
+          });
+          if (kept.length !== msg.content.length) msg.content = kept;
+        }
+        if (reminderStripped > 0) {
+          modified = true;
+          debugLog(`APPLIED: reminder-strip removed ${reminderStripped} bookkeeping reminder block(s)`);
+          recordFixResult("reminder_strip", "applied");
+        } else {
+          recordFixResult("reminder_strip", "skipped");
+        }
+      }
+      // Extension: cache_control_normalize — pin the cache_control marker at
+      // a canonical position (last block of last user message) on every
+      // outbound body. Prevents marker-shuffle drift between turns from
+      // invalidating the previous-last-block's cached bytes. Runs LAST
+      // (after smoosh_split and any other content-mutating pass) so the
+      // canonical position is calculated against the final content array.
+      // Fast path: if canonical position already holds the correct marker
+      // and it's the only user-side marker, body passes through untouched.
+      // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_NORMALIZE=1 (defaults ON).
+      if (shouldApplyFix("cache_control_normalize") && payload.messages && payload.messages.length > 0) {
+        // Locate canonical position: last block of the LAST user message.
+        // If its content is not a non-empty array, there is no valid target: skip.
+        let targetMsgIdx = -1;
+        let targetBlockIdx = -1;
+        for (let i = payload.messages.length - 1; i >= 0; i--) {
+          const m = payload.messages[i];
+          if (m?.role !== "user") continue;
+          if (!Array.isArray(m.content) || m.content.length === 0) break;
+          targetMsgIdx = i;
+          targetBlockIdx = m.content.length - 1;
+          break;
+        }
+        let ccMutated = false;
+        if (targetMsgIdx !== -1) {
+          const targetBlock = payload.messages[targetMsgIdx].content[targetBlockIdx];
+          const existingCC = targetBlock?.cache_control;
+          const canonicalAlreadyCorrect =
+            existingCC &&
+            existingCC.type === CACHE_CONTROL_CANONICAL_MARKER.type &&
+            existingCC.ttl === CACHE_CONTROL_CANONICAL_MARKER.ttl;
+          if (!(canonicalAlreadyCorrect && countUserCacheControlMarkers(payload) === 1)) {
+            // Run stripCacheControlMarkers on every message, then place the canonical marker.
+            for (const msg of payload.messages) stripCacheControlMarkers(msg);
+            const tm = payload.messages[targetMsgIdx];
+            const newContent = tm.content.slice();
+            newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...CACHE_CONTROL_CANONICAL_MARKER } };
+            payload.messages[targetMsgIdx] = { ...tm, content: newContent };
+            ccMutated = true;
+          }
+        }
+        if (ccMutated) {
+          modified = true;
+          debugLog(`APPLIED: cache_control_normalize pinned marker at msg[${targetMsgIdx}].content[${targetBlockIdx}]`);
+          recordFixResult("cache_control_normalize", "applied");
+        } else {
+          recordFixResult("cache_control_normalize", "skipped");
+        }
+      }
       // Bug 5: TTL enforcement (configurable per request type)
       // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
       // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -1841,5 +2516,17 @@ export {
   isClearArtifact,
   rewriteOutputEfficiencyInstruction,
   normalizeOutputEfficiencyReplacement,
+  normalizeSessionStartText,
+  isContinueTrailerBlock,
+  CONTINUE_TRAILER_TEXT,
+  findDeferredToolsBlockInBody,
+  deferredToolsSnapshotPath,
+  DEFERRED_TOOLS_AVAILABLE_MARKER,
+  DEFERRED_TOOLS_UNAVAILABLE_MARKER,
+  isBookkeepingReminder,
+  stripCacheControlMarkers,
+  countUserCacheControlMarkers,
+  CACHE_CONTROL_CANONICAL_MARKER,
+  normalizeToolUseInputsInBody,
   _pinnedBlocks,  // exported so tests can reset between runs
 };