claude-code-cache-fix 2.0.0-beta.4 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +2 -1
  2. package/package.json +1 -1
  3. package/preload.mjs +922 -0
package/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
6
6
 
7
- Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
7
+ Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.112. Opus 4.7 compatible.
8
8
 
9
9
  > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
10
10
 
@@ -607,6 +607,7 @@ measurable signature of cache-efficiency degradation.
607
607
  - **[@JEONG-JIWOO](https://github.com/JEONG-JIWOO)** — VS Code extension investigation: discovered `claudeCode.claudeProcessWrapper` as the working integration path, wrote the C wrapper for Windows (#16)
608
608
  - **[@X-15](https://github.com/X-15)** — VS Code extension validation, per-fix health status analysis confirming safety check behavior on v2.1.105 (#16)
609
609
  - **[@ArkNill](https://github.com/ArkNill)** — Fingerprint verification fix for CC v2.1.108+ (`isMeta` filter change, PR #21), Korean README (PR #22), original [claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis) research
610
+ - **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery
610
611
 
611
612
  If you contributed to the community effort on these issues and aren't listed here, please open an issue or PR — we want to credit everyone properly.
612
613
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "2.0.0-beta.4",
3
+ "version": "2.0.1",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -330,6 +330,659 @@ function stripSessionKnowledge(text) {
330
330
  );
331
331
  }
332
332
 
333
+ // --------------------------------------------------------------------------
334
+ // SessionStart:resume → :startup rewrite (Bug: anthropics/claude-code#43657)
335
+ // --------------------------------------------------------------------------
336
+ //
337
+ // On `claude --continue`, CC fires processSessionStartHooks('resume', …) at
338
+ // src/utils/sessionStart.ts:35. The resulting attachment text wraps the
339
+ // hook's stdout in `<system-reminder>\nSessionStart:resume hook success: …`.
340
+ // The original (pre-resume) session sent the same block as
341
+ // `SessionStart:startup hook success: …`. Byte difference at msg[0] content[N]
342
+ // → whole message prefix re-caches → full-session-cost miss.
343
+ //
344
+ // Some SessionStart hooks additionally embed `<session-id>` tags or
345
+ // `Last active: <timestamp>` lines inside the reminder body, both of which
346
+ // carry UUID/date volatility on top of the event-name flip.
347
+ //
348
+ // This helper rewrites the outbound text to match the originally-cached
349
+ // form. Runs on both standalone text blocks and tool_result.content strings
350
+ // (covers the case where the SessionStart reminder got smooshed by CC's
351
+ // smooshSystemReminderSiblings pass before we see it).
352
+ //
353
+ // Agent behavior is unaffected — CC does not condition behavior on the
354
+ // event-name text, and session-id / timestamps are ephemeral runtime
355
+ // metadata, not semantic inputs.
356
+ // --------------------------------------------------------------------------
357
+
358
// Patterns for the three sources of SessionStart volatility. All carry the
// `g` flag so String.prototype.replace rewrites every occurrence.
const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;
const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;
const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;

/**
 * Normalize a single text payload (a text block's .text or a tool_result's
 * string .content) to remove SessionStart-resume volatility.
 *
 * Three rewrites are applied, in order:
 *   1. `SessionStart:resume hook success:` → `SessionStart:startup hook success:`
 *   2. `<session-id>…</session-id>` tags (with an optional leading newline) removed
 *   3. `\nLast active: …` lines removed
 *
 * The function is a string-to-string transform and is idempotent: feeding
 * the output back in yields the same text with a count of 0.
 *
 * @param {*} text - Candidate payload. Non-strings, and strings that do not
 *   contain "SessionStart:", are returned untouched.
 * @returns {[*, number]} `[newText, count]` where `count` is the number of
 *   rewrite KINDS (0–3) that changed the text, not the number of individual
 *   matches.
 */
function normalizeSessionStartText(text) {
  if (typeof text !== "string" || !text.includes("SessionStart:")) return [text, 0];

  const rewrites = [
    [SESSION_START_RESUME_MARKER, "SessionStart:startup hook success:"],
    [SESSION_START_ID_TAG, ""],
    [SESSION_START_LAST_ACTIVE_LINE, ""],
  ];

  let out = text;
  let count = 0;
  for (const [pattern, replacement] of rewrites) {
    // Deliberately no `.test()` probe: on a shared /g regex, test() starts at
    // `lastIndex`, so a stale non-zero value would make it miss a match near
    // the start of the string. replace() with a global regex always scans
    // from index 0, and every replacement here changes the string, so
    // "did the text change" is an exact stand-in for "did the pattern match".
    const next = out.replace(pattern, replacement);
    if (next !== out) {
      out = next;
      count++;
    }
  }
  return [out, count];
}
390
+
391
+ // --------------------------------------------------------------------------
392
+ // Continue-trailer strip (Bug: anthropics/claude-code#12 / resume UX)
393
+ // --------------------------------------------------------------------------
394
+ //
395
+ // On `claude --continue`, CC appends a text block whose text is EXACTLY
396
+ // "Continue from where you left off." to the last user message before
397
+ // firing the first post-resume request. The pre-exit body did not carry
398
+ // that block, so its presence in the resumed body creates a tail-of-last-
399
+ // user-message drift (~40 bytes plus JSON framing) that breaks cache at
400
+ // that position.
401
+ //
402
+ // The trailer is a semantic no-op — the agent already has the full prior
403
+ // conversation as context. Removing it makes the post-resume body byte-
404
+ // match what the pre-exit body cached at the tail.
405
+ //
406
+ // Match is intentionally narrow (exact string equality on the block's
407
+ // .text) so mentions of the phrase inside a longer user sentence don't
408
+ // get caught.
409
+ // --------------------------------------------------------------------------
410
+
411
// Exact text of the trailer block this module looks for.
const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";

/**
 * Predicate for the Continue-trailer block.
 *
 * A block qualifies only when it is a non-null object with `type === "text"`
 * and a `text` property strictly equal to CONTINUE_TRAILER_TEXT. Other
 * properties on the block (for example `cache_control`) are not inspected.
 *
 * @param {*} block - Candidate content block.
 * @returns {boolean} true for an exact-match trailer block, false otherwise.
 */
function isContinueTrailerBlock(block) {
  if (!block || typeof block !== "object") return false;
  if (block.type !== "text") return false;
  return block.text === CONTINUE_TRAILER_TEXT;
}
427
+
428
+ // --------------------------------------------------------------------------
429
+ // Deferred-tools restore (MCP reconnect race)
430
+ // --------------------------------------------------------------------------
431
+ //
432
+ // Observed empirically: on `claude --continue`, if MCP servers haven't
433
+ // finished reconnecting by the time CC fires the first post-resume
434
+ // request, the `<system-reminder>The following deferred tools are now
435
+ // available via ToolSearch…` block at msg[0] (or wherever the attachment
436
+ // lands post-compaction) shrinks dramatically. A full list of ~40 tools
437
+ // collapses to a handful of CC built-ins (AskUserQuestion, EnterPlanMode,
438
+ // ExitPlanMode, PushNotification) and CC injects a trailing
439
+ // `The following deferred tools are no longer available (their MCP server
440
+ // disconnected). Do not search for them — ToolSearch will return no match:`
441
+ // notice.
442
+ //
443
+ // That block change at the root of the message array breaks cache at the
444
+ // very top — the entire ~940K prompt re-caches. By the time the second
445
+ // post-resume request fires, MCPs are usually reconnected and the block is
446
+ // full again, but the cache is already committed to the shrunk version
447
+ // for this session.
448
+ //
449
+ // This extension snapshots the block to
450
+ // `~/.claude/cache-fix-state/deferred-tools-<sha1(key)>.txt` every time
451
+ // it's sent in its full form (no UNAVAILABLE marker), keyed by a caller-
452
+ // supplied project key (default: cwd). On a subsequent request where the
453
+ // block is shorter AND contains the UNAVAILABLE marker, the persisted
454
+ // full bytes are substituted so the on-wire body matches the server's
455
+ // cached prefix.
456
+ //
457
+ // Trade-off: the restored block may reference MCP tools that haven't
458
+ // actually reconnected yet. Agent calls ToolSearch → no match → one retry.
459
+ // Tiny cost versus a full-prompt cache miss on every resume.
460
+ // --------------------------------------------------------------------------
461
+
462
// Marker phrases for the deferred-tools reminder, and the directory under
// the user's home where snapshot files are placed.
const DEFERRED_TOOLS_AVAILABLE_MARKER = "The following deferred tools are now available via ToolSearch";
const DEFERRED_TOOLS_UNAVAILABLE_MARKER = "The following deferred tools are no longer available";
const DEFERRED_TOOLS_SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-state");

/**
 * Derive the absolute snapshot file path for a key.
 *
 * The key is stringified and SHA-1 hashed; the first 16 hex characters of
 * the digest form the file name `deferred-tools-<hash>.txt` inside
 * DEFERRED_TOOLS_SNAPSHOT_DIR.
 *
 * @param {*} key - Any value; coerced with String().
 * @returns {string} Absolute path of the snapshot file for that key.
 */
function deferredToolsSnapshotPath(key) {
  const hasher = createHash("sha1");
  hasher.update(String(key));
  const shortHash = hasher.digest("hex").slice(0, 16);
  const fileName = `deferred-tools-${shortHash}.txt`;
  return join(DEFERRED_TOOLS_SNAPSHOT_DIR, fileName);
}
476
+
477
/**
 * Find the first deferred-tools reminder block in `body.messages`.
 *
 * Messages are scanned in order, and within each message the content blocks
 * are scanned in order. A block matches when it is a text block whose
 * string `text` contains DEFERRED_TOOLS_AVAILABLE_MARKER. Only messages with
 * `role === "user"` and an array `content` are searched, so the marker
 * phrase appearing in assistant output is never treated as the real block.
 *
 * @param {*} body - Request body; anything without a `messages` array
 *   yields null.
 * @returns {{msgIdx: number, blockIdx: number, text: string} | null}
 *   Location and text of the first match, or null when none is found.
 */
function findDeferredToolsBlockInBody(body) {
  const messages = body?.messages;
  if (!Array.isArray(messages)) return null;

  const isDeferredToolsBlock = (candidate) =>
    candidate?.type === "text" &&
    typeof candidate.text === "string" &&
    candidate.text.includes(DEFERRED_TOOLS_AVAILABLE_MARKER);

  for (const [msgIdx, message] of messages.entries()) {
    if (message?.role !== "user") continue;
    if (!Array.isArray(message.content)) continue;
    const blockIdx = message.content.findIndex(isDeferredToolsBlock);
    if (blockIdx !== -1) {
      return { msgIdx, blockIdx, text: message.content[blockIdx].text };
    }
  }
  return null;
}
506
+
507
+ // --------------------------------------------------------------------------
508
+ // Bookkeeping-reminder strip
509
+ // --------------------------------------------------------------------------
510
+ //
511
+ // Complements `smoosh_normalize` / `smoosh_split`: where normalize stabilizes
512
+ // bytes in-place and split peels smooshed reminders back into standalone
513
+ // text blocks, this pass REMOVES purely-bookkeeping reminder blocks entirely
514
+ // from the outbound body. Zero model visibility, zero drift.
515
+ //
516
+ // Targeted patterns (all CC-internal, per-turn values the agent doesn't need
517
+ // to condition behavior on):
518
+ // - `Token usage: <N>/<M>; <K> remaining`
519
+ // - `Output tokens — turn: <X> · session: <Y>`
520
+ // - `USD budget: $<X>/$<Y>; $<Z> remaining`
521
+ // - `The task tools haven't been used recently. …`
522
+ // - `The TodoWrite tool hasn't been used recently. …`
523
+ // - `Remaining conversation turns: <N>`
524
+ // - `Messages until auto-compact: <N>`
525
+ //
526
+ // Hook-injected reminders (thinking-enrichment, action-tracker,
527
+ // PreToolUse/PostToolUse blocking errors, UserPromptSubmit additional
528
+ // context, custom user hooks) are deliberately NOT stripped here — the
529
+ // agent needs that feedback visible in the turn it fires, and attempting a
530
+ // history-only filter creates per-turn drift of its own (the "last user
531
+ // message" shifts each turn, so a reminder preserved at turn N gets
532
+ // stripped at N+1 when its host message falls into history). Leaving hook
533
+ // reminders untouched is the safer choice; their residual drift is small
534
+ // compared to bookkeeping churn.
535
+ // --------------------------------------------------------------------------
536
+
537
// Matches a string that is exactly one `<system-reminder>` wrapper (optional
// trailing whitespace allowed) and captures the text between the newlines
// that follow the opening tag and precede the closing tag.
const REMINDER_WRAP_REGEX = /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;

// Inner-text patterns that mark a reminder as bookkeeping-only.
const BOOKKEEPING_REMINDER_PATTERNS = [
  /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
  /^Output tokens \u2014 turn: [^\n]+ \u00b7 session: [^\n]+\s*$/,
  /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
  /^The task tools haven't been used recently\./,
  /^The TodoWrite tool hasn't been used recently\./,
  /^Remaining conversation turns: /,
  /^Messages? until auto-compact: /,
];

/**
 * Decide whether `text` is a `<system-reminder>`-wrapped block whose inner
 * content matches one of BOOKKEEPING_REMINDER_PATTERNS.
 *
 * @param {*} text - Candidate block text.
 * @returns {boolean} true only when `text` is a string, the wrapper regex
 *   matches it, and at least one bookkeeping pattern matches the captured
 *   inner text.
 */
function isBookkeepingReminder(text) {
  if (typeof text !== "string") return false;
  const wrapped = REMINDER_WRAP_REGEX.exec(text);
  if (wrapped === null) return false;
  const [, inner] = wrapped;
  return BOOKKEEPING_REMINDER_PATTERNS.some((pattern) => pattern.test(inner));
}
565
+
566
+ // --------------------------------------------------------------------------
567
+ // cache_control marker position-normalizer
568
+ // --------------------------------------------------------------------------
569
+ //
570
+ // Anthropic's prompt-cache uses `cache_control: {type: "ephemeral", ttl: ...}`
571
+ // markers on content blocks as cache breakpoints. CC places this marker on
572
+ // "the last block of the last user message" each turn — which shifts as new
573
+ // turns arrive. When the marker moves, the PREVIOUS last-block's JSON loses
574
+ // the cache_control field → that block's bytes differ from the server's
575
+ // cached version → partial re-cache on top of the stable system-prompt
576
+ // cache.
577
+ //
578
+ // Enforce a canonical position on every outbound body:
579
+ // 1. Strip every existing cache_control marker from user-message content
580
+ // blocks.
581
+ // 2. Place a single {type: "ephemeral", ttl: "1h"} marker on the LAST
582
+ // content block of the LAST user message.
583
+ //
584
+ // Fast path: if the canonical block already has the correct marker AND it's
585
+ // the only user-side marker, the body is left untouched — ensures the pass
586
+ // is a true no-op when nothing changed.
587
+ //
588
+ // System-side markers (e.g., on `system[2]` for the global prompt) are NOT
589
+ // touched — they're CC's stable breakpoint for the system prompt and work
590
+ // correctly.
591
+ // --------------------------------------------------------------------------
592
+
593
// The marker shape used as the canonical user-side cache breakpoint.
const CACHE_CONTROL_CANONICAL_MARKER = { type: "ephemeral", ttl: "1h" };

/**
 * Remove `cache_control` from every content block of one user message.
 *
 * Each block that carries a truthy `cache_control` is replaced, at the same
 * index of `msg.content`, by a shallow copy without that property. The
 * content array itself is mutated; the original block objects are not.
 *
 * @param {*} msg - Message to process. Anything that is not a user-role
 *   message with an array `content` is ignored.
 * @returns {number} How many blocks had a marker removed.
 */
function stripCacheControlMarkers(msg) {
  if (msg?.role !== "user" || !Array.isArray(msg.content)) return 0;

  let stripped = 0;
  msg.content.forEach((block, idx) => {
    const carriesMarker = block && typeof block === "object" && block.cache_control;
    if (!carriesMarker) return;
    const withoutMarker = { ...block };
    delete withoutMarker.cache_control;
    msg.content[idx] = withoutMarker;
    stripped += 1;
  });
  return stripped;
}
613
+
614
/**
 * Tally content blocks carrying a truthy `cache_control` across all
 * user-role messages of a request body.
 *
 * @param {*} body - Request body; anything without a `messages` array
 *   counts as zero.
 * @returns {number} Total number of marked blocks in user messages whose
 *   `content` is an array. Other roles are ignored.
 */
function countUserCacheControlMarkers(body) {
  const messages = body?.messages;
  if (!Array.isArray(messages)) return 0;

  const isMarked = (block) => Boolean(block && typeof block === "object" && block.cache_control);

  return messages
    .filter((msg) => msg?.role === "user" && Array.isArray(msg.content))
    .reduce((total, msg) => total + msg.content.filter(isMarked).length, 0);
}
629
+
630
+ // --------------------------------------------------------------------------
631
+ // tool_use.input field-set normalization
632
+ // --------------------------------------------------------------------------
633
+ //
634
+ // CC's serialization of `tool_use.input` can drift between turns when the
635
+ // caller passes fields not declared in the tool's `input_schema.properties`.
636
+ // Observed case: a SendMessage tool call where the caller passed
637
+ // `{to, summary, message, type, recipient, content}`. Pre-miss body
638
+ // serialized input as `{to, summary, message}` (3 schema-only keys).
639
+ // Post-miss body (same tool_use_id, same turn position) serialized the
640
+ // same block as `{to, summary, message, type, recipient, content}` (6 keys
641
+ // — extras preserved). That byte drift at a mid-history assistant message
642
+ // re-caches every block from that message forward → full-session-cost miss.
643
+ //
644
+ // Concrete instance: 2334-byte drift on ONE assistant-side tool_use block
645
+ // caused a 619,722 `cache_creation_input_tokens` miss at 15:16:52 UTC on
646
+ // msg[844] of a long-running session.
647
+ //
648
+ // This helper walks every assistant-role message's tool_use blocks, looks
649
+ // up the tool's declared `input_schema.properties` from `body.tools`, and
650
+ // rewrites `input` to contain ONLY the schema keys (in schema declaration
651
+ // order). Tools with no schema in `body.tools` are left untouched — we
652
+ // can't determine what's legitimate vs extra.
653
+ //
654
+ // Agent behavior is unaffected — extras weren't declared in the schema so
655
+ // downstream consumers shouldn't rely on them. The point of this pass is
656
+ // to pin the serialization to the schema's field set so CC's own drift
657
+ // between turns can't break cache.
658
+ // --------------------------------------------------------------------------
659
+
660
/**
 * Pin every assistant-side `tool_use.input` to its tool's declared schema.
 *
 * For each tool_use block in an assistant-role message whose `name` matches
 * an entry of `body.tools` that declares `input_schema.properties`, `input`
 * is replaced by a new object holding ONLY the schema-declared keys that
 * the original input owned, in schema declaration order. Blocks whose input
 * already has exactly that key set and order are left untouched, as are
 * tools without a known schema.
 *
 * `body` is mutated in place (the affected entry of `msg.content` is
 * swapped for a shallow copy with the new `input`). The operation is
 * idempotent: a second call on the same body changes nothing and returns 0.
 *
 * @param {*} body - Request body with `messages` and `tools` arrays.
 * @returns {number} Count of tool_use blocks rewritten (0 when nothing
 *   changed or when `body`, `body.messages` or `body.tools` is missing).
 */
function normalizeToolUseInputsInBody(body) {
  if (!body || typeof body !== "object") return 0;
  if (!Array.isArray(body.messages) || !Array.isArray(body.tools)) return 0;

  // name → { keys, keySet }. Both forms are built once per tool so the
  // per-block loop below does no repeated Set construction. A later tool
  // with the same name overrides an earlier one.
  const toolSchemas = new Map();
  for (const tool of body.tools) {
    if (!tool || typeof tool !== "object" || typeof tool.name !== "string") continue;
    const props = tool.input_schema && tool.input_schema.properties;
    if (!props || typeof props !== "object") continue;
    const keys = Object.keys(props);
    toolSchemas.set(tool.name, { keys, keySet: new Set(keys) });
  }

  const ownsKey = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  let modified = 0;
  for (const msg of body.messages) {
    if (!msg || msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
    for (let i = 0; i < msg.content.length; i++) {
      const block = msg.content[i];
      if (!block || block.type !== "tool_use") continue;
      const input = block.input;
      if (!input || typeof input !== "object" || Array.isArray(input)) continue;
      const schema = toolSchemas.get(block.name);
      if (!schema) continue; // unknown tool — can't tell legitimate from extra

      const currentKeys = Object.keys(input);
      const hasExtras = currentKeys.some((k) => !schema.keySet.has(k));
      // Schema keys the input actually owns, in schema order …
      const presentSchemaKeys = schema.keys.filter((k) => ownsKey(input, k));
      // … versus the same keys in the input's current order.
      const currentInSchema = currentKeys.filter((k) => schema.keySet.has(k));
      const orderDiffers =
        presentSchemaKeys.length !== currentInSchema.length ||
        presentSchemaKeys.some((k, j) => k !== currentInSchema[j]);
      if (!hasExtras && !orderDiffers) continue;

      // Object.fromEntries defines own data properties, so a schema key
      // literally named "__proto__" is copied as data rather than being
      // routed through the prototype setter (which would drop the field).
      const newInput = Object.fromEntries(presentSchemaKeys.map((k) => [k, input[k]]));
      msg.content[i] = { ...block, input: newInput };
      modified++;
    }
  }
  return modified;
}
729
+
730
+ // --------------------------------------------------------------------------
731
+ // cache_control_sticky — preserve historical marker positions across turns
732
+ // --------------------------------------------------------------------------
733
+ //
734
+ // Covers a cache-miss class that cache_control_normalize can't reach by
735
+ // itself. CC maintains at most one user-side cache_control marker at a time:
736
+ // as conversation grows, CC moves the marker from the tail of one user turn
737
+ // to the tail of the next, DROPPING it from the previous position. The
738
+ // dropped position's block loses the ~43 bytes of `"cache_control":{"type":
739
+ // "ephemeral","ttl":"1h"}` framing — a tail-of-message byte diff that
740
+ // invalidates every downstream cached block (~600K tokens' worth on a
741
+ // long-running session).
742
+ //
743
+ // Observed instance: at 16:27:13 UTC today, a 1284-message session emitted
744
+ // cw=804,428 (hit=2.3%). Diff of main-session bodies 585 → 587 showed ONE
745
+ // message diverged — msg[1281] — which lost its cache_control marker (43
746
+ // bytes) because CC had moved the marker to the new last user msg[1283].
747
+ //
748
+ // cache_control_normalize places exactly ONE canonical marker at the last
749
+ // block of the last user message on every outbound body. That solves the
750
+ // current-marker-drift class but cannot preserve historical markers — CC
751
+ // has already dropped them by the time the payload reaches this extension.
752
+ //
753
+ // This sticky extension maintains per-session state tracking where markers
754
+ // have appeared in prior turns, and reinstates them on future turns as
755
+ // additive preservation. Up to 3 historical message-level markers are
756
+ // tracked (Anthropic's hard limit is 4 cache_control markers total — 1 for
757
+ // system[2] + 3 for message-level breakpoints). When a historical position
758
+ // would exceed the cap, the oldest tracked entry is dropped (LRU).
759
+ //
760
+ // Messages are identified by a stable hash so that compaction rewrites /
761
+ // index shifts don't confuse the tracker:
762
+ // - If the message has a tool_use or tool_result block with an `id` or
763
+ // `tool_use_id`, hash `role|<block type>|<id>` (block type is `tool_use` or `tool_result`).
764
+ // - Otherwise hash `role|text|firstTextContent.slice(0, 256)`.
765
+ //
766
+ // Pipeline order: runs AFTER cache_control_normalize (when it's present) so
767
+ // normalize first pins the canonical marker at the last user msg, then
768
+ // sticky re-adds historical markers on their hashed messages. Skips any
769
+ // message already carrying a marker (fast no-op when sticky fires first).
770
+ //
771
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
772
+ // --------------------------------------------------------------------------
773
+
774
// Directory for sticky state files, the maximum number of tracked
// positions, and the marker used when a persisted entry has none.
const CACHE_CONTROL_STICKY_DIR = join(homedir(), ".claude", "cache-fix-state");
const CACHE_CONTROL_STICKY_MAX_POSITIONS = 3;
const CACHE_CONTROL_STICKY_DEFAULT_MARKER = { type: "ephemeral", ttl: "1h" };

/**
 * Derive the absolute state-file path for a project key.
 *
 * The key is stringified and SHA-1 hashed; the first 16 hex characters of
 * the digest form the file name `cache-control-sticky-<hash>.json` inside
 * CACHE_CONTROL_STICKY_DIR.
 *
 * @param {*} key - Any value; coerced with String().
 * @returns {string} Absolute path of the state file for that key.
 */
function cacheControlStickyStatePath(key) {
  const hasher = createHash("sha1");
  hasher.update(String(key));
  const shortHash = hasher.digest("hex").slice(0, 16);
  const fileName = `cache-control-sticky-${shortHash}.json`;
  return join(CACHE_CONTROL_STICKY_DIR, fileName);
}
786
+
787
/**
 * Compute a short identifier for a message from its role and content.
 *
 * Anchor selection, in priority order:
 *   1. The first content block that is a `tool_use` with a non-empty string
 *      `id`, or a `tool_result` with a non-empty string `tool_use_id`
 *      → hashes `<role>|tool_use|<id>` or `<role>|tool_result|<tool_use_id>`.
 *   2. Otherwise the first `text` block with a string `text`
 *      → hashes `<role>|text|<first 256 UTF-16 code units of the text>`.
 *
 * A non-string `role` is treated as the empty string.
 *
 * @param {*} msg - Message object with `role` and array `content`.
 * @returns {string | null} First 16 hex characters of the SHA-1 digest, or
 *   null when `msg` is not an object, `content` is not a non-empty array,
 *   or no block provides an anchor.
 */
function computeStickyMessageHash(msg) {
  if (!msg || typeof msg !== "object") return null;
  const role = typeof msg.role === "string" ? msg.role : "";
  const content = msg.content;
  if (!Array.isArray(content) || content.length === 0) return null;

  const shortSha1 = (payload) => createHash("sha1").update(payload).digest("hex").slice(0, 16);
  const blocks = content.filter((b) => b && typeof b === "object");

  // Tool identifiers take precedence over text, wherever they sit.
  const toolAnchor = blocks.find(
    (b) =>
      (b.type === "tool_use" && typeof b.id === "string" && b.id) ||
      (b.type === "tool_result" && typeof b.tool_use_id === "string" && b.tool_use_id)
  );
  if (toolAnchor) {
    const anchorId = toolAnchor.type === "tool_use" ? toolAnchor.id : toolAnchor.tool_use_id;
    return shortSha1(`${role}|${toolAnchor.type}|${anchorId}`);
  }

  const firstText = blocks.find((b) => b.type === "text" && typeof b.text === "string");
  if (!firstText) return null;
  return shortSha1(`${role}|text|${firstText.text.slice(0, 256)}`);
}
819
+
820
/**
 * Read and sanitize the persisted sticky state for a project key.
 *
 * Never throws. An empty state is returned when the file cannot be read,
 * when its JSON does not parse, or when the parsed value lacks a
 * `positions` array (the last two cases are reported through debugLog).
 *
 * Each persisted position is normalized on the way in:
 *   - entries that are not objects, or lack a non-empty string `msg_hash`,
 *     are dropped;
 *   - `position_hint` is always set to "last_block", whatever was stored;
 *   - `marker` is shallow-copied when it is an object with a string `type`,
 *     otherwise a copy of CACHE_CONTROL_STICKY_DEFAULT_MARKER is used.
 *
 * @param {*} key - Project key, passed to cacheControlStickyStatePath.
 * @returns {{version: 1, positions: Array<{msg_hash: string,
 *   position_hint: "last_block", marker: object}>}} Sanitized state.
 */
function readCacheControlStickyState(key) {
  const emptyState = () => ({ version: 1, positions: [] });
  const path = cacheControlStickyStatePath(key);

  let raw;
  try {
    raw = readFileSync(path, "utf-8");
  } catch {
    // Missing or unreadable file: start from scratch.
    return emptyState();
  }

  try {
    const parsed = JSON.parse(raw);
    if (!parsed || typeof parsed !== "object" || !Array.isArray(parsed.positions)) {
      debugLog("cache_control_sticky: state file malformed shape — resetting");
      return emptyState();
    }
    const positions = [];
    for (const p of parsed.positions) {
      if (!p || typeof p !== "object") continue;
      if (typeof p.msg_hash !== "string" || !p.msg_hash) continue;
      const markerIsUsable =
        p.marker && typeof p.marker === "object" && typeof p.marker.type === "string";
      positions.push({
        msg_hash: p.msg_hash,
        // Written as a constant: the previous conditional produced
        // "last_block" on both of its branches.
        position_hint: "last_block",
        marker: markerIsUsable ? { ...p.marker } : { ...CACHE_CONTROL_STICKY_DEFAULT_MARKER },
      });
    }
    return { version: 1, positions };
  } catch (e) {
    debugLog(`cache_control_sticky: state JSON parse error (${e?.message}) — resetting`);
    return emptyState();
  }
}
858
+
859
/**
 * Persist sticky state for a project key via write-to-temp-then-rename.
 *
 * The state directory is created if needed, the state is serialized as
 * 2-space-indented JSON into `<path>.tmp`, and the temp file is then renamed
 * over the final path. Any error along the way is reported through debugLog
 * and otherwise ignored (best-effort).
 *
 * @param {*} key - Project key, passed to cacheControlStickyStatePath.
 * @param {*} state - Value to serialize with JSON.stringify.
 * @returns {void}
 */
function writeCacheControlStickyState(key, state) {
  const finalPath = cacheControlStickyStatePath(key);
  try {
    mkdirSync(CACHE_CONTROL_STICKY_DIR, { recursive: true });
    const tempPath = `${finalPath}.tmp`;
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(tempPath, serialized, "utf-8");
    renameSync(tempPath, finalPath);
  } catch (err) {
    debugLog(`cache_control_sticky: state write error (${err?.message})`);
  }
}
873
+
874
/**
 * Core of cache_control_sticky: given a body and the previously persisted
 * state, compute the next state and the marker mutations to apply. Performs
 * no I/O and mutates neither `body` nor `priorState`; the caller applies
 * the results.
 *
 * Algorithm:
 *   1. Walk user-role messages. The first message seen for each hash is the
 *      one indexed for later mutation. For every message that carries a
 *      `cache_control` object on any block, record `{msg_hash, marker}`
 *      (marker copied from the first marked block) into `observed`.
 *   2. Merge into the prior positions, which are ordered oldest → newest:
 *      every observed hash is (re)inserted at the NEWEST end with its
 *      refreshed marker, whether or not it was tracked before. Hashes not
 *      seen in this body keep their relative order. If a hash is observed
 *      more than once, the later message's marker wins.
 *   3. Cap at CACHE_CONTROL_STICKY_MAX_POSITIONS, dropping from the oldest
 *      end — i.e. least-recently-observed entries are evicted first.
 *   4. For each surviving hash whose message is present in this body and
 *      has no marker on any block, emit a mutation placing the tracked
 *      marker on that message's last block.
 *
 * @param {*} body - Request body with a `messages` array.
 * @param {*} priorState - Previously persisted state
 *   (`{positions: [...]}`); anything else is treated as empty.
 * @returns {{newState: {version: 1, positions: Array<object>},
 *   mutations: Array<{msgIdx: number, blockIdx: number, marker: object}>}}
 */
function updateCacheControlStickyState(body, priorState) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) {
    return { newState: { version: 1, positions: [] }, mutations: [] };
  }

  const carriesMarker = (b) =>
    Boolean(b && typeof b === "object" && b.cache_control && typeof b.cache_control === "object");

  // Step 1: index user messages by hash and collect markers present now.
  const hashToMsgIdx = new Map();
  const observed = []; // [{msg_hash, marker}] in message order
  for (let m = 0; m < body.messages.length; m++) {
    const msg = body.messages[m];
    if (!msg || msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length === 0) continue;
    const h = computeStickyMessageHash(msg);
    if (!h) continue;
    if (!hashToMsgIdx.has(h)) hashToMsgIdx.set(h, m);
    const marked = msg.content.find(carriesMarker);
    if (marked) observed.push({ msg_hash: h, marker: { ...marked.cache_control } });
  }

  // Step 2: a Map iterates in insertion order, so delete-then-set moves a
  // key to the newest end. Updating a re-observed entry in place instead
  // would leave it at its old (oldest) slot and let the cap evict the very
  // position that was just touched.
  const tracked = new Map();
  const priorPositions =
    priorState && Array.isArray(priorState.positions) ? priorState.positions : [];
  for (const p of priorPositions) {
    if (!p || typeof p.msg_hash !== "string") continue;
    tracked.delete(p.msg_hash);
    tracked.set(p.msg_hash, p);
  }
  for (const ob of observed) {
    tracked.delete(ob.msg_hash);
    tracked.set(ob.msg_hash, {
      msg_hash: ob.msg_hash,
      position_hint: "last_block",
      marker: ob.marker,
    });
  }

  // Step 3: keep only the newest MAX entries.
  let capped = [...tracked.values()];
  if (capped.length > CACHE_CONTROL_STICKY_MAX_POSITIONS) {
    capped = capped.slice(capped.length - CACHE_CONTROL_STICKY_MAX_POSITIONS);
  }

  // Step 4: reinstate markers on tracked messages that currently have none.
  const mutations = [];
  for (const pos of capped) {
    const msgIdx = hashToMsgIdx.get(pos.msg_hash);
    if (msgIdx === undefined) continue;
    const msg = body.messages[msgIdx];
    if (!msg || !Array.isArray(msg.content) || msg.content.length === 0) continue;
    if (msg.content.some(carriesMarker)) continue;
    mutations.push({
      msgIdx,
      blockIdx: msg.content.length - 1,
      marker: { ...pos.marker },
    });
  }

  return { newState: { version: 1, positions: capped }, mutations };
}
962
+
963
/**
 * Wrapper around updateCacheControlStickyState: read the persisted state,
 * compute mutations, apply them to `body`, and write the next state.
 *
 * Mutations are applied copy-on-write: the targeted message in
 * `body.messages` is replaced by a shallow copy whose `content` array is a
 * copy with the target block replaced by `{...block, cache_control}`. The
 * original message and block objects are not modified.
 *
 * The next state is written after every computation, including when no
 * mutation was applied.
 *
 * @param {*} body - Request body with a `messages` array; mutated in place.
 * @param {*} key - Project key identifying the state file.
 * @returns {number} Number of markers actually placed. A mutation whose
 *   target message or block cannot be resolved is skipped and not counted.
 */
function applyCacheControlSticky(body, key) {
  if (!body || typeof body !== "object" || !Array.isArray(body.messages)) return 0;

  const prior = readCacheControlStickyState(key);
  const { newState, mutations } = updateCacheControlStickyState(body, prior);

  let applied = 0;
  for (const mut of mutations) {
    const msg = body.messages[mut.msgIdx];
    if (!msg || !Array.isArray(msg.content)) continue;
    const target = msg.content[mut.blockIdx];
    // Non-object blocks cannot carry a marker; skip rather than wrap them.
    if (!target || typeof target !== "object") continue;
    const newContent = msg.content.slice();
    newContent[mut.blockIdx] = { ...target, cache_control: { ...mut.marker } };
    body.messages[mut.msgIdx] = { ...msg, content: newContent };
    applied++;
  }

  writeCacheControlStickyState(key, newState);
  return applied;
}
985
+
333
986
  /**
334
987
  * Core fix: on EVERY call, scan the entire message array for the LATEST
335
988
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -728,6 +1381,13 @@ const _STATS_SCHEMA = {
728
1381
  cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
729
1382
  smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
730
1383
  smoosh_split: { applied: 0, skipped: 0, lastApplied: null },
1384
+ session_start_normalize: { applied: 0, skipped: 0, lastApplied: null },
1385
+ continue_trailer_strip: { applied: 0, skipped: 0, lastApplied: null },
1386
+ deferred_tools_restore: { applied: 0, skipped: 0, lastApplied: null },
1387
+ reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
1388
+ cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
1389
+ tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
1390
+ cache_control_sticky: { applied: 0, skipped: 0, lastApplied: null },
731
1391
  };
732
1392
 
733
1393
  function _createEmptyStats() {
@@ -1349,6 +2009,67 @@ globalThis.fetch = async function (url, options) {
1349
2009
  }
1350
2010
  }
1351
2011
 
2012
+ // Extension: session_start_normalize — SessionStart:resume → :startup rewrite
2013
+ // and ephemeral session-id / Last-active strip. Runs BEFORE smoosh_normalize
2014
+ // so drift at msg[0] content[N] is stabilized before any subsequent pass
2015
+ // reads from the same text. Applies to both standalone text blocks and
2016
+ // tool_result.content strings (in case CC's smooshSystemReminderSiblings
2017
+ // folded the reminder before we see it).
2018
+ // Bug: anthropics/claude-code#43657
2019
+ // Opt-out via CACHE_FIX_SKIP_SESSION_START_NORMALIZE=1 (defaults ON).
2020
+ if (shouldApplyFix("session_start_normalize") && payload.messages) {
2021
+ let ssnApplied = 0;
2022
+ for (const msg of payload.messages) {
2023
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
2024
+ for (let i = 0; i < msg.content.length; i++) {
2025
+ const block = msg.content[i];
2026
+ if (block?.type === "text" && typeof block.text === "string") {
2027
+ const [t, n] = normalizeSessionStartText(block.text);
2028
+ if (n > 0) {
2029
+ msg.content[i] = { ...block, text: t };
2030
+ ssnApplied += n;
2031
+ }
2032
+ } else if (block?.type === "tool_result" && typeof block.content === "string") {
2033
+ const [c, n] = normalizeSessionStartText(block.content);
2034
+ if (n > 0) {
2035
+ msg.content[i] = { ...block, content: c };
2036
+ ssnApplied += n;
2037
+ }
2038
+ }
2039
+ }
2040
+ }
2041
+ if (ssnApplied > 0) {
2042
+ modified = true;
2043
+ debugLog(`APPLIED: session-start-normalize rewrote ${ssnApplied} marker(s)`);
2044
+ recordFixResult("session_start_normalize", "applied");
2045
+ } else {
2046
+ recordFixResult("session_start_normalize", "skipped");
2047
+ }
2048
+ }
2049
+
2050
+ // Extension: tool_use_input_normalize — strip tool_use.input keys not
2051
+ // declared in body.tools[*].input_schema.properties. CC's serialization
2052
+ // of tool_use.input can drift between turns when the caller passed
2053
+ // extra fields; the pre-miss body may serialize only the schema keys
2054
+ // while the post-miss body serializes the full caller-supplied set
2055
+ // (or vice versa). That byte drift at a mid-history assistant message
2056
+ // re-caches every block from that message forward.
2057
+ //
2058
+ // Runs AFTER session_start_normalize so mid-history drift is pinned
2059
+ // before any downstream pass (smoosh_*, fingerprint, ttl) hashes the
2060
+ // same block. Default ON, opt-out via
2061
+ // CACHE_FIX_SKIP_TOOL_USE_INPUT_NORMALIZE=1.
2062
+ if (shouldApplyFix("tool_use_input_normalize")) {
2063
+ const tuinApplied = normalizeToolUseInputsInBody(payload);
2064
+ if (tuinApplied > 0) {
2065
+ modified = true;
2066
+ debugLog(`APPLIED: tool-use-input-normalize rewrote ${tuinApplied} tool_use block(s)`);
2067
+ recordFixResult("tool_use_input_normalize", "applied");
2068
+ } else {
2069
+ recordFixResult("tool_use_input_normalize", "skipped");
2070
+ }
2071
+ }
2072
+
1352
2073
  // Optimization: normalize smooshed dynamic system-reminders in tool_result content
1353
2074
  // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
1354
2075
  // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
@@ -1474,6 +2195,187 @@ globalThis.fetch = async function (url, options) {
1474
2195
  }
1475
2196
  }
1476
2197
 
2198
+ // Extension: continue_trailer_strip — remove the "Continue from where
2199
+ // you left off." text block CC appends to the last user message on
2200
+ // --continue. Pre-exit bodies didn't carry it, so its presence in the
2201
+ // resumed body creates tail-of-last-msg drift that breaks cache.
2202
+ // Exact-match string equality on `.text` — user sentences mentioning
2203
+ // the phrase inside longer content are not touched.
2204
+ // Bug: anthropics/claude-code#12 (resume UX), observed empirically.
2205
+ // Opt-out via CACHE_FIX_SKIP_CONTINUE_TRAILER_STRIP=1 (defaults ON).
2206
+ if (shouldApplyFix("continue_trailer_strip") && payload.messages) {
2207
+ let trailerStripped = 0;
2208
+ for (const msg of payload.messages) {
2209
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
2210
+ const kept = msg.content.filter((block) => {
2211
+ if (isContinueTrailerBlock(block)) {
2212
+ trailerStripped++;
2213
+ return false;
2214
+ }
2215
+ return true;
2216
+ });
2217
+ if (kept.length !== msg.content.length) msg.content = kept;
2218
+ }
2219
+ if (trailerStripped > 0) {
2220
+ modified = true;
2221
+ debugLog(`APPLIED: continue-trailer-strip removed ${trailerStripped} trailer block(s)`);
2222
+ recordFixResult("continue_trailer_strip", "applied");
2223
+ } else {
2224
+ recordFixResult("continue_trailer_strip", "skipped");
2225
+ }
2226
+ }
2227
+
2228
+ // Extension: deferred_tools_restore — persist-and-restore the
2229
+ // deferred-tools attachment block across sessions so MCP reconnect
2230
+ // race at resume-time doesn't shrink msg[0] and bust the whole cache.
2231
+ // Snapshot key defaults to process.cwd() (one snapshot per project).
2232
+ // Opt-out via CACHE_FIX_SKIP_DEFERRED_TOOLS_RESTORE=1 (defaults ON).
2233
+ if (shouldApplyFix("deferred_tools_restore") && payload.messages) {
2234
+ let dtrRestored = 0;
2235
+ const found = findDeferredToolsBlockInBody(payload);
2236
+ if (found) {
2237
+ const hasUnavail = found.text.includes(DEFERRED_TOOLS_UNAVAILABLE_MARKER);
2238
+ const snapshotPath = deferredToolsSnapshotPath(process.cwd());
2239
+ if (!hasUnavail) {
2240
+ // Clean baseline — persist it for future resumes. Silent on
2241
+ // any I/O error; snapshot is best-effort.
2242
+ try {
2243
+ mkdirSync(DEFERRED_TOOLS_SNAPSHOT_DIR, { recursive: true });
2244
+ writeFileSync(snapshotPath, found.text, "utf-8");
2245
+ } catch {}
2246
+ } else {
2247
+ // Shrunk block with explicit "no longer available" signal →
2248
+ // attempt restore. Only substitute if the persisted version is
2249
+ // strictly longer (never downgrade to a stale shorter snapshot).
2250
+ let snapshot = null;
2251
+ try { snapshot = readFileSync(snapshotPath, "utf-8"); } catch {}
2252
+ if (snapshot && snapshot.length > found.text.length) {
2253
+ const targetMsg = payload.messages[found.msgIdx];
2254
+ const newContent = targetMsg.content.slice();
2255
+ newContent[found.blockIdx] = { ...newContent[found.blockIdx], text: snapshot };
2256
+ payload.messages[found.msgIdx] = { ...targetMsg, content: newContent };
2257
+ dtrRestored = 1;
2258
+ }
2259
+ }
2260
+ }
2261
+ if (dtrRestored > 0) {
2262
+ modified = true;
2263
+ debugLog(`APPLIED: deferred-tools-restore substituted full block at msg[${found.msgIdx}].content[${found.blockIdx}]`);
2264
+ recordFixResult("deferred_tools_restore", "applied");
2265
+ } else {
2266
+ recordFixResult("deferred_tools_restore", "skipped");
2267
+ }
2268
+ }
2269
+
2270
+ // Extension: reminder_strip — remove bookkeeping system-reminder blocks
2271
+ // (Token usage / USD budget / Output tokens / TodoWrite nudge / turn
2272
+ // counters) entirely from user messages. Runs AFTER smoosh_split so
2273
+ // blocks peeled out of tool_result.content are visible as standalone
2274
+ // text and can be matched by isBookkeepingReminder.
2275
+ // Zero model visibility, zero drift.
2276
+ // Opt-out via CACHE_FIX_SKIP_REMINDER_STRIP=1 (defaults ON).
2277
+ if (shouldApplyFix("reminder_strip") && payload.messages) {
2278
+ let reminderStripped = 0;
2279
+ for (const msg of payload.messages) {
2280
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
2281
+ const kept = msg.content.filter((block) => {
2282
+ if (block?.type !== "text") return true;
2283
+ if (isBookkeepingReminder(block.text)) {
2284
+ reminderStripped++;
2285
+ return false;
2286
+ }
2287
+ return true;
2288
+ });
2289
+ if (kept.length !== msg.content.length) msg.content = kept;
2290
+ }
2291
+ if (reminderStripped > 0) {
2292
+ modified = true;
2293
+ debugLog(`APPLIED: reminder-strip removed ${reminderStripped} bookkeeping reminder block(s)`);
2294
+ recordFixResult("reminder_strip", "applied");
2295
+ } else {
2296
+ recordFixResult("reminder_strip", "skipped");
2297
+ }
2298
+ }
2299
+
2300
+ // Extension: cache_control_normalize — pin the cache_control marker at
2301
+ // a canonical position (last block of last user message) on every
2302
+ // outbound body. Prevents marker-shuffle drift between turns from
2303
+ // invalidating the previous-last-block's cached bytes. Runs LAST
2304
+ // (after smoosh_split and any other content-mutating pass) so the
2305
+ // canonical position is calculated against the final content array.
2306
+ // Fast path: if canonical position already holds the correct marker
2307
+ // and it's the only user-side marker, body passes through untouched.
2308
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_NORMALIZE=1 (defaults ON).
2309
+ if (shouldApplyFix("cache_control_normalize") && payload.messages && payload.messages.length > 0) {
2310
+ // Locate canonical position: last block of last user message with an
2311
+ // array content. If no valid target, skip.
2312
+ let targetMsgIdx = -1;
2313
+ let targetBlockIdx = -1;
2314
+ for (let i = payload.messages.length - 1; i >= 0; i--) {
2315
+ const m = payload.messages[i];
2316
+ if (m?.role !== "user") continue;
2317
+ if (!Array.isArray(m.content) || m.content.length === 0) break;
2318
+ targetMsgIdx = i;
2319
+ targetBlockIdx = m.content.length - 1;
2320
+ break;
2321
+ }
2322
+
2323
+ let ccMutated = false;
2324
+ if (targetMsgIdx !== -1) {
2325
+ const targetBlock = payload.messages[targetMsgIdx].content[targetBlockIdx];
2326
+ const existingCC = targetBlock?.cache_control;
2327
+ const canonicalAlreadyCorrect =
2328
+ existingCC &&
2329
+ existingCC.type === CACHE_CONTROL_CANONICAL_MARKER.type &&
2330
+ existingCC.ttl === CACHE_CONTROL_CANONICAL_MARKER.ttl;
2331
+
2332
+ if (!(canonicalAlreadyCorrect && countUserCacheControlMarkers(payload) === 1)) {
2333
+ // Strip all markers from user messages, then place canonical.
2334
+ for (const msg of payload.messages) stripCacheControlMarkers(msg);
2335
+ const tm = payload.messages[targetMsgIdx];
2336
+ const newContent = tm.content.slice();
2337
+ newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...CACHE_CONTROL_CANONICAL_MARKER } };
2338
+ payload.messages[targetMsgIdx] = { ...tm, content: newContent };
2339
+ ccMutated = true;
2340
+ }
2341
+ }
2342
+ if (ccMutated) {
2343
+ modified = true;
2344
+ debugLog(`APPLIED: cache_control_normalize pinned marker at msg[${targetMsgIdx}].content[${targetBlockIdx}]`);
2345
+ recordFixResult("cache_control_normalize", "applied");
2346
+ } else {
2347
+ recordFixResult("cache_control_normalize", "skipped");
2348
+ }
2349
+ }
2350
+
2351
+ // Extension: cache_control_sticky — reinstate historical cache_control
2352
+ // markers on messages whose position CC has moved past. CC maintains
2353
+ // at most one user-side marker at a time; as it moves the marker to
2354
+ // the tail of each new user turn, the previous position loses the ~43
2355
+ // bytes of cache_control framing — a tail-of-message byte drift that
2356
+ // breaks every downstream cached block. This extension tracks marker
2357
+ // positions by stable message-hash across turns (up to 3) and re-adds
2358
+ // them on future bodies. Runs AFTER cache_control_normalize (when
2359
+ // present) so normalize pins the canonical tail-marker first and
2360
+ // sticky re-adds the historical ones. State file is per-project at
2361
+ // ~/.claude/cache-fix-state/cache-control-sticky-<sha1(cwd)>.json.
2362
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_STICKY=1 (defaults ON).
2363
+ if (shouldApplyFix("cache_control_sticky") && payload.messages) {
2364
+ try {
2365
+ const stickyApplied = applyCacheControlSticky(payload, process.cwd());
2366
+ if (stickyApplied > 0) {
2367
+ modified = true;
2368
+ debugLog(`APPLIED: cache_control_sticky reinstated ${stickyApplied} historical marker(s)`);
2369
+ recordFixResult("cache_control_sticky", "applied");
2370
+ } else {
2371
+ recordFixResult("cache_control_sticky", "skipped");
2372
+ }
2373
+ } catch (e) {
2374
+ debugLog(`cache_control_sticky: error (${e?.message}) — skipped`);
2375
+ recordFixResult("cache_control_sticky", "skipped");
2376
+ }
2377
+ }
2378
+
1477
2379
  // Bug 5: TTL enforcement (configurable per request type)
1478
2380
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
1479
2381
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -1899,5 +2801,25 @@ export {
1899
2801
  isClearArtifact,
1900
2802
  rewriteOutputEfficiencyInstruction,
1901
2803
  normalizeOutputEfficiencyReplacement,
2804
+ normalizeSessionStartText,
2805
+ isContinueTrailerBlock,
2806
+ CONTINUE_TRAILER_TEXT,
2807
+ findDeferredToolsBlockInBody,
2808
+ deferredToolsSnapshotPath,
2809
+ DEFERRED_TOOLS_AVAILABLE_MARKER,
2810
+ DEFERRED_TOOLS_UNAVAILABLE_MARKER,
2811
+ isBookkeepingReminder,
2812
+ stripCacheControlMarkers,
2813
+ countUserCacheControlMarkers,
2814
+ CACHE_CONTROL_CANONICAL_MARKER,
2815
+ normalizeToolUseInputsInBody,
2816
+ computeStickyMessageHash,
2817
+ cacheControlStickyStatePath,
2818
+ updateCacheControlStickyState,
2819
+ applyCacheControlSticky,
2820
+ readCacheControlStickyState,
2821
+ writeCacheControlStickyState,
2822
+ CACHE_CONTROL_STICKY_MAX_POSITIONS,
2823
+ CACHE_CONTROL_STICKY_DEFAULT_MARKER,
1902
2824
  _pinnedBlocks, // exported so tests can reset between runs
1903
2825
  };