claude-code-cache-fix 2.0.0-beta.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +2 -1
  2. package/package.json +1 -1
  3. package/preload.mjs +629 -0
package/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
6
6
 
7
- Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.111. Opus 4.7 compatible.
7
+ Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.112. Opus 4.7 compatible.
8
8
 
9
9
  > **Opus 4.7 advisory:** Our metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts. Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` (may reduce quality). Image stripping (`CACHE_FIX_IMAGE_KEEP_LAST`) is even more important on 4.7 due to high-res image support increasing image token counts. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) for full analysis.
10
10
 
@@ -607,6 +607,7 @@ measurable signature of cache-efficiency degradation.
607
607
  - **[@JEONG-JIWOO](https://github.com/JEONG-JIWOO)** — VS Code extension investigation: discovered `claudeCode.claudeProcessWrapper` as the working integration path, wrote the C wrapper for Windows (#16)
608
608
  - **[@X-15](https://github.com/X-15)** — VS Code extension validation, per-fix health status analysis confirming safety check behavior on v2.1.105 (#16)
609
609
  - **[@ArkNill](https://github.com/ArkNill)** — Fingerprint verification fix for CC v2.1.108+ (`isMeta` filter change, PR #21), Korean README (PR #22), original [claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis) research
610
+ - **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery
610
611
 
611
612
  If you contributed to the community effort on these issues and aren't listed here, please open an issue or PR — we want to credit everyone properly.
612
613
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "2.0.0-beta.4",
3
+ "version": "2.0.0",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -330,6 +330,403 @@ function stripSessionKnowledge(text) {
330
330
  );
331
331
  }
332
332
 
333
+ // --------------------------------------------------------------------------
334
+ // SessionStart:resume → :startup rewrite (Bug: anthropics/claude-code#43657)
335
+ // --------------------------------------------------------------------------
336
+ //
337
+ // On `claude --continue`, CC fires processSessionStartHooks('resume', …) at
338
+ // src/utils/sessionStart.ts:35. The resulting attachment text wraps the
339
+ // hook's stdout in `<system-reminder>\nSessionStart:resume hook success: …`.
340
+ // The original (pre-resume) session sent the same block as
341
+ // `SessionStart:startup hook success: …`. Byte difference at msg[0] content[N]
342
+ // → whole message prefix re-caches → full-session-cost miss.
343
+ //
344
+ // Some SessionStart hooks additionally embed `<session-id>` tags or
345
+ // `Last active: <timestamp>` lines inside the reminder body, both of which
346
+ // carry UUID/date volatility on top of the event-name flip.
347
+ //
348
+ // This helper rewrites the outbound text to match the originally-cached
349
+ // form. Runs on both standalone text blocks and tool_result.content strings
350
+ // (covers the case where the SessionStart reminder got smooshed by CC's
351
+ // smooshSystemReminderSiblings pass before we see it).
352
+ //
353
+ // Agent behavior is unaffected — CC does not condition behavior on the
354
+ // event-name text, and session-id / timestamps are ephemeral runtime
355
+ // metadata, not semantic inputs.
356
+ // --------------------------------------------------------------------------
357
+
358
+ const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;
359
+ const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;
360
+ const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;
361
+
362
+ /**
363
+ * Normalize a single text payload (a text block's .text or a tool_result's
364
+ * string .content) to remove SessionStart-resume volatility. Returns
365
+ * [newText, mutationCount]. Callers only need the text, but the count is
366
+ * exposed for stats. The function is a pure string-to-string transform
367
+ * (idempotent: running twice produces the same output as running once).
368
+ */
369
+ function normalizeSessionStartText(text) {
370
+ if (typeof text !== "string" || !text.includes("SessionStart:")) return [text, 0];
371
+ let count = 0;
372
+ let out = text;
373
+ if (SESSION_START_RESUME_MARKER.test(out)) {
374
+ SESSION_START_RESUME_MARKER.lastIndex = 0;
375
+ out = out.replace(SESSION_START_RESUME_MARKER, "SessionStart:startup hook success:");
376
+ count++;
377
+ }
378
+ if (SESSION_START_ID_TAG.test(out)) {
379
+ SESSION_START_ID_TAG.lastIndex = 0;
380
+ out = out.replace(SESSION_START_ID_TAG, "");
381
+ count++;
382
+ }
383
+ if (SESSION_START_LAST_ACTIVE_LINE.test(out)) {
384
+ SESSION_START_LAST_ACTIVE_LINE.lastIndex = 0;
385
+ out = out.replace(SESSION_START_LAST_ACTIVE_LINE, "");
386
+ count++;
387
+ }
388
+ return [out, count];
389
+ }
390
+
391
+ // --------------------------------------------------------------------------
392
+ // Continue-trailer strip (Bug: anthropics/claude-code#12 / resume UX)
393
+ // --------------------------------------------------------------------------
394
+ //
395
+ // On `claude --continue`, CC appends a text block whose text is EXACTLY
396
+ // "Continue from where you left off." to the last user message before
397
+ // firing the first post-resume request. The pre-exit body did not carry
398
+ // that block, so its presence in the resumed body creates a tail-of-last-
399
+ // user-message drift (~40 bytes plus JSON framing) that breaks cache at
400
+ // that position.
401
+ //
402
+ // The trailer is a semantic no-op — the agent already has the full prior
403
+ // conversation as context. Removing it makes the post-resume body byte-
404
+ // match what the pre-exit body cached at the tail.
405
+ //
406
+ // Match is intentionally narrow (exact string equality on the block's
407
+ // .text) so mentions of the phrase inside a longer user sentence don't
408
+ // get caught.
409
+ // --------------------------------------------------------------------------
410
+
411
+ const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";
412
+
413
+ /**
414
+ * Returns true iff the block is an exact-match Continue-trailer text block
415
+ * (a `{type: "text", text: "Continue from where you left off."}` shape —
416
+ * cache_control field on the same block is allowed and ignored). Pure
417
+ * predicate; exported for unit tests.
418
+ */
419
+ function isContinueTrailerBlock(block) {
420
+ return (
421
+ !!block &&
422
+ typeof block === "object" &&
423
+ block.type === "text" &&
424
+ block.text === CONTINUE_TRAILER_TEXT
425
+ );
426
+ }
427
+
428
+ // --------------------------------------------------------------------------
429
+ // Deferred-tools restore (MCP reconnect race)
430
+ // --------------------------------------------------------------------------
431
+ //
432
+ // Observed empirically: on `claude --continue`, if MCP servers haven't
433
+ // finished reconnecting by the time CC fires the first post-resume
434
+ // request, the `<system-reminder>The following deferred tools are now
435
+ // available via ToolSearch…` block at msg[0] (or wherever the attachment
436
+ // lands post-compaction) shrinks dramatically. A full list of ~40 tools
437
+ // collapses to a handful of CC built-ins (AskUserQuestion, EnterPlanMode,
438
+ // ExitPlanMode, PushNotification) and CC injects a trailing
439
+ // `The following deferred tools are no longer available (their MCP server
440
+ // disconnected). Do not search for them — ToolSearch will return no match:`
441
+ // notice.
442
+ //
443
+ // That block change at the root of the message array breaks cache at the
444
+ // very top — the entire ~940K prompt re-caches. By the time the second
445
+ // post-resume request fires, MCPs are usually reconnected and the block is
446
+ // full again, but the cache is already committed to the shrunk version
447
+ // for this session.
448
+ //
449
+ // This extension snapshots the block to
450
+ // `~/.claude/cache-fix-state/deferred-tools-<first 16 hex chars of sha1(key)>.txt` every time
451
+ // it's sent in its full form (no UNAVAILABLE marker), keyed by a caller-
452
+ // supplied project key (default: cwd). On a subsequent request where the
453
+ // block is shorter AND contains the UNAVAILABLE marker, the persisted
454
+ // full bytes are substituted so the on-wire body matches the server's
455
+ // cached prefix.
456
+ //
457
+ // Trade-off: the restored block may reference MCP tools that haven't
458
+ // actually reconnected yet. Agent calls ToolSearch → no match → one retry.
459
+ // Tiny cost versus a full-prompt cache miss on every resume.
460
+ // --------------------------------------------------------------------------
461
+
462
+ const DEFERRED_TOOLS_AVAILABLE_MARKER =
463
+ "The following deferred tools are now available via ToolSearch";
464
+ const DEFERRED_TOOLS_UNAVAILABLE_MARKER =
465
+ "The following deferred tools are no longer available";
466
+ const DEFERRED_TOOLS_SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-state");
467
+
468
+ /**
469
+ * Build the absolute snapshot path for a given key. Exported for tests so
470
+ * they can assert on path derivation without duplicating the hash logic.
471
+ */
472
+ function deferredToolsSnapshotPath(key) {
473
+ const hash = createHash("sha1").update(String(key)).digest("hex").slice(0, 16);
474
+ return join(DEFERRED_TOOLS_SNAPSHOT_DIR, `deferred-tools-${hash}.txt`);
475
+ }
476
+
477
+ /**
478
+ * Locate the deferred-tools reminder block anywhere in `body.messages`.
479
+ * The block's position varies by session shape (pre-compaction it often
480
+ * sits at `msg[0].content[0]`; post-compaction it can land at
481
+ * `msg[1].content[N]` next to other attachments). Returns
482
+ * `{ msgIdx, blockIdx, text } | null`.
483
+ *
484
+ * Assistant messages are skipped so that if the agent happens to mention
485
+ * the AVAILABLE_MARKER phrase verbatim in its own output, we don't
486
+ * misidentify it as a real deferred-tools block.
487
+ */
488
+ function findDeferredToolsBlockInBody(body) {
489
+ if (!body || !Array.isArray(body.messages)) return null;
490
+ for (let m = 0; m < body.messages.length; m++) {
491
+ const msg = body.messages[m];
492
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
493
+ for (let i = 0; i < msg.content.length; i++) {
494
+ const b = msg.content[i];
495
+ if (
496
+ b?.type === "text" &&
497
+ typeof b.text === "string" &&
498
+ b.text.includes(DEFERRED_TOOLS_AVAILABLE_MARKER)
499
+ ) {
500
+ return { msgIdx: m, blockIdx: i, text: b.text };
501
+ }
502
+ }
503
+ }
504
+ return null;
505
+ }
506
+
507
+ // --------------------------------------------------------------------------
508
+ // Bookkeeping-reminder strip
509
+ // --------------------------------------------------------------------------
510
+ //
511
+ // Complements `smoosh_normalize` / `smoosh_split`: where normalize stabilizes
512
+ // bytes in-place and split peels smooshed reminders back into standalone
513
+ // text blocks, this pass REMOVES purely-bookkeeping reminder blocks entirely
514
+ // from the outbound body. Zero model visibility, zero drift.
515
+ //
516
+ // Targeted patterns (all CC-internal, per-turn values the agent doesn't need
517
+ // to condition behavior on):
518
+ // - `Token usage: <N>/<M>; <K> remaining`
519
+ // - `Output tokens — turn: <X> · session: <Y>`
520
+ // - `USD budget: $<X>/$<Y>; $<Z> remaining`
521
+ // - `The task tools haven't been used recently. …`
522
+ // - `The TodoWrite tool hasn't been used recently. …`
523
+ // - `Remaining conversation turns: <N>`
524
+ // - `Messages until auto-compact: <N>`
525
+ //
526
+ // Hook-injected reminders (thinking-enrichment, action-tracker,
527
+ // PreToolUse/PostToolUse blocking errors, UserPromptSubmit additional
528
+ // context, custom user hooks) are deliberately NOT stripped here — the
529
+ // agent needs that feedback visible in the turn it fires, and attempting a
530
+ // history-only filter creates per-turn drift of its own (the "last user
531
+ // message" shifts each turn, so a reminder preserved at turn N gets
532
+ // stripped at N+1 when its host message falls into history). Leaving hook
533
+ // reminders untouched is the safer choice; their residual drift is small
534
+ // compared to bookkeeping churn.
535
+ // --------------------------------------------------------------------------
536
+
537
+ const REMINDER_WRAP_REGEX =
538
+ /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;
539
+
540
+ const BOOKKEEPING_REMINDER_PATTERNS = [
541
+ /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
542
+ /^Output tokens \u2014 turn: [^\n]+ \u00b7 session: [^\n]+\s*$/,
543
+ /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
544
+ /^The task tools haven't been used recently\./,
545
+ /^The TodoWrite tool hasn't been used recently\./,
546
+ /^Remaining conversation turns: /,
547
+ /^Messages? until auto-compact: /,
548
+ ];
549
+
550
+ /**
551
+ * Returns true iff the text is a `<system-reminder>`-wrapped block whose
552
+ * inner content matches a bookkeeping pattern. Pure predicate, exported
553
+ * for unit tests.
554
+ */
555
+ function isBookkeepingReminder(text) {
556
+ if (typeof text !== "string") return false;
557
+ const m = text.match(REMINDER_WRAP_REGEX);
558
+ if (!m) return false;
559
+ const inner = m[1];
560
+ for (const rx of BOOKKEEPING_REMINDER_PATTERNS) {
561
+ if (rx.test(inner)) return true;
562
+ }
563
+ return false;
564
+ }
565
+
566
+ // --------------------------------------------------------------------------
567
+ // cache_control marker position-normalizer
568
+ // --------------------------------------------------------------------------
569
+ //
570
+ // Anthropic's prompt-cache uses `cache_control: {type: "ephemeral", ttl: ...}`
571
+ // markers on content blocks as cache breakpoints. CC places this marker on
572
+ // "the last block of the last user message" each turn — which shifts as new
573
+ // turns arrive. When the marker moves, the PREVIOUS last-block's JSON loses
574
+ // the cache_control field → that block's bytes differ from the server's
575
+ // cached version → partial re-cache on top of the stable system-prompt
576
+ // cache.
577
+ //
578
+ // Enforce a canonical position on every outbound body:
579
+ // 1. Strip every existing cache_control marker from user-message content
580
+ // blocks.
581
+ // 2. Place a single {type: "ephemeral", ttl: "1h"} marker on the LAST
582
+ // content block of the LAST user message.
583
+ //
584
+ // Fast path: if the canonical block already has the correct marker AND it's
585
+ // the only user-side marker, the body is left untouched — ensures the pass
586
+ // is a true no-op when nothing changed.
587
+ //
588
+ // System-side markers (e.g., on `system[2]` for the global prompt) are NOT
589
+ // touched — they're CC's stable breakpoint for the system prompt and work
590
+ // correctly.
591
+ // --------------------------------------------------------------------------
592
+
593
+ const CACHE_CONTROL_CANONICAL_MARKER = { type: "ephemeral", ttl: "1h" };
594
+
595
+ /**
596
+ * Strip every cache_control marker from a single user message's content
597
+ * blocks. Returns the number stripped. Mutates the message's content array
598
+ * in place.
599
+ */
600
+ function stripCacheControlMarkers(msg) {
601
+ if (!msg || msg.role !== "user" || !Array.isArray(msg.content)) return 0;
602
+ let n = 0;
603
+ for (let i = 0; i < msg.content.length; i++) {
604
+ const block = msg.content[i];
605
+ if (block && typeof block === "object" && block.cache_control) {
606
+ const { cache_control, ...rest } = block;
607
+ msg.content[i] = rest;
608
+ n++;
609
+ }
610
+ }
611
+ return n;
612
+ }
613
+
614
+ /**
615
+ * Count cache_control markers across all user-message content blocks.
616
+ * Exported so the call-site's fast-path check has a tested helper.
617
+ */
618
+ function countUserCacheControlMarkers(body) {
619
+ if (!body || !Array.isArray(body.messages)) return 0;
620
+ let n = 0;
621
+ for (const msg of body.messages) {
622
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
623
+ for (const block of msg.content) {
624
+ if (block && typeof block === "object" && block.cache_control) n++;
625
+ }
626
+ }
627
+ return n;
628
+ }
629
+
630
+ // --------------------------------------------------------------------------
631
+ // tool_use.input field-set normalization
632
+ // --------------------------------------------------------------------------
633
+ //
634
+ // CC's serialization of `tool_use.input` can drift between turns when the
635
+ // caller passes fields not declared in the tool's `input_schema.properties`.
636
+ // Observed case: a SendMessage tool call where the caller passed
637
+ // `{to, summary, message, type, recipient, content}`. Pre-miss body
638
+ // serialized input as `{to, summary, message}` (3 schema-only keys).
639
+ // Post-miss body (same tool_use_id, same turn position) serialized the
640
+ // same block as `{to, summary, message, type, recipient, content}` (6 keys
641
+ // — extras preserved). That byte drift at a mid-history assistant message
642
+ // re-caches every block from that message forward → full-session-cost miss.
643
+ //
644
+ // Concrete instance: 2334-byte drift on ONE assistant-side tool_use block
645
+ // caused a 619,722 `cache_creation_input_tokens` miss at 15:16:52 UTC on
646
+ // msg[844] of a long-running session.
647
+ //
648
+ // This helper walks every assistant-role message's tool_use blocks, looks
649
+ // up the tool's declared `input_schema.properties` from `body.tools`, and
650
+ // rewrites `input` to contain ONLY the schema keys (in schema declaration
651
+ // order). Tools with no schema in `body.tools` are left untouched — we
652
+ // can't determine what's legitimate vs extra.
653
+ //
654
+ // Agent behavior is unaffected — extras weren't declared in the schema so
655
+ // downstream consumers shouldn't rely on them. The point of this pass is
656
+ // to pin the serialization to the schema's field set so CC's own drift
657
+ // between turns can't break cache.
658
+ // --------------------------------------------------------------------------
659
+
660
+ /**
661
+ * Mutate `body` in place: for every assistant-role message's tool_use
662
+ * blocks whose tool name matches an entry in `body.tools` with a known
663
+ * `input_schema.properties`, replace `input` with a new object containing
664
+ * ONLY the schema-declared keys, preserved in schema declaration order.
665
+ * Returns the count of tool_use blocks modified (0 if nothing changed or
666
+ * preconditions missing). Idempotent: safe to call repeatedly (a second call on the same body finds nothing left to change and returns 0).
667
+ */
668
+ function normalizeToolUseInputsInBody(body) {
669
+ if (!body || typeof body !== "object") return 0;
670
+ if (!Array.isArray(body.messages) || !Array.isArray(body.tools)) return 0;
671
+
672
+ // Build toolSchemas: { name: orderedKeys[] } from body.tools entries
673
+ // that declare input_schema.properties.
674
+ const toolSchemas = Object.create(null);
675
+ for (const tool of body.tools) {
676
+ if (!tool || typeof tool !== "object") continue;
677
+ const name = tool.name;
678
+ if (typeof name !== "string") continue;
679
+ const props = tool.input_schema && tool.input_schema.properties;
680
+ if (!props || typeof props !== "object") continue;
681
+ toolSchemas[name] = Object.keys(props);
682
+ }
683
+
684
+ let modified = 0;
685
+ for (const msg of body.messages) {
686
+ if (!msg || msg.role !== "assistant") continue;
687
+ if (!Array.isArray(msg.content)) continue;
688
+ for (let i = 0; i < msg.content.length; i++) {
689
+ const block = msg.content[i];
690
+ if (!block || block.type !== "tool_use") continue;
691
+ if (!block.input || typeof block.input !== "object" || Array.isArray(block.input)) continue;
692
+ const schemaKeys = toolSchemas[block.name];
693
+ if (!schemaKeys) continue; // unknown tool — skip
694
+ const currentKeys = Object.keys(block.input);
695
+ // Determine if any non-schema key is present. If all current keys
696
+ // are in schema AND their order already matches schema declaration
697
+ // order, the block is skipped (see the check below); otherwise it is
698
+ // rebuilt, which also canonicalizes key order — what JSON.stringify
699
+ // consumers depend on for byte stability.
700
+ const schemaKeySet = new Set(schemaKeys);
701
+ const hasExtras = currentKeys.some((k) => !schemaKeySet.has(k));
702
+ // Also rebuild when order differs from schema declaration order,
703
+ // because extras stripping alone doesn't guarantee a canonical
704
+ // byte sequence across turns.
705
+ const presentSchemaKeys = schemaKeys.filter((k) =>
706
+ Object.prototype.hasOwnProperty.call(block.input, k)
707
+ );
708
+ const currentInSchema = currentKeys.filter((k) => schemaKeySet.has(k));
709
+ let orderDiffers = presentSchemaKeys.length !== currentInSchema.length;
710
+ if (!orderDiffers) {
711
+ for (let j = 0; j < presentSchemaKeys.length; j++) {
712
+ if (presentSchemaKeys[j] !== currentInSchema[j]) {
713
+ orderDiffers = true;
714
+ break;
715
+ }
716
+ }
717
+ }
718
+ if (!hasExtras && !orderDiffers) continue;
719
+ const newInput = {};
720
+ for (const k of presentSchemaKeys) {
721
+ newInput[k] = block.input[k];
722
+ }
723
+ msg.content[i] = { ...block, input: newInput };
724
+ modified++;
725
+ }
726
+ }
727
+ return modified;
728
+ }
729
+
333
730
  /**
334
731
  * Core fix: on EVERY call, scan the entire message array for the LATEST
335
732
  * relocatable blocks (skills, MCP, deferred tools, hooks) and ensure they
@@ -728,6 +1125,12 @@ const _STATS_SCHEMA = {
728
1125
  cwd_normalize: { applied: 0, skipped: 0, lastApplied: null },
729
1126
  smoosh_normalize: { applied: 0, skipped: 0, lastApplied: null },
730
1127
  smoosh_split: { applied: 0, skipped: 0, lastApplied: null },
1128
+ session_start_normalize: { applied: 0, skipped: 0, lastApplied: null },
1129
+ continue_trailer_strip: { applied: 0, skipped: 0, lastApplied: null },
1130
+ deferred_tools_restore: { applied: 0, skipped: 0, lastApplied: null },
1131
+ reminder_strip: { applied: 0, skipped: 0, lastApplied: null },
1132
+ cache_control_normalize: { applied: 0, skipped: 0, lastApplied: null },
1133
+ tool_use_input_normalize: { applied: 0, skipped: 0, lastApplied: null },
731
1134
  };
732
1135
 
733
1136
  function _createEmptyStats() {
@@ -1349,6 +1752,67 @@ globalThis.fetch = async function (url, options) {
1349
1752
  }
1350
1753
  }
1351
1754
 
1755
+ // Extension: session_start_normalize — SessionStart:resume → :startup rewrite
1756
+ // and ephemeral session-id / Last-active strip. Runs BEFORE smoosh_normalize
1757
+ // so drift at msg[0] content[N] is stabilized before any subsequent pass
1758
+ // reads from the same text. Applies to both standalone text blocks and
1759
+ // tool_result.content strings (in case CC's smooshSystemReminderSiblings
1760
+ // folded the reminder before we see it).
1761
+ // Bug: anthropics/claude-code#43657
1762
+ // Opt-out via CACHE_FIX_SKIP_SESSION_START_NORMALIZE=1 (defaults ON).
1763
+ if (shouldApplyFix("session_start_normalize") && payload.messages) {
1764
+ let ssnApplied = 0;
1765
+ for (const msg of payload.messages) {
1766
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
1767
+ for (let i = 0; i < msg.content.length; i++) {
1768
+ const block = msg.content[i];
1769
+ if (block?.type === "text" && typeof block.text === "string") {
1770
+ const [t, n] = normalizeSessionStartText(block.text);
1771
+ if (n > 0) {
1772
+ msg.content[i] = { ...block, text: t };
1773
+ ssnApplied += n;
1774
+ }
1775
+ } else if (block?.type === "tool_result" && typeof block.content === "string") {
1776
+ const [c, n] = normalizeSessionStartText(block.content);
1777
+ if (n > 0) {
1778
+ msg.content[i] = { ...block, content: c };
1779
+ ssnApplied += n;
1780
+ }
1781
+ }
1782
+ }
1783
+ }
1784
+ if (ssnApplied > 0) {
1785
+ modified = true;
1786
+ debugLog(`APPLIED: session-start-normalize rewrote ${ssnApplied} marker(s)`);
1787
+ recordFixResult("session_start_normalize", "applied");
1788
+ } else {
1789
+ recordFixResult("session_start_normalize", "skipped");
1790
+ }
1791
+ }
1792
+
1793
+ // Extension: tool_use_input_normalize — strip tool_use.input keys not
1794
+ // declared in body.tools[*].input_schema.properties. CC's serialization
1795
+ // of tool_use.input can drift between turns when the caller passed
1796
+ // extra fields; the pre-miss body may serialize only the schema keys
1797
+ // while the post-miss body serializes the full caller-supplied set
1798
+ // (or vice versa). That byte drift at a mid-history assistant message
1799
+ // re-caches every block from that message forward.
1800
+ //
1801
+ // Runs AFTER session_start_normalize so mid-history drift is pinned
1802
+ // before any downstream pass (smoosh_*, fingerprint, ttl) hashes the
1803
+ // same block. Default ON, opt-out via
1804
+ // CACHE_FIX_SKIP_TOOL_USE_INPUT_NORMALIZE=1.
1805
+ if (shouldApplyFix("tool_use_input_normalize")) {
1806
+ const tuinApplied = normalizeToolUseInputsInBody(payload);
1807
+ if (tuinApplied > 0) {
1808
+ modified = true;
1809
+ debugLog(`APPLIED: tool-use-input-normalize rewrote ${tuinApplied} tool_use block(s)`);
1810
+ recordFixResult("tool_use_input_normalize", "applied");
1811
+ } else {
1812
+ recordFixResult("tool_use_input_normalize", "skipped");
1813
+ }
1814
+ }
1815
+
1352
1816
  // Optimization: normalize smooshed dynamic system-reminders in tool_result content
1353
1817
  // CC's smooshSystemReminderSiblings (messages.ts:1835) folds <system-reminder> text
1354
1818
  // blocks into tool_result.content strings. Dynamic values (token_usage, budget_usd,
@@ -1474,6 +1938,159 @@ globalThis.fetch = async function (url, options) {
1474
1938
  }
1475
1939
  }
1476
1940
 
1941
+ // Extension: continue_trailer_strip — remove the "Continue from where
1942
+ // you left off." text block CC appends to the last user message on
1943
+ // --continue. Pre-exit bodies didn't carry it, so its presence in the
1944
+ // resumed body creates tail-of-last-msg drift that breaks cache.
1945
+ // Exact-match string equality on `.text` — user sentences mentioning
1946
+ // the phrase inside longer content are not touched.
1947
+ // Bug: anthropics/claude-code#12 (resume UX), observed empirically.
1948
+ // Opt-out via CACHE_FIX_SKIP_CONTINUE_TRAILER_STRIP=1 (defaults ON).
1949
+ if (shouldApplyFix("continue_trailer_strip") && payload.messages) {
1950
+ let trailerStripped = 0;
1951
+ for (const msg of payload.messages) {
1952
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
1953
+ const kept = msg.content.filter((block) => {
1954
+ if (isContinueTrailerBlock(block)) {
1955
+ trailerStripped++;
1956
+ return false;
1957
+ }
1958
+ return true;
1959
+ });
1960
+ if (kept.length !== msg.content.length) msg.content = kept;
1961
+ }
1962
+ if (trailerStripped > 0) {
1963
+ modified = true;
1964
+ debugLog(`APPLIED: continue-trailer-strip removed ${trailerStripped} trailer block(s)`);
1965
+ recordFixResult("continue_trailer_strip", "applied");
1966
+ } else {
1967
+ recordFixResult("continue_trailer_strip", "skipped");
1968
+ }
1969
+ }
1970
+
1971
+ // Extension: deferred_tools_restore — persist-and-restore the
1972
+ // deferred-tools attachment block across sessions so MCP reconnect
1973
+ // race at resume-time doesn't shrink msg[0] and bust the whole cache.
1974
+ // Snapshot key defaults to process.cwd() (one snapshot per project).
1975
+ // Opt-out via CACHE_FIX_SKIP_DEFERRED_TOOLS_RESTORE=1 (defaults ON).
1976
+ if (shouldApplyFix("deferred_tools_restore") && payload.messages) {
1977
+ let dtrRestored = 0;
1978
+ const found = findDeferredToolsBlockInBody(payload);
1979
+ if (found) {
1980
+ const hasUnavail = found.text.includes(DEFERRED_TOOLS_UNAVAILABLE_MARKER);
1981
+ const snapshotPath = deferredToolsSnapshotPath(process.cwd());
1982
+ if (!hasUnavail) {
1983
+ // Clean baseline — persist it for future resumes. Silent on
1984
+ // any I/O error; snapshot is best-effort.
1985
+ try {
1986
+ mkdirSync(DEFERRED_TOOLS_SNAPSHOT_DIR, { recursive: true });
1987
+ writeFileSync(snapshotPath, found.text, "utf-8");
1988
+ } catch {}
1989
+ } else {
1990
+ // Shrunk block with explicit "no longer available" signal →
1991
+ // attempt restore. Only substitute if the persisted version is
1992
+ // strictly longer (never downgrade to a stale shorter snapshot).
1993
+ let snapshot = null;
1994
+ try { snapshot = readFileSync(snapshotPath, "utf-8"); } catch {}
1995
+ if (snapshot && snapshot.length > found.text.length) {
1996
+ const targetMsg = payload.messages[found.msgIdx];
1997
+ const newContent = targetMsg.content.slice();
1998
+ newContent[found.blockIdx] = { ...newContent[found.blockIdx], text: snapshot };
1999
+ payload.messages[found.msgIdx] = { ...targetMsg, content: newContent };
2000
+ dtrRestored = 1;
2001
+ }
2002
+ }
2003
+ }
2004
+ if (dtrRestored > 0) {
2005
+ modified = true;
2006
+ debugLog(`APPLIED: deferred-tools-restore substituted full block at msg[${found.msgIdx}].content[${found.blockIdx}]`);
2007
+ recordFixResult("deferred_tools_restore", "applied");
2008
+ } else {
2009
+ recordFixResult("deferred_tools_restore", "skipped");
2010
+ }
2011
+ }
2012
+
2013
+ // Extension: reminder_strip — remove bookkeeping system-reminder blocks
2014
+ // (Token usage / USD budget / Output tokens / TodoWrite nudge / turn
2015
+ // counters) entirely from user messages. Runs AFTER smoosh_split so
2016
+ // blocks peeled out of tool_result.content are visible as standalone
2017
+ // text and can be matched by isBookkeepingReminder.
2018
+ // Zero model visibility, zero drift.
2019
+ // Opt-out via CACHE_FIX_SKIP_REMINDER_STRIP=1 (defaults ON).
2020
+ if (shouldApplyFix("reminder_strip") && payload.messages) {
2021
+ let reminderStripped = 0;
2022
+ for (const msg of payload.messages) {
2023
+ if (msg?.role !== "user" || !Array.isArray(msg.content)) continue;
2024
+ const kept = msg.content.filter((block) => {
2025
+ if (block?.type !== "text") return true;
2026
+ if (isBookkeepingReminder(block.text)) {
2027
+ reminderStripped++;
2028
+ return false;
2029
+ }
2030
+ return true;
2031
+ });
2032
+ if (kept.length !== msg.content.length) msg.content = kept;
2033
+ }
2034
+ if (reminderStripped > 0) {
2035
+ modified = true;
2036
+ debugLog(`APPLIED: reminder-strip removed ${reminderStripped} bookkeeping reminder block(s)`);
2037
+ recordFixResult("reminder_strip", "applied");
2038
+ } else {
2039
+ recordFixResult("reminder_strip", "skipped");
2040
+ }
2041
+ }
2042
+
2043
+ // Extension: cache_control_normalize — pin the cache_control marker at
2044
+ // a canonical position (last block of last user message) on every
2045
+ // outbound body. Prevents marker-shuffle drift between turns from
2046
+ // invalidating the previous-last-block's cached bytes. Runs LAST
2047
+ // (after smoosh_split and any other content-mutating pass) so the
2048
+ // canonical position is calculated against the final content array.
2049
+ // Fast path: if canonical position already holds the correct marker
2050
+ // and it's the only user-side marker, body passes through untouched.
2051
+ // Opt-out via CACHE_FIX_SKIP_CACHE_CONTROL_NORMALIZE=1 (defaults ON).
2052
+ if (shouldApplyFix("cache_control_normalize") && payload.messages && payload.messages.length > 0) {
2053
+ // Locate canonical position: last block of the last user message. If
2054
+ // that message's content is not a non-empty array, there is no valid target; skip.
2055
+ let targetMsgIdx = -1;
2056
+ let targetBlockIdx = -1;
2057
+ for (let i = payload.messages.length - 1; i >= 0; i--) {
2058
+ const m = payload.messages[i];
2059
+ if (m?.role !== "user") continue;
2060
+ if (!Array.isArray(m.content) || m.content.length === 0) break;
2061
+ targetMsgIdx = i;
2062
+ targetBlockIdx = m.content.length - 1;
2063
+ break;
2064
+ }
2065
+
2066
+ let ccMutated = false;
2067
+ if (targetMsgIdx !== -1) {
2068
+ const targetBlock = payload.messages[targetMsgIdx].content[targetBlockIdx];
2069
+ const existingCC = targetBlock?.cache_control;
2070
+ const canonicalAlreadyCorrect =
2071
+ existingCC &&
2072
+ existingCC.type === CACHE_CONTROL_CANONICAL_MARKER.type &&
2073
+ existingCC.ttl === CACHE_CONTROL_CANONICAL_MARKER.ttl;
2074
+
2075
+ if (!(canonicalAlreadyCorrect && countUserCacheControlMarkers(payload) === 1)) {
2076
+ // Strip all markers from user messages, then place canonical.
2077
+ for (const msg of payload.messages) stripCacheControlMarkers(msg);
2078
+ const tm = payload.messages[targetMsgIdx];
2079
+ const newContent = tm.content.slice();
2080
+ newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...CACHE_CONTROL_CANONICAL_MARKER } };
2081
+ payload.messages[targetMsgIdx] = { ...tm, content: newContent };
2082
+ ccMutated = true;
2083
+ }
2084
+ }
2085
+ if (ccMutated) {
2086
+ modified = true;
2087
+ debugLog(`APPLIED: cache_control_normalize pinned marker at msg[${targetMsgIdx}].content[${targetBlockIdx}]`);
2088
+ recordFixResult("cache_control_normalize", "applied");
2089
+ } else {
2090
+ recordFixResult("cache_control_normalize", "skipped");
2091
+ }
2092
+ }
2093
+
1477
2094
  // Bug 5: TTL enforcement (configurable per request type)
1478
2095
  // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
1479
2096
  // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
@@ -1899,5 +2516,17 @@ export {
1899
2516
  isClearArtifact,
1900
2517
  rewriteOutputEfficiencyInstruction,
1901
2518
  normalizeOutputEfficiencyReplacement,
2519
+ normalizeSessionStartText,
2520
+ isContinueTrailerBlock,
2521
+ CONTINUE_TRAILER_TEXT,
2522
+ findDeferredToolsBlockInBody,
2523
+ deferredToolsSnapshotPath,
2524
+ DEFERRED_TOOLS_AVAILABLE_MARKER,
2525
+ DEFERRED_TOOLS_UNAVAILABLE_MARKER,
2526
+ isBookkeepingReminder,
2527
+ stripCacheControlMarkers,
2528
+ countUserCacheControlMarkers,
2529
+ CACHE_CONTROL_CANONICAL_MARKER,
2530
+ normalizeToolUseInputsInBody,
1902
2531
  _pinnedBlocks, // exported so tests can reset between runs
1903
2532
  };