npm - claude-code-cache-fix - Versions diffs - 2.0.3 → 2.0.5 - Mend

claude-code-cache-fix 2.0.3 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-cache-fix",
-  "version": "2.0.3",
+  "version": "2.0.5",
   "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
   "type": "module",
   "exports": "./preload.mjs",

package/preload.mjs CHANGED Viewed

@@ -590,7 +590,17 @@ function isBookkeepingReminder(text) {
 // correctly.
 // --------------------------------------------------------------------------
-const CACHE_CONTROL_CANONICAL_MARKER = { type: "ephemeral", ttl: "1h" };
+// Detected per-request from existing markers. Default 1h; downgraded to 5m
+// if any existing block already carries ttl="5m" (Q5h=100% tier).
+// The API rejects 1h markers after 5m markers, so all injected markers
+// must match the lowest existing tier.
+let _detectedTtlTier = "1h";
+function getCanonicalMarker() {
+  return { type: "ephemeral", ttl: _detectedTtlTier };
+}
+const CACHE_CONTROL_CANONICAL_MARKER_LEGACY = { type: "ephemeral", ttl: "1h" };
 /**
  * Strip every cache_control marker from a single user message's content
@@ -777,7 +787,9 @@ const CACHE_CONTROL_STICKY_DIR = join(homedir(), ".claude", "cache-fix-state");
 // CC uses 1 on system[2] + cache_control_normalize places 1 on last user msg = 2 reserved.
 // Sticky can use at most 2 historical positions to stay within the 4-marker cap.
 const CACHE_CONTROL_STICKY_MAX_POSITIONS = 2;
-const CACHE_CONTROL_STICKY_DEFAULT_MARKER = { type: "ephemeral", ttl: "1h" };
+function getCacheControlStickyDefaultMarker() {
+  return { type: "ephemeral", ttl: _detectedTtlTier };
+}
 /**
  * Build the absolute state-file path for a given project key. Exported so
@@ -850,7 +862,7 @@ function readCacheControlStickyState(key) {
         marker:
           p.marker && typeof p.marker === "object" && typeof p.marker.type === "string"
             ? { ...p.marker }
-            : { ...CACHE_CONTROL_STICKY_DEFAULT_MARKER },
+            : { ...getCacheControlStickyDefaultMarker() },
       });
     }
     return { version: 1, positions };
@@ -1800,6 +1812,24 @@ globalThis.fetch = async function (url, options) {
         debugLog("CACHE_FIX_DISABLED=1 — all bug fixes bypassed, monitoring active");
       }
+      // Detect existing TTL tier from the payload. If any block already has
+      // ttl="5m" (Q5h=100% tier), all injected markers must use 5m too —
+      // the API rejects 1h after 5m in processing order (tools → system → messages).
+      _detectedTtlTier = "1h";
+      const allBlocks = [
+        ...(Array.isArray(payload.system) ? payload.system : []),
+        ...(Array.isArray(payload.messages) ? payload.messages.flatMap(m => Array.isArray(m.content) ? m.content : []) : []),
+      ];
+      for (const block of allBlocks) {
+        if (block?.cache_control?.ttl === "5m") {
+          _detectedTtlTier = "5m";
+          break;
+        }
+      }
+      if (_detectedTtlTier === "5m") {
+        debugLog("TTL TIER DETECT: existing 5m markers found — all injected markers will use 5m");
+      }
       debugLog("--- API call to", urlStr);
       debugLog("message count:", payload.messages?.length);
@@ -2350,15 +2380,15 @@ globalThis.fetch = async function (url, options) {
           const existingCC = targetBlock?.cache_control;
           const canonicalAlreadyCorrect =
             existingCC &&
-            existingCC.type === CACHE_CONTROL_CANONICAL_MARKER.type &&
-            existingCC.ttl === CACHE_CONTROL_CANONICAL_MARKER.ttl;
+            existingCC.type === getCanonicalMarker().type &&
+            existingCC.ttl === getCanonicalMarker().ttl;
           if (!(canonicalAlreadyCorrect && countUserCacheControlMarkers(payload) === 1)) {
             // Strip all markers from user messages, then place canonical.
             for (const msg of payload.messages) stripCacheControlMarkers(msg);
             const tm = payload.messages[targetMsgIdx];
             const newContent = tm.content.slice();
-            newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...CACHE_CONTROL_CANONICAL_MARKER } };
+            newContent[targetBlockIdx] = { ...newContent[targetBlockIdx], cache_control: { ...getCanonicalMarker() } };
             payload.messages[targetMsgIdx] = { ...tm, content: newContent };
             ccMutated = true;
           }
@@ -2423,7 +2453,8 @@ globalThis.fetch = async function (url, options) {
           debugLog(`SKIPPED: TTL injection (${requestType} set to 'none' — pass-through)`);
           recordFixResult("ttl", "skipped");
         } else {
-          const ttlParam = ttlValue === "5m" ? "5m" : "1h";
+          // Respect detected tier: if existing blocks have 5m, never inject 1h
+          const ttlParam = ttlValue === "5m" || _detectedTtlTier === "5m" ? "5m" : "1h";
           let ttlInjected = 0;
           payload.system = payload.system.map((block) => {
             if (block.cache_control?.type === "ephemeral" && !block.cache_control.ttl) {
@@ -2835,7 +2866,8 @@ export {
   isBookkeepingReminder,
   stripCacheControlMarkers,
   countUserCacheControlMarkers,
-  CACHE_CONTROL_CANONICAL_MARKER,
+  CACHE_CONTROL_CANONICAL_MARKER_LEGACY,
+  getCanonicalMarker,
   normalizeToolUseInputsInBody,
   computeStickyMessageHash,
   cacheControlStickyStatePath,
@@ -2844,6 +2876,6 @@ export {
   readCacheControlStickyState,
   writeCacheControlStickyState,
   CACHE_CONTROL_STICKY_MAX_POSITIONS,
-  CACHE_CONTROL_STICKY_DEFAULT_MARKER,
+  getCacheControlStickyDefaultMarker,
   _pinnedBlocks,  // exported so tests can reset between runs
 };

package/tools/MANUAL-COMPACT.md ADDED Viewed

@@ -0,0 +1,138 @@
+# manual-compact.sh — Manual Compaction for 1M Context Hack Sessions
+## Purpose
+When using the 1M context window hack (`DISABLE_COMPACT=1` + `CLAUDE_CODE_MAX_CONTEXT_TOKENS=1000000`), the `/compact` command is disabled by CC. This tool provides a manual compaction alternative: extract the conversation, summarize it via Claude, and restore context after `/clear`.
+**This tool is specifically for sessions running the 1M hack.** If you have `/compact` available, use that instead — it's built-in, integrated, and handles the full compaction lifecycle automatically.
+## How It Works
+1. Extracts conversation turns from the session JSONL transcript
+2. Splits turns into three weighted segments:
+   - **Foundational** (first 20%) — truncated to 200 chars each
+   - **Working** (middle 40%) — truncated to 400 chars each
+   - **Active** (last 40%) — preserved up to 2000 chars each
+3. Sends the weighted extract to Claude Sonnet for summarization
+4. Produces a structured summary optimized for agent handoff
+The weighting ensures recent active work (the part you're most likely to need) gets full detail, while earlier completed work is compressed.
+## Usage
+```bash
+# By project directory (recommended) — auto-finds the most recent session
+manual-compact.sh ~/git_repos/myproject
+# By project directory with user context
+manual-compact.sh ~/git_repos/myproject /tmp/context.txt
+# By direct JSONL path (if you know the exact session)
+manual-compact.sh ~/.claude/projects/-home-user-git-repos-myproject/abc123.jsonl
+# By direct JSONL path with user context
+manual-compact.sh ~/.claude/projects/-home-user-git-repos-myproject/abc123.jsonl /tmp/context.txt
+```
+When you pass a project directory, the tool:
+1. Converts it to CC's internal project path format
+2. Finds the most recently modified session JSONL
+3. Shows you the session details (modified date, size)
+4. **Asks for confirmation** before proceeding
+### WARNING: Wrong Session = Wrong Context
+**If you select the wrong session JSONL, the summary will be from a completely different conversation.** Loading that summary after `/clear` will inject false context — the agent will confidently act on information from another session, another project, or another agent's work.
+Always:
+- Verify the session timestamp matches your active session
+- Review the summary output before feeding it to an agent
+- When in doubt, check the last few lines of the JSONL to confirm it's the right conversation
+### Example: Basic Compaction
+```bash
+./tools/manual-compact.sh ~/git_repos/kanfei-nowcast-e3b
+```
+```
+Project directory: /home/manager/git_repos/kanfei-nowcast-e3b
+Auto-detected session: db11f377-4ca8-4fc3-9b6d-1069da58c1b2.jsonl
+  Modified: 2026-04-19 13:26:42
+  Size: 4.8M
+Is this the correct session? [Y/n]
+```
+Output: `/tmp/db11f377-...-compact-summary.txt`
+### Example: With User Context
+If there's specific context you know the summary might miss:
+```bash
+echo "The MR2 OOM debugging took 3 days. The PR #75 architectural recommendation
+was max(dualpol_lr, hail_lr) for correlation grouping." > /tmp/context.txt
+./tools/manual-compact.sh ~/git_repos/kanfei-nowcast-e3b /tmp/context.txt
+```
+The user context is injected into the summarization prompt, ensuring those details appear in the output.
+### Restoring Context After /clear
+In the CC session:
+```
+/clear
+```
+Then as your first message:
+```
+Read /tmp/<session-id>-compact-summary.txt for context on where we left off.
+```
+## Limitations
+### This tool is a workaround, not a replacement for /compact
+- `/compact` operates inside CC with full access to the internal message array, system prompt, tool schemas, and session state. This tool only sees the JSONL transcript, which is a subset.
+- `/compact` preserves CC's internal state (tool registration, MCP connections, plugin state). This tool + `/clear` resets all of that. The agent must re-establish any stateful connections.
+- `/compact` is atomic — one command, seamless continuation. This tool requires `/clear` + paste, which is a hard context boundary.
+### Summary fidelity
+Tested at ~95% fidelity for active work resumption, ~70% for broader project context. Gaps typically include:
+- **Operational debugging history** — multi-day debugging sagas compress away
+- **Timeline information** — the summary doesn't indicate when things happened or how long they took
+- **Depth of architectural discussions** — detailed technical recommendations get compressed to one-liners
+- **Background process context** — overnight watchers, cron monitoring, polling patterns
+Use the user context file to fill known gaps.
+### Token cost
+The summarization call costs tokens against your Q5h quota. At ~50K extract tokens through Sonnet, expect ~1-2% Q5h per compaction. This is comparable to what `/compact` costs.
+### Requires Claude Sonnet access
+The tool uses `claude --print --model claude-sonnet-4-6` for summarization. Sonnet is used instead of Opus to minimize Q5h impact. If Sonnet is unavailable, change the model in the script.
+## Why the 1M Hack Disables /compact
+The 1M context hack works by setting `DISABLE_COMPACT=1`, which CC reads as "disable all compaction." CC's code uses a single env var to control both:
+- The context window calculation (`ff()` returns 1M when `DISABLE_COMPACT=1`)
+- The `/compact` command availability (`isEnabled: () => !DISABLE_COMPACT`)
+These are coupled in CC's source — there is no way to get 1M context AND `/compact` simultaneously without CC code changes. The coupling is in the CC binary, not in our interceptor.
+We attempted to toggle `DISABLE_COMPACT` via the interceptor (set during API calls, unset between turns), but CC registers available commands at startup before any API call, so the toggle cannot re-enable `/compact` after session start.
+## Requirements
+- Claude Code v2.1.112 (the last Node.js version — v2.1.113+ uses Bun)
+- The cache-fix interceptor loaded via `NODE_OPTIONS=--import`
+- `DISABLE_COMPACT=1` and `CLAUDE_CODE_MAX_CONTEXT_TOKENS=1000000` set
+- `claude` CLI available in PATH (used for summarization)

package/tools/manual-compact.sh ADDED Viewed

@@ -0,0 +1,212 @@
+#!/bin/bash
+# manual-compact.sh — Generate a compaction summary for a CC session
+#
+# Usage:
+#   manual-compact.sh <project-dir-or-session-jsonl> [user-context-file]
+#
+# Accepts either:
+#   - A project working directory (e.g. ~/git_repos/myproject)
+#     → auto-finds the most recent session JSONL
+#   - A direct path to a session JSONL file
+#
+# Produces a summary at /tmp/<session-id>-compact-summary.txt
+# that can be pasted or referenced after /clear.
+#
+# The optional user-context-file is additional context the user wants
+# preserved in the summary (equivalent to /compact <instructions>).
+#
+# WARNING: Using the wrong session JSONL will produce a summary from
+# a DIFFERENT conversation. Loading that into your session after /clear
+# will inject completely wrong context. Always verify the output before
+# feeding it to an agent.
+set -euo pipefail
+if [ $# -lt 1 ]; then
+  echo "Usage: $0 <project-dir-or-session-jsonl> [user-context-file]"
+  echo ""
+  echo "Generates a compaction summary from a CC session JSONL transcript."
+  echo "After /clear, reference the output file to restore context."
+  echo ""
+  echo "Arguments:"
+  echo "  <project-dir>   Working directory of the CC session (e.g. ~/git_repos/myproject)"
+  echo "                  Auto-detects the most recent session JSONL."
+  echo "  <session-jsonl>  Direct path to a session JSONL file."
+  echo "  [user-context]   Optional file with additional context to preserve."
+  echo ""
+  echo "WARNING: Verify the output summary before loading it after /clear."
+  echo "         A wrong session JSONL = wrong context = confused agent."
+  exit 1
+fi
+INPUT="$1"
+USER_CONTEXT_FILE="${2:-}"
+# Determine if input is a directory or a JSONL file
+if [ -d "$INPUT" ]; then
+  # Convert project directory to CC's project path format
+  REAL_PATH=$(realpath "$INPUT")
+  # CC replaces / with - and prepends -
+  PROJECT_KEY=$(echo "$REAL_PATH" | sed 's|/|-|g')
+  PROJECT_DIR="$HOME/.claude/projects/${PROJECT_KEY}"
+  if [ ! -d "$PROJECT_DIR" ]; then
+    echo "ERROR: No CC project found for directory: $INPUT"
+    echo "       Expected: $PROJECT_DIR"
+    echo ""
+    echo "Available projects:"
+    ls -d ~/.claude/projects/*/ 2>/dev/null | head -10
+    exit 1
+  fi
+  # Find the most recent JSONL (exclude subdirectories like subagents/)
+  JSONL=$(find "$PROJECT_DIR" -maxdepth 1 -name "*.jsonl" -type f -printf '%T@ %p\n' 2>/dev/null | sort -rn | head -1 | cut -d' ' -f2-)
+  if [ -z "$JSONL" ]; then
+    echo "ERROR: No session JSONL found in: $PROJECT_DIR"
+    exit 1
+  fi
+  echo "Project directory: $INPUT"
+  echo "Auto-detected session: $(basename "$JSONL")"
+  echo "  Modified: $(stat -c '%y' "$JSONL" | cut -d'.' -f1)"
+  echo "  Size: $(du -h "$JSONL" | cut -f1)"
+  echo ""
+  read -p "Is this the correct session? [Y/n] " CONFIRM
+  if [[ "${CONFIRM:-Y}" =~ ^[Nn] ]]; then
+    echo ""
+    echo "Available sessions in $PROJECT_DIR:"
+    ls -lt "$PROJECT_DIR"/*.jsonl 2>/dev/null | awk '{print "  " $6, $7, $8, $NF}'
+    echo ""
+    echo "Re-run with the specific JSONL path."
+    exit 1
+  fi
+elif [ -f "$INPUT" ]; then
+  JSONL="$INPUT"
+else
+  echo "ERROR: $INPUT is not a directory or file."
+  exit 1
+fi
+SESSION_ID=$(basename "$JSONL" .jsonl)
+OUTPUT="/tmp/${SESSION_ID}-compact-summary.txt"
+EXTRACT="/tmp/${SESSION_ID}-conv-extract.txt"
+echo ""
+echo "Extracting conversation from: $JSONL"
+# Extract conversation turns, keeping more detail for recent turns
+python3 << PYEOF
+import json, sys
+conversation = []
+with open("$JSONL") as f:
+    for line in f:
+        try:
+            d = json.loads(line.strip())
+            if d.get('type') == 'user':
+                msg = d.get('message', {})
+                content = msg.get('content', '')
+                if isinstance(content, str) and len(content.strip()) > 0:
+                    if content.startswith('<local-command') or content.startswith('<command-name>'):
+                        continue
+                    conversation.append(('user', content))
+                elif isinstance(content, list):
+                    texts = []
+                    for b in content:
+                        if isinstance(b, dict):
+                            if b.get('type') == 'text' and b.get('text'):
+                                t = b['text']
+                                if not t.startswith('<local-command') and not t.startswith('<command-name>'):
+                                    texts.append(t)
+                            elif b.get('type') == 'tool_result' and b.get('content'):
+                                c = b['content']
+                                if isinstance(c, str):
+                                    texts.append(c)
+                                elif isinstance(c, list):
+                                    for tb in c:
+                                        if isinstance(tb, dict) and tb.get('text'):
+                                            texts.append(tb['text'])
+                    if texts:
+                        conversation.append(('user', ' '.join(texts)))
+            elif d.get('type') == 'assistant':
+                msg = d.get('message', {})
+                content = msg.get('content', [])
+                if isinstance(content, list):
+                    texts = [b.get('text', '') for b in content if isinstance(b, dict) and b.get('type') == 'text' and b.get('text')]
+                    if texts:
+                        conversation.append(('assistant', ' '.join(texts)))
+        except:
+            pass
+total = len(conversation)
+if total == 0:
+    print("No conversation found.", file=sys.stderr)
+    sys.exit(1)
+# Split into three segments with different detail levels:
+# - First 20%: truncate to 200 chars each (foundational context)
+# - Middle 40%: truncate to 400 chars each (working context)
+# - Last 40%: full text up to 2000 chars each (active work — most important)
+seg1_end = int(total * 0.2)
+seg2_end = int(total * 0.6)
+with open("$EXTRACT", 'w') as f:
+    f.write("=== FOUNDATIONAL CONTEXT (early session) ===\n\n")
+    for role, text in conversation[:seg1_end]:
+        f.write(f"[{role}]: {text[:200]}\n\n")
+    f.write("\n=== WORKING CONTEXT (mid session) ===\n\n")
+    for role, text in conversation[seg1_end:seg2_end]:
+        f.write(f"[{role}]: {text[:400]}\n\n")
+    f.write("\n=== ACTIVE WORK (recent — preserve in full detail) ===\n\n")
+    for role, text in conversation[seg2_end:]:
+        f.write(f"[{role}]: {text[:2000]}\n\n")
+import os
+size = os.path.getsize("$EXTRACT")
+print(f"Extracted {total} turns ({size:,} bytes, ~{size//4:,} est. tokens)")
+print(f"  Foundational: {seg1_end} turns (truncated to 200 chars)")
+print(f"  Working: {seg2_end - seg1_end} turns (truncated to 400 chars)")
+print(f"  Active: {total - seg2_end} turns (up to 2000 chars)")
+PYEOF
+# Build the summarization prompt
+USER_CONTEXT=""
+if [ -n "$USER_CONTEXT_FILE" ]; then
+  if [ -f "$USER_CONTEXT_FILE" ]; then
+    USER_CONTEXT=$(cat "$USER_CONTEXT_FILE")
+    echo "User context loaded from: $USER_CONTEXT_FILE"
+  else
+    # A mistyped path used to be ignored silently, producing a summary that
+    # lacked the context the user explicitly asked to preserve.
+    echo "WARNING: user context file not found, continuing without it: $USER_CONTEXT_FILE" >&2
+  fi
+fi
+PROMPT="Summarize this conversation for context continuity after a /clear.
+CRITICAL PRIORITIES (in order):
+1. ACTIVE WORK STATE — What is the agent doing RIGHT NOW? What branch, what uncommitted changes, what task is in progress, what was the last action taken? This is the most important section. Be precise about exactly where things stand — do not understate progress.
+2. RECENT DECISIONS — Key decisions made in the last ~20% of the conversation and their rationale.
+3. PENDING NEXT STEPS — What was about to happen next? What was queued?
+4. COMPLETED WORK — PRs merged, issues closed, features shipped. Brief — the git history has the details.
+5. FOUNDATIONAL CONTEXT — Agent identity, repo location, key collaborators, infrastructure. Brief.
+FORMAT: Use headers and bullet points. Be specific about file paths, branch names, commit SHAs, function names. The agent reading this will have zero prior context — every detail that matters must be explicit.
+DO NOT understate progress on in-flight work. If the last 20% of the conversation shows implementation was done, say it was done — do not say 'investigation started'."
+# Append the caller-supplied context, if any, as its own labelled section.
+if [ -n "$USER_CONTEXT" ]; then
+  PROMPT="$PROMPT
+ADDITIONAL USER CONTEXT TO PRESERVE:
+$USER_CONTEXT"
+fi
+echo ""
+echo "Sending to Claude for summarization..."
+cat "$EXTRACT" | claude --print --model claude-sonnet-4-6 "$PROMPT" > "$OUTPUT" 2>/dev/null
+SIZE=$(wc -c < "$OUTPUT")
+echo ""
+echo "Summary generated: $OUTPUT ($SIZE bytes)"
+echo ""
+echo "To restore context after /clear:"
+echo "  Read $OUTPUT for context on where we left off."