npm - claude-code-cache-fix - Versions diffs - 3.7.1 → 3.9.0 - Mend

claude-code-cache-fix 3.7.1 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +55 -1
package/README.zh.md +691 -159
package/hooks/README.md +36 -0
package/hooks/examples/worktree-edit-guard.py +93 -0
package/package.json +2 -1
package/proxy/extensions/auto-1m-guard.mjs +117 -0
package/proxy/extensions/cache-telemetry.mjs +19 -0
package/proxy/extensions/session-health.mjs +152 -0
package/proxy/extensions/thinking-block-sanitize.mjs +130 -0
package/proxy/extensions/ttl-management.mjs +10 -0
package/proxy/extensions.json +80 -18
package/tools/MANUAL-COMPACT.md +15 -8
package/tools/manual-compact.sh +17 -11
package/tools/quota-statusline.sh +4 -2

package/proxy/extensions.json CHANGED Viewed

@@ -1,20 +1,82 @@
 {
-  "bootstrap-defense": { "enabled": true, "order": 45 },
-  "ttl-tier-detect": { "enabled": true, "order": 75 },
-  "fingerprint-strip": { "enabled": true, "order": 100 },
-  "image-strip": { "enabled": true, "order": 150 },
-  "sort-stabilization": { "enabled": true, "order": 200 },
-  "fresh-session-sort": { "enabled": true, "order": 250 },
-  "identity-normalization": { "enabled": true, "order": 300 },
-  "smoosh-split": { "enabled": true, "order": 320 },
-  "content-strip": { "enabled": true, "order": 330 },
-  "tool-input-normalize": { "enabled": true, "order": 340 },
-  "microcompact-stability": { "enabled": true, "order": 350 },
-  "thinking-display": { "enabled": true, "order": 360 },
-  "cache-control-normalize": { "enabled": true, "order": 400 },
-  "messages-cache-breakpoint": { "enabled": true, "order": 410 },
-  "ttl-management": { "enabled": true, "order": 500 },
-  "cache-telemetry": { "enabled": true, "order": 600 },
-  "overage-warning": { "enabled": true, "order": 610 },
-  "request-log": { "enabled": false, "order": 700 }
+  "bootstrap-defense": {
+    "enabled": true,
+    "order": 45
+  },
+  "ttl-tier-detect": {
+    "enabled": true,
+    "order": 75
+  },
+  "fingerprint-strip": {
+    "enabled": true,
+    "order": 100
+  },
+  "image-strip": {
+    "enabled": true,
+    "order": 150
+  },
+  "sort-stabilization": {
+    "enabled": true,
+    "order": 200
+  },
+  "fresh-session-sort": {
+    "enabled": true,
+    "order": 250
+  },
+  "identity-normalization": {
+    "enabled": true,
+    "order": 300
+  },
+  "smoosh-split": {
+    "enabled": true,
+    "order": 320
+  },
+  "content-strip": {
+    "enabled": true,
+    "order": 330
+  },
+  "tool-input-normalize": {
+    "enabled": true,
+    "order": 340
+  },
+  "microcompact-stability": {
+    "enabled": true,
+    "order": 350
+  },
+  "thinking-display": {
+    "enabled": true,
+    "order": 360
+  },
+  "cache-control-normalize": {
+    "enabled": true,
+    "order": 400
+  },
+  "messages-cache-breakpoint": {
+    "enabled": true,
+    "order": 410
+  },
+  "ttl-management": {
+    "enabled": true,
+    "order": 500
+  },
+  "cache-telemetry": {
+    "enabled": true,
+    "order": 600
+  },
+  "overage-warning": {
+    "enabled": true,
+    "order": 610
+  },
+  "request-log": {
+    "enabled": false,
+    "order": 700
+  },
+  "usage-log": {
+    "enabled": true,
+    "order": 650
+  },
+  "rate-limit-log": {
+    "enabled": true,
+    "order": 660
+  }
 }

package/tools/MANUAL-COMPACT.md CHANGED Viewed

@@ -10,10 +10,10 @@ When using the 1M context window hack (`DISABLE_COMPACT=1` + `CLAUDE_CODE_MAX_CO
 1. Extracts conversation turns from the session JSONL transcript
 2. Splits turns into three weighted segments:
-   - **Foundational** (first 20%) — truncated to 200 chars each
-   - **Working** (middle 40%) — truncated to 400 chars each
-   - **Active** (last 40%) — preserved up to 2000 chars each
-3. Sends the weighted extract to Claude Sonnet for summarization
+   - **Foundational** (first 20%) — truncated to 300 chars each
+   - **Working** (middle 40%) — truncated to 1500 chars each
+   - **Active** (last 40%) — preserved up to 8000 chars each
+3. Sends the weighted extract to Claude Opus for summarization
 4. Produces a structured summary optimized for agent handoff
 The weighting ensures recent active work (the part you're most likely to need) gets full detail, while earlier completed work is compressed.
@@ -142,7 +142,7 @@ Use the user context file to fill known gaps.
 Two costs to account for:
-1. **Summarization call** — the `claude --print` call through Sonnet. At ~50K extract tokens, expect ~1-2% Q5h.
+1. **Summarization call** — the `claude --print` call through Opus. With the relaxed recent-turn caps the extract is larger (and Opus costs more per token than Sonnet), so expect a few % Q5h rather than ~1-2%. The tradeoff buys markedly higher-fidelity summaries; override with `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` if you need to minimize cost.
 2. **Cold start after /clear** — the first API call rebuilds the full cache from scratch. Real-world example from a 954K-token session:
 ```
@@ -153,11 +153,18 @@ Second call:    cache_read=957,253  cache_creation=5,569   (warm again)
 The cold rebuild consumed ~15% Q5h in one call on our Max 5x account. After that single rebuild, the session is warm again and cache hits resume at 99%+.
-**Total cost of a manual compact cycle:** ~17% Q5h (2% summarization + 15% cold rebuild). Compare to hitting the 1M wall and losing the session entirely.
+**Total cost of a manual compact cycle:** roughly 15% Q5h for the cold rebuild plus a few % Q5h for the Opus summarization. Compare to hitting the 1M wall and losing the session entirely.
-### Requires Claude Sonnet access
+### Summarizer model
-The tool uses `claude --print --model claude-sonnet-4-6` for summarization. Sonnet is used instead of Opus to minimize Q5h impact. If Sonnet is unavailable, change the model in the script.
+The tool defaults to `claude --print --model claude-opus-4-7` for the highest-fidelity summary. Override with the `MANUAL_COMPACT_MODEL` env var — e.g. `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` to minimize Q5h impact, or to point at a different model if Opus is rate-limited or retired.
+### Troubleshooting: empty summary output
+If `$OUTPUT` comes back empty, the most likely cause is that the extract exceeded the summarizer's context window — this tool runs near the 1M wall, and the relaxed recent-turn caps (active turns up to 8000 chars) make the extract large on exactly those big sessions. The summarizer call swallows stderr, so an oversized-input rejection surfaces as an empty file rather than a visible error. Fixes, in order of preference:
+- Use a 1M-window model for the summarization: `MANUAL_COMPACT_MODEL='claude-opus-4-7[1m]' manual-compact.sh ...`
+- Or lower the per-turn caps in the script's extraction block (the `text[:8000]` / `text[:1500]` / `text[:300]` slices).
 ## Why the 1M Hack Disables /compact

package/tools/manual-compact.sh CHANGED Viewed

@@ -145,31 +145,33 @@ if total == 0:
     sys.exit(1)
 # Split into three segments with different detail levels:
-# - First 20%: truncate to 200 chars each (foundational context)
-# - Middle 40%: truncate to 400 chars each (working context)
-# - Last 40%: full text up to 2000 chars each (active work — most important)
+# - First 20%: truncate to 300 chars each (foundational context)
+# - Middle 40%: truncate to 1500 chars each (working context)
+# - Last 40%: full text up to 8000 chars each (active work — most important)
+# Recent-turn caps were relaxed (was 200/400/2000) so the summarizer sees the
+# active work in near-full detail; the stronger model (Opus, below) handles it.
 seg1_end = int(total * 0.2)
 seg2_end = int(total * 0.6)
 with open("$EXTRACT", 'w') as f:
     f.write("=== FOUNDATIONAL CONTEXT (early session) ===\n\n")
     for role, text in conversation[:seg1_end]:
-        f.write(f"[{role}]: {text[:200]}\n\n")
+        f.write(f"[{role}]: {text[:300]}\n\n")
     f.write("\n=== WORKING CONTEXT (mid session) ===\n\n")
     for role, text in conversation[seg1_end:seg2_end]:
-        f.write(f"[{role}]: {text[:400]}\n\n")
+        f.write(f"[{role}]: {text[:1500]}\n\n")
     f.write("\n=== ACTIVE WORK (recent — preserve in full detail) ===\n\n")
     for role, text in conversation[seg2_end:]:
-        f.write(f"[{role}]: {text[:2000]}\n\n")
+        f.write(f"[{role}]: {text[:8000]}\n\n")
 import os
 size = os.path.getsize("$EXTRACT")
 print(f"Extracted {total} turns ({size:,} bytes, ~{size//4:,} est. tokens)")
-print(f"  Foundational: {seg1_end} turns (truncated to 200 chars)")
-print(f"  Working: {seg2_end - seg1_end} turns (truncated to 400 chars)")
-print(f"  Active: {total - seg2_end} turns (up to 2000 chars)")
+print(f"  Foundational: {seg1_end} turns (truncated to 300 chars)")
+print(f"  Working: {seg2_end - seg1_end} turns (truncated to 1500 chars)")
+print(f"  Active: {total - seg2_end} turns (up to 8000 chars)")
 PYEOF
 # Build the summarization prompt
@@ -199,10 +201,14 @@ ADDITIONAL USER CONTEXT TO PRESERVE:
 $USER_CONTEXT"
 fi
+# Summarizer model. Defaults to Opus for highest-fidelity summaries; override
+# with MANUAL_COMPACT_MODEL (e.g. when Opus is rate-limited or retired).
+COMPACT_MODEL="${MANUAL_COMPACT_MODEL:-claude-opus-4-7}"
 echo ""
-echo "Sending to Claude for summarization..."
+echo "Sending to Claude ($COMPACT_MODEL) for summarization..."
-cat "$EXTRACT" | claude --print --model claude-sonnet-4-6 "$PROMPT" > "$OUTPUT" 2>/dev/null
+cat "$EXTRACT" | claude --print --model "$COMPACT_MODEL" "$PROMPT" > "$OUTPUT" 2>/dev/null
 SIZE=$(wc -c < "$OUTPUT")
 echo ""

package/tools/quota-statusline.sh CHANGED Viewed

@@ -115,11 +115,13 @@ def draw_bar(consumed_pct, elapsed_pct, width=BAR_WIDTH):
     # Tick overlays a fill cell when consumed > elapsed, keeping bar width
     # constant — that's what makes the over-pace state legible (┃ inside the
     # filled run) rather than just pushing fill cells around.
-    fill = int(round(max(0, min(100, consumed_pct)) / 100 * width))
+    def to_cells(pct):
+        return int(round(max(0, min(100, pct)) / 100 * width))
+    fill = to_cells(consumed_pct)
     if elapsed_pct is None:
         tick = -1
     else:
-        tick = min(int(max(0, min(100, elapsed_pct)) / 100 * width), width - 1)
+        tick = min(to_cells(elapsed_pct), width - 1)
     cells = []
     remaining = fill
     for i in range(width):