claude-code-cache-fix 3.7.1 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,82 @@
1
1
  {
2
- "bootstrap-defense": { "enabled": true, "order": 45 },
3
- "ttl-tier-detect": { "enabled": true, "order": 75 },
4
- "fingerprint-strip": { "enabled": true, "order": 100 },
5
- "image-strip": { "enabled": true, "order": 150 },
6
- "sort-stabilization": { "enabled": true, "order": 200 },
7
- "fresh-session-sort": { "enabled": true, "order": 250 },
8
- "identity-normalization": { "enabled": true, "order": 300 },
9
- "smoosh-split": { "enabled": true, "order": 320 },
10
- "content-strip": { "enabled": true, "order": 330 },
11
- "tool-input-normalize": { "enabled": true, "order": 340 },
12
- "microcompact-stability": { "enabled": true, "order": 350 },
13
- "thinking-display": { "enabled": true, "order": 360 },
14
- "cache-control-normalize": { "enabled": true, "order": 400 },
15
- "messages-cache-breakpoint": { "enabled": true, "order": 410 },
16
- "ttl-management": { "enabled": true, "order": 500 },
17
- "cache-telemetry": { "enabled": true, "order": 600 },
18
- "overage-warning": { "enabled": true, "order": 610 },
19
- "request-log": { "enabled": false, "order": 700 }
2
+ "bootstrap-defense": {
3
+ "enabled": true,
4
+ "order": 45
5
+ },
6
+ "ttl-tier-detect": {
7
+ "enabled": true,
8
+ "order": 75
9
+ },
10
+ "fingerprint-strip": {
11
+ "enabled": true,
12
+ "order": 100
13
+ },
14
+ "image-strip": {
15
+ "enabled": true,
16
+ "order": 150
17
+ },
18
+ "sort-stabilization": {
19
+ "enabled": true,
20
+ "order": 200
21
+ },
22
+ "fresh-session-sort": {
23
+ "enabled": true,
24
+ "order": 250
25
+ },
26
+ "identity-normalization": {
27
+ "enabled": true,
28
+ "order": 300
29
+ },
30
+ "smoosh-split": {
31
+ "enabled": true,
32
+ "order": 320
33
+ },
34
+ "content-strip": {
35
+ "enabled": true,
36
+ "order": 330
37
+ },
38
+ "tool-input-normalize": {
39
+ "enabled": true,
40
+ "order": 340
41
+ },
42
+ "microcompact-stability": {
43
+ "enabled": true,
44
+ "order": 350
45
+ },
46
+ "thinking-display": {
47
+ "enabled": true,
48
+ "order": 360
49
+ },
50
+ "cache-control-normalize": {
51
+ "enabled": true,
52
+ "order": 400
53
+ },
54
+ "messages-cache-breakpoint": {
55
+ "enabled": true,
56
+ "order": 410
57
+ },
58
+ "ttl-management": {
59
+ "enabled": true,
60
+ "order": 500
61
+ },
62
+ "cache-telemetry": {
63
+ "enabled": true,
64
+ "order": 600
65
+ },
66
+ "overage-warning": {
67
+ "enabled": true,
68
+ "order": 610
69
+ },
70
+ "request-log": {
71
+ "enabled": false,
72
+ "order": 700
73
+ },
74
+ "usage-log": {
75
+ "enabled": true,
76
+ "order": 650
77
+ },
78
+ "rate-limit-log": {
79
+ "enabled": true,
80
+ "order": 660
81
+ }
20
82
  }
@@ -10,10 +10,10 @@ When using the 1M context window hack (`DISABLE_COMPACT=1` + `CLAUDE_CODE_MAX_CO
10
10
 
11
11
  1. Extracts conversation turns from the session JSONL transcript
12
12
  2. Splits turns into three weighted segments:
13
- - **Foundational** (first 20%) — truncated to 200 chars each
14
- - **Working** (middle 40%) — truncated to 400 chars each
15
- - **Active** (last 40%) — preserved up to 2000 chars each
16
- 3. Sends the weighted extract to Claude Sonnet for summarization
13
+ - **Foundational** (first 20%) — truncated to 300 chars each
14
+ - **Working** (middle 40%) — truncated to 1500 chars each
15
+ - **Active** (last 40%) — preserved up to 8000 chars each
16
+ 3. Sends the weighted extract to Claude Opus for summarization
17
17
  4. Produces a structured summary optimized for agent handoff
18
18
 
19
19
  The weighting ensures recent active work (the part you're most likely to need) gets full detail, while earlier completed work is compressed.
@@ -142,7 +142,7 @@ Use the user context file to fill known gaps.
142
142
 
143
143
  Two costs to account for:
144
144
 
145
- 1. **Summarization call** — the `claude --print` call through Sonnet. At ~50K extract tokens, expect ~1-2% Q5h.
145
+ 1. **Summarization call** — the `claude --print` call through Opus. With the relaxed recent-turn caps the extract is larger (and Opus costs more per token than Sonnet), so expect a few % Q5h rather than ~1-2%. The tradeoff buys markedly higher-fidelity summaries; override with `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` if you need to minimize cost.
146
146
  2. **Cold start after /clear** — the first API call rebuilds the full cache from scratch. Real-world example from a 954K-token session:
147
147
 
148
148
  ```
@@ -153,11 +153,18 @@ Second call: cache_read=957,253 cache_creation=5,569 (warm again)
153
153
 
154
154
  The cold rebuild consumed ~15% Q5h in one call on our Max 5x account. After that single rebuild, the session is warm again and cache hits resume at 99%+.
155
155
 
156
- **Total cost of a manual compact cycle:** ~17% Q5h (2% summarization + 15% cold rebuild). Compare to hitting the 1M wall and losing the session entirely.
156
+ **Total cost of a manual compact cycle:** ~15% Q5h for the cold rebuild plus a few % Q5h for the Opus summarization. Compare to hitting the 1M wall and losing the session entirely.
157
157
 
158
- ### Requires Claude Sonnet access
158
+ ### Summarizer model
159
159
 
160
- The tool uses `claude --print --model claude-sonnet-4-6` for summarization. Sonnet is used instead of Opus to minimize Q5h impact. If Sonnet is unavailable, change the model in the script.
160
+ The tool defaults to `claude --print --model claude-opus-4-7` for the highest-fidelity summary. Override with the `MANUAL_COMPACT_MODEL` env var — e.g. `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` to minimize Q5h impact, or to point at a different model if Opus is rate-limited or retired.
161
+
162
+ ### Troubleshooting: empty summary output
163
+
164
+ If `$OUTPUT` comes back empty, the most likely cause is that the extract exceeded the summarizer's context window — this tool runs near the 1M wall, and the relaxed recent-turn caps (active turns up to 8000 chars) make the extract large on exactly those big sessions. The summarizer call swallows stderr, so an oversized-input rejection surfaces as an empty file rather than a visible error. Fixes, in order of preference:
165
+
166
+ - Use a 1M-window model for the summarization: `MANUAL_COMPACT_MODEL='claude-opus-4-7[1m]' manual-compact.sh ...`
167
+ - Or lower the per-turn caps in the script's extraction block (the `text[:8000]` / `text[:1500]` / `text[:300]` slices).
161
168
 
162
169
  ## Why the 1M Hack Disables /compact
163
170
 
@@ -145,31 +145,33 @@ if total == 0:
145
145
  sys.exit(1)
146
146
 
147
147
  # Split into three segments with different detail levels:
148
- # - First 20%: truncate to 200 chars each (foundational context)
149
- # - Middle 40%: truncate to 400 chars each (working context)
150
- # - Last 40%: full text up to 2000 chars each (active work — most important)
148
+ # - First 20%: truncate to 300 chars each (foundational context)
149
+ # - Middle 40%: truncate to 1500 chars each (working context)
150
+ # - Last 40%: full text up to 8000 chars each (active work — most important)
151
+ # All three caps were relaxed (was 200/400/2000), recent turns most of all, so the
152
+ # summarizer sees the active work in near-full detail; the stronger model (Opus, below) handles it.
151
153
  seg1_end = int(total * 0.2)
152
154
  seg2_end = int(total * 0.6)
153
155
 
154
156
  with open("$EXTRACT", 'w') as f:
155
157
  f.write("=== FOUNDATIONAL CONTEXT (early session) ===\n\n")
156
158
  for role, text in conversation[:seg1_end]:
157
- f.write(f"[{role}]: {text[:200]}\n\n")
159
+ f.write(f"[{role}]: {text[:300]}\n\n")
158
160
 
159
161
  f.write("\n=== WORKING CONTEXT (mid session) ===\n\n")
160
162
  for role, text in conversation[seg1_end:seg2_end]:
161
- f.write(f"[{role}]: {text[:400]}\n\n")
163
+ f.write(f"[{role}]: {text[:1500]}\n\n")
162
164
 
163
165
  f.write("\n=== ACTIVE WORK (recent — preserve in full detail) ===\n\n")
164
166
  for role, text in conversation[seg2_end:]:
165
- f.write(f"[{role}]: {text[:2000]}\n\n")
167
+ f.write(f"[{role}]: {text[:8000]}\n\n")
166
168
 
167
169
  import os
168
170
  size = os.path.getsize("$EXTRACT")
169
171
  print(f"Extracted {total} turns ({size:,} bytes, ~{size//4:,} est. tokens)")
170
- print(f" Foundational: {seg1_end} turns (truncated to 200 chars)")
171
- print(f" Working: {seg2_end - seg1_end} turns (truncated to 400 chars)")
172
- print(f" Active: {total - seg2_end} turns (up to 2000 chars)")
172
+ print(f" Foundational: {seg1_end} turns (truncated to 300 chars)")
173
+ print(f" Working: {seg2_end - seg1_end} turns (truncated to 1500 chars)")
174
+ print(f" Active: {total - seg2_end} turns (up to 8000 chars)")
173
175
  PYEOF
174
176
 
175
177
  # Build the summarization prompt
@@ -199,10 +201,14 @@ ADDITIONAL USER CONTEXT TO PRESERVE:
199
201
  $USER_CONTEXT"
200
202
  fi
201
203
 
204
+ # Summarizer model. Defaults to Opus for highest-fidelity summaries; override
205
+ # with MANUAL_COMPACT_MODEL (e.g. when Opus is rate-limited or retired).
206
+ COMPACT_MODEL="${MANUAL_COMPACT_MODEL:-claude-opus-4-7}"
207
+
202
208
  echo ""
203
- echo "Sending to Claude for summarization..."
209
+ echo "Sending to Claude ($COMPACT_MODEL) for summarization..."
204
210
 
205
- cat "$EXTRACT" | claude --print --model claude-sonnet-4-6 "$PROMPT" > "$OUTPUT" 2>/dev/null
211
+ cat "$EXTRACT" | claude --print --model "$COMPACT_MODEL" "$PROMPT" > "$OUTPUT" 2>/dev/null
206
212
 
207
213
  SIZE=$(wc -c < "$OUTPUT")
208
214
  echo ""
@@ -115,11 +115,13 @@ def draw_bar(consumed_pct, elapsed_pct, width=BAR_WIDTH):
115
115
  # Tick overlays a fill cell when consumed > elapsed, keeping bar width
116
116
  # constant — that's what makes the over-pace state legible (┃ inside the
117
117
  # filled run) rather than just pushing fill cells around.
118
- fill = int(round(max(0, min(100, consumed_pct)) / 100 * width))
118
+ def to_cells(pct):
119
+ return int(round(max(0, min(100, pct)) / 100 * width))
120
+ fill = to_cells(consumed_pct)
119
121
  if elapsed_pct is None:
120
122
  tick = -1
121
123
  else:
122
- tick = min(int(max(0, min(100, elapsed_pct)) / 100 * width), width - 1)
124
+ tick = min(to_cells(elapsed_pct), width - 1)
123
125
  cells = []
124
126
  remaining = fill
125
127
  for i in range(width):