claude-code-cache-fix 3.7.1 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ # cache-fix hook examples
2
+
3
+ Standalone `PreToolUse` / `PostToolUse` / `SessionStart` hook scripts that address specific Claude Code behaviors. These are **examples** — you install them by pointing at them from your own `~/.claude/settings.json` (or per-project `.claude/settings.json`). cache-fix does not register them automatically.
4
+
5
+ These hooks are independent of the proxy: they run client-side via CC's hooks contract and never touch the API request path.
6
+
7
+ ## Available examples
8
+
9
+ | Script | Event | Purpose | Docs |
10
+ |---|---|---|---|
11
+ | `examples/worktree-edit-guard.py` | `PreToolUse` | Block `Edit`/`Write`/`MultiEdit`/`NotebookEdit` calls whose target path falls outside the active git worktree root. Addresses [CC#59628](https://github.com/anthropics/claude-code/issues/59628). | [`docs/hooks/worktree-edit-guard.md`](../docs/hooks/worktree-edit-guard.md) |
12
+
13
+ ## Installing a hook
14
+
15
+ Each script's docs page has its own settings.json snippet. The general shape:
16
+
17
+ ```jsonc
18
+ {
19
+ "hooks": {
20
+ "<EventName>": [
21
+ {
22
+ "matcher": "<ToolName1>|<ToolName2>",
23
+ "hooks": [
24
+ { "type": "command", "command": "/abs/path/to/hooks/examples/<script>" }
25
+ ]
26
+ }
27
+ ]
28
+ }
29
+ }
30
+ ```
31
+
32
+ The `command` field must be an absolute path per CC's hooks contract. Make sure the script is executable (e.g. `chmod +x /abs/path/to/hooks/examples/<script>`).
33
+
34
+ ## CC hooks reference
35
+
36
+ https://code.claude.com/docs/en/hooks — exit-code semantics, structured output schema, matcher patterns, the full event taxonomy.
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env python3
2
+ """PreToolUse hook: refuse Edit/Write/MultiEdit/NotebookEdit calls whose
3
+ target path falls outside the active git worktree root.
4
+
5
+ Addresses anthropics/claude-code#59628 (worktree sessions can corrupt the
6
+ parent main checkout). See docs/hooks/worktree-edit-guard.md for install.
7
+
8
+ Exit codes (per CC PreToolUse hook contract):
9
+ 0 pass-through (allow)
10
+ 2 block (CC feeds stderr back to the agent)
11
+ Posture: environmental failures fail open (exit 0); protocol-shape failures
12
+ (missing expected path field on an in-scope tool) fail closed (exit 2)."""
13
+
14
+ import json
15
+ import os
16
+ import subprocess
17
+ import sys
18
+
19
+ IN_SCOPE = {"Edit", "Write", "MultiEdit", "NotebookEdit"}
20
+ PATH_FIELD = {"Edit": "file_path", "Write": "file_path",
21
+ "MultiEdit": "file_path", "NotebookEdit": "notebook_path"}
22
+
23
+
24
def git(*args, cwd):
    """Invoke ``git`` with ``args`` inside ``cwd`` and hand back its output.

    Returns the whitespace-stripped stdout when git exits with status 0.
    Returns None when git exits non-zero, exceeds the 2-second timeout, or
    cannot be launched at all (OSError — e.g. git missing or cwd invalid).
    """
    command = ("git", *args)
    try:
        proc = subprocess.run(command, cwd=cwd, timeout=2,
                              capture_output=True, text=True, check=False)
    except (subprocess.TimeoutExpired, OSError):
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()
32
+
33
+
34
def worktree_root(cwd):
    """Return the realpath of the worktree root when ``cwd`` lies inside a
    linked git worktree; return None otherwise.

    A linked worktree is recognised by ``--git-dir`` and ``--git-common-dir``
    resolving to different real paths. Both values are anchored at ``cwd``
    and realpath'd before comparing, because git may report them relative to
    ``cwd`` and a raw string comparison would then misfire below the
    repository root. None is also returned when any of the three rev-parse
    queries fails (not a repository, git unavailable, timeout).
    """
    top, git_dir, common_dir = (
        git("rev-parse", flag, cwd=cwd)
        for flag in ("--show-toplevel", "--git-dir", "--git-common-dir")
    )
    if not top or not git_dir or not common_dir:
        return None

    def anchored(path):
        # os.path.join leaves an absolute ``path`` untouched.
        return os.path.realpath(os.path.join(cwd, path))

    if anchored(git_dir) == anchored(common_dir):
        return None  # regular checkout: nothing to guard
    return os.path.realpath(top)
49
+
50
+
51
def resolved_target(target):
    """Resolve ``target`` to a symlink-free path.

    When the path exists on disk (``lexists``, so a dangling symlink counts),
    the whole path is realpath'd, which makes a target that is itself a
    symlink resolve to where it points. When it does not exist yet, only the
    parent directory is realpath'd and the leaf name is re-attached, so a
    symlinked parent directory is still seen through even though the tool
    has not created the file.
    """
    if not os.path.lexists(target):
        parent, leaf = os.path.split(target)
        return os.path.join(os.path.realpath(parent), leaf)
    return os.path.realpath(target)
61
+
62
+
63
def main():
    """Entry point for the PreToolUse hook; returns the process exit code.

    Reads the hook payload as JSON from stdin and decides:

    * 0 (allow) — unreadable or non-object payload, a tool outside IN_SCOPE,
      a cwd that is not inside a linked worktree, or a target that resolves
      inside the worktree root.
    * 2 (block) — an in-scope tool whose path field is missing or not a
      non-empty string (including a ``tool_input`` that is not an object),
      or a target that resolves outside the worktree root. The reason is
      written to stderr.

    Shape checks are explicit so that an unexpected payload follows the
    documented fail-open / fail-closed posture instead of dying with an
    uncaught AttributeError/TypeError.
    """
    try:
        payload = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        return 0  # fail-open: malformed input is an environmental fault
    if not isinstance(payload, dict):
        return 0  # fail-open: valid JSON, but not a hook payload object

    tool = payload.get("tool_name")
    # isinstance first: an unhashable tool_name would make the set lookup raise.
    if not isinstance(tool, str) or tool not in IN_SCOPE:
        return 0

    field = PATH_FIELD[tool]
    tool_input = payload.get("tool_input")
    target = tool_input.get(field) if isinstance(tool_input, dict) else None
    if not isinstance(target, str) or not target:
        sys.stderr.write(f"worktree-edit-guard: refusing {tool} — "
                         f"missing tool_input.{field}.\n")
        return 2  # fail-closed: protocol-shape mismatch

    cwd = payload.get("cwd")
    if not isinstance(cwd, str) or not cwd:
        cwd = os.getcwd()
    root = worktree_root(cwd)
    if root is None:
        return 0  # not in a linked worktree; nothing to enforce

    if not os.path.isabs(target):
        target = os.path.join(cwd, target)
    abs_target = resolved_target(target)
    # Avoid a doubled separator when the root already ends with one
    # (e.g. the filesystem root), which would reject every path under it.
    prefix = root if root.endswith(os.sep) else root + os.sep
    if abs_target == root or abs_target.startswith(prefix):
        return 0

    sys.stderr.write(f"worktree-edit-guard: refusing {tool} on {abs_target} — "
                     f"outside worktree {root}. Use a path inside the worktree, "
                     f"or disable this hook in settings.json.\n")
    return 2
90
+
91
+
92
+ if __name__ == "__main__":
93
+ sys.exit(main())
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "3.7.1",
3
+ "version": "3.9.0",
4
4
  "description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -15,6 +15,7 @@
15
15
  "preload.mjs",
16
16
  "postinstall.js",
17
17
  "tools/",
18
+ "hooks/",
18
19
  "claude-fixed.bat",
19
20
  "proxy/",
20
21
  "bin/",
@@ -0,0 +1,117 @@
1
+ // auto-1m-guard — detect/warn/strip the 1M-context beta token on outbound
2
+ // requests. Addresses anthropics/claude-code#64919 (VS Code Extension forcing
3
+ // 1M context on Pro Plan).
4
+ //
5
+ // Binary-walk (CC v2.1.148 / v2.1.161 — same code body, names churned):
6
+ // sL→kJ: function strips /\[(1|2)m\]/gi from the model string
7
+ // W2→bZ: gates 1M-beta inclusion on /\[1m\]/i.test(model)
8
+ // xKH→E9H: kill switch keys off CLAUDE_CODE_DISABLE_1M_CONTEXT
9
+ // CC always applies the sanitizer at messages.create call sites:
10
+ // messages.create({...J, model: kJ(J.model)})
11
+ // So req.body.model NEVER carries [1m] on the wire — the proxy-visible
12
+ // signal is the anthropic-beta REQUEST HEADER carrying context-1m-2025-08-07.
13
+ //
14
+ // Three modes (env: CACHE_FIX_AUTO_1M_GUARD):
15
+ // off no-op
16
+ // warn (default) stash _auto1mGuard annotation + stderr line; no mutation
17
+ // strip also remove context-1m-2025-08-07 from the anthropic-beta header
18
+ //
19
+ // Order 520: after ttl-management (500) and before thinking-block-sanitize
20
+ // (550) / session-health (590) / cache-telemetry (600). The stashed flat
21
+ // object at ctx.meta._auto1mGuard is spread top-level into the per-session
22
+ // JSON by cache-telemetry, matching the _sessionHealth / _thinkingSanitize
23
+ // pattern.
24
+ //
25
+ // See docs/directives/proxy-auto-1m-guard.md.
26
+
27
+ const BETA_TOKEN_1M = "context-1m-2025-08-07";
28
+ const HEADER_NAME = "anthropic-beta";
29
+ const ADVICE =
30
+ "Outbound request carries the context-1m-2025-08-07 beta header, which enables 1M context. " +
31
+ "On Pro plans this consumes overage credits immediately. To prevent CC from auto-selecting 1M: " +
32
+ "set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 in your env, or use /model with a non-[1m] model variant " +
33
+ "in-session. Strip mode (CACHE_FIX_AUTO_1M_GUARD=strip) intercepts the header at the proxy.";
34
+
35
/**
 * Resolve the guard mode from the CACHE_FIX_AUTO_1M_GUARD environment variable.
 *
 * The value is trimmed and lower-cased before matching, so "Strip" or
 * " off " are honoured instead of silently degrading to the default.
 *
 * @returns {"off" | "warn" | "strip"} "off" or "strip" when explicitly
 *   requested; "warn" for an unset, empty, or unrecognised value.
 */
function modeFromEnv() {
  const requested = (process.env.CACHE_FIX_AUTO_1M_GUARD ?? "").trim().toLowerCase();
  return requested === "off" || requested === "strip" ? requested : "warn";
}
40
+
41
/**
 * Look up the anthropic-beta header without regard to the casing of its name.
 * Mirrors upstream-change-detection.mjs:200-207.
 *
 * @param {object | null | undefined} headers - Header map to search.
 * @returns {{ key: string, raw: * } | null} The property name exactly as it
 *   appears on `headers` (so a rewrite can replace it in place) together
 *   with its value, or null when `headers` is falsy or has no such header.
 */
export function findBetaHeader(headers) {
  if (!headers) return null;
  const key = Object.keys(headers).find((name) => name.toLowerCase() === HEADER_NAME);
  return key === undefined ? null : { key, raw: headers[key] };
}
54
+
55
/**
 * Split an anthropic-beta header value into trimmed, non-empty tokens.
 *
 * Accepts either a single comma-separated string or an array of header
 * values. Each array entry is itself split on commas, because a repeated
 * header can arrive as several entries that each carry a comma-separated
 * list; without that, a token sharing an entry with another would be missed.
 *
 * @param {string | Array<*> | null | undefined} raw - Header value.
 * @returns {string[]} Tokens in order of appearance; [] for falsy or
 *   unsupported input.
 */
export function parseBetaTokens(raw) {
  if (!raw) return [];
  let parts = [];
  if (Array.isArray(raw)) {
    parts = raw;
  } else if (typeof raw === "string") {
    parts = [raw];
  }
  return parts
    .flatMap((part) => String(part).split(","))
    .map((token) => token.trim())
    .filter(Boolean);
}
63
+
64
/**
 * Pure planner for the beta-header rewrite; it performs no mutation itself.
 *
 * @param {string[]} tokens - Parsed anthropic-beta tokens.
 * @param {string} mode - Guard mode; only "strip" causes removal.
 * @returns {{ detected: boolean, stripped: boolean, tokensAfter: string[] }}
 *   `detected` tells whether the 1M-context token is present. When it is
 *   and the mode is "strip", `tokensAfter` is a new array with EVERY
 *   occurrence removed (defensive against duplicates introduced by
 *   intermediaries); otherwise `tokensAfter` is the input array itself.
 */
export function planSanitizeBetaHeader(tokens, mode) {
  const detected = tokens.includes(BETA_TOKEN_1M);
  const stripped = detected && mode === "strip";
  return {
    detected,
    stripped,
    tokensAfter: stripped ? tokens.filter((token) => token !== BETA_TOKEN_1M) : tokens,
  };
}
75
+
76
/**
 * Serialise beta tokens back into a header value using the CC-canonical
 * ", " separator.
 *
 * @param {string[]} tokens - Tokens to serialise.
 * @returns {string} The joined header value; "" for an empty array.
 */
export function joinBetaTokens(tokens) {
  const separator = ", ";
  return tokens.join(separator);
}
80
+
81
/**
 * auto-1m-guard extension definition.
 *
 * onRequest inspects the outbound anthropic-beta header. When the 1M-context
 * token is present and the mode is not "off", it records an annotation on
 * ctx.meta._auto1mGuard and writes one stderr line; in "strip" mode it also
 * rewrites the header without the token.
 */
export default {
  name: "auto-1m-guard",
  description:
    "Detect (warn) or remove (strip) the context-1m-2025-08-07 token from the outbound anthropic-beta header. " +
    "Addresses CC#64919 (VS Code Extension forcing 1M context on Pro Plan). " +
    "Modes via CACHE_FIX_AUTO_1M_GUARD: off | warn (default) | strip.",
  order: 520,

  async onRequest(ctx) {
    const mode = modeFromEnv();
    if (mode === "off") return;

    const found = findBetaHeader(ctx.headers);
    if (!found) return;

    const plan = planSanitizeBetaHeader(parseBetaTokens(found.raw), mode);
    if (!plan.detected) return;

    if (plan.stripped) {
      if (plan.tokensAfter.length > 0) {
        ctx.headers[found.key] = joinBetaTokens(plan.tokensAfter);
      } else {
        // The 1M token was the only beta: drop the header entirely rather
        // than forwarding an empty anthropic-beta value.
        delete ctx.headers[found.key];
      }
    }

    ctx.meta._auto1mGuard = {
      auto_1m_detected: true,
      auto_1m_action: plan.stripped ? "stripped" : "warn",
      auto_1m_advice: ADVICE,
    };

    // Only point at strip mode when it is not already the active mode.
    const nextStep = plan.stripped
      ? " — stripped. "
      : " — see CACHE_FIX_AUTO_1M_GUARD=strip to intercept. ";
    process.stderr.write(
      `[auto-1m-guard] ${BETA_TOKEN_1M} detected in outbound betas` +
        nextStep +
        `Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to prevent CC from sending it.\n`,
    );
  },
};
@@ -49,6 +49,13 @@ export function sessionFilename(rawId) {
49
49
  return "inv-" + createHash("sha256").update(s).digest("hex").slice(0, 16);
50
50
  }
51
51
 
52
/**
 * Build the full path of the per-session JSON file for a raw session id.
 *
 * Exported so sibling extensions (e.g. session-health) can READ the prior
 * state this writer wrote while applying the identical filename rule —
 * reuse, not duplicate.
 *
 * @param {*} rawId - Raw session id, passed through to sessionFilename().
 * @returns {string} `<sessionsDir>/<sessionFilename(rawId)>.json`.
 */
export function sessionFilePath(rawId) {
  const { sessionsDir } = paths();
  const fileName = `${sessionFilename(rawId)}.json`;
  return join(sessionsDir, fileName);
}
58
+
52
59
  function resolveSessionId(headers) {
53
60
  if (!headers) return null;
54
61
  const sid =
@@ -222,6 +229,18 @@ export default {
222
229
  hit_rate: hitRate,
223
230
  timestamp,
224
231
  },
232
+ // Additive session-health fields (session-health extension, order
233
+ // 590, stashes these before this writer runs). Optional — absent if
234
+ // that extension is disabled or produced nothing this request.
235
+ ...(ctx.meta._sessionHealth || {}),
236
+ // Additive thinking-block-sanitize drop count (order 550, opt-in).
237
+ // Optional — absent unless CACHE_FIX_THINKING_SANITIZE=on.
238
+ ...(ctx.meta._thinkingSanitize || {}),
239
+ // Additive auto-1m-guard annotation (order 520). Optional — absent
240
+ // unless the outbound request carried context-1m-2025-08-07 and the
241
+ // mode wasn't off. Keys: auto_1m_detected / auto_1m_action /
242
+ // auto_1m_advice.
243
+ ...(ctx.meta._auto1mGuard || {}),
225
244
  timestamp,
226
245
  session_id: rawSid,
227
246
  },
@@ -0,0 +1,152 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { sessionFilename, sessionFilePath } from "./cache-telemetry.mjs";
3
+
4
+ // session-health — read-only early-warning for the CC thinking-desync wedge
5
+ // (anthropics/claude-code#63147). Long-running Opus 4.7 [1m] sessions grow
6
+ // their live context until CC's own history reconstruction desyncs a
7
+ // thinking-block signature, producing a permanent 400 on every subsequent
8
+ // turn. This extension OBSERVES (never mutates the body) and records the
9
+ // conditions that correlate with the trip, plus emits a one-time stderr warn
10
+ // so the operator can retire the session deliberately before it dies.
11
+ //
12
+ // It hands its computed fields to the existing per-session writer
13
+ // (cache-telemetry, order 600) via ctx.meta._sessionHealth; cache-telemetry
14
+ // merges them into the single per-session JSON write. This extension never
15
+ // writes that file itself (single-writer invariant).
16
+
17
+ const THINKING_TYPES = new Set(["thinking", "redacted_thinking"]);
18
+
19
+ const DEFAULT_WARN_TOKENS = 250_000;
20
+ const DEFAULT_HIGH_TOKENS = 340_000; // just under the observed ~382K trip
21
+
22
+ // --- Module-scope state ---
23
+ // Cross-request accumulators, seeded once-per-process from the prior persisted
24
+ // file so first_seen / max / count stay accurate across the proxy restarts
25
+ // that multi-week sessions inevitably span.
26
+ const sessionState = new Map(); // key -> { firstSeen, max, count }
27
+ // Sessions already given the one-time "high" stderr warn this process.
28
+ const warnedSessions = new Set();
29
+
30
/**
 * Parse a token-threshold setting, falling back to a default.
 *
 * @param {string | number | null | undefined} raw - Raw setting value.
 * @param {number} def - Value to use when `raw` is unusable.
 * @returns {number} The parsed finite, non-negative number; `def` when
 *   `raw` is null/undefined, empty or whitespace-only, not numeric,
 *   infinite, or negative.
 */
function parseTokenEnv(raw, def) {
  if (raw === undefined || raw === null) return def;
  // Number("   ") is 0, which would turn a blank setting into a zero
  // threshold; trim first so blank input means "use the default".
  const text = String(raw).trim();
  if (text === "") return def;
  const parsed = Number(text);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : def;
}
35
+
36
/**
 * Read the session-health settings from an environment-like object.
 * Exported for unit testing.
 *
 * @param {object} [env=process.env] - Source of the CACHE_FIX_THINKING_RISK*
 *   variables.
 * @returns {{ warnTokens: number, highTokens: number, enabled: boolean }}
 *   Thresholds parsed via parseTokenEnv with their defaults, and `enabled`,
 *   which is false only when CACHE_FIX_THINKING_RISK is exactly "off".
 */
export function loadConfig(env = process.env) {
  const {
    CACHE_FIX_THINKING_RISK: riskSwitch,
    CACHE_FIX_THINKING_RISK_WARN_TOKENS: warnRaw,
    CACHE_FIX_THINKING_RISK_HIGH_TOKENS: highRaw,
  } = env;
  return {
    warnTokens: parseTokenEnv(warnRaw, DEFAULT_WARN_TOKENS),
    highTokens: parseTokenEnv(highRaw, DEFAULT_HIGH_TOKENS),
    enabled: riskSwitch !== "off",
  };
}
44
+
45
/**
 * Count the thinking / redacted_thinking content blocks in a request body.
 *
 * @param {{ messages?: Array<*> } | null | undefined} body - Request body.
 * @returns {number} Number of blocks whose `type` is in THINKING_TYPES.
 *   Returns 0 when `body.messages` is not an array. Null/undefined
 *   messages, messages whose `content` is not an array (e.g. a plain
 *   string), and falsy blocks are skipped rather than throwing.
 */
export function countThinkingBlocks(body) {
  if (!Array.isArray(body?.messages)) return 0;
  let total = 0;
  for (const msg of body.messages) {
    // `msg?.` guards a null/undefined entry, which would otherwise throw.
    if (!Array.isArray(msg?.content)) continue;
    for (const block of msg.content) {
      if (block && THINKING_TYPES.has(block.type)) total++;
    }
  }
  return total;
}
56
+
57
/**
 * Sum the token counters that make up the context size of a request.
 *
 * @param {{ inputTokens?: number, cacheRead?: number, cacheCreation?: number } | null | undefined} cacheStats
 * @returns {number} inputTokens + cacheRead + cacheCreation, with each
 *   missing or falsy counter treated as 0; 0 when `cacheStats` is falsy.
 */
export function computeContextTokens(cacheStats) {
  if (!cacheStats) return 0;
  const { inputTokens, cacheRead, cacheCreation } = cacheStats;
  return (inputTokens || 0) + (cacheRead || 0) + (cacheCreation || 0);
}
65
+
66
/**
 * Classify a context size against the configured thresholds.
 *
 * @param {number} contextTokens - Current context size in tokens.
 * @param {{ warnTokens: number, highTokens: number }} thresholds
 * @returns {"high" | "warn" | "ok"} "high" at or above highTokens, else
 *   "warn" at or above warnTokens, else "ok". The high tier is tested first.
 */
export function computeRisk(contextTokens, { warnTokens, highTokens }) {
  const tiers = [
    ["high", highTokens],
    ["warn", warnTokens],
  ];
  const hit = tiers.find(([, floor]) => contextTokens >= floor);
  return hit ? hit[0] : "ok";
}
71
+
72
/**
 * Build the initial in-memory accumulator for a session from whatever the
 * per-session JSON file currently holds.
 *
 * @param {*} rawSid - Raw session id; resolved to a file via sessionFilePath().
 * @param {string} now - Timestamp used as `firstSeen` when none is stored.
 * @returns {{ firstSeen: string, max: number, count: number }} Values taken
 *   from the file's first_seen / thinking_block_max / request_count when
 *   they have the expected type; otherwise `now`, 0 and 0 respectively.
 */
function seedFromFile(rawSid, now) {
  let prev;
  try {
    const text = readFileSync(sessionFilePath(rawSid), "utf8");
    prev = JSON.parse(text);
  } catch {
    // Best-effort: a missing or unparsable file just means a fresh seed.
    prev = null;
  }
  const firstSeen = typeof prev?.first_seen === "string" ? prev.first_seen : now;
  const max = Number.isFinite(prev?.thinking_block_max) ? prev.thinking_block_max : 0;
  const count = Number.isFinite(prev?.request_count) ? prev.request_count : 0;
  return { firstSeen, max, count };
}
83
+
84
/**
 * session-health extension definition. Read-only: it records counters on
 * ctx.meta and never rewrites the request body.
 */
export default {
  name: "session-health",
  description:
    "Observe per-session thinking-desync risk (context size + thinking-block count) and warn before the session reaches the danger zone. Read-only; never mutates the body.",
  // After request-body mutators (so the count is the forwarded body),
  // before the writer (cache-telemetry, 600).
  order: 590,

  /**
   * Stash the thinking-block count of the (near-final) forwarded body.
   * The session id is deliberately not read here: per the ordering notes it
   * is resolved by cache-telemetry's onRequest (order 600), which runs after
   * this hook, so it is read in onStreamEvent instead.
   */
  async onRequest(ctx) {
    ctx.meta._thinkingBlockCount = countThinkingBlocks(ctx.body);
  },

  /**
   * On the first message_delta of a response, update the per-session
   * accumulators, compute the risk tier, emit the one-time "high" warning,
   * and publish the result on ctx.meta._sessionHealth.
   */
  async onStreamEvent(ctx) {
    if (ctx.event?.type !== "message_delta") return;

    const { meta } = ctx;
    // Run once per response, however many message_delta events arrive.
    if (meta._sessionHealthDone) return;
    meta._sessionHealthDone = true;

    const now = new Date().toISOString();
    const rawSid = meta._sessionId ?? null;
    const key = sessionFilename(rawSid);
    const thinkingBlockCount = meta._thinkingBlockCount || 0;
    const contextTokens = computeContextTokens(meta.cacheStats);

    // Seed the accumulator from the persisted file the first time this
    // process sees the session.
    if (!sessionState.has(key)) {
      sessionState.set(key, seedFromFile(rawSid, now));
    }
    const st = sessionState.get(key);
    st.count += 1;
    st.max = Math.max(st.max, thinkingBlockCount);

    const health = {
      context_tokens: contextTokens,
      thinking_block_count: thinkingBlockCount,
      thinking_block_max: st.max,
      first_seen: st.firstSeen,
      request_count: st.count,
    };

    const cfg = loadConfig();
    if (cfg.enabled) {
      const risk = computeRisk(contextTokens, cfg);
      health.thinking_desync_risk = risk;
      const firstHighWarning = risk === "high" && !warnedSessions.has(key);
      if (firstHighWarning) {
        warnedSessions.add(key);
        const sidLabel = rawSid || "unknown";
        process.stderr.write(
          `[session-health] session ${sidLabel} high thinking-desync risk: ` +
            `context_tokens=${contextTokens} (>= ${cfg.highTokens}), ` +
            `thinking_block_count=${thinkingBlockCount}. ` +
            `Consider retiring this session (write SESSION_STATE + /clear).\n`,
        );
      }
    }

    // Hand off to cache-telemetry (order 600) to persist in its single write.
    meta._sessionHealth = health;
  },

  /** Test-only: reset module state between tests. */
  __resetForTests() {
    sessionState.clear();
    warnedSessions.clear();
  },
};
@@ -0,0 +1,130 @@
1
+ // thinking-block-sanitize — request-path mitigation for the CC thinking-desync
2
+ // wedge (anthropics/claude-code#63147). On replay paths (resume / --continue /
3
+ // auto-compaction / parallel-tool-cancel), CC re-sends prior assistant turns'
4
+ // thinking in the OMITTED shape `{ type:"thinking", thinking:"", signature }`.
5
+ // The API rejects modified thinking in the *latest* assistant message with a
6
+ // permanent 400, which wedges the session. This extension drops the omitted
7
+ // thinking blocks the API treats as optional, before the request is forwarded.
8
+ //
9
+ // Resolved turn-selection rule (directive Open Question 1, empirical capture):
10
+ // - drop omitted thinking from ALL prior assistant turns, AND
11
+ // - from the LATEST assistant turn UNLESS it is an active tool-continuation
12
+ // (last block is a tool_use with a following tool_result) — that case is
13
+ // uncoverable by the proxy (the API needs the signed thinking for the
14
+ // pending tool call; we can't restore the emptied text). No env var both
15
+ // preserves thinking and avoids the wedge there — CLAUDE_CODE_DISABLE_THINKING=1
16
+ // / MAX_THINKING_TOKENS=0 stop it only by disabling thinking entirely
17
+ // (lossy); DISABLE_INTERLEAVED_THINKING=1 does NOT stop the 400 — so the
18
+ // answer for that case is don't-resume + heal/retire.
19
+ // Never touches non-empty thinking, and never touches redacted_thinking (v1).
20
+ //
21
+ // OPT-IN for v1: only runs when CACHE_FIX_THINKING_SANITIZE=on (default off) —
22
+ // it mutates request bodies and its coverage is not yet live-validated.
23
+ //
24
+ // Order 550: after the request-body mutators (ttl-management 500) and before
25
+ // session-health (590), so #160's thinking_block_count reflects the forwarded
26
+ // body. The per-request drop count is exposed via ctx.meta._thinkingSanitize
27
+ // for cache-telemetry (600) to merge into the per-session JSON.
28
+
29
/**
 * Tell whether a content block is a thinking block in the omitted shape,
 * i.e. `type` is "thinking" and `thinking` is a string that is empty or
 * whitespace-only.
 *
 * @param {*} block - Candidate content block.
 * @returns {boolean} true only for an omitted thinking block; false for
 *   falsy input, other block types (including redacted_thinking), and
 *   thinking blocks whose text is non-empty or not a string.
 */
export function isOmittedThinking(block) {
  if (!block || block.type !== "thinking") return false;
  const { thinking } = block;
  return typeof thinking === "string" && thinking.trim() === "";
}
37
+
38
/**
 * Tell whether a message carries a tool_result for a given tool call.
 *
 * @param {*} msg - Candidate message.
 * @param {*} toolUseId - Id of the tool_use block being looked for.
 * @returns {boolean} true when `msg.content` is an array containing a
 *   tool_result block whose tool_use_id strictly equals `toolUseId`.
 */
function answersToolUse(msg, toolUseId) {
  if (!msg || !Array.isArray(msg.content)) return false;
  for (const block of msg.content) {
    if (block && block.type === "tool_result" && block.tool_use_id === toolUseId) {
      return true;
    }
  }
  return false;
}
47
+
48
/**
 * Tell whether the message at `idx` is an active tool-continuation: its
 * terminal content block is a `tool_use` that is answered by a `tool_result`
 * with the same `tool_use_id` in some LATER message.
 *
 * The id must match — the mere presence of a later tool_result is not
 * enough. This keeps the guard as narrow as the approved rule: an unanswered
 * terminal tool_use, or a later tool_result that answers a *different* call,
 * is not the protected case.
 *
 * @param {Array<*>} messages - Conversation messages.
 * @param {number} idx - Index of the message to test.
 * @returns {boolean}
 */
export function isActiveToolContinuation(messages, idx) {
  const content = messages[idx]?.content;
  if (!Array.isArray(content) || content.length === 0) return false;

  const terminal = content[content.length - 1];
  if (terminal?.type !== "tool_use" || !terminal.id) return false;

  const isAnswer = (block) =>
    block && block.type === "tool_result" && block.tool_use_id === terminal.id;
  return messages
    .slice(idx + 1)
    .some((later) => Array.isArray(later?.content) && later.content.some(isAnswer));
}
65
+
66
/**
 * Find the last message whose role is "assistant".
 *
 * @param {Array<*>} messages - Conversation messages; nullish entries are skipped.
 * @returns {number} Index of the last assistant message, or -1 if none.
 */
function latestAssistantIndex(messages) {
  let i = messages.length;
  while (i-- > 0) {
    if (messages[i]?.role === "assistant") return i;
  }
  return -1;
}
72
+
73
/**
 * Pure planner for the thinking-block sanitiser; the input is never mutated.
 *
 * Omitted thinking blocks are removed from every assistant message with
 * array content, except the latest assistant message when it is an active
 * tool-continuation. An assistant message left with no content is dropped
 * from the result altogether.
 *
 * @param {*} messages - Conversation messages; a non-array is returned as is.
 * @returns {{ messages: *, dropped: number }} `dropped` is the number of
 *   blocks removed. `messages` is a new array when anything changed,
 *   otherwise the very same input reference.
 */
export function planSanitize(messages) {
  if (!Array.isArray(messages)) return { messages, dropped: 0 };

  const latestIdx = latestAssistantIndex(messages);
  // Index of the one message that must be left untouched, or -1 for none.
  const protectedIdx =
    latestIdx >= 0 && isActiveToolContinuation(messages, latestIdx) ? latestIdx : -1;

  let dropped = 0;
  let changed = false;
  const result = [];

  for (const [i, msg] of messages.entries()) {
    const isAssistantWithBlocks =
      msg && msg.role === "assistant" && Array.isArray(msg.content);
    if (!isAssistantWithBlocks || i === protectedIdx) {
      result.push(msg); // not a sanitisable turn, or the protected continuation
      continue;
    }

    let removedHere = 0;
    const kept = msg.content.filter((block) => {
      if (!isOmittedThinking(block)) return true;
      removedHere += 1;
      return false;
    });
    dropped += removedHere;

    if (kept.length === msg.content.length) {
      result.push(msg); // unchanged
      continue;
    }
    changed = true;
    // A message that lost all of its content is dropped entirely.
    if (kept.length > 0) result.push({ ...msg, content: kept });
  }

  return { messages: changed ? result : messages, dropped };
}
111
+
112
/**
 * thinking-block-sanitize extension definition. Inactive unless
 * CACHE_FIX_THINKING_SANITIZE is exactly "on".
 */
export default {
  name: "thinking-block-sanitize",
  description:
    "Drop omitted (empty-text) thinking blocks from prior assistant turns and the latest non-continuation turn, to head off the CC thinking-desync 400 (#63147). Opt-in via CACHE_FIX_THINKING_SANITIZE=on.",
  order: 550,

  /**
   * Apply planSanitize to the request body's messages and publish the drop
   * count on ctx.meta._thinkingSanitize. The body is only reassigned when
   * at least one block was dropped.
   */
  async onRequest(ctx) {
    const enabled = process.env.CACHE_FIX_THINKING_SANITIZE === "on";
    if (!enabled) return;

    const { body } = ctx;
    if (!Array.isArray(body?.messages)) return;

    const plan = planSanitize(body.messages);
    if (plan.dropped > 0) body.messages = plan.messages;

    // Counts only — never content. Exposed for cache-telemetry to persist
    // and for the #160 session-health signal.
    ctx.meta._thinkingSanitize = { thinking_blocks_dropped: plan.dropped };
  },
};
@@ -10,7 +10,17 @@ function detectRequestType(system) {
10
10
  return isSubagent ? "subagent" : "main";
11
11
  }
12
12
 
13
+ // Thinking and redacted_thinking blocks must be returned to the API byte-identical
14
+ // to the original model response — the API validates them and rejects any
15
+ // modification with "thinking blocks ... cannot be modified" (a 400 on the whole
16
+ // request). On Opus 4.7 interleaved thinking, CC can place a cache_control
17
+ // breakpoint on a thinking block; injecting a ttl there would mutate the block
18
+ // and break the request. Skip them — the marginal TTL benefit on one breakpoint
19
+ // is never worth corrupting a thinking turn.
20
+ const PROTECTED_BLOCK_TYPES = new Set(["thinking", "redacted_thinking"]);
21
+
13
22
  function injectTtl(block, ttlParam) {
23
+ if (block && PROTECTED_BLOCK_TYPES.has(block.type)) return block;
14
24
  if (block.cache_control?.type === "ephemeral" && !block.cache_control.ttl) {
15
25
  return { ...block, cache_control: { ...block.cache_control, ttl: ttlParam } };
16
26
  }