claude-code-cache-fix 3.0.5 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +1 -1
- package/README.md +3 -1
- package/README.zh.md +1 -1
- package/package.json +1 -1
- package/proxy/extensions/cache-control-normalize.mjs +2 -0
- package/proxy/extensions/content-strip.mjs +89 -0
- package/proxy/extensions/deferred-tools-restore.mjs +361 -0
- package/proxy/extensions/fingerprint-strip.mjs +2 -0
- package/proxy/extensions/fresh-session-sort.mjs +2 -0
- package/proxy/extensions/identity-normalization.mjs +2 -0
- package/proxy/extensions/image-strip.mjs +83 -0
- package/proxy/extensions/output-efficiency-rewrite.mjs +64 -0
- package/proxy/extensions/prefix-diff.mjs +277 -0
- package/proxy/extensions/smoosh-split.mjs +68 -0
- package/proxy/extensions/sort-stabilization.mjs +2 -0
- package/proxy/extensions/tool-input-normalize.mjs +73 -0
- package/proxy/extensions/ttl-management.mjs +2 -0
- package/proxy/extensions/usage-log.mjs +46 -0
- package/proxy/extensions.json +3 -0
package/README.ko.md
CHANGED
|
@@ -125,7 +125,7 @@ VSIX 없이 수동 VS Code 래퍼를 설정하려면 [docs/preload-setup.md](doc
|
|
|
125
125
|
|
|
126
126
|
**하는 것:** 캐시 버그 수정을 위해 발신 요청 구조(블록 순서, 핑거프린트, TTL, git-status)를 수정합니다. 모니터링을 위해 응답 헤더와 SSE 사용량 데이터를 읽습니다.
|
|
127
127
|
|
|
128
|
-
**하지 않는 것:** 프록시 또는 인터셉터에서 네트워크 호출을 하지 않습니다. 모든 텔레메트리는 `~/.claude/` 아래 로컬 파일에 기록됩니다.
|
|
128
|
+
**하지 않는 것:** 프록시 또는 인터셉터에서 네트워크 호출을 하지 않습니다. 모든 텔레메트리는 `~/.claude/` 아래 로컬 파일에 기록됩니다. 데이터는 사용자의 컴퓨터를 떠나지 않습니다.
|
|
129
129
|
|
|
130
130
|
**공급망:** 프록시 모드: `proxy/extensions/`에 7개 소형 확장 모듈(각 200줄 미만). 프리로드 모드: 단일 비축소 파일(`preload.mjs`, ~1,700줄). 개발 의존성 1개(`zod`, 테스트 스키마 검증용). 설치 전 코드를 직접 검토하십시오. npm provenance로 각 버전이 소스 커밋에 연결됩니다.
|
|
131
131
|
|
package/README.md
CHANGED
|
@@ -162,7 +162,7 @@ For manual VS Code wrapper setup (without the VSIX), see [docs/preload-setup.md]
|
|
|
162
162
|
|
|
163
163
|
**What it does:** Modifies outgoing request structure (block order, fingerprint, TTL, git-status) to fix cache bugs. Reads response headers and SSE usage data for monitoring.
|
|
164
164
|
|
|
165
|
-
**What it does NOT do:** No network calls from the proxy or interceptor. All telemetry is written to local files under `~/.claude/`. No data leaves your machine
|
|
165
|
+
**What it does NOT do:** No network calls from the proxy or interceptor. All telemetry is written to local files under `~/.claude/`. No data leaves your machine.
|
|
166
166
|
|
|
167
167
|
**Supply chain:** Proxy mode: 7 small extension modules in `proxy/extensions/` (each under 200 lines). Preload mode: single unminified file (`preload.mjs`, ~1,700 lines). One dev dependency (`zod` for schema validation in tests only). Review before installing. npm provenance links each published version to its source commit.
|
|
168
168
|
|
|
@@ -281,6 +281,8 @@ export CLAUDE_CODE_DISABLE_GIT_INSTRUCTIONS=1
|
|
|
281
281
|
|
|
282
282
|
Or add `"includeGitInstructions": false` to `~/.claude/settings.json`. Claude Code can still run `git status` via the Bash tool when it needs context. Community-validated by [@wadabum](https://github.com/cnighswonger/claude-code-cache-fix/issues/11): 18-token cache creation across git state changes (vs thousands without the flag).
|
|
283
283
|
|
|
284
|
+
**Why we don't ship a proxy extension for this:** the proxy intercepts requests after Claude Code has already composed the system prompt — by then the volatile `git status` text is already part of the prefix that the model conditioned on in the previous turn, and stripping it post-hoc would itself bust the cache. The fix has to happen at the source. `CLAUDE_CODE_DISABLE_GIT_INSTRUCTIONS=1` prevents the injection before the prompt is composed, which is why the native flag is the right tool. Stripping post-hoc would also remove model-visible context that an explicit Bash call can recover, and would risk false-positive matches against assistant-written text.
|
|
285
|
+
|
|
284
286
|
## Image stripping (preload mode)
|
|
285
287
|
|
|
286
288
|
Images read via the Read tool persist as base64 in conversation history, riding along on every subsequent API call. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and **~85,000+ on Opus 4.7** due to the new tokenizer. Image stripping is strongly recommended on 4.7.
|
package/README.zh.md
CHANGED
|
@@ -137,7 +137,7 @@ NODE_OPTIONS="--import claude-code-cache-fix" claude
|
|
|
137
137
|
|
|
138
138
|
**它做什么:** 修改出站请求结构(块排序、指纹、TTL、git-status)以修复缓存 bug。读取响应头和 SSE 使用量数据用于监控。
|
|
139
139
|
|
|
140
|
-
**它不做什么:** 代理或拦截器不会发起网络调用。所有遥测数据写入 `~/.claude/`
|
|
140
|
+
**它不做什么:** 代理或拦截器不会发起网络调用。所有遥测数据写入 `~/.claude/` 下的本地文件。数据不会离开你的机器。
|
|
141
141
|
|
|
142
142
|
**供应链:** 代理模式:7 个小型扩展模块在 `proxy/extensions/` 中(每个不到 200 行)。预加载模式:单个未压缩文件(`preload.mjs`,约 1,700 行)。一个开发依赖(`zod`,仅用于测试中的模式验证)。安装前请审查代码。npm 出处(provenance)将每个发布版本链接到其源代码提交。
|
|
143
143
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "3.0.5",
|
|
3
|
+
"version": "3.1.0",
|
|
4
4
|
"description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
|
@@ -24,6 +24,8 @@ function countUserCacheControlMarkers(body) {
|
|
|
24
24
|
return n;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
export { stripCacheControlMarkers, countUserCacheControlMarkers };
|
|
28
|
+
|
|
27
29
|
export default {
|
|
28
30
|
name: "cache-control-normalize",
|
|
29
31
|
description: "Strip scattered cache_control markers from user messages and apply canonical placement",
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Exact text of a "continue trailer" block.
const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";

// Matches a text that is one <system-reminder> wrapper; group 1 is the inner payload.
const REMINDER_WRAP_REGEX = /^<system-reminder>\n([\s\S]*?)\n<\/system-reminder>\s*$/;

// Inner-payload patterns that classify a reminder as bookkeeping.
const BOOKKEEPING_PATTERNS = [
  /^Token usage: \d+\/\d+; \d+ remaining\s*$/,
  /^Output tokens — turn: [^\n]+ · session: [^\n]+\s*$/,
  /^USD budget: \$[\d.]+\/\$[\d.]+; \$[\d.]+ remaining\s*$/,
  /^The task tools haven't been used recently\./,
  /^The TodoWrite tool hasn't been used recently\./,
  /^Remaining conversation turns: /,
  /^Messages? until auto-compact: /,
];

/**
 * Report whether `block` is a text content block whose text is exactly
 * CONTINUE_TRAILER_TEXT.
 *
 * @param {*} block Candidate content block; any value is accepted.
 * @returns {boolean} true only for an object with type "text" and the exact trailer text.
 */
function isContinueTrailerBlock(block) {
  if (!block || typeof block !== "object") return false;
  if (block.type !== "text") return false;
  return block.text === CONTINUE_TRAILER_TEXT;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Decide whether `text` is a single <system-reminder> wrapper whose inner
 * payload matches at least one entry of BOOKKEEPING_PATTERNS.
 *
 * @param {*} text Candidate block text; non-strings are never reminders.
 * @returns {boolean}
 */
function isBookkeepingReminder(text) {
  if (typeof text !== "string") return false;

  const wrapped = REMINDER_WRAP_REGEX.exec(text);
  if (wrapped === null) return false;

  // Group 1 is the text between the opening and closing reminder tags.
  const [, payload] = wrapped;
  return BOOKKEEPING_PATTERNS.some((pattern) => pattern.test(payload));
}
|
|
33
|
+
|
|
34
|
+
/**
 * Remove continue-trailer blocks and bookkeeping <system-reminder> blocks
 * from the array-form content of user messages.
 *
 * Messages are never mutated: a message that loses blocks is replaced by a
 * shallow copy with a filtered `content` array. A message is left untouched
 * when nothing matched, or when every block matched (so a message is never
 * emitted with empty content).
 *
 * @param {*} messages Request `messages`; non-arrays are returned as-is.
 * @returns {{messages: *, stats: ({trailerCount: number, reminderCount: number}|null)}}
 *   `stats` is null (and `messages` is the input reference) when nothing was stripped.
 */
function stripContentBlocks(messages) {
  if (!Array.isArray(messages)) return { messages, stats: null };

  let trailerCount = 0;
  let reminderCount = 0;

  const result = messages.map((msg) => {
    // Null/undefined entries pass through instead of throwing on `.role`.
    if (!msg || msg.role !== "user" || !Array.isArray(msg.content)) return msg;

    let msgTrailers = 0;
    let msgReminders = 0;

    const kept = msg.content.filter((block) => {
      if (isContinueTrailerBlock(block)) {
        msgTrailers++;
        return false;
      }
      // Optional chaining: a null/undefined block is kept, not dereferenced.
      if (block?.type === "text" && isBookkeepingReminder(block.text)) {
        msgReminders++;
        return false;
      }
      return true;
    });

    // Nothing removed, or everything would be removed: keep the original.
    if (kept.length === 0 || kept.length === msg.content.length) return msg;

    trailerCount += msgTrailers;
    reminderCount += msgReminders;
    return { ...msg, content: kept };
  });

  const total = trailerCount + reminderCount;
  return {
    messages: total > 0 ? result : messages,
    stats: total > 0 ? { trailerCount, reminderCount } : null,
  };
}
|
|
71
|
+
|
|
72
|
+
export { isContinueTrailerBlock, isBookkeepingReminder, stripContentBlocks };
|
|
73
|
+
|
|
74
|
+
/**
 * content-strip extension descriptor. Its `onRequest` hook replaces
 * `ctx.body.messages` with the stripped copy and records the counts under
 * `ctx.meta.contentStripStats` when anything was removed.
 */
const contentStripExtension = {
  name: "content-strip",
  description: "Strip continue trailers and bookkeeping system-reminders from user messages",
  enabled: false,
  order: 350,

  /**
   * @param {{body?: {messages?: *}, meta?: object}} ctx Request context.
   * @returns {Promise<void>}
   */
  async onRequest(ctx) {
    if (!ctx?.body?.messages) return;

    const { messages, stats } = stripContentBlocks(ctx.body.messages);
    if (!stats) return;

    ctx.body.messages = messages;
    // Create the meta bag on demand so a context without one does not throw.
    ctx.meta = ctx.meta || {};
    ctx.meta.contentStripStats = stats;
  },
};

export default contentStripExtension;
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
// deferred-tools-restore — preserve cache prefix across MCP reconnect race.
|
|
2
|
+
//
|
|
3
|
+
// PROBLEM (mirrored from preload.mjs ~429-518):
|
|
4
|
+
// On `claude --continue`, if MCP servers haven't finished reconnecting before
|
|
5
|
+
// CC fires the first post-resume request, the
|
|
6
|
+
// <system-reminder>The following deferred tools are now available via ToolSearch…</system-reminder>
|
|
7
|
+
// block at msg[0] (or wherever the attachment lands post-compaction) shrinks
|
|
8
|
+
// dramatically. ~40 tools collapse to a handful of CC built-ins, and CC
|
|
9
|
+
// injects a trailing
|
|
10
|
+
// "The following deferred tools are no longer available (their MCP server
|
|
11
|
+
// disconnected). Do not search for them — ToolSearch will return no match:"
|
|
12
|
+
// notice. That block change at the root of the message array busts the cache
|
|
13
|
+
// at the very top — the entire ~940K prompt re-caches.
|
|
14
|
+
//
|
|
15
|
+
// FIX: Persist the clean (no-UNAVAILABLE-marker) form of the block. On a
|
|
16
|
+
// subsequent request where the block has shrunk and contains the UNAVAILABLE
|
|
17
|
+
// marker, substitute the persisted full bytes. Restore only if the snapshot
|
|
18
|
+
// is STRICTLY LONGER than the current block — never downgrade to a stale
|
|
19
|
+
// shorter snapshot.
|
|
20
|
+
//
|
|
21
|
+
// SNAPSHOT KEY (proxy-specific adaptation):
|
|
22
|
+
// Preload uses process.cwd() because each preload runs in the CC process,
|
|
23
|
+
// where cwd identifies the project. The proxy is a long-lived daemon; its
|
|
24
|
+
// cwd is shared across all CC sessions on the host. To key per-project, the
|
|
25
|
+
// proxy parses the cwd OUT of the system prompt content (CC injects
|
|
26
|
+
// " - Primary working directory: <path>" in the # Environment section) and
|
|
27
|
+
// keys on sha1("cwd:" + that path). Verified empirically against CC v2.1.117.
|
|
28
|
+
//
|
|
29
|
+
// FAIL-OPEN: If the cwd marker is absent (CC format drift, missing system
|
|
30
|
+
// prompt), the extension no-ops the request entirely. Result: status quo
|
|
31
|
+
// cache-bust, never silently restores the wrong block.
|
|
32
|
+
|
|
33
|
+
import {
|
|
34
|
+
mkdir as _mkdir,
|
|
35
|
+
readFile as _readFile,
|
|
36
|
+
writeFile as _writeFile,
|
|
37
|
+
rename as _rename,
|
|
38
|
+
} from "node:fs/promises";
|
|
39
|
+
import { join } from "node:path";
|
|
40
|
+
import { homedir } from "node:os";
|
|
41
|
+
import { createHash } from "node:crypto";
|
|
42
|
+
|
|
43
|
+
// Environment switches, read once at module load; only the exact value "1" enables them.
const SKIP = process.env.CACHE_FIX_SKIP_DEFERRED_TOOLS_RESTORE === "1";
const DEBUG = process.env.CACHE_FIX_DEBUG === "1";

// Substring identifying the deferred-tools attachment block.
const AVAILABLE_MARKER = "The following deferred tools are now available via ToolSearch";
// Substring identifying the "tools went away" notice inside that block.
const UNAVAILABLE_MARKER = "The following deferred tools are no longer available";

// Real fs/promises functions; callers may override individual entries via options.fs.
const DEFAULT_FS = {
  mkdir: _mkdir,
  readFile: _readFile,
  writeFile: _writeFile,
  rename: _rename,
};
|
|
57
|
+
|
|
58
|
+
/**
 * Directory that holds deferred-tools snapshots:
 * `<home>/.claude/cache-fix-state`.
 *
 * @returns {string}
 */
function getSnapshotDir() {
  const stateSegments = [".claude", "cache-fix-state"];
  return join(homedir(), ...stateSegments);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Write one prefixed diagnostic line to stderr; a no-op unless DEBUG is set.
 *
 * @param {string} msg Message text (a newline is appended).
 */
function debug(msg) {
  if (!DEBUG) return;
  process.stderr.write(`[deferred-tools-restore] ${msg}\n`);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Extract the working-directory path from the system prompt.
 *
 * The parser is line-based and deliberately strict. A section is accepted
 * only when ALL of the following hold:
 *   1. a line equals `# Environment` after trimming;
 *   2. the next non-blank line equals
 *      `You have been invoked in the following environment:` after trimming;
 *   3. a `- Primary working directory: <path>` bullet appears in the bullet
 *      list directly after that intro line, where the list ends at the first
 *      blank line or first line that does not start with `-`.
 *
 * Requiring all three rejects a bare `# Environment` mention (rule 2), a
 * marker line without the header (rule 1), and a marker line that is not in
 * the list right after the intro (rule 3).
 *
 * Ambiguity guard: if valid sections yield more than one distinct path
 * (within one block or across blocks) the result is null, so the caller can
 * no-op rather than act on the wrong key.
 *
 * Accepted inputs:
 *   - an array of content blocks: every string `.text` is scanned in order;
 *   - a single string: scanned directly;
 *   - anything else (or a falsy value): null.
 */
const ENV_HEADER_LINE = "# Environment";
const ENV_INTRO_LINE = "You have been invoked in the following environment:";
const CWD_BULLET_RE = /^-\s+Primary working directory:\s*(.+?)\s*$/;

/**
 * Collect the cwd of every valid environment section found in `text`
 * (at most one per section; the first matching bullet wins).
 *
 * @param {string} text One system-prompt text.
 * @returns {string[]} Paths in order of appearance; may contain duplicates.
 */
function parseAllCwdsFromBlock(text) {
  const lines = text.split("\n");
  const cwds = [];

  lines.forEach((line, headerIdx) => {
    if (line.trim() !== ENV_HEADER_LINE) return;

    // Tolerate blank lines between the header and the intro line.
    let introIdx = headerIdx + 1;
    while (introIdx < lines.length && lines[introIdx].trim() === "") introIdx += 1;
    if (introIdx >= lines.length) return;
    if (lines[introIdx].trim() !== ENV_INTRO_LINE) return;

    // Walk the bullet list; a blank or non-bullet line closes the section.
    let cursor = introIdx + 1;
    while (cursor < lines.length) {
      const bullet = lines[cursor].trimStart();
      if (bullet === "" || !bullet.startsWith("-")) break;
      const hit = CWD_BULLET_RE.exec(bullet);
      if (hit && hit[1]) {
        cwds.push(hit[1]);
        break;
      }
      cursor += 1;
    }
  });

  return cwds;
}

/**
 * @param {*} system Request `system` value (string, block array, or other).
 * @returns {string|null} The single unambiguous cwd, or null.
 */
function extractCwdFromSystem(system) {
  if (!system) return null;

  let texts;
  if (typeof system === "string") {
    texts = [system];
  } else if (Array.isArray(system)) {
    texts = system
      .filter((block) => block && typeof block === "object" && typeof block.text === "string")
      .map((block) => block.text);
  } else {
    return null;
  }

  const distinct = new Set();
  for (const text of texts) {
    for (const cwd of parseAllCwdsFromBlock(text)) {
      distinct.add(cwd);
      if (distinct.size > 1) return null; // ambiguous → no-op
    }
  }

  if (distinct.size !== 1) return null;
  const [only] = distinct;
  return only;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Derive the snapshot key for a working directory: the first 16 hex
 * characters of sha1("cwd:" + cwd).
 *
 * @param {string} cwd Working-directory path.
 * @returns {string} 16-character lowercase hex string.
 */
function deriveSnapshotKey(cwd) {
  const hash = createHash("sha1");
  hash.update(`cwd:${cwd}`);
  return hash.digest("hex").slice(0, 16);
}
|
|
158
|
+
|
|
159
|
+
/**
 * Find the first text block in a user message whose text contains
 * AVAILABLE_MARKER.
 *
 * Assistant messages are skipped so that an assistant turn quoting the
 * marker verbatim cannot produce a false match.
 *
 * @param {*} body Request body; anything without a `messages` array yields null.
 * @returns {{msgIdx: number, blockIdx: number, text: string}|null}
 */
function findDeferredToolsBlockInBody(body) {
  const messages = body?.messages;
  if (!Array.isArray(messages)) return null;

  for (const [msgIdx, msg] of messages.entries()) {
    if (!msg || msg.role !== "user" || !Array.isArray(msg.content)) continue;

    const blockIdx = msg.content.findIndex(
      (block) =>
        block &&
        block.type === "text" &&
        typeof block.text === "string" &&
        block.text.includes(AVAILABLE_MARKER),
    );
    if (blockIdx !== -1) {
      return { msgIdx, blockIdx, text: msg.content[blockIdx].text };
    }
  }
  return null;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Write `data` to `finalPath` via write-to-temp-then-rename.
 *
 * The temp name is unique per invocation (pid, timestamp, random suffix) so
 * concurrent calls never collide on a shared `.tmp` path. If the write or the
 * rename fails, the temp file is removed on a best-effort basis (otherwise
 * each failure would leave a uniquely named orphan behind) and the original
 * error is rethrown.
 *
 * @param {string} finalPath Destination path.
 * @param {string} data Content to write.
 * @param {{writeFile: Function, rename: Function, unlink?: Function}} fs
 *   fs/promises-style functions. `unlink` is optional; the real
 *   node:fs/promises `unlink` is used when it is not supplied.
 * @returns {Promise<void>}
 * @throws Whatever `fs.writeFile` / `fs.rename` rejected with.
 */
async function atomicWriteText(finalPath, data, fs) {
  const nonce = Math.random().toString(36).slice(2, 10);
  const tmpPath = `${finalPath}.${process.pid}.${Date.now()}.${nonce}.tmp`;
  try {
    await fs.writeFile(tmpPath, data);
    await fs.rename(tmpPath, finalPath);
  } catch (err) {
    try {
      const unlink = fs.unlink ?? (await import("node:fs/promises")).unlink;
      await unlink(tmpPath);
    } catch {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
}
|
|
194
|
+
|
|
195
|
+
/**
 * Persist a snapshot of the clean deferred-tools block to
 * `<dir>/deferred-tools-<key>.txt`.
 *
 * Never rejects on I/O failure: errors are reported through `debug` and
 * surfaced as `persisted: false`.
 *
 * @param {string} text The full block text to persist.
 * @param {object} options
 * @param {string} options.dir Snapshot directory (created recursively).
 * @param {string} options.key Snapshot key (from deriveSnapshotKey).
 * @param {object} [options.fs] fs/promises overrides for tests.
 * @returns {Promise<{persisted: boolean, bytes: number, path: string}>}
 *   `bytes` is the UTF-8 byte length of `text` on success, 0 on failure.
 */
async function persistDeferredTools(text, options) {
  const { dir, key } = options;
  const fs = { ...DEFAULT_FS, ...(options.fs || {}) };
  const path = join(dir, `deferred-tools-${key}.txt`);

  try {
    await fs.mkdir(dir, { recursive: true });
    await atomicWriteText(path, text, fs);
    const bytes = Buffer.byteLength(text, "utf-8");
    return { persisted: true, bytes, path };
  } catch (err) {
    debug(`persist failed at ${path}: ${err?.message ?? err}`);
    return { persisted: false, bytes: 0, path };
  }
}
|
|
219
|
+
|
|
220
|
+
/**
 * Read and validate the snapshot at `<dir>/deferred-tools-<key>.txt`.
 *
 * Returns the snapshot text only when all checks pass:
 *   - the file can be read (a missing file is silent; other read errors are
 *     reported through `debug`);
 *   - its length is at least the AVAILABLE_MARKER length (sanity floor);
 *   - it contains AVAILABLE_MARKER (guards against a truncated file that
 *     still passes the length check);
 *   - it does NOT contain UNAVAILABLE_MARKER — restoring a "no longer
 *     available" block would be worse than not restoring at all.
 *
 * @param {object} options
 * @param {string} options.dir Snapshot directory.
 * @param {string} options.key Snapshot key (from deriveSnapshotKey).
 * @param {object} [options.fs] fs/promises overrides for tests.
 * @returns {Promise<string|null>} Snapshot text, or null when absent/rejected.
 */
async function restoreDeferredTools(options) {
  const { dir, key } = options;
  const fs = { ...DEFAULT_FS, ...(options.fs || {}) };
  const path = join(dir, `deferred-tools-${key}.txt`);

  let snapshot;
  try {
    snapshot = await fs.readFile(path, "utf-8");
  } catch (err) {
    const missing = !err || err.code === "ENOENT";
    if (!missing) debug(`snapshot read failed at ${path}: ${err?.message ?? err}`);
    return null;
  }
  if (typeof snapshot !== "string") return null;

  // First failing check wins; order matches the validation list above.
  let rejection = null;
  if (snapshot.length < AVAILABLE_MARKER.length) {
    rejection = `too short: ${snapshot.length} bytes`;
  } else if (!snapshot.includes(AVAILABLE_MARKER)) {
    rejection = "missing AVAILABLE marker";
  } else if (snapshot.includes(UNAVAILABLE_MARKER)) {
    rejection = "contains UNAVAILABLE marker, not a clean baseline";
  }

  if (rejection !== null) {
    debug(`snapshot rejected (${rejection}) at ${path}`);
    return null;
  }
  return snapshot;
}
|
|
261
|
+
|
|
262
|
+
export {
|
|
263
|
+
extractCwdFromSystem,
|
|
264
|
+
deriveSnapshotKey,
|
|
265
|
+
findDeferredToolsBlockInBody,
|
|
266
|
+
persistDeferredTools,
|
|
267
|
+
restoreDeferredTools,
|
|
268
|
+
AVAILABLE_MARKER,
|
|
269
|
+
UNAVAILABLE_MARKER,
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
/**
 * deferred-tools-restore extension descriptor.
 *
 * `onRequest` flow:
 *   1. Derive the snapshot key from the cwd parsed out of `body.system`;
 *      no cwd → record a skip and leave the request alone.
 *   2. Locate the deferred-tools block; none → record a skip.
 *   3. Block without the UNAVAILABLE marker → persist it as the clean baseline.
 *   4. Block with the UNAVAILABLE marker → substitute the persisted snapshot,
 *      but only if the snapshot is strictly longer than the current block.
 *
 * Every outcome is recorded under `ctx.meta.deferredToolsRestoreStats`.
 */
const deferredToolsRestoreExtension = {
  name: "deferred-tools-restore",
  description:
    "Persist and restore the deferred-tools attachment block across sessions to prevent MCP-reconnect-race cache busts at resume time",
  enabled: true,
  order: 350,

  /**
   * @param {{body?: object, meta?: object}} ctx Request context.
   * @returns {Promise<void>}
   */
  async onRequest(ctx) {
    if (SKIP) return;
    if (!ctx || !ctx.body) return;
    const body = ctx.body;

    // Creates the meta bag on demand, then stores this request's outcome.
    const record = (stats) => {
      ctx.meta = ctx.meta || {};
      ctx.meta.deferredToolsRestoreStats = stats;
    };

    // 1. Parse cwd from system. No marker → no-op (honest degradation).
    const cwd = extractCwdFromSystem(body.system);
    if (!cwd) {
      record({ action: "skipped", reason: "no-cwd" });
      return;
    }
    const key = deriveSnapshotKey(cwd);

    // 2. Locate the deferred-tools block.
    const found = findDeferredToolsBlockInBody(body);
    if (!found) {
      record({ action: "skipped", reason: "no-block", key });
      return;
    }

    const dir = getSnapshotDir();

    // 3. Clean baseline → persist.
    if (!found.text.includes(UNAVAILABLE_MARKER)) {
      const result = await persistDeferredTools(found.text, { dir, key });
      record({
        action: result.persisted ? "persisted" : "skipped",
        bytes: result.bytes,
        key,
      });
      if (result.persisted) {
        process.stderr.write(
          `[deferred-tools-restore] persisted ${result.bytes} bytes (key=${key})\n`,
        );
      }
      return;
    }

    // 4. Block has UNAVAILABLE marker → attempt restore.
    const snapshot = await restoreDeferredTools({ dir, key });
    if (!snapshot) {
      record({ action: "skipped", reason: "no-snapshot", key });
      return;
    }

    // Strictly-longer guard. Equal-length snapshots are not restored.
    if (snapshot.length <= found.text.length) {
      record({
        action: "skipped",
        reason: "snapshot-not-longer",
        key,
        snapshotBytes: snapshot.length,
        currentBytes: found.text.length,
      });
      return;
    }

    // Substitute on copies only: new block, new content array, new message,
    // and a new messages array, so the caller-owned array is not index-assigned.
    const targetMsg = body.messages[found.msgIdx];
    const newContent = targetMsg.content.slice();
    newContent[found.blockIdx] = { ...newContent[found.blockIdx], text: snapshot };
    const newMessages = body.messages.slice();
    newMessages[found.msgIdx] = { ...targetMsg, content: newContent };
    body.messages = newMessages;

    record({
      action: "restored",
      bytes: snapshot.length,
      previousBytes: found.text.length,
      key,
    });
    process.stderr.write(
      `[deferred-tools-restore] restored ${found.text.length}→${snapshot.length} bytes ` +
        `at msg[${found.msgIdx}].content[${found.blockIdx}] (key=${key})\n`,
    );
  },
};

export default deferredToolsRestoreExtension;
|
|
@@ -88,6 +88,8 @@ function stabilizeFingerprint(system, messages) {
|
|
|
88
88
|
return { attrIdx, newText, oldFingerprint, stableFingerprint };
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
+
export { computeFingerprint, extractRealUserMessageText, extractFirstMessageText, stabilizeFingerprint };
|
|
92
|
+
|
|
91
93
|
export default {
|
|
92
94
|
name: "fingerprint-strip",
|
|
93
95
|
description: "Stabilize cc_version fingerprint in system prompt for cache prefix consistency",
|
|
@@ -86,6 +86,8 @@ function fixBlockText(blockType, text) {
|
|
|
86
86
|
return pinBlockContent(blockType, fixed);
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
export { isSystemReminder, isHooksBlock, isSkillsBlock, isDeferredToolsBlock, isMcpBlock, isRelocatableBlock, isClearArtifact, sortSkillsBlock, sortDeferredToolsBlock, stripSessionKnowledge, pinBlockContent, getBlockType, fixBlockText };
|
|
90
|
+
|
|
89
91
|
export default {
|
|
90
92
|
name: "fresh-session-sort",
|
|
91
93
|
description: "Relocate scattered blocks to messages[0] in deterministic fresh-session order",
|
|
@@ -76,6 +76,8 @@ function isBookkeepingReminder(text) {
|
|
|
76
76
|
return false;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
export { pinBlockContent, stripSessionKnowledge, normalizeSessionStartText, isContinueTrailerBlock, isBookkeepingReminder };
|
|
80
|
+
|
|
79
81
|
export default {
|
|
80
82
|
name: "identity-normalization",
|
|
81
83
|
description: "Normalize volatile identity fields (SessionStart, Continue trailers, bookkeeping) for cache stability",
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Default number of trailing user messages whose images are kept; 0 disables stripping.
const KEEP_LAST = Number.parseInt(process.env.CACHE_FIX_IMAGE_KEEP_LAST || "0", 10);
// Text block substituted for every stripped image.
const PLACEHOLDER = "[image stripped from history — file may still be on disk]";

/**
 * Replace image items inside `tool_result` blocks of older user messages
 * with a text placeholder.
 *
 * The last `keepLast` user messages are left untouched; user messages before
 * them have every `type: "image"` item in an array-form tool_result content
 * replaced. Inputs are never mutated: changed messages/blocks are shallow
 * copies.
 *
 * Null/undefined messages, blocks, and items are passed through unchanged.
 * A fractional `keepLast` is floored (values below 1 disable stripping), so
 * the cutoff index is always a real array index.
 *
 * @param {*} messages Request `messages`; non-arrays are returned as-is.
 * @param {number} keepLast How many trailing user messages keep their images.
 * @returns {{messages: *, stats: ({strippedCount: number, strippedBytes: number, estimatedTokens: number}|null)}}
 *   `stats` is null (and `messages` is the input reference) when nothing was
 *   stripped. `strippedBytes` sums the lengths of string `source.data`
 *   values; `estimatedTokens` is that sum × 0.125, rounded up.
 */
function stripOldToolResultImages(messages, keepLast) {
  const keep = Math.floor(keepLast);
  if (!keep || keep <= 0 || !Array.isArray(messages)) {
    return { messages, stats: null };
  }

  const userMsgIndices = [];
  messages.forEach((msg, i) => {
    if (msg?.role === "user") userMsgIndices.push(i);
  });

  if (userMsgIndices.length <= keep) {
    return { messages, stats: null };
  }

  // Index of the first user message that must be preserved.
  const cutoffIdx = userMsgIndices[userMsgIndices.length - keep];

  let strippedCount = 0;
  let strippedBytes = 0;

  const result = messages.map((msg, msgIdx) => {
    if (msg?.role !== "user" || msgIdx >= cutoffIdx || !Array.isArray(msg.content)) {
      return msg;
    }

    let msgModified = false;
    const newContent = msg.content.map((block) => {
      if (block?.type !== "tool_result" || !Array.isArray(block.content)) return block;

      let toolModified = false;
      const newToolContent = block.content.map((item) => {
        if (item?.type !== "image") return item;
        strippedCount++;
        const data = item.source?.data;
        // Only string payloads have a meaningful length to count.
        if (typeof data === "string") strippedBytes += data.length;
        toolModified = true;
        return { type: "text", text: PLACEHOLDER };
      });

      if (!toolModified) return block;
      msgModified = true;
      return { ...block, content: newToolContent };
    });

    return msgModified ? { ...msg, content: newContent } : msg;
  });

  const stats =
    strippedCount > 0
      ? { strippedCount, strippedBytes, estimatedTokens: Math.ceil(strippedBytes * 0.125) }
      : null;

  return { messages: strippedCount > 0 ? result : messages, stats };
}
|
|
60
|
+
|
|
61
|
+
export { stripOldToolResultImages, PLACEHOLDER };
|
|
62
|
+
|
|
63
|
+
/**
 * image-strip extension descriptor. Its `onRequest` hook strips images from
 * older tool results, replaces `ctx.body.messages` with the stripped copy,
 * records the stats under `ctx.meta.imageStripStats`, and logs one summary
 * line to stderr. `ctx.meta.imageKeepLast`, when present, overrides the
 * KEEP_LAST default.
 */
const imageStripExtension = {
  name: "image-strip",
  description: "Strip base64 images from old tool results to reduce token waste",
  enabled: false,
  order: 150,

  /**
   * @param {{body?: {messages?: *}, meta?: object}} ctx Request context.
   * @returns {Promise<void>}
   */
  async onRequest(ctx) {
    if (!ctx?.body) return;

    // `ctx.meta?.` keeps a context without a meta bag from throwing.
    const keepLast = Number.parseInt(ctx.meta?.imageKeepLast ?? KEEP_LAST, 10);
    if (!keepLast || keepLast <= 0) return;
    if (!ctx.body.messages) return;

    const { messages, stats } = stripOldToolResultImages(ctx.body.messages, keepLast);
    if (!stats) return;

    ctx.body.messages = messages;
    ctx.meta = ctx.meta || {};
    ctx.meta.imageStripStats = stats;
    process.stderr.write(
      `[image-strip] stripped ${stats.strippedCount} images (~${stats.estimatedTokens} tokens saved)\n`
    );
  },
};

export default imageStripExtension;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Heading line that opens the section this extension rewrites.
const SECTION_HEADER = "# Output efficiency";
// Replacement text from the environment; empty string when unset.
const REPLACEMENT_RAW = process.env.CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT || "";

/**
 * Normalize user-supplied replacement text into a full section.
 *
 * @param {*} text Raw replacement; non-strings count as empty.
 * @returns {string} "" when there is no usable text; otherwise the trimmed
 *   text, prefixed with SECTION_HEADER and a blank line unless it already
 *   starts with the header.
 */
function normalizeReplacement(text) {
  if (typeof text !== "string") return "";

  const body = text.trim();
  if (body === "") return "";
  if (body.startsWith(SECTION_HEADER)) return body;
  return `${SECTION_HEADER}\n\n${body}`;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Replace the SECTION_HEADER section of `text` with `replacement`.
 *
 * The section starts at the first occurrence of SECTION_HEADER that sits at
 * the beginning of a line, and runs up to (not including) the next line that
 * starts with "# ", or to the end of the text. Occurrences in the middle of
 * a line — an inline mention, or the tail of a deeper heading such as
 * `## Output efficiency` — are ignored, so unrelated text is never truncated.
 *
 * @param {string} text Text to rewrite.
 * @param {string} replacement Full replacement section, header included.
 * @returns {string|null} The rewritten text, or null when no line-start
 *   header exists.
 */
function replaceSection(text, replacement) {
  let start = text.indexOf(SECTION_HEADER);
  // Skip matches that do not begin a line.
  while (start > 0 && text[start - 1] !== "\n") {
    start = text.indexOf(SECTION_HEADER, start + 1);
  }
  if (start === -1) return null;

  const afterHeader = start + SECTION_HEADER.length;
  const remainder = text.slice(afterHeader);
  const nextHeadingMatch = remainder.match(/\n# [^\n]+/);

  // The section is the last one: drop everything from the header onward.
  if (!nextHeadingMatch || nextHeadingMatch.index == null) {
    return text.slice(0, start) + replacement;
  }

  // +1 skips the newline so the slice starts at the next heading's "#".
  const nextHeadingStart = afterHeader + nextHeadingMatch.index + 1;
  return text.slice(0, start) + replacement + "\n\n" + text.slice(nextHeadingStart);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Apply `replaceSection` to every text block of an array-form system prompt
 * that mentions SECTION_HEADER.
 *
 * Blocks are never mutated; a changed block is a shallow copy with new text.
 *
 * @param {*} system Request `system`; only arrays are processed.
 * @param {string} replacement Normalized replacement section.
 * @returns {Array|null} A new block array when at least one block changed,
 *   otherwise null (also for non-array input or an empty replacement).
 */
function rewriteOutputEfficiency(system, replacement) {
  if (!Array.isArray(system) || !replacement) return null;

  let touched = 0;
  const blocks = system.map((block) => {
    const eligible =
      block?.type === "text" &&
      typeof block.text === "string" &&
      block.text.includes(SECTION_HEADER);
    if (!eligible) return block;

    const updated = replaceSection(block.text, replacement);
    if (!updated || updated === block.text) return block;

    touched += 1;
    return { ...block, text: updated };
  });

  return touched > 0 ? blocks : null;
}
|
|
44
|
+
|
|
45
|
+
export { normalizeReplacement, replaceSection, rewriteOutputEfficiency, SECTION_HEADER };
|
|
46
|
+
|
|
47
|
+
export default {
  name: "output-efficiency-rewrite",
  description: "Replace Claude Code's # Output efficiency system prompt section with custom text",
  enabled: false,
  order: 90,

  /**
   * Rewrite the output-efficiency section of `ctx.body.system` in place.
   *
   * The replacement comes from `ctx.meta.outputEfficiencyReplacement` when it
   * is set (non-nullish), otherwise from the environment-provided default.
   * Does nothing when no replacement is configured, when the request has no
   * system prompt, or when no block was changed.
   */
  async onRequest(ctx) {
    const replacement = normalizeReplacement(
      ctx.meta.outputEfficiencyReplacement ?? REPLACEMENT_RAW
    );
    if (!replacement || !ctx.body.system) return;

    const rewritten = rewriteOutputEfficiency(ctx.body.system, replacement);
    if (rewritten) ctx.body.system = rewritten;
  },
};
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
// prefix-diff — diagnostic extension for hunting cache-bust sources.
|
|
2
|
+
//
|
|
3
|
+
// On every request, snapshots a small projection of the prefix (system
|
|
4
|
+
// prompt + tools + first 5 messages) and writes it to
|
|
5
|
+
// `~/.claude/cache-fix-snapshots/<key>-last.json`. If a prior snapshot
|
|
6
|
+
// exists and differs, also writes a `<key>-diff.json` and emits a
|
|
7
|
+
// one-line stderr summary.
|
|
8
|
+
//
|
|
9
|
+
// No request mutation. The diagnostic is fail-open: any I/O error is
|
|
10
|
+
// swallowed silently in production. Set CACHE_FIX_DEBUG=1 to also log
|
|
11
|
+
// swallowed errors so silent failures stay observable.
|
|
12
|
+
//
|
|
13
|
+
// Adaptation from preload's `snapshotPrefix(payload)` (preload.mjs ~1656):
|
|
14
|
+
// preload fired the diff once per process restart. The proxy is long-lived
|
|
15
|
+
// and supports hot-reload, so we drop the "first call" gate and run the
|
|
16
|
+
// diff per call. Trade-off: more disk writes, but each is tiny and the
|
|
17
|
+
// diagnostic value is higher (drift visible across every turn, not just
|
|
18
|
+
// at startup).
|
|
19
|
+
|
|
20
|
+
import {
|
|
21
|
+
mkdir as _mkdir,
|
|
22
|
+
readFile as _readFile,
|
|
23
|
+
writeFile as _writeFile,
|
|
24
|
+
rename as _rename,
|
|
25
|
+
} from "node:fs/promises";
|
|
26
|
+
import { join } from "node:path";
|
|
27
|
+
import { homedir } from "node:os";
|
|
28
|
+
import { createHash } from "node:crypto";
|
|
29
|
+
|
|
30
|
+
// Runtime switches, read once at module load.
const ENABLED = process.env.CACHE_FIX_PREFIXDIFF === "1";
const DEBUG = process.env.CACHE_FIX_DEBUG === "1";

// Real fs/promises functions; callers may override any subset (see snapshotPrefix).
const DEFAULT_FS = {
  mkdir: _mkdir,
  readFile: _readFile,
  writeFile: _writeFile,
  rename: _rename,
};

/** @returns {string} Default snapshot directory: ~/.claude/cache-fix-snapshots. */
function getSnapshotDir() {
  const segments = [homedir(), ".claude", "cache-fix-snapshots"];
  return join(...segments);
}

/** Write a tagged line to stderr, but only when CACHE_FIX_DEBUG=1. */
function debug(msg) {
  if (!DEBUG) return;
  process.stderr.write(`[prefix-diff] ${msg}\n`);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Derive the snapshot file key from the system prompt.
 *
 * Only the first 2000 characters of the JSON-serialized prompt are hashed.
 *
 * @param {*} system System prompt (must be JSON-serializable to a string).
 * @returns {string} First 12 hex characters of the SHA-256 digest.
 */
function computeSessionKey(system) {
  const head = JSON.stringify(system).slice(0, 2000);
  const digest = createHash("sha256").update(head).digest("hex");
  return digest.slice(0, 12);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Fingerprint the tool list by name, in the order given.
 *
 * Names are deliberately not sorted, so a reordering of tools changes the hash.
 *
 * @param {Array<object>} tools Tool definitions; a missing `name` counts as "".
 * @returns {string} "none" for a non-array or empty list, otherwise the first
 *   16 hex characters of the SHA-256 of the JSON-encoded name list.
 */
function computeToolsHash(tools) {
  const hasTools = Array.isArray(tools) && tools.length > 0;
  if (!hasTools) return "none";

  const names = tools.map((tool) => tool?.name ?? "");
  const digest = createHash("sha256").update(JSON.stringify(names)).digest("hex");
  return digest.slice(0, 16);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Fingerprint the complete system prompt.
 *
 * @param {*} system System prompt; any falsy value yields "none".
 * @returns {string} "none", or the first 16 hex characters of the SHA-256 of
 *   the JSON-serialized prompt.
 */
function computeSystemHash(system) {
  if (!system) return "none";

  const serialized = JSON.stringify(system);
  const digest = createHash("sha256").update(serialized).digest("hex");
  return digest.slice(0, 16);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Project the first 5 messages into a compact, diff-friendly form.
 *
 * - `cache_control` is removed from every content block.
 * - Any text longer than 500 characters is cut to 500 and suffixed with
 *   `...[N chars]`, where N is the original length. This covers both the
 *   `text` field of content blocks and string-form `content`, so a long
 *   plain-string message is not copied whole into the snapshot/diff files.
 * - Non-object blocks and content that is neither a string nor an array are
 *   passed through unchanged.
 *
 * Pure: returns new objects and never mutates the input.
 *
 * @param {Array<object>} messages Request messages.
 * @returns {Array<{role: *, content: *}>} At most 5 projected messages;
 *   [] when `messages` is not an array.
 */
function truncatePrefixMessages(messages) {
  if (!Array.isArray(messages)) return [];

  const clip = (text) =>
    text.length > 500 ? `${text.slice(0, 500)}...[${text.length} chars]` : text;

  return messages.slice(0, 5).map((msg) => {
    const content = msg?.content;

    if (typeof content === "string") {
      return { role: msg.role, content: clip(content) };
    }
    if (!Array.isArray(content)) {
      return { role: msg?.role, content };
    }

    const cleanedContent = content.map((block) => {
      if (!block || typeof block !== "object") return block;
      // Pull cache_control out so only the remaining fields are kept.
      const { cache_control, ...rest } = block;
      return typeof rest.text === "string" ? { ...rest, text: clip(rest.text) } : rest;
    });
    return { role: msg.role, content: cleanedContent };
  });
}
|
|
96
|
+
|
|
97
|
+
/**
 * Build the snapshot record for one request payload.
 *
 * @param {object} payload Request body (`system`, `tools`, `messages`).
 * @returns {object|null} Snapshot with `timestamp`, `messageCount`,
 *   `toolsHash`, `systemHash` and `prefixMessages`; null when the payload is
 *   missing or has no system prompt.
 */
function buildSnapshot(payload) {
  if (!payload?.system) return null;

  const { messages, tools, system } = payload;
  const messageCount = Array.isArray(messages) ? messages.length : 0;

  return {
    timestamp: new Date().toISOString(),
    messageCount,
    toolsHash: computeToolsHash(tools),
    systemHash: computeSystemHash(system),
    prefixMessages: truncatePrefixMessages(messages),
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Compare two snapshots.
 *
 * Prefix messages are compared position by position via their JSON
 * serialization; a missing message on either side is treated as null.
 *
 * @param {object} prev Prior snapshot as read from disk.
 * @param {object} current Snapshot of the current request.
 * @returns {object} Diff record with hash-match flags, both message counts and
 *   `prefixDiffs`, a list of `{ index, prev, now }` for each differing position.
 */
function computeDiff(prev, current) {
  const asList = (value) => (Array.isArray(value) ? value : []);
  const before = asList(prev.prefixMessages);
  const after = asList(current.prefixMessages);

  const prefixDiffs = [];
  const span = Math.max(before.length, after.length);
  for (let index = 0; index < span; index += 1) {
    const was = before[index] ?? null;
    const is = after[index] ?? null;
    if (JSON.stringify(was) !== JSON.stringify(is)) {
      prefixDiffs.push({ index, prev: was, now: is });
    }
  }

  return {
    timestamp: current.timestamp,
    prevTimestamp: prev.timestamp,
    toolsMatch: prev.toolsHash === current.toolsHash,
    systemMatch: prev.systemHash === current.systemHash,
    messageCountPrev: prev.messageCount,
    messageCountNow: current.messageCount,
    prefixDiffs,
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Decide whether a diff record reports any difference.
 *
 * @param {object} diff Result of `computeDiff`.
 * @returns {boolean} true when a prefix message differs, either hash
 *   mismatches, or the message count changed.
 */
function diffHasChanges(diff) {
  if (diff.prefixDiffs.length > 0) return true;
  if (!diff.toolsMatch || !diff.systemMatch) return true;
  return diff.messageCountPrev !== diff.messageCountNow;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Write `obj` as pretty-printed JSON to `finalPath` via a temp file + rename.
 *
 * The temp name is `<finalPath>.<pid>.<ms timestamp>.<random base36>.tmp`.
 * It is distinct per call so that concurrent writers targeting the same
 * final path never share a staging file.
 *
 * Errors from `writeFile` or `rename` propagate to the caller. If the rename
 * fails, whatever was at `finalPath` is left as it was and the temp file
 * stays on disk; nothing here removes it.
 *
 * @param {string} finalPath Destination path.
 * @param {*} obj Value to serialize.
 * @param {{writeFile: Function, rename: Function}} fs fs/promises-style functions.
 * @returns {Promise<void>}
 */
async function atomicWriteJson(finalPath, obj, fs) {
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2, 10);
  const tmpPath = `${finalPath}.${process.pid}.${stamp}.${nonce}.tmp`;

  const serialized = JSON.stringify(obj, null, 2);
  await fs.writeFile(tmpPath, serialized);
  await fs.rename(tmpPath, finalPath);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Record a snapshot of the request prefix and diff it against the previous one.
 *
 * Steps: build the snapshot, ensure the directory exists, read the prior
 * `<key>-last.json`, write `<key>-diff.json` plus a one-line stderr summary
 * when something changed, then overwrite `<key>-last.json`.
 *
 * Every filesystem call is wrapped in try/catch; failures are reported only
 * through `debug` (i.e. when CACHE_FIX_DEBUG=1). `payload` is not mutated.
 *
 * @param {object} payload The request body (system, tools, messages).
 * @param {object} options
 * @param {string} [options.dir] Snapshot directory. Defaults to ~/.claude/cache-fix-snapshots.
 * @param {object} [options.fs] fs/promises overrides for tests:
 *                              { mkdir, readFile, writeFile, rename }.
 *                              Any subset replaces the corresponding default.
 * @returns {Promise<{ key, wroteSnapshot, wroteDiff } | null>} Result for tests; null if no system.
 */
async function snapshotPrefix(payload, options = {}) {
  const current = buildSnapshot(payload);
  if (!current) return null;

  const dir = options.dir || getSnapshotDir();
  const fs = { ...DEFAULT_FS, ...(options.fs || {}) };

  const sessionKey = computeSessionKey(payload.system);
  const lastPath = join(dir, `${sessionKey}-last.json`);
  const diffPath = join(dir, `${sessionKey}-diff.json`);
  const outcome = { key: sessionKey, wroteSnapshot: false, wroteDiff: false };
  const reason = (err) => err?.message ?? err;

  // Without the directory no later step can succeed, so stop here on failure.
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    debug(`mkdir failed for ${dir}: ${reason(err)}`);
    return outcome;
  }

  // A missing prior snapshot is the normal first-run case and is not logged.
  // An unreadable or unparsable one is logged and treated as "no prior".
  let prev = null;
  try {
    prev = JSON.parse(await fs.readFile(lastPath, "utf-8"));
  } catch (err) {
    if (err && err.code !== "ENOENT") {
      debug(`prior snapshot unreadable at ${lastPath}: ${reason(err)}`);
    }
  }

  const diff = prev ? computeDiff(prev, current) : null;
  if (diff && diffHasChanges(diff)) {
    try {
      await atomicWriteJson(diffPath, diff, fs);
      outcome.wroteDiff = true;
      // The summary line is unconditional (not gated on CACHE_FIX_DEBUG):
      // reporting a detected diff is the point of this diagnostic.
      process.stderr.write(
        `[prefix-diff] ${sessionKey}: ${diff.prefixDiffs.length} differences, ` +
          `tools=${diff.toolsMatch ? "match" : "DIFFER"}, ` +
          `system=${diff.systemMatch ? "match" : "DIFFER"}, ` +
          `messages=${diff.messageCountPrev}→${diff.messageCountNow}\n`,
      );
    } catch (err) {
      debug(`diff write failed at ${diffPath}: ${reason(err)}`);
    }
  }

  // Refresh the baseline for the next call; on failure the old file remains.
  try {
    await atomicWriteJson(lastPath, current, fs);
    outcome.wroteSnapshot = true;
  } catch (err) {
    debug(`snapshot write failed at ${lastPath}: ${reason(err)}`);
  }

  return outcome;
}
|
|
242
|
+
|
|
243
|
+
// The named exports below are internal test seams, not part of the
|
|
244
|
+
// proxy extension contract. Pipeline loading consumes only `default`.
|
|
245
|
+
// They're exposed so tests can call the helpers directly with their own
|
|
246
|
+
// options (tmpdir, failing fs mocks) instead of mutating process env or
|
|
247
|
+
// monkey-patching node:fs/promises at module scope.
|
|
248
|
+
export {
|
|
249
|
+
snapshotPrefix,
|
|
250
|
+
buildSnapshot,
|
|
251
|
+
computeDiff,
|
|
252
|
+
computeSessionKey,
|
|
253
|
+
truncatePrefixMessages,
|
|
254
|
+
diffHasChanges,
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
export default {
  name: "prefix-diff",
  description:
    "Snapshot prefix (first 5 msgs + system + tools) and diff against previous run for cache-bust hunting",
  // Registered as enabled so that the environment variable alone switches the
  // diagnostic on; when CACHE_FIX_PREFIXDIFF is not "1", onRequest returns
  // immediately.
  enabled: true,
  order: 680,

  /**
   * Snapshot the request body when the diagnostic is switched on.
   * Errors from snapshotPrefix are caught and only debug-logged.
   */
  async onRequest(ctx) {
    if (!ENABLED || !ctx?.body) return;

    try {
      await snapshotPrefix(ctx.body);
    } catch (err) {
      // Defense in depth: snapshotPrefix already guards its own I/O.
      debug(`onRequest unexpected: ${err?.message ?? err}`);
    }
  },
};
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
// Matches one <system-reminder>…</system-reminder> element at the very end of
// a string, preceded by a blank line. Group 1 is the element itself. The inner
// pattern cannot run across a closing tag, so each match covers one reminder.
const TRAILING_SMOOSH = /\n\n(<system-reminder>\n(?:(?!<\/system-reminder>)[\s\S])*?\n<\/system-reminder>)\s*$/;

/**
 * Move system reminders that were appended to string `tool_result` content
 * into standalone text blocks.
 *
 * Only user messages with array content are examined. For each `tool_result`
 * block whose `content` is a string, trailing reminders are peeled off one at
 * a time. The peeled reminders are appended to the END of that message's
 * content as `{ type: "text" }` blocks, in their original order, after all
 * other blocks.
 *
 * Input is never mutated. Null or otherwise malformed message entries are
 * passed through untouched instead of throwing.
 *
 * @param {Array<object>} messages Request messages.
 * @returns {{messages: *, stats: {peeled: number}|null}} When nothing was
 *   peeled (or `messages` is not an array), the original `messages` reference
 *   and `stats: null`; otherwise a new array and the number of reminders moved.
 */
function splitSmooshedReminders(messages) {
  if (!Array.isArray(messages)) return { messages, stats: null };

  let totalPeeled = 0;

  const result = messages.map((msg) => {
    if (msg?.role !== "user" || !Array.isArray(msg.content)) return msg;

    const kept = [];
    const peeled = [];

    for (const block of msg.content) {
      if (block?.type !== "tool_result" || typeof block.content !== "string") {
        kept.push(block);
        continue;
      }

      // Peel from the end; unshift keeps the reminders in source order.
      const reminders = [];
      let body = block.content;
      for (let m = TRAILING_SMOOSH.exec(body); m; m = TRAILING_SMOOSH.exec(body)) {
        reminders.unshift(m[1]);
        body = body.slice(0, m.index);
      }

      if (reminders.length === 0) {
        kept.push(block);
        continue;
      }

      kept.push({ ...block, content: body });
      for (const text of reminders) peeled.push({ type: "text", text });
      totalPeeled += reminders.length;
    }

    return peeled.length > 0 ? { ...msg, content: [...kept, ...peeled] } : msg;
  });

  if (totalPeeled === 0) return { messages, stats: null };
  return { messages: result, stats: { peeled: totalPeeled } };
}
|
|
47
|
+
|
|
48
|
+
export { splitSmooshedReminders, TRAILING_SMOOSH };
|
|
49
|
+
|
|
50
|
+
export default {
  name: "smoosh-split",
  description: "Peel smooshed system-reminders from tool_result content into standalone blocks",
  enabled: false,
  order: 320,

  /**
   * Run splitSmooshedReminders over the request messages. When anything was
   * peeled, replace `ctx.body.messages`, store the stats on
   * `ctx.meta.smooshSplitStats`, and write a one-line summary to stderr.
   */
  async onRequest(ctx) {
    if (!ctx.body.messages) return;

    const outcome = splitSmooshedReminders(ctx.body.messages);
    if (!outcome.stats) return;

    ctx.body.messages = outcome.messages;
    ctx.meta.smooshSplitStats = outcome.stats;
    process.stderr.write(
      `[smoosh-split] peeled ${outcome.stats.peeled} reminder(s) from tool_result.content\n`
    );
  },
};
|
|
@@ -28,6 +28,8 @@ function isDeferredToolsBlock(text) {
|
|
|
28
28
|
return typeof text === "string" && text.includes("deferred tools are now available");
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
export { sortSkillsBlock, sortDeferredToolsBlock, isSkillsBlock, isDeferredToolsBlock };
|
|
32
|
+
|
|
31
33
|
export default {
|
|
32
34
|
name: "sort-stabilization",
|
|
33
35
|
description: "Deterministic ordering of skills, deferred tools, and tool definitions",
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
 * Rebuild assistant `tool_use` inputs so their keys follow the order of the
 * tool's `input_schema.properties`.
 *
 * For every assistant `tool_use` block whose tool has a schema with
 * `properties`, the input is replaced when its schema keys are out of order
 * or when it carries keys the schema does not list. The replacement holds
 * only the schema keys present on the input, in schema order; keys not in
 * the schema are dropped.
 *
 * Mutates in place: changed blocks are swapped inside `msg.content`.
 *
 * @param {object} body Request body with `messages` and `tools` arrays.
 * @returns {number} Number of tool_use blocks replaced (0 for malformed input).
 */
function normalizeToolUseInputs(body) {
  if (!body || typeof body !== "object") return 0;
  const { messages, tools } = body;
  if (!Array.isArray(messages) || !Array.isArray(tools)) return 0;

  // tool name -> ordered list of schema property names
  const schemaKeysByTool = Object.create(null);
  for (const tool of tools) {
    if (!tool || typeof tool !== "object" || typeof tool.name !== "string") continue;
    const props = tool.input_schema?.properties;
    if (props && typeof props === "object") {
      schemaKeysByTool[tool.name] = Object.keys(props);
    }
  }

  let modified = 0;
  for (const msg of messages) {
    if (msg?.role !== "assistant" || !Array.isArray(msg.content)) continue;

    for (const [i, block] of msg.content.entries()) {
      if (block?.type !== "tool_use") continue;

      const { input } = block;
      if (!input || typeof input !== "object" || Array.isArray(input)) continue;

      const schemaKeys = schemaKeysByTool[block.name];
      if (!schemaKeys) continue;

      const known = new Set(schemaKeys);
      const actual = Object.keys(input);
      const hasExtras = actual.some((k) => !known.has(k));

      // Same keys viewed in schema order (wanted) and in current order (present).
      const wanted = schemaKeys.filter((k) => Object.prototype.hasOwnProperty.call(input, k));
      const present = actual.filter((k) => known.has(k));
      const orderDiffers =
        wanted.length !== present.length || wanted.some((k, j) => k !== present[j]);

      if (!hasExtras && !orderDiffers) continue;

      const reordered = {};
      for (const k of wanted) {
        reordered[k] = input[k];
      }
      msg.content[i] = { ...block, input: reordered };
      modified += 1;
    }
  }
  return modified;
}
|
|
57
|
+
|
|
58
|
+
export { normalizeToolUseInputs };
|
|
59
|
+
|
|
60
|
+
export default {
  name: "tool-input-normalize",
  description: "Normalize tool_use input field ordering to match schema for cache stability",
  enabled: false,
  order: 280,

  /**
   * Normalize tool_use inputs on the request body and, when any block was
   * replaced, record the count on `ctx.meta.toolInputNormalizeCount`.
   */
  async onRequest(ctx) {
    const { messages, tools } = ctx.body;
    if (!messages || !tools) return;

    const replaced = normalizeToolUseInputs(ctx.body);
    if (replaced > 0) ctx.meta.toolInputNormalizeCount = replaced;
  },
};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { appendFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
|
|
5
|
+
// Destination of the usage log; CACHE_FIX_USAGE_LOG overrides the default.
const LOG_PATH = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");

/**
 * Assemble one usage record from the per-request metadata.
 *
 * @param {object} meta Reads `meta.cacheStats` (token counters) and
 *   `meta._quotaData` (`five_hour` / `seven_day`, each with `pct`).
 * @param {object} telemetry Reads `telemetry.model`.
 * @param {*} responseHeaders Accepted but not used here.
 * @returns {object} Flat record: ISO timestamp, model, four token counters
 *   (0 when missing), quota percentages (null when the window is missing),
 *   and `peak_hour`, which is true Monday–Friday between 13:00 and 18:59 UTC.
 */
function buildRecord(meta, telemetry, responseHeaders) {
  const now = new Date();
  const weekday = now.getUTCDay();
  const hour = now.getUTCHours();
  const isWeekday = weekday >= 1 && weekday <= 5;
  const inPeakWindow = hour >= 13 && hour < 19;

  const counters = meta.cacheStats || {};
  const windows = meta._quotaData || {};
  const pctOf = (quotaWindow) => (quotaWindow ? quotaWindow.pct : null);

  return {
    timestamp: now.toISOString(),
    model: telemetry.model || "unknown",
    input_tokens: counters.inputTokens || 0,
    output_tokens: counters.outputTokens || 0,
    cache_read_input_tokens: counters.cacheRead || 0,
    cache_creation_input_tokens: counters.cacheCreation || 0,
    q5h_pct: pctOf(windows.five_hour),
    q7d_pct: pctOf(windows.seven_day),
    peak_hour: isWeekday && inPeakWindow,
  };
}
|
|
27
|
+
|
|
28
|
+
export { buildRecord, LOG_PATH };
|
|
29
|
+
|
|
30
|
+
export default {
  name: "usage-log",
  description: "Append per-call usage record to ~/.claude/usage.jsonl",
  enabled: false,
  order: 650,

  /**
   * Append one JSON line to LOG_PATH for each `message_delta` stream event
   * that carries a `usage` field. All other events are ignored.
   *
   * Logging is best-effort: filesystem errors are swallowed so that a
   * logging problem does not surface as an error from this hook.
   */
  async onStreamEvent(ctx) {
    if (!ctx.event || ctx.event.type !== "message_delta" || !ctx.event.usage) return;

    const record = buildRecord(ctx.meta, ctx.telemetry || {}, ctx.responseHeaders);

    try {
      // Create the directory that actually holds LOG_PATH. The path may have
      // been redirected via CACHE_FIX_USAGE_LOG, in which case creating
      // ~/.claude would leave the real parent missing and the append would
      // fail. join() normalizes the ".." segment, giving the parent directory.
      await mkdir(join(LOG_PATH, ".."), { recursive: true });
      await appendFile(LOG_PATH, JSON.stringify(record) + "\n");
    } catch {
      // Deliberately ignored: usage logging is best-effort.
    }
  },
};
|
package/proxy/extensions.json
CHANGED
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
"sort-stabilization": { "enabled": true, "order": 200 },
|
|
4
4
|
"fresh-session-sort": { "enabled": true, "order": 250 },
|
|
5
5
|
"identity-normalization": { "enabled": true, "order": 300 },
|
|
6
|
+
"smoosh-split": { "enabled": true, "order": 320 },
|
|
7
|
+
"content-strip": { "enabled": true, "order": 330 },
|
|
8
|
+
"tool-input-normalize": { "enabled": true, "order": 340 },
|
|
6
9
|
"cache-control-normalize": { "enabled": true, "order": 400 },
|
|
7
10
|
"ttl-management": { "enabled": true, "order": 500 },
|
|
8
11
|
"cache-telemetry": { "enabled": true, "order": 600 },
|