claude-code-cache-fix 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/package.json +2 -2
- package/proxy/extensions/identity-normalization.mjs +1 -1
- package/proxy/extensions/image-strip.mjs +7 -2
- package/proxy/extensions/messages-cache-breakpoint.mjs +314 -0
- package/proxy/extensions/microcompact-stability.mjs +428 -0
- package/proxy/extensions/ttl-management.mjs +2 -1
- package/proxy/extensions/ttl-tier-detect.mjs +33 -0
- package/proxy/extensions.json +3 -0
package/README.md
CHANGED
|
@@ -409,6 +409,59 @@ If `sharp` is missing, Pass 3 skips cleanly (telemetry records `library_missing:
|
|
|
409
409
|
| `CACHE_FIX_IMAGE_REQUEST_SIZE_MAX` | 31457280 (30 MB) | Pass 2 byte budget. 2 MB headroom from Anthropic's 32 MB ceiling. |
|
|
410
410
|
| `CACHE_FIX_IMAGE_COUNT_MAX` | 100 | Hard image-count cap. Set to 600 for legacy Claude 1/2.x/Instant if needed. |
|
|
411
411
|
|
|
412
|
+
## Cache breakpoints (proxy mode, opt-in)
|
|
413
|
+
|
|
414
|
+
Anthropic's prompt cache supports up to **four** `cache_control` markers per request. Claude Code currently places only three of them; the missing one — breakpoint #3, which belongs between the auto-injected `messages[0]` content (hooks, skills, project CLAUDE.md, deferred tools, MCP server descriptions) and the first real user content — is never set. Without that marker, every change inside the auto-injected span busts the cache for everything that follows. wadabum projected ~6,500 token savings per fresh-session first turn from adding it ([anthropics/claude-code#47098](https://github.com/anthropics/claude-code/issues/47098)).
|
|
415
|
+
|
|
416
|
+
The proxy can inject the missing marker on opt-in. Default off until validated against community data.
|
|
417
|
+
|
|
418
|
+
```sh
|
|
419
|
+
export CACHE_FIX_INJECT_MESSAGES_BREAKPOINT=1
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
The injection is conservative: it only fires when the request already carries 1–3 markers (typical CC shape) and refuses if the request is at the 4-marker limit (would 400) or has zero markers (Agent SDK / API-direct shape this extension isn't built for). Boundary detection covers all five observed auto-injected block kinds — hooks, skills, CLAUDE.md, deferred-tools, MCP — and lands the marker on the LAST auto-injected block.
|
|
423
|
+
|
|
424
|
+
A diagnostic-only env var dumps the structural shape of `messages[0]` for fixture sourcing without mutating the request:
|
|
425
|
+
|
|
426
|
+
```sh
|
|
427
|
+
export CACHE_FIX_DUMP_MESSAGES_HEAD=/tmp/messages-head.jsonl
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
| Env var | Default | Purpose |
|
|
431
|
+
|---------|---------|---------|
|
|
432
|
+
| `CACHE_FIX_INJECT_MESSAGES_BREAKPOINT` | unset | Enable breakpoint #3 injection (`=1` opt-in). |
|
|
433
|
+
| `CACHE_FIX_DUMP_MESSAGES_HEAD` | unset | Diagnostic JSONL dump of `messages[0].content` shape — read-only, no mutation. |
|
|
434
|
+
|
|
435
|
+
## Microcompact stability (proxy mode, opt-in)
|
|
436
|
+
|
|
437
|
+
After ~90 minutes idle, Claude Code's `time_based_microcompact` (and the cold-compact path triggered by `FDY()`) replaces old `tool_result` content with a sentinel string. The original content is gone for cache purposes; that part is unrecoverable from the proxy. But the sentinel itself can carry an embedded timestamp (`[Old tool result content cleared at 2026-04-30T13:42:11Z]`), which means a *second* microcompact pass against the same already-cleared position writes different bytes — busting the cache for everything after that position even though no new content was added.
|
|
438
|
+
|
|
439
|
+
This extension addresses the recoverable half: normalize the sentinel to a byte-stable canonical form so repeat microcompacts don't churn the cache. **Phase 1 only** — diagnostic + opt-in normalization. Phase 2 (snapshot-and-restore of original tool_result content) is deferred to v3.5.0+ pending Phase 1 production data.
|
|
440
|
+
|
|
441
|
+
```sh
|
|
442
|
+
# Step 1 (diagnostic): characterize what CC's sentinel actually looks like.
|
|
443
|
+
export CACHE_FIX_DUMP_MICROCOMPACT=/tmp/microcompact-dump.jsonl
|
|
444
|
+
|
|
445
|
+
# Step 2 (normalize): once the sentinel format is confirmed, opt-in.
|
|
446
|
+
export CACHE_FIX_NORMALIZE_MICROCOMPACT=1
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
Detection has two modes:
|
|
450
|
+
- **Mode A** — exact match against confirmed CC sentinel patterns (the bare form and the ISO-8601 timestamp variant). Mode A matches are eligible for normalization.
|
|
451
|
+
- **Mode B** — prefix-only match (text begins with `[Old tool result content cleared` but does not exactly match a Mode A pattern). Mode B is **diagnostic-only**: never normalized, dump records redact to a 64-char prefix only.
|
|
452
|
+
|
|
453
|
+
The Mode A/B separation protects against cases where the sentinel might be followed by user-derived content (e.g., a tool that echoed user input back into its result) — the redaction guarantee on Mode B keeps that content out of the diagnostic dump.
|
|
454
|
+
|
|
455
|
+
| Env var | Default | Purpose |
|
|
456
|
+
|---------|---------|---------|
|
|
457
|
+
| `CACHE_FIX_DUMP_MICROCOMPACT` | unset | Path for diagnostic JSONL dump of detected sentinels. Read-only — no mutation. |
|
|
458
|
+
| `CACHE_FIX_NORMALIZE_MICROCOMPACT` | unset | Enable normalization (`=1` opts in). Mutates Mode A matches to canonical form. |
|
|
459
|
+
| `CACHE_FIX_MICROCOMPACT_NORMALIZED` | `[Old tool result content cleared]` | Override the canonical replacement string. |
|
|
460
|
+
| `CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_<N>` | unset | Add custom Mode A regex pattern(s). Numbered (1-indexed, sparse OK). |
|
|
461
|
+
| `CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_<N>` | unset | Custom Mode B literal prefix(es). Pair with a custom Mode A pattern from a non-default sentinel family so prefix-only variants of that family also get redacted Mode B capture. |
|
|
462
|
+
| `CACHE_FIX_MICROCOMPACT_REDACT_LEN` | `64` | Mode B prefix length in dump records. Set to `0` to suppress the prefix entirely. |
|
|
463
|
+
| `CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED` | unset | Add post-normalization text alongside (not replacing) raw `sentinel_text` in dump records. |
|
|
464
|
+
|
|
412
465
|
## System prompt rewrite (preload mode, optional)
|
|
413
466
|
|
|
414
467
|
The interceptor can rewrite Claude Code's `# Output efficiency` system-prompt section. Disabled by default. Enable with `CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT`. See [docs/output-efficiency-prompts.md](docs/output-efficiency-prompts.md) for the three known prompt variants and usage instructions.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "3.3.0",
|
|
3
|
+
"version": "3.4.0",
|
|
4
4
|
"description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
|
@@ -53,5 +53,5 @@
|
|
|
53
53
|
"url": "https://buymeacoffee.com/vsits"
|
|
54
54
|
},
|
|
55
55
|
"license": "MIT",
|
|
56
|
-
"author": "Chris Nighswonger <
|
|
56
|
+
"author": "Chris Nighswonger <dev@vsits.co> (https://vsits.co)"
|
|
57
57
|
}
|
|
@@ -2,7 +2,7 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
|
|
3
3
|
const _pinnedBlocks = new Map();
|
|
4
4
|
|
|
5
|
-
const SESSION_START_RESUME_MARKER = /SessionStart:
|
|
5
|
+
const SESSION_START_RESUME_MARKER = /SessionStart:resume hook success:/g;
|
|
6
6
|
const SESSION_START_ID_TAG = /\n?<session-id>[^<]*<\/session-id>/g;
|
|
7
7
|
const SESSION_START_LAST_ACTIVE_LINE = /\nLast active:[^\n]*/g;
|
|
8
8
|
const CONTINUE_TRAILER_TEXT = "Continue from where you left off.";
|
|
@@ -48,6 +48,9 @@ function getRequestSizeMax() {
|
|
|
48
48
|
const v = parseInt(process.env.CACHE_FIX_IMAGE_REQUEST_SIZE_MAX || "31457280", 10);
|
|
49
49
|
return v > 0 ? v : 31457280;
|
|
50
50
|
}
|
|
51
|
+
// Debug logging gate: true only when CACHE_FIX_DEBUG is exactly "1".
// The environment is consulted on every call.
function isDebug() {
  const { CACHE_FIX_DEBUG: flag } = process.env;
  return flag === "1";
}
|
|
51
54
|
function getImageCountMax() {
|
|
52
55
|
// Default 100 — single cap covering the only model family in active CC use.
|
|
53
56
|
// Users on legacy Claude 1/2.x/Instant who genuinely need 600 can override.
|
|
@@ -649,7 +652,9 @@ export default {
|
|
|
649
652
|
|
|
650
653
|
if (logParts.length > 0) {
|
|
651
654
|
ctx.body.messages = messages;
|
|
652
|
-
|
|
655
|
+
if (isDebug()) {
|
|
656
|
+
process.stderr.write(`[image-strip] ${logParts.join("; ")}\n`);
|
|
657
|
+
}
|
|
653
658
|
}
|
|
654
659
|
return;
|
|
655
660
|
}
|
|
@@ -676,7 +681,7 @@ export default {
|
|
|
676
681
|
stats.resize_succeeded > 0 ||
|
|
677
682
|
stats.unsupported_format_count > 0 ||
|
|
678
683
|
stats.dimension_probe_fail_count > 0;
|
|
679
|
-
if (didSomething) {
|
|
684
|
+
if (didSomething && isDebug()) {
|
|
680
685
|
const parts = [];
|
|
681
686
|
if (stats.resize_succeeded > 0) parts.push(`resized=${stats.resize_succeeded}`);
|
|
682
687
|
if (stats.resize_failed > 0) parts.push(`resize_failed=${stats.resize_failed}`);
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
// messages-cache-breakpoint — inject the missing breakpoint #3 cache_control
|
|
2
|
+
// at the boundary between Claude Code's auto-injected blocks (hooks, skills,
|
|
3
|
+
// project CLAUDE.md, deferred-tools, MCP server descriptions) and the first
|
|
4
|
+
// real user content inside `messages[0]`.
|
|
5
|
+
//
|
|
6
|
+
// Activation: `enabled: true` in extensions.json (always loaded), runtime
|
|
7
|
+
// gates per env var:
|
|
8
|
+
//
|
|
9
|
+
// - CACHE_FIX_INJECT_MESSAGES_BREAKPOINT=1 → opt-in injection
|
|
10
|
+
// - CACHE_FIX_DUMP_MESSAGES_HEAD=<path> → diagnostic-only JSONL dump
|
|
11
|
+
// of messages[0].content shape
|
|
12
|
+
//
|
|
13
|
+
// Order 410 — runs immediately after `cache-control-normalize` (400), so we
|
|
14
|
+
// count markers and place breakpoint #3 against a normalized baseline.
|
|
15
|
+
//
|
|
16
|
+
// See `docs/directives/proxy-messages-cache-breakpoint.md` for the full
|
|
17
|
+
// design (boundary detection algorithm, marker-count guard, telemetry surface).
|
|
18
|
+
|
|
19
|
+
import { appendFile, mkdir } from "node:fs/promises";
|
|
20
|
+
import { dirname } from "node:path";
|
|
21
|
+
|
|
22
|
+
// --- Env gates (read per-call so tests can flip without re-importing) ---
|
|
23
|
+
|
|
24
|
+
// Injection gate: true only when CACHE_FIX_INJECT_MESSAGES_BREAKPOINT is
// exactly "1". Read on every call.
function isInjectEnabled() {
  const { CACHE_FIX_INJECT_MESSAGES_BREAKPOINT: flag } = process.env;
  return flag === "1";
}
|
|
27
|
+
// Path for the diagnostic dump, taken from CACHE_FIX_DUMP_MESSAGES_HEAD.
// Returns null when the variable is unset or empty.
function getDumpPath() {
  const configured = process.env.CACHE_FIX_DUMP_MESSAGES_HEAD;
  if (!configured) return null;
  return configured;
}
|
|
31
|
+
// Debug logging gate: true only when CACHE_FIX_DEBUG is exactly "1".
function isDebug() {
  const { CACHE_FIX_DEBUG: flag } = process.env;
  return flag === "1";
}
|
|
34
|
+
|
|
35
|
+
// Write a tagged debug line to stderr; silent unless isDebug() is true.
function debug(msg) {
  if (!isDebug()) return;
  process.stderr.write(`[messages-breakpoint] DEBUG: ${msg}\n`);
}
|
|
38
|
+
|
|
39
|
+
// --- Block classification ---
|
|
40
|
+
//
|
|
41
|
+
// Auto-injected block kinds that CC writes into `messages[0].content` ahead of
|
|
42
|
+
// the real user content. Order matters: each block runs through these checks
|
|
43
|
+
// in declaration order and the first match wins. Tightening notes:
|
|
44
|
+
//
|
|
45
|
+
// - Hooks: requires both `<system-reminder>` opening AND `hook success`
|
|
46
|
+
// substring — narrow enough that user prose discussing hook semantics
|
|
47
|
+
// won't false-positive.
|
|
48
|
+
// - Skills: anchored on `<system-reminder>` opening tag; won't match user
|
|
49
|
+
// messages that quote `<available-skills>` from documentation.
|
|
50
|
+
// - CLAUDE.md: regex anchored on absolute-path prefix (`/`); won't match
|
|
51
|
+
// "see CLAUDE.md in the docs".
|
|
52
|
+
// - Deferred-tools: exact `<deferred-tools>` tag substring; won't match
|
|
53
|
+
// user prose about "deferred tools".
|
|
54
|
+
// - MCP: two specific sentinels (`<mcp-resources>` tag OR
|
|
55
|
+
// `Available MCP servers:` literal); won't match generic MCP prose.
|
|
56
|
+
|
|
57
|
+
const CLAUDE_MD_RE = /Contents of \/[^\n]*?CLAUDE\.md/;
|
|
58
|
+
|
|
59
|
+
// Return the `text` of a well-formed text block ({ type: "text", text: <string> }),
// or null for anything else (non-objects, other block types, non-string text).
function getBlockText(block) {
  const isTextBlock =
    Boolean(block) &&
    typeof block === "object" &&
    block.type === "text" &&
    typeof block.text === "string";
  return isTextBlock ? block.text : null;
}
|
|
65
|
+
|
|
66
|
+
// Classify one messages[0].content block as "hooks", "skills", "claude_md",
// "deferred_tools", "mcp_resources", or "user". Non-text blocks are "user".
// Checks run in the order below and the first hit wins.
export function classifyBlock(block) {
  const text = getBlockText(block);
  if (text === null) return "user";

  const has = (needle) => text.includes(needle);
  const opensWithReminder = text.startsWith("<system-reminder>");

  // Hooks: leading <system-reminder> plus the "hook success" substring.
  if (opensWithReminder && has("hook success")) return "hooks";

  // Skills: leading <system-reminder> plus either skills tag.
  if (opensWithReminder && (has("<available-skills>") || has("<plugin-skills>"))) {
    return "skills";
  }

  // Project CLAUDE.md: a <system-reminder> anywhere in the text plus the
  // absolute-path "Contents of /…CLAUDE.md" marker.
  if (has("<system-reminder>") && CLAUDE_MD_RE.test(text)) return "claude_md";

  // Deferred tools: the literal <deferred-tools> tag.
  if (has("<deferred-tools>")) return "deferred_tools";

  // MCP: either of the two sentinels.
  if (has("<mcp-resources>") || has("Available MCP servers:")) return "mcp_resources";

  return "user";
}
|
|
97
|
+
|
|
98
|
+
// Kinds reported by classifyBlock that count as auto-injected content.
const AUTO_INJECTED_KINDS = new Set([
  "hooks",
  "skills",
  "claude_md",
  "deferred_tools",
  "mcp_resources",
]);

// Return the index of the LAST block in `content` that classifies as
// auto-injected, or -1 when there is none (or `content` is not an array).
// Scanning from the tail and stopping at the first hit yields that same index
// even when auto-injected and user blocks are interleaved.
export function detectAutoInjectedBoundary(content) {
  if (!Array.isArray(content)) return -1;
  let cursor = content.length - 1;
  while (cursor >= 0 && !AUTO_INJECTED_KINDS.has(classifyBlock(content[cursor]))) {
    cursor -= 1;
  }
  return cursor;
}
|
|
119
|
+
|
|
120
|
+
// --- Marker counting ---
|
|
121
|
+
|
|
122
|
+
// Count every top-level `cache_control` marker carried by a request body.
//
// Markers are counted on:
//   - body.tools[]              (tool definitions)
//   - body.system[]             (system blocks, when `system` is an array)
//   - body.messages[].content[] (message content blocks, when content is an array)
//
// Tool definitions are included because a marker there consumes one of the
// request's cache breakpoints just like a system or message marker; leaving
// them out would let the caller's marker-limit guard undercount.
//
// Returns 0 for a missing or non-object body. String-valued `system` or
// `content` fields contribute nothing.
export function countAllCacheControlMarkers(body) {
  if (!body || typeof body !== "object") return 0;

  const isMarked = (block) =>
    Boolean(block && typeof block === "object" && block.cache_control);
  const countMarked = (blocks) =>
    Array.isArray(blocks) ? blocks.filter(isMarked).length : 0;

  let n = countMarked(body.tools) + countMarked(body.system);
  if (Array.isArray(body.messages)) {
    for (const msg of body.messages) {
      // Null entries and string-content messages contribute 0.
      n += countMarked(msg?.content);
    }
  }
  return n;
}
|
|
140
|
+
|
|
141
|
+
// --- Stats shape (also used as telemetry on ctx.meta) ---
|
|
142
|
+
|
|
143
|
+
// Build a fresh stats record with every field at its "nothing happened yet"
// value. Key order is kept stable for serialized output.
function initStats() {
  const stats = { enabled: true, injected: false };
  stats.boundary_idx = -1;
  stats.boundary_block_kind = null;
  stats.blocks_examined = 0;
  stats.existing_marker_count = 0;
  stats.skip_reason = null;
  return stats;
}
|
|
154
|
+
|
|
155
|
+
// --- Orchestrator (pure on body — no I/O) ---
|
|
156
|
+
|
|
157
|
+
// Try to add a cache_control marker to the last auto-injected block of
// messages[0].content. Performs no file I/O; it mutates reqCtx.body only when
// it injects, and writes one stderr warning when more than 4 markers exist.
//
// Returns a stats record (see initStats). On a skip, `skip_reason` is one of:
//   "unexpected_role_or_shape" — no body, no messages, or messages[0] is not a
//                                user message with array content
//   "no_existing_markers"      — the request carries zero markers
//   "at_marker_limit"          — the request already carries 4 or more markers
//   "boundary_not_found"       — no auto-injected block in messages[0]
//   "boundary_already_marked"  — the boundary block already has cache_control
export function injectMessagesBreakpoint(reqCtx) {
  const stats = initStats();
  const skip = (reason) => {
    stats.skip_reason = reason;
    return stats;
  };

  const body = reqCtx ? reqCtx.body : undefined;
  if (!body) return skip("unexpected_role_or_shape");

  const { messages } = body;
  const first = Array.isArray(messages) && messages.length > 0 ? messages[0] : null;
  if (!first || first.role !== "user" || !Array.isArray(first.content)) {
    return skip("unexpected_role_or_shape");
  }

  const markerCount = countAllCacheControlMarkers(body);
  stats.existing_marker_count = markerCount;
  if (markerCount === 0) return skip("no_existing_markers");
  if (markerCount >= 4) {
    if (markerCount > 4) {
      process.stderr.write(
        `[messages-breakpoint] warn: existing_markers=${markerCount} exceeds Anthropic's documented max of 4\n`,
      );
    }
    return skip("at_marker_limit");
  }

  const blocks = first.content;
  stats.blocks_examined = blocks.length;

  const idx = detectAutoInjectedBoundary(blocks);
  stats.boundary_idx = idx;
  if (idx === -1) return skip("boundary_not_found");

  const target = blocks[idx];
  stats.boundary_block_kind = classifyBlock(target);
  if (target && target.cache_control) return skip("boundary_already_marked");

  // Replace the block with a marked copy rather than mutating it in place.
  // NOTE(review): the marker always uses ttl "1h"; confirm this is compatible
  // with the TTLs on the request's other markers.
  blocks[idx] = { ...target, cache_control: { type: "ephemeral", ttl: "1h" } };
  stats.injected = true;
  return stats;
}
|
|
215
|
+
|
|
216
|
+
// --- Diagnostic dump ---
|
|
217
|
+
//
|
|
218
|
+
// Dumps the structural shape of messages[0].content (per-block kind, first
|
|
219
|
+
// 200 chars of text, cache_control presence flag) to a JSONL file. Read-only
|
|
220
|
+
// — no body mutation. Independent of injection: a user can enable the dump
|
|
221
|
+
// without enabling injection to gather fixture data first.
|
|
222
|
+
|
|
223
|
+
// Maximum number of characters of block text kept in a dump record.
const DUMP_TEXT_PREFIX_CHARS = 200;

// Build the diagnostic record for one request without mutating `body`.
//
// The record holds the timestamp `ts` (defaults to the current time in
// ISO-8601), the role of messages[0], the number of blocks in its content, the
// request-wide marker count, and one entry per block: index, type, classified
// kind, a text prefix (null for non-text blocks), and whether the block
// carries cache_control. A missing or non-array content yields zero blocks.
export function buildDumpRecord(body, ts = new Date().toISOString()) {
  const first = Array.isArray(body?.messages) ? body.messages[0] : null;
  const content = first && Array.isArray(first.content) ? first.content : null;

  const describeBlock = (block, idx) => {
    const text = getBlockText(block);
    return {
      idx,
      type: block?.type ?? null,
      kind: classifyBlock(block),
      text_prefix: text === null ? null : text.slice(0, DUMP_TEXT_PREFIX_CHARS),
      has_cache_control: !!(block && block.cache_control),
    };
  };

  const blocks = content === null ? [] : content.map(describeBlock);
  const existing_marker_count = countAllCacheControlMarkers(body);

  return {
    ts,
    role: first?.role ?? null,
    block_count: blocks.length,
    existing_marker_count,
    blocks,
  };
}
|
|
250
|
+
|
|
251
|
+
// Append `record` as one JSON line to `path`, creating the parent directory
// (recursively) first. Rejects if either filesystem call fails.
async function writeDump(path, record) {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  const line = `${JSON.stringify(record)}\n`;
  await appendFile(path, line);
}
|
|
255
|
+
|
|
256
|
+
// --- Stderr summary ---
|
|
257
|
+
|
|
258
|
+
// Write a one-line summary of an injection attempt to stderr: the boundary
// index and kind when a marker was injected, otherwise the skip reason. Both
// forms end with the existing marker count.
function emitStderrSummary(stats) {
  const detail = stats.injected
    ? `injected boundary_idx=${stats.boundary_idx} kind=${stats.boundary_block_kind}`
    : `skipped reason=${stats.skip_reason}`;
  process.stderr.write(
    `[messages-breakpoint] ${detail} existing_markers=${stats.existing_marker_count}\n`,
  );
}
|
|
269
|
+
|
|
270
|
+
// --- Extension contract ---
|
|
271
|
+
|
|
272
|
+
// Extension contract object. `onRequest` does nothing unless at least one env
// gate (dump path or injection) is active for the current call.
export default {
  name: "messages-cache-breakpoint",
  description: [
    "Inject the missing breakpoint #3 cache_control marker at the boundary ",
    "between Claude Code's auto-injected messages[0] blocks (hooks, skills, ",
    "CLAUDE.md, deferred-tools, MCP) and the first real user content",
  ].join(""),
  enabled: false, // overridden by extensions.json
  order: 410,

  async onRequest(ctx) {
    const dumpPath = getDumpPath();
    const inject = isInjectEnabled();

    // Neither gate set: return before touching the request at all.
    if (!(dumpPath || inject)) return;
    if (!ctx?.body) return;

    // The dump is taken first so it records the request as received, before
    // any marker is injected. A failed dump is reported via debug() only and
    // never blocks injection.
    if (dumpPath) {
      try {
        await writeDump(dumpPath, buildDumpRecord(ctx.body));
      } catch (err) {
        debug(`dump write failed: ${err?.message ?? err}`);
      }
    }

    if (inject) {
      try {
        const stats = injectMessagesBreakpoint(ctx);
        ctx.meta = ctx.meta || {};
        ctx.meta.messagesBreakpointStats = stats;
        emitStderrSummary(stats);
      } catch (err) {
        debug(`onRequest unexpected: ${err?.message ?? err}`);
      }
    }
  },
};
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
// microcompact-stability — detect, optionally dump, and optionally normalize
|
|
2
|
+
// CC's `time_based_microcompact` sentinel string in tool_result content.
|
|
3
|
+
//
|
|
4
|
+
// Order 350: between `tool-input-normalize` (340) and `cache-control-normalize`
|
|
5
|
+
// (400). Runs BEFORE cache-control-normalize so the latter sees post-normalized
|
|
6
|
+
// content when computing sticky-marker hashes.
|
|
7
|
+
//
|
|
8
|
+
// Two independent runtime gates:
|
|
9
|
+
// - CACHE_FIX_DUMP_MICROCOMPACT=<path> → diagnostic JSONL dump (read-only).
|
|
10
|
+
// - CACHE_FIX_NORMALIZE_MICROCOMPACT=1 → mutate matched sentinels to a
|
|
11
|
+
// canonical byte-stable form.
|
|
12
|
+
//
|
|
13
|
+
// Two detection modes:
|
|
14
|
+
// - Mode A (exact match against confirmed patterns) → eligible for
|
|
15
|
+
// normalization. `sentinel_text` captured in full in dump records.
|
|
16
|
+
// - Mode B (prefix-only match) → diagnostic-only, NEVER normalized. Records
|
|
17
|
+
// redact to a configurable prefix length (default 64).
|
|
18
|
+
//
|
|
19
|
+
// The diagnostic dump always captures the **raw pre-normalization** bytes —
|
|
20
|
+
// this is the rule. Setting CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED=1
|
|
21
|
+
// additionally records the post-normalized form alongside the raw text.
|
|
22
|
+
//
|
|
23
|
+
// See `docs/directives/proxy-microcompact-cache-stability.md` for the full
|
|
24
|
+
// design (Mode A/B contract, privacy guarantees, Phase 2 deferral).
|
|
25
|
+
|
|
26
|
+
import { appendFile, mkdir } from "node:fs/promises";
|
|
27
|
+
import { dirname } from "node:path";
|
|
28
|
+
import { createHash } from "node:crypto";
|
|
29
|
+
|
|
30
|
+
// --- Env gates (read per-call so tests can flip without re-importing) ---
|
|
31
|
+
|
|
32
|
+
// Path for the sentinel dump, taken from CACHE_FIX_DUMP_MICROCOMPACT.
// Returns null when the variable is unset or empty.
function getDumpPath() {
  const configured = process.env.CACHE_FIX_DUMP_MICROCOMPACT;
  if (!configured) return null;
  return configured;
}
|
|
36
|
+
// Normalization gate: true only when CACHE_FIX_NORMALIZE_MICROCOMPACT is
// exactly "1".
function isNormalizeEnabled() {
  const { CACHE_FIX_NORMALIZE_MICROCOMPACT: flag } = process.env;
  return flag === "1";
}
|
|
39
|
+
// True only when CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED is exactly "1".
function isIncludeNormalizedEnabled() {
  const { CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED: flag } = process.env;
  return flag === "1";
}
|
|
42
|
+
// Canonical replacement string: the non-empty value of
// CACHE_FIX_MICROCOMPACT_NORMALIZED when set, otherwise DEFAULT_CANONICAL_TEXT.
function getCanonicalText() {
  const override = process.env.CACHE_FIX_MICROCOMPACT_NORMALIZED;
  if (typeof override !== "string" || override.length === 0) {
    return DEFAULT_CANONICAL_TEXT;
  }
  return override;
}
|
|
46
|
+
// Prefix length for Mode B dump records, from CACHE_FIX_MICROCOMPACT_REDACT_LEN.
// Falls back to DEFAULT_REDACT_LEN when the variable is unset, empty, or does
// not parse (base 10) to a finite integer >= 0. A value of 0 is honoured.
function getRedactLen() {
  const raw = process.env.CACHE_FIX_MICROCOMPACT_REDACT_LEN;
  if (raw == null || raw === "") return DEFAULT_REDACT_LEN;
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed < 0) return DEFAULT_REDACT_LEN;
  return parsed;
}
|
|
52
|
+
// Collect user-supplied Mode A patterns from
// CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_<N>=<regex> (1-indexed, sparse OK).
//
// Returns an array of { source, re } records. Empty values are ignored; a
// value that does not compile is reported on stderr and skipped.
//
// Patterns are returned in ascending numeric-suffix order rather than in
// environment enumeration order, so the result is deterministic across runs
// and platforms. Keys with a non-numeric suffix are still accepted and sort
// after the numbered ones, by key name.
function getCustomPatterns() {
  const KEY_PREFIX = "CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_";
  const rank = (suffix) =>
    /^\d+$/.test(suffix) ? Number(suffix) : Number.POSITIVE_INFINITY;

  const candidates = Object.entries(process.env)
    .filter(([k, v]) => k.startsWith(KEY_PREFIX) && typeof v === "string" && v.length > 0)
    .map(([key, value]) => ({ key, value, rank: rank(key.slice(KEY_PREFIX.length)) }))
    .sort((a, b) => {
      // Infinity - Infinity is NaN (falsy), which falls through to the name tiebreak.
      const byRank = a.rank - b.rank;
      if (byRank) return byRank;
      if (a.key < b.key) return -1;
      return a.key > b.key ? 1 : 0;
    });

  const out = [];
  for (const { key, value } of candidates) {
    try {
      out.push({ source: value, re: new RegExp(value) });
    } catch {
      process.stderr.write(`[microcompact] invalid regex in ${key}: ${value}\n`);
    }
  }
  return out;
}
|
|
66
|
+
|
|
67
|
+
// Custom Mode B literal prefixes, paired with custom Mode A regex patterns.
|
|
68
|
+
// A user who configures CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_<N> for a
|
|
69
|
+
// non-default sentinel family should also set CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_<N>
|
|
70
|
+
// to the LITERAL string the family begins with — that's what enables Mode B
|
|
71
|
+
// (redacted prefix capture) for variants that don't exact-match the regex.
|
|
72
|
+
//
|
|
73
|
+
// We can't safely derive a prefix from an arbitrary regex, so we accept the
|
|
74
|
+
// prefix as a separate input. The two env-var families don't have to agree
|
|
75
|
+
// on numeric suffixes; we collect all prefixes regardless of index.
|
|
76
|
+
function getCustomPrefixes() {
  // CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_<N>=<literal> (1-indexed, sparse OK)
  // Every non-empty value is returned as-is, whatever its numeric suffix.
  const KEY_PREFIX = "CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_";
  return Object.entries(process.env)
    .filter(([name]) => name.startsWith(KEY_PREFIX))
    .filter(([, value]) => typeof value === "string" && value.length > 0)
    .map(([, value]) => value);
}
|
|
86
|
+
// Debug logging gate: true only when CACHE_FIX_DEBUG is exactly "1".
function isDebug() {
  const { CACHE_FIX_DEBUG: flag } = process.env;
  return flag === "1";
}
|
|
89
|
+
// Write a tagged debug line to stderr; silent unless isDebug() is true.
function debug(msg) {
  if (!isDebug()) return;
  process.stderr.write(`[microcompact] DEBUG: ${msg}\n`);
}
|
|
92
|
+
|
|
93
|
+
// --- Constants ---
|
|
94
|
+
|
|
95
|
+
const DEFAULT_CANONICAL_TEXT = "[Old tool result content cleared]";
|
|
96
|
+
const DEFAULT_REDACT_LEN = 64;
|
|
97
|
+
|
|
98
|
+
// Default Mode A patterns (confirmed sentinel forms eligible for normalization).
|
|
99
|
+
// Adding a new exact form here promotes it from Mode B prefix capture to
|
|
100
|
+
// Mode A normalization-eligibility. Keep the list narrow.
|
|
101
|
+
const DEFAULT_EXACT_PATTERNS = [
  {
    // Bare form, optionally followed by trailing whitespace.
    source: "^\\[Old tool result content cleared\\]\\s*$",
    re: /^\[Old tool result content cleared\]\s*$/,
  },
  {
    // ISO-8601 UTC timestamp variant (optional milliseconds).
    source:
      "^\\[Old tool result content cleared at \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?Z\\]\\s*$",
    re: /^\[Old tool result content cleared at \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3})?Z\]\s*$/,
  },
];

// Mode B prefix — anything beginning with this is a candidate for redacted
// diagnostic capture, even if it doesn't match an exact pattern.
const SENTINEL_PREFIX = "[Old tool result content cleared";

// --- Pattern matching (pure) ---

// Returns the `source` string of the first matching exact pattern, or null
// when nothing matches or `text` is not a string.
//
// `extraPatterns` are user-supplied { source, re } records; they are appended
// to the defaults so a custom regex doesn't silently disable a default.
// Entries without a RegExp `re` are skipped rather than throwing.
//
// `lastIndex` is reset before every test: a caller-supplied regex carrying the
// `g` or `y` flag keeps state between `.test()` calls and would otherwise
// alternate between matching and not matching the same text.
export function matchesSentinelPattern(text, extraPatterns = []) {
  if (typeof text !== "string") return null;
  const all = DEFAULT_EXACT_PATTERNS.concat(extraPatterns);
  for (const p of all) {
    if (!p || !(p.re instanceof RegExp)) continue;
    p.re.lastIndex = 0;
    if (p.re.test(text)) return p.source;
  }
  return null;
}
|
|
130
|
+
|
|
131
|
+
// Mode B test: true when `text` is a string starting with SENTINEL_PREFIX or
// with any literal in `extraPrefixes`. Non-string input is never a match.
function isPartialMatch(text, extraPrefixes = []) {
  if (typeof text !== "string") return false;
  let matched = text.startsWith(SENTINEL_PREFIX);
  if (!matched) {
    for (const prefix of extraPrefixes) {
      if (text.startsWith(prefix)) {
        matched = true;
        break;
      }
    }
  }
  return matched;
}
|
|
139
|
+
|
|
140
|
+
// --- Walking tool_result content ---
|
|
141
|
+
//
|
|
142
|
+
// Returns { exact_matches, partial_matches, total_tool_results }.
|
|
143
|
+
//
|
|
144
|
+
// Match record shape (exact_matches[]):
|
|
145
|
+
// { msg_idx, block_idx, content_kind: "string"|"array_item",
|
|
146
|
+
// item_idx?, text, matched_pattern }
|
|
147
|
+
// Match record shape (partial_matches[]):
|
|
148
|
+
// { msg_idx, block_idx, content_kind: "string"|"array_item",
|
|
149
|
+
// item_idx?, text, byte_length }
|
|
150
|
+
//
|
|
151
|
+
// `text` on partial_matches is kept on the in-memory record for redaction at
|
|
152
|
+
// serialize time (the dump never persists the full text).
|
|
153
|
+
|
|
154
|
+
export function walkToolResultsForSentinels(messages, extraPatterns = [], extraPrefixes = []) {
  const exact_matches = [];
  const partial_matches = [];
  let total_tool_results = 0;

  // File one candidate text under Mode A (exact) or Mode B (prefix-only);
  // text matching neither is dropped. Mode A is tried first.
  const record = (where, text) => {
    const matched_pattern = matchesSentinelPattern(text, extraPatterns);
    if (matched_pattern !== null) {
      exact_matches.push({ ...where, text, matched_pattern });
    } else if (isPartialMatch(text, extraPrefixes)) {
      partial_matches.push({
        ...where,
        text,
        byte_length: Buffer.byteLength(text, "utf8"),
      });
    }
  };

  if (Array.isArray(messages)) {
    messages.forEach((msg, msg_idx) => {
      if (!msg || !Array.isArray(msg.content)) return;
      msg.content.forEach((block, block_idx) => {
        if (!block || block.type !== "tool_result") return;
        total_tool_results += 1;

        const { content } = block;
        if (typeof content === "string") {
          record({ msg_idx, block_idx, content_kind: "string" }, content);
          return;
        }
        if (!Array.isArray(content)) return;

        // Array content: only well-formed text items are inspected.
        content.forEach((item, item_idx) => {
          if (!item || item.type !== "text" || typeof item.text !== "string") return;
          record({ msg_idx, block_idx, content_kind: "array_item", item_idx }, item.text);
        });
      });
    });
  }

  return { exact_matches, partial_matches, total_tool_results };
}
|
|
209
|
+
|
|
210
|
+
// --- Normalization (mutates the message block in place) ---
|
|
211
|
+
//
|
|
212
|
+
// `match` is an entry from `exact_matches` (Mode A). We use its msg_idx /
|
|
213
|
+
// block_idx / content_kind / item_idx to find the exact place to rewrite.
|
|
214
|
+
// Mode B matches are NEVER passed to this function.
|
|
215
|
+
|
|
216
|
+
// Rewrites the text located by `match` to `canonicalText`, mutating the
// message block in place. Returns true when a rewrite happened and false when
// the location no longer resolves to a tool_result / text item.
export function normalizeToolResultContent(messages, match, canonicalText) {
  const target = messages?.[match.msg_idx]?.content?.[match.block_idx];
  if (target?.type !== "tool_result") return false;

  switch (match.content_kind) {
    case "string":
      // Whole-block string content is replaced outright.
      target.content = canonicalText;
      return true;

    case "array_item": {
      if (!Array.isArray(target.content)) return false;
      const entry = target.content[match.item_idx];
      if (entry?.type !== "text") return false;
      entry.text = canonicalText;
      return true;
    }

    default:
      return false;
  }
}
|
|
231
|
+
|
|
232
|
+
// --- Session ID hashing ---
|
|
233
|
+
|
|
234
|
+
// Derives a short, non-reversible session tag: the first 8 hex characters of
// the SHA-256 of the session id. Sources are tried in order (meta.session_id,
// then the x-session-id header, then x-anthropic-session-id); the first truthy
// value wins. Returns null when none is present.
function hashSessionId(reqCtx) {
  const headers = reqCtx?.headers;
  const candidates = [
    reqCtx?.meta?.session_id,
    headers?.["x-session-id"],
    headers?.["x-anthropic-session-id"],
  ];
  const sessionId = candidates.find(Boolean);
  if (!sessionId) return null;

  const digest = createHash("sha256").update(String(sessionId)).digest("hex");
  return digest.slice(0, 8);
}
|
|
243
|
+
|
|
244
|
+
// --- Diagnostic record build (pure) ---
|
|
245
|
+
|
|
246
|
+
// Converts an in-memory exact match into its dump record. The full matched
// text is kept as `sentinel_text`; `normalized_text` is attached only when the
// caller passes a string for `includeNormalizedText`.
function serializeExactMatch(m, includeNormalizedText) {
  const { msg_idx, block_idx, content_kind, matched_pattern, text, item_idx } = m;
  const withNormalized = typeof includeNormalizedText === "string";

  // Optional keys are spread last so they trail the fixed fields in the output.
  return {
    msg_idx,
    block_idx,
    content_kind,
    matched_pattern,
    sentinel_text: text,
    byte_length: Buffer.byteLength(text, "utf8"),
    ...(item_idx === undefined ? {} : { item_idx }),
    ...(withNormalized ? { normalized_text: includeNormalizedText } : {}),
  };
}
|
|
261
|
+
|
|
262
|
+
// Converts an in-memory partial match into its dump record. The full text is
// never copied; at most the first `redactLen` characters are emitted as
// `prefix_64`, and nothing at all when `redactLen` is not greater than zero.
function serializePartialMatch(m, redactLen) {
  const { msg_idx, block_idx, content_kind, byte_length, item_idx } = m;
  const hasItemIndex = item_idx !== undefined;
  const wantsPrefix = redactLen > 0;

  return {
    msg_idx,
    block_idx,
    content_kind,
    byte_length,
    ...(hasItemIndex ? { item_idx } : {}),
    ...(wantsPrefix ? { prefix_64: m.text.slice(0, redactLen) } : {}),
  };
}
|
|
275
|
+
|
|
276
|
+
// Assembles one diagnostic dump record from the scan results.
//
// opts.ts                - timestamp override; defaults to the current ISO time.
// opts.includeNormalized - must be exactly `true` for normalized text to be
//                          attached to exact matches.
// opts.canonicalText     - the normalized text to attach (strings only).
// opts.redactLen         - prefix length for partial matches; any non-number
//                          falls back to DEFAULT_REDACT_LEN.
export function buildDiagnosticRecord(reqCtx, exact_matches, partial_matches, totalToolResults, opts = {}) {
  const { canonicalText, redactLen: requestedRedactLen } = opts;

  const attachNormalized = opts.includeNormalized === true && typeof canonicalText === "string";
  const normalizedText = attachNormalized ? canonicalText : null;
  const redactLen = typeof requestedRedactLen === "number" ? requestedRedactLen : DEFAULT_REDACT_LEN;
  const requestMessages = reqCtx?.body?.messages;

  return {
    ts: opts.ts || new Date().toISOString(),
    session_id_hash: hashSessionId(reqCtx),
    exact_matches: exact_matches.map((m) => serializeExactMatch(m, normalizedText)),
    partial_matches: partial_matches.map((m) => serializePartialMatch(m, redactLen)),
    total_messages: Array.isArray(requestMessages) ? requestMessages.length : 0,
    total_tool_results: totalToolResults,
    model: reqCtx?.body?.model ?? null,
  };
}
|
|
292
|
+
|
|
293
|
+
// --- I/O ---
|
|
294
|
+
|
|
295
|
+
// Appends `record` to the dump file at `path` as one JSON line, creating the
// parent directory (recursively) first. Rejections from mkdir/appendFile
// propagate to the caller.
export async function appendDiagnosticRecord(path, record) {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });

  const line = `${JSON.stringify(record)}\n`;
  await appendFile(path, line);
}
|
|
299
|
+
|
|
300
|
+
// --- Stats shape ---
|
|
301
|
+
|
|
302
|
+
// Builds a fresh, zeroed stats object for one request. A new object is created
// on every call so callers can mutate it freely.
function initStats() {
  const stats = {
    diagnostic_enabled: false,
    normalization_enabled: false,
    sentinel_pattern_used: null, // first matched pattern source (Mode A only)
  };

  // Numeric counters, listed in the order they appear on the stats object.
  const counters = [
    "total_tool_results_scanned",
    "exact_matches_count",
    "partial_matches_count",
    "sentinels_matched", // exact + partial
    "sentinels_normalized",
    "bytes_original",
    "bytes_normalized",
    "bytes_saved",
    "diagnostic_records_written",
  ];
  for (const counter of counters) {
    stats[counter] = 0;
  }

  return stats;
}
|
|
318
|
+
|
|
319
|
+
// --- Stderr summary ---
|
|
320
|
+
|
|
321
|
+
// Writes a one-line `[microcompact] ...` summary of `stats` to stderr.
//
// stats    - stats object for the request (see initStats for the fields).
// dumpPath - dump file path, printed only when diagnostics are enabled.
//
// Normalization details (counts, byte totals, pattern origin) are printed only
// when normalization is enabled; the dump path only when diagnostics are on.
function emitStderrSummary(stats, dumpPath) {
  const parts = [`matched=${stats.sentinels_matched}`];

  if (stats.normalization_enabled) {
    parts.push(`normalized=${stats.sentinels_normalized}`);
    parts.push(`bytes=${stats.bytes_original}->${stats.bytes_normalized}`);
    if (stats.sentinel_pattern_used) {
      // Compare against every default pattern instead of hard-coding the
      // first two slots, so the label stays correct (and never dereferences a
      // missing entry) whatever the length of DEFAULT_EXACT_PATTERNS.
      const isDefault = DEFAULT_EXACT_PATTERNS.some(
        (pattern) => pattern.source === stats.sentinel_pattern_used,
      );
      parts.push(`sentinel_pattern=${isDefault ? "default" : "custom"}`);
    }
  }

  if (stats.diagnostic_enabled) {
    parts.push(`dump=${dumpPath}`);
    if (!stats.normalization_enabled) parts.push("(normalize disabled)");
  }

  process.stderr.write(`[microcompact] ${parts.join(" ")}\n`);
}
|
|
336
|
+
|
|
337
|
+
// --- Orchestrator ---
|
|
338
|
+
|
|
339
|
+
// Runs one microcompact-stability pass over a request and returns its stats.
//
// reqCtx - request context; `reqCtx.body.messages` is scanned and, when
//          normalization is enabled, mutated in place.
//
// Returns the stats object from initStats(), filled in as far as the pass got.
// Returns early (flags set, counters zero) when neither the dump nor the
// normalization is enabled, or when the request has no messages array.
export async function runMicrocompactStability(reqCtx) {
  const stats = initStats();
  const dumpPath = getDumpPath();
  const normalize = isNormalizeEnabled();
  stats.diagnostic_enabled = !!dumpPath;
  stats.normalization_enabled = normalize;

  if (!dumpPath && !normalize) return stats;
  if (!reqCtx || !reqCtx.body || !Array.isArray(reqCtx.body.messages)) return stats;

  const extraPatterns = getCustomPatterns();
  const extraPrefixes = getCustomPrefixes();
  const { exact_matches, partial_matches, total_tool_results } = walkToolResultsForSentinels(
    reqCtx.body.messages,
    extraPatterns,
    extraPrefixes,
  );
  stats.total_tool_results_scanned = total_tool_results;
  stats.exact_matches_count = exact_matches.length;
  stats.partial_matches_count = partial_matches.length;
  stats.sentinels_matched = exact_matches.length + partial_matches.length;
  if (exact_matches.length > 0) {
    stats.sentinel_pattern_used = exact_matches[0].matched_pattern;
  }

  // Diagnostic dump runs FIRST (raw pre-normalization bytes). Mode B is
  // redacted to prefix_64 by the serializer; Mode A captures full text.
  if (dumpPath && (exact_matches.length > 0 || partial_matches.length > 0)) {
    try {
      const canonicalText = normalize ? getCanonicalText() : null;
      const record = buildDiagnosticRecord(reqCtx, exact_matches, partial_matches, total_tool_results, {
        includeNormalized: isIncludeNormalizedEnabled(),
        canonicalText,
        redactLen: getRedactLen(),
      });
      await appendDiagnosticRecord(dumpPath, record);
      stats.diagnostic_records_written = 1;
    } catch (err) {
      // Best-effort: a failed dump must not prevent normalization below.
      debug(`dump write failed: ${err?.message ?? err}`);
    }
  }

  // Normalization runs AFTER dump. Only Mode A matches are eligible.
  if (normalize && exact_matches.length > 0) {
    const canonicalText = getCanonicalText();
    for (const m of exact_matches) {
      const ok = normalizeToolResultContent(reqCtx.body.messages, m, canonicalText);
      // Byte totals cover only the matches that were actually rewritten, so
      // bytes_saved never counts text that is still in the request unchanged.
      if (!ok) continue;
      stats.bytes_original += Buffer.byteLength(m.text, "utf8");
      stats.bytes_normalized += Buffer.byteLength(canonicalText, "utf8");
      stats.sentinels_normalized++;
    }
    stats.bytes_saved = stats.bytes_original - stats.bytes_normalized;
  }

  return stats;
}
|
|
397
|
+
|
|
398
|
+
// --- Extension contract ---
|
|
399
|
+
|
|
400
|
+
export default {
  name: "microcompact-stability",
  description:
    "Phase 1 microcompact cache stability — diagnostic capture of CC's " +
    "time_based_microcompact sentinel + opt-in normalization to a canonical " +
    "byte-stable form. Phase 2 (snapshot/restore) deferred to v3.5.0+.",
  enabled: false, // overridden by extensions.json
  order: 350,

  // Request hook. Never throws: any failure is reported through debug() so
  // the request continues unmodified by this extension.
  async onRequest(ctx) {
    try {
      const stats = await runMicrocompactStability(ctx);

      // With both features off there is nothing to report or attach.
      const featureActive = stats.diagnostic_enabled || stats.normalization_enabled;
      if (!featureActive) return;

      ctx.meta = ctx.meta || {};
      ctx.meta.microcompactStats = stats;

      // The stderr summary is printed only when at least one sentinel
      // matched; enabled-but-no-match requests stay silent.
      if (stats.sentinels_matched > 0) {
        emitStderrSummary(stats, getDumpPath());
      }
    } catch (err) {
      debug(`onRequest unexpected: ${err?.message ?? err}`);
    }
  },
};
|
|
@@ -33,7 +33,8 @@ export default {
|
|
|
33
33
|
|
|
34
34
|
if (ttlValue === "none") return;
|
|
35
35
|
|
|
36
|
-
const
|
|
36
|
+
const detectedTier = ctx.meta?._ttlTier || "1h";
|
|
37
|
+
const ttlParam = ttlValue === "5m" || detectedTier === "5m" ? "5m" : "1h";
|
|
37
38
|
|
|
38
39
|
if (Array.isArray(body.system)) {
|
|
39
40
|
body.system = body.system.map((block) => injectTtl(block, ttlParam));
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// ttl-tier-detect — port of preload.mjs:1815-1828 in-payload tier detection.
|
|
2
|
+
//
|
|
3
|
+
// Runs at order 75 (between read-only upstream-change-detection at 50 and
|
|
4
|
+
// every cache_control mutator) so that downstream strips by fresh-session-sort
|
|
5
|
+
// (250) and cache-control-normalize (400) cannot hide a ttl="5m" signal from
|
|
6
|
+
// ttl-management at order 500.
|
|
7
|
+
//
|
|
8
|
+
// Pure detection. Sets ctx.meta._ttlTier. Does not mutate ctx.body.
|
|
9
|
+
|
|
10
|
+
// Reports the cache TTL tier already present in a request body: "5m" when any
// system block or any block inside an array-valued message content carries
// cache_control.ttl === "5m", otherwise "1h". The body is only read.
function detectExistingTier(body) {
  const isFiveMinute = (block) => block?.cache_control?.ttl === "5m";

  const systemBlocks = Array.isArray(body?.system) ? body.system : [];
  if (systemBlocks.some(isFiveMinute)) return "5m";

  const messages = Array.isArray(body?.messages) ? body.messages : [];
  const inMessages = messages.some(
    (message) => Array.isArray(message?.content) && message.content.some(isFiveMinute),
  );
  return inMessages ? "5m" : "1h";
}
|
|
22
|
+
|
|
23
|
+
export { detectExistingTier };
|
|
24
|
+
|
|
25
|
+
export default {
  name: "ttl-tier-detect",
  description: "Detect existing TTL tier from incoming payload before cache_control normalization",
  order: 75,

  // Request hook: records the tier found in the incoming payload on
  // ctx.meta._ttlTier. ctx.body is not modified.
  async onRequest(ctx) {
    // Create the meta bag if it is missing so the assignment below cannot
    // throw on a context that arrives without one.
    ctx.meta = ctx.meta || {};
    ctx.meta._ttlTier = detectExistingTier(ctx.body);
  },
};
|
package/proxy/extensions.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
+
"ttl-tier-detect": { "enabled": true, "order": 75 },
|
|
2
3
|
"fingerprint-strip": { "enabled": true, "order": 100 },
|
|
3
4
|
"image-strip": { "enabled": true, "order": 150 },
|
|
4
5
|
"sort-stabilization": { "enabled": true, "order": 200 },
|
|
@@ -7,7 +8,9 @@
|
|
|
7
8
|
"smoosh-split": { "enabled": true, "order": 320 },
|
|
8
9
|
"content-strip": { "enabled": true, "order": 330 },
|
|
9
10
|
"tool-input-normalize": { "enabled": true, "order": 340 },
|
|
11
|
+
"microcompact-stability": { "enabled": true, "order": 350 },
|
|
10
12
|
"cache-control-normalize": { "enabled": true, "order": 400 },
|
|
13
|
+
"messages-cache-breakpoint": { "enabled": true, "order": 410 },
|
|
11
14
|
"ttl-management": { "enabled": true, "order": 500 },
|
|
12
15
|
"cache-telemetry": { "enabled": true, "order": 600 },
|
|
13
16
|
"overage-warning": { "enabled": true, "order": 610 },
|