claude-code-cache-fix 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +3 -3
- package/README.md +58 -3
- package/README.zh.md +3 -3
- package/package.json +2 -2
- package/proxy/extensions/cache-telemetry.mjs +139 -17
- package/proxy/extensions/identity-normalization.mjs +1 -1
- package/proxy/extensions/image-strip.mjs +7 -2
- package/proxy/extensions/messages-cache-breakpoint.mjs +314 -0
- package/proxy/extensions/microcompact-stability.mjs +429 -0
- package/proxy/extensions/ttl-management.mjs +2 -1
- package/proxy/extensions/ttl-tier-detect.mjs +33 -0
- package/proxy/extensions.json +3 -0
- package/tools/cache-test.sh +19 -11
- package/tools/cross-version-cache-test.sh +4 -4
- package/tools/quota-statusline.sh +75 -19
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
// microcompact-stability — detect, optionally dump, and optionally normalize
|
|
2
|
+
// CC's `time_based_microcompact` sentinel string in tool_result content.
|
|
3
|
+
//
|
|
4
|
+
// Order 350: between `tool-input-normalize` (340) and `cache-control-normalize`
|
|
5
|
+
// (400). Runs BEFORE cache-control-normalize so the latter sees post-normalized
|
|
6
|
+
// content when computing sticky-marker hashes.
|
|
7
|
+
//
|
|
8
|
+
// Two independent runtime gates:
|
|
9
|
+
// - CACHE_FIX_DUMP_MICROCOMPACT=<path> → diagnostic JSONL dump (read-only).
|
|
10
|
+
// - CACHE_FIX_NORMALIZE_MICROCOMPACT=1 → mutate matched sentinels to a
|
|
11
|
+
// canonical byte-stable form.
|
|
12
|
+
//
|
|
13
|
+
// Two detection modes:
|
|
14
|
+
// - Mode A (exact match against confirmed patterns) → eligible for
|
|
15
|
+
// normalization. `sentinel_text` captured in full in dump records.
|
|
16
|
+
// - Mode B (prefix-only match) → diagnostic-only, NEVER normalized. Records
|
|
17
|
+
// redact to a configurable prefix length (default 64).
|
|
18
|
+
//
|
|
19
|
+
// The diagnostic dump always captures the **raw pre-normalization** bytes —
|
|
20
|
+
// this is the rule. Setting CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED=1
|
|
21
|
+
// additionally records the post-normalized form alongside the raw text.
|
|
22
|
+
//
|
|
23
|
+
// See `docs/directives/proxy-microcompact-cache-stability.md` for the full
|
|
24
|
+
// design (Mode A/B contract, privacy guarantees, Phase 2 deferral).
|
|
25
|
+
|
|
26
|
+
import { appendFile, mkdir } from "node:fs/promises";
|
|
27
|
+
import { dirname } from "node:path";
|
|
28
|
+
import { createHash } from "node:crypto";
|
|
29
|
+
|
|
30
|
+
// --- Env gates (read per-call so tests can flip without re-importing) ---
|
|
31
|
+
|
|
32
|
+
// Diagnostic dump gate: returns the JSONL path from
// CACHE_FIX_DUMP_MICROCOMPACT, or null when the variable is unset or empty.
function getDumpPath() {
  const dumpPath = process.env.CACHE_FIX_DUMP_MICROCOMPACT;
  if (!dumpPath) return null;
  return dumpPath;
}
|
|
36
|
+
// Normalization gate: true only when CACHE_FIX_NORMALIZE_MICROCOMPACT is
// exactly the string "1".
function isNormalizeEnabled() {
  const { CACHE_FIX_NORMALIZE_MICROCOMPACT: flag } = process.env;
  return flag === "1";
}
|
|
39
|
+
// True only when CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED is exactly "1"
// (dump records then also carry the post-normalized text).
function isIncludeNormalizedEnabled() {
  const { CACHE_FIX_DUMP_MICROCOMPACT_INCLUDE_NORMALIZED: flag } = process.env;
  return flag === "1";
}
|
|
42
|
+
// Text that matched sentinels are rewritten to: the
// CACHE_FIX_MICROCOMPACT_NORMALIZED override when it is a non-empty string,
// otherwise DEFAULT_CANONICAL_TEXT.
function getCanonicalText() {
  const override = process.env.CACHE_FIX_MICROCOMPACT_NORMALIZED;
  const hasOverride = typeof override === "string" && override.length > 0;
  if (hasOverride) return override;
  return DEFAULT_CANONICAL_TEXT;
}
|
|
46
|
+
// Prefix length used when redacting Mode B matches. Reads
// CACHE_FIX_MICROCOMPACT_REDACT_LEN as a base-10 integer; unset, empty,
// unparsable, or negative values fall back to DEFAULT_REDACT_LEN.
function getRedactLen() {
  const raw = process.env.CACHE_FIX_MICROCOMPACT_REDACT_LEN;
  if (raw == null || raw === "") return DEFAULT_REDACT_LEN;

  const parsed = parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed < 0) return DEFAULT_REDACT_LEN;
  return parsed;
}
|
|
52
|
+
// Collects user-supplied Mode A patterns from the environment.
// Source: CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_<N>=<regex> (any suffix,
// gaps allowed). Returns [{ source, re }] in environment order; a value that
// fails to compile is reported on stderr and skipped.
function getCustomPatterns() {
  const ENV_PREFIX = "CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_";
  const compiled = [];
  for (const name of Object.keys(process.env)) {
    const raw = process.env[name];
    const usable = name.startsWith(ENV_PREFIX) && typeof raw === "string" && raw.length > 0;
    if (!usable) continue;
    try {
      compiled.push({ source: raw, re: new RegExp(raw) });
    } catch {
      process.stderr.write(`[microcompact] invalid regex in ${name}: ${raw}\n`);
    }
  }
  return compiled;
}
|
|
66
|
+
|
|
67
|
+
// Custom Mode B literal prefixes, paired with custom Mode A regex patterns.
|
|
68
|
+
// A user who configures CACHE_FIX_MICROCOMPACT_SENTINEL_PATTERN_<N> for a
|
|
69
|
+
// non-default sentinel family should also set CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_<N>
|
|
70
|
+
// to the LITERAL string the family begins with — that's what enables Mode B
|
|
71
|
+
// (redacted prefix capture) for variants that don't exact-match the regex.
|
|
72
|
+
//
|
|
73
|
+
// We can't safely derive a prefix from an arbitrary regex, so we accept the
|
|
74
|
+
// prefix as a separate input. The two env-var families don't have to agree
|
|
75
|
+
// on numeric suffixes; we collect all prefixes regardless of index.
|
|
76
|
+
// Collects user-supplied Mode B literal prefixes from the environment.
// Source: CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_<N>=<literal> (any suffix,
// gaps allowed). Empty values are ignored; result is in environment order.
function getCustomPrefixes() {
  const ENV_PREFIX = "CACHE_FIX_MICROCOMPACT_SENTINEL_PREFIX_";
  return Object.entries(process.env)
    .filter(([name, value]) => name.startsWith(ENV_PREFIX) && typeof value === "string" && value.length > 0)
    .map(([, value]) => value);
}
|
|
86
|
+
// Debug gate: true only when CACHE_FIX_DEBUG is exactly "1".
function isDebug() {
  const { CACHE_FIX_DEBUG: flag } = process.env;
  return flag === "1";
}
|
|
89
|
+
// Writes a "[microcompact] DEBUG: ..." line to stderr, but only when the
// debug gate is on; otherwise does nothing.
function debug(msg) {
  if (!isDebug()) return;
  process.stderr.write(`[microcompact] DEBUG: ${msg}\n`);
}
|
|
92
|
+
|
|
93
|
+
// --- Constants ---
|
|
94
|
+
|
|
95
|
+
const DEFAULT_CANONICAL_TEXT = "[Old tool result content cleared]";
|
|
96
|
+
const DEFAULT_REDACT_LEN = 64;
|
|
97
|
+
|
|
98
|
+
// Default Mode A patterns (confirmed sentinel forms eligible for normalization).
|
|
99
|
+
// Adding a new exact form here promotes it from Mode B prefix capture to
|
|
100
|
+
// Mode A normalization-eligibility. Keep the list narrow.
|
|
101
|
+
// Each entry is { source, re }: `re` is the compiled matcher and `source` is
// its pattern text (what gets reported as `matched_pattern`). `source` is
// taken from the regex literal itself so the two cannot drift apart.
const DEFAULT_EXACT_PATTERNS = [
  // Bare sentinel, optional trailing whitespace.
  /^\[Old tool result content cleared\]\s*$/,
  // Sentinel carrying an ISO-8601 UTC timestamp (milliseconds optional).
  /^\[Old tool result content cleared at \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{3})?Z\]\s*$/,
].map((re) => ({ source: re.source, re }));
|
|
112
|
+
|
|
113
|
+
// Mode B prefix — anything beginning with this is a candidate for redacted
|
|
114
|
+
// diagnostic capture, even if it doesn't match an exact pattern.
|
|
115
|
+
const SENTINEL_PREFIX = "[Old tool result content cleared";
|
|
116
|
+
|
|
117
|
+
// --- Pattern matching (pure) ---
|
|
118
|
+
|
|
119
|
+
// Returns the source string of the first matching exact pattern, or null.
|
|
120
|
+
// `extraPatterns` are user-supplied patterns from env vars; they're appended
|
|
121
|
+
// to the defaults so a custom regex doesn't silently disable a default.
|
|
122
|
+
/**
 * Mode A check: test `text` against the default exact patterns followed by
 * any caller-supplied ones.
 *
 * @param {unknown} text - Candidate tool_result text; non-strings never match.
 * @param {Array<{source: string, re: RegExp}>} [extraPatterns] - Extra
 *   patterns, tried after the defaults.
 * @returns {string|null} `source` of the first pattern that matches, or null.
 */
export function matchesSentinelPattern(text, extraPatterns = []) {
  if (typeof text !== "string") return null;
  const hit = DEFAULT_EXACT_PATTERNS.concat(extraPatterns).find((candidate) => candidate.re.test(text));
  return hit === undefined ? null : hit.source;
}
|
|
130
|
+
|
|
131
|
+
// Mode B check: true when `text` is a string that starts with the built-in
// SENTINEL_PREFIX or with any of the caller-supplied literal prefixes.
function isPartialMatch(text, extraPrefixes = []) {
  if (typeof text !== "string") return false;
  return (
    text.startsWith(SENTINEL_PREFIX) ||
    [...extraPrefixes].some((prefix) => text.startsWith(prefix))
  );
}
|
|
139
|
+
|
|
140
|
+
// --- Walking tool_result content ---
|
|
141
|
+
//
|
|
142
|
+
// Returns { exact_matches, partial_matches, total_tool_results }.
|
|
143
|
+
//
|
|
144
|
+
// Match record shape (exact_matches[]):
|
|
145
|
+
// { msg_idx, block_idx, content_kind: "string"|"array_item",
|
|
146
|
+
// item_idx?, text, matched_pattern }
|
|
147
|
+
// Match record shape (partial_matches[]):
|
|
148
|
+
// { msg_idx, block_idx, content_kind: "string"|"array_item",
|
|
149
|
+
// item_idx?, text, byte_length }
|
|
150
|
+
//
|
|
151
|
+
// `text` on partial_matches is kept on the in-memory record for redaction at
|
|
152
|
+
// serialize time (the dump never persists the full text).
|
|
153
|
+
|
|
154
|
+
/**
 * Scan every `tool_result` block in `messages` and classify its text.
 *
 * A block's `content` is inspected when it is a string, or an array whose
 * `{ type: "text", text: string }` items are inspected one by one. Text that
 * matches an exact pattern is recorded in `exact_matches` (Mode A); otherwise
 * text that only matches a prefix is recorded in `partial_matches` (Mode B).
 *
 * @param {unknown} messages - Request messages; a non-array yields an empty result.
 * @param {Array<{source: string, re: RegExp}>} [extraPatterns] - Extra Mode A patterns.
 * @param {string[]} [extraPrefixes] - Extra Mode B literal prefixes.
 * @returns {{exact_matches: object[], partial_matches: object[], total_tool_results: number}}
 */
export function walkToolResultsForSentinels(messages, extraPatterns = [], extraPrefixes = []) {
  const exact_matches = [];
  const partial_matches = [];
  let total_tool_results = 0;

  // `location` carries msg_idx / block_idx / content_kind (+ item_idx for
  // array items) and is spread first so record key order stays stable.
  const classify = (location, text) => {
    const matched_pattern = matchesSentinelPattern(text, extraPatterns);
    if (matched_pattern !== null) {
      exact_matches.push({ ...location, text, matched_pattern });
    } else if (isPartialMatch(text, extraPrefixes)) {
      partial_matches.push({ ...location, text, byte_length: Buffer.byteLength(text, "utf8") });
    }
  };

  if (Array.isArray(messages)) {
    for (const [msg_idx, msg] of messages.entries()) {
      if (!msg || !Array.isArray(msg.content)) continue;

      for (const [block_idx, block] of msg.content.entries()) {
        if (!block || block.type !== "tool_result") continue;
        total_tool_results += 1;

        const { content } = block;
        if (typeof content === "string") {
          classify({ msg_idx, block_idx, content_kind: "string" }, content);
          continue;
        }
        if (!Array.isArray(content)) continue;

        for (const [item_idx, item] of content.entries()) {
          const isTextItem = Boolean(item) && item.type === "text" && typeof item.text === "string";
          if (isTextItem) {
            classify({ msg_idx, block_idx, content_kind: "array_item", item_idx }, item.text);
          }
        }
      }
    }
  }

  return { exact_matches, partial_matches, total_tool_results };
}
|
|
209
|
+
|
|
210
|
+
// --- Normalization (mutates the message block in place) ---
|
|
211
|
+
//
|
|
212
|
+
// `match` is an entry from `exact_matches` (Mode A). We use its msg_idx /
|
|
213
|
+
// block_idx / content_kind / item_idx to find the exact place to rewrite.
|
|
214
|
+
// Mode B matches are NEVER passed to this function.
|
|
215
|
+
|
|
216
|
+
/**
 * Rewrite the sentinel located by `match` to `canonicalText`, mutating
 * `messages` in place.
 *
 * @param {Array} messages - Request messages.
 * @param {{msg_idx: number, block_idx: number, content_kind: string, item_idx?: number}} match
 *   Location record (an `exact_matches` entry).
 * @param {string} canonicalText - Replacement text.
 * @returns {boolean} true when something was rewritten; false when the
 *   location does not resolve to a tool_result (or to a text item inside one).
 */
export function normalizeToolResultContent(messages, match, canonicalText) {
  const target = messages?.[match.msg_idx]?.content?.[match.block_idx];
  const isToolResult = Boolean(target) && target.type === "tool_result";
  if (!isToolResult) return false;

  switch (match.content_kind) {
    case "string":
      // String-form content is replaced wholesale.
      target.content = canonicalText;
      return true;

    case "array_item": {
      if (!Array.isArray(target.content)) return false;
      const entry = target.content[match.item_idx];
      if (entry && entry.type === "text") {
        entry.text = canonicalText;
        return true;
      }
      return false;
    }

    default:
      return false;
  }
}
|
|
231
|
+
|
|
232
|
+
// --- Session ID hashing ---
|
|
233
|
+
|
|
234
|
+
// Returns the first 8 hex characters of SHA-256(session id), or null when no
// session id is available. Lookup order: reqCtx.meta.session_id, then the
// x-claude-code-session-id, x-session-id and x-anthropic-session-id headers;
// the first truthy value wins.
function hashSessionId(reqCtx) {
  const headers = reqCtx?.headers;
  const candidates = [
    reqCtx?.meta?.session_id,
    headers?.["x-claude-code-session-id"],
    headers?.["x-session-id"],
    headers?.["x-anthropic-session-id"],
  ];
  const sessionId = candidates.find(Boolean);
  if (!sessionId) return null;

  const digest = createHash("sha256").update(String(sessionId)).digest("hex");
  return digest.slice(0, 8);
}
|
|
244
|
+
|
|
245
|
+
// --- Diagnostic record build (pure) ---
|
|
246
|
+
|
|
247
|
+
// Builds the dump record for one Mode A match. The full sentinel text is
// included; `item_idx` appears only for array-item matches and
// `normalized_text` only when `includeNormalizedText` is a string.
function serializeExactMatch(m, includeNormalizedText) {
  const { msg_idx, block_idx, content_kind, matched_pattern, text, item_idx } = m;
  return {
    msg_idx,
    block_idx,
    content_kind,
    matched_pattern,
    sentinel_text: text,
    byte_length: Buffer.byteLength(text, "utf8"),
    // Optional keys go last so the serialized key order is unchanged.
    ...(item_idx !== undefined ? { item_idx } : {}),
    ...(typeof includeNormalizedText === "string" ? { normalized_text: includeNormalizedText } : {}),
  };
}
|
|
262
|
+
|
|
263
|
+
// Builds the dump record for one Mode B match. Only location, byte length and
// a redacted prefix are emitted — never the full text.
//
//   m         - in-memory partial-match record (msg_idx, block_idx,
//               content_kind, optional item_idx, text, byte_length).
//   redactLen - maximum prefix length in UTF-16 code units; when not > 0 the
//               prefix is omitted entirely.
//
// The prefix is stored under `prefix_64` whatever `redactLen` is.
function serializePartialMatch(m, redactLen) {
  const rec = {
    msg_idx: m.msg_idx,
    block_idx: m.block_idx,
    content_kind: m.content_kind,
    byte_length: m.byte_length,
  };
  if (m.item_idx !== undefined) rec.item_idx = m.item_idx;
  if (redactLen > 0) {
    let prefix = m.text.slice(0, redactLen);
    // `slice` cuts on UTF-16 code units. If the cut lands between the two
    // halves of a surrogate pair, drop the dangling high surrogate so the
    // dump never contains half a character.
    const lastUnit = prefix.charCodeAt(prefix.length - 1);
    const nextUnit = m.text.charCodeAt(prefix.length);
    const splitsPair =
      lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (splitsPair) prefix = prefix.slice(0, -1);
    rec.prefix_64 = prefix;
  }
  return rec;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Assemble one diagnostic JSONL record for a request.
 *
 * @param {object} reqCtx - Request context; `body.messages`, `body.model` and
 *   the session id sources are read from it.
 * @param {object[]} exact_matches - Mode A matches (serialized with full text).
 * @param {object[]} partial_matches - Mode B matches (serialized redacted).
 * @param {number} totalToolResults - Count of tool_result blocks scanned.
 * @param {object} [opts]
 * @param {boolean} [opts.includeNormalized] - Must be exactly `true` to add
 *   `normalized_text` to exact-match records.
 * @param {string} [opts.canonicalText] - Normalized text to record; ignored
 *   unless it is a string.
 * @param {number} [opts.redactLen] - Mode B prefix length; defaults to
 *   DEFAULT_REDACT_LEN when not a number.
 * @param {string} [opts.ts] - Timestamp override; defaults to now (ISO 8601).
 * @returns {object} The record to append to the dump.
 */
export function buildDiagnosticRecord(reqCtx, exact_matches, partial_matches, totalToolResults, opts = {}) {
  const { includeNormalized, canonicalText, redactLen, ts } = opts;

  const normalizedText =
    includeNormalized === true && typeof canonicalText === "string" ? canonicalText : null;
  const prefixLen = typeof redactLen === "number" ? redactLen : DEFAULT_REDACT_LEN;
  const messages = reqCtx?.body?.messages;

  return {
    ts: ts || new Date().toISOString(),
    session_id_hash: hashSessionId(reqCtx),
    exact_matches: exact_matches.map((match) => serializeExactMatch(match, normalizedText)),
    partial_matches: partial_matches.map((match) => serializePartialMatch(match, prefixLen)),
    total_messages: Array.isArray(messages) ? messages.length : 0,
    total_tool_results: totalToolResults,
    model: reqCtx?.body?.model ?? null,
  };
}
|
|
293
|
+
|
|
294
|
+
// --- I/O ---
|
|
295
|
+
|
|
296
|
+
/**
 * Append `record` to the JSONL file at `path` as one JSON line, creating the
 * parent directory first if needed.
 *
 * @param {string} path - Destination JSONL file.
 * @param {object} record - JSON-serializable record.
 * @returns {Promise<void>} Rejects if directory creation, serialization, or
 *   the append fails.
 */
export async function appendDiagnosticRecord(path, record) {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  const line = `${JSON.stringify(record)}\n`;
  await appendFile(path, line);
}
|
|
300
|
+
|
|
301
|
+
// --- Stats shape ---
|
|
302
|
+
|
|
303
|
+
// Fresh per-request stats object with every gate off and every counter at
// zero. Key order is kept stable because the object is attached to telemetry.
function initStats() {
  const gates = {
    diagnostic_enabled: false,
    normalization_enabled: false,
  };
  const matchCounters = {
    total_tool_results_scanned: 0,
    exact_matches_count: 0,
    partial_matches_count: 0,
    sentinels_matched: 0, // exact + partial
    sentinels_normalized: 0,
  };
  const byteCounters = {
    bytes_original: 0,
    bytes_normalized: 0,
    bytes_saved: 0,
  };
  return {
    ...gates,
    sentinel_pattern_used: null, // first matched pattern source (Mode A only)
    ...matchCounters,
    ...byteCounters,
    diagnostic_records_written: 0,
  };
}
|
|
319
|
+
|
|
320
|
+
// --- Stderr summary ---
|
|
321
|
+
|
|
322
|
+
// Writes a one-line "[microcompact] ..." summary of `stats` to stderr.
//
//   stats    - stats object produced by the orchestrator.
//   dumpPath - diagnostic dump path, echoed when the dump gate is on.
//
// Fields: matched=<n> always; normalized=/bytes=/sentinel_pattern= only when
// normalization is enabled; dump=<path> (plus "(normalize disabled)") only
// when the diagnostic dump is enabled.
function emitStderrSummary(stats, dumpPath) {
  const parts = [`matched=${stats.sentinels_matched}`];
  if (stats.normalization_enabled) {
    parts.push(`normalized=${stats.sentinels_normalized}`);
    parts.push(`bytes=${stats.bytes_original}->${stats.bytes_normalized}`);
    if (stats.sentinel_pattern_used) {
      // Compare against every default pattern rather than fixed indices, so a
      // default added to DEFAULT_EXACT_PATTERNS later is not labeled "custom".
      const isDefault = DEFAULT_EXACT_PATTERNS.some(
        (pattern) => pattern.source === stats.sentinel_pattern_used,
      );
      parts.push(`sentinel_pattern=${isDefault ? "default" : "custom"}`);
    }
  }
  if (stats.diagnostic_enabled) {
    parts.push(`dump=${dumpPath}`);
    if (!stats.normalization_enabled) parts.push("(normalize disabled)");
  }
  process.stderr.write(`[microcompact] ${parts.join(" ")}\n`);
}
|
|
337
|
+
|
|
338
|
+
// --- Orchestrator ---
|
|
339
|
+
|
|
340
|
+
/**
 * Orchestrates one request: scan tool_result content for microcompact
 * sentinels, optionally append a diagnostic record, optionally normalize the
 * exact (Mode A) matches in place.
 *
 * Does nothing beyond reporting the gate state when both gates are off or
 * when `reqCtx.body.messages` is not an array.
 *
 * @param {object} reqCtx - Request context; `reqCtx.body.messages` may be
 *   mutated when normalization is enabled.
 * @returns {Promise<object>} Stats object describing what was found and done.
 */
export async function runMicrocompactStability(reqCtx) {
  const stats = initStats();
  const dumpPath = getDumpPath();
  const normalize = isNormalizeEnabled();
  stats.diagnostic_enabled = !!dumpPath;
  stats.normalization_enabled = normalize;

  if (!dumpPath && !normalize) return stats;
  if (!reqCtx || !reqCtx.body || !Array.isArray(reqCtx.body.messages)) return stats;

  const extraPatterns = getCustomPatterns();
  const extraPrefixes = getCustomPrefixes();
  const { exact_matches, partial_matches, total_tool_results } = walkToolResultsForSentinels(
    reqCtx.body.messages,
    extraPatterns,
    extraPrefixes,
  );
  stats.total_tool_results_scanned = total_tool_results;
  stats.exact_matches_count = exact_matches.length;
  stats.partial_matches_count = partial_matches.length;
  stats.sentinels_matched = exact_matches.length + partial_matches.length;
  if (exact_matches.length > 0) {
    stats.sentinel_pattern_used = exact_matches[0].matched_pattern;
  }

  // Diagnostic dump runs FIRST so it records the raw pre-normalization text.
  // Mode B is redacted to a prefix by the serializer; Mode A keeps full text.
  if (dumpPath && (exact_matches.length > 0 || partial_matches.length > 0)) {
    try {
      const canonicalText = normalize ? getCanonicalText() : null;
      const record = buildDiagnosticRecord(reqCtx, exact_matches, partial_matches, total_tool_results, {
        includeNormalized: isIncludeNormalizedEnabled(),
        canonicalText,
        redactLen: getRedactLen(),
      });
      await appendDiagnosticRecord(dumpPath, record);
      stats.diagnostic_records_written = 1;
    } catch (err) {
      // Best-effort: a failed dump must not block the request.
      debug(`dump write failed: ${err?.message ?? err}`);
    }
  }

  // Normalization runs AFTER the dump. Only Mode A matches are eligible.
  if (normalize && exact_matches.length > 0) {
    const canonicalText = getCanonicalText();
    const canonicalBytes = Buffer.byteLength(canonicalText, "utf8");
    for (const m of exact_matches) {
      const ok = normalizeToolResultContent(reqCtx.body.messages, m, canonicalText);
      // Count bytes only for sentinels that were actually rewritten;
      // otherwise a failed rewrite would inflate bytes_saved.
      if (!ok) continue;
      stats.bytes_original += Buffer.byteLength(m.text, "utf8");
      stats.bytes_normalized += canonicalBytes;
      stats.sentinels_normalized++;
    }
    stats.bytes_saved = stats.bytes_original - stats.bytes_normalized;
  }

  return stats;
}
|
|
398
|
+
|
|
399
|
+
// --- Extension contract ---
|
|
400
|
+
|
|
401
|
+
export default {
  name: "microcompact-stability",
  description:
    "Phase 1 microcompact cache stability — diagnostic capture of CC's " +
    "time_based_microcompact sentinel + opt-in normalization to a canonical " +
    "byte-stable form. Phase 2 (snapshot/restore) deferred to v3.5.0+.",
  enabled: false, // overridden by extensions.json
  order: 350,

  // Request hook. Never throws: any unexpected error is reported through
  // debug() and the request continues untouched.
  async onRequest(ctx) {
    try {
      const stats = await runMicrocompactStability(ctx);

      // Nothing to attach or report unless at least one gate is on.
      const active = stats.diagnostic_enabled || stats.normalization_enabled;
      if (!active) return;

      ctx.meta = ctx.meta || {};
      ctx.meta.microcompactStats = stats;

      // The stderr summary is emitted only when at least one sentinel matched.
      if (stats.sentinels_matched > 0) {
        emitStderrSummary(stats, getDumpPath());
      }
    } catch (err) {
      debug(`onRequest unexpected: ${err?.message ?? err}`);
    }
  },
};
|
|
@@ -33,7 +33,8 @@ export default {
|
|
|
33
33
|
|
|
34
34
|
if (ttlValue === "none") return;
|
|
35
35
|
|
|
36
|
-
const
|
|
36
|
+
const detectedTier = ctx.meta?._ttlTier || "1h";
|
|
37
|
+
const ttlParam = ttlValue === "5m" || detectedTier === "5m" ? "5m" : "1h";
|
|
37
38
|
|
|
38
39
|
if (Array.isArray(body.system)) {
|
|
39
40
|
body.system = body.system.map((block) => injectTtl(block, ttlParam));
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// ttl-tier-detect — port of preload.mjs:1815-1828 in-payload tier detection.
|
|
2
|
+
//
|
|
3
|
+
// Runs at order 75 (between read-only upstream-change-detection at 50 and
|
|
4
|
+
// every cache_control mutator) so that downstream strips by fresh-session-sort
|
|
5
|
+
// (250) and cache-control-normalize (400) cannot hide a ttl="5m" signal from
|
|
6
|
+
// ttl-management at order 500.
|
|
7
|
+
//
|
|
8
|
+
// Pure detection. Sets ctx.meta._ttlTier. Does not mutate ctx.body.
|
|
9
|
+
|
|
10
|
+
// Returns "5m" when any block in body.system, or in any message's array
// `content`, has cache_control.ttl === "5m"; otherwise "1h". Missing or
// non-array fields are treated as empty. Does not modify `body`.
function detectExistingTier(body) {
  const isFiveMinute = (block) => block?.cache_control?.ttl === "5m";

  const systemBlocks = Array.isArray(body?.system) ? body.system : [];
  if (systemBlocks.some(isFiveMinute)) return "5m";

  const messages = Array.isArray(body?.messages) ? body.messages : [];
  for (const message of messages) {
    const content = message?.content;
    if (Array.isArray(content) && content.some(isFiveMinute)) return "5m";
  }
  return "1h";
}
|
|
22
|
+
|
|
23
|
+
export { detectExistingTier };
|
|
24
|
+
|
|
25
|
+
export default {
  name: "ttl-tier-detect",
  description: "Detect existing TTL tier from incoming payload before cache_control normalization",
  order: 75,

  // Records the detected tier on ctx.meta._ttlTier; ctx.body is only read.
  async onRequest(ctx) {
    // Create the meta bag if it is absent, as the other extensions do, so a
    // context without `meta` does not throw here.
    ctx.meta = ctx.meta || {};
    ctx.meta._ttlTier = detectExistingTier(ctx.body);
  },
};
|
package/proxy/extensions.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
+
"ttl-tier-detect": { "enabled": true, "order": 75 },
|
|
2
3
|
"fingerprint-strip": { "enabled": true, "order": 100 },
|
|
3
4
|
"image-strip": { "enabled": true, "order": 150 },
|
|
4
5
|
"sort-stabilization": { "enabled": true, "order": 200 },
|
|
@@ -7,7 +8,9 @@
|
|
|
7
8
|
"smoosh-split": { "enabled": true, "order": 320 },
|
|
8
9
|
"content-strip": { "enabled": true, "order": 330 },
|
|
9
10
|
"tool-input-normalize": { "enabled": true, "order": 340 },
|
|
11
|
+
"microcompact-stability": { "enabled": true, "order": 350 },
|
|
10
12
|
"cache-control-normalize": { "enabled": true, "order": 400 },
|
|
13
|
+
"messages-cache-breakpoint": { "enabled": true, "order": 410 },
|
|
11
14
|
"ttl-management": { "enabled": true, "order": 500 },
|
|
12
15
|
"cache-telemetry": { "enabled": true, "order": 600 },
|
|
13
16
|
"overage-warning": { "enabled": true, "order": 610 },
|
package/tools/cache-test.sh
CHANGED
|
@@ -19,7 +19,9 @@ set -euo pipefail
|
|
|
19
19
|
|
|
20
20
|
CLAUDE_CLI="$HOME/.npm-global/lib/node_modules/@anthropic-ai/claude-code/cli.js"
|
|
21
21
|
PRELOAD="$HOME/.claude/cache-fix-preload.mjs"
|
|
22
|
-
|
|
22
|
+
QUOTA_DIR="$HOME/.claude/quota-status"
|
|
23
|
+
ACCOUNT_FILE="$QUOTA_DIR/account.json"
|
|
24
|
+
SESSIONS_DIR="$QUOTA_DIR/sessions"
|
|
23
25
|
USAGE_LOG="$HOME/.claude/usage.jsonl"
|
|
24
26
|
DEBUG_LOG="$HOME/.claude/cache-fix-debug.log"
|
|
25
27
|
REPORT_DIR="/tmp/cache-test-$(date +%Y%m%d_%H%M%S)"
|
|
@@ -54,21 +56,27 @@ echo ""
|
|
|
54
56
|
|
|
55
57
|
mkdir -p "$REPORT_DIR"
|
|
56
58
|
|
|
57
|
-
# Helper: snapshot cache state from quota-status
|
|
59
|
+
# Helper: snapshot cache state from the most-recent per-session quota-status
|
|
60
|
+
# file. Each one-shot CC invocation generates its own session, so the latest
|
|
61
|
+
# sessions/<filename>.json corresponds to the call we just made.
|
|
58
62
|
# Print one field of the "cache" object in a quota-status JSON file.
#   $1 = JSON file path
#   $2 = key under "cache"
#   $3 = value printed when the key (or the "cache" object) is absent
# Prints "?" when python3 fails (unreadable file, invalid JSON, ...).
# The path and key are passed as argv, not spliced into the Python source, so
# a path containing quotes cannot break the snippet.
_snapshot_cache_field() {
  python3 -c 'import json, sys
d = json.load(open(sys.argv[1]))
print(d.get("cache", {}).get(sys.argv[2], sys.argv[3]))' "$1" "$2" "$3" 2>/dev/null || echo "?"
}

# Copy the newest per-session quota-status file to $REPORT_DIR/<label>.json
# and print a one-line cache summary for it.
#   $1 = label (used for the output file name and the printed line)
snapshot_cache() {
  local label="$1"
  local outfile="$REPORT_DIR/${label}.json"
  local sess_file=""
  if [ -d "$SESSIONS_DIR" ]; then
    # `|| true`: with `set -euo pipefail`, `ls` failing on an empty sessions
    # directory would otherwise abort the whole script instead of reaching
    # the "No per-session ..." branch below.
    sess_file=$(ls -t "$SESSIONS_DIR"/*.json 2>/dev/null | head -1 || true)
  fi
  if [ -n "$sess_file" ] && [ -f "$sess_file" ]; then
    cp "$sess_file" "$outfile"
    local tier create read e1h e5m hit
    tier=$(_snapshot_cache_field "$sess_file" ttl_tier "?")
    create=$(_snapshot_cache_field "$sess_file" cache_creation 0)
    read=$(_snapshot_cache_field "$sess_file" cache_read 0)
    e1h=$(_snapshot_cache_field "$sess_file" ephemeral_1h 0)
    e5m=$(_snapshot_cache_field "$sess_file" ephemeral_5m 0)
    hit=$(_snapshot_cache_field "$sess_file" hit_rate "?")
    echo " [$label] TTL=$tier create=$create read=$read 1h=$e1h 5m=$e5m hit=$hit%"
  else
    echo " [$label] No per-session quota-status file found in $SESSIONS_DIR"
  fi
}
|
|
74
82
|
|
|
@@ -101,7 +101,7 @@ done
|
|
|
101
101
|
Q5H=$(python3 -c "
|
|
102
102
|
import json
|
|
103
103
|
try:
|
|
104
|
-
q = json.load(open('$HOME/.claude/quota-status.json'))
|
|
104
|
+
q = json.load(open('$HOME/.claude/quota-status/account.json'))
|
|
105
105
|
print(q['five_hour']['pct'])
|
|
106
106
|
except Exception:
|
|
107
107
|
print(0)
|
|
@@ -116,7 +116,7 @@ echo "Preflight OK: Q5h at ${Q5H}%, 4 versions installed, launcher present." | t
|
|
|
116
116
|
echo "" | tee -a "$SUMMARY"
|
|
117
117
|
|
|
118
118
|
# Snapshot quota state at start
|
|
119
|
-
cp "$HOME/.claude/quota-status.json" "$OUTPUT_DIR/raw-quota-status-start.json" 2>/dev/null || true
|
|
119
|
+
cp "$HOME/.claude/quota-status/account.json" "$OUTPUT_DIR/raw-quota-status-start.json" 2>/dev/null || true
|
|
120
120
|
|
|
121
121
|
# ─── Phase A: steady-state per version ─────────────────────────────────────
|
|
122
122
|
|
|
@@ -189,7 +189,7 @@ if [[ "$INCLUDE_IDLE" -eq 1 ]]; then
|
|
|
189
189
|
fi
|
|
190
190
|
|
|
191
191
|
# Snapshot quota state at end
|
|
192
|
-
cp "$HOME/.claude/quota-status.json" "$OUTPUT_DIR/raw-quota-status-end.json" 2>/dev/null || true
|
|
192
|
+
cp "$HOME/.claude/quota-status/account.json" "$OUTPUT_DIR/raw-quota-status-end.json" 2>/dev/null || true
|
|
193
193
|
|
|
194
194
|
# ─── Analysis ──────────────────────────────────────────────────────────────
|
|
195
195
|
|
|
@@ -295,7 +295,7 @@ if [[ "$Q5H" -lt 50 ]]; then
|
|
|
295
295
|
NEW_Q5H=$(python3 -c "
|
|
296
296
|
import json
|
|
297
297
|
try:
|
|
298
|
-
print(json.load(open('$HOME/.claude/quota-status.json'))['five_hour']['pct'])
|
|
298
|
+
print(json.load(open('$HOME/.claude/quota-status/account.json'))['five_hour']['pct'])
|
|
299
299
|
except Exception:
|
|
300
300
|
print('?')
|
|
301
301
|
" 2>/dev/null)
|