claude-code-cache-fix 3.9.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,27 @@
1
1
  // thinking-block-sanitize — request-path mitigation for the CC thinking-desync
2
- // wedge (anthropics/claude-code#63147). On replay paths (resume / --continue /
2
+ // wedge (anthropics/claude-code#63147).
3
+ //
4
+ // v1 (default since v4.0.0; CACHE_FIX_THINKING_SANITIZE unset or =on): On replay paths (resume / --continue /
3
5
  // auto-compaction / parallel-tool-cancel), CC re-sends prior assistant turns'
4
6
  // thinking in the OMITTED shape `{ type:"thinking", thinking:"", signature }`.
5
7
  // The API rejects modified thinking in the *latest* assistant message with a
6
- // permanent 400, which wedges the session. This extension drops the omitted
7
- // thinking blocks the API treats as optional, before the request is forwarded.
8
+ // permanent 400, which wedges the session. v1 drops these omitted blocks
9
+ // before forwarding. Never touches non-empty thinking; never touches
10
+ // redacted_thinking (v1's empirical exclusion — zero observed in worst-case
11
+ // wedged transcripts).
12
+ //
13
+ // v2 (CACHE_FIX_THINKING_SANITIZE=v2): Additionally handles yurukusa's "13E"
14
+ // pattern — when ToolSearch dynamically loads a tool mid-conversation, the
15
+ // prior assistant turn's thinking signature is invalidated because it was
16
+ // computed over the now-stale tools surface. The API rejects it, and CC's harness
17
+ // strips-and-retries, paying a 400 + retry tax every turn. v2 detects
18
+ // cross-request tools-surface change via a per-session tools-hash baseline,
19
+ // and strips ALL prior-turn signed thinking (both `thinking` blocks with
20
+ // non-empty text AND `redacted_thinking` blocks — v2's scope is structural,
21
+ // not empirical) on hash mismatch. Same active-tool-continuation latest-turn
22
+ // guard as v1.
8
23
  //
9
- // Resolved turn-selection rule (directive Open Question 1, empirical capture):
24
+ // Resolved turn-selection rule (v1 directive Open Question 1, empirical capture):
10
25
  // - drop omitted thinking from ALL prior assistant turns, AND
11
26
  // - from the LATEST assistant turn UNLESS it is an active tool-continuation
12
27
  // (last block is a tool_use with a following tool_result) — that case is
@@ -16,15 +31,47 @@
16
31
  // / MAX_THINKING_TOKENS=0 stop it only by disabling thinking entirely
17
32
  // (lossy); DISABLE_INTERLEAVED_THINKING=1 does NOT stop the 400 — so the
18
33
  // answer for that case is don't-resume + heal/retire.
19
- // Never touches non-empty thinking, and never touches redacted_thinking (v1).
20
34
  //
21
- // OPT-IN for v1: only runs when CACHE_FIX_THINKING_SANITIZE=on (default off)
22
- // it mutates request bodies and its coverage is not yet live-validated.
35
+ // v2 state pattern (per directive proxy-thinking-block-sanitize-v2.md):
36
+ // - In-memory per-session map keyed by canonical session filename, seeded
37
+ // once from sessions/<sid>.json on the first request of that session. Mirrors
38
+ // session-health's pattern. Lives at module scope.
39
+ // - Baseline updates ONLY on response success (HTTP 2xx). 4xx/5xx leave
40
+ // the baseline unchanged so a failed request's hash doesn't become the
41
+ // new ground truth.
42
+ // - First request observes-and-establishes (no strip; baseline is set
43
+ // after the response succeeds).
44
+ // - When canonical session id is "unknown" (raw id null/empty/whitespace),
45
+ // v2 no-ops entirely. The shared sessions/unknown.json would cross-
46
+ // contaminate baselines across unrelated agents otherwise.
47
+ //
48
+ // Modes via CACHE_FIX_THINKING_SANITIZE (as of v4.0.0 — v1 default-on flip):
49
+ // unset (or "on") — v1 only (omitted-text drop). DEFAULT.
50
+ // "off" — extension no-ops (explicit disable)
51
+ // "v2" — v1 + v2 (omitted-text drop AND
52
+ // tools-hash-mismatch drop). v2 is
53
+ // a strict superset of v1.
54
+ // any other value — treated as v1 (the default), not off.
55
+ // Matches the precedent of being
56
+ // permissive about the on-path.
57
+ //
58
+ // v1 default-on rationale: 7-day prod dogfood across 37 sessions (2026-05-29
59
+ // → 2026-06-05) on `=on`: zero `cannot be modified` 400s, cache hit-rate
60
+ // aggregate 94.66% vs 92.44% baseline (no prefix degradation), sanitize fired
61
+ // on ~35% of sessions, ~800 blocks dropped per day, max 938K context healthy.
62
+ // v2 stays opt-in via `=v2` because the dogfood only ran v1.
23
63
  //
24
64
  // Order 550: after the request-body mutators (ttl-management 500) and before
25
65
  // session-health (590), so #160's thinking_block_count reflects the forwarded
26
- // body. The per-request drop count is exposed via ctx.meta._thinkingSanitize
27
- // for cache-telemetry (600) to merge into the per-session JSON.
66
+ // body. The per-request drop counts are exposed via ctx.meta._thinkingSanitize
67
+ // (v1 counter) and ctx.meta._thinkingSanitizeV2 (v2 counter + baseline) for
68
+ // cache-telemetry (600) to merge into the per-session JSON.
69
+
70
+ import { readFileSync } from "node:fs";
71
+ import { resolveSessionId, sessionFilePath, sessionFilename } from "./cache-telemetry.mjs";
72
+ import { computeSignatureSurfaceHash } from "./signature-surface-hash.mjs";
73
+
74
+ // --- v1 predicates ---
28
75
 
29
76
  export function isOmittedThinking(block) {
30
77
  return (
@@ -70,14 +117,38 @@ function latestAssistantIndex(messages) {
70
117
  return -1;
71
118
  }
72
119
 
73
- // Pure planner: returns { messages, dropped }. Does not mutate the input.
74
- // `messages` is the new array (a message that loses all content is dropped).
75
- export function planSanitize(messages) {
76
- if (!Array.isArray(messages)) return { messages, dropped: 0 };
120
+ // --- v2 predicate ---
121
+
122
+ // v2 strips signed `thinking` blocks (non-empty text) AND `redacted_thinking`
123
+ // blocks. v1's `isOmittedThinking` filter handles the empty-text case
124
+ // independently — when both flags are active, v1 drops the empty ones and v2
125
+ // drops the signed ones; predicates are non-overlapping.
126
/**
 * v2 strip predicate: reports whether `block` carries signed thinking.
 *
 * Matches every `redacted_thinking` block, plus any `thinking` block whose
 * text is a non-blank string and whose signature is a non-empty string.
 * Empty-text thinking blocks are rejected here.
 *
 * @param {*} block - Candidate content block; falsy input yields `false`.
 * @returns {boolean} `true` when the block matches the v2 predicate.
 */
export function isSignedThinkingForV2(block) {
  if (!block) return false;
  switch (block.type) {
    case "redacted_thinking":
      return true;
    case "thinking": {
      const { thinking, signature } = block;
      const hasText = typeof thinking === "string" && thinking.trim() !== "";
      const isSigned = typeof signature === "string" && signature.length > 0;
      return hasText && isSigned;
    }
    default:
      return false;
  }
}
138
+
139
+ // --- Pure planner ---
140
+ //
141
+ // Returns { messages, dropped, droppedV2 }. Does not mutate input.
142
+ // `v2StripSigned` is the externally-determined boolean: should v2's
143
+ // signed-thinking drop fire this request? (Caller has already computed
144
+ // hash mismatch + session-state checks.)
145
+ export function planSanitize(messages, { v2StripSigned = false } = {}) {
146
+ if (!Array.isArray(messages)) return { messages, dropped: 0, droppedV2: 0 };
77
147
  const latestAsst = latestAssistantIndex(messages);
78
148
  const protectLatest = latestAsst >= 0 && isActiveToolContinuation(messages, latestAsst);
79
149
 
80
150
  let dropped = 0;
151
+ let droppedV2 = 0;
81
152
  let changed = false;
82
153
  const out = [];
83
154
  for (let i = 0; i < messages.length; i++) {
@@ -87,14 +158,24 @@ export function planSanitize(messages) {
87
158
  continue;
88
159
  }
89
160
  if (i === latestAsst && protectLatest) {
90
- out.push(msg); // active continuation — leave its thinking intact
161
+ // Active continuation — leave thinking intact (both v1 and v2 respect
162
+ // this; the API needs the signed thinking for the pending tool call).
163
+ out.push(msg);
91
164
  continue;
92
165
  }
93
166
  const kept = msg.content.filter((b) => {
167
+ // v1 always-active drop predicate.
94
168
  if (isOmittedThinking(b)) {
95
169
  dropped++;
96
170
  return false;
97
171
  }
172
+ // v2-only drop predicate. Predicates are mutually exclusive on a single
173
+ // block: omitted thinking matches v1's predicate but not v2's, and
174
+ // signed/redacted thinking matches v2's predicate but not v1's.
175
+ if (v2StripSigned && isSignedThinkingForV2(b)) {
176
+ droppedV2++;
177
+ return false;
178
+ }
98
179
  return true;
99
180
  });
100
181
  if (kept.length === msg.content.length) {
@@ -106,25 +187,158 @@ export function planSanitize(messages) {
106
187
  changed = true;
107
188
  }
108
189
  }
109
- return { messages: changed ? out : messages, dropped };
190
+ return { messages: changed ? out : messages, dropped, droppedV2 };
191
+ }
192
+
193
+ // --- v2 mode + state ---
194
+
195
+ // "off" | "on" | "v2". As of v4.0.0 the default flipped from "off" to "on" —
196
+ // v1 (omitted-text drop) is the new default behavior. Set
197
+ // CACHE_FIX_THINKING_SANITIZE=off to explicitly disable; =v2 to additionally
198
+ // enable the v2 tools-hash-mismatch drop (still opt-in pending its own
199
+ // prod-dogfood window after #200 closes the silent-load failure mode).
200
+ // Unknown values fall through to "on" — we are permissive about the on-path
201
+ // and only treat the literal "off" as a disable.
202
/**
 * Resolve the sanitize mode from CACHE_FIX_THINKING_SANITIZE.
 *
 * Only the exact strings "off" and "v2" are recognised; anything else
 * (unset, "on", or an unknown value) resolves to "on".
 *
 * @param {object} [env=process.env] - Environment-like object to read from.
 * @returns {"off"|"on"|"v2"} The resolved mode.
 */
export function modeFromEnv(env = process.env) {
  const raw = env.CACHE_FIX_THINKING_SANITIZE;
  return raw === "off" || raw === "v2" ? raw : "on";
}
208
+
209
+ // Per-session state, in memory. Keyed by canonical session filename
210
+ // (sessionFilename(rawId)). Each entry: { tools_hash_baseline }.
211
+ // Mirrors session-health's pattern: seeded once from disk on first request
212
+ // of that session, then maintained in memory + persisted via cache-telemetry's
213
+ // spread of ctx.meta._thinkingSanitizeV2.
214
+ const v2SessionState = new Map();
215
+
216
/**
 * Build the initial in-memory v2 state for a session from its session file.
 *
 * Best-effort: any failure to read or parse the file is ignored and treated
 * as "no baseline recorded".
 *
 * @param {*} rawSid - Raw session id, passed straight to sessionFilePath().
 * @returns {{ tools_hash_baseline: (string|null) }} The persisted baseline
 *   when the file holds a string value for it, otherwise `null`.
 */
function seedV2FromFile(rawSid) {
  let persisted;
  try {
    const text = readFileSync(sessionFilePath(rawSid), "utf8");
    persisted = JSON.parse(text);
  } catch {
    // Missing or unreadable session file: start without a baseline.
    persisted = null;
  }
  const stored = persisted?.tools_hash_baseline;
  return { tools_hash_baseline: typeof stored === "string" ? stored : null };
}
226
+
227
+ // Test-only reset (also useful for proxy-restart simulation in unit tests).
228
/**
 * Discard every per-session v2 entry held in memory.
 *
 * After this call the next v2 request for any session finds no in-memory
 * entry and seeds its state again.
 *
 * @returns {void}
 */
export function _resetV2State() {
  v2SessionState.clear();
}
111
231
 
232
+ // --- Extension default-export ---
233
+
112
234
  export default {
113
235
  name: "thinking-block-sanitize",
114
236
  description:
115
- "Drop omitted (empty-text) thinking blocks from prior assistant turns and the latest non-continuation turn, to head off the CC thinking-desync 400 (#63147). Opt-in via CACHE_FIX_THINKING_SANITIZE=on.",
237
+ "Drop omitted (empty-text) thinking blocks from prior assistant turns and the latest non-continuation turn, to head off the CC thinking-desync 400 (#63147). v1 mode: omitted-text drop only. v2 mode: also drop signed thinking + redacted_thinking on cross-request tools-hash mismatch (ToolSearch surface). v1 is now ON by default as of v4.0.0; set CACHE_FIX_THINKING_SANITIZE=off to disable, =v2 to additionally opt into v2.",
116
238
  order: 550,
117
239
 
118
240
  async onRequest(ctx) {
119
- if (process.env.CACHE_FIX_THINKING_SANITIZE !== "on") return;
241
+ const mode = modeFromEnv();
242
+ if (mode === "off") return;
243
+
120
244
  const body = ctx.body;
121
245
  if (!body || !Array.isArray(body.messages)) return;
122
246
 
123
- const { messages, dropped } = planSanitize(body.messages);
124
- if (dropped > 0) body.messages = messages;
247
+ // v2 only fires when mode === "v2" AND we have a usable session id.
248
+ let v2StripSigned = false;
249
+ let stateKey = null;
250
+ let currentHash = null;
251
+
252
+ if (mode === "v2") {
253
+ // Resolve session id inline — cache-telemetry's onRequest runs at order
254
+ // 600, after us, so ctx.meta._sessionId is not yet set when we fire at
255
+ // order 550. We import resolveSessionId from cache-telemetry to keep
256
+ // canonicalization consistent.
257
+ const rawSid = resolveSessionId(ctx.headers);
258
+ stateKey = sessionFilename(rawSid);
259
+
260
+ // "unknown" canonical id → no-op for v2 (cross-contamination risk on
261
+ // the shared sessions/unknown.json baseline). v1's strip still runs
262
+ // below regardless.
263
+ if (stateKey !== "unknown") {
264
+ currentHash = computeSignatureSurfaceHash({ tools: body.tools });
265
+
266
+ // Seed in-memory state from disk on first encounter of that session
267
+ // (covers proxy restart — re-reads persisted baseline).
268
+ let st = v2SessionState.get(stateKey);
269
+ if (!st) {
270
+ st = seedV2FromFile(rawSid);
271
+ v2SessionState.set(stateKey, st);
272
+ }
273
+
274
+ const baseline = st.tools_hash_baseline;
275
+ // Mismatch only fires when there IS a baseline AND it differs.
276
+ // First request (baseline === null) observes-and-establishes — no strip.
277
+ v2StripSigned = baseline !== null && baseline !== currentHash;
278
+
279
+ // Stash for the onResponseStart hook to advance the baseline iff the
280
+ // response succeeded. Stash BEFORE the plan + strip so the response
281
+ // path has access regardless of whether anything was dropped.
282
+ ctx.meta._thinkingSanitizeV2PendingHash = currentHash;
283
+ ctx.meta._thinkingSanitizeV2StateKey = stateKey;
284
+ }
285
+ }
286
+
287
+ const { messages, dropped, droppedV2 } = planSanitize(body.messages, {
288
+ v2StripSigned,
289
+ });
290
+ if (dropped > 0 || droppedV2 > 0) body.messages = messages;
125
291
 
126
292
  // Counts only — never content. Exposed for cache-telemetry to persist and
127
293
  // for the #160 session-health signal.
294
+ // v1 counter — unchanged, fires for both modes.
128
295
  ctx.meta._thinkingSanitize = { thinking_blocks_dropped: dropped };
296
+
297
+ // v2 counter — fires only in v2 mode. Includes the pre-advance baseline
298
+ // value that cache-telemetry will persist (so consumers can see the
299
+ // current baseline in the session JSON even on requests that didn't
300
+ // strip). The actual advance only happens on response success below.
301
+ if (mode === "v2" && stateKey && stateKey !== "unknown") {
302
+ ctx.meta._thinkingSanitizeV2 = {
303
+ thinking_blocks_dropped_v2: droppedV2,
304
+ // Report the CURRENT baseline here, not the pending hash. The pending
305
+ // hash must not reach disk unless the request succeeds; otherwise a
306
+ // failed request's tools surface would become the persisted baseline
307
+ // and disagree with in-memory state once a proxy restart re-seeds
308
+ // from the session file.
309
+ // onResponseStart below is the only place that advances the baseline:
310
+ // on HTTP 2xx it updates the in-memory state and overwrites this
311
+ // field in meta, so disk (via cache-telemetry's spread) follows.
312
+ // On 4xx/5xx this value is left untouched.
313
+ tools_hash_baseline: v2SessionState.get(stateKey)?.tools_hash_baseline ?? null,
314
+ };
315
+ }
316
+ },
317
+
318
+ // Advance the baseline only on HTTP 2xx response. 4xx/5xx leaves the
319
+ // in-memory state and the meta-stashed baseline untouched, so a failed
320
+ // request's hash never becomes the new ground truth.
321
+ async onResponseStart(ctx) {
322
+ const stateKey = ctx.meta._thinkingSanitizeV2StateKey;
323
+ const pendingHash = ctx.meta._thinkingSanitizeV2PendingHash;
324
+ if (!stateKey || !pendingHash) return;
325
+ if (typeof ctx.status !== "number") return;
326
+ if (ctx.status < 200 || ctx.status >= 300) return;
327
+
328
+ // Advance in-memory baseline.
329
+ let st = v2SessionState.get(stateKey);
330
+ if (!st) {
331
+ st = { tools_hash_baseline: null };
332
+ v2SessionState.set(stateKey, st);
333
+ }
334
+ st.tools_hash_baseline = pendingHash;
335
+
336
+ // Update the meta-stashed baseline so cache-telemetry's spread writes
337
+ // the new value to disk. If meta._thinkingSanitizeV2 wasn't stashed
338
+ // (e.g. mode flip mid-request), construct it now.
339
+ if (!ctx.meta._thinkingSanitizeV2) {
340
+ ctx.meta._thinkingSanitizeV2 = { thinking_blocks_dropped_v2: 0 };
341
+ }
342
+ ctx.meta._thinkingSanitizeV2.tools_hash_baseline = pendingHash;
129
343
  },
130
344
  };
@@ -26,6 +26,7 @@
26
26
  // overage_disabled_reason?: string ≤64 (optional)
27
27
  // cache_hit_rate: float 0–1
28
28
  // q5h_delta, q7d_delta: float (0 on first call after restart)
29
+ // request_id?: string ≤64 (optional, gated)
29
30
  //
30
31
  // `peak_hour` is NOT in the wire format. It can be derived from `ts` if any
31
32
  // consumer needs it.
@@ -36,6 +37,17 @@
36
37
  // CACHE_FIX_USAGE_LOG=<path> overrides the destination path only — it is NOT
37
38
  // an enable flag and never has been.
38
39
  //
40
+ // CACHE_FIX_USAGE_LOG_REQID=on emits the optional `request_id` field
41
+ // (sourced from the upstream `request-id` response header). Default-off in
42
+ // v4.1.0 to avoid breaking unpatched claude-meter installs whose strict-
43
+ // object schema rejects unknown keys. claude-meter v0.7.0+ accepts the
44
+ // optional field; the v4.2.0 flip to default-on assumes that floor.
45
+ // The field is the post-hoc join key against CC's per-session JSONL
46
+ // transcripts
47
+ // (`~/.claude/projects/<project>/<session-uuid>.jsonl` carry `requestId`
48
+ // for every API call), which recovers per-CC-session attribution that
49
+ // `sid` alone cannot provide. See docs/directives/proxy-usage-log-request-id.md.
50
+ //
39
51
  // See `docs/directives/proxy-claude-meter-compat.md` for full design.
40
52
 
41
53
  import { appendFile, mkdir } from "node:fs/promises";
@@ -91,6 +103,17 @@ export function extractMessageDeltaFields(event) {
91
103
  return { output_tokens: event.usage.output_tokens || 0 };
92
104
  }
93
105
 
106
+ // Extract upstream request-id from response headers, guarded against the
107
+ // max(64) MeterRowSchema constraint. Returns the string when valid, or
108
+ // `undefined` so the optional schema field is omitted on bad input rather
109
+ // than emitting a row that would fail meter-side validation.
110
/**
 * Read the `request-id` entry from a headers object.
 *
 * @param {object|null|undefined} headers - Header map keyed by header name.
 * @returns {string|undefined} The header value when it is a string of 1 to 64
 *   characters; `undefined` for a missing, non-string, empty or longer value.
 */
export function extractRequestId(headers) {
  const candidate = headers?.["request-id"];
  const usable =
    typeof candidate === "string" && candidate.length >= 1 && candidate.length <= 64;
  return usable ? candidate : undefined;
}
116
+
94
117
  function num(headers, key) {
95
118
  const v = headers?.[key];
96
119
  if (v === undefined || v === null || v === "") return null;
@@ -134,7 +157,7 @@ export function computeDelta(current, previous) {
134
157
  return current - previous;
135
158
  }
136
159
 
137
- export function assembleRecord({ start, delta, quota, requestedModel, sid, prevQ5h, prevQ7d, now = new Date() }) {
160
+ export function assembleRecord({ start, delta, quota, requestedModel, sid, prevQ5h, prevQ7d, requestId, now = new Date() }) {
138
161
  const s = start || {};
139
162
  const d = delta || {};
140
163
  const q = quota || {};
@@ -194,6 +217,26 @@ export function assembleRecord({ start, delta, quota, requestedModel, sid, prevQ
194
217
  record.overage_disabled_reason = q.overage_disabled_reason;
195
218
  }
196
219
 
220
+ // Optional: emit request_id when CACHE_FIX_USAGE_LOG_REQID=on AND the
221
+ // captured value is a non-empty string within the schema's max(64)
222
+ // constraint. Belt-and-braces: extractRequestId enforces these guards at
223
+ // capture time, and assembleRecord re-enforces them here so a future
224
+ // refactor that bypasses the extractor can't emit a row that would fail
225
+ // claude-meter's strict-object validation.
226
+ // Env read happens per-call so operators can flip it at runtime without
227
+ // proxy restart, matching the image-strip debug-gate pattern.
228
+ // Cross-repo contract: claude-code-meter v0.7.0+ accepts this optional
229
+ // field; older meter installs reject rows that carry it, so the gate
230
+ // stays default-off in v4.1.0. Default flips on in cache-fix v4.2.0.
231
+ if (
232
+ process.env.CACHE_FIX_USAGE_LOG_REQID === "on" &&
233
+ typeof requestId === "string" &&
234
+ requestId.length > 0 &&
235
+ requestId.length <= 64
236
+ ) {
237
+ record.request_id = requestId;
238
+ }
239
+
197
240
  return record;
198
241
  }
199
242
 
@@ -250,6 +293,7 @@ export default {
250
293
  const delta = extractMessageDeltaFields(ctx.event);
251
294
  const quota = parseQuotaHeaders(ctx.responseHeaders || {});
252
295
  const requestedModel = ctx.telemetry?.requestedModel || undefined;
296
+ const requestId = extractRequestId(ctx.responseHeaders || {});
253
297
 
254
298
  const record = assembleRecord({
255
299
  start,
@@ -259,6 +303,7 @@ export default {
259
303
  sid: _sid,
260
304
  prevQ5h: _lastQ5h,
261
305
  prevQ7d: _lastQ7d,
306
+ requestId,
262
307
  now: new Date(),
263
308
  });
264
309
 
@@ -1,82 +1,20 @@
1
1
  {
2
- "bootstrap-defense": {
3
- "enabled": true,
4
- "order": 45
5
- },
6
- "ttl-tier-detect": {
7
- "enabled": true,
8
- "order": 75
9
- },
10
- "fingerprint-strip": {
11
- "enabled": true,
12
- "order": 100
13
- },
14
- "image-strip": {
15
- "enabled": true,
16
- "order": 150
17
- },
18
- "sort-stabilization": {
19
- "enabled": true,
20
- "order": 200
21
- },
22
- "fresh-session-sort": {
23
- "enabled": true,
24
- "order": 250
25
- },
26
- "identity-normalization": {
27
- "enabled": true,
28
- "order": 300
29
- },
30
- "smoosh-split": {
31
- "enabled": true,
32
- "order": 320
33
- },
34
- "content-strip": {
35
- "enabled": true,
36
- "order": 330
37
- },
38
- "tool-input-normalize": {
39
- "enabled": true,
40
- "order": 340
41
- },
42
- "microcompact-stability": {
43
- "enabled": true,
44
- "order": 350
45
- },
46
- "thinking-display": {
47
- "enabled": true,
48
- "order": 360
49
- },
50
- "cache-control-normalize": {
51
- "enabled": true,
52
- "order": 400
53
- },
54
- "messages-cache-breakpoint": {
55
- "enabled": true,
56
- "order": 410
57
- },
58
- "ttl-management": {
59
- "enabled": true,
60
- "order": 500
61
- },
62
- "cache-telemetry": {
63
- "enabled": true,
64
- "order": 600
65
- },
66
- "overage-warning": {
67
- "enabled": true,
68
- "order": 610
69
- },
70
- "request-log": {
71
- "enabled": false,
72
- "order": 700
73
- },
74
- "usage-log": {
75
- "enabled": true,
76
- "order": 650
77
- },
78
- "rate-limit-log": {
79
- "enabled": true,
80
- "order": 660
81
- }
2
+ "bootstrap-defense": { "enabled": true, "order": 45 },
3
+ "ttl-tier-detect": { "enabled": true, "order": 75 },
4
+ "fingerprint-strip": { "enabled": true, "order": 100 },
5
+ "image-strip": { "enabled": true, "order": 150 },
6
+ "sort-stabilization": { "enabled": true, "order": 200 },
7
+ "fresh-session-sort": { "enabled": true, "order": 250 },
8
+ "identity-normalization": { "enabled": true, "order": 300 },
9
+ "smoosh-split": { "enabled": true, "order": 320 },
10
+ "content-strip": { "enabled": true, "order": 330 },
11
+ "tool-input-normalize": { "enabled": true, "order": 340 },
12
+ "microcompact-stability": { "enabled": true, "order": 350 },
13
+ "thinking-display": { "enabled": true, "order": 360 },
14
+ "cache-control-normalize": { "enabled": true, "order": 400 },
15
+ "messages-cache-breakpoint": { "enabled": true, "order": 410 },
16
+ "ttl-management": { "enabled": true, "order": 500 },
17
+ "cache-telemetry": { "enabled": true, "order": 600 },
18
+ "overage-warning": { "enabled": true, "order": 610 },
19
+ "request-log": { "enabled": false, "order": 700 }
82
20
  }
@@ -0,0 +1,30 @@
1
+ // Escape a value for safe rendering into a systemd `Environment=KEY=VALUE` line.
2
+ //
3
+ // Per systemd.exec(5) Environment= and systemd.unit(5) Specifier Expansion:
4
+ // - Literal `%` is the specifier-expansion marker; to embed one in a value
5
+ // the unit file must write `%%`. Without escaping, `a%20b` is parsed as
6
+ // a failed `%20` specifier expansion, systemd logs "Invalid slot" and
7
+ // silently drops the variable (empirically reproduced 2026-06-07).
8
+ // - Backslash is a C-string escape inside quoted strings AND inside the
9
+ // Environment= value parser; `\b` becomes byte 0x08 (backspace), `\n`
10
+ // becomes LF, etc. To embed a literal `\` the unit must write `\\`.
11
+ // - `"` requires `\"` (after the backslash escape rule above).
12
+ // - Whitespace requires the whole value to be quoted (`"..."`).
13
+ //
14
+ // Order matters: escape `%` first (it produces `%%`, neither of which we
15
+ // want to re-escape later), then handle `\` and `"` together inside the
16
+ // quoting branch.
17
/**
 * Escape a value for the VALUE part of a systemd `Environment=KEY=VALUE` line.
 *
 * Steps, in order:
 *  1. Every `%` is doubled so it is not read as a specifier.
 *  2. If the value contains whitespace, `"`, `'` or `\`, it is wrapped in
 *     double quotes. A single quote forces quoting because systemd treats it
 *     as a quote opener when it appears in an unquoted value.
 *  3. Inside the quotes, `\` and `"` are backslash-escaped, then literal
 *     LF / CR are written as the C-escapes `\n` / `\r`, because a raw line
 *     break would split the unit-file line. This must run after the
 *     backslash escape so the inserted backslashes are not doubled.
 *
 * @param {string} v - Raw value to embed.
 * @returns {string} The escaped value, double-quoted only when required.
 */
export const systemdEscape = (v) => {
  const percentEscaped = v.replace(/%/g, '%%');
  const needsQuoting = /[\s"'\\]/.test(v);
  if (!needsQuoting) return percentEscaped;
  const quotedBody = percentEscaped
    .replace(/[\\"]/g, '\\$&')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
  return `"${quotedBody}"`;
};
23
+
24
/**
 * Replace the five XML-significant characters (`&`, `<`, `>`, `'`, `"`) in
 * `v` with their named entity references.
 *
 * @param {string} v - Text to escape.
 * @returns {string} `v` with each special character replaced by its entity.
 */
export const xmlEscape = (v) =>
  v.replace(/[&<>'"]/g, (ch) => {
    switch (ch) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      case "'":
        return '&apos;';
      default:
        // The pattern admits only one remaining character: the double quote.
        return '&quot;';
    }
  });
@@ -3,6 +3,7 @@ import { join } from "node:path";
3
3
  import { pathToFileURL } from "node:url";
4
4
 
5
5
  let registry = [];
6
+ let failedExtensions = []; // [{ file, error, lastAttempt }]
6
7
 
7
8
  export async function loadExtensions(dir, configPath) {
8
9
  let config = {};
@@ -15,6 +16,7 @@ export async function loadExtensions(dir, configPath) {
15
16
  const mjsFiles = files.filter((f) => f.endsWith(".mjs")).sort();
16
17
 
17
18
  const extensions = [];
19
+ const newlyFailed = [];
18
20
  for (const file of mjsFiles) {
19
21
  try {
20
22
  const mod = await import(pathToFileURL(join(dir, file)).href + "?t=" + Date.now());
@@ -29,12 +31,24 @@ export async function loadExtensions(dir, configPath) {
29
31
  extensions.push({ ...ext, order, _file: file });
30
32
  }
31
33
  } catch (err) {
32
- process.stderr.write(`[pipeline] failed to load ${file}: ${err.message}\n`);
34
+ // Load-bearing observability: this branch is the only signal that the
35
+ // proxy is running with a degraded extension graph. See #196: a Node
36
+ // ESM cache stale-import race silently broke thinking-block-sanitize
37
+ // v2 for 17 hours post-merge before AITL grepped the journal. The
38
+ // [CRITICAL] prefix is harder to miss than the prior [pipeline] one,
39
+ // and the explicit "restart proxy to recover" hint tells the operator
40
+ // what to do — the underlying Node ESM cache problem can't be fixed
41
+ // in-process (you can't evict cached transitive imports), so a full
42
+ // process restart is the only path to recover the extension graph.
43
+ const msg = `[CRITICAL] extension load failed: ${file}: ${err.message} — restart the proxy via your supervisor to recover (in-process reload cannot fix stale ESM cache; see #196)\n`;
44
+ process.stderr.write(msg);
45
+ newlyFailed.push({ file, error: String(err.message || err), lastAttempt: new Date().toISOString() });
33
46
  }
34
47
  }
35
48
 
36
49
  extensions.sort((a, b) => a.order - b.order);
37
50
  registry = extensions;
51
+ failedExtensions = newlyFailed;
38
52
  return extensions;
39
53
  }
40
54
 
@@ -46,6 +60,13 @@ export function snapshotRegistry() {
46
60
  return [...registry];
47
61
  }
48
62
 
63
+ // Exposed for /health and any operator-facing tool that wants to surface
64
+ // extension-load failures. Returns a fresh array per call so callers can't
65
+ // mutate internal state.
66
/**
 * Return a snapshot of the recorded extension-load failures.
 *
 * Both the array and each entry are fresh shallow copies, so callers cannot
 * alter the module's own records through the result.
 *
 * @returns {Array<object>} Copies of the failure records.
 */
export function getFailedExtensions() {
  const copies = [];
  for (const entry of failedExtensions) {
    copies.push({ ...entry });
  }
  return copies;
}
69
+
49
70
  // Route scoping: extensions default to messages-only so that adding a new
50
71
  // route (e.g. /api/claude_cli/bootstrap) doesn't drag every existing
51
72
  // message-mutating extension onto it — most throw on a null body because