npm - claude-code-cache-fix - Versions diffs - 3.2.0 → 3.2.1 - Mend

claude-code-cache-fix 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md +22 -1
package/package.json +1 -1
package/proxy/extensions/image-strip.mjs +106 -9
package/proxy/image-dimensions.mjs +120 -0

package/README.md CHANGED Viewed

@@ -112,7 +112,7 @@ docker run -d --name cache-fix-proxy --restart=always -p 9801:9801 \
   ghcr.io/cnighswonger/claude-code-cache-fix:latest
 ```
-Image tags: `latest`, `3`, `3.2`, `3.2.0` (semver-ladder, so `3` always points to the newest 3.x). `latest` always tracks the newest tagged release.
+Image tags: `latest`, `3`, `3.2`, `3.2.1` (semver-ladder, so `3` always points to the newest 3.x). `latest` always tracks the newest tagged release.
 **Linux note:** the chained-upstream `host.docker.internal` example below is automatic on Docker Desktop (macOS / Windows). On plain Linux Docker Engine you usually need `--add-host=host.docker.internal:host-gateway` so the name resolves to the host bridge. Without it, the container's name lookup fails and the proxy can't reach the upstream service running on the host. Example chaining cache-fix proxy through `llm-relay` running on the host:
@@ -334,6 +334,27 @@ export CACHE_FIX_IMAGE_KEEP_LAST=3
 Keeps images in the last 3 user messages, replaces older ones with a text placeholder. Only targets `tool_result` blocks — user-pasted images are never touched.
+### Oversized-image guard
+```bash
+export CACHE_FIX_IMAGE_MAX_DIM=2000
+```
+The Anthropic API enforces TWO image-related limits on multi-image requests, and the same error message can fire for either:
+> `"An image in the conversation exceeds the dimension limit for many-image requests (2000px). Start a new session with fewer images."`
+Two pressure axes to address them:
+| Pressure | Variable | What it does |
+|---|---|---|
+| **Too many images in conversation** | `CACHE_FIX_IMAGE_KEEP_LAST=N` | Strips `tool_result` images from older user messages, keeping images only in the last N user messages. |
+| **Any single image too large** | `CACHE_FIX_IMAGE_MAX_DIM=2000` | Replaces images exceeding the dimension limit with a forensic placeholder noting the original dimensions. Covers both user-message direct images and tool_result-nested images. |
+The two compose: with both set, `KEEP_LAST` runs first (drops the count), then `MAX_DIM` runs on what remains (caps the size of the kept ones). Common triggers for the dimension axis: hi-res manuscript scans, retina screenshots, photos at full resolution.
+Dimensions are read with pure-JS PNG and JPEG header parsing — no native deps. Other formats (GIF, WebP, AVIF, BMP) pass through unchanged regardless of dimension. Fail-open: images whose dimensions can't be parsed (truncated header, unsupported format, or a JPEG whose frame header lies beyond the probed prefix) are kept rather than stripped — better to send a request that might error than to strip a valid image we just couldn't measure.
 ## System prompt rewrite (preload mode, optional)
 The interceptor can rewrite Claude Code's `# Output efficiency` system-prompt section. Disabled by default. Enable with `CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT`. See [docs/output-efficiency-prompts.md](docs/output-efficiency-prompts.md) for the three known prompt variants and usage instructions.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-cache-fix",
-  "version": "3.2.0",
+  "version": "3.2.1",
   "description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
   "type": "module",
   "exports": "./preload.mjs",

package/proxy/extensions/image-strip.mjs CHANGED Viewed

@@ -1,6 +1,13 @@
+import { parseImageDimensions } from "../image-dimensions.mjs";
 const KEEP_LAST = parseInt(process.env.CACHE_FIX_IMAGE_KEEP_LAST || "0", 10);
+const MAX_DIM = parseInt(process.env.CACHE_FIX_IMAGE_MAX_DIM || "0", 10);
 const PLACEHOLDER = "[image stripped from history — file may still be on disk]";
+function oversizedPlaceholder(maxDim, w, h) {
+  return `[image stripped — exceeded ${maxDim}px max dimension (was ${w}x${h}px)]`;
+}
 function stripOldToolResultImages(messages, keepLast) {
   if (!keepLast || keepLast <= 0 || !Array.isArray(messages)) {
     return { messages, stats: null };
@@ -58,26 +65,116 @@ function stripOldToolResultImages(messages, keepLast) {
   return { messages: strippedCount > 0 ? result : messages, stats };
 }
-export { stripOldToolResultImages, PLACEHOLDER };
+// Strip oversized images from BOTH user-message direct content and
+// tool_result-nested content. Orthogonal to KEEP_LAST: scans every image
+// remaining in the message list and replaces any whose width or height
+// exceeds maxDim. Fail-open: images we can't measure (unsupported format,
+// truncated header) are kept rather than stripped.
+//
+// Stripping by oversize prevents the Anthropic API error:
+//   "An image in the conversation exceeds the dimension limit for many-image
+//    requests (2000px). Start a new session with fewer images."
+function stripOversizedImages(messages, maxDim) {
+  if (!maxDim || maxDim <= 0 || !Array.isArray(messages)) {
+    return { messages, stats: null };
+  }
+  let strippedCount = 0;
+  let strippedBytes = 0;
+  function maybeStrip(item) {
+    if (!item || item.type !== "image") return item;
+    const src = item.source;
+    if (!src || !src.data || !src.media_type) return item;
+    const dims = parseImageDimensions(src.media_type, src.data);
+    if (!dims) return item; // can't measure → keep
+    if (dims.width <= maxDim && dims.height <= maxDim) return item;
+    strippedCount++;
+    strippedBytes += src.data.length;
+    return { type: "text", text: oversizedPlaceholder(maxDim, dims.width, dims.height) };
+  }
+  const result = messages.map((msg) => {
+    if (!Array.isArray(msg.content)) return msg;
+    let mutated = false;
+    const newContent = msg.content.map((block) => {
+      // Direct image block on a user message
+      if (block && block.type === "image") {
+        const replaced = maybeStrip(block);
+        if (replaced !== block) {
+          mutated = true;
+          return replaced;
+        }
+        return block;
+      }
+      // Image nested inside a tool_result.content array
+      if (block && block.type === "tool_result" && Array.isArray(block.content)) {
+        let toolMutated = false;
+        const newToolContent = block.content.map((item) => {
+          const replaced = maybeStrip(item);
+          if (replaced !== item) toolMutated = true;
+          return replaced;
+        });
+        if (toolMutated) {
+          mutated = true;
+          return { ...block, content: newToolContent };
+        }
+      }
+      return block;
+    });
+    return mutated ? { ...msg, content: newContent } : msg;
+  });
+  const stats = strippedCount > 0
+    ? { strippedCount, strippedBytes, estimatedTokens: Math.ceil(strippedBytes * 0.125) }
+    : null;
+  return { messages: strippedCount > 0 ? result : messages, stats };
+}
+export { stripOldToolResultImages, stripOversizedImages, PLACEHOLDER, oversizedPlaceholder };
 export default {
   name: "image-strip",
-  description: "Strip base64 images from old tool results to reduce token waste",
+  description:
+    "Strip base64 images from old tool results AND optionally strip oversized images that would trigger Anthropic's many-image dimension limit",
   enabled: false,
   order: 150,
   async onRequest(ctx) {
     const keepLast = parseInt(ctx.meta.imageKeepLast ?? KEEP_LAST, 10);
-    if (!keepLast || keepLast <= 0) return;
+    const maxDim = parseInt(ctx.meta.imageMaxDim ?? MAX_DIM, 10);
+    if ((!keepLast || keepLast <= 0) && (!maxDim || maxDim <= 0)) return;
     if (!ctx.body.messages) return;
-    const { messages, stats } = stripOldToolResultImages(ctx.body.messages, keepLast);
-    if (stats) {
+    let messages = ctx.body.messages;
+    const logParts = [];
+    // Pass 1: existing keep_last behavior. Sets ctx.meta.imageStripStats with
+    // the same shape as before this PR — back-compat preserved.
+    if (keepLast > 0) {
+      const r = stripOldToolResultImages(messages, keepLast);
+      if (r.stats) {
+        messages = r.messages;
+        ctx.meta.imageStripStats = r.stats;
+        logParts.push(`keep_last: ${r.stats.strippedCount} stripped (~${r.stats.estimatedTokens} tokens saved)`);
+      }
+    }
+    // Pass 2: new max_dim behavior. Stats land on a new field so consumers
+    // already reading imageStripStats don't see a shape change.
+    if (maxDim > 0) {
+      const r = stripOversizedImages(messages, maxDim);
+      if (r.stats) {
+        messages = r.messages;
+        ctx.meta.imageStripOversizedStats = r.stats;
+        logParts.push(`max_dim: ${r.stats.strippedCount} oversized stripped (~${r.stats.estimatedTokens} tokens saved)`);
+      }
+    }
+    if (logParts.length > 0) {
       ctx.body.messages = messages;
-      ctx.meta.imageStripStats = stats;
-      process.stderr.write(
-        `[image-strip] stripped ${stats.strippedCount} images (~${stats.estimatedTokens} tokens saved)\n`
-      );
+      process.stderr.write(`[image-strip] ${logParts.join("; ")}\n`);
     }
   },
 };

package/proxy/image-dimensions.mjs ADDED Viewed

@@ -0,0 +1,120 @@
+// Pure-JS image header dimension parsing for PNG and JPEG.
+//
+// Used by the image-strip extension to detect images exceeding a configurable
+// max dimension. Stays in a separate module so it can be unit-tested without
+// the rest of the proxy machinery.
+//
+// No native deps. Decode-only — never modifies the image data.
+const PNG_MAGIC = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
+// PNG: after the 8-byte magic, the IHDR chunk begins. IHDR layout:
+//   [4 bytes length][4 bytes "IHDR"][4 bytes width BE][4 bytes height BE]...
+// Width starts at byte 16, height at byte 20, both 32-bit big-endian.
+export function parsePngDimensions(buffer) {
+  if (!buffer || buffer.length < 24) return null;
+  for (let i = 0; i < PNG_MAGIC.length; i++) {
+    if (buffer[i] !== PNG_MAGIC[i]) return null;
+  }
+  // Verify the IHDR chunk type (offset 12-15 should be ASCII "IHDR")
+  if (
+    buffer[12] !== 0x49 || buffer[13] !== 0x48 ||
+    buffer[14] !== 0x44 || buffer[15] !== 0x52
+  ) {
+    return null;
+  }
+  const width = buffer.readUInt32BE(16);
+  const height = buffer.readUInt32BE(20);
+  if (width <= 0 || height <= 0) return null;
+  return { width, height };
+}
+// JPEG SOF (Start Of Frame) markers we care about. We don't differentiate
+// between SOF0 (baseline), SOF1 (extended sequential), SOF2 (progressive),
+// SOF3 (lossless) etc — all carry dimensions in the same layout.
+const JPEG_SOF_MARKERS = new Set([
+  0xc0, 0xc1, 0xc2, 0xc3, 0xc5, 0xc6, 0xc7, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf,
+]);
+// JPEG: starts with FF D8 (SOI). Each segment after is FF <marker> [length BE]
+// [data...]. We scan for an SOF marker and read width/height from its segment.
+// SOF segment layout after the marker: [length 2B][precision 1B][height 2B][width 2B]...
+export function parseJpegDimensions(buffer) {
+  if (!buffer || buffer.length < 4) return null;
+  if (buffer[0] !== 0xff || buffer[1] !== 0xd8) return null;
+  let i = 2;
+  const max = buffer.length;
+  // Bound iterations to keep malformed inputs from looping: the scan gives up
+  // after 1000 passes or once 8 or fewer bytes of the buffer remain.
+  let iterations = 0;
+  while (i < max - 8 && iterations++ < 1000) {
+    // Each marker segment starts with 0xFF followed by the marker byte.
+    if (buffer[i] !== 0xff) {
+      // Skip over fill bytes / pad bytes (not valid in standard JPEG but tolerated)
+      i++;
+      continue;
+    }
+    // Skip multiple 0xFF prefixes (pad fill — valid per spec)
+    while (i < max - 1 && buffer[i] === 0xff) i++;
+    const marker = buffer[i];
+    i++;
+    // Markers without a length-prefixed segment: SOI (D8), EOI (D9), RST0-7 (D0-D7).
+    if (marker === 0xd8 || marker === 0xd9 || (marker >= 0xd0 && marker <= 0xd7)) {
+      continue;
+    }
+    if (i + 1 >= max) return null;
+    const segLen = (buffer[i] << 8) | buffer[i + 1];
+    if (segLen < 2 || i + segLen > max) return null;
+    if (JPEG_SOF_MARKERS.has(marker)) {
+      // Layout: [length 2B][precision 1B][height 2B][width 2B]
+      // i currently points at the length field's first byte.
+      if (i + 6 >= max) return null;
+      const height = (buffer[i + 3] << 8) | buffer[i + 4];
+      const width = (buffer[i + 5] << 8) | buffer[i + 6];
+      if (width <= 0 || height <= 0) return null;
+      return { width, height };
+    }
+    // Skip this segment to its end and continue scanning.
+    i += segLen;
+  }
+  return null;
+}
+// Decode a small prefix of base64 data and dispatch to the right parser based
+// on media_type. Returns { width, height } or null.
+//
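+// We decode only a short prefix — at most HEADER_PROBE_BYTES * 2 base64 chars,
+// i.e. about 1.5 KB of decoded bytes. That is enough for PNG IHDR (always at
+// the start) and for JPEGs whose SOF segment falls inside the prefix. A JPEG
+// with a large leading segment (e.g. EXIF or ICC data) that runs past the
+// probe yields null, so the caller keeps the image (fail-open).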
+const HEADER_PROBE_BYTES = 1024;
+export function parseImageDimensions(mediaType, base64Data) {
+  if (!mediaType || !base64Data || typeof base64Data !== "string") return null;
+  // Base64 expands by ~4/3, so to get HEADER_PROBE_BYTES decoded bytes we need
+  // ~HEADER_PROBE_BYTES * 4 / 3 base64 chars. Round up generously.
+  const probeChars = Math.min(base64Data.length, HEADER_PROBE_BYTES * 2);
+  let buffer;
+  try {
+    buffer = Buffer.from(base64Data.slice(0, probeChars), "base64");
+  } catch {
+    return null;
+  }
+  if (!buffer || buffer.length === 0) return null;
+  switch (mediaType.toLowerCase()) {
+    case "image/png":
+      return parsePngDimensions(buffer);
+    case "image/jpeg":
+    case "image/jpg":
+      return parseJpegDimensions(buffer);
+    default:
+      // Unsupported format — fail-open. Caller treats null as "can't measure"
+      // and keeps the image rather than stripping what it can't verify.
+      return null;
+  }
+}