npm - squeezr-ai - Versions diffs - 1.11.4 → 1.13.1 - Mend

squeezr-ai 1.11.4 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +44 -2
package/bin/squeezr.js +30 -9
package/dist/__tests__/cache.test.js +15 -12
package/dist/__tests__/codexMitm.test.d.ts +1 -0
package/dist/__tests__/codexMitm.test.js +288 -0
package/dist/cache.d.ts +2 -1
package/dist/cache.js +7 -5
package/dist/codexMitm.d.ts +4 -0
package/dist/codexMitm.js +428 -0
package/dist/index.js +6 -29
package/dist/server.js +26 -2
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/package.json +60 -58

package/README.md CHANGED Viewed

@@ -177,7 +177,7 @@ Squeezr auto-detects which provider each request targets from the auth headers.
 | CLI | Set this env var | Compresses with | Extra keys needed |
 |---|---|---|---|
 | **Claude Code** | `ANTHROPIC_BASE_URL=http://localhost:8080` | Claude Haiku | None |
-| **Codex CLI** | `openai_base_url=http://localhost:8080` | GPT-4o-mini | None |
+| **Codex CLI** | `squeezr setup` (see below) | gpt-5.4-mini (via your Codex sub) | None |
 | **Aider** (OpenAI backend) | `openai_base_url=http://localhost:8080` | GPT-4o-mini | None |
 | **Aider** (Anthropic backend) | `ANTHROPIC_BASE_URL=http://localhost:8080` | Claude Haiku | None |
 | **OpenCode** | `openai_base_url=http://localhost:8080` | GPT-4o-mini | None |
@@ -202,7 +202,11 @@ Then point your CLI at the proxy:
 export ANTHROPIC_BASE_URL=http://localhost:8080        # macOS / Linux
 $env:ANTHROPIC_BASE_URL="http://localhost:8080"        # Windows PowerShell
-# Codex / Aider / OpenCode
+# Codex (uses MITM proxy — see "Codex deep compression" below)
+export HTTPS_PROXY=http://localhost:8081
+export SSL_CERT_FILE=~/.squeezr/mitm-ca/bundle.crt
+# Aider / OpenCode
 export openai_base_url=http://localhost:8080
 # Gemini CLI
@@ -416,6 +420,44 @@ Shows which deterministic patterns fired, how many outputs hit the AI fallback,
 ---
+## Codex deep compression
+Codex CLI talks to `chatgpt.com` over WebSocket, not the standard OpenAI API. This means a regular HTTP proxy can't inspect or modify the traffic. Squeezr solves this with a TLS-terminating MITM proxy on port 8081.
+### How it works
+1. `squeezr setup` generates a local CA and configures `HTTPS_PROXY` + `SSL_CERT_FILE` in your shell
+2. When Codex connects to `chatgpt.com`, Squeezr intercepts the TLS tunnel and generates a per-host certificate signed by the local CA
+3. Squeezr strips `permessage-deflate` from the WebSocket handshake so frames arrive as plain JSON
+4. On every client-to-server WebSocket frame, Squeezr looks for `function_call_output` messages (tool results) exceeding the compression threshold
+5. For each large tool result, Squeezr opens a **separate** WebSocket to `chatgpt.com/backend-api/codex/responses` using the same OAuth token, and asks `gpt-5.4-mini` to summarize it
+6. The compressed output replaces the original in the frame before forwarding to the server
+### Setup
+```bash
+squeezr setup   # auto-configures everything (HTTPS_PROXY, SSL_CERT_FILE, CA)
+```
+Or manually:
+```bash
+export HTTPS_PROXY=http://localhost:8081
+export SSL_CERT_FILE=~/.squeezr/mitm-ca/bundle.crt
+```
+### What it costs
+Nothing extra. The compression calls use `gpt-5.4-mini` through the same ChatGPT WebSocket endpoint that your Codex subscription already covers. No API key required.
+### Results
+In testing, Codex tool results (file reads, command output) are compressed by **80-90%** per turn. A typical file read of 5,000 chars compresses to ~700 chars, saving thousands of tokens across a session.
+For a detailed technical explanation, see [CODEX.md](CODEX.md).
+---
 ## How session-level optimisations work
 ### Session cache + differential compression

package/bin/squeezr.js CHANGED Viewed

@@ -113,12 +113,23 @@ function stopProxy() {
       const match = out.match(/LISTENING\s+(\d+)/)
       pid = match?.[1]
     } else {
-      pid = execSync(`lsof -ti :${port}`, { encoding: 'utf-8', stdio: 'pipe' }).trim()
+      // Use -sTCP:LISTEN to get only the listening process, not connected clients.
+      // lsof may return multiple PIDs without this flag.
+      try {
+        pid = execSync(`lsof -ti :${port} -sTCP:LISTEN`, { encoding: 'utf-8', stdio: 'pipe' }).trim()
+      } catch {
+        // fallback: fuser (available on most Linux/WSL)
+        try {
+          pid = execSync(`fuser ${port}/tcp 2>/dev/null`, { encoding: 'utf-8', stdio: 'pipe' }).trim()
+        } catch {}
+      }
     }
     if (!pid) {
       console.log(`Squeezr is not running on port ${port}`)
       return
     }
+    // Take only the first PID in case multiple are returned
+    pid = pid.split(/\s+/)[0]
     if (process.platform === 'win32') {
       execSync(`taskkill /F /PID ${pid}`, { stdio: 'pipe' })
     } else {
@@ -254,11 +265,16 @@ function setupUnix() {
   // 1. Set env vars + auto-heal guard in shell profile
   const distIndex = path.join(ROOT, 'dist', 'index.js')
   const port = process.env.SQUEEZR_PORT || 8080
+  const mitmPort = Number(port) + 1
+  const bundlePath = path.join(os.homedir(), '.squeezr', 'mitm-ca', 'bundle.crt')
   const shellBlock = [
     `# squeezr env vars`,
     `export ANTHROPIC_BASE_URL=http://localhost:${port}`,
     `export openai_base_url=http://localhost:${port}`,
     `export GEMINI_API_BASE_URL=http://localhost:${port}`,
+    `# squeezr MITM proxy for Codex (TLS interception)`,
+    `export HTTPS_PROXY=http://localhost:${mitmPort}`,
+    `export SSL_CERT_FILE=${bundlePath}`,
     `# squeezr auto-heal: start proxy if not running`,
     `if ! curl -sf http://localhost:${port}/squeezr/health >/dev/null 2>&1; then`,
     `  nohup ${nodeExe} ${distIndex} >> "${os.homedir()}/.squeezr/squeezr.log" 2>&1 &`,
@@ -277,13 +293,14 @@ function setupUnix() {
     fs.appendFileSync(profile, `\n${shellBlock}\n`)
     console.log(`  [ok] Env vars + auto-heal added to ${profile}`)
   } else {
-    if (!existing.includes('squeezr auto-heal')) {
+    if (!existing.includes('SSL_CERT_FILE') || !existing.includes('squeezr MITM')) {
+      // Re-write block to include MITM vars
       const updatedContent = existing.replace(
-        /# squeezr env vars\n(?:export [A-Z_]+=http:\/\/localhost:\d+\n?)*/,
+        /# squeezr env vars[\s\S]*?fi\n/,
         shellBlock + '\n'
       )
       fs.writeFileSync(profile, updatedContent)
-      console.log(`  [ok] Auto-heal guard added to ${profile}`)
+      console.log(`  [ok] Shell profile updated with MITM proxy vars`)
     } else {
       console.log(`  [skip] Env vars + auto-heal already in ${profile}`)
     }
@@ -381,11 +398,15 @@ function setupWSL() {
   //    it in the background. This is the safety net for WSL2 where systemd and
   //    Task Scheduler may both fail.
   const port = process.env.SQUEEZR_PORT || 8080
+  const mitmPort = Number(port) + 1
+  const bundlePath = path.join(os.homedir(), '.squeezr', 'mitm-ca', 'bundle.crt')
   const shellBlock = [
     `# squeezr env vars`,
     `export ANTHROPIC_BASE_URL=http://localhost:${port}`,
     `export openai_base_url=http://localhost:${port}`,
     `export GEMINI_API_BASE_URL=http://localhost:${port}`,
+    `export HTTPS_PROXY=http://localhost:${mitmPort}`,
+    `export SSL_CERT_FILE=${bundlePath}`,
     `# squeezr auto-heal: start proxy if not running`,
     `if ! curl -sf http://localhost:${port}/squeezr/health >/dev/null 2>&1; then`,
     `  nohup ${nodeExe} ${distIndex} >> "${os.homedir()}/.squeezr/squeezr.log" 2>&1 &`,
@@ -404,16 +425,16 @@ function setupWSL() {
     fs.appendFileSync(profile, `\n${shellBlock}\n`)
     console.log(`  [ok] Env vars + auto-heal added to ${profile}`)
   } else {
-    // Update existing block to include auto-heal if missing
-    if (!existing.includes('squeezr auto-heal')) {
+    // Update existing block if missing MITM proxy vars
+    if (!existing.includes('SSL_CERT_FILE') || !existing.includes('HTTPS_PROXY')) {
       const updatedContent = existing.replace(
-        /# squeezr env vars\n(?:export [A-Z_]+=http:\/\/localhost:\d+\n?)*/,
+        /# squeezr env vars[\s\S]*?fi\n/,
         shellBlock + '\n'
       )
       fs.writeFileSync(profile, updatedContent)
-      console.log(`  [ok] Auto-heal guard added to ${profile}`)
+      console.log(`  [ok] Shell profile updated with MITM proxy vars`)
     } else {
-      console.log(`  [skip] Env vars + auto-heal already in ${profile}`)
+      console.log(`  [skip] Env vars already in ${profile}`)
     }
   }

package/dist/__tests__/cache.test.js CHANGED Viewed

@@ -1,10 +1,14 @@
 import { describe, it, expect, beforeEach } from 'vitest';
 import { CompressionCache } from '../cache.js';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+// Use a unique temp path per test run so no disk state bleeds between tests
+const tmpPath = () => join(tmpdir(), `squeezr-cache-test-${Date.now()}-${Math.random().toString(36).slice(2)}.json`);
 describe('CompressionCache', () => {
     let cache;
     beforeEach(() => {
-        // maxEntries=5 for fast LRU testing; file I/O fails silently in test env
-        cache = new CompressionCache(5);
+        // maxEntries=5, isolated temp file — no disk bleed between runs
+        cache = new CompressionCache(5, tmpPath());
     });
     it('returns undefined for a cache miss', () => {
         expect(cache.get('never stored this')).toBeUndefined();
@@ -42,7 +46,6 @@ describe('CompressionCache', () => {
         cache.set('c', '3');
         cache.set('d', '4');
         cache.set('e', '5');
-        // All 5 entries stored
         expect(cache.stats().size).toBe(5);
         // Add one more — oldest ('a') should be evicted
         cache.set('f', '6');
@@ -50,19 +53,19 @@ describe('CompressionCache', () => {
         expect(cache.get('a')).toBeUndefined();
         expect(cache.get('f')).toBe('6');
     });
-    it('reports correct size (relative to initial)', () => {
-        // Use a large maxEntries so LRU eviction doesn't interfere
-        const bigCache = new CompressionCache(1000);
-        const initialSize = bigCache.stats().size;
-        bigCache.set('unique-key-x-' + Date.now(), 'y');
-        expect(bigCache.stats().size).toBe(initialSize + 1);
-        bigCache.set('unique-key-z-' + Date.now(), 'w');
-        expect(bigCache.stats().size).toBe(initialSize + 2);
+    it('reports correct size after additions', () => {
+        expect(cache.stats().size).toBe(0); // fresh isolated cache
+        cache.set('key1', 'val1');
+        expect(cache.stats().size).toBe(1);
+        cache.set('key2', 'val2');
+        expect(cache.stats().size).toBe(2);
     });
-    it('overwrites existing entry', () => {
+    it('overwrites existing entry without growing size', () => {
         cache.set('key', 'first');
+        expect(cache.stats().size).toBe(1);
         cache.set('key', 'second');
         expect(cache.get('key')).toBe('second');
+        expect(cache.stats().size).toBe(1);
     });
     it('different texts produce different cache entries', () => {
         cache.set('text1', 'compressed1');

package/dist/__tests__/codexMitm.test.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/__tests__/codexMitm.test.js ADDED Viewed

@@ -0,0 +1,288 @@
+import { describe, it, expect } from 'vitest';
+// ── WS frame helpers (inline reimplementation for testing) ────────────────────
+// These mirror the logic in codexMitm.ts without importing it directly
+// (importing would require node-forge CA files to exist)
+function xorMask(data, key) {
+    const out = Buffer.from(data);
+    for (let i = 0; i < out.length; i++)
+        out[i] ^= key[i % 4];
+    return out;
+}
+function buildWsFrame(opcode, payload, masked) {
+    const key = masked ? Buffer.from([0x37, 0xfa, 0x21, 0x3d]) : Buffer.alloc(0);
+    const plen = payload.length;
+    let hlen = 2 + (masked ? 4 : 0);
+    if (plen >= 65536)
+        hlen += 8;
+    else if (plen >= 126)
+        hlen += 2;
+    const frame = Buffer.alloc(hlen + plen);
+    frame[0] = 0x80 | opcode;
+    if (plen >= 126) {
+        frame[1] = (masked ? 0x80 : 0) | 126;
+        frame.writeUInt16BE(plen, 2);
+        if (masked)
+            key.copy(frame, 4);
+    }
+    else {
+        frame[1] = (masked ? 0x80 : 0) | plen;
+        if (masked)
+            key.copy(frame, 2);
+    }
+    const body = masked ? xorMask(payload, key) : payload;
+    body.copy(frame, hlen);
+    return frame;
+}
+function parseWsFrame(buf) {
+    if (buf.length < 2)
+        return null;
+    const opcode = buf[0] & 0x0F;
+    const masked = !!(buf[1] & 0x80);
+    let plen = buf[1] & 0x7F;
+    let hlen = 2;
+    if (plen === 126) {
+        if (buf.length < 4)
+            return null;
+        plen = buf.readUInt16BE(2);
+        hlen = 4;
+    }
+    else if (plen === 127) {
+        if (buf.length < 10)
+            return null;
+        plen = Number(buf.readBigUInt64BE(2));
+        hlen = 10;
+    }
+    const mask = Buffer.alloc(4);
+    if (masked) {
+        if (buf.length < hlen + 4)
+            return null;
+        buf.copy(mask, 0, hlen, hlen + 4);
+        hlen += 4;
+    }
+    if (buf.length < hlen + plen)
+        return null;
+    return { opcode, masked, mask, payload: buf.slice(hlen, hlen + plen), total: hlen + plen };
+}
+// ── WS frame tests ────────────────────────────────────────────────────────────
+describe('WS frame helpers', () => {
+    it('xorMask is its own inverse', () => {
+        const data = Buffer.from('hello world');
+        const key = Buffer.from([0xAB, 0xCD, 0xEF, 0x12]);
+        expect(xorMask(xorMask(data, key), key).toString()).toBe('hello world');
+    });
+    it('builds and parses an unmasked text frame', () => {
+        const payload = Buffer.from('{"type":"ping"}');
+        const frame = buildWsFrame(1, payload, false);
+        const parsed = parseWsFrame(frame);
+        expect(parsed).not.toBeNull();
+        expect(parsed.opcode).toBe(1);
+        expect(parsed.masked).toBe(false);
+        expect(parsed.payload.toString()).toBe('{"type":"ping"}');
+        expect(parsed.total).toBe(frame.length);
+    });
+    it('builds and parses a masked text frame', () => {
+        const payload = Buffer.from('{"type":"response.create"}');
+        const frame = buildWsFrame(1, payload, true);
+        const parsed = parseWsFrame(frame);
+        expect(parsed).not.toBeNull();
+        expect(parsed.opcode).toBe(1);
+        expect(parsed.masked).toBe(true);
+        const plain = xorMask(parsed.payload, parsed.mask);
+        expect(plain.toString()).toBe('{"type":"response.create"}');
+    });
+    it('builds a 126-byte extended length frame', () => {
+        const payload = Buffer.alloc(130, 0x41); // 130 'A' chars
+        const frame = buildWsFrame(2, payload, false);
+        const parsed = parseWsFrame(frame);
+        expect(parsed.opcode).toBe(2);
+        expect(parsed.payload.length).toBe(130);
+    });
+    it('returns null for incomplete frame', () => {
+        const partial = Buffer.from([0x81, 0x05, 0x48]); // says 5-byte payload, only 1 byte
+        expect(parseWsFrame(partial)).toBeNull();
+    });
+    it('handles empty payload', () => {
+        const frame = buildWsFrame(1, Buffer.alloc(0), false);
+        const parsed = parseWsFrame(frame);
+        expect(parsed.payload.length).toBe(0);
+        expect(parsed.total).toBe(2);
+    });
+    it('FIN bit is always set', () => {
+        const frame = buildWsFrame(1, Buffer.from('x'), false);
+        expect(frame[0] & 0x80).toBe(0x80);
+    });
+    it('roundtrip: masked frame → parse → unmask → same payload', () => {
+        const original = Buffer.from(JSON.stringify({ type: 'response.create', model: 'gpt-5.4-mini' }));
+        const frame = buildWsFrame(1, original, true);
+        const parsed = parseWsFrame(frame);
+        const plain = xorMask(parsed.payload, parsed.mask);
+        expect(plain.toString()).toBe(original.toString());
+    });
+});
+// ── Compression threshold logic (unit test, no network) ──────────────────────
+describe('processCodexRequest logic', () => {
+    // Replicate the field detection logic from codexMitm.ts
+    function findToolMessages(input) {
+        return input.flatMap(msg => {
+            const isToolMsg = msg.type === 'function_call_output' || msg.role === 'tool' || msg.role === 'function';
+            if (!isToolMsg)
+                return [];
+            const text = msg.output ?? (typeof msg.content === 'string' ? msg.content : null);
+            if (!text)
+                return [];
+            return [{ text, field: msg.output !== undefined ? 'output' : 'content' }];
+        });
+    }
+    it('detects function_call_output (Responses API format)', () => {
+        const msgs = [
+            { type: 'function_call_output', call_id: 'c1', output: 'file contents here' },
+            { role: 'user', content: 'read the file' },
+        ];
+        const found = findToolMessages(msgs);
+        expect(found).toHaveLength(1);
+        expect(found[0].text).toBe('file contents here');
+        expect(found[0].field).toBe('output');
+    });
+    it('detects role=tool (Chat Completions format)', () => {
+        const msgs = [
+            { role: 'tool', tool_call_id: 't1', content: 'shell output' },
+            { role: 'user', content: 'run ls' },
+        ];
+        const found = findToolMessages(msgs);
+        expect(found).toHaveLength(1);
+        expect(found[0].text).toBe('shell output');
+        expect(found[0].field).toBe('content');
+    });
+    it('detects role=function', () => {
+        const msgs = [{ role: 'function', name: 'bash', content: 'stdout here' }];
+        const found = findToolMessages(msgs);
+        expect(found).toHaveLength(1);
+    });
+    it('ignores non-tool messages', () => {
+        const msgs = [
+            { role: 'user', content: 'hello' },
+            { role: 'assistant', content: 'world' },
+            { type: 'response.create', model: 'gpt-5.4' },
+        ];
+        expect(findToolMessages(msgs)).toHaveLength(0);
+    });
+    it('ignores function_call_output with no output field', () => {
+        const msgs = [{ type: 'function_call_output', call_id: 'c1' }];
+        expect(findToolMessages(msgs)).toHaveLength(0);
+    });
+    it('handles multiple tool messages in one request', () => {
+        const msgs = [
+            { type: 'function_call_output', call_id: 'c1', output: 'first tool' },
+            { type: 'function_call_output', call_id: 'c2', output: 'second tool' },
+            { role: 'user', content: 'question' },
+        ];
+        expect(findToolMessages(msgs)).toHaveLength(2);
+    });
+});
+// ── MITM request format ───────────────────────────────────────────────────────
+describe('Codex compression request format', () => {
+    const COMPRESS_PROMPT = 'Extract ONLY essential info: errors, file paths, function names, test failures, key values, warnings. Very concise, under 150 tokens. No preamble.';
+    function buildCompressMsg(text, model = 'gpt-5.4-mini') {
+        return {
+            type: 'response.create',
+            model,
+            instructions: COMPRESS_PROMPT,
+            input: [{ role: 'user', content: text.slice(0, 4000) }],
+        };
+    }
+    it('has required top-level fields', () => {
+        const msg = buildCompressMsg('some tool output');
+        expect(msg.type).toBe('response.create');
+        expect(msg.model).toBe('gpt-5.4-mini');
+        expect(msg.instructions).toBeTruthy();
+        expect(Array.isArray(msg.input)).toBe(true);
+    });
+    it('instructions are at top level, not nested', () => {
+        const msg = buildCompressMsg('x');
+        expect(msg.instructions).toBeTruthy();
+        expect(msg.response).toBeUndefined();
+    });
+    it('truncates input to 4000 chars', () => {
+        const longText = 'a'.repeat(10_000);
+        const msg = buildCompressMsg(longText);
+        expect(msg.input[0].content.length).toBe(4000);
+    });
+    it('uses gpt-5.4-mini model', () => {
+        expect(buildCompressMsg('x').model).toBe('gpt-5.4-mini');
+    });
+    it('serializes to valid JSON', () => {
+        const msg = buildCompressMsg('tool output content');
+        expect(() => JSON.parse(JSON.stringify(msg))).not.toThrow();
+    });
+    it('wraps properly in a WS frame', () => {
+        const msg = buildCompressMsg('tool output');
+        const payload = Buffer.from(JSON.stringify(msg));
+        const frame = buildWsFrame(1, payload, true); // masked, client→server
+        const parsed = parseWsFrame(frame);
+        const plain = xorMask(parsed.payload, parsed.mask);
+        const decoded = JSON.parse(plain.toString());
+        expect(decoded.type).toBe('response.create');
+        expect(decoded.model).toBe('gpt-5.4-mini');
+    });
+});
+// ── Compression threshold ─────────────────────────────────────────────────────
+describe('compression threshold', () => {
+    const THRESHOLD = 800;
+    it('skips short tool outputs', () => {
+        const text = 'short output';
+        expect(text.length < THRESHOLD).toBe(true);
+    });
+    it('compresses long tool outputs', () => {
+        const text = 'a'.repeat(1000);
+        expect(text.length >= THRESHOLD).toBe(true);
+    });
+    it('only saves if compressed is shorter', () => {
+        const original = 'a'.repeat(1000);
+        const compressed = 'summary';
+        const saved = original.length - compressed.length;
+        expect(saved).toBeGreaterThan(0);
+    });
+    it('falls back to original if compression made it longer', () => {
+        const original = 'short';
+        const compressed = 'this is actually longer than the original text';
+        const shouldApply = compressed.length < original.length;
+        expect(shouldApply).toBe(false);
+    });
+});
+// ── WebSocket upgrade request manipulation ────────────────────────────────────
+describe('upgrade request header stripping', () => {
+    it('strips Sec-WebSocket-Extensions header', () => {
+        const upgrade = [
+            'GET /backend-api/codex/responses HTTP/1.1',
+            'Host: chatgpt.com',
+            'Authorization: Bearer eyJ...',
+            'Upgrade: websocket',
+            'Sec-WebSocket-Extensions: permessage-deflate; client_max_window_bits',
+            'Sec-WebSocket-Key: abc123==',
+            '',
+        ].join('\r\n');
+        const stripped = upgrade.replace(/Sec-WebSocket-Extensions:[^\r\n]*\r\n/gi, '');
+        expect(stripped).not.toContain('Sec-WebSocket-Extensions');
+        expect(stripped).toContain('Upgrade: websocket');
+        expect(stripped).toContain('Authorization: Bearer');
+    });
+    it('extracts Authorization header', () => {
+        const raw = 'GET /backend-api/codex/responses HTTP/1.1\r\nAuthorization: Bearer eyJmoo\r\n\r\n';
+        const match = raw.match(/[Aa]uthorization:\s*(Bearer [^\r\n]+)/);
+        expect(match?.[1]).toBe('Bearer eyJmoo');
+    });
+    it('extracts chatgpt-account-id header', () => {
+        const raw = 'GET / HTTP/1.1\r\nchatgpt-account-id: acc-abc123\r\n\r\n';
+        const match = raw.match(/chatgpt-account-id:\s*([^\r\n]+)/i);
+        expect(match?.[1]).toBe('acc-abc123');
+    });
+    it('detects Codex WS path', () => {
+        const peek = 'get /backend-api/codex/responses http/1.1\r\nupgrade: websocket\r\n';
+        expect(peek.includes('upgrade: websocket')).toBe(true);
+        expect(peek.includes('/backend-api/codex/responses')).toBe(true);
+    });
+    it('does not detect non-Codex WS as Codex', () => {
+        const peek = 'get /chat/stream http/1.1\r\nupgrade: websocket\r\n';
+        expect(peek.includes('/backend-api/codex/responses')).toBe(false);
+    });
+});

package/dist/cache.d.ts CHANGED Viewed

@@ -3,7 +3,8 @@ export declare class CompressionCache {
     private store;
     private hits;
     private misses;
-    constructor(maxEntries: number);
+    private readonly cachePath;
+    constructor(maxEntries: number, cachePath?: string);
     private key;
     get(text: string): string | undefined;
     set(text: string, compressed: string): void;

package/dist/cache.js CHANGED Viewed

@@ -2,14 +2,16 @@ import { createHash } from 'crypto';
 import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
 import { join } from 'path';
 import { homedir } from 'os';
-const CACHE_FILE = join(homedir(), '.squeezr', 'cache.json');
+const DEFAULT_CACHE_PATH = join(homedir(), '.squeezr', 'cache.json');
 export class CompressionCache {
     maxEntries;
     store = new Map();
     hits = 0;
     misses = 0;
-    constructor(maxEntries) {
+    cachePath;
+    constructor(maxEntries, cachePath = DEFAULT_CACHE_PATH) {
         this.maxEntries = maxEntries;
+        this.cachePath = cachePath;
         this.load();
     }
     key(text) {
@@ -44,8 +46,8 @@ export class CompressionCache {
     }
     load() {
         try {
-            if (existsSync(CACHE_FILE)) {
-                const raw = JSON.parse(readFileSync(CACHE_FILE, 'utf-8'));
+            if (existsSync(this.cachePath)) {
+                const raw = JSON.parse(readFileSync(this.cachePath, 'utf-8'));
                 for (const [k, v] of Object.entries(raw)) {
                     this.store.set(k, v);
                 }
@@ -58,7 +60,7 @@ export class CompressionCache {
             const dir = join(homedir(), '.squeezr');
             if (!existsSync(dir))
                 mkdirSync(dir, { recursive: true });
-            writeFileSync(CACHE_FILE, JSON.stringify(Object.fromEntries(this.store)));
+            writeFileSync(this.cachePath, JSON.stringify(Object.fromEntries(this.store)));
         }
         catch { /* ignore */ }
     }

package/dist/codexMitm.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export declare const BUNDLE_PATH: string;
+export declare const MITM_PORT: number;
+export declare function startMitmProxy(): void;
+export declare function stopMitmProxy(): void;

package/dist/codexMitm.js ADDED Viewed

@@ -0,0 +1,428 @@
+import tls from 'node:tls';
+import http from 'node:http';
+import https from 'node:https';
+import fs from 'node:fs';
+import crypto from 'node:crypto';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+import forge from 'node-forge';
+import { config } from './config.js';
+// ── CA / cert paths ───────────────────────────────────────────────────────────
+const CA_DIR = join(homedir(), '.squeezr', 'mitm-ca');
+const CA_KEY_PATH = join(CA_DIR, 'ca.key');
+const CA_CERT_PATH = join(CA_DIR, 'ca.crt');
+export const BUNDLE_PATH = join(CA_DIR, 'bundle.crt');
+export const MITM_PORT = (config.port ?? 8080) + 1;
+// ── CA generation ─────────────────────────────────────────────────────────────
+function ensureCA() {
+    if (fs.existsSync(CA_KEY_PATH) && fs.existsSync(CA_CERT_PATH))
+        return;
+    fs.mkdirSync(CA_DIR, { recursive: true, mode: 0o700 });
+    const keys = forge.pki.rsa.generateKeyPair(2048);
+    const cert = forge.pki.createCertificate();
+    cert.publicKey = keys.publicKey;
+    cert.serialNumber = '01';
+    cert.validity.notBefore = new Date();
+    cert.validity.notAfter = new Date();
+    cert.validity.notAfter.setFullYear(cert.validity.notBefore.getFullYear() + 10);
+    const attrs = [{ name: 'commonName', value: 'Squeezr-MITM-CA' }];
+    cert.setSubject(attrs);
+    cert.setIssuer(attrs);
+    cert.setExtensions([
+        { name: 'basicConstraints', cA: true },
+        { name: 'keyUsage', keyCertSign: true, cRLSign: true },
+    ]);
+    cert.sign(keys.privateKey, forge.md.sha256.create());
+    fs.writeFileSync(CA_KEY_PATH, forge.pki.privateKeyToPem(keys.privateKey), { mode: 0o600 });
+    fs.writeFileSync(CA_CERT_PATH, forge.pki.certificateToPem(cert), { mode: 0o644 });
+    const systemCAs = [
+        '/etc/ssl/certs/ca-certificates.crt',
+        '/etc/ssl/cert.pem',
+    ].find(p => fs.existsSync(p));
+    const bundle = forge.pki.certificateToPem(cert) + (systemCAs ? fs.readFileSync(systemCAs, 'utf-8') : '');
+    fs.writeFileSync(BUNDLE_PATH, bundle, { mode: 0o644 });
+    console.log(`[squeezr/mitm] CA generated → ${CA_CERT_PATH}`);
+}
+// ── Per-host cert (cached) ────────────────────────────────────────────────────
+const certCache = new Map();
+function getCert(hostname) {
+    if (certCache.has(hostname))
+        return certCache.get(hostname);
+    const caKey = forge.pki.privateKeyFromPem(fs.readFileSync(CA_KEY_PATH, 'utf-8'));
+    const caCert = forge.pki.certificateFromPem(fs.readFileSync(CA_CERT_PATH, 'utf-8'));
+    const keys = forge.pki.rsa.generateKeyPair(2048);
+    const cert = forge.pki.createCertificate();
+    cert.publicKey = keys.publicKey;
+    cert.serialNumber = crypto.randomBytes(8).toString('hex');
+    cert.validity.notBefore = new Date();
+    cert.validity.notAfter = new Date();
+    cert.validity.notAfter.setFullYear(cert.validity.notBefore.getFullYear() + 1);
+    cert.setSubject([{ name: 'commonName', value: hostname }]);
+    cert.setIssuer(caCert.subject.attributes);
+    cert.setExtensions([{ name: 'subjectAltName', altNames: [{ type: 2, value: hostname }] }]);
+    cert.sign(caKey, forge.md.sha256.create());
+    const result = {
+        key: forge.pki.privateKeyToPem(keys.privateKey),
+        cert: forge.pki.certificateToPem(cert),
+    };
+    certCache.set(hostname, result);
+    return result;
+}
+// ── WebSocket frame helpers ───────────────────────────────────────────────────
+function xorMask(data, key) {
+    const out = Buffer.from(data);
+    for (let i = 0; i < out.length; i++)
+        out[i] ^= key[i % 4];
+    return out;
+}
+function parseWsFrame(buf) {
+    if (buf.length < 2)
+        return null;
+    const opcode = buf[0] & 0x0F;
+    const masked = !!(buf[1] & 0x80);
+    let plen = buf[1] & 0x7F;
+    let hlen = 2;
+    if (plen === 126) {
+        if (buf.length < 4)
+            return null;
+        plen = buf.readUInt16BE(2);
+        hlen = 4;
+    }
+    else if (plen === 127) {
+        if (buf.length < 10)
+            return null;
+        plen = Number(buf.readBigUInt64BE(2));
+        hlen = 10;
+    }
+    const mask = Buffer.alloc(4);
+    if (masked) {
+        if (buf.length < hlen + 4)
+            return null;
+        buf.copy(mask, 0, hlen, hlen + 4);
+        hlen += 4;
+    }
+    if (buf.length < hlen + plen)
+        return null;
+    return { opcode, masked, mask, payload: buf.slice(hlen, hlen + plen), total: hlen + plen };
+}
+function buildWsFrame(opcode, payload, masked) {
+    const key = masked ? crypto.randomBytes(4) : Buffer.alloc(0);
+    const plen = payload.length;
+    let hlen = 2 + (masked ? 4 : 0);
+    if (plen >= 65536)
+        hlen += 8;
+    else if (plen >= 126)
+        hlen += 2;
+    const frame = Buffer.alloc(hlen + plen);
+    frame[0] = 0x80 | opcode;
+    if (plen >= 65536) {
+        frame[1] = (masked ? 0x80 : 0) | 127;
+        frame.writeBigUInt64BE(BigInt(plen), 2);
+        if (masked)
+            key.copy(frame, 10);
+    }
+    else if (plen >= 126) {
+        frame[1] = (masked ? 0x80 : 0) | 126;
+        frame.writeUInt16BE(plen, 2);
+        if (masked)
+            key.copy(frame, 4);
+    }
+    else {
+        frame[1] = (masked ? 0x80 : 0) | plen;
+        if (masked)
+            key.copy(frame, 2);
+    }
+    const body = masked ? xorMask(payload, key) : payload;
+    body.copy(frame, hlen);
+    return frame;
+}
+// ── Compress via separate WS to chatgpt.com ──────────────────────────────────
+const COMPRESS_THRESHOLD = config.threshold ?? 800;
+const COMPRESS_MODEL = 'gpt-5.4-mini';
+const COMPRESS_PROMPT = 'Extract ONLY essential info: errors, file paths, function names, test failures, key values, warnings. Very concise, under 150 tokens. No preamble.';
+function compressViaWs(text, authToken, accountId) {
+    return new Promise((resolve) => {
+        const timeout = setTimeout(() => { resolve(text); socket.destroy(); }, 15_000);
+        const wsKey = crypto.randomBytes(16).toString('base64');
+        const upgradeReq = [
+            'GET /backend-api/codex/responses HTTP/1.1',
+            'Host: chatgpt.com',
+            `Authorization: ${authToken}`,
+            'Upgrade: websocket',
+            'Connection: Upgrade',
+            `Sec-WebSocket-Key: ${wsKey}`,
+            'Sec-WebSocket-Version: 13',
+            'Originator: codex_exec',
+            ...(accountId ? [`chatgpt-account-id: ${accountId}`] : []),
+            '', '',
+        ].join('\r\n');
+        const socket = tls.connect(443, 'chatgpt.com', { servername: 'chatgpt.com' }, () => {
+            socket.write(upgradeReq);
+        });
+        socket.on('error', () => { clearTimeout(timeout); resolve(text); });
+        let gotUpgrade = false;
+        let buf = Buffer.alloc(0);
+        socket.on('data', (chunk) => {
+            buf = Buffer.concat([buf, chunk]);
+            if (!gotUpgrade) {
+                const str = buf.toString('latin1');
+                if (!str.includes('\r\n\r\n'))
+                    return;
+                const headerEnd = str.indexOf('\r\n\r\n');
+                const headers = str.slice(0, headerEnd);
+                if (!headers.startsWith('HTTP/1.1 101')) {
+                    clearTimeout(timeout);
+                    resolve(text);
+                    socket.destroy();
+                    return;
+                }
+                gotUpgrade = true;
+                buf = buf.slice(headerEnd + 4);
+                // Send compression request
+                const msg = JSON.stringify({
+                    type: 'response.create',
+                    model: COMPRESS_MODEL,
+                    instructions: COMPRESS_PROMPT,
+                    input: [{ role: 'user', content: text.slice(0, 4000) }],
+                });
+                socket.write(buildWsFrame(1, Buffer.from(msg), true));
+            }
+            // Parse response frames
+            while (buf.length >= 2) {
+                const f = parseWsFrame(buf);
+                if (!f)
+                    break;
+                buf = buf.slice(f.total);
+                if (f.opcode === 1) {
+                    const payload = f.masked ? xorMask(f.payload, f.mask) : f.payload;
+                    try {
+                        const evt = JSON.parse(payload.toString('utf-8'));
+                        if (evt.type === 'response.output_text.done') {
+                            clearTimeout(timeout);
+                            resolve(evt.text || text);
+                            socket.destroy();
+                            return;
+                        }
+                        if (evt.type === 'response.completed' || evt.type === 'response.done') {
+                            const output = evt.response?.output?.[0]?.content?.[0]?.text ?? '';
+                            clearTimeout(timeout);
+                            resolve(output || text);
+                            socket.destroy();
+                            return;
+                        }
+                    }
+                    catch { }
+                }
+                else if (f.opcode === 8) {
+                    clearTimeout(timeout);
+                    resolve(text);
+                    socket.destroy();
+                    return;
+                }
+            }
+        });
+    });
+}
+// ── Process Codex request: find tool outputs and compress ─────────────────────
+async function processCodexRequest(json, authToken, accountId) {
+    const messages = json.input ?? json.messages ?? [];
+    let saved = 0;
+    for (const msg of messages) {
+        // Responses API: type=function_call_output, output field
+        // Chat Completions API: role=tool/function, content field
+        const isToolMsg = msg.type === 'function_call_output' || msg.role === 'tool' || msg.role === 'function';
+        if (!isToolMsg)
+            continue;
+        const text = msg.output ?? (typeof msg.content === 'string' ? msg.content : null);
+        if (!text || text.length < COMPRESS_THRESHOLD)
+            continue;
+        const compressed = await compressViaWs(text, authToken, accountId);
+        if (compressed.length < text.length) {
+            if (msg.output !== undefined)
+                msg.output = compressed;
+            else
+                msg.content = compressed;
+            saved += text.length - compressed.length;
+        }
+    }
+    return saved;
+}
+// ── CONNECT handler (HTTPS MITM) ─────────────────────────────────────────────
+function handleConnect(req, clientSocket, _head) {
+    const [hostname, portStr] = (req.url ?? '').split(':');
+    const port = parseInt(portStr) || 443;
+    clientSocket.write('HTTP/1.1 200 Connection Established\r\n\r\n');
+    const { key, cert } = getCert(hostname);
+    const clientTls = new tls.TLSSocket(clientSocket, { isServer: true, key, cert });
+    clientTls.on('error', () => { });
+    // Capture chatgpt-account-id from any HTTP request to chatgpt.com
+    let accountId = '';
+    clientTls.once('data', (firstChunk) => {
+        const raw = firstChunk.toString('latin1');
+        const peek = raw.toLowerCase();
+        // Capture account-id header
+        const acctMatch = raw.match(/chatgpt-account-id:\s*([^\r\n]+)/i);
+        if (acctMatch)
+            accountId = acctMatch[1].trim();
+        // ── WebSocket upgrade ─────────────────────────────────────────────────────
+        if (peek.includes('upgrade: websocket')) {
+            const isCodexWs = peek.includes('/backend-api/codex/responses');
+            // Extract auth token
+            const authMatch = raw.match(/[Aa]uthorization:\s*(Bearer [^\r\n]+)/);
+            const authToken = authMatch ? authMatch[1].trim() : '';
+            // Strip permessage-deflate so frames are plain text (avoids context desync)
+            const modified = raw.replace(/Sec-WebSocket-Extensions:[^\r\n]*\r\n/gi, '');
+            const upChunk = Buffer.from(modified, 'latin1');
+            const upSocket = tls.connect(port, hostname, { servername: hostname }, () => {
+                upSocket.write(upChunk);
+            });
+            upSocket.on('error', () => { try {
+                clientTls.destroy();
+            }
+            catch { } });
+            upSocket.once('data', (upgradeResp) => {
+                clientTls.write(upgradeResp);
+                if (!isCodexWs) {
+                    // Non-Codex WS: bidirectional passthrough
+                    upSocket.on('data', (c) => { try {
+                        clientTls.write(c);
+                    }
+                    catch { } });
+                    clientTls.on('data', (c) => { try {
+                        upSocket.write(c);
+                    }
+                    catch { } });
+                    return;
+                }
+                // ── Codex WS: intercept client→server, compress tool results ──────────
+                let clientBuf = Buffer.alloc(0);
+                clientTls.on('data', (chunk) => {
+                    clientBuf = Buffer.concat([clientBuf, chunk]);
+                    const processNext = async () => {
+                        while (clientBuf.length >= 2) {
+                            const frame = parseWsFrame(clientBuf);
+                            if (!frame)
+                                break;
+                            const originalFrame = clientBuf.slice(0, frame.total);
+                            clientBuf = clientBuf.slice(frame.total);
+                            if (frame.opcode === 1) {
+                                const plain = frame.masked ? xorMask(frame.payload, frame.mask) : frame.payload;
+                                try {
+                                    const json = JSON.parse(plain.toString('utf-8'));
+                                    const saved = await processCodexRequest(json, authToken, accountId);
+                                    if (saved > 0) {
+                                        console.log(`[squeezr/mitm] Codex compressed: -${saved} chars via ${COMPRESS_MODEL}`);
+                                        const newFrame = buildWsFrame(frame.opcode, Buffer.from(JSON.stringify(json)), frame.masked);
+                                        try {
+                                            upSocket.write(newFrame);
+                                        }
+                                        catch { }
+                                        continue;
+                                    }
+                                }
+                                catch { }
+                            }
+                            try {
+                                upSocket.write(originalFrame);
+                            }
+                            catch { }
+                        }
+                    };
+                    processNext().catch(() => { });
+                });
+                // Server→client: pass through unmodified
+                upSocket.on('data', (c) => { try {
+                    clientTls.write(c);
+                }
+                catch { } });
+            });
+            clientTls.on('error', () => { try {
+                upSocket.destroy();
+            }
+            catch { } });
+            clientTls.on('close', () => { try {
+                upSocket.destroy();
+            }
+            catch { } });
+            upSocket.on('close', () => { try {
+                clientTls.destroy();
+            }
+            catch { } });
+            return;
+        }
+        // ── Regular HTTP/1.1 (non-WebSocket) ─────────────────────────────────────
+        const fakeServer = new http.Server();
+        fakeServer.emit('connection', clientTls);
+        setImmediate(() => { if (!clientTls.destroyed)
+            clientTls.emit('data', firstChunk); });
+        fakeServer.on('request', (clientReq, clientRes) => {
+            const headers = {};
+            for (const [k, v] of Object.entries(clientReq.headers)) {
+                if (/^[\w\-]+$/.test(k))
+                    headers[k] = Array.isArray(v) ? v.join(', ') : (v ?? '');
+            }
+            headers['host'] = hostname;
+            // Capture account-id from HTTP requests too
+            if (clientReq.headers['chatgpt-account-id'] && !accountId) {
+                accountId = String(clientReq.headers['chatgpt-account-id']);
+            }
+            const upReq = https.request({
+                hostname, port,
+                path: clientReq.url ?? '/',
+                method: clientReq.method ?? 'GET',
+                headers,
+            }, (upRes) => {
+                clientRes.writeHead(upRes.statusCode ?? 200, upRes.headers);
+                upRes.pipe(clientRes);
+            });
+            upReq.on('error', () => { try {
+                clientRes.destroy();
+            }
+            catch { } });
+            clientReq.pipe(upReq);
+        });
+        fakeServer.on('error', () => { try {
+            clientTls.destroy();
+        }
+        catch { } });
+    });
+}
+// ── Plain HTTP handler ────────────────────────────────────────────────────────
+function handleHttp(req, res) {
+    const upReq = http.request({
+        hostname: req.headers.host?.split(':')[0] ?? 'localhost',
+        port: 80,
+        path: req.url,
+        method: req.method,
+        headers: req.headers,
+    }, (upRes) => {
+        res.writeHead(upRes.statusCode ?? 200, upRes.headers);
+        upRes.pipe(res);
+    });
+    upReq.on('error', () => res.writeHead(502).end());
+    req.pipe(upReq);
+}
+// ── Server lifecycle ──────────────────────────────────────────────────────────
+let mitmServer = null;
+export function startMitmProxy() {
+    try {
+        ensureCA();
+    }
+    catch (err) {
+        console.error('[squeezr/mitm] CA generation failed:', err);
+        return;
+    }
+    mitmServer = http.createServer(handleHttp);
+    mitmServer.on('connect', handleConnect);
+    mitmServer.on('error', (err) => {
+        if (err.code !== 'EADDRINUSE')
+            console.error('[squeezr/mitm] error:', err.message);
+    });
+    mitmServer.listen(MITM_PORT, () => {
+        console.log(`[squeezr/mitm] HTTPS proxy on http://localhost:${MITM_PORT}`);
+    });
+}
+export function stopMitmProxy() {
+    mitmServer?.close();
+    mitmServer = null;
+}

package/dist/index.js CHANGED Viewed

@@ -1,10 +1,11 @@
-import tls from 'node:tls';
-import { serve } from '@hono/node-server';
+import { createAdaptorServer } from '@hono/node-server';
 import { app, stats } from './server.js';
 import { config } from './config.js';
 import { VERSION } from './version.js';
+import { startMitmProxy } from './codexMitm.js';
 const PORT = config.port;
-const server = serve({ fetch: app.fetch, port: PORT }, () => {
+const httpServer = createAdaptorServer({ fetch: app.fetch });
+httpServer.listen(PORT, () => {
     console.log(`Squeezr v${VERSION} listening on http://localhost:${PORT}`);
     console.log(`Mode: ${config.dryRun ? 'dry-run' : 'active'}`);
     if (config.disabled)
@@ -12,38 +13,14 @@ const server = serve({ fetch: app.fetch, port: PORT }, () => {
     console.log(`Backends: Anthropic → Haiku | OpenAI → GPT-4o-mini | Gemini → Flash-8B | Local → ${config.localCompressionModel}`);
     console.log(`Stats: http://localhost:${PORT}/squeezr/stats`);
 });
-server.on('upgrade', (req, socket, head) => {
-    if (req.url !== '/responses') {
-        socket.destroy();
-        return;
-    }
-    const targetHost = 'api.openai.com';
-    const targetPath = '/v1/responses';
-    const upstream = tls.connect({ host: targetHost, port: 443, servername: targetHost }, () => {
-        // Rebuild the HTTP upgrade request with the correct host and path
-        const fwdHeaders = Object.entries(req.headers)
-            .filter(([k]) => k.toLowerCase() !== 'host')
-            .map(([k, v]) => `${k}: ${v}`)
-            .join('\r\n');
-        const upgradeReq = `GET ${targetPath} HTTP/1.1\r\nHost: ${targetHost}\r\n${fwdHeaders}\r\n\r\n`;
-        upstream.write(upgradeReq);
-        if (head.length > 0)
-            upstream.write(head);
-        upstream.pipe(socket);
-        socket.pipe(upstream);
-    });
-    upstream.on('error', () => socket.destroy());
-    socket.on('error', () => upstream.destroy());
-});
+// Start MITM proxy for Codex OAuth (chatgpt.com/backend-api)
+startMitmProxy();
 const isDaemon = !!process.env.SQUEEZR_DAEMON;
 if (isDaemon) {
-    // Daemon mode: ignore SIGINT (Ctrl+C) and SIGHUP (terminal close)
-    // Only stop via `squeezr stop` which sends SIGTERM
     process.on('SIGINT', () => { });
     process.on('SIGHUP', () => { });
 }
 else {
-    // Dev mode (npm run dev): allow Ctrl+C to stop
     process.on('SIGINT', () => {
         const s = stats.summary();
         console.log(`\n[squeezr] Session summary: ${s.requests} requests | -${s.total_saved_chars.toLocaleString()} chars (~${s.total_saved_tokens.toLocaleString()} tokens, ${s.savings_pct}% saved)`);

package/dist/server.js CHANGED Viewed

@@ -232,12 +232,24 @@ app.get('/squeezr/expand/:id', (c) => {
         return c.json({ error: 'Not found or expired' }, 404);
     return c.json({ id, content: original });
 });
+// ── OAuth token refresh proxy (Codex: set CODEX_REFRESH_TOKEN_URL_OVERRIDE=http://localhost:PORT/oauth/token) ──
+app.post('/oauth/token', async (c) => {
+    const body = await c.req.arrayBuffer();
+    const resp = await fetch('https://auth.openai.com/oauth/token', {
+        method: 'POST',
+        headers: { 'content-type': c.req.header('content-type') ?? 'application/json' },
+        body,
+    });
+    const data = await resp.arrayBuffer();
+    return c.body(data, resp.status, { 'content-type': 'application/json' });
+});
 // ── Catch-all ─────────────────────────────────────────────────────────────────
 app.all('*', async (c) => {
     const upstream = detectUpstream(c.req.raw.headers);
     const url = new URL(c.req.url);
-    const targetPath = url.pathname === '/responses' ? '/v1/responses' : url.pathname;
-    const targetUrl = `${upstream}${targetPath}${url.search}`;
+    const NEEDS_V1 = new Set(['/models', '/engines', '/files', '/embeddings', '/moderations', '/completions', '/edits']);
+    const pathname = NEEDS_V1.has(url.pathname) ? `/v1${url.pathname}` : url.pathname;
+    const targetUrl = `${upstream}${pathname}${url.search}`;
     const body = await c.req.arrayBuffer();
     const fwdHeaders = forwardHeaders(c.req.raw.headers);
     const resp = await fetch(targetUrl, {
@@ -250,5 +262,17 @@ app.all('*', async (c) => {
         if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
             respHeaders[k] = v;
     }
+    const contentType = resp.headers.get('content-type') ?? '';
+    if (contentType.includes('text/event-stream')) {
+        return stream(c, async (s) => {
+            const reader = resp.body.getReader();
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                await s.write(value);
+            }
+        });
+    }
     return c.body(await resp.arrayBuffer(), resp.status, respHeaders);
 });

package/dist/version.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export declare const VERSION = "1.11.4";
1	+ export declare const VERSION = "1.13.1";

package/dist/version.js CHANGED Viewed

	@@ -1 +1 @@
1	- export const VERSION = '1.11.4';
1	+ export const VERSION = '1.13.1';

package/package.json CHANGED Viewed

@@ -1,58 +1,60 @@
-{
-  "name": "squeezr-ai",
-  "version": "1.11.4",
-  "description": "AI proxy that compresses Claude Code, Codex, Aider, Gemini CLI and Ollama context windows to save thousands of tokens per session",
-  "keywords": [
-    "claude",
-    "claude-code",
-    "codex",
-    "ollama",
-    "aider",
-    "gemini",
-    "token",
-    "compression",
-    "proxy",
-    "llm",
-    "ai"
-  ],
-  "license": "MIT",
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/sergioramosv/Squeezr.git"
-  },
-  "homepage": "https://github.com/sergioramosv/Squeezr#readme",
-  "type": "module",
-  "bin": {
-    "squeezr": "bin/squeezr.js"
-  },
-  "scripts": {
-    "build": "tsc",
-    "dev": "tsx src/index.ts",
-    "start": "node dist/index.js",
-    "gain": "node dist/gain.js",
-    "discover": "node dist/discover.js",
-    "test": "vitest run",
-    "test:watch": "vitest"
-  },
-  "files": [
-    "bin/",
-    "dist/",
-    "squeezr.toml"
-  ],
-  "dependencies": {
-    "@anthropic-ai/sdk": "^0.39.0",
-    "@hono/node-server": "^1.13.7",
-    "hono": "^4.7.5",
-    "openai": "^4.93.0",
-    "smol-toml": "^1.3.1"
-  },
-  "devDependencies": {
-    "@types/node": "^22.14.0",
-    "tsx": "^4.19.3",
-    "typescript": "^5.8.3",
-    "vitest": "^3.1.1"
-  },
-  "engines": {
-    "node": ">=18"
-  }
-}
+{
+  "name": "squeezr-ai",
+  "version": "1.13.1",
+  "description": "AI proxy that compresses Claude Code, Codex, Aider, Gemini CLI and Ollama context windows to save thousands of tokens per session",
+  "keywords": [
+    "claude",
+    "claude-code",
+    "codex",
+    "ollama",
+    "aider",
+    "gemini",
+    "token",
+    "compression",
+    "proxy",
+    "llm",
+    "ai"
+  ],
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/sergioramosv/Squeezr.git"
+  },
+  "homepage": "https://github.com/sergioramosv/Squeezr#readme",
+  "type": "module",
+  "bin": {
+    "squeezr": "bin/squeezr.js"
+  },
+  "scripts": {
+    "build": "tsc",
+    "dev": "tsx src/index.ts",
+    "start": "node dist/index.js",
+    "gain": "node dist/gain.js",
+    "discover": "node dist/discover.js",
+    "test": "vitest run",
+    "test:watch": "vitest"
+  },
+  "files": [
+    "bin/",
+    "dist/",
+    "squeezr.toml"
+  ],
+  "dependencies": {
+    "@anthropic-ai/sdk": "^0.39.0",
+    "@hono/node-server": "^1.13.7",
+    "hono": "^4.7.5",
+    "node-forge": "^1.4.0",
+    "openai": "^4.93.0",
+    "smol-toml": "^1.3.1"
+  },
+  "devDependencies": {
+    "@types/node": "^22.14.0",
+    "@types/node-forge": "^1.3.14",
+    "tsx": "^4.19.3",
+    "typescript": "^5.8.3",
+    "vitest": "^3.1.1"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}