squeezr-ai 1.23.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -302,7 +302,7 @@ If the output says `a foreign service is` listening on the port, you have three
302
302
  2. **Stop the offending service**: `docker ps` to find what owns 8080, then `docker stop <id>`.
303
303
  3. **Inspect runtime info**: `cat ~/.squeezr/runtime.json` shows the *actual* port Squeezr is bound to. If it differs from your `ANTHROPIC_BASE_URL`, run `squeezr setup` to refresh your shell profile.
304
304
 
305
- Squeezr v1.23.0+ runs a self-test on every startup that detects this exact failure mode and prints actionable hints. You can re-run it any time with:
305
+ Squeezr v1.24.0+ runs a self-test on every startup that detects this exact failure mode and prints actionable hints. You can re-run it any time with:
306
306
 
307
307
  ```bash
308
308
  curl -s "http://localhost:$(jq -r .port ~/.squeezr/runtime.json)/squeezr/selftest?run=1" | jq
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Tests that anthropic-ratelimit-* headers from upstream are forwarded back to
3
+ * the client in streaming responses (issue #4).
4
+ *
5
+ * Claude Code reads these headers to populate rate_limits in the statusline JSON.
6
+ * Previously the proxy consumed them internally but never relayed them.
7
+ */
8
+ export {};
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Tests that anthropic-ratelimit-* headers from upstream are forwarded back to
3
+ * the client in streaming responses (issue #4).
4
+ *
5
+ * Claude Code reads these headers to populate rate_limits in the statusline JSON.
6
+ * Previously the proxy consumed them internally but never relayed them.
7
+ */
8
+ import { describe, it, expect, vi, afterEach } from 'vitest';
9
+ import { createServer } from 'node:http';
10
+ // ── Helper: start a minimal mock Anthropic server ─────────────────────────────
11
+ const RATE_LIMIT_HEADERS = {
12
+ 'anthropic-ratelimit-requests-limit': '50',
13
+ 'anthropic-ratelimit-requests-remaining': '49',
14
+ 'anthropic-ratelimit-requests-reset': '2026-01-01T00:00:00Z',
15
+ 'anthropic-ratelimit-tokens-limit': '100000',
16
+ 'anthropic-ratelimit-tokens-remaining': '99000',
17
+ 'anthropic-ratelimit-tokens-reset': '2026-01-01T00:01:00Z',
18
+ };
19
+ /** Start an upstream mock that returns rate-limit headers + a minimal SSE stream. */
20
+ function startMockAnthropic() {
21
+ return new Promise((resolve) => {
22
+ const server = createServer((req, res) => {
23
+ if (req.url === '/v1/messages' && req.method === 'POST') {
24
+ // Consume request body so the socket doesn't stall
25
+ req.resume();
26
+ req.on('end', () => {
27
+ const headers = {
28
+ 'content-type': 'text/event-stream',
29
+ 'cache-control': 'no-cache',
30
+ ...RATE_LIMIT_HEADERS,
31
+ };
32
+ res.writeHead(200, headers);
33
+ // Minimal well-formed SSE stream
34
+ res.write('data: {"type":"message_start","message":{"id":"msg_1","type":"message","role":"assistant","content":[],"model":"claude-opus-4-5","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}}\n\n');
35
+ res.write('data: {"type":"message_stop"}\n\n');
36
+ res.end();
37
+ });
38
+ }
39
+ else {
40
+ res.writeHead(404);
41
+ res.end();
42
+ }
43
+ });
44
+ server.listen(0, '127.0.0.1', () => {
45
+ resolve({ port: server.address().port, server });
46
+ });
47
+ });
48
+ }
49
+ // ── Tests ─────────────────────────────────────────────────────────────────────
50
+ describe('rate-limit header forwarding (issue #4)', () => {
51
+ const servers = [];
52
+ afterEach(async () => {
53
+ while (servers.length) {
54
+ const s = servers.pop();
55
+ await new Promise((r) => s.close(() => r()));
56
+ }
57
+ vi.unstubAllEnvs();
58
+ });
59
+ it('forwards anthropic-ratelimit-* headers on streaming /v1/messages responses', async () => {
60
+ const { port: upstreamPort, server: upstream } = await startMockAnthropic();
61
+ servers.push(upstream);
62
+ // Dynamically import server after env is set so ANTHROPIC_API points at mock.
63
+ // We monkey-patch the module-level constant by intercepting fetch.
64
+ const upstreamBase = `http://127.0.0.1:${upstreamPort}`;
65
+ // Stub global fetch to redirect api.anthropic.com calls to our mock
66
+ const realFetch = global.fetch;
67
+ vi.stubGlobal('fetch', async (input, init) => {
68
+ const url = typeof input === 'string' ? input : input instanceof URL ? input.href : input.url;
69
+ const redirected = url.replace('https://api.anthropic.com', upstreamBase);
70
+ return realFetch(redirected, init);
71
+ });
72
+ // Import app after stubbing fetch
73
+ const { app } = await import('../server.js');
74
+ const request = new Request('http://localhost/v1/messages', {
75
+ method: 'POST',
76
+ headers: {
77
+ 'content-type': 'application/json',
78
+ 'x-api-key': 'sk-ant-test-key',
79
+ },
80
+ body: JSON.stringify({
81
+ model: 'claude-opus-4-5',
82
+ max_tokens: 100,
83
+ stream: true,
84
+ messages: [{ role: 'user', content: 'hi' }],
85
+ }),
86
+ });
87
+ const response = await app.fetch(request);
88
+ expect(response.status).toBe(200);
89
+ // Consume body so connection closes cleanly
90
+ await response.text();
91
+ // The key assertion: rate-limit headers must be present
92
+ for (const [name, expected] of Object.entries(RATE_LIMIT_HEADERS)) {
93
+ expect(response.headers.get(name), `Expected header "${name}" to be forwarded`).toBe(expected);
94
+ }
95
+ });
96
+ });
package/dist/server.js CHANGED
@@ -183,6 +183,10 @@ app.post('/v1/messages', async (c) => {
183
183
  if (body.stream) {
184
184
  const upstream = await proxyStream(`${ANTHROPIC_API}/v1/messages`, body, fwdHeaders);
185
185
  updateAnthropicFromHeaders(upstream.headers);
186
+ for (const [k, v] of upstream.headers.entries()) {
187
+ if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
188
+ c.header(k, v);
189
+ }
186
190
  return stream(c, async (s) => {
187
191
  const reader = upstream.body.getReader();
188
192
  const decoder = new TextDecoder();
@@ -246,6 +250,12 @@ app.post('/v1/messages', async (c) => {
246
250
  const upstream = await proxyStream(`${ANTHROPIC_API}/v1/messages`, body, fwdHeaders);
247
251
  // Extract rate limit headers immediately (available before body starts)
248
252
  updateAnthropicFromHeaders(upstream.headers);
253
+ // Forward anthropic-ratelimit-* (and other response) headers so Claude Code
254
+ // can populate rate_limits in the statusline JSON (issue #4).
255
+ for (const [k, v] of upstream.headers.entries()) {
256
+ if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
257
+ c.header(k, v);
258
+ }
249
259
  return stream(c, async (s) => {
250
260
  const reader = upstream.body.getReader();
251
261
  const decoder = new TextDecoder();
@@ -290,8 +300,14 @@ app.post('/v1/messages', async (c) => {
290
300
  headers: { ...fwdHeaders, 'content-type': 'application/json' },
291
301
  body: JSON.stringify(body),
292
302
  });
303
+ updateAnthropicFromHeaders(continuedResp.headers);
293
304
  const continuedBody = await continuedResp.json();
294
- return c.json(continuedBody, continuedResp.status);
305
+ const continuedHeaders = {};
306
+ for (const [k, v] of continuedResp.headers.entries()) {
307
+ if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
308
+ continuedHeaders[k] = v;
309
+ }
310
+ return c.json(continuedBody, continuedResp.status, continuedHeaders);
295
311
  }
296
312
  const respHeaders = {};
297
313
  for (const [k, v] of resp.headers.entries()) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squeezr-ai",
3
- "version": "1.23.0",
3
+ "version": "1.24.0",
4
4
  "description": "AI proxy that compresses Claude Code, Codex, Aider, Gemini CLI and Ollama context windows to save thousands of tokens per session",
5
5
  "keywords": [
6
6
  "claude",
@@ -58,7 +58,7 @@
58
58
  "@vitest/coverage-v8": "^4.1.2",
59
59
  "tsx": "^4.19.3",
60
60
  "typescript": "^5.8.3",
61
- "vitest": "^3.1.1"
61
+ "vitest": "^4.1.2"
62
62
  },
63
63
  "engines": {
64
64
  "node": ">=18"