squeezr-ai 1.23.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -302,7 +302,7 @@ If the output says `a foreign service is` listening on the port, you have three
|
|
|
302
302
|
2. **Stop the offending service**: `docker ps` to find what owns 8080, then `docker stop <id>`.
|
|
303
303
|
3. **Inspect runtime info**: `cat ~/.squeezr/runtime.json` shows the *actual* port Squeezr is bound to. If it differs from your `ANTHROPIC_BASE_URL`, run `squeezr setup` to refresh your shell profile.
|
|
304
304
|
|
|
305
|
-
Squeezr v1.
|
|
305
|
+
Squeezr v1.24.0+ runs a self-test on every startup that detects this exact failure mode and prints actionable hints. You can re-run it any time with:
|
|
306
306
|
|
|
307
307
|
```bash
|
|
308
308
|
curl -s "http://localhost:$(jq -r .port ~/.squeezr/runtime.json)/squeezr/selftest?run=1" | jq
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that anthropic-ratelimit-* headers from upstream are forwarded back to
|
|
3
|
+
* the client in streaming responses (issue #4).
|
|
4
|
+
*
|
|
5
|
+
* Claude Code reads these headers to populate rate_limits in the statusline JSON.
|
|
6
|
+
* Previously the proxy consumed them internally but never relayed them.
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that anthropic-ratelimit-* headers from upstream are forwarded back to
|
|
3
|
+
* the client in streaming responses (issue #4).
|
|
4
|
+
*
|
|
5
|
+
* Claude Code reads these headers to populate rate_limits in the statusline JSON.
|
|
6
|
+
* Previously the proxy consumed them internally but never relayed them.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
9
|
+
import { createServer } from 'node:http';
|
|
10
|
+
// ── Helper: start a minimal mock Anthropic server ─────────────────────────────
|
|
11
|
+
/**
 * Rate-limit response headers emitted by the mock upstream. The test below
 * iterates over these entries and expects each one on the proxied response.
 * Frozen so that no test can accidentally mutate the shared fixture.
 */
const RATE_LIMIT_HEADERS = Object.freeze({
    'anthropic-ratelimit-requests-limit': '50',
    'anthropic-ratelimit-requests-remaining': '49',
    'anthropic-ratelimit-requests-reset': '2026-01-01T00:00:00Z',
    'anthropic-ratelimit-tokens-limit': '100000',
    'anthropic-ratelimit-tokens-remaining': '99000',
    'anthropic-ratelimit-tokens-reset': '2026-01-01T00:01:00Z',
});
|
|
19
|
+
/**
 * Start an upstream mock that returns rate-limit headers + a minimal SSE stream.
 *
 * `POST /v1/messages` answers 200 with `text/event-stream` and the supplied
 * headers; every other request answers 404 with an empty body.
 *
 * @param {Record<string, string>} [rateLimitHeaders=RATE_LIMIT_HEADERS]
 *   Extra response headers to send with the SSE stream.
 * @returns {Promise<{ port: number, server: import('node:http').Server }>}
 *   Resolves once the server is listening on an ephemeral 127.0.0.1 port;
 *   rejects if the server emits `error` before it starts listening.
 */
function startMockAnthropic(rateLimitHeaders = RATE_LIMIT_HEADERS) {
    return new Promise((resolve, reject) => {
        const server = createServer((req, res) => {
            if (req.url !== '/v1/messages' || req.method !== 'POST') {
                res.writeHead(404);
                res.end();
                return;
            }
            // Consume request body so the socket doesn't stall
            req.resume();
            req.on('end', () => {
                res.writeHead(200, {
                    'content-type': 'text/event-stream',
                    'cache-control': 'no-cache',
                    ...rateLimitHeaders,
                });
                // Minimal well-formed SSE stream
                res.write('data: {"type":"message_start","message":{"id":"msg_1","type":"message","role":"assistant","content":[],"model":"claude-opus-4-5","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}}\n\n');
                res.write('data: {"type":"message_stop"}\n\n');
                res.end();
            });
        });
        // Surface a failed bind as a rejection instead of an unhandled 'error' event.
        server.once('error', reject);
        server.listen(0, '127.0.0.1', () => {
            // After startup, leave error handling exactly as it was before.
            server.off('error', reject);
            resolve({ port: server.address().port, server });
        });
    });
}
|
|
49
|
+
// ── Tests ─────────────────────────────────────────────────────────────────────
|
|
50
|
+
describe('rate-limit header forwarding (issue #4)', () => {
    // Mock upstream servers started by a test; closed again in afterEach.
    const servers = [];
    afterEach(async () => {
        // Restore the real global fetch first, so the stub installed by a test
        // cannot leak into later tests even if closing a server fails.
        vi.unstubAllGlobals();
        vi.unstubAllEnvs();
        while (servers.length) {
            const s = servers.pop();
            await new Promise((r) => s.close(() => r()));
        }
    });
    it('forwards anthropic-ratelimit-* headers on streaming /v1/messages responses', async () => {
        const { port: upstreamPort, server: upstream } = await startMockAnthropic();
        servers.push(upstream);
        // No env var is changed here; instead global fetch is stubbed so that
        // requests addressed to api.anthropic.com are sent to the local mock.
        const upstreamBase = `http://127.0.0.1:${upstreamPort}`;
        const realFetch = global.fetch;
        vi.stubGlobal('fetch', async (input, init) => {
            const url = typeof input === 'string' ? input : input instanceof URL ? input.href : input.url;
            const redirected = url.replace('https://api.anthropic.com', upstreamBase);
            return realFetch(redirected, init);
        });
        // Import app after stubbing fetch
        const { app } = await import('../server.js');
        const request = new Request('http://localhost/v1/messages', {
            method: 'POST',
            headers: {
                'content-type': 'application/json',
                'x-api-key': 'sk-ant-test-key',
            },
            body: JSON.stringify({
                model: 'claude-opus-4-5',
                max_tokens: 100,
                stream: true,
                messages: [{ role: 'user', content: 'hi' }],
            }),
        });
        const response = await app.fetch(request);
        expect(response.status).toBe(200);
        // Consume body so connection closes cleanly
        await response.text();
        // The key assertion: rate-limit headers must be present
        for (const [name, expected] of Object.entries(RATE_LIMIT_HEADERS)) {
            expect(response.headers.get(name), `Expected header "${name}" to be forwarded`).toBe(expected);
        }
    });
});
|
package/dist/server.js
CHANGED
|
@@ -183,6 +183,10 @@ app.post('/v1/messages', async (c) => {
|
|
|
183
183
|
if (body.stream) {
|
|
184
184
|
const upstream = await proxyStream(`${ANTHROPIC_API}/v1/messages`, body, fwdHeaders);
|
|
185
185
|
updateAnthropicFromHeaders(upstream.headers);
|
|
186
|
+
for (const [k, v] of upstream.headers.entries()) {
|
|
187
|
+
if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
|
|
188
|
+
c.header(k, v);
|
|
189
|
+
}
|
|
186
190
|
return stream(c, async (s) => {
|
|
187
191
|
const reader = upstream.body.getReader();
|
|
188
192
|
const decoder = new TextDecoder();
|
|
@@ -246,6 +250,12 @@ app.post('/v1/messages', async (c) => {
|
|
|
246
250
|
const upstream = await proxyStream(`${ANTHROPIC_API}/v1/messages`, body, fwdHeaders);
|
|
247
251
|
// Extract rate limit headers immediately (available before body starts)
|
|
248
252
|
updateAnthropicFromHeaders(upstream.headers);
|
|
253
|
+
// Forward anthropic-ratelimit-* (and other response) headers so Claude Code
|
|
254
|
+
// can populate rate_limits in the statusline JSON (issue #4).
|
|
255
|
+
for (const [k, v] of upstream.headers.entries()) {
|
|
256
|
+
if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
|
|
257
|
+
c.header(k, v);
|
|
258
|
+
}
|
|
249
259
|
return stream(c, async (s) => {
|
|
250
260
|
const reader = upstream.body.getReader();
|
|
251
261
|
const decoder = new TextDecoder();
|
|
@@ -290,8 +300,14 @@ app.post('/v1/messages', async (c) => {
|
|
|
290
300
|
headers: { ...fwdHeaders, 'content-type': 'application/json' },
|
|
291
301
|
body: JSON.stringify(body),
|
|
292
302
|
});
|
|
303
|
+
updateAnthropicFromHeaders(continuedResp.headers);
|
|
293
304
|
const continuedBody = await continuedResp.json();
|
|
294
|
-
|
|
305
|
+
const continuedHeaders = {};
|
|
306
|
+
for (const [k, v] of continuedResp.headers.entries()) {
|
|
307
|
+
if (!SKIP_RESP_HEADERS.has(k.toLowerCase()))
|
|
308
|
+
continuedHeaders[k] = v;
|
|
309
|
+
}
|
|
310
|
+
return c.json(continuedBody, continuedResp.status, continuedHeaders);
|
|
295
311
|
}
|
|
296
312
|
const respHeaders = {};
|
|
297
313
|
for (const [k, v] of resp.headers.entries()) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "squeezr-ai",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.24.0",
|
|
4
4
|
"description": "AI proxy that compresses Claude Code, Codex, Aider, Gemini CLI and Ollama context windows to save thousands of tokens per session",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"claude",
|
|
@@ -58,7 +58,7 @@
|
|
|
58
58
|
"@vitest/coverage-v8": "^4.1.2",
|
|
59
59
|
"tsx": "^4.19.3",
|
|
60
60
|
"typescript": "^5.8.3",
|
|
61
|
-
"vitest": "^
|
|
61
|
+
"vitest": "^4.1.2"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
64
|
"node": ">=18"
|