maxpool 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +314 -0
- package/package.json +41 -0
- package/src/account-config.js +30 -0
- package/src/account-manager.js +1729 -0
- package/src/config.js +162 -0
- package/src/index.js +1007 -0
- package/src/oauth.js +391 -0
- package/src/prober.js +82 -0
- package/src/restart-controller.js +58 -0
- package/src/server.js +1425 -0
- package/src/tui.js +958 -0
package/src/server.js
ADDED
|
@@ -0,0 +1,1425 @@
|
|
|
1
|
+
import http from 'node:http';
|
|
2
|
+
import { writeFile, mkdir } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
// Incoming request headers that are never copied onto the upstream request.
// Besides the classic hop-by-hop set this also lists `host`, which must be
// derived from the upstream URL rather than from the client's request.
const HOP_BY_HOP_HEADERS = new Set([
  'host',
  'connection',
  'keep-alive',
  'transfer-encoding',
  'te',
  'trailer',
  'upgrade',
  'proxy-authorization',
  'proxy-authenticate',
]);

// Client headers starting with this prefix are proxy controls and are
// stripped before the request is forwarded upstream.
const MAXPOOL_HEADER_PREFIX = 'x-maxpool-';

// Retry defaults; shallow-merged under `config.retry` for every request.
const DEFAULT_RETRY = {
  // 0 (or any falsy value) means "use the number of configured accounts".
  maxAttemptsPerRequest: 0,
  // Request bodies larger than this are not eligible for retry/failover.
  maxRetryBufferBytes: 10_485_760, // 10 MiB
};

// Queue defaults; shallow-merged under `config.queue` for every request.
// NOTE(review): the wait/poll fields are consumed outside this excerpt —
// the unit comments below come from the `Ms` suffix; confirm semantics there.
const DEFAULT_QUEUE = {
  enabled: true,
  maxWaitMs: 86_400_000, // 24 hours
  autoMaxWaitMs: null,
  capacityMaxWaitMs: 900_000, // 15 minutes
  weeklyMaxWaitMs: 0,
  pollMs: 1_000,
  heartbeatMs: 10_000,
};
|
|
26
|
+
|
|
27
|
+
/**
 * Create the Maxpool HTTP proxy server (not yet listening).
 *
 * Routes handled by the returned server:
 *  - `GET /maxpool/status`: JSON dump of `accountManager.getStatus()`. When a
 *    proxy API key is configured it is required here even for loopback callers.
 *  - `POST /v1/oauth/token`: relayed to upstream via `relayRaw` without any
 *    account credentials.
 *  - everything else: body is buffered and handed to `forwardRequest`.
 *
 * Lifecycle hooks (all optional):
 *  - `onRequestStart(reqId, { method, path })` — returning `false` rejects the
 *    request with a 503 `restart_in_progress` response.
 *  - `onRequestRouted(reqId, { account })`
 *  - `onRequestEnd(reqId, { method, path, account, status })` — guaranteed to be
 *    called exactly once for every request that `onRequestStart` accepted, even
 *    when reading the request body or preparing the request fails.
 *
 * @param {object} accountManager - account pool (uses `getStatus()` here; passed on to `forwardRequest`).
 * @param {object} config - reads `upstream`, `proxy.apiKey`, `logDir`, `retry`, `queue`.
 * @param {object} [hooks] - lifecycle callbacks described above.
 * @returns {import('node:http').Server}
 */
export function createProxyServer(accountManager, config, hooks = {}) {
  const upstream = config.upstream || 'https://api.anthropic.com';
  const proxyApiKey = config.proxy?.apiKey;
  const logDir = config.logDir || null;
  let requestCounter = 0;

  if (logDir) {
    // Best effort: a failure here must not prevent the proxy from starting,
    // but it should be visible instead of silently swallowed.
    mkdir(logDir, { recursive: true }).catch((err) => {
      console.error(`[Maxpool] Failed to create log directory "${logDir}": ${err.message}`);
    });
  }

  // Write a JSON response unless a response has already been started.
  const sendJson = (res, status, payload, extraHeaders = {}) => {
    if (res.headersSent) return;
    res.writeHead(status, { 'Content-Type': 'application/json', ...extraHeaders });
    res.end(JSON.stringify(payload));
  };

  const rejectInvalidKey = (res) => sendJson(res, 401, {
    type: 'error',
    error: { type: 'authentication_error', message: 'Invalid proxy API key' },
  });

  const rejectDuringRestart = (res) => sendJson(res, 503, {
    type: 'error',
    error: {
      type: 'restart_in_progress',
      message: 'Maxpool is restarting. Retry immediately.',
    },
  }, { 'retry-after': '1', Connection: 'close' });

  const internalErrorPayload = () => ({
    type: 'error',
    error: { type: 'proxy_error', message: 'Internal proxy error' },
  });

  const server = http.createServer(async (req, res) => {
    try {
      // Auth check — skipped for localhost connections (except the status route).
      const clientKey = req.headers['x-api-key'];
      const remoteAddr = req.socket.remoteAddress;
      const isLocal = remoteAddr === '127.0.0.1' || remoteAddr === '::1' || remoteAddr === '::ffff:127.0.0.1';
      const keyMismatch = Boolean(proxyApiKey) && clientKey !== proxyApiKey;

      // Status exposes account names and quota state, so require the local
      // proxy key even for loopback callers.
      if (req.method === 'GET' && req.url === '/maxpool/status') {
        if (keyMismatch) {
          rejectInvalidKey(res);
          return;
        }
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify(accountManager.getStatus(), null, 2));
        return;
      }

      if (keyMismatch && !isLocal) {
        rejectInvalidKey(res);
        return;
      }

      // Let client token refresh requests pass through to upstream untouched.
      // The proxy manages its own tokens via ensureTokenFresh(); intercepting
      // or rewriting client refreshes would cause token rotation conflicts.
      if (req.method === 'POST' && req.url === '/v1/oauth/token') {
        const reqId = ++requestCounter;
        const ctx = { account: '(oauth relay)', status: null };
        const accepted = hooks.onRequestStart?.(reqId, { method: req.method, path: req.url });
        if (accepted === false) {
          rejectDuringRestart(res);
          return;
        }
        try {
          hooks.onRequestRouted?.(reqId, { account: ctx.account });
          await relayRaw(req, res, upstream);
          ctx.status = res.statusCode;
        } finally {
          hooks.onRequestEnd?.(reqId, {
            method: req.method,
            path: req.url,
            account: ctx.account,
            status: ctx.status,
          });
        }
        return;
      }

      // Track request
      const reqId = ++requestCounter;
      const accepted = hooks.onRequestStart?.(reqId, { method: req.method, path: req.url });
      if (accepted === false) {
        rejectDuringRestart(res);
        return;
      }

      const ctx = { account: null, status: null };
      // Stays null until describeRequest() has succeeded, so the error path
      // knows whether the request-aware error writer can be used.
      let requestInfo = null;
      try {
        // Buffer request body (needed for retry on 429). This can throw when
        // the client aborts mid-upload, hence it lives inside the try block.
        const bodyChunks = [];
        for await (const chunk of req) {
          bodyChunks.push(chunk);
        }
        const body = Buffer.concat(bodyChunks);
        const retryConfig = { ...DEFAULT_RETRY, ...(config.retry || {}) };
        const queueConfig = { ...DEFAULT_QUEUE, ...(config.queue || {}) };
        const canRetryBufferedBody = body.length <= retryConfig.maxRetryBufferBytes;
        requestInfo = describeRequest(req, body);
        const maxQueuedBodyBytes = queueConfig.maxQueuedBodyBytes == null
          ? Infinity
          : Math.max(0, Number(queueConfig.maxQueuedBodyBytes) || 0);
        const canQueueBufferedBody = body.length <= maxQueuedBodyBytes;
        if (!canQueueBufferedBody) {
          requestInfo.queueBlockedReason = `request body ${body.length} bytes exceeds queue.maxQueuedBodyBytes ${maxQueuedBodyBytes}`;
        }
        requestInfo.profile = getMaxpoolProfile(req.headers);
        requestInfo.sessionKey = headerValue(req.headers, 'x-maxpool-session');
        if (requestInfo.requiresAnthropicThinkingIntegrity && requestInfo.profile === 'all') {
          console.log('[Maxpool] Anthropic thinking detected; provider fallback disabled for this session/request');
        }
        prepareRuntimeProviders(accountManager, req.headers);

        await forwardRequest(
          req, res, body, accountManager, upstream, 0, hooks, reqId, ctx, logDir,
          retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, new Set(),
        );
      } catch (err) {
        ctx.status = ctx.status || 502;
        console.error('[Maxpool] Unhandled error:', err);
        if (requestInfo) {
          sendErrorResponse(res, requestInfo, 502, internalErrorPayload());
        } else {
          sendJson(res, 502, internalErrorPayload());
        }
      } finally {
        hooks.onRequestEnd?.(reqId, {
          method: req.method, path: req.url,
          account: ctx.account, status: ctx.status,
        });
      }
    } catch (err) {
      console.error('[Maxpool] Unhandled error:', err);
      // Last resort: never leave the client hanging without a response.
      try {
        sendJson(res, 502, internalErrorPayload());
      } catch (sendErr) {
        console.error('[Maxpool] Failed to send error response:', sendErr);
      }
    }
  });

  return server;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Relay a request to upstream without attaching any pooled account credentials.
 *
 * Only `content-type`, `accept` and `user-agent` are forwarded from the client
 * request (with defaults when absent); the request body is passed through
 * unchanged. The upstream status, headers and body are written back to `res`.
 *
 * `fetch` transparently decompresses the upstream body, so the upstream
 * `content-encoding` / `content-length` headers no longer describe the bytes we
 * send and are dropped; `content-length` is recomputed from the actual payload.
 * The body is handled as raw bytes so non-UTF-8 payloads are not altered.
 *
 * Errors while reading the client body propagate to the caller; upstream
 * failures are answered with a 502 JSON error when no response was started.
 *
 * @param {import('node:http').IncomingMessage} req
 * @param {import('node:http').ServerResponse} res
 * @param {string} upstream - base URL; `req.url` is appended verbatim.
 * @returns {Promise<void>}
 */
async function relayRaw(req, res, upstream) {
  const bodyChunks = [];
  for await (const chunk of req) bodyChunks.push(chunk);
  const body = Buffer.concat(bodyChunks);

  // Upstream headers that must not be copied onto the client response.
  const skippedResponseHeaders = new Set([
    'transfer-encoding', 'connection', 'content-encoding', 'content-length',
  ]);

  try {
    const upstreamRes = await fetch(`${upstream}${req.url}`, {
      method: req.method,
      headers: {
        'content-type': req.headers['content-type'] || 'application/json',
        'accept': req.headers['accept'] || 'application/json',
        'user-agent': req.headers['user-agent'] || 'node',
      },
      body: body.length > 0 ? body : undefined,
    });

    const responseBody = Buffer.from(await upstreamRes.arrayBuffer());
    const responseHeaders = {};
    for (const [key, value] of upstreamRes.headers.entries()) {
      if (skippedResponseHeaders.has(key)) continue;
      responseHeaders[key] = value;
    }
    responseHeaders['content-length'] = String(responseBody.length);
    res.writeHead(upstreamRes.status, responseHeaders);
    res.end(responseBody);
  } catch (err) {
    console.error('[Maxpool] Raw relay error:', err.message);
    if (!res.headersSent) {
      res.writeHead(502, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ type: 'error', error: { type: 'proxy_error', message: 'Upstream unreachable' } }));
    }
  }
}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
/**
 * Build a compact, sortable local-time stamp for log file names.
 *
 * @returns {string} `YYYYMMDD_HHMMSS.mmm` based on the current local time.
 */
function logTimestamp() {
  const now = new Date();
  const zeroPad = (value, width = 2) => `${value}`.padStart(width, '0');

  const datePart = [
    now.getFullYear(),
    zeroPad(now.getMonth() + 1), // getMonth() is zero-based
    zeroPad(now.getDate()),
  ].join('');
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((part) => zeroPad(part))
    .join('');
  const millis = zeroPad(now.getMilliseconds(), 3);

  return `${datePart}_${timePart}.${millis}`;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Persist the collected log sections of one request to its own file.
 *
 * The file is named `<timestamp>_<reqId padded to 5 digits>.log` and contains
 * the sections joined by a blank line. The log directory is created on demand
 * so a write cannot fail merely because the directory does not exist yet.
 * Logging is best effort: I/O failures are reported on stderr and never
 * rejected, which keeps un-awaited calls safe.
 *
 * @param {string|null} logDir - target directory; falsy disables logging.
 * @param {number} reqId - request counter used in the file name.
 * @param {string[]} sections - log sections, in order.
 * @returns {Promise<void>}
 */
async function writeRequestLog(logDir, reqId, sections) {
  if (!logDir) return;
  const ts = logTimestamp();
  const filename = `${ts}_${String(reqId).padStart(5, '0')}.log`;
  try {
    // Idempotent with `recursive: true`; covers a missing or removed directory.
    await mkdir(logDir, { recursive: true });
    await writeFile(join(logDir, filename), sections.join('\n\n'), 'utf-8');
  } catch (err) {
    console.error(`[Maxpool] Failed to write log: ${err.message}`);
  }
}
|
|
229
|
+
|
|
230
|
+
/**
 * Render headers as one indented `name: value` line per header, for logs.
 *
 * Accepts either an object exposing an `entries()` method (e.g. a fetch
 * `Headers` instance) or a plain name → value object. The `entries` member is
 * only used when it is actually callable, so a plain header object that
 * happens to contain a header literally named `entries` is formatted normally
 * instead of throwing.
 *
 * @param {Headers|Record<string, unknown>|null|undefined} headers
 * @returns {string} newline-joined lines; empty string for null/undefined.
 */
function formatHeaders(headers) {
  if (headers == null) return '';
  const pairs = typeof headers.entries === 'function'
    ? [...headers.entries()]
    : Object.entries(headers);
  return pairs.map(([k, v]) => ` ${k}: ${v}`).join('\n');
}
|
|
236
|
+
|
|
237
|
+
async function forwardRequest(
|
|
238
|
+
req, res, body, accountManager, upstream, retryCount, hooks, reqId, ctx, logDir,
|
|
239
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
240
|
+
) {
|
|
241
|
+
const configuredAttempts = Number(retryConfig.maxAttemptsPerRequest) || accountManager.accounts.length;
|
|
242
|
+
const maxAttempts = Math.max(1, configuredAttempts);
|
|
243
|
+
|
|
244
|
+
// Select account
|
|
245
|
+
const lease = accountManager.acquireAccount(requestInfo, excludedIndexes);
|
|
246
|
+
const account = lease?.account;
|
|
247
|
+
if (!account) {
|
|
248
|
+
const queued = await queueAndRetry(
|
|
249
|
+
'no eligible account/provider currently available',
|
|
250
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
251
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'quota',
|
|
252
|
+
);
|
|
253
|
+
if (queued) return;
|
|
254
|
+
|
|
255
|
+
ctx.status = 429;
|
|
256
|
+
ctx.account = '(none available)';
|
|
257
|
+
const retryPlan = accountManager.nextRetryForRequest?.(requestInfo, new Set()) || {};
|
|
258
|
+
const willRecoverSoon = Number.isFinite(retryPlan.retryAfterMs);
|
|
259
|
+
const retryAfter = willRecoverSoon ? Math.max(1, Math.ceil(retryPlan.retryAfterMs / 1000)) : 60;
|
|
260
|
+
// Surface the routing decision (logs go to the TUI; this is the only record
|
|
261
|
+
// of WHY a request was rejected rather than queued).
|
|
262
|
+
console.log(`[Maxpool] No route for request — returning 429 (cause: ${retryPlan.cause || 'unavailable'}, recovers-soon: ${willRecoverSoon})`);
|
|
263
|
+
sendErrorResponse(res, requestInfo, 429, {
|
|
264
|
+
type: 'error',
|
|
265
|
+
error: {
|
|
266
|
+
type: 'rate_limit_error',
|
|
267
|
+
message: unavailableMessage(accountManager, requestInfo, retryAfter, willRecoverSoon),
|
|
268
|
+
},
|
|
269
|
+
}, { 'retry-after': String(retryAfter) });
|
|
270
|
+
return;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Track which account handles this request
|
|
274
|
+
ctx.account = account.name;
|
|
275
|
+
hooks.onRequestRouted?.(reqId, { account: account.name });
|
|
276
|
+
|
|
277
|
+
// Refresh OAuth token if needed
|
|
278
|
+
const tokenReady = await accountManager.ensureTokenFresh(account.index);
|
|
279
|
+
if (!tokenReady) {
|
|
280
|
+
accountManager.releaseAccount(lease);
|
|
281
|
+
excludedIndexes.add(account.index);
|
|
282
|
+
if (
|
|
283
|
+
retryCount + 1 < maxAttempts &&
|
|
284
|
+
hasEligibleRoute(accountManager, requestInfo, excludedIndexes)
|
|
285
|
+
) {
|
|
286
|
+
return forwardRequest(
|
|
287
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
288
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
289
|
+
);
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
const queued = await queueAndRetry(
|
|
293
|
+
`OAuth token refresh unavailable for "${account.name}"`,
|
|
294
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
295
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'quota',
|
|
296
|
+
);
|
|
297
|
+
if (queued) return;
|
|
298
|
+
|
|
299
|
+
ctx.status = 401;
|
|
300
|
+
sendErrorResponse(res, requestInfo, 401, {
|
|
301
|
+
type: 'error',
|
|
302
|
+
error: {
|
|
303
|
+
type: 'authentication_error',
|
|
304
|
+
message: `Claude account "${account.name}" could not refresh its OAuth token. Run maxpool accounts -v or log in again.`,
|
|
305
|
+
},
|
|
306
|
+
});
|
|
307
|
+
return;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Build upstream request headers
|
|
311
|
+
const isOAuth = account.type === 'oauth';
|
|
312
|
+
const headers = {};
|
|
313
|
+
for (const [key, value] of Object.entries(req.headers)) {
|
|
314
|
+
const lk = key.toLowerCase();
|
|
315
|
+
if (HOP_BY_HOP_HEADERS.has(lk)) continue;
|
|
316
|
+
if (lk.startsWith(MAXPOOL_HEADER_PREFIX)) continue;
|
|
317
|
+
if (lk === 'x-api-key') continue;
|
|
318
|
+
if (lk === 'content-length') continue;
|
|
319
|
+
if (account.stripBetaHeaders && lk === 'anthropic-beta') continue;
|
|
320
|
+
// Strip accept-encoding: Node fetch auto-decompresses, which would
|
|
321
|
+
// mismatch the Content-Encoding header we forward to the client
|
|
322
|
+
if (lk === 'accept-encoding') continue;
|
|
323
|
+
headers[key] = value;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
if (account.authHeader === 'authorization' || account.type === 'provider' || isOAuth) {
|
|
327
|
+
headers['authorization'] = `Bearer ${account.credential}`;
|
|
328
|
+
delete headers['x-api-key'];
|
|
329
|
+
} else if (account.authHeader === 'x-api-key' || !isOAuth) {
|
|
330
|
+
headers['x-api-key'] = account.credential;
|
|
331
|
+
delete headers['authorization'];
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
const upstreamUrl = `${account.upstream || upstream}${req.url}`;
|
|
335
|
+
const method = req.method;
|
|
336
|
+
const upstreamBody = rewriteBodyForAccount(body, account);
|
|
337
|
+
|
|
338
|
+
// Build log sections
|
|
339
|
+
const logSections = [];
|
|
340
|
+
if (logDir) {
|
|
341
|
+
const safeHeaders = { ...headers };
|
|
342
|
+
// Mask credentials in logs
|
|
343
|
+
if (safeHeaders['x-api-key']) {
|
|
344
|
+
safeHeaders['x-api-key'] = safeHeaders['x-api-key'].slice(0, 15) + '...';
|
|
345
|
+
}
|
|
346
|
+
if (safeHeaders['authorization']) {
|
|
347
|
+
safeHeaders['authorization'] = safeHeaders['authorization'].slice(0, 20) + '...';
|
|
348
|
+
}
|
|
349
|
+
logSections.push(
|
|
350
|
+
`=== REQUEST (account: ${account.name}, retry: ${retryCount}) ===\n${method} ${upstreamUrl}\n${formatHeaders(safeHeaders)}`,
|
|
351
|
+
);
|
|
352
|
+
if (body.length > 0) {
|
|
353
|
+
try {
|
|
354
|
+
logSections.push(`=== REQUEST BODY ===\n${JSON.stringify(JSON.parse(body.toString()), null, 2)}`);
|
|
355
|
+
} catch {
|
|
356
|
+
logSections.push(`=== REQUEST BODY (${body.length} bytes) ===\n${body.toString().slice(0, 4096)}`);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
try {
|
|
362
|
+
const upstreamRes = await fetch(upstreamUrl, {
|
|
363
|
+
method,
|
|
364
|
+
headers,
|
|
365
|
+
body: ['GET', 'HEAD'].includes(method) ? undefined : upstreamBody,
|
|
366
|
+
redirect: 'manual',
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
// Extract rate limit headers
|
|
370
|
+
const rateLimitHeaders = {};
|
|
371
|
+
for (const [key, value] of upstreamRes.headers.entries()) {
|
|
372
|
+
rateLimitHeaders[key] = value;
|
|
373
|
+
}
|
|
374
|
+
accountManager.updateQuota(account.index, rateLimitHeaders);
|
|
375
|
+
|
|
376
|
+
// Retry/failover can only happen before response bytes are sent. Once a
|
|
377
|
+
// streaming response starts, rerouting would corrupt Claude Code's stream.
|
|
378
|
+
if (upstreamRes.status === 429) {
|
|
379
|
+
const errorBody = await readErrorBody(upstreamRes);
|
|
380
|
+
const retryAfter = parseRetryAfter(upstreamRes.headers.get('retry-after'))
|
|
381
|
+
|| parseProviderRetryAfter(errorBody, account.provider);
|
|
382
|
+
const rateLimit = classifyRateLimit(account, rateLimitHeaders, errorBody);
|
|
383
|
+
if (rateLimit.scope === 'upstream') {
|
|
384
|
+
const parsedError = parseJsonError(errorBody);
|
|
385
|
+
const fingerprint = `429:${rateLimit.fingerprint || overloadFingerprint(errorBody, body)}`;
|
|
386
|
+
const incident = recordRequestIncident(requestInfo, fingerprint, account.index, retryAfter);
|
|
387
|
+
accountManager.markProvisionalUpstreamFailure(account.index, 429, fingerprint, retryAfter);
|
|
388
|
+
accountManager.releaseAccount(lease, {
|
|
389
|
+
status: 429,
|
|
390
|
+
error: 'upstream_throttled',
|
|
391
|
+
neutral: true,
|
|
392
|
+
});
|
|
393
|
+
excludedIndexes.add(account.index);
|
|
394
|
+
|
|
395
|
+
if (logDir) {
|
|
396
|
+
logSections.push(`=== RESPONSE 429 — "${account.name}" server-throttled ${retryAfter}s ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
397
|
+
if (errorBody) logSections.push(`=== ERROR BODY ===\n${errorBody}`);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
if (
|
|
401
|
+
canRetryBufferedBody
|
|
402
|
+
&& retryCount + 1 < maxAttempts
|
|
403
|
+
&& !res.headersSent
|
|
404
|
+
&& hasEligibleRoute(accountManager, requestInfo, excludedIndexes)
|
|
405
|
+
) {
|
|
406
|
+
return forwardRequest(
|
|
407
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
408
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
409
|
+
);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
if (accountManager.shouldPromoteUpstreamFailure(incident, requestInfo)) {
|
|
413
|
+
accountManager.clearProvisionalUpstreamFailures(fingerprint, incident.accounts);
|
|
414
|
+
accountManager.markUpstreamThrottled(
|
|
415
|
+
incident.retryAfter,
|
|
416
|
+
parsedError?.message || parsedError?.type || 'matching_request_wide_429s',
|
|
417
|
+
);
|
|
418
|
+
console.log('[Maxpool] Every eligible Claude account returned the same server-side 429; opening shared Anthropic throttle');
|
|
419
|
+
|
|
420
|
+
const queued = await queueAndRetry(
|
|
421
|
+
'Anthropic upstream is temporarily limiting requests',
|
|
422
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
423
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'upstream_throttle',
|
|
424
|
+
);
|
|
425
|
+
if (queued) return;
|
|
426
|
+
|
|
427
|
+
ctx.status = 429;
|
|
428
|
+
sendErrorResponse(res, requestInfo, 429, {
|
|
429
|
+
type: 'error',
|
|
430
|
+
error: {
|
|
431
|
+
type: 'rate_limit_error',
|
|
432
|
+
message: 'Anthropic is temporarily limiting requests. Maxpool will retry automatically when capacity returns.',
|
|
433
|
+
},
|
|
434
|
+
}, { 'retry-after': String(computeRetryAfter(accountManager, requestInfo)) });
|
|
435
|
+
return;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
const queued = await queueAndRetry(
|
|
439
|
+
`all routes failed after server-side 429 from "${account.name}"`,
|
|
440
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
441
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'capacity',
|
|
442
|
+
);
|
|
443
|
+
if (queued) return;
|
|
444
|
+
|
|
445
|
+
ctx.status = 429;
|
|
446
|
+
sendErrorBody(res, requestInfo, 429, errorBody, upstreamRes.headers);
|
|
447
|
+
return;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
const promotedAmbiguous = rateLimit.scope === 'unknown'
|
|
451
|
+
&& accountManager.noteAmbiguousRateLimit(account.index, rateLimit.fingerprint, retryAfter);
|
|
452
|
+
if (promotedAmbiguous) {
|
|
453
|
+
const parsedError = parseJsonError(errorBody);
|
|
454
|
+
accountManager.markUpstreamThrottled(
|
|
455
|
+
retryAfter,
|
|
456
|
+
parsedError?.message || parsedError?.type || 'matching_ambiguous_429s',
|
|
457
|
+
);
|
|
458
|
+
accountManager.releaseAccount(lease, {
|
|
459
|
+
status: 429,
|
|
460
|
+
error: 'upstream_throttled',
|
|
461
|
+
upstreamThrottled: true,
|
|
462
|
+
neutral: true,
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
const queued = await queueAndRetry(
|
|
466
|
+
'Anthropic upstream is temporarily limiting requests',
|
|
467
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
468
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'upstream_throttle',
|
|
469
|
+
);
|
|
470
|
+
if (queued) return;
|
|
471
|
+
|
|
472
|
+
ctx.status = 429;
|
|
473
|
+
sendErrorBody(res, requestInfo, 429, errorBody, upstreamRes.headers);
|
|
474
|
+
return;
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
accountManager.markRateLimited(account.index, retryAfter, {
|
|
478
|
+
status: 429,
|
|
479
|
+
recordFailure: false,
|
|
480
|
+
fingerprint: rateLimit.scope === 'unknown' ? rateLimit.fingerprint : null,
|
|
481
|
+
});
|
|
482
|
+
accountManager.releaseAccount(lease, { status: 429, error: 'rate_limited' });
|
|
483
|
+
|
|
484
|
+
if (logDir) {
|
|
485
|
+
logSections.push(`=== RESPONSE 429 — "${account.name}" rate-limited ${retryAfter}s ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
486
|
+
if (errorBody) logSections.push(`=== ERROR BODY ===\n${errorBody}`);
|
|
487
|
+
}
|
|
488
|
+
console.log(`[Maxpool] 429 on "${account.name}" — failing over before first byte`);
|
|
489
|
+
excludedIndexes.add(account.index);
|
|
490
|
+
|
|
491
|
+
if (
|
|
492
|
+
canRetryBufferedBody &&
|
|
493
|
+
retryCount + 1 < maxAttempts &&
|
|
494
|
+
!res.headersSent &&
|
|
495
|
+
hasEligibleRoute(accountManager, requestInfo, excludedIndexes)
|
|
496
|
+
) {
|
|
497
|
+
return forwardRequest(
|
|
498
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
499
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
500
|
+
);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
const queued = await queueAndRetry(
|
|
504
|
+
`all routes failed after 429 from "${account.name}"`,
|
|
505
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
506
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canRetryBufferedBody, 'quota',
|
|
507
|
+
);
|
|
508
|
+
if (queued) return;
|
|
509
|
+
|
|
510
|
+
ctx.status = 429;
|
|
511
|
+
if (logDir) writeRequestLog(logDir, reqId, logSections);
|
|
512
|
+
const retryPlan = accountManager.nextRetryForRequest?.(requestInfo, new Set()) || {};
|
|
513
|
+
const willRecoverSoon = Number.isFinite(retryPlan.retryAfterMs);
|
|
514
|
+
const clientRetryAfter = willRecoverSoon ? Math.max(1, Math.ceil(retryPlan.retryAfterMs / 1000)) : 60;
|
|
515
|
+
console.log(`[Maxpool] No route after failover — returning 429 (cause: ${retryPlan.cause || 'unavailable'}, recovers-soon: ${willRecoverSoon})`);
|
|
516
|
+
sendErrorResponse(res, requestInfo, 429, {
|
|
517
|
+
type: 'error',
|
|
518
|
+
error: {
|
|
519
|
+
type: 'rate_limit_error',
|
|
520
|
+
message: unavailableMessage(accountManager, requestInfo, clientRetryAfter, willRecoverSoon),
|
|
521
|
+
},
|
|
522
|
+
}, { 'retry-after': String(clientRetryAfter) });
|
|
523
|
+
return;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
if (account.type === 'provider' && isProviderAuthStatus(upstreamRes.status)) {
|
|
527
|
+
const errorBody = await readErrorBody(upstreamRes);
|
|
528
|
+
const reason = upstreamRes.status === 401 ? 'auth_failed' : 'forbidden';
|
|
529
|
+
accountManager.markAuthFailed(account.index, upstreamRes.status, reason);
|
|
530
|
+
accountManager.releaseAccount(lease, { status: upstreamRes.status, error: reason });
|
|
531
|
+
excludedIndexes.add(account.index);
|
|
532
|
+
|
|
533
|
+
if (logDir) {
|
|
534
|
+
logSections.push(`=== RESPONSE ${upstreamRes.status} — "${account.name}" disabled (${reason}), failing over ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
535
|
+
if (errorBody) logSections.push(`=== ERROR BODY ===\n${errorBody}`);
|
|
536
|
+
}
|
|
537
|
+
console.log(`[Maxpool] ${upstreamRes.status} on provider "${account.name}" — disabled and failing over before first byte`);
|
|
538
|
+
|
|
539
|
+
if (
|
|
540
|
+
canRetryBufferedBody &&
|
|
541
|
+
retryCount + 1 < maxAttempts &&
|
|
542
|
+
!res.headersSent &&
|
|
543
|
+
hasEligibleRoute(accountManager, requestInfo, excludedIndexes)
|
|
544
|
+
) {
|
|
545
|
+
return forwardRequest(
|
|
546
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
547
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
548
|
+
);
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
ctx.status = 502;
|
|
552
|
+
if (logDir) writeRequestLog(logDir, reqId, logSections);
|
|
553
|
+
sendErrorResponse(res, requestInfo, 502, {
|
|
554
|
+
type: 'error',
|
|
555
|
+
error: {
|
|
556
|
+
type: 'provider_auth_error',
|
|
557
|
+
message: `Fallback provider "${account.name}" returned HTTP ${upstreamRes.status}. Check its token, base URL, and model config.`,
|
|
558
|
+
},
|
|
559
|
+
});
|
|
560
|
+
return;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
if (upstreamRes.status === 529 && account.type !== 'provider') {
|
|
564
|
+
const errorBody = await readErrorBody(upstreamRes);
|
|
565
|
+
const retryAfter = parseRetryAfter(upstreamRes.headers.get('retry-after')) || 30;
|
|
566
|
+
const fingerprint = overloadFingerprint(errorBody, body);
|
|
567
|
+
const incident = recordRequestIncident(requestInfo, fingerprint, account.index, retryAfter);
|
|
568
|
+
|
|
569
|
+
if (logDir) {
|
|
570
|
+
logSections.push(`=== RESPONSE 529 — "${account.name}" overloaded ${retryAfter}s ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
571
|
+
if (errorBody) logSections.push(`=== ERROR BODY ===\n${errorBody}`);
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
accountManager.markProvisionalUpstreamFailure(account.index, 529, fingerprint, retryAfter);
|
|
575
|
+
accountManager.releaseAccount(lease, {
|
|
576
|
+
status: 529,
|
|
577
|
+
error: 'upstream_overloaded',
|
|
578
|
+
neutral: true,
|
|
579
|
+
});
|
|
580
|
+
excludedIndexes.add(account.index);
|
|
581
|
+
|
|
582
|
+
if (
|
|
583
|
+
canRetryBufferedBody
|
|
584
|
+
&& retryCount + 1 < maxAttempts
|
|
585
|
+
&& !res.headersSent
|
|
586
|
+
&& hasEligibleRoute(accountManager, requestInfo, excludedIndexes)
|
|
587
|
+
) {
|
|
588
|
+
return forwardRequest(
|
|
589
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
590
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
591
|
+
);
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
if (accountManager.shouldPromoteUpstreamFailure(incident, requestInfo)) {
|
|
595
|
+
accountManager.clearProvisionalUpstreamFailures(fingerprint, incident.accounts);
|
|
596
|
+
accountManager.markUpstreamThrottled(incident.retryAfter, 'matching_request_wide_529s');
|
|
597
|
+
console.log('[Maxpool] Every eligible Claude account returned the same 529; opening shared Anthropic throttle');
|
|
598
|
+
|
|
599
|
+
const queued = await queueAndRetry(
|
|
600
|
+
'Anthropic upstream is overloaded',
|
|
601
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
602
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'upstream_throttle',
|
|
603
|
+
);
|
|
604
|
+
if (queued) return;
|
|
605
|
+
|
|
606
|
+
ctx.status = 529;
|
|
607
|
+
sendErrorResponse(res, requestInfo, 529, {
|
|
608
|
+
type: 'error',
|
|
609
|
+
error: {
|
|
610
|
+
type: 'overloaded_error',
|
|
611
|
+
message: 'Anthropic is temporarily overloaded. Maxpool will retry automatically when capacity returns.',
|
|
612
|
+
},
|
|
613
|
+
});
|
|
614
|
+
return;
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
const queued = await queueAndRetry(
|
|
618
|
+
`all routes failed after HTTP 529 from "${account.name}"`,
|
|
619
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
620
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, 'capacity',
|
|
621
|
+
);
|
|
622
|
+
if (queued) return;
|
|
623
|
+
|
|
624
|
+
ctx.status = 529;
|
|
625
|
+
sendErrorBody(res, requestInfo, 529, errorBody, upstreamRes.headers);
|
|
626
|
+
return;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
if (isRetriableUpstreamStatus(upstreamRes.status)) {
|
|
630
|
+
await upstreamRes.body?.cancel();
|
|
631
|
+
accountManager.markTransientFailure(account.index, `HTTP ${upstreamRes.status}`);
|
|
632
|
+
accountManager.releaseAccount(lease);
|
|
633
|
+
excludedIndexes.add(account.index);
|
|
634
|
+
|
|
635
|
+
if (logDir) {
|
|
636
|
+
logSections.push(`=== RESPONSE ${upstreamRes.status} — "${account.name}" cooling down, failing over ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
if (canRetryBufferedBody && retryCount + 1 < maxAttempts && !res.headersSent) {
|
|
640
|
+
return forwardRequest(
|
|
641
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
642
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
643
|
+
);
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
const queued = await queueAndRetry(
|
|
647
|
+
`all routes failed after ${upstreamRes.status} from "${account.name}"`,
|
|
648
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
649
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canRetryBufferedBody, 'capacity',
|
|
650
|
+
);
|
|
651
|
+
if (queued) return;
|
|
652
|
+
|
|
653
|
+
ctx.status = upstreamRes.status;
|
|
654
|
+
sendErrorResponse(res, requestInfo, upstreamRes.status, {
|
|
655
|
+
type: 'error',
|
|
656
|
+
error: { type: 'overloaded_error', message: `Upstream returned ${upstreamRes.status}` },
|
|
657
|
+
});
|
|
658
|
+
return;
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
if (upstreamRes.status >= 400 && upstreamRes.status < 500) {
|
|
662
|
+
const errorBody = await readErrorBody(upstreamRes);
|
|
663
|
+
const errorType = errorBody.includes('Invalid `signature` in `thinking` block')
|
|
664
|
+
? 'invalid_thinking_signature'
|
|
665
|
+
: `HTTP ${upstreamRes.status}`;
|
|
666
|
+
accountManager.releaseAccount(lease, { status: upstreamRes.status, error: errorType });
|
|
667
|
+
|
|
668
|
+
if (logDir) {
|
|
669
|
+
logSections.push(`=== RESPONSE ${upstreamRes.status} — non-retryable client error from "${account.name}" ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
670
|
+
if (errorBody) logSections.push(`=== ERROR BODY ===\n${errorBody}`);
|
|
671
|
+
writeRequestLog(logDir, reqId, logSections);
|
|
672
|
+
}
|
|
673
|
+
if (errorType === 'invalid_thinking_signature') {
|
|
674
|
+
console.log(`[Maxpool] Non-retryable Anthropic thinking signature error on "${account.name}"`);
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
ctx.status = upstreamRes.status;
|
|
678
|
+
sendErrorBody(res, requestInfo, upstreamRes.status, errorBody, upstreamRes.headers);
|
|
679
|
+
return;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
if (upstreamRes.status < 400) {
|
|
683
|
+
accountManager.confirmUpstreamProbe?.(lease);
|
|
684
|
+
accountManager.markUpstreamAccepted?.(account.index);
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// Log response headers
|
|
688
|
+
if (logDir) {
|
|
689
|
+
logSections.push(`=== RESPONSE ${upstreamRes.status} ===\n${formatHeaders(upstreamRes.headers)}`);
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
ctx.status = upstreamRes.status;
|
|
693
|
+
|
|
694
|
+
// Build response headers (skip hop-by-hop and encoding headers)
|
|
695
|
+
const responseHeaders = {};
|
|
696
|
+
for (const [key, value] of upstreamRes.headers.entries()) {
|
|
697
|
+
if (key === 'transfer-encoding' || key === 'connection') continue;
|
|
698
|
+
// Strip content-encoding/content-length since fetch may auto-decompress
|
|
699
|
+
if (key === 'content-encoding' || key === 'content-length') continue;
|
|
700
|
+
responseHeaders[key] = value;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
if (!upstreamRes.body) {
|
|
704
|
+
if (!res.headersSent) res.writeHead(upstreamRes.status, responseHeaders);
|
|
705
|
+
accountManager.releaseAccount(lease, { success: upstreamRes.status < 500, status: upstreamRes.status });
|
|
706
|
+
if (logDir) {
|
|
707
|
+
logSections.push(`=== RESPONSE BODY ===\n(empty)`);
|
|
708
|
+
writeRequestLog(logDir, reqId, logSections);
|
|
709
|
+
}
|
|
710
|
+
res.end();
|
|
711
|
+
return;
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
const isStreaming = (upstreamRes.headers.get('content-type') || '').includes('text/event-stream');
|
|
715
|
+
|
|
716
|
+
if (isStreaming) {
|
|
717
|
+
const streamLog = logDir ? [] : null;
|
|
718
|
+
await streamResponse(upstreamRes.body, res, upstreamRes.status, responseHeaders, account.index, accountManager, streamLog, requestInfo);
|
|
719
|
+
accountManager.releaseAccount(lease, { success: true, status: upstreamRes.status });
|
|
720
|
+
if (logDir) {
|
|
721
|
+
logSections.push(`=== RESPONSE BODY (streamed) ===\n${streamLog.join('')}`);
|
|
722
|
+
writeRequestLog(logDir, reqId, logSections);
|
|
723
|
+
}
|
|
724
|
+
} else {
|
|
725
|
+
const buf = Buffer.from(await upstreamRes.arrayBuffer());
|
|
726
|
+
extractUsageFromBody(buf, account.index, accountManager);
|
|
727
|
+
markThinkingFromResponse(buf, accountManager, requestInfo);
|
|
728
|
+
accountManager.releaseAccount(lease, { success: upstreamRes.status < 500, status: upstreamRes.status });
|
|
729
|
+
if (logDir) {
|
|
730
|
+
try {
|
|
731
|
+
logSections.push(`=== RESPONSE BODY ===\n${JSON.stringify(JSON.parse(buf.toString()), null, 2)}`);
|
|
732
|
+
} catch {
|
|
733
|
+
logSections.push(`=== RESPONSE BODY (${buf.length} bytes) ===\n${buf.toString().slice(0, 8192)}`);
|
|
734
|
+
}
|
|
735
|
+
writeRequestLog(logDir, reqId, logSections);
|
|
736
|
+
}
|
|
737
|
+
if (requestInfo.queueHeartbeatActive) {
|
|
738
|
+
clearQueueHeartbeat(requestInfo);
|
|
739
|
+
if (!res.destroyed && !res.writableEnded) {
|
|
740
|
+
res.write(`data: ${buf.toString()}\n\n`);
|
|
741
|
+
res.end();
|
|
742
|
+
}
|
|
743
|
+
} else {
|
|
744
|
+
if (!res.headersSent) res.writeHead(upstreamRes.status, responseHeaders);
|
|
745
|
+
res.end(buf);
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
} catch (err) {
|
|
749
|
+
console.error(`[Maxpool] Upstream error (account "${account.name}"):`, err.message);
|
|
750
|
+
|
|
751
|
+
if (logDir) {
|
|
752
|
+
logSections.push(`=== ERROR ===\n${err.stack || err.message}`);
|
|
753
|
+
writeRequestLog(logDir, reqId, logSections);
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
const isTransient = err instanceof Error &&
|
|
757
|
+
(err.message.includes('fetch failed') ||
|
|
758
|
+
err.code === 'ECONNRESET' || err.code === 'ECONNREFUSED' ||
|
|
759
|
+
err.code === 'ETIMEDOUT' || err.code === 'UND_ERR_CONNECT_TIMEOUT' ||
|
|
760
|
+
err.message.includes('terminated'));
|
|
761
|
+
|
|
762
|
+
if (isTransient) {
|
|
763
|
+
accountManager.markTransientFailure(account.index, err.code || err.message || 'network_error');
|
|
764
|
+
accountManager.releaseAccount(lease);
|
|
765
|
+
excludedIndexes.add(account.index);
|
|
766
|
+
if (canRetryBufferedBody && retryCount + 1 < maxAttempts && !res.headersSent) {
|
|
767
|
+
return forwardRequest(
|
|
768
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
769
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
770
|
+
);
|
|
771
|
+
}
|
|
772
|
+
const queued = await queueAndRetry(
|
|
773
|
+
`all routes failed after network error from "${account.name}"`,
|
|
774
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
775
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canRetryBufferedBody, 'network',
|
|
776
|
+
);
|
|
777
|
+
if (queued) return;
|
|
778
|
+
ctx.status = 503;
|
|
779
|
+
sendErrorResponse(res, requestInfo, 503, {
|
|
780
|
+
type: 'error',
|
|
781
|
+
error: {
|
|
782
|
+
type: 'connection_unavailable',
|
|
783
|
+
message: 'Could not connect to Claude or a configured fallback provider. Check your internet connection and try again. This is not an account quota issue.',
|
|
784
|
+
},
|
|
785
|
+
});
|
|
786
|
+
return;
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
accountManager.releaseAccount(lease, { error: err.message });
|
|
790
|
+
if (canRetryBufferedBody && retryCount + 1 < maxAttempts && !res.headersSent) {
|
|
791
|
+
account.status = 'error';
|
|
792
|
+
excludedIndexes.add(account.index);
|
|
793
|
+
return forwardRequest(
|
|
794
|
+
req, res, body, accountManager, upstream, retryCount + 1, hooks, reqId, ctx, logDir,
|
|
795
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, excludedIndexes,
|
|
796
|
+
);
|
|
797
|
+
}
|
|
798
|
+
const queued = await queueAndRetry(
|
|
799
|
+
`all routes failed after proxy error from "${account.name}"`,
|
|
800
|
+
req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
|
|
801
|
+
retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canRetryBufferedBody, 'proxy',
|
|
802
|
+
);
|
|
803
|
+
if (queued) return;
|
|
804
|
+
ctx.status = 502;
|
|
805
|
+
|
|
806
|
+
sendErrorResponse(res, requestInfo, 502, {
|
|
807
|
+
type: 'error',
|
|
808
|
+
error: { type: 'proxy_error', message: `Upstream error: ${err.message}` },
|
|
809
|
+
});
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
/**
 * Parse an HTTP `Retry-After` header value into a whole number of seconds.
 *
 * Both forms allowed by RFC 9110 are understood: a delay in seconds ("120")
 * and an HTTP-date ("Wed, 21 Oct 2015 07:28:00 GMT"). The numeric form is tried
 * first, so existing numeric inputs behave exactly as before.
 *
 * @param {string|number|null|undefined} value Raw header value.
 * @returns {number|null} Seconds clamped to [1, 86400], or null when the value
 *   is absent or cannot be interpreted.
 */
function parseRetryAfter(value) {
  if (value == null) return null;

  const maxSeconds = 24 * 60 * 60;
  const clamp = (seconds) => Math.min(Math.max(seconds, 1), maxSeconds);

  const n = parseInt(value, 10);
  if (!Number.isNaN(n)) return clamp(n);

  // Not a delay-seconds value: fall back to the HTTP-date form.
  const dateMs = Date.parse(String(value));
  if (Number.isNaN(dateMs)) return null;
  return clamp(Math.ceil((dateMs - Date.now()) / 1000));
}
|
|
819
|
+
|
|
820
|
+
/**
 * Report whether an HTTP status is one of the two authentication/authorization
 * failure codes this module checks for (401 or 403).
 *
 * @param {number} status HTTP status code.
 * @returns {boolean} True for 401 and 403, false otherwise.
 */
function isProviderAuthStatus(status) {
  switch (status) {
    case 401:
    case 403:
      return true;
    default:
      return false;
  }
}
|
|
823
|
+
|
|
824
|
+
/**
 * Ask the account manager whether any route can serve the request.
 *
 * `hasAvailableRoute` is treated as optional: when the manager does not
 * implement it, or it returns a falsy value, the result is `false`.
 *
 * @param {object} accountManager Object that may expose `hasAvailableRoute`.
 * @param {object} [requestInfo] Request descriptor forwarded to the manager.
 * @param {Set} [excludedIndexes] Account indexes forwarded to the manager.
 * @returns {*} The manager's truthy answer, or `false`.
 */
function hasEligibleRoute(accountManager, requestInfo = {}, excludedIndexes = new Set()) {
  const answer = accountManager.hasAvailableRoute?.(requestInfo, excludedIndexes);
  if (answer) return answer;
  return false;
}
|
|
827
|
+
|
|
828
|
+
/**
 * Build the human-readable message returned when no account can take a request.
 *
 * @param {object} accountManager Provides `accounts` (its length is quoted in
 *   the message) and optionally `_requiresAnthropicThinkingIntegrity`.
 * @param {object} [requestInfo] Request descriptor; its
 *   `requiresAnthropicThinkingIntegrity` flag selects the "thinking" wording.
 * @param {number} retryAfter Seconds quoted in the "Retry in Ns" wording.
 * @param {boolean} [willRecoverSoon=true] When false, the message stops
 *   promising a short retry and tells the user to add an account or wait.
 * @returns {string} The message text.
 */
function unavailableMessage(accountManager, requestInfo = {}, retryAfter, willRecoverSoon = true) {
  const hasSignedThinking = requestInfo.requiresAnthropicThinkingIntegrity
    || accountManager._requiresAnthropicThinkingIntegrity?.(requestInfo);
  const accountCount = accountManager.accounts.length;

  if (willRecoverSoon) {
    if (hasSignedThinking) {
      return `No Claude account could accept this request. Non-Claude fallback is disabled because this session contains Anthropic signed thinking blocks. Retry in ${retryAfter}s, wait for Claude capacity, or start a fresh non-thinking session to use GLM/Kimi.`;
    }
    return `All ${accountCount} accounts exhausted. Retry in ${retryAfter}s.`;
  }

  // No route is expected to recover within the queue window — i.e. every Claude
  // account is at its own 5h/weekly limit. A short "retry in Ns" would be a lie;
  // tell the user the real fix.
  const quotaNotice = `No Claude account can take this request — all ${accountCount} are at their 5h or weekly limit. Add another Claude account or wait for a quota reset.`;
  if (!hasSignedThinking) return quotaNotice;
  return `${quotaNotice} GLM/Kimi fallback is unavailable because this session contains Anthropic signed thinking blocks; start a fresh non-thinking session to use them.`;
}
|
|
848
|
+
|
|
849
|
+
// Exposes two otherwise module-private helpers under a single named export
// (presumably so unit tests can reach them, judging by the name).
export const __serverTest = { unavailableMessage, isRetriableUpstreamStatus };
|
|
850
|
+
|
|
851
|
+
/**
 * Read at most `limitBytes` from an upstream response body and decode as UTF-8.
 *
 * Reading is best-effort: any failure while obtaining the reader or reading
 * chunks yields an empty string. After the loop the reader is cancelled so the
 * rest of the body is not consumed.
 *
 * @param {{ body?: ReadableStream|null }} upstreamRes Response-like object.
 * @param {number|null} [limitBytes=65536] Byte cap; `null`/`undefined` reads
 *   the whole body.
 * @returns {Promise<string>} Decoded text (possibly truncated), or ''.
 */
async function readErrorBody(upstreamRes, limitBytes = 64 * 1024) {
  const stream = upstreamRes.body;
  if (!stream) return '';

  const unlimited = limitBytes == null;
  try {
    const reader = stream.getReader();
    const parts = [];
    let received = 0;

    while (unlimited || received < limitBytes) {
      const { done, value } = await reader.read();
      if (done) break;

      // Cut the chunk that would cross the cap, then stop reading.
      const overflows = !unlimited && value.length > limitBytes - received;
      const piece = overflows ? value.slice(0, limitBytes - received) : value;
      parts.push(piece);
      received += piece.length;
      if (overflows) break;
    }

    reader.cancel().catch(() => {});
    return Buffer.concat(parts).toString('utf8');
  } catch {
    return '';
  }
}
|
|
873
|
+
|
|
874
|
+
/**
 * Derive a retry delay (in seconds) from a fallback provider's error body.
 *
 * For provider `zai` a reset time embedded in the message is preferred, then
 * known error codes map to 60s or one hour. For provider `kimi` an explicit
 * "after N seconds" hint is preferred, then known error types map to 60s or
 * one hour. Anything else falls back to 60 seconds.
 *
 * The parsed message is coerced to a string before matching so that an
 * unexpected non-string `message` field (object, number) cannot make
 * `String.prototype.match` unavailable and throw.
 *
 * @param {string} body Raw error body text.
 * @param {string} provider Provider id (`'zai'`, `'kimi'`, or other).
 * @returns {number} Seconds to wait before retrying.
 */
function parseProviderRetryAfter(body, provider) {
  const parsed = parseJsonError(body);
  const code = String(parsed?.code);
  const message = String(parsed?.message || '');

  if (provider === 'zai') {
    const nextFlush = message.match(/reset at\s+`?([^`\n]+?)`?$/i)?.[1]
      || message.match(/next_flush_time[:\s]+`?([^`\n]+?)`?$/i)?.[1];
    const resetSeconds = secondsUntilParsedTime(nextFlush);
    if (resetSeconds) return resetSeconds;

    if (['1302', '1303', '1305'].includes(code)) return 60;
    if (['1304', '1308', '1310'].includes(code)) return 60 * 60;
  }

  if (provider === 'kimi') {
    const seconds = message.match(/after\s+(\d+)\s+seconds?/i)?.[1];
    if (seconds) return Math.min(Math.max(parseInt(seconds, 10), 1), 24 * 60 * 60);
    if (parsed?.type === 'rate_limit_reached_error' || parsed?.type === 'engine_overloaded_error') return 60;
    if (parsed?.type === 'exceeded_current_quota_error') return 60 * 60;
  }

  return 60;
}
|
|
898
|
+
|
|
899
|
+
/**
 * Extract `type`, `code` and `message` from an error response body.
 *
 * The body is parsed as JSON; the error details are read from its `error`
 * member when present, otherwise from the top-level value. When the error is
 * itself a plain string (e.g. `{"error":"quota exceeded"}`), that string is
 * used as the message and `type`/`code` are read from the top level instead of
 * being silently dropped. A body that is not valid JSON is returned verbatim
 * as the message.
 *
 * @param {string} body Raw response body text.
 * @returns {{type?: *, code?: *, message: *}|null} Parsed fields, or null for
 *   an empty body.
 */
function parseJsonError(body) {
  if (!body) return null;
  try {
    const json = JSON.parse(body);
    const error = json.error || json;
    if (typeof error === 'string') {
      return {
        type: json.type,
        code: json.code,
        message: error,
      };
    }
    return {
      type: error.type,
      code: error.code,
      message: error.message || '',
    };
  } catch {
    // Not JSON (or JSON `null`): treat the raw text as the message.
    return { message: body };
  }
}
|
|
913
|
+
|
|
914
|
+
/**
 * Classify a rate-limit response as account-scoped, upstream-wide, or unknown.
 *
 * Provider-type accounts are always account-scoped. Otherwise the
 * `anthropic-ratelimit-*` headers are checked first, then the error message
 * wording; explicit "shared throttle" wording or an `overloaded_error` type
 * yields the `upstream` scope. For non-account scopes a fingerprint is built
 * from the lowercased type and message with long hex runs and numbers masked.
 *
 * @param {{type?: string}} account Account that received the response.
 * @param {object} headers Header map read by lowercase key via property access.
 * @param {string} body Raw error body text.
 * @returns {{scope: 'account'|'upstream'|'unknown', fingerprint: string|null}}
 */
function classifyRateLimit(account, headers, body) {
  const accountScoped = () => ({ scope: 'account', fingerprint: null });
  if (account.type === 'provider') return accountScoped();

  const details = parseJsonError(body);
  const message = String(details?.message || '').toLowerCase();
  const type = String(details?.type || '').toLowerCase();

  // Header-based evidence that this specific account ran out of quota.
  const unifiedStatus = String(headers['anthropic-ratelimit-unified-status'] || '').toLowerCase();
  const utilizationExhausted = (name) => {
    const ratio = Number(headers[name]);
    return Number.isFinite(ratio) && ratio >= 0.985;
  };
  const remainingExhausted = (name) => headers[name] != null && Number(headers[name]) <= 0;

  if (
    unifiedStatus === 'rejected'
    || utilizationExhausted('anthropic-ratelimit-unified-5h-utilization')
    || utilizationExhausted('anthropic-ratelimit-unified-7d-utilization')
    || remainingExhausted('anthropic-ratelimit-tokens-remaining')
    || remainingExhausted('anthropic-ratelimit-requests-remaining')
  ) {
    return accountScoped();
  }

  // Body wording that points at the account's own quota.
  const quotaPatterns = [
    /\b(account|plan|session|weekly|quota)\b.{0,40}\b(exhausted|limit|exceeded|reached)\b/i,
    /\busage\b.{0,40}\b(exhausted|exceeded|reached)\b/i,
  ];
  if (quotaPatterns.some((pattern) => pattern.test(message))) return accountScoped();

  const sharedThrottlePhrases = [
    'not your usage limit',
    'temporarily limiting requests',
    'server is temporarily limiting',
  ];
  const isSharedThrottle = sharedThrottlePhrases.some((phrase) => message.includes(phrase))
    || type === 'overloaded_error';

  // Mask ids and counters so the same incident yields the same fingerprint.
  const fingerprint = `${type}:${message}`
    .replace(/\b[0-9a-f]{8,}\b/gi, '#')
    .replace(/\b\d+\b/g, '#')
    .replace(/\s+/g, ' ')
    .trim()
    .slice(0, 240);

  return isSharedThrottle
    ? { scope: 'upstream', fingerprint: fingerprint || 'explicit_429' }
    : { scope: 'unknown', fingerprint: fingerprint || 'unknown_429' };
}
|
|
953
|
+
|
|
954
|
+
/**
 * Build a normalized fingerprint for an overload (529) error.
 *
 * The fingerprint combines the requested model (read from the JSON request
 * body when it parses) with the error type and message, lowercased, with long
 * hex runs and numbers masked as `#`, whitespace collapsed, capped at 240
 * characters.
 *
 * @param {string} errorBody Raw upstream error body.
 * @param {{toString(): string}} requestBody Request body (e.g. a Buffer).
 * @returns {string} The fingerprint.
 */
function overloadFingerprint(errorBody, requestBody) {
  const details = parseJsonError(errorBody);

  let requestedModel;
  try {
    requestedModel = JSON.parse(requestBody.toString())?.model || '';
  } catch {
    // The response fingerprint still works when the request is not JSON.
    requestedModel = '';
  }

  const raw = `529:${requestedModel}:${details?.type || ''}:${details?.message || ''}`;
  const masked = raw
    .toLowerCase()
    .replace(/\b[0-9a-f]{8,}\b/gi, '#')
    .replace(/\b\d+\b/g, '#');
  return masked.replace(/\s+/g, ' ').trim().slice(0, 240);
}
|
|
970
|
+
|
|
971
|
+
/**
 * Record that an account hit the upstream incident identified by `fingerprint`
 * while serving this request.
 *
 * Incidents are stored in a `Map` on `requestInfo.upstreamIncidents` (created
 * on first use). Each incident tracks the set of affected account indexes, the
 * time it was first seen, and the largest retry hint observed.
 *
 * A `retryAfter` that is not a number (e.g. `undefined`) is ignored instead of
 * being fed to `Math.max`, which would otherwise turn the stored hint into
 * `NaN` for the rest of the request.
 *
 * @param {object} requestInfo Per-request state object (mutated).
 * @param {string} fingerprint Incident key.
 * @param {number} accountIndex Index of the affected account.
 * @param {number} retryAfter Retry hint for this occurrence.
 * @returns {{accounts: Set, firstAt: number, retryAfter: number}} The incident.
 */
function recordRequestIncident(requestInfo, fingerprint, accountIndex, retryAfter) {
  requestInfo.upstreamIncidents ||= new Map();
  const incident = requestInfo.upstreamIncidents.get(fingerprint) || {
    accounts: new Set(),
    firstAt: Date.now(),
    retryAfter: 0,
  };
  incident.accounts.add(accountIndex);

  const hint = Number(retryAfter);
  if (!Number.isNaN(hint)) {
    incident.retryAfter = Math.max(incident.retryAfter, hint);
  }

  requestInfo.upstreamIncidents.set(fingerprint, incident);
  return incident;
}
|
|
983
|
+
|
|
984
|
+
/**
 * Convert a provider-supplied reset time into "seconds from now".
 *
 * Purely numeric input is interpreted first: values above 1e10 are epoch
 * milliseconds, values above 1e9 are epoch seconds, anything smaller is a
 * relative delay in seconds. Only non-numeric input is handed to `Date.parse`.
 * Checking numbers first matters because `Date.parse` is lenient with bare
 * numbers (V8 reads "60" as a calendar date), which would otherwise shadow the
 * numeric interpretation. Blank input yields `null`.
 *
 * @param {*} value Timestamp, date string, or relative seconds.
 * @returns {number|null} Seconds clamped to [1, 86400], or null when the value
 *   is empty or unparseable.
 */
function secondsUntilParsedTime(value) {
  if (!value) return null;
  const trimmed = String(value).trim();
  if (trimmed === '') return null;

  const maxSeconds = 24 * 60 * 60;
  const secondsFromNow = (targetMs) =>
    Math.min(Math.max(Math.ceil((targetMs - Date.now()) / 1000), 1), maxSeconds);

  const n = Number(trimmed);
  if (Number.isFinite(n)) {
    const ms = n > 10_000_000_000 ? n : n > 1_000_000_000 ? n * 1000 : Date.now() + n * 1000;
    return secondsFromNow(ms);
  }

  const dateMs = Date.parse(trimmed);
  return Number.isNaN(dateMs) ? null : secondsFromNow(dateMs);
}
|
|
998
|
+
|
|
999
|
+
/**
 * Report whether an upstream HTTP status should trigger failover to another
 * account rather than being passed to the client.
 *
 * @param {number} status HTTP status code.
 * @returns {boolean} True for 500, 502, 503, 504 and 529.
 */
function isRetriableUpstreamStatus(status) {
  switch (status) {
    // 500 included: Anthropic 500s are transient server errors (same class as
    // 502/503/504). Without this they were passed straight through to the client
    // ("Internal server error") instead of failing over to another account.
    case 500:
    case 502:
    case 503:
    case 504:
    case 529:
      return true;
    default:
      return false;
  }
}
|
|
1005
|
+
|
|
1006
|
+
/**
 * Send an error payload to the client in whichever form is still possible.
 *
 * If a queue heartbeat is active or headers were already sent, the heartbeat is
 * cleared and the payload is written as an SSE `error` event (skipped when the
 * response is destroyed or already ended). Otherwise a normal JSON response
 * with the given status and extra headers is sent.
 *
 * @param {object} res HTTP response object.
 * @param {object} requestInfo Per-request state (read for `queueHeartbeatActive`).
 * @param {number} status HTTP status for the plain JSON case.
 * @param {object} payload JSON-serializable error payload.
 * @param {object} [headers] Extra headers for the plain JSON case.
 * @returns {void}
 */
function sendErrorResponse(res, requestInfo, status, payload, headers = {}) {
  const streamAlreadyOpen = requestInfo.queueHeartbeatActive || res.headersSent;

  if (!streamAlreadyOpen) {
    res.writeHead(status, { 'Content-Type': 'application/json', ...headers });
    res.end(JSON.stringify(payload));
    return;
  }

  clearQueueHeartbeat(requestInfo);
  if (res.destroyed || res.writableEnded) return;
  res.write(`event: error\ndata: ${JSON.stringify(payload)}\n\n`);
  res.end();
}
|
|
1018
|
+
|
|
1019
|
+
/**
 * Relay an upstream error body to the client.
 *
 * When a queue heartbeat is active or headers were already sent, the body is
 * parsed as JSON (or wrapped in a generic `upstream_error` payload when it does
 * not parse) and delegated to `sendErrorResponse`. Otherwise the upstream
 * headers are copied, minus transfer/connection/encoding/length headers, a
 * JSON content type is defaulted, and the raw body is sent as-is.
 *
 * @param {object} res HTTP response object.
 * @param {object} requestInfo Per-request state (read for `queueHeartbeatActive`).
 * @param {number} status HTTP status to send.
 * @param {string} body Raw upstream body text.
 * @param {{entries(): Iterable<[string, string]>}} headers Upstream headers.
 * @returns {void}
 */
function sendErrorBody(res, requestInfo, status, body, headers) {
  if (requestInfo.queueHeartbeatActive || res.headersSent) {
    let payload;
    try {
      payload = JSON.parse(body);
    } catch {
      const message = body || `Upstream returned ${status}`;
      payload = { type: 'error', error: { type: 'upstream_error', message } };
    }
    sendErrorResponse(res, requestInfo, status, payload);
    return;
  }

  // Headers that must not be copied because the body is re-sent by this proxy.
  const dropped = new Set(['transfer-encoding', 'connection', 'content-encoding', 'content-length']);
  const outgoing = {};
  for (const [name, headerText] of headers.entries()) {
    if (!dropped.has(name)) outgoing[name] = headerText;
  }
  if (!outgoing['content-type']) outgoing['content-type'] = 'application/json';

  res.writeHead(status, outgoing);
  res.end(body);
}
|
|
1044
|
+
|
|
1045
|
+
/**
 * Park a request in the Maxpool queue until a route becomes available, then
 * re-run it through `forwardRequest` from attempt 0 with no exclusions.
 *
 * Returns `false` (caller must answer the client itself) when queueing is
 * disabled or blocked, the response cannot be reused, or the failure cause is
 * `network`/`proxy`. When no usable wait window exists the result is whatever
 * `finishQueuedStreamIfNeeded` returns. Returns `true` once the request has
 * been handled here (forwarded, or the client disconnected while waiting).
 *
 * The wait window is capped by `queueConfig.maxWaitMs` and by a per-cause
 * limit: `weeklyMaxWaitMs` when the retry plan reports `weekly_exhausted`,
 * `capacityMaxWaitMs` for cause `capacity`, otherwise `autoMaxWaitMs`.
 *
 * @param {string} reason Human-readable reason, used in the log line.
 * @param {object} req Incoming HTTP request.
 * @param {object} res HTTP response.
 * @param {*} body Buffered request body passed through to `forwardRequest`.
 * @param {object} accountManager Account manager (several optional hooks used).
 * @param {string} upstream Upstream base URL passed through.
 * @param {object} hooks Optional callbacks; `onRequestRouted` is invoked.
 * @param {*} reqId Request id passed through.
 * @param {object} ctx Per-request context; `ctx.account` is set to '(queued)'.
 * @param {string|null} logDir Log directory passed through.
 * @param {object} retryConfig Retry settings passed through.
 * @param {object} queueConfig Queue settings (enabled, *MaxWaitMs, ...).
 * @param {object} requestInfo Per-request state (mutated).
 * @param {boolean} canRetryBufferedBody Passed through to `forwardRequest`.
 * @param {boolean} [canQueueBufferedBody] Whether the body may be queued.
 * @param {string} [cause='quota'] Failure category.
 * @returns {Promise<boolean>}
 */
async function queueAndRetry(
  reason, req, res, body, accountManager, upstream, hooks, reqId, ctx, logDir,
  retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody = canRetryBufferedBody, cause = 'quota',
) {
  const cannotQueue = !queueConfig.enabled
    || !canQueueBufferedBody
    || (res.headersSent && !requestInfo.queueHeartbeatActive)
    || res.destroyed;
  if (cannotQueue) {
    if (queueConfig.enabled && !canQueueBufferedBody && requestInfo.queueBlockedReason) {
      console.log(`[Maxpool] Not queueing request: ${requestInfo.queueBlockedReason}`);
    }
    return false;
  }
  if (cause === 'network' || cause === 'proxy') return false;

  // Resolve the configured wait limits; each optional limit inherits from the
  // previous one when left unset (null/undefined).
  const toWaitMs = (raw) => Math.max(0, Number(raw) || 0);
  const maxWaitMs = toWaitMs(queueConfig.maxWaitMs);
  const autoMaxWaitMs = queueConfig.autoMaxWaitMs == null
    ? maxWaitMs
    : toWaitMs(queueConfig.autoMaxWaitMs);
  const capacityMaxWaitMs = queueConfig.capacityMaxWaitMs == null
    ? autoMaxWaitMs
    : toWaitMs(queueConfig.capacityMaxWaitMs);
  const weeklyMaxWaitMs = toWaitMs(queueConfig.weeklyMaxWaitMs);

  const retryPlan = accountManager.nextRetryForRequest?.(requestInfo, new Set()) || {
    retryAfterMs: Infinity,
    cause: 'unavailable',
  };

  let causeLimitMs;
  if (retryPlan.cause === 'weekly_exhausted') {
    causeLimitMs = weeklyMaxWaitMs;
  } else if (cause === 'capacity') {
    causeLimitMs = capacityMaxWaitMs;
  } else {
    causeLimitMs = autoMaxWaitMs;
  }
  const queueWindowMs = Math.min(maxWaitMs, causeLimitMs);
  if (queueWindowMs <= 0) {
    return finishQueuedStreamIfNeeded(res, requestInfo, 'No retry window is available.');
  }

  const { retryAfterMs } = retryPlan;
  if (!Number.isFinite(retryAfterMs) || retryAfterMs > queueWindowMs) {
    return finishQueuedStreamIfNeeded(res, requestInfo, 'No route is expected to recover within the configured queue window.');
  }

  // The queue clock starts at the first queue attempt and is kept across
  // repeated calls for the same request.
  requestInfo.queueStartedAt ||= Date.now();
  accountManager.registerQueuedRequest?.(requestInfo);
  const remaining = queueWindowMs - (Date.now() - requestInfo.queueStartedAt);
  if (remaining <= 0) {
    accountManager.removeQueuedRequest?.(requestInfo);
    return finishQueuedStreamIfNeeded(res, requestInfo, 'The Maxpool queue wait expired.');
  }

  ctx.account = '(queued)';
  hooks.onRequestRouted?.(reqId, { account: '(queued)' });
  console.log(`[Maxpool] ${reason}; queueing request for up to ${Math.ceil(remaining / 1000)}s (cause: ${cause}, retry: ${retryPlan.cause})`);
  ensureQueueHeartbeat(res, requestInfo, queueConfig);

  const available = await waitForAvailableRoute(req, res, accountManager, requestInfo, queueConfig, remaining);
  if (!available) {
    // A disconnected client counts as handled: nothing is left to answer.
    if (res.destroyed || req.destroyed) return true;
    accountManager.removeQueuedRequest?.(requestInfo);
    return finishQueuedStreamIfNeeded(res, requestInfo, 'The Maxpool queue wait expired.');
  }

  try {
    await forwardRequest(
      req, res, body, accountManager, upstream, 0, hooks, reqId, ctx, logDir,
      retryConfig, queueConfig, requestInfo, canRetryBufferedBody, canQueueBufferedBody, new Set(),
    );
    return true;
  } finally {
    clearQueueHeartbeat(requestInfo);
  }
}
|
|
1107
|
+
|
|
1108
|
+
/**
 * Open an SSE response and start a periodic keep-alive comment while a
 * streaming request waits in the queue.
 *
 * Does nothing unless `requestInfo.stream` is set, no heartbeat is active yet,
 * and headers have not been sent. The interval is at least one second and
 * defaults to 10 seconds. The timer is stored on `requestInfo` and unref'd
 * when the runtime supports it.
 *
 * @param {object} res HTTP response object.
 * @param {object} requestInfo Per-request state (mutated).
 * @param {{heartbeatMs?: number}} queueConfig Queue settings.
 * @returns {void}
 */
function ensureQueueHeartbeat(res, requestInfo, queueConfig) {
  const nothingToDo = !requestInfo.stream || requestInfo.queueHeartbeatActive || res.headersSent;
  if (nothingToDo) return;

  const intervalMs = Math.max(1000, Number(queueConfig.heartbeatMs) || 10_000);
  const sseHeaders = {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
    'X-Accel-Buffering': 'no',
  };

  res.writeHead(200, sseHeaders);
  res.flushHeaders?.();
  res.write(': maxpool queued\n\n');
  requestInfo.queueHeartbeatActive = true;

  const beat = () => {
    // Stop on our own once the response is gone.
    if (res.destroyed || res.writableEnded) {
      clearQueueHeartbeat(requestInfo);
    } else {
      res.write(': maxpool queued\n\n');
    }
  };
  requestInfo.queueHeartbeatTimer = setInterval(beat, intervalMs);
  requestInfo.queueHeartbeatTimer.unref?.();
}
|
|
1129
|
+
|
|
1130
|
+
/**
 * Stop the queue heartbeat timer (if any) and reset the heartbeat state on
 * `requestInfo`.
 *
 * @param {object} requestInfo Per-request state (mutated).
 * @returns {void}
 */
function clearQueueHeartbeat(requestInfo) {
  const { queueHeartbeatTimer: timer } = requestInfo;
  if (timer) {
    clearInterval(timer);
  }
  requestInfo.queueHeartbeatTimer = null;
  requestInfo.queueHeartbeatActive = false;
}
|
|
1135
|
+
|
|
1136
|
+
/**
 * Close a queue-heartbeat SSE stream with a `rate_limit_error` event.
 *
 * When no heartbeat is active nothing is written and `false` is returned.
 * Otherwise the heartbeat is cleared, the error event is written and the
 * response ended (unless it is already destroyed or ended), and `true` is
 * returned.
 *
 * @param {object} res HTTP response object.
 * @param {object} requestInfo Per-request state.
 * @param {string} message Text placed in the error event.
 * @returns {boolean} Whether a heartbeat stream was active.
 */
function finishQueuedStreamIfNeeded(res, requestInfo, message) {
  if (!requestInfo.queueHeartbeatActive) return false;

  clearQueueHeartbeat(requestInfo);

  const stillWritable = !res.destroyed && !res.writableEnded;
  if (stillWritable) {
    const payload = JSON.stringify({
      type: 'error',
      error: { type: 'rate_limit_error', message },
    });
    res.write(`event: error\ndata: ${payload}\n\n`);
    res.end();
  }
  return true;
}
|
|
1148
|
+
|
|
1149
|
+
/**
 * Poll the account manager until a route is available for the request, the
 * client goes away, or `maxWaitMs` elapses.
 *
 * A route counts as ready when `hasAvailableRoute` is truthy and the optional
 * `canAdmitQueuedRequest` hook does not return exactly `false`. Polling runs
 * every `queueConfig.pollMs` (at least 100 ms, default 1000 ms). On client
 * disconnect the request is removed from the queue and its heartbeat cleared.
 * The temporary `aborted`/`close` listeners are always detached on exit.
 *
 * @param {object} req Incoming request (event emitter).
 * @param {object} res Response (event emitter with `destroyed`).
 * @param {object} accountManager Account manager.
 * @param {object} requestInfo Per-request state.
 * @param {{pollMs?: number}} queueConfig Queue settings.
 * @param {number} maxWaitMs Maximum time to wait.
 * @returns {Promise<*>} Truthy when a route is ready; falsy otherwise.
 */
async function waitForAvailableRoute(req, res, accountManager, requestInfo, queueConfig, maxWaitMs) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  const pollMs = Math.max(100, Number(queueConfig.pollMs) || 1000);

  const routeReady = () => accountManager.hasAvailableRoute(requestInfo, new Set())
    && accountManager.canAdmitQueuedRequest?.(requestInfo) !== false;

  let closed = false;
  const markClosed = () => { closed = true; };
  const clientGone = () => closed || res.destroyed;
  req.once('aborted', markClosed);
  res.once('close', markClosed);

  try {
    while (elapsedMs() < maxWaitMs) {
      if (clientGone()) return false;
      if (routeReady()) return true;
      await sleep(Math.min(pollMs, maxWaitMs - elapsedMs()));
    }

    // One last look after the deadline before giving up.
    return routeReady();
  } finally {
    if (clientGone()) {
      accountManager.removeQueuedRequest?.(requestInfo);
      clearQueueHeartbeat(requestInfo);
    }
    req.off('aborted', markClosed);
    res.off('close', markClosed);
  }
}
|
|
1180
|
+
|
|
1181
|
+
/**
 * Return a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
1184
|
+
|
|
1185
|
+
/**
 * Summarize an incoming request for routing decisions.
 *
 * The load weight starts at one unit per 64,000 body bytes (minimum 1). When
 * the body is JSON it gains one unit for `max_tokens` above 16,000 and one for
 * a `thinking` or `effort` field. The model, the stream flag and the
 * thinking-integrity flag are copied onto the result when present.
 *
 * @param {{method: string, url: string}} req Incoming request.
 * @param {{length: number, toString(): string}} body Buffered body.
 * @returns {object} Descriptor with `method`, `path`, `bodyBytes`, `weight`
 *   and optionally `model`, `stream`, `requiresAnthropicThinkingIntegrity`.
 */
function describeRequest(req, body) {
  let weight = Math.max(1, Math.ceil(body.length / 64_000));
  const info = {
    method: req.method,
    path: req.url,
    bodyBytes: body.length,
    weight,
  };

  try {
    const json = JSON.parse(body.toString());
    const { model, stream, max_tokens: maxTokens, thinking, effort } = json;

    if (model) info.model = model;
    if (stream) info.stream = true;

    const wantsLongOutput = maxTokens && maxTokens > 16_000;
    if (wantsLongOutput) weight += 1;
    if (thinking || effort) weight += 1;

    if (requiresAnthropicThinkingIntegrity(json)) {
      info.requiresAnthropicThinkingIntegrity = true;
    }
  } catch {
    // Non-JSON requests are rare; body size still gives a useful load signal.
  }

  info.weight = Math.max(1, weight);
  return info;
}
|
|
1208
|
+
|
|
1209
|
+
/**
 * Decide whether a parsed request body involves Anthropic "thinking" content.
 *
 * True when the body has a truthy `thinking` or `effort` field, or when its
 * `messages` contain a thinking block according to `containsThinkingBlock`.
 *
 * @param {*} json Parsed request body.
 * @returns {boolean}
 */
function requiresAnthropicThinkingIntegrity(json) {
  const isObject = Boolean(json) && typeof json === 'object';
  if (!isObject) return false;

  const requestsThinking = json.thinking || json.effort;
  return requestsThinking ? true : containsThinkingBlock(json.messages);
}
|
|
1214
|
+
|
|
1215
|
+
/**
 * Recursively search a message structure for Anthropic thinking content.
 *
 * A node matches when its `type` is `thinking`, `redacted_thinking` or
 * `signature_delta`, or when it carries a `signature` together with a
 * `thinking` value or no `type` at all. Arrays are searched element by
 * element; objects are searched through their `content` and `messages`
 * members.
 *
 * @param {*} value Message list, message, content block, or anything else.
 * @returns {boolean}
 */
function containsThinkingBlock(value) {
  if (!value) return false;

  if (Array.isArray(value)) {
    for (const entry of value) {
      if (containsThinkingBlock(entry)) return true;
    }
    return false;
  }

  if (typeof value !== 'object') return false;

  switch (value.type) {
    case 'thinking':
    case 'redacted_thinking':
    case 'signature_delta':
      return true;
    default:
      break;
  }

  const looksSigned = value.signature && (value.thinking != null || value.type == null);
  if (looksSigned) return true;

  return containsThinkingBlock(value.content) || containsThinkingBlock(value.messages);
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * Read the routing profile from the `x-maxpool-profile` request header.
 *
 * The value is trimmed and lowercased; a missing or blank header yields
 * `'claude'`.
 *
 * @param {object} headers Request headers keyed by lowercase name.
 * @returns {string} Profile name.
 */
function getMaxpoolProfile(headers) {
  const fallback = 'claude';
  const requested = headers['x-maxpool-profile'] || fallback;
  const normalized = String(requested).trim().toLowerCase();
  return normalized === '' ? fallback : normalized;
}
|
|
1233
|
+
|
|
1234
|
+
/**
 * Register per-request fallback providers described by `x-maxpool-*` headers.
 *
 * Only active for the `all` profile. A `x-maxpool-zai-token` header upserts the
 * `glm-fallback` account and a `x-maxpool-kimi-token` header upserts the
 * `kimi-fallback` account; model names and base URLs can be overridden by
 * further headers and otherwise use the defaults below.
 *
 * @param {object} accountManager Must expose `upsertRuntimeAccount`.
 * @param {object} headers Request headers keyed by lowercase name.
 * @returns {void}
 */
function prepareRuntimeProviders(accountManager, headers) {
  if (getMaxpoolProfile(headers) !== 'all') return;

  const read = (name) => headerValue(headers, name);

  const zaiToken = read('x-maxpool-zai-token');
  if (zaiToken) {
    // Model resolution: specific header, then the shared zai model header,
    // then a default (sonnet falls back to whatever opus resolved to).
    const opus = read('x-maxpool-zai-opus-model') || read('x-maxpool-zai-model') || 'glm-5.2';
    const sonnet = read('x-maxpool-zai-sonnet-model') || read('x-maxpool-zai-model') || opus;
    const haiku = read('x-maxpool-zai-haiku-model') || 'glm-5.1';
    const zaiBaseUrl = read('x-maxpool-zai-base-url') || 'https://api.z.ai/api/anthropic';

    accountManager.upsertRuntimeAccount({
      name: 'glm-fallback',
      type: 'provider',
      provider: 'zai',
      authToken: zaiToken,
      upstream: trimTrailingSlash(zaiBaseUrl),
      authHeader: 'authorization',
      profiles: ['all'],
      priority: 10,
      modelMap: { opus, sonnet, haiku, default: sonnet },
      stripBetaHeaders: true,
    });
  }

  const kimiToken = read('x-maxpool-kimi-token');
  if (kimiToken) {
    const model = read('x-maxpool-kimi-model') || 'kimi-k2.7';
    const kimiBaseUrl = read('x-maxpool-kimi-base-url') || 'https://api.kimi.com/coding';

    accountManager.upsertRuntimeAccount({
      name: 'kimi-fallback',
      type: 'provider',
      provider: 'kimi',
      authToken: kimiToken,
      upstream: trimTrailingSlash(kimiBaseUrl),
      authHeader: 'authorization',
      profiles: ['all'],
      priority: 20,
      model,
      stripBetaHeaders: true,
    });
  }
}
|
|
1273
|
+
|
|
1274
|
+
/**
 * Read a single header value as a trimmed string.
 *
 * The lookup lowercases `name`. When the header is an array (how Node
 * represents some repeated headers), the first element is used and is
 * normalized exactly like a scalar value — previously it was returned
 * untrimmed, and an empty array produced `undefined` instead of ''.
 *
 * @param {object} headers Request headers keyed by lowercase name.
 * @param {string} name Header name (any case).
 * @returns {string} Trimmed value, or '' when missing/empty.
 */
function headerValue(headers, name) {
  const raw = headers[name.toLowerCase()];
  const value = Array.isArray(raw) ? raw[0] : raw;
  return value ? String(value).trim() : '';
}
|
|
1279
|
+
|
|
1280
|
+
/**
 * Stringify a value and remove every trailing `/` character.
 *
 * @param {*} value Typically a base URL.
 * @returns {string} The string without trailing slashes.
 */
function trimTrailingSlash(value) {
  const text = String(value);
  let end = text.length;
  while (end > 0 && text[end - 1] === '/') {
    end -= 1;
  }
  return text.slice(0, end);
}
|
|
1283
|
+
|
|
1284
|
+
/**
 * Rewrite the `model` field of a JSON request body for the chosen account.
 *
 * The body is returned untouched when it is empty, when the account defines
 * neither `model` nor `modelMap`, when it is not a JSON object with a `model`,
 * or when anything fails along the way. Otherwise a new Buffer with the mapped
 * model is returned.
 *
 * @param {Buffer} body Buffered request body.
 * @param {{model?: string, modelMap?: object}} account Target account.
 * @returns {Buffer} Original or rewritten body.
 */
function rewriteBodyForAccount(body, account) {
  if (!body.length) return body;
  const accountOverridesModel = account.model || account.modelMap;
  if (!accountOverridesModel) return body;

  try {
    const payload = JSON.parse(body.toString());
    const hasModel = payload && typeof payload === 'object' && payload.model;
    if (!hasModel) return body;

    payload.model = mappedModel(payload.model, account);
    return Buffer.from(JSON.stringify(payload));
  } catch {
    return body;
  }
}
|
|
1296
|
+
|
|
1297
|
+
/**
 * Pick the model name an account should receive for a requested model.
 *
 * A fixed `account.model` always wins. Otherwise the requested name is matched
 * (case-insensitively, by substring) against the families `haiku`, `opus` and
 * `sonnet`, in that order, and looked up in `account.modelMap`; the map's
 * `default` and finally the original name are the fallbacks.
 *
 * @param {*} originalModel Model name from the request.
 * @param {{model?: string, modelMap?: object}} account Target account.
 * @returns {*} Mapped model name.
 */
function mappedModel(originalModel, account) {
  if (account.model) return account.model;

  const table = account.modelMap || {};
  const requested = String(originalModel || '').toLowerCase();
  const family = ['haiku', 'opus', 'sonnet'].find((name) => requested.includes(name));

  return (family && table[family]) || table.default || originalModel;
}
|
|
1306
|
+
|
|
1307
|
+
/**
 * Stream an SSE response to the client, parsing usage data along the way.
 *
 * Each upstream chunk is forwarded immediately; its decoded text is appended
 * to `streamLog` (when provided) and split on blank lines so that complete
 * events can be handed to `parseSSEEvent`. Headers are written lazily on the
 * first chunk unless they were already sent.
 *
 * Backpressure: when `res.write` reports a full buffer, reading pauses until
 * the response emits `drain` or `close`. Both temporary listeners are removed
 * as soon as either fires, so long streams do not accumulate stale listeners
 * on the response, and the wait is skipped if the response is already
 * destroyed (in which case neither event may ever fire).
 *
 * @param {ReadableStream} webStream Upstream body stream.
 * @param {object} res HTTP response to write to.
 * @param {number} status Status code for the client response.
 * @param {object} responseHeaders Headers for the client response.
 * @param {number} accountIndex Account index passed to `parseSSEEvent`.
 * @param {object} accountManager Account manager passed to `parseSSEEvent`.
 * @param {string[]|null} streamLog Collects decoded chunks when not null.
 * @param {object} [requestInfo] Per-request state passed to `parseSSEEvent`.
 * @returns {Promise<void>} Rejects with the read error when reading fails.
 */
async function streamResponse(webStream, res, status, responseHeaders, accountIndex, accountManager, streamLog, requestInfo = {}) {
  const reader = webStream.getReader();
  const decoder = new TextDecoder();
  let sseBuffer = '';
  let committed = res.headersSent;
  let readFailed = false;

  // Resolve on whichever of 'drain'/'close' comes first and detach both.
  const waitForDrainOrClose = () => new Promise((resolve) => {
    const settle = () => {
      res.off('drain', settle);
      res.off('close', settle);
      resolve();
    };
    res.once('drain', settle);
    res.once('close', settle);
  });

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // Client disconnected — stop reading from upstream
      if (res.destroyed) break;

      if (!committed) {
        res.writeHead(status, responseHeaders);
        committed = true;
      }

      // Forward chunk immediately
      const ok = res.write(value);

      const text = decoder.decode(value, { stream: true });

      // Capture for logging
      if (streamLog) streamLog.push(text);

      // Parse SSE events for usage tracking
      sseBuffer += text;
      const events = sseBuffer.split('\n\n');
      sseBuffer = events.pop(); // keep incomplete event

      for (const event of events) {
        parseSSEEvent(event, accountIndex, accountManager, requestInfo);
      }

      // Handle backpressure — also bail out if client disconnects,
      // because 'drain' will never fire on a destroyed socket
      if (!ok) {
        if (!res.destroyed) await waitForDrainOrClose();
        if (res.destroyed) break;
      }
    }

    // Parse any remaining buffer
    if (sseBuffer.trim()) {
      parseSSEEvent(sseBuffer, accountIndex, accountManager, requestInfo);
    }
  } catch (err) {
    readFailed = true;
    throw err;
  } finally {
    // Cancel upstream reader to stop consuming data nobody needs
    reader.cancel().catch(() => {});
    // On a read failure the response is left open for the caller to handle.
    if (!readFailed) {
      if (!committed && !res.headersSent) res.writeHead(status, responseHeaders);
      if (!res.writableEnded) res.end();
    }
  }
}
|
|
1374
|
+
|
|
1375
|
+
/**
 * Inspect a single SSE event and report what it contains to the account manager.
 *
 * Only the first line starting with `data: ` is considered. Its JSON payload
 * feeds token usage into `accountManager.updateUsage` (`message_start` carries
 * input tokens, `message_delta` carries output tokens) and, when
 * `sseEventContainsThinking` matches, triggers the optional
 * `accountManager.markSessionThinkingProtected` hook. Any error raised while
 * handling the payload is swallowed.
 *
 * @param {string} event - raw text of one SSE event.
 * @param {*} accountIndex - forwarded to `updateUsage`.
 * @param {object} accountManager - receives usage and thinking notifications.
 * @param {object} [requestInfo] - `sessionKey` and `model` are read.
 * @returns {void}
 */
function parseSSEEvent(event, accountIndex, accountManager, requestInfo = {}) {
  const DATA_PREFIX = 'data: ';

  let payloadLine;
  for (const line of event.split('\n')) {
    if (line.startsWith(DATA_PREFIX)) {
      payloadLine = line;
      break;
    }
  }
  if (!payloadLine) return;

  try {
    const payload = JSON.parse(payloadLine.slice(DATA_PREFIX.length));

    switch (payload.type) {
      case 'message_start':
        if (payload.message?.usage) {
          accountManager.updateUsage(accountIndex, payload.message.usage.input_tokens, 0);
        }
        break;
      case 'message_delta':
        if (payload.usage) {
          accountManager.updateUsage(accountIndex, 0, payload.usage.output_tokens);
        }
        break;
      default:
        break;
    }

    if (sseEventContainsThinking(payload)) {
      accountManager.markSessionThinkingProtected?.(requestInfo.sessionKey, requestInfo.model);
    }
  } catch {
    // not valid JSON, skip
  }
}
|
|
1393
|
+
|
|
1394
|
+
/**
 * Tell whether a parsed SSE payload carries thinking content.
 *
 * Matches a `content_block` of type `thinking` or `redacted_thinking`, or a
 * `delta` of type `signature_delta`.
 *
 * @param {*} data - parsed SSE payload (may be null or undefined).
 * @returns {boolean}
 */
function sseEventContainsThinking(data) {
  const blockType = data?.content_block?.type;
  if (blockType === 'thinking' || blockType === 'redacted_thinking') {
    return true;
  }
  return data?.delta?.type === 'signature_delta';
}
|
|
1399
|
+
|
|
1400
|
+
/**
 * Report token usage found in a buffered (non-streaming) JSON response body.
 *
 * When the parsed body has a truthy `usage`, its `input_tokens` and
 * `output_tokens` are passed to `accountManager.updateUsage`. Bodies that are
 * not JSON, and any error raised along the way, are ignored.
 *
 * @param {Buffer} buffer - response body.
 * @param {*} accountIndex - forwarded to `updateUsage`.
 * @param {object} accountManager - receives the usage update.
 * @returns {void}
 */
function extractUsageFromBody(buffer, accountIndex, accountManager) {
  try {
    const { usage } = JSON.parse(buffer.toString());
    if (!usage) return;

    const { input_tokens: inputTokens, output_tokens: outputTokens } = usage;
    accountManager.updateUsage(accountIndex, inputTokens, outputTokens);
  } catch {
    // not JSON or no usage
  }
}
|
|
1410
|
+
|
|
1411
|
+
/**
 * Flag the session as thinking-protected when a buffered JSON response
 * contains a thinking block.
 *
 * The parsed body's `content` is checked with `containsThinkingBlock`; on a
 * match the optional `accountManager.markSessionThinkingProtected` hook is
 * called with the request's `sessionKey` and `model`. Bodies that are not
 * JSON, and any error raised along the way, are ignored.
 *
 * @param {Buffer} buffer - response body.
 * @param {object} accountManager - may expose `markSessionThinkingProtected`.
 * @param {object} [requestInfo] - `sessionKey` and `model` are read.
 * @returns {void}
 */
function markThinkingFromResponse(buffer, accountManager, requestInfo = {}) {
  try {
    const parsed = JSON.parse(buffer.toString());
    const hasThinking = containsThinkingBlock(parsed?.content);
    if (!hasThinking) return;

    accountManager.markSessionThinkingProtected?.(requestInfo.sessionKey, requestInfo.model);
  } catch {
    // not JSON
  }
}
|
|
1421
|
+
|
|
1422
|
+
/**
 * Compute a retry delay, in whole seconds, for the given request.
 *
 * Asks the optional `accountManager.nextRetryForRequest` hook (with an empty
 * exclusion set) for `retryAfterMs`. When no delay is available, or it is
 * `Infinity`, the result is 60; otherwise the delay is rounded up to whole
 * seconds with a floor of 1.
 *
 * @param {object} accountManager - may expose `nextRetryForRequest`.
 * @param {object} [requestInfo] - forwarded to `nextRetryForRequest`.
 * @returns {number} seconds to wait.
 */
function computeRetryAfter(accountManager, requestInfo = {}) {
  const FALLBACK_SECONDS = 60;

  const nextRetry = accountManager.nextRetryForRequest?.(requestInfo, new Set());
  const waitMs = nextRetry?.retryAfterMs ?? Infinity;
  if (waitMs === Infinity) return FALLBACK_SECONDS;

  const wholeSeconds = Math.ceil(waitMs / 1000);
  return Math.max(1, wholeSeconds);
}
|