protocol-proxy 2.3.4 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,357 +1,636 @@
1
- const express = require('express');
2
- const { detectInboundProtocol } = require('./detector');
3
- const o2a = require('./converters/openai-to-anthropic');
4
- const a2o = require('./converters/anthropic-to-openai');
5
- const o2g = require('./converters/openai-to-gemini');
6
- const g2o = require('./converters/gemini-to-openai');
7
- const a2g = require('./converters/anthropic-to-gemini');
8
- const g2a = require('./converters/gemini-to-anthropic');
9
- const { recordUsage } = require('./stats-store');
10
-
11
- function createProxyApp(proxyConfigOrGetter) {
12
- const getProxyConfig = typeof proxyConfigOrGetter === 'function'
13
- ? proxyConfigOrGetter
14
- : () => proxyConfigOrGetter;
15
- const app = express();
16
- app.use(express.json({ limit: '50mb' }));
17
-
18
- // reasoning_content 缓存(用于 DeepSeek 等 reasoning model)
19
- // key: assistant message content, value: reasoning_content
20
- const reasoningCache = new Map();
21
- const MAX_CACHE_SIZE = 100;
22
-
23
- function getReasoningKey(msg) {
24
- const toolIds = msg.tool_calls?.map(t => t.id).join(',') || '';
25
- return msg.content + '|' + toolIds;
26
- }
27
-
28
- function setReasoning(msg, reasoning) {
29
- if (!msg?.content || !reasoning) return;
30
- const key = getReasoningKey(msg);
31
- if (reasoningCache.size >= MAX_CACHE_SIZE) {
32
- const firstKey = reasoningCache.keys().next().value;
33
- reasoningCache.delete(firstKey);
34
- }
35
- reasoningCache.set(key, reasoning);
36
- }
37
-
38
- function getReasoning(msg) {
39
- if (!msg?.content) return undefined;
40
- return reasoningCache.get(getReasoningKey(msg));
41
- }
42
-
43
- function estimateTokens(text) {
44
- if (!text) return 0;
45
- let tokens = 0;
46
- for (let i = 0; i < text.length; i++) {
47
- const code = text.charCodeAt(i);
48
- // CJK 字符 ~1.5 token/字
49
- if (code >= 0x4E00 && code <= 0x9FFF) tokens += 1.5;
50
- // 全角标点等 ~1 token
51
- else if (code >= 0x3000 && code <= 0x303F) tokens += 1;
52
- // 其他(ASCII 字母、数字、标点、空格)~0.25 token
53
- else tokens += 0.25;
54
- }
55
- return Math.ceil(tokens);
56
- }
57
-
58
- function estimateInputTokens(body) {
59
- if (!body?.messages) return 0;
60
- let text = '';
61
- for (const msg of body.messages) {
62
- if (typeof msg.content === 'string') {
63
- text += msg.content;
64
- } else if (Array.isArray(msg.content)) {
65
- for (const block of msg.content) {
66
- if (block.text) text += block.text;
67
- if (block.type === 'tool_result' && block.content) {
68
- text += typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
69
- }
70
- }
71
- }
72
- if (msg.tool_calls) {
73
- for (const tc of msg.tool_calls) {
74
- text += (tc.function?.arguments || '') + (tc.function?.name || '');
75
- }
76
- }
77
- }
78
- if (body.tools) {
79
- text += JSON.stringify(body.tools);
80
- }
81
- return estimateTokens(text);
82
- }
83
-
84
- function injectReasoningToMessages(messages) {
85
- if (!Array.isArray(messages)) return;
86
- for (const msg of messages) {
87
- if (msg.role === 'assistant' && msg.reasoning_content === undefined) {
88
- const reasoning = getReasoning(msg);
89
- // DeepSeek 等 reasoning model 要求 assistant message 必须包含 reasoning_content 字段
90
- msg.reasoning_content = reasoning || '';
91
- }
92
- }
93
- }
94
-
95
- function extractReasoningFromResponse(body) {
96
- const choice = body.choices?.[0];
97
- const message = choice?.message;
98
- if (message?.role === 'assistant' && message.reasoning_content) {
99
- setReasoning(message, message.reasoning_content);
100
- }
101
- }
102
-
103
- app.use((req, res, next) => {
104
- res.header('Access-Control-Allow-Origin', '*');
105
- res.header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
106
- res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Api-Key');
107
- if (req.method === 'OPTIONS') return res.sendStatus(200);
108
- next();
109
- });
110
-
111
- app.use((req, res, next) => {
112
- const proxyConfig = getProxyConfig();
113
- if (!proxyConfig.requireAuth || !proxyConfig.authToken) {
114
- return next();
115
- }
116
-
117
- const token = req.headers.authorization?.replace('Bearer ', '') || req.headers['x-api-key'];
118
- if (token !== proxyConfig.authToken) {
119
- return res.status(401).json({ error: 'Unauthorized' });
120
- }
121
- next();
122
- });
123
-
124
- app.post('/v1/chat/completions', handleRequest);
125
- app.post('/v1/messages', handleRequest);
126
-
127
- async function handleRequest(req, res) {
128
- const requestId = `req-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`;
129
- try {
130
- const proxyConfig = getProxyConfig();
131
- const inboundProtocol = detectInboundProtocol(req, req.body);
132
- const target = proxyConfig.target;
133
-
134
- if (!target) {
135
- return res.status(500).json({ error: 'Proxy target not configured' });
136
- }
137
-
138
- const targetProtocol = target.protocol;
139
- const isStream = req.body?.stream === true;
140
-
141
- console.log(`[${requestId}] ⬅️ ${(inboundProtocol || 'unknown').toUpperCase()} → ${targetProtocol.toUpperCase()} | path=${req.path}`);
142
-
143
- // 决定转换方向
144
- let convertReq, convertRes, createSSEConv, nameToId = null;
145
- if (inboundProtocol === 'openai' && targetProtocol === 'anthropic') {
146
- convertReq = o2a.convertRequest;
147
- convertRes = o2a.convertResponse;
148
- createSSEConv = o2a.createSSEConverter;
149
- } else if (inboundProtocol === 'anthropic' && targetProtocol === 'openai') {
150
- convertReq = a2o.convertRequest;
151
- convertRes = a2o.convertResponse;
152
- createSSEConv = a2o.createSSEConverter;
153
- } else if (inboundProtocol === 'openai' && targetProtocol === 'gemini') {
154
- convertReq = o2g.convertRequest;
155
- convertRes = o2g.convertResponse;
156
- createSSEConv = o2g.createSSEConverter;
157
- } else if (inboundProtocol === 'gemini' && targetProtocol === 'openai') {
158
- convertReq = g2o.convertRequest;
159
- convertRes = g2o.convertResponse;
160
- createSSEConv = g2o.createSSEConverter;
161
- } else if (inboundProtocol === 'anthropic' && targetProtocol === 'gemini') {
162
- convertReq = a2g.convertRequest;
163
- convertRes = a2g.convertResponse;
164
- createSSEConv = a2g.createSSEConverter;
165
- } else if (inboundProtocol === 'gemini' && targetProtocol === 'anthropic') {
166
- // g2a.convertRequest 返回 { ...body, nameToId },需要提取映射
167
- convertReq = (body, model) => {
168
- const result = g2a.convertRequest(body, model);
169
- nameToId = result.nameToId;
170
- const { nameToId: _, ...bodyOnly } = result;
171
- return bodyOnly;
172
- };
173
- convertRes = g2a.convertResponse;
174
- createSSEConv = (model) => g2a.createSSEConverter(nameToId);
175
- } else {
176
- convertReq = (body, model) => ({ ...body, model: body.model || model });
177
- convertRes = (body) => body;
178
- createSSEConv = null;
179
- }
180
-
181
- // 如果请求没有 model,注入默认 model
182
- const inboundModel = req.body?.model;
183
- const effectiveModel = target.defaultModel || inboundModel;
184
- if (effectiveModel) {
185
- req.body = { ...req.body, model: effectiveModel };
186
- }
187
-
188
- const targetBody = convertReq(req.body, effectiveModel);
189
-
190
- const isAzure = !!target.azureDeployment && /azure/i.test(target.providerUrl);
191
-
192
- // 流式请求时注入 stream_options 以获取 usage 统计(Azure 不支持)
193
- if (isStream && targetProtocol === 'openai' && !isAzure) {
194
- targetBody.stream_options = { include_usage: true };
195
- }
196
-
197
- // 注入 reasoning_content(针对 DeepSeek 等 reasoning model)
198
- injectReasoningToMessages(targetBody.messages);
199
-
200
- // 构建目标 URL
201
- const targetUrl = buildTargetUrl(target, req.path, isStream, effectiveModel);
202
- console.log(`[${requestId}] 🔗 ${targetUrl} | model=${effectiveModel}`);
203
-
204
- // 构建请求头
205
- const headers = {
206
- 'Content-Type': 'application/json',
207
- 'Accept': isStream ? 'text/event-stream' : 'application/json',
208
- };
209
-
210
- if (targetProtocol === 'openai') {
211
- if (isAzure) {
212
- headers['api-key'] = target.apiKey;
213
- } else {
214
- headers['Authorization'] = `Bearer ${target.apiKey}`;
215
- }
216
- } else if (targetProtocol === 'gemini') {
217
- headers['x-goog-api-key'] = target.apiKey;
218
- } else if (targetProtocol === 'anthropic') {
219
- headers['X-Api-Key'] = target.apiKey;
220
- headers['Anthropic-Version'] = '2023-06-01';
221
- headers['Authorization'] = `Bearer ${target.apiKey}`;
222
- }
223
-
224
- const fetchRes = await fetch(targetUrl, {
225
- method: 'POST',
226
- headers,
227
- body: JSON.stringify(targetBody),
228
- signal: AbortSignal.timeout(300000),
229
- });
230
-
231
- if (!fetchRes.ok) {
232
- const errBody = await fetchRes.text();
233
- console.log(`[${requestId}] Target error: HTTP ${fetchRes.status} | ${errBody.slice(0, 500)}`);
234
- res.status(fetchRes.status);
235
- res.set('Content-Type', fetchRes.headers.get('content-type') || 'application/json');
236
- return res.send(errBody);
237
- }
238
-
239
- // 流式响应(以客户端请求意图为准,不依赖上游 Content-Type)
240
- if (isStream) {
241
- res.setHeader('Content-Type', 'text/event-stream');
242
- res.setHeader('Cache-Control', 'no-cache');
243
- res.setHeader('Connection', 'keep-alive');
244
-
245
- const sseConverter = createSSEConv ? createSSEConv(effectiveModel) : null;
246
- const reader = fetchRes.body.getReader();
247
- const decoder = new TextDecoder();
248
- let streamUsage = null;
249
- let responseText = '';
250
- let toolCallCount = 0;
251
-
252
- req.on('close', () => {
253
- try { reader.cancel(); } catch (err) { /* ignore */ }
254
- });
255
-
256
- try {
257
- while (true) {
258
- const { done, value } = await reader.read();
259
- if (done) break;
260
- const chunk = decoder.decode(value, { stream: true });
261
- // 从流中提取 usage 和响应内容
262
- const lines = chunk.split('\n');
263
- for (const line of lines) {
264
- const trimmed = line.trim();
265
- if (!trimmed.startsWith('data:') || trimmed === 'data: [DONE]') continue;
266
- try {
267
- const d = JSON.parse(trimmed.slice(5).trim());
268
- if (d.usage) streamUsage = d.usage;
269
- const delta = d.choices?.[0]?.delta;
270
- if (delta?.content) responseText += delta.content;
271
- if (delta?.tool_calls) {
272
- for (const tc of delta.tool_calls) {
273
- if (tc.function?.name) toolCallCount++;
274
- }
275
- }
276
- } catch { /* ignore */ }
277
- }
278
- if (sseConverter) {
279
- const converted = sseConverter.convertChunk(chunk);
280
- if (converted) res.write(converted);
281
- } else {
282
- res.write(chunk);
283
- }
284
- }
285
-
286
- if (streamUsage) {
287
- recordUsage(proxyConfig.id, proxyConfig.target?.providerName, req.body?.model, streamUsage, false);
288
- } else if (responseText || toolCallCount > 0) {
289
- // 上游未返回 usage,从响应内容估算
290
- const inputTokens = estimateInputTokens(req.body);
291
- const outputTokens = estimateTokens(responseText) + toolCallCount * 15;
292
- recordUsage(proxyConfig.id, proxyConfig.target?.providerName, req.body?.model, {
293
- prompt_tokens: inputTokens,
294
- completion_tokens: outputTokens,
295
- }, true);
296
- }
297
- if (sseConverter) {
298
- const flushed = sseConverter.flush();
299
- if (flushed) res.write(flushed);
300
- }
301
- } catch (err) {
302
- console.error(`[${requestId}] Stream error:`, err.message);
303
- if (!res.writableEnded) {
304
- try {
305
- res.write(`data: ${JSON.stringify({ error: { message: err.message, type: 'proxy_error' } })}\n\n`);
306
- } catch { /* ignore */ }
307
- }
308
- } finally {
309
- res.end();
310
- }
311
- return;
312
- }
313
-
314
- const responseBody = await fetchRes.json();
315
- extractReasoningFromResponse(responseBody);
316
- recordUsage(proxyConfig.id, proxyConfig.target?.providerName, req.body?.model, responseBody.usage);
317
- const convertedBody = convertRes(responseBody);
318
- res.json(convertedBody);
319
-
320
- } catch (err) {
321
- console.error(`[${requestId}] Proxy error:`, err.message);
322
- res.status(500).json({ error: 'Proxy error', message: err.message });
323
- }
324
- }
325
-
326
- return app;
327
- }
328
-
329
- function buildTargetUrl(target, originalPath, isStream, effectiveModel) {
330
- const base = target.providerUrl.replace(/\/$/, '');
331
- const hasV1Suffix = base.endsWith('/v1');
332
-
333
- if (target.protocol === 'openai') {
334
- // Azure OpenAI
335
- if (target.azureDeployment) {
336
- const ver = target.azureApiVersion || '2024-02-01';
337
- return `${base}/openai/deployments/${target.azureDeployment}/chat/completions?api-version=${ver}`;
338
- }
339
- if (hasV1Suffix) return `${base}/chat/completions`;
340
- return `${base}/v1/chat/completions`;
341
- }
342
-
343
- if (target.protocol === 'anthropic') {
344
- if (hasV1Suffix) return `${base}/messages`;
345
- return `${base}/v1/messages`;
346
- }
347
-
348
- if (target.protocol === 'gemini') {
349
- const model = effectiveModel || 'gemini-pro';
350
- const action = isStream ? 'streamGenerateContent?alt=sse' : 'generateContent';
351
- return `${base}/v1beta/models/${model}:${action}`;
352
- }
353
-
354
- return base + originalPath;
355
- }
356
-
357
- module.exports = { createProxyApp };
1
+ const express = require('express');
2
+ const { detectInboundProtocol } = require('./detector');
3
+ const o2a = require('./converters/openai-to-anthropic');
4
+ const a2o = require('./converters/anthropic-to-openai');
5
+ const o2g = require('./converters/openai-to-gemini');
6
+ const g2o = require('./converters/gemini-to-openai');
7
+ const a2g = require('./converters/anthropic-to-gemini');
8
+ const g2a = require('./converters/gemini-to-anthropic');
9
+ const { recordUsage } = require('./stats-store');
10
+ const logger = require('./logger');
11
+
12
+ function createProxyApp(proxyConfigOrGetter) {
13
+ const getProxyConfig = typeof proxyConfigOrGetter === 'function'
14
+ ? proxyConfigOrGetter
15
+ : () => proxyConfigOrGetter;
16
+ const app = express();
17
+ app.use(express.json({ limit: '50mb' }));
18
+
19
+ const reasoningCache = new Map();
20
+ const MAX_CACHE_SIZE = 100;
21
+ const routeState = new Map();
22
+ const FAILURE_THRESHOLD = 3;
23
+ const OPEN_DURATION_MS = 60 * 1000;
24
+
25
+ function getReasoningKey(msg) {
26
+ const toolIds = msg.tool_calls?.map(t => t.id).join(',') || '';
27
+ return msg.content + '|' + toolIds;
28
+ }
29
+
30
+ function setReasoning(msg, reasoning) {
31
+ if (!msg?.content || !reasoning) return;
32
+ const key = getReasoningKey(msg);
33
+ if (reasoningCache.size >= MAX_CACHE_SIZE) {
34
+ const firstKey = reasoningCache.keys().next().value;
35
+ reasoningCache.delete(firstKey);
36
+ }
37
+ reasoningCache.set(key, reasoning);
38
+ }
39
+
40
+ function getReasoning(msg) {
41
+ if (!msg?.content) return undefined;
42
+ return reasoningCache.get(getReasoningKey(msg));
43
+ }
44
+
45
+ function estimateTokens(text) {
46
+ if (!text) return 0;
47
+ let tokens = 0;
48
+ for (let i = 0; i < text.length; i++) {
49
+ const code = text.charCodeAt(i);
50
+ if (code >= 0x4E00 && code <= 0x9FFF) tokens += 1.5;
51
+ else if (code >= 0x3000 && code <= 0x303F) tokens += 1;
52
+ else tokens += 0.25;
53
+ }
54
+ return Math.ceil(tokens);
55
+ }
56
+
57
+ function estimateInputTokens(body) {
58
+ if (!body?.messages) return 0;
59
+ let text = '';
60
+ for (const msg of body.messages) {
61
+ if (typeof msg.content === 'string') {
62
+ text += msg.content;
63
+ } else if (Array.isArray(msg.content)) {
64
+ for (const block of msg.content) {
65
+ if (block.text) text += block.text;
66
+ if (block.type === 'tool_result' && block.content) {
67
+ text += typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
68
+ }
69
+ }
70
+ }
71
+ if (msg.tool_calls) {
72
+ for (const tc of msg.tool_calls) {
73
+ text += (tc.function?.arguments || '') + (tc.function?.name || '');
74
+ }
75
+ }
76
+ }
77
+ if (body.tools) text += JSON.stringify(body.tools);
78
+ return estimateTokens(text);
79
+ }
80
+
81
+ function injectReasoningToMessages(messages) {
82
+ if (!Array.isArray(messages)) return;
83
+ for (const msg of messages) {
84
+ if (msg.role === 'assistant' && msg.reasoning_content === undefined) {
85
+ const reasoning = getReasoning(msg);
86
+ msg.reasoning_content = reasoning || '';
87
+ }
88
+ }
89
+ }
90
+
91
+ function extractReasoningFromResponse(body) {
92
+ const choice = body.choices?.[0];
93
+ const message = choice?.message;
94
+ if (message?.role === 'assistant' && message.reasoning_content) {
95
+ setReasoning(message, message.reasoning_content);
96
+ }
97
+ }
98
+
99
+ // Extract thinking blocks from Anthropic response and cache by assistant text content
100
+ function extractAnthropicThinking(body) {
101
+ const content = body.content;
102
+ if (!Array.isArray(content)) return;
103
+ const thinkingBlocks = content.filter(b => b.type === 'thinking');
104
+ if (thinkingBlocks.length === 0) return;
105
+ const textContent = content.filter(b => b.type === 'text').map(b => b.text).join('');
106
+ if (!textContent) return;
107
+ const msg = { content: textContent, tool_calls: null };
108
+ setReasoning(msg, thinkingBlocks);
109
+ }
110
+
111
+ // Inject cached thinking blocks into Anthropic-format assistant messages
112
+ function injectAnthropicThinking(messages) {
113
+ if (!Array.isArray(messages)) return;
114
+ for (const msg of messages) {
115
+ if (msg.role !== 'assistant' || !Array.isArray(msg.content)) continue;
116
+ const hasThinking = msg.content.some(b => b.type === 'thinking');
117
+ if (hasThinking) continue;
118
+ const textContent = msg.content.filter(b => b.type === 'text').map(b => b.text).join('');
119
+ if (!textContent) continue;
120
+ const cached = getReasoning({ content: textContent, tool_calls: null });
121
+ if (cached) {
122
+ msg.content = [...cached, ...msg.content];
123
+ }
124
+ }
125
+ }
126
+
127
+ function getRouteState(proxyId) {
128
+ if (!routeState.has(proxyId)) {
129
+ routeState.set(proxyId, { rrIndex: 0, metrics: new Map() });
130
+ }
131
+ return routeState.get(proxyId);
132
+ }
133
+
134
+ function getMetrics(proxyId, providerId) {
135
+ const state = getRouteState(proxyId);
136
+ if (!state.metrics.has(providerId)) {
137
+ state.metrics.set(providerId, {
138
+ successCount: 0,
139
+ failureCount: 0,
140
+ avgLatencyMs: null,
141
+ lastErrorAt: 0,
142
+ circuitOpenUntil: 0,
143
+ });
144
+ }
145
+ return state.metrics.get(providerId);
146
+ }
147
+
148
+ function isRetryableStatus(status) {
149
+ return status === 401
150
+ || status === 403
151
+ || status === 408
152
+ || status === 409
153
+ || status === 425
154
+ || status === 429
155
+ || status >= 500;
156
+ }
157
+
158
+ function isProviderAvailable(metrics) {
159
+ return !metrics.circuitOpenUntil || metrics.circuitOpenUntil <= Date.now();
160
+ }
161
+
162
+ function recordSuccess(proxyId, providerId, latencyMs) {
163
+ const metrics = getMetrics(proxyId, providerId);
164
+ metrics.successCount += 1;
165
+ metrics.lastErrorAt = 0;
166
+ metrics.failureCount = 0;
167
+ metrics.circuitOpenUntil = 0;
168
+ metrics.avgLatencyMs = metrics.avgLatencyMs == null
169
+ ? latencyMs
170
+ : Math.round(metrics.avgLatencyMs * 0.7 + latencyMs * 0.3);
171
+ }
172
+
173
+ function recordFailure(proxyId, providerId) {
174
+ const metrics = getMetrics(proxyId, providerId);
175
+ metrics.failureCount += 1;
176
+ metrics.lastErrorAt = Date.now();
177
+ if (metrics.failureCount >= FAILURE_THRESHOLD) {
178
+ metrics.circuitOpenUntil = Date.now() + OPEN_DURATION_MS;
179
+ }
180
+ }
181
+
182
+ // ==================== API Key 轮转 ====================
183
+
184
+ const keyPoolState = new Map();
185
+ const KEY_COOLDOWN_MS = 60 * 1000;
186
+
187
+ function getKeyState(providerId, apiKeys) {
188
+ if (!keyPoolState.has(providerId)) {
189
+ keyPoolState.set(providerId, {
190
+ keys: apiKeys || [],
191
+ index: 0,
192
+ cooldowns: new Map(), // key -> cooldownUntil timestamp
193
+ });
194
+ }
195
+ return keyPoolState.get(providerId);
196
+ }
197
+
198
+ function selectKey(providerId, apiKeys) {
199
+ if (!apiKeys || apiKeys.length === 0) return '';
200
+ // Filter out disabled keys (enabled defaults to true)
201
+ const enabledKeys = apiKeys.filter(k => (typeof k === 'object' ? k.enabled !== false : true));
202
+ if (enabledKeys.length === 0) return '';
203
+ // Normalize to string array (handle {key, alias} objects)
204
+ const keys = enabledKeys.map(k => typeof k === 'string' ? k : k.key);
205
+ if (keys.length === 1) return keys[0];
206
+
207
+ const state = getKeyState(providerId, keys);
208
+ // Sync keys in case they changed
209
+ state.keys = keys;
210
+ const now = Date.now();
211
+
212
+ // Clean expired cooldowns
213
+ for (const [key, until] of state.cooldowns) {
214
+ if (until <= now) state.cooldowns.delete(key);
215
+ }
216
+
217
+ // Try to find an available key starting from current index
218
+ for (let i = 0; i < keys.length; i++) {
219
+ const idx = (state.index + i) % keys.length;
220
+ const key = keys[idx];
221
+ if (!state.cooldowns.has(key)) {
222
+ state.index = (idx + 1) % keys.length;
223
+ return key;
224
+ }
225
+ }
226
+
227
+ // All keys on cooldown — pick the one with shortest remaining cooldown
228
+ let earliest = Infinity;
229
+ let bestKey = keys[0];
230
+ for (const [key, until] of state.cooldowns) {
231
+ if (keys.includes(key) && until < earliest) {
232
+ earliest = until;
233
+ bestKey = key;
234
+ }
235
+ }
236
+ state.index = (keys.indexOf(bestKey) + 1) % keys.length;
237
+ return bestKey;
238
+ }
239
+
240
+ function markKeyCooldown(providerId, key) {
241
+ const state = keyPoolState.get(providerId);
242
+ if (state) {
243
+ state.cooldowns.set(key, Date.now() + KEY_COOLDOWN_MS);
244
+ logger.log(`[KeyPool] ${providerId} key ${key.slice(0, 8)}... cooldown 60s`);
245
+ }
246
+ }
247
+
248
+ function buildCandidates(proxyConfig) {
249
+ const target = proxyConfig.target;
250
+ if (!target || !Array.isArray(target.providerPool) || target.providerPool.length === 0) return [];
251
+ const pool = target.providerPool;
252
+
253
+ const ordered = pool.map((item, index) => ({
254
+ ...item,
255
+ providerId: item.providerId || `provider-${index}`,
256
+ weight: Math.max(1, parseInt(item.weight, 10) || 1),
257
+ }));
258
+
259
+ const strategy = target.routingStrategy || 'primary_fallback';
260
+ const proxyId = proxyConfig.id || 'default';
261
+
262
+ const byHealth = ordered.filter(item => isProviderAvailable(getMetrics(proxyId, item.providerId)));
263
+ const healthy = byHealth.length > 0 ? byHealth : ordered;
264
+
265
+ if (strategy === 'weighted') {
266
+ // 加权随机选择第一个候选,剩余按权重排序作为 fallback
267
+ const totalWeight = healthy.reduce((sum, c) => sum + c.weight, 0);
268
+ let rand = Math.random() * totalWeight;
269
+ let picked = healthy.length - 1;
270
+ for (let i = 0; i < healthy.length; i++) {
271
+ rand -= healthy[i].weight;
272
+ if (rand <= 0) { picked = i; break; }
273
+ }
274
+ const first = healthy[picked];
275
+ const rest = healthy.filter((_, i) => i !== picked).sort((a, b) => b.weight - a.weight);
276
+ return [first, ...rest];
277
+ }
278
+
279
+ if (strategy === 'fastest') {
280
+ return healthy.slice().sort((a, b) => {
281
+ const am = getMetrics(proxyId, a.providerId).avgLatencyMs ?? Number.MAX_SAFE_INTEGER;
282
+ const bm = getMetrics(proxyId, b.providerId).avgLatencyMs ?? Number.MAX_SAFE_INTEGER;
283
+ return am - bm;
284
+ });
285
+ }
286
+
287
+ if (strategy === 'round_robin') {
288
+ const state = getRouteState(proxyId);
289
+ const start = state.rrIndex % healthy.length;
290
+ state.rrIndex = (state.rrIndex + 1) % healthy.length;
291
+ return healthy.slice(start).concat(healthy.slice(0, start));
292
+ }
293
+
294
+ return healthy;
295
+ }
296
+
297
+ function getRoutingHealth(proxyConfig) {
298
+ const proxyId = proxyConfig.id || 'default';
299
+ const target = proxyConfig.target || {};
300
+ const pool = Array.isArray(target.providerPool) && target.providerPool.length > 0
301
+ ? target.providerPool
302
+ : [];
303
+ return pool.map(item => {
304
+ const metrics = getMetrics(proxyId, item.providerId || 'primary');
305
+ return {
306
+ providerId: item.providerId || 'primary',
307
+ providerName: item.providerName || '',
308
+ successCount: metrics.successCount,
309
+ failureCount: metrics.failureCount,
310
+ avgLatencyMs: metrics.avgLatencyMs,
311
+ circuitOpenUntil: metrics.circuitOpenUntil,
312
+ available: isProviderAvailable(metrics),
313
+ };
314
+ });
315
+ }
316
+
317
+ app.use((req, res, next) => {
318
+ res.header('Access-Control-Allow-Origin', '*');
319
+ res.header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
320
+ res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-Api-Key');
321
+ if (req.method === 'OPTIONS') return res.sendStatus(200);
322
+ next();
323
+ });
324
+
325
+ app.use((req, res, next) => {
326
+ const proxyConfig = getProxyConfig();
327
+ if (!proxyConfig.requireAuth || !proxyConfig.authToken) return next();
328
+ const token = req.headers.authorization?.replace('Bearer ', '') || req.headers['x-api-key'];
329
+ if (token !== proxyConfig.authToken) {
330
+ return res.status(401).json({ error: 'Unauthorized' });
331
+ }
332
+ next();
333
+ });
334
+
335
+ app.post('/v1/chat/completions', handleRequest);
336
+ app.post('/v1/messages', handleRequest);
337
+ app.get('/_internal/routing-health', (req, res) => {
338
+ res.json({
339
+ proxy: getProxyConfig()?.id || null,
340
+ providers: getRoutingHealth(getProxyConfig() || {}),
341
+ });
342
+ });
343
+
344
+ async function handleRequest(req, res) {
345
+ const requestId = `req-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`;
346
+ const proxyConfig = getProxyConfig();
347
+ const inboundProtocol = detectInboundProtocol(req, req.body);
348
+ const candidates = buildCandidates(proxyConfig);
349
+
350
+ if (candidates.length === 0) {
351
+ return res.status(500).json({ error: 'Proxy target not configured' });
352
+ }
353
+
354
+ const isStream = req.body?.stream === true;
355
+ const proxyId = proxyConfig.id || 'default';
356
+ const inboundModel = req.body?.model;
357
+ const effectiveModel = proxyConfig.target?.defaultModel || inboundModel;
358
+ const baseRequestBody = effectiveModel ? { ...req.body, model: effectiveModel } : { ...req.body };
359
+
360
+ // Inject cached reasoning for OpenAI inbound (OpenAI protocol lacks reasoning_content)
361
+ if (inboundProtocol === 'openai') {
362
+ injectReasoningToMessages(baseRequestBody.messages);
363
+ }
364
+
365
+ // Pre-build request templates for each protocol
366
+ const passthrough = (body, model) => ({ ...body, model: body.model || model });
367
+ const requestTemplates = {};
368
+ requestTemplates.openai = inboundProtocol === 'openai' ? passthrough(baseRequestBody, effectiveModel) :
369
+ inboundProtocol === 'anthropic' ? a2o.convertRequest(baseRequestBody, effectiveModel) :
370
+ inboundProtocol === 'gemini' ? g2o.convertRequest(baseRequestBody, effectiveModel) :
371
+ passthrough(baseRequestBody, effectiveModel);
372
+ requestTemplates.anthropic = inboundProtocol === 'anthropic' ? passthrough(baseRequestBody, effectiveModel) :
373
+ inboundProtocol === 'openai' ? o2a.convertRequest(baseRequestBody, effectiveModel) :
374
+ inboundProtocol === 'gemini' ? (() => { const r = g2a.convertRequest(baseRequestBody, effectiveModel); return { body: r, nameToId: r.nameToId }; })() :
375
+ passthrough(baseRequestBody, effectiveModel);
376
+ requestTemplates.gemini = inboundProtocol === 'gemini' ? passthrough(baseRequestBody, effectiveModel) :
377
+ inboundProtocol === 'openai' ? o2g.convertRequest(baseRequestBody, effectiveModel) :
378
+ inboundProtocol === 'anthropic' ? a2g.convertRequest(baseRequestBody, effectiveModel) :
379
+ passthrough(baseRequestBody, effectiveModel);
380
+
381
+ logger.log(`[${requestId}] ${(inboundProtocol || 'unknown').toUpperCase()} -> mixed | path=${req.path}`);
382
+
383
+ for (const candidate of candidates) {
384
+ const targetProtocol = candidate.protocol;
385
+ const isAzure = !!candidate.azureDeployment && /azure/i.test(candidate.providerUrl);
386
+
387
+ let convertRes;
388
+ let createSSEConv;
389
+ let nameToId = null;
390
+ let targetBody;
391
+
392
+ if (inboundProtocol === 'openai' && targetProtocol === 'anthropic') {
393
+ targetBody = { ...requestTemplates.anthropic };
394
+ convertRes = o2a.convertResponse;
395
+ createSSEConv = o2a.createSSEConverter;
396
+ } else if (inboundProtocol === 'anthropic' && targetProtocol === 'openai') {
397
+ targetBody = { ...requestTemplates.openai };
398
+ convertRes = a2o.convertResponse;
399
+ createSSEConv = a2o.createSSEConverter;
400
+ } else if (inboundProtocol === 'openai' && targetProtocol === 'gemini') {
401
+ targetBody = { ...requestTemplates.gemini };
402
+ convertRes = o2g.convertResponse;
403
+ createSSEConv = o2g.createSSEConverter;
404
+ } else if (inboundProtocol === 'gemini' && targetProtocol === 'openai') {
405
+ const result = g2o.convertRequest(baseRequestBody, effectiveModel);
406
+ nameToId = result.nameToId;
407
+ const { nameToId: _, ...bodyOnly } = result;
408
+ targetBody = bodyOnly;
409
+ convertRes = g2o.convertResponse;
410
+ createSSEConv = g2o.createSSEConverter;
411
+ } else if (inboundProtocol === 'anthropic' && targetProtocol === 'gemini') {
412
+ targetBody = { ...requestTemplates.gemini };
413
+ convertRes = a2g.convertResponse;
414
+ createSSEConv = a2g.createSSEConverter;
415
+ } else if (inboundProtocol === 'gemini' && targetProtocol === 'anthropic') {
416
+ const tpl = requestTemplates.anthropic;
417
+ targetBody = { ...tpl.body };
418
+ nameToId = tpl.nameToId;
419
+ convertRes = g2a.convertResponse;
420
+ createSSEConv = () => g2a.createSSEConverter(nameToId);
421
+ } else {
422
+ targetBody = { ...baseRequestBody };
423
+ convertRes = (body) => body;
424
+ createSSEConv = null;
425
+ }
426
+
427
+ // If candidate has a specific model override, apply it
428
+ if (candidate.model) {
429
+ targetBody.model = candidate.model;
430
+ }
431
+
432
+ const candidateModel = candidate.model || effectiveModel;
433
+ logger.log(`[${requestId}] -> ${candidate.providerName} (${targetProtocol}) | model=${candidateModel || '(default)'}`);
434
+
435
+ if (isStream && candidate.protocol === 'openai' && !isAzure) {
436
+ targetBody.stream_options = { include_usage: true };
437
+ }
438
+
439
+ const targetUrl = buildTargetUrl(candidate, req.path, isStream, candidateModel);
440
+ // Forward client headers (preserve anthropic-beta, user-agent, etc.)
441
+ const skipHeaders = new Set(['host', 'connection', 'content-length', 'content-type', 'accept', 'authorization', 'x-api-key', 'anthropic-version']);
442
+ const headers = {};
443
+ for (const [key, val] of Object.entries(req.headers)) {
444
+ if (!skipHeaders.has(key.toLowerCase())) headers[key] = val;
445
+ }
446
+ headers['Content-Type'] = 'application/json';
447
+ headers['Accept'] = isStream ? 'text/event-stream' : 'application/json';
448
+
449
+ const maxKeyRetries = (candidate.apiKeys || []).filter(k => typeof k === 'object' ? k.enabled !== false : true).length || 1;
450
+ let lastKeyError = null;
451
+
452
+ for (let keyAttempt = 0; keyAttempt < maxKeyRetries; keyAttempt++) {
453
+ const currentKey = selectKey(candidate.providerId, candidate.apiKeys || []);
454
+ const keyHeaders = { ...headers };
455
+
456
+ if (candidate.protocol === 'openai') {
457
+ if (isAzure) keyHeaders['api-key'] = currentKey;
458
+ else keyHeaders['Authorization'] = `Bearer ${currentKey}`;
459
+ } else if (candidate.protocol === 'gemini') {
460
+ keyHeaders['x-goog-api-key'] = currentKey;
461
+ } else if (candidate.protocol === 'anthropic') {
462
+ keyHeaders['X-Api-Key'] = currentKey;
463
+ keyHeaders['anthropic-version'] = keyHeaders['anthropic-version'] || '2023-06-01';
464
+ keyHeaders['Authorization'] = `Bearer ${currentKey}`;
465
+ }
466
+
467
+ const startedAt = Date.now();
468
+
469
+ try {
470
+ const fetchRes = await fetch(targetUrl, {
471
+ method: 'POST',
472
+ headers: keyHeaders,
473
+ body: JSON.stringify(targetBody),
474
+ signal: AbortSignal.timeout(300000),
475
+ });
476
+
477
+ if (!fetchRes.ok) {
478
+ const errBody = await fetchRes.text();
479
+ const error = Object.assign(new Error(errBody.slice(0, 500) || `HTTP ${fetchRes.status}`), { status: fetchRes.status });
480
+ // 429: mark key cooldown and retry with next key
481
+ if (fetchRes.status === 429 && maxKeyRetries > 1) {
482
+ markKeyCooldown(candidate.providerId, currentKey);
483
+ lastKeyError = error;
484
+ logger.log(`[${requestId}] 429 on key ${currentKey.slice(0, 8)}..., trying next key`);
485
+ continue;
486
+ }
487
+ if (isRetryableStatus(fetchRes.status)) {
488
+ throw error;
489
+ }
490
+ return res.status(fetchRes.status).json({ error: error.message });
491
+ }
492
+
493
+ recordSuccess(proxyId, candidate.providerId, Date.now() - startedAt);
494
+ const keyEntry = (candidate.apiKeys || []).find(k => (typeof k === 'string' ? k : k.key) === currentKey);
495
+ const alias = keyEntry && typeof keyEntry === 'object' ? keyEntry.alias : '';
496
+ const keyLabel = alias ? `${alias}(…${currentKey.slice(-4)})` : (currentKey ? `…${currentKey.slice(-4)}` : '-');
497
+ logger.log(`[${requestId}] ✓ ${candidate.providerName} | model=${candidateModel || '(default)'} key=${keyLabel} (${Date.now() - startedAt}ms)`);
498
+
499
+ if (isStream) {
500
+ res.setHeader('Content-Type', 'text/event-stream');
501
+ res.setHeader('Cache-Control', 'no-cache');
502
+ res.setHeader('Connection', 'keep-alive');
503
+
504
+ const sseConverter = createSSEConv ? createSSEConv(effectiveModel) : null;
505
+ const reader = fetchRes.body.getReader();
506
+ const decoder = new TextDecoder();
507
+ let streamUsage = null;
508
+ let responseText = '';
509
+ let toolCallCount = 0;
510
+
511
+ req.on('close', () => {
512
+ try { reader.cancel(); } catch { /* ignore */ }
513
+ });
514
+
515
+ try {
516
+ while (true) {
517
+ const { done, value } = await reader.read();
518
+ if (done) break;
519
+ const chunk = decoder.decode(value, { stream: true });
520
+ for (const line of chunk.split('\n')) {
521
+ const trimmed = line.trim();
522
+ if (!trimmed.startsWith('data:') || trimmed === 'data: [DONE]') continue;
523
+ try {
524
+ const d = JSON.parse(trimmed.slice(5).trim());
525
+ if (d.usage) streamUsage = d.usage;
526
+ const delta = d.choices?.[0]?.delta;
527
+ if (delta?.content) responseText += delta.content;
528
+ if (delta?.tool_calls) {
529
+ for (const tc of delta.tool_calls) {
530
+ if (tc.function?.name) toolCallCount++;
531
+ }
532
+ }
533
+ } catch { /* ignore */ }
534
+ }
535
+
536
+ if (sseConverter) {
537
+ const converted = sseConverter.convertChunk(chunk);
538
+ if (converted) res.write(converted);
539
+ } else {
540
+ res.write(chunk);
541
+ }
542
+ }
543
+
544
+ if (streamUsage) {
545
+ recordUsage(proxyConfig.id, candidate.providerName, candidateModel, streamUsage, false);
546
+ } else if (responseText || toolCallCount > 0) {
547
+ const inputTokens = estimateInputTokens(req.body);
548
+ const outputTokens = estimateTokens(responseText) + toolCallCount * 15;
549
+ recordUsage(proxyConfig.id, candidate.providerName, candidateModel, {
550
+ prompt_tokens: inputTokens,
551
+ completion_tokens: outputTokens,
552
+ }, true);
553
+ }
554
+
555
+ if (sseConverter) {
556
+ const flushed = sseConverter.flush();
557
+ if (flushed) res.write(flushed);
558
+ }
559
+ } catch (err) {
560
+ recordFailure(proxyId, candidate.providerId);
561
+ logger.error(`[${requestId}] Stream error:`, err.message);
562
+ if (!res.writableEnded) {
563
+ try {
564
+ res.write(`data: ${JSON.stringify({ error: { message: err.message, type: 'proxy_error' } })}\n\n`);
565
+ } catch { /* ignore */ }
566
+ }
567
+ } finally {
568
+ res.end();
569
+ }
570
+ return;
571
+ }
572
+
573
+ const responseBody = await fetchRes.json();
574
+ extractReasoningFromResponse(responseBody);
575
+ extractAnthropicThinking(responseBody);
576
+ recordUsage(proxyConfig.id, candidate.providerName, candidateModel, responseBody.usage);
577
+ const convertedBody = convertRes(responseBody);
578
+ return res.json(convertedBody);
579
+ } catch (err) {
580
+ // 429 already handled by key retry loop above
581
+ if (err?.status === 429 && maxKeyRetries > 1) {
582
+ lastKeyError = err;
583
+ continue; // retry with next key
584
+ }
585
+ recordFailure(proxyId, candidate.providerId);
586
+ logger.error(`[${requestId}] ✗ ${candidate.providerName} | model=${candidateModel || '(default)'} - ${err.message}`);
587
+ if (err?.status && !isRetryableStatus(err.status)) {
588
+ return res.status(err.status).json({ error: err.message });
589
+ }
590
+ break; // break key retry loop, continue to next candidate
591
+ }
592
+ break; // success, exit key retry loop
593
+ } // end key retry loop
594
+
595
+ // All keys exhausted with 429 — trigger circuit breaker
596
+ if (lastKeyError) {
597
+ recordFailure(proxyId, candidate.providerId);
598
+ logger.error(`[${requestId}] ✗ ${candidate.providerName} | all keys rate-limited (429)`);
599
+ }
600
+ } // end candidate loop
601
+
602
+ logger.error(`[${requestId}] 所有供应商均失败`);
603
+ return res.status(502).json({ error: 'All providers failed' });
604
+ }
605
+
606
+ return app;
607
+ }
608
+
609
/**
 * Resolve the upstream endpoint URL for a forwarded request.
 *
 * @param {Object} target - Provider descriptor; reads `providerUrl`, `protocol`,
 *   and (for Azure-flavoured OpenAI) `azureDeployment` / `azureApiVersion`.
 * @param {string} originalPath - The incoming request path; used verbatim as a
 *   fallback when the protocol is not one of openai/anthropic/gemini.
 * @param {boolean} isStream - Whether the client requested streaming (only
 *   affects the Gemini action segment).
 * @param {string} effectiveModel - Model name; required by Gemini URLs
 *   (falls back to 'gemini-pro' when empty).
 * @returns {string} Fully-qualified upstream URL.
 */
function buildTargetUrl(target, originalPath, isStream, effectiveModel) {
  // Normalize: drop a single trailing slash so segments join cleanly.
  const root = target.providerUrl.replace(/\/$/, '');
  // Some providers configure their base URL with '/v1' already included.
  const endsWithV1 = root.endsWith('/v1');

  switch (target.protocol) {
    case 'openai': {
      if (target.azureDeployment) {
        // Azure OpenAI uses a deployment-scoped path plus an api-version query.
        const apiVersion = target.azureApiVersion || '2024-02-01';
        return `${root}/openai/deployments/${target.azureDeployment}/chat/completions?api-version=${apiVersion}`;
      }
      return endsWithV1 ? `${root}/chat/completions` : `${root}/v1/chat/completions`;
    }
    case 'anthropic':
      return endsWithV1 ? `${root}/messages` : `${root}/v1/messages`;
    case 'gemini': {
      // Gemini encodes the model and the action in the path itself.
      const modelName = effectiveModel || 'gemini-pro';
      const verb = isStream ? 'streamGenerateContent?alt=sse' : 'generateContent';
      return `${root}/v1beta/models/${modelName}:${verb}`;
    }
    default:
      // Unknown protocol: pass the original path through untouched.
      return root + originalPath;
  }
}
635
+
636
// Public API: createProxyApp(proxyConfigOrGetter) builds the Express app that
// translates requests between provider protocols (OpenAI/Anthropic/Gemini).
module.exports = { createProxyApp };