free-coding-models 0.3.11 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +112 -1134
- package/bin/free-coding-models.js +13 -167
- package/package.json +2 -3
- package/src/cli-help.js +0 -18
- package/src/config.js +5 -117
- package/src/endpoint-installer.js +26 -64
- package/src/key-handler.js +56 -437
- package/src/legacy-proxy-cleanup.js +432 -0
- package/src/openclaw.js +69 -108
- package/src/opencode-config.js +48 -0
- package/src/opencode.js +6 -248
- package/src/overlays.js +23 -517
- package/src/product-flags.js +14 -0
- package/src/render-helpers.js +2 -34
- package/src/render-table.js +10 -18
- package/src/testfcm.js +90 -43
- package/src/token-usage-reader.js +9 -38
- package/src/tool-launchers.js +235 -409
- package/src/tool-metadata.js +0 -7
- package/src/utils.js +3 -68
- package/bin/fcm-proxy-daemon.js +0 -242
- package/src/account-manager.js +0 -634
- package/src/anthropic-translator.js +0 -440
- package/src/daemon-manager.js +0 -527
- package/src/error-classifier.js +0 -157
- package/src/log-reader.js +0 -195
- package/src/opencode-sync.js +0 -200
- package/src/proxy-foreground.js +0 -234
- package/src/proxy-server.js +0 -1506
- package/src/proxy-sync.js +0 -591
- package/src/proxy-topology.js +0 -85
- package/src/request-transformer.js +0 -180
- package/src/responses-translator.js +0 -423
- package/src/token-stats.js +0 -320
package/src/proxy-server.js
DELETED
|
@@ -1,1506 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file lib/proxy-server.js
|
|
3
|
-
* @description Multi-account rotation proxy server with SSE streaming,
|
|
4
|
-
* token stats tracking, Anthropic/OpenAI translation, and persistent request logging.
|
|
5
|
-
*
|
|
6
|
-
* Design:
|
|
7
|
-
* - Binds to 127.0.0.1 only (never 0.0.0.0)
|
|
8
|
-
* - SSE is piped through without buffering (upstreamRes.pipe(clientRes))
|
|
9
|
-
* - HTTP/HTTPS module is chosen BEFORE the request is created (single code-path)
|
|
10
|
-
* - x-ratelimit-* headers are stripped from all responses forwarded to clients
|
|
11
|
-
* - Retry loop: first attempt uses sticky session fingerprint; subsequent
|
|
12
|
-
* retries use fresh P2C to avoid hitting the same failed account
|
|
13
|
-
* - Claude-family aliases are resolved inside the proxy so Claude Code can
|
|
14
|
-
* keep emitting `claude-*` / `sonnet` / `haiku` style model ids safely
|
|
15
|
-
*
|
|
16
|
-
* @exports ProxyServer
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
import http from 'node:http'
|
|
20
|
-
import https from 'node:https'
|
|
21
|
-
import { AccountManager } from './account-manager.js'
|
|
22
|
-
import { classifyError } from './error-classifier.js'
|
|
23
|
-
import { applyThinkingBudget, compressContext } from './request-transformer.js'
|
|
24
|
-
import { TokenStats } from './token-stats.js'
|
|
25
|
-
import { createHash } from 'node:crypto'
|
|
26
|
-
import {
|
|
27
|
-
translateAnthropicToOpenAI,
|
|
28
|
-
translateOpenAIToAnthropic,
|
|
29
|
-
createAnthropicSSETransformer,
|
|
30
|
-
estimateAnthropicTokens,
|
|
31
|
-
} from './anthropic-translator.js'
|
|
32
|
-
import {
|
|
33
|
-
translateResponsesToOpenAI,
|
|
34
|
-
translateOpenAIToResponses,
|
|
35
|
-
createResponsesSSETransformer,
|
|
36
|
-
} from './responses-translator.js'
|
|
37
|
-
|
|
38
|
-
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
39
|
-
|
|
40
|
-
/**
 * Choose the http or https module based on the URL scheme.
 * MUST be called before creating the request (single code-path).
 *
 * 📖 The scheme is matched case-insensitively and must be exactly `https:`.
 * 📖 `new URL()` lower-cases the scheme, so `_forwardRequest` derives the port
 * 📖 from a normalised `protocol === 'https:'`; matching the same way here keeps
 * 📖 the module choice and the port choice in agreement for inputs such as
 * 📖 `HTTPS://host`. Anything that is not `https:` falls back to plain http.
 *
 * @param {string} url - Upstream base URL of the account.
 * @returns {typeof import('http') | typeof import('https')}
 */
function selectClient(url) {
  return /^https:/i.test(url) ? https : http
}
|
|
50
|
-
|
|
51
|
-
/**
 * Build a shallow copy of a headers object without rate-limit metadata.
 *
 * Every entry whose name starts with `x-ratelimit` (compared
 * case-insensitively) is left out; all other entries are copied as-is.
 * The input object is not modified.
 *
 * @param {Record<string, string | string[]>} headers
 * @returns {Record<string, string | string[]>}
 */
function stripRateLimitHeaders(headers) {
  const kept = {}
  for (const name of Object.keys(headers)) {
    if (name.toLowerCase().startsWith('x-ratelimit')) continue
    kept[name] = headers[name]
  }
  return kept
}
|
|
66
|
-
|
|
67
|
-
// 📖 Upper bound for an accepted request body (10 MB) — protects against memory exhaustion
const MAX_BODY_SIZE = 10 * 1024 * 1024

/**
 * Collect a request stream into a single string.
 *
 * Chunks are accumulated until the stream ends and are decoded only once,
 * after concatenation. As soon as the running total exceeds MAX_BODY_SIZE the
 * stream is destroyed and the promise rejects.
 *
 * @param {http.IncomingMessage} req
 * @returns {Promise<string>} the full body
 * @throws {Error} rejects with `statusCode === 413` when the body exceeds
 *   MAX_BODY_SIZE, or with the stream's own error if it emits `'error'`
 */
function readBody(req) {
  return new Promise((resolve, reject) => {
    const parts = []
    let received = 0

    const onData = (part) => {
      received += part.length
      if (received <= MAX_BODY_SIZE) {
        parts.push(part)
        return
      }
      // 📖 Over the limit: stop reading first, then report 413 to the caller
      req.destroy()
      const tooLarge = new Error('Request body too large')
      tooLarge.statusCode = 413
      reject(tooLarge)
    }

    req.on('data', onData)
    req.on('end', () => {
      resolve(Buffer.concat(parts).toString())
    })
    req.on('error', reject)
  })
}
|
|
96
|
-
|
|
97
|
-
/**
 * Send a JSON response with the given status code.
 *
 * A string body is treated as already serialised and written verbatim; any
 * other value goes through JSON.stringify. If the response headers have
 * already been sent the call does nothing.
 *
 * @param {http.ServerResponse} res
 * @param {number} statusCode
 * @param {object | string} body
 */
function sendJson(res, statusCode, body) {
  if (!res.headersSent) {
    // 📖 Serialise before touching the response so a stringify failure writes nothing
    const payload = typeof body !== 'string' ? JSON.stringify(body) : body
    res.writeHead(statusCode, { 'content-type': 'application/json' })
    res.end(payload)
  }
}
|
|
110
|
-
|
|
111
|
-
/**
 * 📖 Return only the pathname of the request target, dropping any query string,
 * 📖 so that Anthropic requests Claude Code sends with `?beta=true` still match
 * 📖 the plain route paths.
 *
 * A missing or empty `req.url` is treated as `/`. If the target cannot be
 * parsed as a URL it is returned unchanged.
 *
 * @param {http.IncomingMessage} req
 * @returns {string}
 */
function getRequestPathname(req) {
  const rawTarget = req.url || '/'
  let parsed
  try {
    // 📖 The base only exists to make relative targets parseable; it is never used for I/O
    parsed = new URL(rawTarget, 'http://127.0.0.1')
  } catch {
    return rawTarget
  }
  return parsed.pathname || '/'
}
|
|
125
|
-
|
|
126
|
-
/**
 * Canonicalise a client-supplied model id.
 *
 * Non-string and blank values yield `null`. Otherwise the value is trimmed and
 * a leading `fcm-proxy/` prefix, if present, is removed.
 *
 * @param {unknown} modelId
 * @returns {string | null}
 */
function normalizeRequestedModel(modelId) {
  const cleaned = typeof modelId === 'string' ? modelId.trim() : ''
  return cleaned ? cleaned.replace(/^fcm-proxy\//, '') : null
}
|
|
132
|
-
|
|
133
|
-
/**
 * Produce a routing table with exactly four slots — `model`, `modelOpus`,
 * `modelSonnet`, `modelHaiku` — each passed through normalizeRequestedModel.
 * A missing routing object, or a missing slot, results in `null` for that slot.
 *
 * @param {{ model?: string|null, modelOpus?: string|null, modelSonnet?: string|null, modelHaiku?: string|null } | null} [anthropicRouting]
 * @returns {{ model: string|null, modelOpus: string|null, modelSonnet: string|null, modelHaiku: string|null }}
 */
function normalizeAnthropicRouting(anthropicRouting = null) {
  const pick = (slot) => normalizeRequestedModel(anthropicRouting?.[slot])
  return {
    model: pick('model'),
    modelOpus: pick('modelOpus'),
    modelSonnet: pick('modelSonnet'),
    modelHaiku: pick('modelHaiku'),
  }
}
|
|
141
|
-
|
|
142
|
-
/**
 * Map a model id onto a Claude family name.
 *
 * 📖 Classification is keyword based rather than exact-id based, because
 * 📖 Claude Code emits both short aliases (`sonnet`) and full versioned ids
 * 📖 (`claude-3-5-sonnet-*`).
 *
 * Matching is case-insensitive and runs on the normalised id:
 *   - `default`                              → 'default'
 *   - `opus`, `opusplan`, optional `[1m]`    → 'opus'
 *   - `sonnet`, optional `[1m]`              → 'sonnet'
 *   - `haiku`                                → 'haiku'
 *   - `claude-*` containing opus / haiku / sonnet (checked in that order)
 *   - anything else                          → null
 *
 * @param {unknown} modelId
 * @returns {'default' | 'opus' | 'sonnet' | 'haiku' | null}
 */
function classifyClaudeVirtualModel(modelId) {
  const id = normalizeRequestedModel(modelId)?.toLowerCase()
  if (!id) return null

  // Short aliases
  switch (id) {
    case 'default':
      return 'default'
    case 'haiku':
      return 'haiku'
    default:
      break
  }
  if (/^opus(?:plan)?(?:\[1m\])?$/.test(id)) return 'opus'
  if (/^sonnet(?:\[1m\])?$/.test(id)) return 'sonnet'

  // Full ids: only `claude-*` names are inspected for a family keyword
  if (!id.startsWith('claude-')) return null
  for (const family of ['opus', 'haiku', 'sonnet']) {
    if (id.includes(family)) return family
  }
  return null
}
|
|
161
|
-
|
|
162
|
-
/**
 * Translate a requested model id into the configured routing target.
 *
 * Returns `null` when the routing table has no target at all. Otherwise the
 * family-specific slot (opus / sonnet / haiku) wins when it is set, and the
 * generic `model` slot is the fallback.
 *
 * 📖 Ids that match no family also resolve to the generic `model` slot,
 * 📖 mirroring free-claude-code's fallback to MODEL.
 *
 * @param {unknown} modelId
 * @param {{ model?: string|null, modelOpus?: string|null, modelSonnet?: string|null, modelHaiku?: string|null } | null} anthropicRouting
 * @returns {string | null}
 */
function resolveAnthropicMappedModel(modelId, anthropicRouting) {
  const { model, modelOpus, modelSonnet, modelHaiku } = normalizeAnthropicRouting(anthropicRouting)

  const hasAnyTarget = Boolean(model || modelOpus || modelSonnet || modelHaiku)
  if (!hasAnyTarget) return null

  switch (classifyClaudeVirtualModel(modelId)) {
    case 'opus':
      return modelOpus || model
    case 'sonnet':
      return modelSonnet || model
    case 'haiku':
      return modelHaiku || model
    default:
      return model
  }
}
|
|
177
|
-
|
|
178
|
-
/**
 * Decide whether a request is allowed to use the proxy and extract an optional
 * model hint from its credentials.
 *
 * 📖 Accepts both standard Bearer auth and the Anthropic SDK `x-api-key` header:
 * 📖 Claude Code sends credentials via x-api-key, not Authorization: Bearer.
 *
 * Rules, in order:
 *   1. No `expectedToken` configured → every request is authorized.
 *   2. `Authorization: Bearer <token>` — the scheme name is matched
 *      case-insensitively, since HTTP auth schemes are case-insensitive.
 *   3. `x-api-key: <token>`.
 *      For 2 and 3 the token is accepted when it equals `expectedToken`, or when
 *      it is `<expectedToken>:<model>`; the part after the colon becomes
 *      `modelHint`. An empty hint after the colon is rejected outright.
 *   4. Any token starting with `sk-ant-` is accepted (see note in the body).
 *   5. Everything else is unauthorized.
 *
 * @param {string | undefined} authorization - raw Authorization header
 * @param {string | null} expectedToken - configured proxy API key, if any
 * @param {string | null} [xApiKey] - raw x-api-key header
 * @returns {{ authorized: boolean, modelHint: string | null }}
 */
function parseProxyAuthorizationHeader(authorization, expectedToken, xApiKey = null) {
  if (!expectedToken) return { authorized: true, modelHint: null }

  // `null` means "no Bearer header at all"; '' means "Bearer header with empty token"
  const bearerToken = typeof authorization === 'string' && /^bearer /i.test(authorization)
    ? authorization.slice('Bearer '.length).trim()
    : null
  const apiKeyToken = typeof xApiKey === 'string' ? xApiKey.trim() : ''

  // Shared check for both credential sources. Returns a final verdict, or
  // `null` when the candidate is unrelated to the proxy token and the next
  // source should be tried.
  const matchProxyToken = (candidate) => {
    if (candidate === expectedToken) return { authorized: true, modelHint: null }
    if (!candidate.startsWith(`${expectedToken}:`)) return null
    const modelHint = normalizeRequestedModel(candidate.slice(expectedToken.length + 1))
    return modelHint ? { authorized: true, modelHint } : { authorized: false, modelHint: null }
  }

  // 📖 Check standard Bearer auth first
  if (bearerToken !== null) {
    const verdict = matchProxyToken(bearerToken)
    if (verdict) return verdict
  }

  // 📖 Fallback: Anthropic SDK x-api-key header
  if (apiKeyToken) {
    const verdict = matchProxyToken(apiKeyToken)
    if (verdict) return verdict
  }

  // 📖 Accept real Anthropic API keys (sk-ant-*) — Claude Code uses its own stored key
  // 📖 even when ANTHROPIC_BASE_URL is overridden to point at the proxy.
  // 📖 The proxy is bound to 127.0.0.1 only, so accepting these keys is safe.
  const candidateToken = bearerToken !== null ? bearerToken : apiKeyToken
  if (candidateToken.startsWith('sk-ant-')) {
    return { authorized: true, modelHint: null }
  }

  return { authorized: false, modelHint: null }
}
|
|
215
|
-
|
|
216
|
-
// ─── ProxyServer ─────────────────────────────────────────────────────────────
|
|
217
|
-
|
|
218
|
-
export class ProxyServer {
|
|
219
|
-
/**
|
|
220
|
-
* @param {{
|
|
221
|
-
* port?: number,
|
|
222
|
-
* accounts?: Array<{ id: string, providerKey: string, apiKey: string, modelId: string, url: string }>,
|
|
223
|
-
* retries?: number,
|
|
224
|
-
* proxyApiKey?: string,
|
|
225
|
-
* anthropicRouting?: { model?: string|null, modelOpus?: string|null, modelSonnet?: string|null, modelHaiku?: string|null },
|
|
226
|
-
* accountManagerOpts?: object,
|
|
227
|
-
* tokenStatsOpts?: object,
|
|
228
|
-
* thinkingConfig?: { mode: string, budget_tokens?: number },
|
|
229
|
-
* compressionOpts?: { level?: number, toolResultMaxChars?: number, thinkingMaxChars?: number, maxTotalChars?: number },
|
|
230
|
-
* upstreamTimeoutMs?: number
|
|
231
|
-
* }} opts
|
|
232
|
-
*/
|
|
233
|
-
constructor({
|
|
234
|
-
port = 0,
|
|
235
|
-
accounts = [],
|
|
236
|
-
retries = 8,
|
|
237
|
-
proxyApiKey = null,
|
|
238
|
-
anthropicRouting = null,
|
|
239
|
-
accountManagerOpts = {},
|
|
240
|
-
tokenStatsOpts = {},
|
|
241
|
-
thinkingConfig,
|
|
242
|
-
compressionOpts,
|
|
243
|
-
upstreamTimeoutMs = 45_000,
|
|
244
|
-
} = {}) {
|
|
245
|
-
this._port = port
|
|
246
|
-
this._retries = retries
|
|
247
|
-
this._thinkingConfig = thinkingConfig
|
|
248
|
-
this._compressionOpts = compressionOpts
|
|
249
|
-
this._proxyApiKey = proxyApiKey
|
|
250
|
-
this._anthropicRouting = normalizeAnthropicRouting(anthropicRouting)
|
|
251
|
-
this._accounts = accounts
|
|
252
|
-
this._upstreamTimeoutMs = upstreamTimeoutMs
|
|
253
|
-
// 📖 Progressive backoff delays (ms) for retries — first attempt is immediate,
|
|
254
|
-
// subsequent ones add increasing delay + random jitter (0-100ms) to avoid
|
|
255
|
-
// re-hitting the same rate-limit window on 429s from providers
|
|
256
|
-
this._retryDelays = [0, 300, 800]
|
|
257
|
-
this._accountManager = new AccountManager(accounts, accountManagerOpts)
|
|
258
|
-
this._tokenStats = new TokenStats(tokenStatsOpts)
|
|
259
|
-
this._startTime = Date.now()
|
|
260
|
-
this._running = false
|
|
261
|
-
this._listeningPort = null
|
|
262
|
-
this._server = http.createServer((req, res) => this._handleRequest(req, res))
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
/**
|
|
266
|
-
* Start listening on 127.0.0.1.
|
|
267
|
-
*
|
|
268
|
-
* @returns {Promise<{ port: number }>}
|
|
269
|
-
*/
|
|
270
|
-
start() {
|
|
271
|
-
return new Promise((resolve, reject) => {
|
|
272
|
-
this._server.once('error', reject)
|
|
273
|
-
this._server.listen(this._port, '127.0.0.1', () => {
|
|
274
|
-
this._server.removeListener('error', reject)
|
|
275
|
-
this._running = true
|
|
276
|
-
this._listeningPort = this._server.address().port
|
|
277
|
-
resolve({ port: this._listeningPort })
|
|
278
|
-
})
|
|
279
|
-
})
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
/**
|
|
283
|
-
* Save stats and close the server.
|
|
284
|
-
*
|
|
285
|
-
* @returns {Promise<void>}
|
|
286
|
-
*/
|
|
287
|
-
stop() {
|
|
288
|
-
this._tokenStats.save()
|
|
289
|
-
return new Promise(resolve => {
|
|
290
|
-
this._server.close(() => {
|
|
291
|
-
this._running = false
|
|
292
|
-
this._listeningPort = null
|
|
293
|
-
resolve()
|
|
294
|
-
})
|
|
295
|
-
})
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
getStatus() {
|
|
299
|
-
return {
|
|
300
|
-
running: this._running,
|
|
301
|
-
port: this._listeningPort,
|
|
302
|
-
accountCount: this._accounts.length,
|
|
303
|
-
healthByAccount: this._accountManager.getAllHealth(),
|
|
304
|
-
anthropicRouting: this._anthropicRouting,
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
_getAuthContext(req) {
|
|
309
|
-
return parseProxyAuthorizationHeader(req.headers.authorization, this._proxyApiKey, req.headers['x-api-key'])
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
_isAuthorized(req) {
|
|
313
|
-
return this._getAuthContext(req).authorized
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
_resolveAnthropicRequestedModel(modelId, authModelHint = null) {
|
|
317
|
-
const requestedModel = normalizeRequestedModel(modelId)
|
|
318
|
-
if (requestedModel && this._accountManager.hasAccountsForModel(requestedModel)) {
|
|
319
|
-
return requestedModel
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
const mappedModel = resolveAnthropicMappedModel(requestedModel, this._anthropicRouting)
|
|
323
|
-
if (mappedModel && this._accountManager.hasAccountsForModel(mappedModel)) {
|
|
324
|
-
return mappedModel
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
// 📖 Claude Code still emits internal aliases / tier model ids for some
|
|
328
|
-
// 📖 background and helper paths. Keep the old auth-token hint as a final
|
|
329
|
-
// 📖 compatibility fallback for already-launched sessions, but the primary
|
|
330
|
-
// 📖 routing path is now the free-claude-code style proxy-side mapping above.
|
|
331
|
-
if (authModelHint && this._accountManager.hasAccountsForModel(authModelHint)) {
|
|
332
|
-
if (!requestedModel || classifyClaudeVirtualModel(requestedModel) || requestedModel.toLowerCase().startsWith('claude-')) {
|
|
333
|
-
return authModelHint
|
|
334
|
-
}
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
// 📖 Last resort: when the requested model is a Claude virtual model and no routing resolved,
|
|
338
|
-
// 📖 fall back to the first available account's model (free-claude-code behavior)
|
|
339
|
-
if (!requestedModel || classifyClaudeVirtualModel(requestedModel) || requestedModel.toLowerCase().startsWith('claude-')) {
|
|
340
|
-
const firstModel = this._accounts[0]?.modelId
|
|
341
|
-
if (firstModel) return firstModel
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
return requestedModel
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// ── Request routing ────────────────────────────────────────────────────────
|
|
348
|
-
|
|
349
|
-
_handleRequest(req, res) {
|
|
350
|
-
const pathname = getRequestPathname(req)
|
|
351
|
-
|
|
352
|
-
// 📖 Root endpoint is unauthenticated so a browser hit on http://127.0.0.1:{port}/
|
|
353
|
-
// 📖 gives a useful status payload instead of a misleading Unauthorized error.
|
|
354
|
-
if (req.method === 'GET' && pathname === '/') {
|
|
355
|
-
return this._handleRoot(res)
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
// 📖 Health endpoint is unauthenticated so external monitors can probe it
|
|
359
|
-
if (req.method === 'GET' && pathname === '/v1/health') {
|
|
360
|
-
return this._handleHealth(res)
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
const authContext = this._getAuthContext(req)
|
|
364
|
-
if (!authContext.authorized) {
|
|
365
|
-
return sendJson(res, 401, { error: 'Unauthorized' })
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
if (req.method === 'GET' && pathname === '/v1/models') {
|
|
369
|
-
this._handleModels(res)
|
|
370
|
-
} else if (req.method === 'GET' && pathname === '/v1/stats') {
|
|
371
|
-
this._handleStats(res)
|
|
372
|
-
} else if (req.method === 'POST' && pathname === '/v1/chat/completions') {
|
|
373
|
-
this._handleChatCompletions(req, res).catch(err => {
|
|
374
|
-
console.error('[proxy] Internal error:', err)
|
|
375
|
-
// 📖 Return 413 for body-too-large, generic 500 for everything else — never leak stack traces
|
|
376
|
-
const status = err.statusCode === 413 ? 413 : 500
|
|
377
|
-
const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
|
|
378
|
-
sendJson(res, status, { error: msg })
|
|
379
|
-
})
|
|
380
|
-
} else if (req.method === 'POST' && pathname === '/v1/messages') {
|
|
381
|
-
// 📖 Anthropic Messages API translation — enables Claude Code compatibility
|
|
382
|
-
this._handleAnthropicMessages(req, res, authContext).catch(err => {
|
|
383
|
-
console.error('[proxy] Internal error:', err)
|
|
384
|
-
const status = err.statusCode === 413 ? 413 : 500
|
|
385
|
-
const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
|
|
386
|
-
sendJson(res, status, { error: msg })
|
|
387
|
-
})
|
|
388
|
-
} else if (req.method === 'POST' && pathname === '/v1/messages/count_tokens') {
|
|
389
|
-
this._handleAnthropicCountTokens(req, res).catch(err => {
|
|
390
|
-
console.error('[proxy] Internal error:', err)
|
|
391
|
-
const status = err.statusCode === 413 ? 413 : 500
|
|
392
|
-
const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
|
|
393
|
-
sendJson(res, status, { error: msg })
|
|
394
|
-
})
|
|
395
|
-
} else if (req.method === 'POST' && pathname === '/v1/responses') {
|
|
396
|
-
this._handleResponses(req, res).catch(err => {
|
|
397
|
-
console.error('[proxy] Internal error:', err)
|
|
398
|
-
const status = err.statusCode === 413 ? 413 : 500
|
|
399
|
-
const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
|
|
400
|
-
sendJson(res, status, { error: msg })
|
|
401
|
-
})
|
|
402
|
-
} else if (req.method === 'POST' && pathname === '/v1/completions') {
|
|
403
|
-
// These legacy/alternative OpenAI endpoints are not supported by the proxy.
|
|
404
|
-
// Return 501 (not 404) so callers get a clear signal instead of silently failing.
|
|
405
|
-
sendJson(res, 501, {
|
|
406
|
-
error: 'Not Implemented',
|
|
407
|
-
message: `${pathname} is not supported by this proxy. Use POST /v1/chat/completions instead.`,
|
|
408
|
-
})
|
|
409
|
-
} else {
|
|
410
|
-
sendJson(res, 404, { error: 'Not found' })
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
// ── GET /v1/models ─────────────────────────────────────────────────────────
|
|
415
|
-
|
|
416
|
-
_handleModels(res) {
|
|
417
|
-
const seen = new Set()
|
|
418
|
-
const data = []
|
|
419
|
-
const models = []
|
|
420
|
-
for (const acct of this._accounts) {
|
|
421
|
-
const publicModelId = acct.proxyModelId || acct.modelId
|
|
422
|
-
if (!seen.has(publicModelId)) {
|
|
423
|
-
seen.add(publicModelId)
|
|
424
|
-
const modelEntry = {
|
|
425
|
-
id: publicModelId,
|
|
426
|
-
slug: publicModelId,
|
|
427
|
-
name: publicModelId,
|
|
428
|
-
object: 'model',
|
|
429
|
-
created: Math.floor(Date.now() / 1000),
|
|
430
|
-
owned_by: 'proxy',
|
|
431
|
-
}
|
|
432
|
-
data.push(modelEntry)
|
|
433
|
-
models.push(modelEntry)
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
sendJson(res, 200, { object: 'list', data, models })
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
// ── POST /v1/chat/completions ──────────────────────────────────────────────
|
|
440
|
-
|
|
441
|
-
async _handleChatCompletions(clientReq, clientRes) {
|
|
442
|
-
// 1. Read and parse request body
|
|
443
|
-
const rawBody = await readBody(clientReq)
|
|
444
|
-
let body
|
|
445
|
-
try {
|
|
446
|
-
body = JSON.parse(rawBody)
|
|
447
|
-
} catch {
|
|
448
|
-
return sendJson(clientRes, 400, { error: 'Invalid JSON body' })
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
// 2. Optional transformations (both functions return new objects, no mutation)
|
|
452
|
-
if (this._compressionOpts && Array.isArray(body.messages)) {
|
|
453
|
-
body = { ...body, messages: compressContext(body.messages, this._compressionOpts) }
|
|
454
|
-
}
|
|
455
|
-
if (this._thinkingConfig) {
|
|
456
|
-
body = applyThinkingBudget(body, this._thinkingConfig)
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// 3. Session fingerprint for first-attempt sticky routing
|
|
460
|
-
const fingerprint = createHash('sha256')
|
|
461
|
-
.update(JSON.stringify(body.messages?.slice(-1) ?? []))
|
|
462
|
-
.digest('hex')
|
|
463
|
-
.slice(0, 16)
|
|
464
|
-
|
|
465
|
-
const requestedModel = typeof body.model === 'string'
|
|
466
|
-
? body.model.replace(/^fcm-proxy\//, '')
|
|
467
|
-
: undefined
|
|
468
|
-
|
|
469
|
-
// 4. Early check: if a specific model is requested but has no registered accounts,
|
|
470
|
-
// return 404 immediately with a clear message rather than silently failing.
|
|
471
|
-
if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
|
|
472
|
-
return sendJson(clientRes, 404, {
|
|
473
|
-
error: 'Model not found',
|
|
474
|
-
message: `Model '${requestedModel}' is not available through this proxy. Use GET /v1/models to list available models.`,
|
|
475
|
-
})
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
const formatSwitchReason = (classified) => {
|
|
479
|
-
switch (classified?.type) {
|
|
480
|
-
case 'QUOTA_EXHAUSTED':
|
|
481
|
-
return 'quota'
|
|
482
|
-
case 'RATE_LIMITED':
|
|
483
|
-
return '429'
|
|
484
|
-
case 'MODEL_NOT_FOUND':
|
|
485
|
-
return '404'
|
|
486
|
-
case 'MODEL_CAPACITY':
|
|
487
|
-
return 'capacity'
|
|
488
|
-
case 'SERVER_ERROR':
|
|
489
|
-
return '5xx'
|
|
490
|
-
case 'NETWORK_ERROR':
|
|
491
|
-
return 'network'
|
|
492
|
-
default:
|
|
493
|
-
return 'retry'
|
|
494
|
-
}
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
// 5. Retry loop with progressive backoff
|
|
498
|
-
let pendingSwitchReason = null
|
|
499
|
-
let previousAccount = null
|
|
500
|
-
for (let attempt = 0; attempt < this._retries; attempt++) {
|
|
501
|
-
// 📖 Apply backoff delay before retries (first attempt is immediate)
|
|
502
|
-
const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
|
|
503
|
-
if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
|
|
504
|
-
|
|
505
|
-
// First attempt: respect sticky session.
|
|
506
|
-
// Subsequent retries: fresh P2C (don't hammer the same failed account).
|
|
507
|
-
const selectOpts = attempt === 0
|
|
508
|
-
? { sessionFingerprint: fingerprint, requestedModel }
|
|
509
|
-
: { requestedModel }
|
|
510
|
-
const account = this._accountManager.selectAccount(selectOpts)
|
|
511
|
-
if (!account) break // No available accounts → fall through to 503
|
|
512
|
-
|
|
513
|
-
const result = await this._forwardRequest(account, body, clientRes, {
|
|
514
|
-
requestedModel,
|
|
515
|
-
switched: attempt > 0,
|
|
516
|
-
switchReason: pendingSwitchReason,
|
|
517
|
-
switchedFromProviderKey: previousAccount?.providerKey,
|
|
518
|
-
switchedFromModelId: previousAccount?.modelId,
|
|
519
|
-
})
|
|
520
|
-
|
|
521
|
-
// Response fully sent (success JSON or SSE pipe established)
|
|
522
|
-
if (result.done) return
|
|
523
|
-
|
|
524
|
-
// Error path: classify → record → retry or forward error
|
|
525
|
-
const { statusCode, responseBody, responseHeaders, networkError } = result
|
|
526
|
-
const classified = classifyError(
|
|
527
|
-
networkError ? 0 : statusCode,
|
|
528
|
-
responseBody || '',
|
|
529
|
-
responseHeaders || {}
|
|
530
|
-
)
|
|
531
|
-
|
|
532
|
-
this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
|
|
533
|
-
if (responseHeaders) {
|
|
534
|
-
const quotaUpdated = this._accountManager.updateQuota(account.id, responseHeaders)
|
|
535
|
-
this._persistQuotaSnapshot(account, quotaUpdated)
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
if (!classified.shouldRetry) {
|
|
539
|
-
// Non-retryable (auth error, unknown) → return upstream response directly
|
|
540
|
-
return sendJson(
|
|
541
|
-
clientRes,
|
|
542
|
-
statusCode || 500,
|
|
543
|
-
responseBody || JSON.stringify({ error: 'Upstream error' })
|
|
544
|
-
)
|
|
545
|
-
}
|
|
546
|
-
// shouldRetry === true → next attempt
|
|
547
|
-
pendingSwitchReason = formatSwitchReason(classified)
|
|
548
|
-
previousAccount = account
|
|
549
|
-
}
|
|
550
|
-
|
|
551
|
-
// All retries consumed, or no accounts available from the start
|
|
552
|
-
sendJson(clientRes, 503, { error: 'All accounts exhausted or unavailable' })
|
|
553
|
-
}
|
|
554
|
-
|
|
555
|
-
// ── Upstream forwarding ────────────────────────────────────────────────────
|
|
556
|
-
|
|
557
|
-
/**
|
|
558
|
-
* Forward one attempt to the upstream API.
|
|
559
|
-
*
|
|
560
|
-
* Resolves with:
|
|
561
|
-
* { done: true }
|
|
562
|
-
* — The response has been committed to clientRes (success JSON sent, or
|
|
563
|
-
* SSE pipe established). The retry loop must return immediately.
|
|
564
|
-
*
|
|
565
|
-
* { done: false, statusCode, responseBody, responseHeaders, networkError }
|
|
566
|
-
* — An error occurred; the retry loop decides whether to retry or give up.
|
|
567
|
-
*
|
|
568
|
-
* @param {{ id: string, apiKey: string, modelId: string, url: string }} account
|
|
569
|
-
* @param {object} body
|
|
570
|
-
* @param {http.ServerResponse} clientRes
|
|
571
|
-
* @param {{ requestedModel?: string, switched?: boolean, switchReason?: string|null, switchedFromProviderKey?: string, switchedFromModelId?: string }} [logContext]
|
|
572
|
-
* @returns {Promise<{ done: boolean }>}
|
|
573
|
-
*/
|
|
574
|
-
_forwardRequest(account, body, clientRes, logContext = {}) {
|
|
575
|
-
return new Promise(resolve => {
|
|
576
|
-
// Replace client-supplied model name with the account's model ID
|
|
577
|
-
const newBody = { ...body, model: account.modelId }
|
|
578
|
-
const bodyStr = JSON.stringify(newBody)
|
|
579
|
-
|
|
580
|
-
// Build the full upstream URL from the account's base URL
|
|
581
|
-
const baseUrl = account.url.replace(/\/$/, '')
|
|
582
|
-
let upstreamUrl
|
|
583
|
-
try {
|
|
584
|
-
upstreamUrl = new URL(baseUrl + '/chat/completions')
|
|
585
|
-
} catch {
|
|
586
|
-
// 📖 Malformed upstream URL — resolve as network error so retry loop can continue
|
|
587
|
-
return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
// Choose http or https module BEFORE creating the request
|
|
591
|
-
const client = selectClient(account.url)
|
|
592
|
-
const startTime = Date.now()
|
|
593
|
-
|
|
594
|
-
const requestOptions = {
|
|
595
|
-
hostname: upstreamUrl.hostname,
|
|
596
|
-
port: upstreamUrl.port || (upstreamUrl.protocol === 'https:' ? 443 : 80),
|
|
597
|
-
path: upstreamUrl.pathname + (upstreamUrl.search || ''),
|
|
598
|
-
method: 'POST',
|
|
599
|
-
headers: {
|
|
600
|
-
'authorization': `Bearer ${account.apiKey}`,
|
|
601
|
-
'content-type': 'application/json',
|
|
602
|
-
'content-length': Buffer.byteLength(bodyStr),
|
|
603
|
-
},
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
const upstreamReq = client.request(requestOptions, upstreamRes => {
|
|
607
|
-
const { statusCode } = upstreamRes
|
|
608
|
-
const headers = upstreamRes.headers
|
|
609
|
-
const contentType = headers['content-type'] || ''
|
|
610
|
-
const isSSE = contentType.includes('text/event-stream')
|
|
611
|
-
|
|
612
|
-
if (statusCode >= 200 && statusCode < 300) {
|
|
613
|
-
if (isSSE) {
|
|
614
|
-
// ── SSE passthrough: MUST NOT buffer ──────────────────────────
|
|
615
|
-
const strippedHeaders = stripRateLimitHeaders(headers)
|
|
616
|
-
clientRes.writeHead(statusCode, {
|
|
617
|
-
...strippedHeaders,
|
|
618
|
-
'content-type': 'text/event-stream',
|
|
619
|
-
'cache-control': 'no-cache',
|
|
620
|
-
})
|
|
621
|
-
|
|
622
|
-
// Tap the data stream to capture usage from the last data line.
|
|
623
|
-
// Register BEFORE pipe() so both listeners share the same event queue.
|
|
624
|
-
// 📖 sseLineBuffer persists between chunks to handle lines split across boundaries
|
|
625
|
-
let lastChunkData = ''
|
|
626
|
-
let sseLineBuffer = ''
|
|
627
|
-
upstreamRes.on('data', chunk => {
|
|
628
|
-
sseLineBuffer += chunk.toString()
|
|
629
|
-
const lines = sseLineBuffer.split('\n')
|
|
630
|
-
// 📖 Last element may be an incomplete line — keep it for next chunk
|
|
631
|
-
sseLineBuffer = lines.pop() || ''
|
|
632
|
-
for (const line of lines) {
|
|
633
|
-
if (line.startsWith('data: ') && !line.includes('[DONE]')) {
|
|
634
|
-
lastChunkData = line.slice(6).trim()
|
|
635
|
-
}
|
|
636
|
-
}
|
|
637
|
-
})
|
|
638
|
-
|
|
639
|
-
upstreamRes.on('end', () => {
|
|
640
|
-
let promptTokens = 0
|
|
641
|
-
let completionTokens = 0
|
|
642
|
-
try {
|
|
643
|
-
const parsed = JSON.parse(lastChunkData)
|
|
644
|
-
if (parsed.usage) {
|
|
645
|
-
promptTokens = parsed.usage.prompt_tokens || 0
|
|
646
|
-
completionTokens = parsed.usage.completion_tokens || 0
|
|
647
|
-
}
|
|
648
|
-
} catch { /* no usage in stream — ignore */ }
|
|
649
|
-
// Always record every upstream attempt so the log page shows real requests
|
|
650
|
-
this._tokenStats.record({
|
|
651
|
-
accountId: account.id,
|
|
652
|
-
modelId: account.modelId,
|
|
653
|
-
providerKey: account.providerKey,
|
|
654
|
-
statusCode,
|
|
655
|
-
requestType: 'chat.completions',
|
|
656
|
-
promptTokens,
|
|
657
|
-
completionTokens,
|
|
658
|
-
latencyMs: Date.now() - startTime,
|
|
659
|
-
success: true,
|
|
660
|
-
requestedModelId: logContext.requestedModel,
|
|
661
|
-
switched: logContext.switched === true,
|
|
662
|
-
switchReason: logContext.switchReason,
|
|
663
|
-
switchedFromProviderKey: logContext.switchedFromProviderKey,
|
|
664
|
-
switchedFromModelId: logContext.switchedFromModelId,
|
|
665
|
-
})
|
|
666
|
-
this._accountManager.recordSuccess(account.id, Date.now() - startTime)
|
|
667
|
-
const quotaUpdated = this._accountManager.updateQuota(account.id, headers)
|
|
668
|
-
this._persistQuotaSnapshot(account, quotaUpdated)
|
|
669
|
-
})
|
|
670
|
-
|
|
671
|
-
// 📖 Error handlers on both sides of the pipe to prevent uncaught errors
|
|
672
|
-
upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
|
|
673
|
-
clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
|
|
674
|
-
|
|
675
|
-
// Pipe after listeners are registered; upstream → client, no buffering
|
|
676
|
-
upstreamRes.pipe(clientRes)
|
|
677
|
-
|
|
678
|
-
// ── Downstream disconnect cleanup ─────────────────────────────
|
|
679
|
-
// If the client closes its connection mid-stream, destroy the
|
|
680
|
-
// upstream request and response promptly so we don't hold the
|
|
681
|
-
// upstream connection open indefinitely.
|
|
682
|
-
clientRes.on('close', () => {
|
|
683
|
-
if (!upstreamRes.destroyed) upstreamRes.destroy()
|
|
684
|
-
if (!upstreamReq.destroyed) upstreamReq.destroy()
|
|
685
|
-
})
|
|
686
|
-
|
|
687
|
-
// The pipe handles the rest asynchronously; signal done to retry loop
|
|
688
|
-
resolve({ done: true })
|
|
689
|
-
} else {
|
|
690
|
-
// ── JSON response ─────────────────────────────────────────────
|
|
691
|
-
const chunks = []
|
|
692
|
-
upstreamRes.on('data', chunk => chunks.push(chunk))
|
|
693
|
-
upstreamRes.on('end', () => {
|
|
694
|
-
const responseBody = Buffer.concat(chunks).toString()
|
|
695
|
-
const latencyMs = Date.now() - startTime
|
|
696
|
-
|
|
697
|
-
const quotaUpdated = this._accountManager.updateQuota(account.id, headers)
|
|
698
|
-
this._accountManager.recordSuccess(account.id, latencyMs)
|
|
699
|
-
this._persistQuotaSnapshot(account, quotaUpdated)
|
|
700
|
-
|
|
701
|
-
// Always record every upstream attempt so the log page shows real requests.
|
|
702
|
-
// Extract tokens if upstream provides them; default to 0 when not present.
|
|
703
|
-
let promptTokens = 0
|
|
704
|
-
let completionTokens = 0
|
|
705
|
-
try {
|
|
706
|
-
const parsed = JSON.parse(responseBody)
|
|
707
|
-
if (parsed.usage) {
|
|
708
|
-
promptTokens = parsed.usage.prompt_tokens || 0
|
|
709
|
-
completionTokens = parsed.usage.completion_tokens || 0
|
|
710
|
-
}
|
|
711
|
-
} catch { /* non-JSON body — tokens stay 0 */ }
|
|
712
|
-
this._tokenStats.record({
|
|
713
|
-
accountId: account.id,
|
|
714
|
-
modelId: account.modelId,
|
|
715
|
-
providerKey: account.providerKey,
|
|
716
|
-
statusCode,
|
|
717
|
-
requestType: 'chat.completions',
|
|
718
|
-
promptTokens,
|
|
719
|
-
completionTokens,
|
|
720
|
-
latencyMs,
|
|
721
|
-
success: true,
|
|
722
|
-
requestedModelId: logContext.requestedModel,
|
|
723
|
-
switched: logContext.switched === true,
|
|
724
|
-
switchReason: logContext.switchReason,
|
|
725
|
-
switchedFromProviderKey: logContext.switchedFromProviderKey,
|
|
726
|
-
switchedFromModelId: logContext.switchedFromModelId,
|
|
727
|
-
})
|
|
728
|
-
|
|
729
|
-
// Forward stripped response to client
|
|
730
|
-
const strippedHeaders = stripRateLimitHeaders(headers)
|
|
731
|
-
clientRes.writeHead(statusCode, {
|
|
732
|
-
...strippedHeaders,
|
|
733
|
-
'content-type': 'application/json',
|
|
734
|
-
})
|
|
735
|
-
clientRes.end(responseBody)
|
|
736
|
-
resolve({ done: true })
|
|
737
|
-
})
|
|
738
|
-
}
|
|
739
|
-
} else {
|
|
740
|
-
// ── Error response: buffer for classification in retry loop ─────
|
|
741
|
-
const chunks = []
|
|
742
|
-
upstreamRes.on('data', chunk => chunks.push(chunk))
|
|
743
|
-
upstreamRes.on('end', () => {
|
|
744
|
-
const latencyMs = Date.now() - startTime
|
|
745
|
-
// Log every failed upstream attempt so the log page shows real requests
|
|
746
|
-
this._tokenStats.record({
|
|
747
|
-
accountId: account.id,
|
|
748
|
-
modelId: account.modelId,
|
|
749
|
-
providerKey: account.providerKey,
|
|
750
|
-
statusCode,
|
|
751
|
-
requestType: 'chat.completions',
|
|
752
|
-
promptTokens: 0,
|
|
753
|
-
completionTokens: 0,
|
|
754
|
-
latencyMs,
|
|
755
|
-
success: false,
|
|
756
|
-
requestedModelId: logContext.requestedModel,
|
|
757
|
-
switched: logContext.switched === true,
|
|
758
|
-
switchReason: logContext.switchReason,
|
|
759
|
-
switchedFromProviderKey: logContext.switchedFromProviderKey,
|
|
760
|
-
switchedFromModelId: logContext.switchedFromModelId,
|
|
761
|
-
})
|
|
762
|
-
resolve({
|
|
763
|
-
done: false,
|
|
764
|
-
statusCode,
|
|
765
|
-
responseBody: Buffer.concat(chunks).toString(),
|
|
766
|
-
responseHeaders: headers,
|
|
767
|
-
networkError: false,
|
|
768
|
-
})
|
|
769
|
-
})
|
|
770
|
-
}
|
|
771
|
-
})
|
|
772
|
-
|
|
773
|
-
upstreamReq.on('error', err => {
|
|
774
|
-
// TCP / DNS / timeout errors — log as network failure
|
|
775
|
-
const latencyMs = Date.now() - startTime
|
|
776
|
-
this._tokenStats.record({
|
|
777
|
-
accountId: account.id,
|
|
778
|
-
modelId: account.modelId,
|
|
779
|
-
providerKey: account.providerKey,
|
|
780
|
-
statusCode: 0,
|
|
781
|
-
requestType: 'chat.completions',
|
|
782
|
-
promptTokens: 0,
|
|
783
|
-
completionTokens: 0,
|
|
784
|
-
latencyMs,
|
|
785
|
-
success: false,
|
|
786
|
-
requestedModelId: logContext.requestedModel,
|
|
787
|
-
switched: logContext.switched === true,
|
|
788
|
-
switchReason: logContext.switchReason,
|
|
789
|
-
switchedFromProviderKey: logContext.switchedFromProviderKey,
|
|
790
|
-
switchedFromModelId: logContext.switchedFromModelId,
|
|
791
|
-
})
|
|
792
|
-
// TCP / DNS / timeout errors
|
|
793
|
-
resolve({
|
|
794
|
-
done: false,
|
|
795
|
-
statusCode: 0,
|
|
796
|
-
responseBody: err.message,
|
|
797
|
-
responseHeaders: {},
|
|
798
|
-
networkError: true,
|
|
799
|
-
})
|
|
800
|
-
})
|
|
801
|
-
|
|
802
|
-
// Abort the upstream request if it exceeds the configured timeout.
|
|
803
|
-
// This prevents indefinite hangs (e.g. nvidia returning 504 after 302 s).
|
|
804
|
-
// The 'timeout' event fires but does NOT automatically abort; we must call destroy().
|
|
805
|
-
upstreamReq.setTimeout(this._upstreamTimeoutMs, () => {
|
|
806
|
-
upstreamReq.destroy(new Error(`Upstream request timed out after ${this._upstreamTimeoutMs}ms`))
|
|
807
|
-
})
|
|
808
|
-
|
|
809
|
-
upstreamReq.write(bodyStr)
|
|
810
|
-
upstreamReq.end()
|
|
811
|
-
})
|
|
812
|
-
}
|
|
813
|
-
|
|
814
|
-
/**
|
|
815
|
-
* Persist a quota snapshot for the given account into TokenStats.
|
|
816
|
-
* Called after every `AccountManager.updateQuota()` so TUI can read fresh data.
|
|
817
|
-
* Never exposes apiKey.
|
|
818
|
-
*
|
|
819
|
-
* @param {{ id: string, providerKey?: string, modelId?: string }} account
|
|
820
|
-
* @param {boolean} quotaUpdated
|
|
821
|
-
*/
|
|
822
|
-
_persistQuotaSnapshot(account, quotaUpdated = true) {
|
|
823
|
-
if (!quotaUpdated) return
|
|
824
|
-
const health = this._accountManager.getHealth(account.id)
|
|
825
|
-
if (!health) return
|
|
826
|
-
this._tokenStats.updateQuotaSnapshot(account.id, {
|
|
827
|
-
quotaPercent: health.quotaPercent,
|
|
828
|
-
...(account.providerKey !== undefined && { providerKey: account.providerKey }),
|
|
829
|
-
...(account.modelId !== undefined && { modelId: account.modelId }),
|
|
830
|
-
})
|
|
831
|
-
}
|
|
832
|
-
|
|
833
|
-
// ── GET /v1/health ──────────────────────────────────────────────────────────
|
|
834
|
-
|
|
835
|
-
/**
|
|
836
|
-
* 📖 Friendly unauthenticated landing endpoint for browsers and quick local checks.
|
|
837
|
-
*/
|
|
838
|
-
_handleRoot(res) {
|
|
839
|
-
const status = this.getStatus()
|
|
840
|
-
const uniqueModels = new Set(this._accounts.map(acct => acct.proxyModelId || acct.modelId)).size
|
|
841
|
-
sendJson(res, 200, {
|
|
842
|
-
status: 'ok',
|
|
843
|
-
service: 'fcm-proxy-v2',
|
|
844
|
-
running: status.running,
|
|
845
|
-
accountCount: status.accountCount,
|
|
846
|
-
modelCount: uniqueModels,
|
|
847
|
-
endpoints: {
|
|
848
|
-
health: '/v1/health',
|
|
849
|
-
models: '/v1/models',
|
|
850
|
-
stats: '/v1/stats',
|
|
851
|
-
},
|
|
852
|
-
})
|
|
853
|
-
}
|
|
854
|
-
|
|
855
|
-
/**
|
|
856
|
-
* 📖 Health endpoint for daemon liveness checks. Unauthenticated so external
|
|
857
|
-
* monitors (TUI, launchctl, systemd) can probe without needing the token.
|
|
858
|
-
*/
|
|
859
|
-
_handleHealth(res) {
|
|
860
|
-
const status = this.getStatus()
|
|
861
|
-
sendJson(res, 200, {
|
|
862
|
-
status: 'ok',
|
|
863
|
-
uptime: process.uptime(),
|
|
864
|
-
port: status.port,
|
|
865
|
-
accountCount: status.accountCount,
|
|
866
|
-
running: status.running,
|
|
867
|
-
})
|
|
868
|
-
}
|
|
869
|
-
|
|
870
|
-
// ── GET /v1/stats ──────────────────────────────────────────────────────────
|
|
871
|
-
|
|
872
|
-
/**
|
|
873
|
-
* 📖 Authenticated stats endpoint — returns per-account health, token stats summary,
|
|
874
|
-
* and proxy uptime. Useful for monitoring and debugging.
|
|
875
|
-
*/
|
|
876
|
-
_handleStats(res) {
|
|
877
|
-
const healthByAccount = this._accountManager.getAllHealth()
|
|
878
|
-
const summary = this._tokenStats.getSummary()
|
|
879
|
-
|
|
880
|
-
// 📖 Compute totals from the summary data
|
|
881
|
-
const dailyEntries = Object.values(summary.daily || {})
|
|
882
|
-
const totalRequests = dailyEntries.reduce((sum, d) => sum + (d.requests || 0), 0)
|
|
883
|
-
const totalTokens = dailyEntries.reduce((sum, d) => sum + (d.tokens || 0), 0)
|
|
884
|
-
|
|
885
|
-
sendJson(res, 200, {
|
|
886
|
-
accounts: healthByAccount,
|
|
887
|
-
tokenStats: {
|
|
888
|
-
byModel: summary.byModel || {},
|
|
889
|
-
recentRequests: summary.recentRequests || [],
|
|
890
|
-
},
|
|
891
|
-
anthropicRouting: this._anthropicRouting,
|
|
892
|
-
totals: {
|
|
893
|
-
requests: totalRequests,
|
|
894
|
-
tokens: totalTokens,
|
|
895
|
-
},
|
|
896
|
-
uptime: Math.floor((Date.now() - this._startTime) / 1000),
|
|
897
|
-
})
|
|
898
|
-
}
|
|
899
|
-
|
|
900
|
-
// ── POST /v1/messages (Anthropic translation) ──────────────────────────────
|
|
901
|
-
|
|
902
|
-
/**
|
|
903
|
-
* 📖 Handle Anthropic Messages API requests by translating to OpenAI format,
|
|
904
|
-
* forwarding through the existing chat completions handler, then translating
|
|
905
|
-
* the response back to Anthropic format.
|
|
906
|
-
*
|
|
907
|
-
* 📖 This makes Claude Code work natively through the FCM proxy.
|
|
908
|
-
*/
|
|
909
|
-
async _handleAnthropicMessages(clientReq, clientRes, authContext = { modelHint: null }) {
|
|
910
|
-
const rawBody = await readBody(clientReq)
|
|
911
|
-
let anthropicBody
|
|
912
|
-
try {
|
|
913
|
-
anthropicBody = JSON.parse(rawBody)
|
|
914
|
-
} catch {
|
|
915
|
-
return sendJson(clientRes, 400, { error: { type: 'invalid_request_error', message: 'Invalid JSON body' } })
|
|
916
|
-
}
|
|
917
|
-
|
|
918
|
-
// 📖 Translate Anthropic → OpenAI
|
|
919
|
-
const openaiBody = translateAnthropicToOpenAI(anthropicBody)
|
|
920
|
-
const resolvedModel = this._resolveAnthropicRequestedModel(openaiBody.model, authContext.modelHint)
|
|
921
|
-
if (resolvedModel) openaiBody.model = resolvedModel
|
|
922
|
-
const isStreaming = openaiBody.stream === true
|
|
923
|
-
|
|
924
|
-
if (isStreaming) {
|
|
925
|
-
// 📖 Streaming mode: pipe through SSE transformer
|
|
926
|
-
await this._handleAnthropicMessagesStreaming(openaiBody, anthropicBody.model, clientRes)
|
|
927
|
-
} else {
|
|
928
|
-
// 📖 JSON mode: forward, translate response, return
|
|
929
|
-
await this._handleAnthropicMessagesJson(openaiBody, anthropicBody.model, clientRes)
|
|
930
|
-
}
|
|
931
|
-
}
|
|
932
|
-
|
|
933
|
-
/**
|
|
934
|
-
* 📖 Count tokens for Anthropic Messages requests without calling upstream.
|
|
935
|
-
* 📖 Claude Code uses this endpoint for budgeting / UI hints, so a fast local
|
|
936
|
-
* 📖 estimate is enough to keep the flow working through the proxy.
|
|
937
|
-
*/
|
|
938
|
-
async _handleAnthropicCountTokens(clientReq, clientRes) {
|
|
939
|
-
const rawBody = await readBody(clientReq)
|
|
940
|
-
let anthropicBody
|
|
941
|
-
try {
|
|
942
|
-
anthropicBody = JSON.parse(rawBody)
|
|
943
|
-
} catch {
|
|
944
|
-
return sendJson(clientRes, 400, { error: { type: 'invalid_request_error', message: 'Invalid JSON body' } })
|
|
945
|
-
}
|
|
946
|
-
|
|
947
|
-
sendJson(clientRes, 200, {
|
|
948
|
-
input_tokens: estimateAnthropicTokens(anthropicBody),
|
|
949
|
-
})
|
|
950
|
-
}
|
|
951
|
-
|
|
952
|
-
/**
|
|
953
|
-
* 📖 Handle OpenAI Responses API requests by translating them to chat
|
|
954
|
-
* 📖 completions, forwarding through the existing proxy path, then converting
|
|
955
|
-
* 📖 the result back to the Responses wire format.
|
|
956
|
-
*/
|
|
957
|
-
async _handleResponses(clientReq, clientRes) {
|
|
958
|
-
const rawBody = await readBody(clientReq)
|
|
959
|
-
let responsesBody
|
|
960
|
-
try {
|
|
961
|
-
responsesBody = JSON.parse(rawBody)
|
|
962
|
-
} catch {
|
|
963
|
-
return sendJson(clientRes, 400, { error: 'Invalid JSON body' })
|
|
964
|
-
}
|
|
965
|
-
|
|
966
|
-
const isStreaming = responsesBody.stream === true || String(clientReq.headers.accept || '').includes('text/event-stream')
|
|
967
|
-
const openaiBody = translateResponsesToOpenAI({ ...responsesBody, stream: isStreaming })
|
|
968
|
-
|
|
969
|
-
if (isStreaming) {
|
|
970
|
-
await this._handleResponsesStreaming(openaiBody, responsesBody.model, clientRes)
|
|
971
|
-
} else {
|
|
972
|
-
await this._handleResponsesJson(openaiBody, responsesBody.model, clientRes)
|
|
973
|
-
}
|
|
974
|
-
}
|
|
975
|
-
|
|
976
|
-
async _handleResponsesJson(openaiBody, requestModel, clientRes) {
|
|
977
|
-
const capturedChunks = []
|
|
978
|
-
let capturedStatusCode = 200
|
|
979
|
-
let capturedHeaders = {}
|
|
980
|
-
|
|
981
|
-
const fakeRes = {
|
|
982
|
-
headersSent: false,
|
|
983
|
-
destroyed: false,
|
|
984
|
-
socket: null,
|
|
985
|
-
writeHead(statusCode, headers) {
|
|
986
|
-
capturedStatusCode = statusCode
|
|
987
|
-
capturedHeaders = headers || {}
|
|
988
|
-
this.headersSent = true
|
|
989
|
-
},
|
|
990
|
-
write(chunk) { capturedChunks.push(chunk) },
|
|
991
|
-
end(data) {
|
|
992
|
-
if (data) capturedChunks.push(data)
|
|
993
|
-
},
|
|
994
|
-
on() { return this },
|
|
995
|
-
once() { return this },
|
|
996
|
-
emit() { return false },
|
|
997
|
-
destroy() { this.destroyed = true },
|
|
998
|
-
removeListener() { return this },
|
|
999
|
-
}
|
|
1000
|
-
|
|
1001
|
-
await this._handleChatCompletionsInternal(openaiBody, fakeRes)
|
|
1002
|
-
|
|
1003
|
-
const responseBody = capturedChunks.join('')
|
|
1004
|
-
if (capturedStatusCode >= 200 && capturedStatusCode < 300) {
|
|
1005
|
-
try {
|
|
1006
|
-
const openaiResponse = JSON.parse(responseBody)
|
|
1007
|
-
const responsesResponse = translateOpenAIToResponses(openaiResponse, requestModel)
|
|
1008
|
-
sendJson(clientRes, 200, responsesResponse)
|
|
1009
|
-
} catch {
|
|
1010
|
-
sendJson(clientRes, capturedStatusCode, responseBody)
|
|
1011
|
-
}
|
|
1012
|
-
return
|
|
1013
|
-
}
|
|
1014
|
-
|
|
1015
|
-
// 📖 Forward upstream-style JSON errors unchanged for OpenAI-compatible clients.
|
|
1016
|
-
sendJson(clientRes, capturedStatusCode, responseBody)
|
|
1017
|
-
}
|
|
1018
|
-
|
|
1019
|
-
async _handleResponsesStreaming(openaiBody, requestModel, clientRes) {
|
|
1020
|
-
const { transform } = createResponsesSSETransformer(requestModel)
|
|
1021
|
-
await this._handleResponsesStreamDirect(openaiBody, clientRes, transform)
|
|
1022
|
-
}
|
|
1023
|
-
|
|
1024
|
-
async _handleResponsesStreamDirect(openaiBody, clientRes, sseTransform) {
|
|
1025
|
-
const fingerprint = createHash('sha256')
|
|
1026
|
-
.update(JSON.stringify(openaiBody.messages?.slice(-1) ?? []))
|
|
1027
|
-
.digest('hex')
|
|
1028
|
-
.slice(0, 16)
|
|
1029
|
-
|
|
1030
|
-
const requestedModel = typeof openaiBody.model === 'string'
|
|
1031
|
-
? openaiBody.model.replace(/^fcm-proxy\//, '')
|
|
1032
|
-
: undefined
|
|
1033
|
-
|
|
1034
|
-
if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
|
|
1035
|
-
return sendJson(clientRes, 404, {
|
|
1036
|
-
error: 'Model not found',
|
|
1037
|
-
message: `Model '${requestedModel}' is not available.`,
|
|
1038
|
-
})
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
sseTransform.pipe(clientRes)
|
|
1042
|
-
|
|
1043
|
-
for (let attempt = 0; attempt < this._retries; attempt++) {
|
|
1044
|
-
const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
|
|
1045
|
-
if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
|
|
1046
|
-
|
|
1047
|
-
const selectOpts = attempt === 0
|
|
1048
|
-
? { sessionFingerprint: fingerprint, requestedModel }
|
|
1049
|
-
: { requestedModel }
|
|
1050
|
-
const account = this._accountManager.selectAccount(selectOpts)
|
|
1051
|
-
if (!account) break
|
|
1052
|
-
|
|
1053
|
-
const result = await this._forwardRequestForResponsesStream(account, openaiBody, sseTransform, clientRes)
|
|
1054
|
-
if (result.done) return
|
|
1055
|
-
|
|
1056
|
-
const { statusCode, responseBody, responseHeaders, networkError } = result
|
|
1057
|
-
const classified = classifyError(
|
|
1058
|
-
networkError ? 0 : statusCode,
|
|
1059
|
-
responseBody || '',
|
|
1060
|
-
responseHeaders || {}
|
|
1061
|
-
)
|
|
1062
|
-
this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
|
|
1063
|
-
if (!classified.shouldRetry) {
|
|
1064
|
-
sseTransform.end()
|
|
1065
|
-
return sendJson(clientRes, statusCode || 500, responseBody || JSON.stringify({ error: 'Upstream error' }))
|
|
1066
|
-
}
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
sseTransform.end()
|
|
1070
|
-
sendJson(clientRes, 503, { error: 'All accounts exhausted or unavailable' })
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
/**
|
|
1074
|
-
* 📖 Handle non-streaming Anthropic Messages by internally dispatching to
|
|
1075
|
-
* chat completions logic and translating the JSON response back.
|
|
1076
|
-
*/
|
|
1077
|
-
async _handleAnthropicMessagesJson(openaiBody, requestModel, clientRes) {
|
|
1078
|
-
// 📖 Create a fake request/response pair to capture the OpenAI response
|
|
1079
|
-
const capturedChunks = []
|
|
1080
|
-
let capturedStatusCode = 200
|
|
1081
|
-
let capturedHeaders = {}
|
|
1082
|
-
|
|
1083
|
-
const fakeRes = {
|
|
1084
|
-
headersSent: false,
|
|
1085
|
-
destroyed: false,
|
|
1086
|
-
socket: null,
|
|
1087
|
-
writeHead(statusCode, headers) {
|
|
1088
|
-
capturedStatusCode = statusCode
|
|
1089
|
-
capturedHeaders = headers || {}
|
|
1090
|
-
this.headersSent = true
|
|
1091
|
-
},
|
|
1092
|
-
write(chunk) { capturedChunks.push(chunk) },
|
|
1093
|
-
end(data) {
|
|
1094
|
-
if (data) capturedChunks.push(data)
|
|
1095
|
-
},
|
|
1096
|
-
on() { return this },
|
|
1097
|
-
once() { return this },
|
|
1098
|
-
emit() { return false },
|
|
1099
|
-
destroy() { this.destroyed = true },
|
|
1100
|
-
removeListener() { return this },
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
// 📖 Build a fake IncomingMessage-like with pre-parsed body
|
|
1104
|
-
const fakeReq = {
|
|
1105
|
-
method: 'POST',
|
|
1106
|
-
url: '/v1/chat/completions',
|
|
1107
|
-
headers: { 'content-type': 'application/json' },
|
|
1108
|
-
on(event, cb) {
|
|
1109
|
-
if (event === 'data') cb(Buffer.from(JSON.stringify(openaiBody)))
|
|
1110
|
-
if (event === 'end') cb()
|
|
1111
|
-
return this
|
|
1112
|
-
},
|
|
1113
|
-
removeListener() { return this },
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
// 📖 Use internal handler directly instead of fake request
|
|
1117
|
-
await this._handleChatCompletionsInternal(openaiBody, fakeRes)
|
|
1118
|
-
|
|
1119
|
-
const responseBody = capturedChunks.join('')
|
|
1120
|
-
|
|
1121
|
-
if (capturedStatusCode >= 200 && capturedStatusCode < 300) {
|
|
1122
|
-
try {
|
|
1123
|
-
const openaiResponse = JSON.parse(responseBody)
|
|
1124
|
-
const anthropicResponse = translateOpenAIToAnthropic(openaiResponse, requestModel)
|
|
1125
|
-
sendJson(clientRes, 200, anthropicResponse)
|
|
1126
|
-
} catch {
|
|
1127
|
-
// 📖 Couldn't parse — forward raw
|
|
1128
|
-
sendJson(clientRes, capturedStatusCode, responseBody)
|
|
1129
|
-
}
|
|
1130
|
-
} else {
|
|
1131
|
-
// 📖 Error — wrap in Anthropic error format
|
|
1132
|
-
sendJson(clientRes, capturedStatusCode, {
|
|
1133
|
-
type: 'error',
|
|
1134
|
-
error: { type: 'api_error', message: responseBody },
|
|
1135
|
-
})
|
|
1136
|
-
}
|
|
1137
|
-
}
|
|
1138
|
-
|
|
1139
|
-
/**
|
|
1140
|
-
* 📖 Handle streaming Anthropic Messages by forwarding as streaming OpenAI
|
|
1141
|
-
* chat completions and piping through the SSE translator.
|
|
1142
|
-
*/
|
|
1143
|
-
async _handleAnthropicMessagesStreaming(openaiBody, requestModel, clientRes) {
|
|
1144
|
-
// 📖 We need to intercept the SSE response and translate it
|
|
1145
|
-
const { transform, getUsage } = createAnthropicSSETransformer(requestModel)
|
|
1146
|
-
|
|
1147
|
-
let resolveForward
|
|
1148
|
-
const forwardPromise = new Promise(r => { resolveForward = r })
|
|
1149
|
-
|
|
1150
|
-
const fakeRes = {
|
|
1151
|
-
headersSent: false,
|
|
1152
|
-
destroyed: false,
|
|
1153
|
-
socket: null,
|
|
1154
|
-
writeHead(statusCode, headers) {
|
|
1155
|
-
this.headersSent = true
|
|
1156
|
-
if (statusCode >= 200 && statusCode < 300) {
|
|
1157
|
-
// 📖 Write Anthropic SSE headers
|
|
1158
|
-
clientRes.writeHead(200, {
|
|
1159
|
-
'content-type': 'text/event-stream',
|
|
1160
|
-
'cache-control': 'no-cache',
|
|
1161
|
-
'connection': 'keep-alive',
|
|
1162
|
-
})
|
|
1163
|
-
} else {
|
|
1164
|
-
clientRes.writeHead(statusCode, headers)
|
|
1165
|
-
}
|
|
1166
|
-
},
|
|
1167
|
-
write(chunk) { /* SSE data handled via pipe */ },
|
|
1168
|
-
end(data) {
|
|
1169
|
-
if (data && !this.headersSent) {
|
|
1170
|
-
// 📖 Non-streaming error response
|
|
1171
|
-
clientRes.end(data)
|
|
1172
|
-
}
|
|
1173
|
-
resolveForward()
|
|
1174
|
-
},
|
|
1175
|
-
on() { return this },
|
|
1176
|
-
once() { return this },
|
|
1177
|
-
emit() { return false },
|
|
1178
|
-
destroy() { this.destroyed = true },
|
|
1179
|
-
removeListener() { return this },
|
|
1180
|
-
}
|
|
1181
|
-
|
|
1182
|
-
// 📖 Actually we need to pipe the upstream SSE through our transformer.
|
|
1183
|
-
// 📖 The simplest approach: use _handleChatCompletionsInternal with stream=true
|
|
1184
|
-
// 📖 and capture the piped response through our transformer.
|
|
1185
|
-
|
|
1186
|
-
// 📖 For streaming, we go lower level — use the retry loop directly
|
|
1187
|
-
await this._handleAnthropicStreamDirect(openaiBody, requestModel, clientRes, transform)
|
|
1188
|
-
}
|
|
1189
|
-
|
|
1190
|
-
/**
|
|
1191
|
-
* 📖 Direct streaming handler for Anthropic messages.
|
|
1192
|
-
* 📖 Runs the retry loop, pipes upstream SSE through the Anthropic transformer.
|
|
1193
|
-
*/
|
|
1194
|
-
async _handleAnthropicStreamDirect(openaiBody, requestModel, clientRes, sseTransform) {
|
|
1195
|
-
const { createHash: _createHash } = await import('node:crypto')
|
|
1196
|
-
const fingerprint = _createHash('sha256')
|
|
1197
|
-
.update(JSON.stringify(openaiBody.messages?.slice(-1) ?? []))
|
|
1198
|
-
.digest('hex')
|
|
1199
|
-
.slice(0, 16)
|
|
1200
|
-
|
|
1201
|
-
const requestedModel = typeof openaiBody.model === 'string'
|
|
1202
|
-
? openaiBody.model.replace(/^fcm-proxy\//, '')
|
|
1203
|
-
: undefined
|
|
1204
|
-
|
|
1205
|
-
if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
|
|
1206
|
-
return sendJson(clientRes, 404, {
|
|
1207
|
-
type: 'error',
|
|
1208
|
-
error: { type: 'not_found_error', message: `Model '${requestedModel}' is not available.` },
|
|
1209
|
-
})
|
|
1210
|
-
}
|
|
1211
|
-
|
|
1212
|
-
// 📖 Pipe the transform to client
|
|
1213
|
-
sseTransform.pipe(clientRes)
|
|
1214
|
-
|
|
1215
|
-
for (let attempt = 0; attempt < this._retries; attempt++) {
|
|
1216
|
-
// 📖 Progressive backoff for retries (same as chat completions)
|
|
1217
|
-
const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
|
|
1218
|
-
if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
|
|
1219
|
-
|
|
1220
|
-
const selectOpts = attempt === 0
|
|
1221
|
-
? { sessionFingerprint: fingerprint, requestedModel }
|
|
1222
|
-
: { requestedModel }
|
|
1223
|
-
const account = this._accountManager.selectAccount(selectOpts)
|
|
1224
|
-
if (!account) break
|
|
1225
|
-
|
|
1226
|
-
const result = await this._forwardRequestForAnthropicStream(account, openaiBody, sseTransform, clientRes)
|
|
1227
|
-
|
|
1228
|
-
if (result.done) return
|
|
1229
|
-
|
|
1230
|
-
const { statusCode, responseBody, responseHeaders, networkError } = result
|
|
1231
|
-
const classified = classifyError(
|
|
1232
|
-
networkError ? 0 : statusCode,
|
|
1233
|
-
responseBody || '',
|
|
1234
|
-
responseHeaders || {}
|
|
1235
|
-
)
|
|
1236
|
-
this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
|
|
1237
|
-
if (!classified.shouldRetry) {
|
|
1238
|
-
sseTransform.end()
|
|
1239
|
-
return sendJson(clientRes, statusCode || 500, {
|
|
1240
|
-
type: 'error',
|
|
1241
|
-
error: { type: 'api_error', message: responseBody || 'Upstream error' },
|
|
1242
|
-
})
|
|
1243
|
-
}
|
|
1244
|
-
}
|
|
1245
|
-
|
|
1246
|
-
sseTransform.end()
|
|
1247
|
-
sendJson(clientRes, 503, {
|
|
1248
|
-
type: 'error',
|
|
1249
|
-
error: { type: 'overloaded_error', message: 'All accounts exhausted or unavailable' },
|
|
1250
|
-
})
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
/**
|
|
1254
|
-
* 📖 Forward a streaming request to upstream and pipe SSE through transform.
|
|
1255
|
-
*/
|
|
1256
|
-
_forwardRequestForAnthropicStream(account, body, sseTransform, clientRes) {
|
|
1257
|
-
return new Promise(resolve => {
|
|
1258
|
-
const newBody = { ...body, model: account.modelId, stream: true }
|
|
1259
|
-
const bodyStr = JSON.stringify(newBody)
|
|
1260
|
-
const baseUrl = account.url.replace(/\/$/, '')
|
|
1261
|
-
let upstreamUrl
|
|
1262
|
-
try {
|
|
1263
|
-
upstreamUrl = new URL(baseUrl + '/chat/completions')
|
|
1264
|
-
} catch {
|
|
1265
|
-
return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
|
|
1266
|
-
}
|
|
1267
|
-
const client = selectClient(account.url)
|
|
1268
|
-
const startTime = Date.now()
|
|
1269
|
-
|
|
1270
|
-
const requestOptions = {
|
|
1271
|
-
hostname: upstreamUrl.hostname,
|
|
1272
|
-
port: upstreamUrl.port || (upstreamUrl.protocol === 'https:' ? 443 : 80),
|
|
1273
|
-
path: upstreamUrl.pathname + (upstreamUrl.search || ''),
|
|
1274
|
-
method: 'POST',
|
|
1275
|
-
headers: {
|
|
1276
|
-
'authorization': `Bearer ${account.apiKey}`,
|
|
1277
|
-
'content-type': 'application/json',
|
|
1278
|
-
'content-length': Buffer.byteLength(bodyStr),
|
|
1279
|
-
},
|
|
1280
|
-
}
|
|
1281
|
-
|
|
1282
|
-
const upstreamReq = client.request(requestOptions, upstreamRes => {
|
|
1283
|
-
const { statusCode } = upstreamRes
|
|
1284
|
-
|
|
1285
|
-
if (statusCode >= 200 && statusCode < 300) {
|
|
1286
|
-
// 📖 Write Anthropic SSE headers if not already sent
|
|
1287
|
-
if (!clientRes.headersSent) {
|
|
1288
|
-
clientRes.writeHead(200, {
|
|
1289
|
-
'content-type': 'text/event-stream',
|
|
1290
|
-
'cache-control': 'no-cache',
|
|
1291
|
-
})
|
|
1292
|
-
}
|
|
1293
|
-
|
|
1294
|
-
// 📖 Error handlers on both sides of the pipe to prevent uncaught errors
|
|
1295
|
-
upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
|
|
1296
|
-
clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
|
|
1297
|
-
|
|
1298
|
-
// 📖 Pipe upstream SSE through Anthropic translator
|
|
1299
|
-
upstreamRes.pipe(sseTransform, { end: true })
|
|
1300
|
-
|
|
1301
|
-
upstreamRes.on('end', () => {
|
|
1302
|
-
this._accountManager.recordSuccess(account.id, Date.now() - startTime)
|
|
1303
|
-
})
|
|
1304
|
-
|
|
1305
|
-
clientRes.on('close', () => {
|
|
1306
|
-
if (!upstreamRes.destroyed) upstreamRes.destroy()
|
|
1307
|
-
if (!upstreamReq.destroyed) upstreamReq.destroy()
|
|
1308
|
-
})
|
|
1309
|
-
|
|
1310
|
-
resolve({ done: true })
|
|
1311
|
-
} else {
|
|
1312
|
-
const chunks = []
|
|
1313
|
-
upstreamRes.on('data', chunk => chunks.push(chunk))
|
|
1314
|
-
upstreamRes.on('end', () => {
|
|
1315
|
-
resolve({
|
|
1316
|
-
done: false,
|
|
1317
|
-
statusCode,
|
|
1318
|
-
responseBody: Buffer.concat(chunks).toString(),
|
|
1319
|
-
responseHeaders: upstreamRes.headers,
|
|
1320
|
-
networkError: false,
|
|
1321
|
-
})
|
|
1322
|
-
})
|
|
1323
|
-
}
|
|
1324
|
-
})
|
|
1325
|
-
|
|
1326
|
-
upstreamReq.on('error', err => {
|
|
1327
|
-
resolve({
|
|
1328
|
-
done: false,
|
|
1329
|
-
statusCode: 0,
|
|
1330
|
-
responseBody: err.message,
|
|
1331
|
-
responseHeaders: {},
|
|
1332
|
-
networkError: true,
|
|
1333
|
-
})
|
|
1334
|
-
})
|
|
1335
|
-
|
|
1336
|
-
upstreamReq.setTimeout(this._upstreamTimeoutMs, () => {
|
|
1337
|
-
upstreamReq.destroy(new Error(`Upstream request timed out after ${this._upstreamTimeoutMs}ms`))
|
|
1338
|
-
})
|
|
1339
|
-
|
|
1340
|
-
upstreamReq.write(bodyStr)
|
|
1341
|
-
upstreamReq.end()
|
|
1342
|
-
})
|
|
1343
|
-
}
|
|
1344
|
-
|
|
1345
|
-
/**
|
|
1346
|
-
* 📖 Forward a streaming chat-completions request and translate the upstream
|
|
1347
|
-
* 📖 SSE stream into Responses API events on the fly.
|
|
1348
|
-
*/
|
|
1349
|
-
_forwardRequestForResponsesStream(account, body, sseTransform, clientRes) {
|
|
1350
|
-
return new Promise(resolve => {
|
|
1351
|
-
const newBody = { ...body, model: account.modelId, stream: true }
|
|
1352
|
-
const bodyStr = JSON.stringify(newBody)
|
|
1353
|
-
const baseUrl = account.url.replace(/\/$/, '')
|
|
1354
|
-
let upstreamUrl
|
|
1355
|
-
try {
|
|
1356
|
-
upstreamUrl = new URL(baseUrl + '/chat/completions')
|
|
1357
|
-
} catch {
|
|
1358
|
-
return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
|
|
1359
|
-
}
|
|
1360
|
-
|
|
1361
|
-
const client = selectClient(account.url)
|
|
1362
|
-
const startTime = Date.now()
|
|
1363
|
-
const requestOptions = {
|
|
1364
|
-
hostname: upstreamUrl.hostname,
|
|
1365
|
-
port: upstreamUrl.port || (upstreamUrl.protocol === 'https:' ? 443 : 80),
|
|
1366
|
-
path: upstreamUrl.pathname + (upstreamUrl.search || ''),
|
|
1367
|
-
method: 'POST',
|
|
1368
|
-
headers: {
|
|
1369
|
-
'authorization': `Bearer ${account.apiKey}`,
|
|
1370
|
-
'content-type': 'application/json',
|
|
1371
|
-
'content-length': Buffer.byteLength(bodyStr),
|
|
1372
|
-
},
|
|
1373
|
-
}
|
|
1374
|
-
|
|
1375
|
-
const upstreamReq = client.request(requestOptions, upstreamRes => {
|
|
1376
|
-
const { statusCode } = upstreamRes
|
|
1377
|
-
|
|
1378
|
-
if (statusCode >= 200 && statusCode < 300) {
|
|
1379
|
-
if (!clientRes.headersSent) {
|
|
1380
|
-
clientRes.writeHead(200, {
|
|
1381
|
-
'content-type': 'text/event-stream',
|
|
1382
|
-
'cache-control': 'no-cache',
|
|
1383
|
-
})
|
|
1384
|
-
}
|
|
1385
|
-
|
|
1386
|
-
upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
|
|
1387
|
-
clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
|
|
1388
|
-
|
|
1389
|
-
upstreamRes.pipe(sseTransform, { end: true })
|
|
1390
|
-
upstreamRes.on('end', () => {
|
|
1391
|
-
this._accountManager.recordSuccess(account.id, Date.now() - startTime)
|
|
1392
|
-
})
|
|
1393
|
-
|
|
1394
|
-
clientRes.on('close', () => {
|
|
1395
|
-
if (!upstreamRes.destroyed) upstreamRes.destroy()
|
|
1396
|
-
if (!upstreamReq.destroyed) upstreamReq.destroy()
|
|
1397
|
-
})
|
|
1398
|
-
|
|
1399
|
-
resolve({ done: true })
|
|
1400
|
-
} else {
|
|
1401
|
-
const chunks = []
|
|
1402
|
-
upstreamRes.on('data', chunk => chunks.push(chunk))
|
|
1403
|
-
upstreamRes.on('end', () => {
|
|
1404
|
-
resolve({
|
|
1405
|
-
done: false,
|
|
1406
|
-
statusCode,
|
|
1407
|
-
responseBody: Buffer.concat(chunks).toString(),
|
|
1408
|
-
responseHeaders: upstreamRes.headers,
|
|
1409
|
-
networkError: false,
|
|
1410
|
-
})
|
|
1411
|
-
})
|
|
1412
|
-
}
|
|
1413
|
-
})
|
|
1414
|
-
|
|
1415
|
-
upstreamReq.on('error', err => {
|
|
1416
|
-
resolve({
|
|
1417
|
-
done: false,
|
|
1418
|
-
statusCode: 0,
|
|
1419
|
-
responseBody: err.message,
|
|
1420
|
-
responseHeaders: {},
|
|
1421
|
-
networkError: true,
|
|
1422
|
-
})
|
|
1423
|
-
})
|
|
1424
|
-
|
|
1425
|
-
upstreamReq.setTimeout(this._upstreamTimeoutMs, () => {
|
|
1426
|
-
upstreamReq.destroy(new Error(`Upstream request timed out after ${this._upstreamTimeoutMs}ms`))
|
|
1427
|
-
})
|
|
1428
|
-
|
|
1429
|
-
upstreamReq.write(bodyStr)
|
|
1430
|
-
upstreamReq.end()
|
|
1431
|
-
})
|
|
1432
|
-
}
|
|
1433
|
-
|
|
1434
|
-
/**
|
|
1435
|
-
* 📖 Internal version of chat completions handler that takes a pre-parsed body.
|
|
1436
|
-
* 📖 Used by the Anthropic JSON translation path to avoid re-parsing.
|
|
1437
|
-
*/
|
|
1438
|
-
async _handleChatCompletionsInternal(body, clientRes) {
|
|
1439
|
-
// 📖 Reuse the exact same logic as _handleChatCompletions but with pre-parsed body
|
|
1440
|
-
if (this._compressionOpts && Array.isArray(body.messages)) {
|
|
1441
|
-
body = { ...body, messages: compressContext(body.messages, this._compressionOpts) }
|
|
1442
|
-
}
|
|
1443
|
-
if (this._thinkingConfig) {
|
|
1444
|
-
body = applyThinkingBudget(body, this._thinkingConfig)
|
|
1445
|
-
}
|
|
1446
|
-
|
|
1447
|
-
const fingerprint = createHash('sha256')
|
|
1448
|
-
.update(JSON.stringify(body.messages?.slice(-1) ?? []))
|
|
1449
|
-
.digest('hex')
|
|
1450
|
-
.slice(0, 16)
|
|
1451
|
-
|
|
1452
|
-
const requestedModel = typeof body.model === 'string'
|
|
1453
|
-
? body.model.replace(/^fcm-proxy\//, '')
|
|
1454
|
-
: undefined
|
|
1455
|
-
|
|
1456
|
-
if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
|
|
1457
|
-
return sendJson(clientRes, 404, {
|
|
1458
|
-
error: 'Model not found',
|
|
1459
|
-
message: `Model '${requestedModel}' is not available.`,
|
|
1460
|
-
})
|
|
1461
|
-
}
|
|
1462
|
-
|
|
1463
|
-
for (let attempt = 0; attempt < this._retries; attempt++) {
|
|
1464
|
-
const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
|
|
1465
|
-
if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
|
|
1466
|
-
|
|
1467
|
-
const selectOpts = attempt === 0
|
|
1468
|
-
? { sessionFingerprint: fingerprint, requestedModel }
|
|
1469
|
-
: { requestedModel }
|
|
1470
|
-
const account = this._accountManager.selectAccount(selectOpts)
|
|
1471
|
-
if (!account) break
|
|
1472
|
-
|
|
1473
|
-
const result = await this._forwardRequest(account, body, clientRes, { requestedModel })
|
|
1474
|
-
if (result.done) return
|
|
1475
|
-
|
|
1476
|
-
const { statusCode, responseBody, responseHeaders, networkError } = result
|
|
1477
|
-
const classified = classifyError(
|
|
1478
|
-
networkError ? 0 : statusCode,
|
|
1479
|
-
responseBody || '',
|
|
1480
|
-
responseHeaders || {}
|
|
1481
|
-
)
|
|
1482
|
-
this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
|
|
1483
|
-
if (!classified.shouldRetry) {
|
|
1484
|
-
return sendJson(clientRes, statusCode || 500, responseBody || JSON.stringify({ error: 'Upstream error' }))
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
|
|
1488
|
-
sendJson(clientRes, 503, { error: 'All accounts exhausted or unavailable' })
|
|
1489
|
-
}
|
|
1490
|
-
|
|
1491
|
-
// ── Hot-reload accounts ─────────────────────────────────────────────────────
|
|
1492
|
-
|
|
1493
|
-
/**
|
|
1494
|
-
* 📖 Atomically swap the account list and rebuild the AccountManager.
|
|
1495
|
-
* 📖 Used by the daemon when config changes (new API keys, providers toggled).
|
|
1496
|
-
* 📖 In-flight requests on old accounts will finish naturally.
|
|
1497
|
-
*
|
|
1498
|
-
* @param {Array} accounts — new account list
|
|
1499
|
-
* @param {{ model?: string|null, modelOpus?: string|null, modelSonnet?: string|null, modelHaiku?: string|null }} anthropicRouting
|
|
1500
|
-
*/
|
|
1501
|
-
updateAccounts(accounts, anthropicRouting = this._anthropicRouting) {
|
|
1502
|
-
this._accounts = accounts
|
|
1503
|
-
this._anthropicRouting = normalizeAnthropicRouting(anthropicRouting)
|
|
1504
|
-
this._accountManager = new AccountManager(accounts, {})
|
|
1505
|
-
}
|
|
1506
|
-
}
|