free-coding-models 0.2.17 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,17 @@ import { classifyError } from './error-classifier.js'
21
21
  import { applyThinkingBudget, compressContext } from './request-transformer.js'
22
22
  import { TokenStats } from './token-stats.js'
23
23
  import { createHash } from 'node:crypto'
24
+ import {
25
+ translateAnthropicToOpenAI,
26
+ translateOpenAIToAnthropic,
27
+ createAnthropicSSETransformer,
28
+ estimateAnthropicTokens,
29
+ } from './anthropic-translator.js'
30
+ import {
31
+ translateResponsesToOpenAI,
32
+ translateOpenAIToResponses,
33
+ createResponsesSSETransformer,
34
+ } from './responses-translator.js'
24
35
 
25
36
  // ─── Helpers ─────────────────────────────────────────────────────────────────
26
37
 
@@ -51,16 +62,31 @@ function stripRateLimitHeaders(headers) {
51
62
  return result
52
63
  }
53
64
 
65
+ // 📖 Max body size limit to prevent memory exhaustion attacks (10 MB)
66
+ const MAX_BODY_SIZE = 10 * 1024 * 1024
67
+
54
68
  /**
55
69
  * Buffer all chunks from an http.IncomingMessage and return the body as a string.
70
+ * Enforces a size limit to prevent memory exhaustion from oversized payloads.
56
71
  *
57
72
  * @param {http.IncomingMessage} req
58
73
  * @returns {Promise<string>}
74
+ * @throws {Error} with statusCode 413 if body exceeds MAX_BODY_SIZE
59
75
  */
60
76
  function readBody(req) {
61
77
  return new Promise((resolve, reject) => {
62
78
  const chunks = []
63
- req.on('data', chunk => chunks.push(chunk))
79
+ let totalSize = 0
80
+ req.on('data', chunk => {
81
+ totalSize += chunk.length
82
+ if (totalSize > MAX_BODY_SIZE) {
83
+ req.destroy()
84
+ const err = new Error('Request body too large')
85
+ err.statusCode = 413
86
+ return reject(err)
87
+ }
88
+ chunks.push(chunk)
89
+ })
64
90
  req.on('end', () => resolve(Buffer.concat(chunks).toString()))
65
91
  req.on('error', reject)
66
92
  })
@@ -114,8 +140,13 @@ export class ProxyServer {
114
140
  this._proxyApiKey = proxyApiKey
115
141
  this._accounts = accounts
116
142
  this._upstreamTimeoutMs = upstreamTimeoutMs
143
+ // 📖 Progressive backoff delays (ms) for retries — first attempt is immediate,
144
+ // subsequent ones add increasing delay + random jitter (0-100ms) to avoid
145
+ // re-hitting the same rate-limit window on 429s from providers
146
+ this._retryDelays = [0, 300, 800]
117
147
  this._accountManager = new AccountManager(accounts, accountManagerOpts)
118
148
  this._tokenStats = new TokenStats(tokenStatsOpts)
149
+ this._startTime = Date.now()
119
150
  this._running = false
120
151
  this._listeningPort = null
121
152
  this._server = http.createServer((req, res) => this._handleRequest(req, res))
@@ -173,17 +204,50 @@ export class ProxyServer {
173
204
  // ── Request routing ────────────────────────────────────────────────────────
174
205
 
175
206
  _handleRequest(req, res) {
207
+ // 📖 Health endpoint is unauthenticated so external monitors can probe it
208
+ if (req.method === 'GET' && req.url === '/v1/health') {
209
+ return this._handleHealth(res)
210
+ }
211
+
176
212
  if (!this._isAuthorized(req)) {
177
213
  return sendJson(res, 401, { error: 'Unauthorized' })
178
214
  }
179
215
 
180
216
  if (req.method === 'GET' && req.url === '/v1/models') {
181
217
  this._handleModels(res)
218
+ } else if (req.method === 'GET' && req.url === '/v1/stats') {
219
+ this._handleStats(res)
182
220
  } else if (req.method === 'POST' && req.url === '/v1/chat/completions') {
183
221
  this._handleChatCompletions(req, res).catch(err => {
184
- sendJson(res, 500, { error: 'Internal server error', message: err.message })
222
+ console.error('[proxy] Internal error:', err)
223
+ // 📖 Return 413 for body-too-large, generic 500 for everything else — never leak stack traces
224
+ const status = err.statusCode === 413 ? 413 : 500
225
+ const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
226
+ sendJson(res, status, { error: msg })
227
+ })
228
+ } else if (req.method === 'POST' && req.url === '/v1/messages') {
229
+ // 📖 Anthropic Messages API translation — enables Claude Code compatibility
230
+ this._handleAnthropicMessages(req, res).catch(err => {
231
+ console.error('[proxy] Internal error:', err)
232
+ const status = err.statusCode === 413 ? 413 : 500
233
+ const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
234
+ sendJson(res, status, { error: msg })
185
235
  })
186
- } else if (req.method === 'POST' && (req.url === '/v1/completions' || req.url === '/v1/responses')) {
236
+ } else if (req.method === 'POST' && req.url === '/v1/messages/count_tokens') {
237
+ this._handleAnthropicCountTokens(req, res).catch(err => {
238
+ console.error('[proxy] Internal error:', err)
239
+ const status = err.statusCode === 413 ? 413 : 500
240
+ const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
241
+ sendJson(res, status, { error: msg })
242
+ })
243
+ } else if (req.method === 'POST' && req.url === '/v1/responses') {
244
+ this._handleResponses(req, res).catch(err => {
245
+ console.error('[proxy] Internal error:', err)
246
+ const status = err.statusCode === 413 ? 413 : 500
247
+ const msg = err.statusCode === 413 ? 'Request body too large' : 'Internal server error'
248
+ sendJson(res, status, { error: msg })
249
+ })
250
+ } else if (req.method === 'POST' && req.url === '/v1/completions') {
187
251
  // These legacy/alternative OpenAI endpoints are not supported by the proxy.
188
252
  // Return 501 (not 404) so callers get a clear signal instead of silently failing.
189
253
  sendJson(res, 501, {
@@ -200,19 +264,24 @@ export class ProxyServer {
200
264
  _handleModels(res) {
201
265
  const seen = new Set()
202
266
  const data = []
267
+ const models = []
203
268
  for (const acct of this._accounts) {
204
269
  const publicModelId = acct.proxyModelId || acct.modelId
205
270
  if (!seen.has(publicModelId)) {
206
271
  seen.add(publicModelId)
207
- data.push({
272
+ const modelEntry = {
208
273
  id: publicModelId,
274
+ slug: publicModelId,
275
+ name: publicModelId,
209
276
  object: 'model',
210
277
  created: Math.floor(Date.now() / 1000),
211
278
  owned_by: 'proxy',
212
- })
279
+ }
280
+ data.push(modelEntry)
281
+ models.push(modelEntry)
213
282
  }
214
283
  }
215
- sendJson(res, 200, { object: 'list', data })
284
+ sendJson(res, 200, { object: 'list', data, models })
216
285
  }
217
286
 
218
287
  // ── POST /v1/chat/completions ──────────────────────────────────────────────
@@ -273,10 +342,14 @@ export class ProxyServer {
273
342
  }
274
343
  }
275
344
 
276
- // 5. Retry loop
345
+ // 5. Retry loop with progressive backoff
277
346
  let pendingSwitchReason = null
278
347
  let previousAccount = null
279
348
  for (let attempt = 0; attempt < this._retries; attempt++) {
349
+ // 📖 Apply backoff delay before retries (first attempt is immediate)
350
+ const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
351
+ if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
352
+
280
353
  // First attempt: respect sticky session.
281
354
  // Subsequent retries: fresh P2C (don't hammer the same failed account).
282
355
  const selectOpts = attempt === 0
@@ -354,7 +427,13 @@ export class ProxyServer {
354
427
 
355
428
  // Build the full upstream URL from the account's base URL
356
429
  const baseUrl = account.url.replace(/\/$/, '')
357
- const upstreamUrl = new URL(baseUrl + '/chat/completions')
430
+ let upstreamUrl
431
+ try {
432
+ upstreamUrl = new URL(baseUrl + '/chat/completions')
433
+ } catch {
434
+ // 📖 Malformed upstream URL — resolve as network error so retry loop can continue
435
+ return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
436
+ }
358
437
 
359
438
  // Choose http or https module BEFORE creating the request
360
439
  const client = selectClient(account.url)
@@ -390,10 +469,14 @@ export class ProxyServer {
390
469
 
391
470
  // Tap the data stream to capture usage from the last data line.
392
471
  // Register BEFORE pipe() so both listeners share the same event queue.
472
+ // 📖 sseLineBuffer persists between chunks to handle lines split across boundaries
393
473
  let lastChunkData = ''
474
+ let sseLineBuffer = ''
394
475
  upstreamRes.on('data', chunk => {
395
- const text = chunk.toString()
396
- const lines = text.split('\n')
476
+ sseLineBuffer += chunk.toString()
477
+ const lines = sseLineBuffer.split('\n')
478
+ // 📖 Last element may be an incomplete line — keep it for next chunk
479
+ sseLineBuffer = lines.pop() || ''
397
480
  for (const line of lines) {
398
481
  if (line.startsWith('data: ') && !line.includes('[DONE]')) {
399
482
  lastChunkData = line.slice(6).trim()
@@ -433,6 +516,10 @@ export class ProxyServer {
433
516
  this._persistQuotaSnapshot(account, quotaUpdated)
434
517
  })
435
518
 
519
+ // 📖 Error handlers on both sides of the pipe to prevent uncaught errors
520
+ upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
521
+ clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
522
+
436
523
  // Pipe after listeners are registered; upstream → client, no buffering
437
524
  upstreamRes.pipe(clientRes)
438
525
 
@@ -590,4 +677,653 @@ export class ProxyServer {
590
677
  ...(account.modelId !== undefined && { modelId: account.modelId }),
591
678
  })
592
679
  }
680
+
681
+ // ── GET /v1/health ──────────────────────────────────────────────────────────
682
+
683
+ /**
684
+ * 📖 Health endpoint for daemon liveness checks. Unauthenticated so external
685
+ * monitors (TUI, launchctl, systemd) can probe without needing the token.
686
+ */
687
+ _handleHealth(res) {
688
+ const status = this.getStatus()
689
+ sendJson(res, 200, {
690
+ status: 'ok',
691
+ uptime: process.uptime(),
692
+ port: status.port,
693
+ accountCount: status.accountCount,
694
+ running: status.running,
695
+ })
696
+ }
697
+
698
+ // ── GET /v1/stats ──────────────────────────────────────────────────────────
699
+
700
+ /**
701
+ * 📖 Authenticated stats endpoint — returns per-account health, token stats summary,
702
+ * and proxy uptime. Useful for monitoring and debugging.
703
+ */
704
+ _handleStats(res) {
705
+ const healthByAccount = this._accountManager.getAllHealth()
706
+ const summary = this._tokenStats.getSummary()
707
+
708
+ // 📖 Compute totals from the summary data
709
+ const dailyEntries = Object.values(summary.daily || {})
710
+ const totalRequests = dailyEntries.reduce((sum, d) => sum + (d.requests || 0), 0)
711
+ const totalTokens = dailyEntries.reduce((sum, d) => sum + (d.tokens || 0), 0)
712
+
713
+ sendJson(res, 200, {
714
+ accounts: healthByAccount,
715
+ tokenStats: {
716
+ byModel: summary.byModel || {},
717
+ recentRequests: summary.recentRequests || [],
718
+ },
719
+ totals: {
720
+ requests: totalRequests,
721
+ tokens: totalTokens,
722
+ },
723
+ uptime: Math.floor((Date.now() - this._startTime) / 1000),
724
+ })
725
+ }
726
+
727
+ // ── POST /v1/messages (Anthropic translation) ──────────────────────────────
728
+
729
+ /**
730
+ * 📖 Handle Anthropic Messages API requests by translating to OpenAI format,
731
+ * forwarding through the existing chat completions handler, then translating
732
+ * the response back to Anthropic format.
733
+ *
734
+ * 📖 This makes Claude Code work natively through the FCM proxy.
735
+ */
736
+ async _handleAnthropicMessages(clientReq, clientRes) {
737
+ const rawBody = await readBody(clientReq)
738
+ let anthropicBody
739
+ try {
740
+ anthropicBody = JSON.parse(rawBody)
741
+ } catch {
742
+ return sendJson(clientRes, 400, { error: { type: 'invalid_request_error', message: 'Invalid JSON body' } })
743
+ }
744
+
745
+ // 📖 Translate Anthropic → OpenAI
746
+ const openaiBody = translateAnthropicToOpenAI(anthropicBody)
747
+ const isStreaming = openaiBody.stream === true
748
+
749
+ if (isStreaming) {
750
+ // 📖 Streaming mode: pipe through SSE transformer
751
+ await this._handleAnthropicMessagesStreaming(openaiBody, anthropicBody.model, clientRes)
752
+ } else {
753
+ // 📖 JSON mode: forward, translate response, return
754
+ await this._handleAnthropicMessagesJson(openaiBody, anthropicBody.model, clientRes)
755
+ }
756
+ }
757
+
758
+ /**
759
+ * 📖 Count tokens for Anthropic Messages requests without calling upstream.
760
+ * 📖 Claude Code uses this endpoint for budgeting / UI hints, so a fast local
761
+ * 📖 estimate is enough to keep the flow working through the proxy.
762
+ */
763
+ async _handleAnthropicCountTokens(clientReq, clientRes) {
764
+ const rawBody = await readBody(clientReq)
765
+ let anthropicBody
766
+ try {
767
+ anthropicBody = JSON.parse(rawBody)
768
+ } catch {
769
+ return sendJson(clientRes, 400, { error: { type: 'invalid_request_error', message: 'Invalid JSON body' } })
770
+ }
771
+
772
+ sendJson(clientRes, 200, {
773
+ input_tokens: estimateAnthropicTokens(anthropicBody),
774
+ })
775
+ }
776
+
777
+ /**
778
+ * 📖 Handle OpenAI Responses API requests by translating them to chat
779
+ * 📖 completions, forwarding through the existing proxy path, then converting
780
+ * 📖 the result back to the Responses wire format.
781
+ */
782
+ async _handleResponses(clientReq, clientRes) {
783
+ const rawBody = await readBody(clientReq)
784
+ let responsesBody
785
+ try {
786
+ responsesBody = JSON.parse(rawBody)
787
+ } catch {
788
+ return sendJson(clientRes, 400, { error: 'Invalid JSON body' })
789
+ }
790
+
791
+ const isStreaming = responsesBody.stream === true || String(clientReq.headers.accept || '').includes('text/event-stream')
792
+ const openaiBody = translateResponsesToOpenAI({ ...responsesBody, stream: isStreaming })
793
+
794
+ if (isStreaming) {
795
+ await this._handleResponsesStreaming(openaiBody, responsesBody.model, clientRes)
796
+ } else {
797
+ await this._handleResponsesJson(openaiBody, responsesBody.model, clientRes)
798
+ }
799
+ }
800
+
801
+ async _handleResponsesJson(openaiBody, requestModel, clientRes) {
802
+ const capturedChunks = []
803
+ let capturedStatusCode = 200
804
+ let capturedHeaders = {}
805
+
806
+ const fakeRes = {
807
+ headersSent: false,
808
+ destroyed: false,
809
+ socket: null,
810
+ writeHead(statusCode, headers) {
811
+ capturedStatusCode = statusCode
812
+ capturedHeaders = headers || {}
813
+ this.headersSent = true
814
+ },
815
+ write(chunk) { capturedChunks.push(chunk) },
816
+ end(data) {
817
+ if (data) capturedChunks.push(data)
818
+ },
819
+ on() { return this },
820
+ once() { return this },
821
+ emit() { return false },
822
+ destroy() { this.destroyed = true },
823
+ removeListener() { return this },
824
+ }
825
+
826
+ await this._handleChatCompletionsInternal(openaiBody, fakeRes)
827
+
828
+ const responseBody = capturedChunks.join('')
829
+ if (capturedStatusCode >= 200 && capturedStatusCode < 300) {
830
+ try {
831
+ const openaiResponse = JSON.parse(responseBody)
832
+ const responsesResponse = translateOpenAIToResponses(openaiResponse, requestModel)
833
+ sendJson(clientRes, 200, responsesResponse)
834
+ } catch {
835
+ sendJson(clientRes, capturedStatusCode, responseBody)
836
+ }
837
+ return
838
+ }
839
+
840
+ // 📖 Forward upstream-style JSON errors unchanged for OpenAI-compatible clients.
841
+ sendJson(clientRes, capturedStatusCode, responseBody)
842
+ }
843
+
844
+ async _handleResponsesStreaming(openaiBody, requestModel, clientRes) {
845
+ const { transform } = createResponsesSSETransformer(requestModel)
846
+ await this._handleResponsesStreamDirect(openaiBody, clientRes, transform)
847
+ }
848
+
849
+ async _handleResponsesStreamDirect(openaiBody, clientRes, sseTransform) {
850
+ const fingerprint = createHash('sha256')
851
+ .update(JSON.stringify(openaiBody.messages?.slice(-1) ?? []))
852
+ .digest('hex')
853
+ .slice(0, 16)
854
+
855
+ const requestedModel = typeof openaiBody.model === 'string'
856
+ ? openaiBody.model.replace(/^fcm-proxy\//, '')
857
+ : undefined
858
+
859
+ if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
860
+ return sendJson(clientRes, 404, {
861
+ error: 'Model not found',
862
+ message: `Model '${requestedModel}' is not available.`,
863
+ })
864
+ }
865
+
866
+ sseTransform.pipe(clientRes)
867
+
868
+ for (let attempt = 0; attempt < this._retries; attempt++) {
869
+ const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
870
+ if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
871
+
872
+ const selectOpts = attempt === 0
873
+ ? { sessionFingerprint: fingerprint, requestedModel }
874
+ : { requestedModel }
875
+ const account = this._accountManager.selectAccount(selectOpts)
876
+ if (!account) break
877
+
878
+ const result = await this._forwardRequestForResponsesStream(account, openaiBody, sseTransform, clientRes)
879
+ if (result.done) return
880
+
881
+ const { statusCode, responseBody, responseHeaders, networkError } = result
882
+ const classified = classifyError(
883
+ networkError ? 0 : statusCode,
884
+ responseBody || '',
885
+ responseHeaders || {}
886
+ )
887
+ this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
888
+ if (!classified.shouldRetry) {
889
+ sseTransform.end()
890
+ return sendJson(clientRes, statusCode || 500, responseBody || JSON.stringify({ error: 'Upstream error' }))
891
+ }
892
+ }
893
+
894
+ sseTransform.end()
895
+ sendJson(clientRes, 503, { error: 'All accounts exhausted or unavailable' })
896
+ }
897
+
898
+ /**
899
+ * 📖 Handle non-streaming Anthropic Messages by internally dispatching to
900
+ * chat completions logic and translating the JSON response back.
901
+ */
902
+ async _handleAnthropicMessagesJson(openaiBody, requestModel, clientRes) {
903
+ // 📖 Create a fake request/response pair to capture the OpenAI response
904
+ const capturedChunks = []
905
+ let capturedStatusCode = 200
906
+ let capturedHeaders = {}
907
+
908
+ const fakeRes = {
909
+ headersSent: false,
910
+ destroyed: false,
911
+ socket: null,
912
+ writeHead(statusCode, headers) {
913
+ capturedStatusCode = statusCode
914
+ capturedHeaders = headers || {}
915
+ this.headersSent = true
916
+ },
917
+ write(chunk) { capturedChunks.push(chunk) },
918
+ end(data) {
919
+ if (data) capturedChunks.push(data)
920
+ },
921
+ on() { return this },
922
+ once() { return this },
923
+ emit() { return false },
924
+ destroy() { this.destroyed = true },
925
+ removeListener() { return this },
926
+ }
927
+
928
+ // 📖 Build a fake IncomingMessage-like with pre-parsed body
929
+ const fakeReq = {
930
+ method: 'POST',
931
+ url: '/v1/chat/completions',
932
+ headers: { 'content-type': 'application/json' },
933
+ on(event, cb) {
934
+ if (event === 'data') cb(Buffer.from(JSON.stringify(openaiBody)))
935
+ if (event === 'end') cb()
936
+ return this
937
+ },
938
+ removeListener() { return this },
939
+ }
940
+
941
+ // 📖 Use internal handler directly instead of fake request
942
+ await this._handleChatCompletionsInternal(openaiBody, fakeRes)
943
+
944
+ const responseBody = capturedChunks.join('')
945
+
946
+ if (capturedStatusCode >= 200 && capturedStatusCode < 300) {
947
+ try {
948
+ const openaiResponse = JSON.parse(responseBody)
949
+ const anthropicResponse = translateOpenAIToAnthropic(openaiResponse, requestModel)
950
+ sendJson(clientRes, 200, anthropicResponse)
951
+ } catch {
952
+ // 📖 Couldn't parse — forward raw
953
+ sendJson(clientRes, capturedStatusCode, responseBody)
954
+ }
955
+ } else {
956
+ // 📖 Error — wrap in Anthropic error format
957
+ sendJson(clientRes, capturedStatusCode, {
958
+ type: 'error',
959
+ error: { type: 'api_error', message: responseBody },
960
+ })
961
+ }
962
+ }
963
+
964
+ /**
965
+ * 📖 Handle streaming Anthropic Messages by forwarding as streaming OpenAI
966
+ * chat completions and piping through the SSE translator.
967
+ */
968
+ async _handleAnthropicMessagesStreaming(openaiBody, requestModel, clientRes) {
969
+ // 📖 We need to intercept the SSE response and translate it
970
+ const { transform, getUsage } = createAnthropicSSETransformer(requestModel)
971
+
972
+ let resolveForward
973
+ const forwardPromise = new Promise(r => { resolveForward = r })
974
+
975
+ const fakeRes = {
976
+ headersSent: false,
977
+ destroyed: false,
978
+ socket: null,
979
+ writeHead(statusCode, headers) {
980
+ this.headersSent = true
981
+ if (statusCode >= 200 && statusCode < 300) {
982
+ // 📖 Write Anthropic SSE headers
983
+ clientRes.writeHead(200, {
984
+ 'content-type': 'text/event-stream',
985
+ 'cache-control': 'no-cache',
986
+ 'connection': 'keep-alive',
987
+ })
988
+ } else {
989
+ clientRes.writeHead(statusCode, headers)
990
+ }
991
+ },
992
+ write(chunk) { /* SSE data handled via pipe */ },
993
+ end(data) {
994
+ if (data && !this.headersSent) {
995
+ // 📖 Non-streaming error response
996
+ clientRes.end(data)
997
+ }
998
+ resolveForward()
999
+ },
1000
+ on() { return this },
1001
+ once() { return this },
1002
+ emit() { return false },
1003
+ destroy() { this.destroyed = true },
1004
+ removeListener() { return this },
1005
+ }
1006
+
1007
+ // 📖 Actually we need to pipe the upstream SSE through our transformer.
1008
+ // 📖 The simplest approach: use _handleChatCompletionsInternal with stream=true
1009
+ // 📖 and capture the piped response through our transformer.
1010
+
1011
+ // 📖 For streaming, we go lower level — use the retry loop directly
1012
+ await this._handleAnthropicStreamDirect(openaiBody, requestModel, clientRes, transform)
1013
+ }
1014
+
1015
+ /**
1016
+ * 📖 Direct streaming handler for Anthropic messages.
1017
+ * 📖 Runs the retry loop, pipes upstream SSE through the Anthropic transformer.
1018
+ */
1019
+ async _handleAnthropicStreamDirect(openaiBody, requestModel, clientRes, sseTransform) {
1020
+ const { createHash: _createHash } = await import('node:crypto')
1021
+ const fingerprint = _createHash('sha256')
1022
+ .update(JSON.stringify(openaiBody.messages?.slice(-1) ?? []))
1023
+ .digest('hex')
1024
+ .slice(0, 16)
1025
+
1026
+ const requestedModel = typeof openaiBody.model === 'string'
1027
+ ? openaiBody.model.replace(/^fcm-proxy\//, '')
1028
+ : undefined
1029
+
1030
+ if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
1031
+ return sendJson(clientRes, 404, {
1032
+ type: 'error',
1033
+ error: { type: 'not_found_error', message: `Model '${requestedModel}' is not available.` },
1034
+ })
1035
+ }
1036
+
1037
+ // 📖 Pipe the transform to client
1038
+ sseTransform.pipe(clientRes)
1039
+
1040
+ for (let attempt = 0; attempt < this._retries; attempt++) {
1041
+ // 📖 Progressive backoff for retries (same as chat completions)
1042
+ const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
1043
+ if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
1044
+
1045
+ const selectOpts = attempt === 0
1046
+ ? { sessionFingerprint: fingerprint, requestedModel }
1047
+ : { requestedModel }
1048
+ const account = this._accountManager.selectAccount(selectOpts)
1049
+ if (!account) break
1050
+
1051
+ const result = await this._forwardRequestForAnthropicStream(account, openaiBody, sseTransform, clientRes)
1052
+
1053
+ if (result.done) return
1054
+
1055
+ const { statusCode, responseBody, responseHeaders, networkError } = result
1056
+ const classified = classifyError(
1057
+ networkError ? 0 : statusCode,
1058
+ responseBody || '',
1059
+ responseHeaders || {}
1060
+ )
1061
+ this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
1062
+ if (!classified.shouldRetry) {
1063
+ sseTransform.end()
1064
+ return sendJson(clientRes, statusCode || 500, {
1065
+ type: 'error',
1066
+ error: { type: 'api_error', message: responseBody || 'Upstream error' },
1067
+ })
1068
+ }
1069
+ }
1070
+
1071
+ sseTransform.end()
1072
+ sendJson(clientRes, 503, {
1073
+ type: 'error',
1074
+ error: { type: 'overloaded_error', message: 'All accounts exhausted or unavailable' },
1075
+ })
1076
+ }
1077
+
1078
+ /**
1079
+ * 📖 Forward a streaming request to upstream and pipe SSE through transform.
1080
+ */
1081
+ _forwardRequestForAnthropicStream(account, body, sseTransform, clientRes) {
1082
+ return new Promise(resolve => {
1083
+ const newBody = { ...body, model: account.modelId, stream: true }
1084
+ const bodyStr = JSON.stringify(newBody)
1085
+ const baseUrl = account.url.replace(/\/$/, '')
1086
+ let upstreamUrl
1087
+ try {
1088
+ upstreamUrl = new URL(baseUrl + '/chat/completions')
1089
+ } catch {
1090
+ return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
1091
+ }
1092
+ const client = selectClient(account.url)
1093
+ const startTime = Date.now()
1094
+
1095
+ const requestOptions = {
1096
+ hostname: upstreamUrl.hostname,
1097
+ port: upstreamUrl.port || (upstreamUrl.protocol === 'https:' ? 443 : 80),
1098
+ path: upstreamUrl.pathname + (upstreamUrl.search || ''),
1099
+ method: 'POST',
1100
+ headers: {
1101
+ 'authorization': `Bearer ${account.apiKey}`,
1102
+ 'content-type': 'application/json',
1103
+ 'content-length': Buffer.byteLength(bodyStr),
1104
+ },
1105
+ }
1106
+
1107
+ const upstreamReq = client.request(requestOptions, upstreamRes => {
1108
+ const { statusCode } = upstreamRes
1109
+
1110
+ if (statusCode >= 200 && statusCode < 300) {
1111
+ // 📖 Write Anthropic SSE headers if not already sent
1112
+ if (!clientRes.headersSent) {
1113
+ clientRes.writeHead(200, {
1114
+ 'content-type': 'text/event-stream',
1115
+ 'cache-control': 'no-cache',
1116
+ })
1117
+ }
1118
+
1119
+ // 📖 Error handlers on both sides of the pipe to prevent uncaught errors
1120
+ upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
1121
+ clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
1122
+
1123
+ // 📖 Pipe upstream SSE through Anthropic translator
1124
+ upstreamRes.pipe(sseTransform, { end: true })
1125
+
1126
+ upstreamRes.on('end', () => {
1127
+ this._accountManager.recordSuccess(account.id, Date.now() - startTime)
1128
+ })
1129
+
1130
+ clientRes.on('close', () => {
1131
+ if (!upstreamRes.destroyed) upstreamRes.destroy()
1132
+ if (!upstreamReq.destroyed) upstreamReq.destroy()
1133
+ })
1134
+
1135
+ resolve({ done: true })
1136
+ } else {
1137
+ const chunks = []
1138
+ upstreamRes.on('data', chunk => chunks.push(chunk))
1139
+ upstreamRes.on('end', () => {
1140
+ resolve({
1141
+ done: false,
1142
+ statusCode,
1143
+ responseBody: Buffer.concat(chunks).toString(),
1144
+ responseHeaders: upstreamRes.headers,
1145
+ networkError: false,
1146
+ })
1147
+ })
1148
+ }
1149
+ })
1150
+
1151
+ upstreamReq.on('error', err => {
1152
+ resolve({
1153
+ done: false,
1154
+ statusCode: 0,
1155
+ responseBody: err.message,
1156
+ responseHeaders: {},
1157
+ networkError: true,
1158
+ })
1159
+ })
1160
+
1161
+ upstreamReq.setTimeout(this._upstreamTimeoutMs, () => {
1162
+ upstreamReq.destroy(new Error(`Upstream request timed out after ${this._upstreamTimeoutMs}ms`))
1163
+ })
1164
+
1165
+ upstreamReq.write(bodyStr)
1166
+ upstreamReq.end()
1167
+ })
1168
+ }
1169
+
1170
+ /**
1171
+ * 📖 Forward a streaming chat-completions request and translate the upstream
1172
+ * 📖 SSE stream into Responses API events on the fly.
1173
+ */
1174
+ _forwardRequestForResponsesStream(account, body, sseTransform, clientRes) {
1175
+ return new Promise(resolve => {
1176
+ const newBody = { ...body, model: account.modelId, stream: true }
1177
+ const bodyStr = JSON.stringify(newBody)
1178
+ const baseUrl = account.url.replace(/\/$/, '')
1179
+ let upstreamUrl
1180
+ try {
1181
+ upstreamUrl = new URL(baseUrl + '/chat/completions')
1182
+ } catch {
1183
+ return resolve({ done: false, statusCode: 0, responseBody: 'Invalid upstream URL', networkError: true })
1184
+ }
1185
+
1186
+ const client = selectClient(account.url)
1187
+ const startTime = Date.now()
1188
+ const requestOptions = {
1189
+ hostname: upstreamUrl.hostname,
1190
+ port: upstreamUrl.port || (upstreamUrl.protocol === 'https:' ? 443 : 80),
1191
+ path: upstreamUrl.pathname + (upstreamUrl.search || ''),
1192
+ method: 'POST',
1193
+ headers: {
1194
+ 'authorization': `Bearer ${account.apiKey}`,
1195
+ 'content-type': 'application/json',
1196
+ 'content-length': Buffer.byteLength(bodyStr),
1197
+ },
1198
+ }
1199
+
1200
+ const upstreamReq = client.request(requestOptions, upstreamRes => {
1201
+ const { statusCode } = upstreamRes
1202
+
1203
+ if (statusCode >= 200 && statusCode < 300) {
1204
+ if (!clientRes.headersSent) {
1205
+ clientRes.writeHead(200, {
1206
+ 'content-type': 'text/event-stream',
1207
+ 'cache-control': 'no-cache',
1208
+ })
1209
+ }
1210
+
1211
+ upstreamRes.on('error', err => { if (!clientRes.destroyed) clientRes.destroy(err) })
1212
+ clientRes.on('error', () => { if (!upstreamRes.destroyed) upstreamRes.destroy() })
1213
+
1214
+ upstreamRes.pipe(sseTransform, { end: true })
1215
+ upstreamRes.on('end', () => {
1216
+ this._accountManager.recordSuccess(account.id, Date.now() - startTime)
1217
+ })
1218
+
1219
+ clientRes.on('close', () => {
1220
+ if (!upstreamRes.destroyed) upstreamRes.destroy()
1221
+ if (!upstreamReq.destroyed) upstreamReq.destroy()
1222
+ })
1223
+
1224
+ resolve({ done: true })
1225
+ } else {
1226
+ const chunks = []
1227
+ upstreamRes.on('data', chunk => chunks.push(chunk))
1228
+ upstreamRes.on('end', () => {
1229
+ resolve({
1230
+ done: false,
1231
+ statusCode,
1232
+ responseBody: Buffer.concat(chunks).toString(),
1233
+ responseHeaders: upstreamRes.headers,
1234
+ networkError: false,
1235
+ })
1236
+ })
1237
+ }
1238
+ })
1239
+
1240
+ upstreamReq.on('error', err => {
1241
+ resolve({
1242
+ done: false,
1243
+ statusCode: 0,
1244
+ responseBody: err.message,
1245
+ responseHeaders: {},
1246
+ networkError: true,
1247
+ })
1248
+ })
1249
+
1250
+ upstreamReq.setTimeout(this._upstreamTimeoutMs, () => {
1251
+ upstreamReq.destroy(new Error(`Upstream request timed out after ${this._upstreamTimeoutMs}ms`))
1252
+ })
1253
+
1254
+ upstreamReq.write(bodyStr)
1255
+ upstreamReq.end()
1256
+ })
1257
+ }
1258
+
1259
+ /**
1260
+ * 📖 Internal version of chat completions handler that takes a pre-parsed body.
1261
+ * 📖 Used by the Anthropic JSON translation path to avoid re-parsing.
1262
+ */
1263
+ async _handleChatCompletionsInternal(body, clientRes) {
1264
+ // 📖 Reuse the exact same logic as _handleChatCompletions but with pre-parsed body
1265
+ if (this._compressionOpts && Array.isArray(body.messages)) {
1266
+ body = { ...body, messages: compressContext(body.messages, this._compressionOpts) }
1267
+ }
1268
+ if (this._thinkingConfig) {
1269
+ body = applyThinkingBudget(body, this._thinkingConfig)
1270
+ }
1271
+
1272
+ const fingerprint = createHash('sha256')
1273
+ .update(JSON.stringify(body.messages?.slice(-1) ?? []))
1274
+ .digest('hex')
1275
+ .slice(0, 16)
1276
+
1277
+ const requestedModel = typeof body.model === 'string'
1278
+ ? body.model.replace(/^fcm-proxy\//, '')
1279
+ : undefined
1280
+
1281
+ if (requestedModel && !this._accountManager.hasAccountsForModel(requestedModel)) {
1282
+ return sendJson(clientRes, 404, {
1283
+ error: 'Model not found',
1284
+ message: `Model '${requestedModel}' is not available.`,
1285
+ })
1286
+ }
1287
+
1288
+ for (let attempt = 0; attempt < this._retries; attempt++) {
1289
+ const delay = this._retryDelays[Math.min(attempt, this._retryDelays.length - 1)]
1290
+ if (delay > 0) await new Promise(r => setTimeout(r, delay + Math.random() * 100))
1291
+
1292
+ const selectOpts = attempt === 0
1293
+ ? { sessionFingerprint: fingerprint, requestedModel }
1294
+ : { requestedModel }
1295
+ const account = this._accountManager.selectAccount(selectOpts)
1296
+ if (!account) break
1297
+
1298
+ const result = await this._forwardRequest(account, body, clientRes, { requestedModel })
1299
+ if (result.done) return
1300
+
1301
+ const { statusCode, responseBody, responseHeaders, networkError } = result
1302
+ const classified = classifyError(
1303
+ networkError ? 0 : statusCode,
1304
+ responseBody || '',
1305
+ responseHeaders || {}
1306
+ )
1307
+ this._accountManager.recordFailure(account.id, classified, { providerKey: account.providerKey })
1308
+ if (!classified.shouldRetry) {
1309
+ return sendJson(clientRes, statusCode || 500, responseBody || JSON.stringify({ error: 'Upstream error' }))
1310
+ }
1311
+ }
1312
+
1313
+ sendJson(clientRes, 503, { error: 'All accounts exhausted or unavailable' })
1314
+ }
1315
+
1316
+ // ── Hot-reload accounts ─────────────────────────────────────────────────────
1317
+
1318
+ /**
1319
+ * 📖 Atomically swap the account list and rebuild the AccountManager.
1320
+ * 📖 Used by the daemon when config changes (new API keys, providers toggled).
1321
+ * 📖 In-flight requests on old accounts will finish naturally.
1322
+ *
1323
+ * @param {Array} accounts — new account list
1324
+ */
1325
+ updateAccounts(accounts) {
1326
+ this._accounts = accounts
1327
+ this._accountManager = new AccountManager(accounts, {})
1328
+ }
593
1329
  }