bingocode 1.1.124 → 1.1.125
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/server/proxy/handler.ts +20 -10
- package/src/server/proxy/streaming/openaiChatStreamToAnthropic.ts +20 -3
- package/src/server/proxy/streaming/openaiResponsesStreamToAnthropic.ts +9 -1
- package/src/server/proxy/transform/anthropicToOpenaiChat.ts +3 -4
- package/src/server/proxy/transform/anthropicToOpenaiResponses.ts +22 -17
package/package.json
CHANGED
|
package/src/server/proxy/handler.ts
CHANGED
|
@@ -28,6 +28,9 @@ import { homedir } from 'node:os'
|
|
|
28
28
|
|
|
29
29
|
const providerService = new ProviderService()
|
|
30
30
|
|
|
31
|
+
// Stream timeout: configurable via BINGO_STREAM_TIMEOUT_MS, default 300s
|
|
32
|
+
const STREAM_TIMEOUT_MS = parseInt(process.env.BINGO_STREAM_TIMEOUT_MS ?? '300000', 10) || 300_000
|
|
33
|
+
|
|
31
34
|
async function logToFile(message: string) {
|
|
32
35
|
// Disabled log output for production
|
|
33
36
|
}
|
|
@@ -108,6 +111,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
|
|
|
108
111
|
}
|
|
109
112
|
|
|
110
113
|
const isStream = body.stream === true
|
|
114
|
+
const betaHeader = req.headers.get('anthropic-beta')
|
|
111
115
|
|
|
112
116
|
// --- Slot-based routing ---
|
|
113
117
|
const slot = await identifySlot(body.model ?? '')
|
|
@@ -123,7 +127,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
|
|
|
123
127
|
|
|
124
128
|
try {
|
|
125
129
|
if (slotConfig.apiFormat === 'anthropic') {
|
|
126
|
-
return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
|
|
130
|
+
return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel, betaHeader)
|
|
127
131
|
} else if (slotConfig.apiFormat === 'openai_chat') {
|
|
128
132
|
return await handleOpenaiChat(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
|
|
129
133
|
} else {
|
|
@@ -170,17 +174,21 @@ async function handleAnthropicPassthrough(
|
|
|
170
174
|
apiKey: string,
|
|
171
175
|
isStream: boolean,
|
|
172
176
|
uiLabel: string | null = null,
|
|
177
|
+
betaHeader: string | null = null,
|
|
173
178
|
): Promise<Response> {
|
|
174
179
|
const url = `${baseUrl}/v1/messages`
|
|
180
|
+
const upstreamHeaders: Record<string, string> = {
|
|
181
|
+
'Content-Type': 'application/json',
|
|
182
|
+
'x-api-key': apiKey,
|
|
183
|
+
'anthropic-version': '2023-06-01',
|
|
184
|
+
}
|
|
185
|
+
if (betaHeader) upstreamHeaders['anthropic-beta'] = betaHeader
|
|
186
|
+
|
|
175
187
|
const upstream = await fetch(url, {
|
|
176
188
|
method: 'POST',
|
|
177
|
-
headers: {
|
|
178
|
-
'Content-Type': 'application/json',
|
|
179
|
-
'x-api-key': apiKey,
|
|
180
|
-
'anthropic-version': '2023-06-01',
|
|
181
|
-
},
|
|
189
|
+
headers: upstreamHeaders,
|
|
182
190
|
body: JSON.stringify(body),
|
|
183
|
-
signal: isStream ? AbortSignal.timeout(
|
|
191
|
+
signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
|
|
184
192
|
})
|
|
185
193
|
|
|
186
194
|
if (!upstream.ok) {
|
|
@@ -196,7 +204,9 @@ async function handleAnthropicPassthrough(
|
|
|
196
204
|
return new Response(upstream.body, {
|
|
197
205
|
status: 200,
|
|
198
206
|
headers: {
|
|
199
|
-
'Content-Type': '
|
|
207
|
+
'Content-Type': 'text/event-stream',
|
|
208
|
+
'Cache-Control': 'no-cache',
|
|
209
|
+
Connection: 'keep-alive',
|
|
200
210
|
},
|
|
201
211
|
})
|
|
202
212
|
}
|
|
@@ -222,7 +232,7 @@ async function handleOpenaiChat(
|
|
|
222
232
|
method: 'POST',
|
|
223
233
|
headers: buildUpstreamHeaders(apiKey),
|
|
224
234
|
body: JSON.stringify(transformed),
|
|
225
|
-
signal: isStream ? AbortSignal.timeout(
|
|
235
|
+
signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
|
|
226
236
|
})
|
|
227
237
|
|
|
228
238
|
if (!upstream.ok) {
|
|
@@ -273,7 +283,7 @@ async function handleOpenaiResponses(
|
|
|
273
283
|
method: 'POST',
|
|
274
284
|
headers: buildUpstreamHeaders(apiKey),
|
|
275
285
|
body: JSON.stringify(transformed),
|
|
276
|
-
signal: isStream ? AbortSignal.timeout(
|
|
286
|
+
signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
|
|
277
287
|
})
|
|
278
288
|
|
|
279
289
|
if (!upstream.ok) {
|
|
package/src/server/proxy/streaming/openaiChatStreamToAnthropic.ts
CHANGED
|
@@ -59,6 +59,9 @@ type StreamState = {
|
|
|
59
59
|
// Holding pattern: hold message_delta until usage arrives
|
|
60
60
|
// (some providers send finish_reason and usage in separate chunks)
|
|
61
61
|
heldMessageDelta: SseEvent | null
|
|
62
|
+
|
|
63
|
+
// Accumulated input token count from upstream usage chunks
|
|
64
|
+
inputTokens: number
|
|
62
65
|
}
|
|
63
66
|
|
|
64
67
|
// ─── Helpers ───────────────────────────────────────────────
|
|
@@ -81,6 +84,7 @@ function createState(model: string): StreamState {
|
|
|
81
84
|
messageDeltaSent: false,
|
|
82
85
|
messageStopSent: false,
|
|
83
86
|
heldMessageDelta: null,
|
|
87
|
+
inputTokens: 0,
|
|
84
88
|
}
|
|
85
89
|
}
|
|
86
90
|
|
|
@@ -138,6 +142,14 @@ export function openaiChatStreamToAnthropic(
|
|
|
138
142
|
}
|
|
139
143
|
} catch (err) {
|
|
140
144
|
errored = true
|
|
145
|
+
// Emit Anthropic-format error event before closing the stream
|
|
146
|
+
const errMsg = err instanceof Error ? err.message : String(err)
|
|
147
|
+
try {
|
|
148
|
+
controller.enqueue(encoder.encode(formatSse('error', {
|
|
149
|
+
type: 'error',
|
|
150
|
+
error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
|
|
151
|
+
})))
|
|
152
|
+
} catch { /* controller may already be closed */ }
|
|
141
153
|
controller.error(err)
|
|
142
154
|
} finally {
|
|
143
155
|
if (!errored) {
|
|
@@ -464,9 +476,13 @@ function handleFinishReason(
|
|
|
464
476
|
closeAllOpenBlocks(state)
|
|
465
477
|
|
|
466
478
|
const stopReason = mapFinishReason(finishReason)
|
|
479
|
+
|
|
480
|
+
// Capture input_tokens if available in this chunk
|
|
481
|
+
if (chunk.usage?.prompt_tokens) state.inputTokens = chunk.usage.prompt_tokens
|
|
482
|
+
|
|
467
483
|
const usage = chunk.usage
|
|
468
|
-
? { output_tokens: chunk.usage.completion_tokens || 0 }
|
|
469
|
-
: { output_tokens: 0 }
|
|
484
|
+
? { input_tokens: chunk.usage.prompt_tokens || 0, output_tokens: chunk.usage.completion_tokens || 0 }
|
|
485
|
+
: { input_tokens: state.inputTokens, output_tokens: 0 }
|
|
470
486
|
|
|
471
487
|
const messageDelta: SseEvent = {
|
|
472
488
|
event: 'message_delta',
|
|
@@ -493,8 +509,9 @@ function mergeUsageIntoHeldDelta(
|
|
|
493
509
|
): void {
|
|
494
510
|
if (!state.heldMessageDelta) return
|
|
495
511
|
|
|
512
|
+
if (usage.prompt_tokens) state.inputTokens = usage.prompt_tokens
|
|
496
513
|
const data = state.heldMessageDelta.data as Record<string, unknown>
|
|
497
|
-
data.usage = { output_tokens: usage.completion_tokens || 0 }
|
|
514
|
+
data.usage = { input_tokens: state.inputTokens, output_tokens: usage.completion_tokens || 0 }
|
|
498
515
|
state.messageDeltaSent = true
|
|
499
516
|
state.queue.push(state.heldMessageDelta)
|
|
500
517
|
state.heldMessageDelta = null
|
|
package/src/server/proxy/streaming/openaiResponsesStreamToAnthropic.ts
CHANGED
|
@@ -90,6 +90,14 @@ export function openaiResponsesStreamToAnthropic(
|
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
92
|
} catch (err) {
|
|
93
|
+
// Emit Anthropic-format error event before closing the stream
|
|
94
|
+
const errMsg = err instanceof Error ? err.message : String(err)
|
|
95
|
+
try {
|
|
96
|
+
controller.enqueue(encoder.encode(formatSse('error', {
|
|
97
|
+
type: 'error',
|
|
98
|
+
error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
|
|
99
|
+
})))
|
|
100
|
+
} catch { /* controller may already be closed */ }
|
|
93
101
|
controller.error(err)
|
|
94
102
|
return // don't call close() after error()
|
|
95
103
|
}
|
|
@@ -265,7 +273,7 @@ function processEvent(
|
|
|
265
273
|
controller.enqueue(encoder.encode(formatSse('message_delta', {
|
|
266
274
|
type: 'message_delta',
|
|
267
275
|
delta: { stop_reason: stopReason, stop_sequence: null },
|
|
268
|
-
usage: { output_tokens: usage?.output_tokens ?? 0 },
|
|
276
|
+
usage: { input_tokens: usage?.input_tokens ?? 0, output_tokens: usage?.output_tokens ?? 0 },
|
|
269
277
|
})))
|
|
270
278
|
if (!state.messageStopped) {
|
|
271
279
|
state.messageStopped = true
|
|
package/src/server/proxy/transform/anthropicToOpenaiChat.ts
CHANGED
|
@@ -43,14 +43,13 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
|
|
|
43
43
|
stream: body.stream,
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
// max_tokens —
|
|
46
|
+
// max_tokens — cap to avoid upstream 400 errors from Claude's high defaults (e.g. 64k).
|
|
47
|
+
// DeepSeek: tools/thinking fail above 8192. Other providers: 32768 covers most upstreams.
|
|
47
48
|
if (body.max_tokens !== undefined) {
|
|
48
49
|
if (body.model.toLowerCase().includes('deepseek')) {
|
|
49
|
-
// DeepSeek R1 often fails if max_tokens is set to Claude's high defaults (like 128k)
|
|
50
|
-
// Especially when tools or thinking are involved. 8192 is a safe upper limit for most.
|
|
51
50
|
result.max_tokens = Math.min(body.max_tokens, 8192)
|
|
52
51
|
} else {
|
|
53
|
-
result.max_tokens = body.max_tokens
|
|
52
|
+
result.max_tokens = Math.min(body.max_tokens, 32768)
|
|
54
53
|
}
|
|
55
54
|
}
|
|
56
55
|
|
|
package/src/server/proxy/transform/anthropicToOpenaiResponses.ts
CHANGED
|
@@ -106,6 +106,26 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
|
|
|
106
106
|
// Collect text/image parts and handle tool blocks separately
|
|
107
107
|
const contentParts: (string | OpenAIChatContentPart)[] = []
|
|
108
108
|
|
|
109
|
+
/** Flush accumulated contentParts as a message input item. Preserves image objects. */
|
|
110
|
+
function flushContentParts(): void {
|
|
111
|
+
if (contentParts.length === 0) return
|
|
112
|
+
let flushed: string | OpenAIChatContentPart[]
|
|
113
|
+
const hasRich = contentParts.some((p) => typeof p !== 'string')
|
|
114
|
+
if (hasRich) {
|
|
115
|
+
// Mixed text + images: emit as content-part array
|
|
116
|
+
flushed = contentParts.map((p) =>
|
|
117
|
+
typeof p === 'string' ? { type: 'text' as const, text: p } : p,
|
|
118
|
+
)
|
|
119
|
+
} else {
|
|
120
|
+
// Pure text: emit as plain string
|
|
121
|
+
flushed = (contentParts as string[]).join('')
|
|
122
|
+
}
|
|
123
|
+
contentParts.length = 0
|
|
124
|
+
if (flushed && (typeof flushed === 'string' ? flushed : flushed.length > 0)) {
|
|
125
|
+
output.push({ type: 'message', role: msg.role, content: flushed })
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
109
129
|
for (const block of content) {
|
|
110
130
|
if (block.type === 'text') {
|
|
111
131
|
contentParts.push(block.text)
|
|
@@ -116,15 +136,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
|
|
|
116
136
|
})
|
|
117
137
|
} else if (block.type === 'tool_use') {
|
|
118
138
|
// Flush any accumulated content first
|
|
119
|
-
|
|
120
|
-
const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
|
|
121
|
-
? contentParts[0]
|
|
122
|
-
: contentParts.map((p) => typeof p === 'string' ? p : '').join('')
|
|
123
|
-
if (flatContent) {
|
|
124
|
-
output.push({ type: 'message', role: msg.role, content: flatContent })
|
|
125
|
-
}
|
|
126
|
-
contentParts.length = 0
|
|
127
|
-
}
|
|
139
|
+
flushContentParts()
|
|
128
140
|
// Lift to function_call item
|
|
129
141
|
output.push({
|
|
130
142
|
type: 'function_call',
|
|
@@ -152,14 +164,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
|
|
|
152
164
|
}
|
|
153
165
|
|
|
154
166
|
// Flush remaining content
|
|
155
|
-
|
|
156
|
-
const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
|
|
157
|
-
? contentParts[0]
|
|
158
|
-
: contentParts.map((p) => typeof p === 'string' ? p : '').join('')
|
|
159
|
-
if (flatContent) {
|
|
160
|
-
output.push({ type: 'message', role: msg.role, content: flatContent })
|
|
161
|
-
}
|
|
162
|
-
}
|
|
167
|
+
flushContentParts()
|
|
163
168
|
}
|
|
164
169
|
|
|
165
170
|
function convertToolChoice(choice: unknown): unknown {
|