bingocode 1.1.124 → 1.1.126

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bingocode",
3
- "version": "1.1.124",
3
+ "version": "1.1.126",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "claude": "bin/claude-win.cjs",
@@ -173,8 +173,14 @@ export function supportsExtendedKeys(): boolean {
173
173
  * SetConsoleCursorPosition follows the cursor into scrollback
174
174
  * (microsoft/terminal#14774), yanking users to the top of their buffer
175
175
  * mid-stream. WT_SESSION catches WSL-in-Windows-Terminal where platform
176
- * is linux but output still routes through conhost. */
176
+ * is linux but output still routes through conhost.
177
+ *
178
+ * xterm.js (VS Code, Cursor, Windsurf integrated terminals) does NOT have
179
+ * this bug — cursor-up (CSI A) clamps at viewport top without yanking into
180
+ * scrollback. Even when WT_SESSION is set (e.g. VS Code running inside
181
+ * Windows Terminal), the actual pty renderer is xterm.js, so we exempt it. */
177
182
  export function hasCursorUpViewportYankBug(): boolean {
183
+ if (isXtermJs()) return false
178
184
  return process.platform === 'win32' || !!process.env.WT_SESSION
179
185
  }
180
186
 
@@ -28,6 +28,9 @@ import { homedir } from 'node:os'
28
28
 
29
29
  const providerService = new ProviderService()
30
30
 
31
+ // Stream timeout: configurable via BINGO_STREAM_TIMEOUT_MS, default 300s
32
+ const STREAM_TIMEOUT_MS = parseInt(process.env.BINGO_STREAM_TIMEOUT_MS ?? '300000', 10) || 300_000
33
+
31
34
  async function logToFile(message: string) {
32
35
  // Disabled log output for production
33
36
  }
@@ -108,6 +111,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
108
111
  }
109
112
 
110
113
  const isStream = body.stream === true
114
+ const betaHeader = req.headers.get('anthropic-beta')
111
115
 
112
116
  // --- Slot-based routing ---
113
117
  const slot = await identifySlot(body.model ?? '')
@@ -123,7 +127,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
123
127
 
124
128
  try {
125
129
  if (slotConfig.apiFormat === 'anthropic') {
126
- return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
130
+ return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel, betaHeader)
127
131
  } else if (slotConfig.apiFormat === 'openai_chat') {
128
132
  return await handleOpenaiChat(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
129
133
  } else {
@@ -170,17 +174,21 @@ async function handleAnthropicPassthrough(
170
174
  apiKey: string,
171
175
  isStream: boolean,
172
176
  uiLabel: string | null = null,
177
+ betaHeader: string | null = null,
173
178
  ): Promise<Response> {
174
179
  const url = `${baseUrl}/v1/messages`
180
+ const upstreamHeaders: Record<string, string> = {
181
+ 'Content-Type': 'application/json',
182
+ 'x-api-key': apiKey,
183
+ 'anthropic-version': '2023-06-01',
184
+ }
185
+ if (betaHeader) upstreamHeaders['anthropic-beta'] = betaHeader
186
+
175
187
  const upstream = await fetch(url, {
176
188
  method: 'POST',
177
- headers: {
178
- 'Content-Type': 'application/json',
179
- 'x-api-key': apiKey,
180
- 'anthropic-version': '2023-06-01',
181
- },
189
+ headers: upstreamHeaders,
182
190
  body: JSON.stringify(body),
183
- signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
191
+ signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
184
192
  })
185
193
 
186
194
  if (!upstream.ok) {
@@ -196,7 +204,9 @@ async function handleAnthropicPassthrough(
196
204
  return new Response(upstream.body, {
197
205
  status: 200,
198
206
  headers: {
199
- 'Content-Type': 'application/json',
207
+ 'Content-Type': 'text/event-stream',
208
+ 'Cache-Control': 'no-cache',
209
+ Connection: 'keep-alive',
200
210
  },
201
211
  })
202
212
  }
@@ -222,7 +232,7 @@ async function handleOpenaiChat(
222
232
  method: 'POST',
223
233
  headers: buildUpstreamHeaders(apiKey),
224
234
  body: JSON.stringify(transformed),
225
- signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
235
+ signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
226
236
  })
227
237
 
228
238
  if (!upstream.ok) {
@@ -273,7 +283,7 @@ async function handleOpenaiResponses(
273
283
  method: 'POST',
274
284
  headers: buildUpstreamHeaders(apiKey),
275
285
  body: JSON.stringify(transformed),
276
- signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
286
+ signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
277
287
  })
278
288
 
279
289
  if (!upstream.ok) {
@@ -59,6 +59,9 @@ type StreamState = {
59
59
  // Holding pattern: hold message_delta until usage arrives
60
60
  // (some providers send finish_reason and usage in separate chunks)
61
61
  heldMessageDelta: SseEvent | null
62
+
63
+ // Accumulated input token count from upstream usage chunks
64
+ inputTokens: number
62
65
  }
63
66
 
64
67
  // ─── Helpers ───────────────────────────────────────────────
@@ -81,6 +84,7 @@ function createState(model: string): StreamState {
81
84
  messageDeltaSent: false,
82
85
  messageStopSent: false,
83
86
  heldMessageDelta: null,
87
+ inputTokens: 0,
84
88
  }
85
89
  }
86
90
 
@@ -138,6 +142,14 @@ export function openaiChatStreamToAnthropic(
138
142
  }
139
143
  } catch (err) {
140
144
  errored = true
145
+ // Emit Anthropic-format error event before closing the stream
146
+ const errMsg = err instanceof Error ? err.message : String(err)
147
+ try {
148
+ controller.enqueue(encoder.encode(formatSse('error', {
149
+ type: 'error',
150
+ error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
151
+ })))
152
+ } catch { /* controller may already be closed */ }
141
153
  controller.error(err)
142
154
  } finally {
143
155
  if (!errored) {
@@ -464,9 +476,13 @@ function handleFinishReason(
464
476
  closeAllOpenBlocks(state)
465
477
 
466
478
  const stopReason = mapFinishReason(finishReason)
479
+
480
+ // Capture input_tokens if available in this chunk
481
+ if (chunk.usage?.prompt_tokens != null) state.inputTokens = chunk.usage.prompt_tokens
482
+
467
483
  const usage = chunk.usage
468
- ? { output_tokens: chunk.usage.completion_tokens || 0 }
469
- : { output_tokens: 0 }
484
+ ? { input_tokens: chunk.usage.prompt_tokens || 0, output_tokens: chunk.usage.completion_tokens || 0 }
485
+ : { input_tokens: state.inputTokens, output_tokens: 0 }
470
486
 
471
487
  const messageDelta: SseEvent = {
472
488
  event: 'message_delta',
@@ -493,8 +509,9 @@ function mergeUsageIntoHeldDelta(
493
509
  ): void {
494
510
  if (!state.heldMessageDelta) return
495
511
 
512
+ if (usage.prompt_tokens != null) state.inputTokens = usage.prompt_tokens
496
513
  const data = state.heldMessageDelta.data as Record<string, unknown>
497
- data.usage = { output_tokens: usage.completion_tokens || 0 }
514
+ data.usage = { input_tokens: state.inputTokens, output_tokens: usage.completion_tokens || 0 }
498
515
  state.messageDeltaSent = true
499
516
  state.queue.push(state.heldMessageDelta)
500
517
  state.heldMessageDelta = null
@@ -90,6 +90,14 @@ export function openaiResponsesStreamToAnthropic(
90
90
  }
91
91
  }
92
92
  } catch (err) {
93
+ // Emit Anthropic-format error event before closing the stream
94
+ const errMsg = err instanceof Error ? err.message : String(err)
95
+ try {
96
+ controller.enqueue(encoder.encode(formatSse('error', {
97
+ type: 'error',
98
+ error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
99
+ })))
100
+ } catch { /* controller may already be closed */ }
93
101
  controller.error(err)
94
102
  return // don't call close() after error()
95
103
  }
@@ -265,7 +273,7 @@ function processEvent(
265
273
  controller.enqueue(encoder.encode(formatSse('message_delta', {
266
274
  type: 'message_delta',
267
275
  delta: { stop_reason: stopReason, stop_sequence: null },
268
- usage: { output_tokens: usage?.output_tokens ?? 0 },
276
+ usage: { input_tokens: usage?.input_tokens ?? 0, output_tokens: usage?.output_tokens ?? 0 },
269
277
  })))
270
278
  if (!state.messageStopped) {
271
279
  state.messageStopped = true
@@ -43,14 +43,13 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
43
43
  stream: body.stream,
44
44
  }
45
45
 
46
- // max_tokens — limit for DeepSeek to avoid invalid parameter errors
46
+ // max_tokens — cap to avoid upstream 400 errors from Claude's high defaults (e.g. 64k).
47
+ // DeepSeek: tools/thinking fail above 8192. Other providers: 32768 covers most upstreams.
47
48
  if (body.max_tokens !== undefined) {
48
49
  if (body.model.toLowerCase().includes('deepseek')) {
49
- // DeepSeek R1 often fails if max_tokens is set to Claude's high defaults (like 128k)
50
- // Especially when tools or thinking are involved. 8192 is a safe upper limit for most.
51
50
  result.max_tokens = Math.min(body.max_tokens, 8192)
52
51
  } else {
53
- result.max_tokens = body.max_tokens
52
+ result.max_tokens = Math.min(body.max_tokens, 32768)
54
53
  }
55
54
  }
56
55
 
@@ -89,7 +88,7 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
89
88
  }
90
89
 
91
90
  // thinking → reasoning_effort
92
- if (body.thinking && !body.model.toLowerCase().includes('deepseek')) {
91
+ if (body.thinking) {
93
92
  const budget = body.thinking.budget_tokens
94
93
  if (budget !== undefined) {
95
94
  if (budget <= 1024) result.reasoning_effort = 'low'
@@ -106,6 +106,26 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
106
106
  // Collect text/image parts and handle tool blocks separately
107
107
  const contentParts: (string | OpenAIChatContentPart)[] = []
108
108
 
109
+ /** Flush accumulated contentParts as a message input item. Preserves image objects. */
110
+ function flushContentParts(): void {
111
+ if (contentParts.length === 0) return
112
+ let flushed: string | OpenAIChatContentPart[]
113
+ const hasRich = contentParts.some((p) => typeof p !== 'string')
114
+ if (hasRich) {
115
+ // Mixed text + images: emit as content-part array
116
+ flushed = contentParts.map((p) =>
117
+ typeof p === 'string' ? { type: 'text' as const, text: p } : p,
118
+ )
119
+ } else {
120
+ // Pure text: emit as plain string
121
+ flushed = (contentParts as string[]).join('')
122
+ }
123
+ contentParts.length = 0
124
+ if (flushed && (typeof flushed === 'string' ? flushed : flushed.length > 0)) {
125
+ output.push({ type: 'message', role: msg.role, content: flushed })
126
+ }
127
+ }
128
+
109
129
  for (const block of content) {
110
130
  if (block.type === 'text') {
111
131
  contentParts.push(block.text)
@@ -116,15 +136,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
116
136
  })
117
137
  } else if (block.type === 'tool_use') {
118
138
  // Flush any accumulated content first
119
- if (contentParts.length > 0) {
120
- const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
121
- ? contentParts[0]
122
- : contentParts.map((p) => typeof p === 'string' ? p : '').join('')
123
- if (flatContent) {
124
- output.push({ type: 'message', role: msg.role, content: flatContent })
125
- }
126
- contentParts.length = 0
127
- }
139
+ flushContentParts()
128
140
  // Lift to function_call item
129
141
  output.push({
130
142
  type: 'function_call',
@@ -152,14 +164,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
152
164
  }
153
165
 
154
166
  // Flush remaining content
155
- if (contentParts.length > 0) {
156
- const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
157
- ? contentParts[0]
158
- : contentParts.map((p) => typeof p === 'string' ? p : '').join('')
159
- if (flatContent) {
160
- output.push({ type: 'message', role: msg.role, content: flatContent })
161
- }
162
- }
167
+ flushContentParts()
163
168
  }
164
169
 
165
170
  function convertToolChoice(choice: unknown): unknown {