npm - bingocode - Versions diffs - 1.1.124 → 1.1.125 - Mend

bingocode 1.1.124 → 1.1.125

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "bingocode",
-  "version": "1.1.124",
+  "version": "1.1.125",
   "type": "module",
   "bin": {
     "claude": "bin/claude-win.cjs",

package/src/server/proxy/handler.ts CHANGED

@@ -28,6 +28,9 @@ import { homedir } from 'node:os'
 const providerService = new ProviderService()
+// Stream timeout: configurable via BINGO_STREAM_TIMEOUT_MS, default 300s
+const STREAM_TIMEOUT_MS = parseInt(process.env.BINGO_STREAM_TIMEOUT_MS ?? '300000', 10) || 300_000
 async function logToFile(message: string) {
   // Disabled log output for production
 }
@@ -108,6 +111,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
   }
   const isStream = body.stream === true
+  const betaHeader = req.headers.get('anthropic-beta')
   // --- Slot-based routing ---
   const slot = await identifySlot(body.model ?? '')
@@ -123,7 +127,7 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
     try {
       if (slotConfig.apiFormat === 'anthropic') {
-        return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
+        return await handleAnthropicPassthrough(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel, betaHeader)
       } else if (slotConfig.apiFormat === 'openai_chat') {
         return await handleOpenaiChat(proxiedBody, baseUrl, slotConfig.apiKey, isStream, uiLabel)
       } else {
@@ -170,17 +174,21 @@ async function handleAnthropicPassthrough(
   apiKey: string,
   isStream: boolean,
   uiLabel: string | null = null,
+  betaHeader: string | null = null,
 ): Promise<Response> {
   const url = `${baseUrl}/v1/messages`
+  const upstreamHeaders: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'x-api-key': apiKey,
+    'anthropic-version': '2023-06-01',
+  }
+  if (betaHeader) upstreamHeaders['anthropic-beta'] = betaHeader
   const upstream = await fetch(url, {
     method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'x-api-key': apiKey,
-      'anthropic-version': '2023-06-01',
-    },
+    headers: upstreamHeaders,
     body: JSON.stringify(body),
-    signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
+    signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
   })
   if (!upstream.ok) {
@@ -196,7 +204,9 @@ async function handleAnthropicPassthrough(
     return new Response(upstream.body, {
       status: 200,
       headers: {
-        'Content-Type': 'application/json',
+        'Content-Type': 'text/event-stream',
+        'Cache-Control': 'no-cache',
+        Connection: 'keep-alive',
       },
     })
   }
@@ -222,7 +232,7 @@ async function handleOpenaiChat(
     method: 'POST',
     headers: buildUpstreamHeaders(apiKey),
     body: JSON.stringify(transformed),
-    signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
+    signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
   })
   if (!upstream.ok) {
@@ -273,7 +283,7 @@ async function handleOpenaiResponses(
     method: 'POST',
     headers: buildUpstreamHeaders(apiKey),
     body: JSON.stringify(transformed),
-    signal: isStream ? AbortSignal.timeout(30_000) : AbortSignal.timeout(300_000),
+    signal: isStream ? AbortSignal.timeout(STREAM_TIMEOUT_MS) : AbortSignal.timeout(300_000),
   })
   if (!upstream.ok) {

package/src/server/proxy/streaming/openaiChatStreamToAnthropic.ts CHANGED

@@ -59,6 +59,9 @@ type StreamState = {
   // Holding pattern: hold message_delta until usage arrives
   // (some providers send finish_reason and usage in separate chunks)
   heldMessageDelta: SseEvent | null
+  // Accumulated input token count from upstream usage chunks
+  inputTokens: number
 }
 // ─── Helpers ───────────────────────────────────────────────
@@ -81,6 +84,7 @@ function createState(model: string): StreamState {
     messageDeltaSent: false,
     messageStopSent: false,
     heldMessageDelta: null,
+    inputTokens: 0,
   }
 }
@@ -138,6 +142,14 @@ export function openaiChatStreamToAnthropic(
         }
       } catch (err) {
         errored = true
+        // Emit Anthropic-format error event before closing the stream
+        const errMsg = err instanceof Error ? err.message : String(err)
+        try {
+          controller.enqueue(encoder.encode(formatSse('error', {
+            type: 'error',
+            error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
+          })))
+        } catch { /* controller may already be closed */ }
         controller.error(err)
       } finally {
         if (!errored) {
@@ -464,9 +476,13 @@ function handleFinishReason(
   closeAllOpenBlocks(state)
   const stopReason = mapFinishReason(finishReason)
+  // Capture input_tokens if available in this chunk
+  if (chunk.usage?.prompt_tokens) state.inputTokens = chunk.usage.prompt_tokens
   const usage = chunk.usage
-    ? { output_tokens: chunk.usage.completion_tokens || 0 }
-    : { output_tokens: 0 }
+    ? { input_tokens: chunk.usage.prompt_tokens || 0, output_tokens: chunk.usage.completion_tokens || 0 }
+    : { input_tokens: state.inputTokens, output_tokens: 0 }
   const messageDelta: SseEvent = {
     event: 'message_delta',
@@ -493,8 +509,9 @@ function mergeUsageIntoHeldDelta(
 ): void {
   if (!state.heldMessageDelta) return
+  if (usage.prompt_tokens) state.inputTokens = usage.prompt_tokens
   const data = state.heldMessageDelta.data as Record<string, unknown>
-  data.usage = { output_tokens: usage.completion_tokens || 0 }
+  data.usage = { input_tokens: state.inputTokens, output_tokens: usage.completion_tokens || 0 }
   state.messageDeltaSent = true
   state.queue.push(state.heldMessageDelta)
   state.heldMessageDelta = null

package/src/server/proxy/streaming/openaiResponsesStreamToAnthropic.ts CHANGED

@@ -90,6 +90,14 @@ export function openaiResponsesStreamToAnthropic(
           }
         }
       } catch (err) {
+        // Emit Anthropic-format error event before closing the stream
+        const errMsg = err instanceof Error ? err.message : String(err)
+        try {
+          controller.enqueue(encoder.encode(formatSse('error', {
+            type: 'error',
+            error: { type: 'api_error', message: `[Bingo Proxy] Stream error: ${errMsg}` },
+          })))
+        } catch { /* controller may already be closed */ }
         controller.error(err)
         return // don't call close() after error()
       }
@@ -265,7 +273,7 @@ function processEvent(
       controller.enqueue(encoder.encode(formatSse('message_delta', {
         type: 'message_delta',
         delta: { stop_reason: stopReason, stop_sequence: null },
-        usage: { output_tokens: usage?.output_tokens ?? 0 },
+        usage: { input_tokens: usage?.input_tokens ?? 0, output_tokens: usage?.output_tokens ?? 0 },
       })))
       if (!state.messageStopped) {
         state.messageStopped = true

package/src/server/proxy/transform/anthropicToOpenaiChat.ts CHANGED

@@ -43,14 +43,13 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
     stream: body.stream,
   }
-  // max_tokens — limit for DeepSeek to avoid invalid parameter errors
+  // max_tokens — cap to avoid upstream 400 errors from Claude's high defaults (e.g. 64k).
+  // DeepSeek: tools/thinking fail above 8192. Other providers: 32768 covers most upstreams.
   if (body.max_tokens !== undefined) {
     if (body.model.toLowerCase().includes('deepseek')) {
-      // DeepSeek R1 often fails if max_tokens is set to Claude's high defaults (like 128k)
-      // Especially when tools or thinking are involved. 8192 is a safe upper limit for most.
       result.max_tokens = Math.min(body.max_tokens, 8192)
     } else {
-      result.max_tokens = body.max_tokens
+      result.max_tokens = Math.min(body.max_tokens, 32768)
     }
   }

package/src/server/proxy/transform/anthropicToOpenaiResponses.ts CHANGED

@@ -106,6 +106,26 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
   // Collect text/image parts and handle tool blocks separately
   const contentParts: (string | OpenAIChatContentPart)[] = []
+  /** Flush accumulated contentParts as a message input item. Preserves image objects. */
+  function flushContentParts(): void {
+    if (contentParts.length === 0) return
+    let flushed: string | OpenAIChatContentPart[]
+    const hasRich = contentParts.some((p) => typeof p !== 'string')
+    if (hasRich) {
+      // Mixed text + images: emit as content-part array
+      flushed = contentParts.map((p) =>
+        typeof p === 'string' ? { type: 'text' as const, text: p } : p,
+      )
+    } else {
+      // Pure text: emit as plain string
+      flushed = (contentParts as string[]).join('')
+    }
+    contentParts.length = 0
+    if (flushed && (typeof flushed === 'string' ? flushed : flushed.length > 0)) {
+      output.push({ type: 'message', role: msg.role, content: flushed })
+    }
+  }
   for (const block of content) {
     if (block.type === 'text') {
       contentParts.push(block.text)
@@ -116,15 +136,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
       })
     } else if (block.type === 'tool_use') {
       // Flush any accumulated content first
-      if (contentParts.length > 0) {
-        const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
-          ? contentParts[0]
-          : contentParts.map((p) => typeof p === 'string' ? p : '').join('')
-        if (flatContent) {
-          output.push({ type: 'message', role: msg.role, content: flatContent })
-        }
-        contentParts.length = 0
-      }
+      flushContentParts()
       // Lift to function_call item
       output.push({
         type: 'function_call',
@@ -152,14 +164,7 @@ function convertMessageToInputItems(msg: AnthropicMessage, output: OpenAIRespons
   }
   // Flush remaining content
-  if (contentParts.length > 0) {
-    const flatContent = contentParts.length === 1 && typeof contentParts[0] === 'string'
-      ? contentParts[0]
-      : contentParts.map((p) => typeof p === 'string' ? p : '').join('')
-    if (flatContent) {
-      output.push({ type: 'message', role: msg.role, content: flatContent })
-    }
-  }
+  flushContentParts()
 }
 function convertToolChoice(choice: unknown): unknown {