npm - bingocode - Versions diffs - 1.1.131 → 1.1.132 - Mend

bingocode 1.1.131 → 1.1.132

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4) hide show

package/.claude/settings.local.json +2 -4
package/package.json +1 -1
package/src/server/proxy/streaming/openaiChatStreamToAnthropic.ts +53 -35
package/src/server/proxy/transform/anthropicToOpenaiChat.ts +5 -1

package/.claude/settings.local.json CHANGED Viewed

@@ -1,10 +1,8 @@
 {
   "permissions": {
     "allow": [
-      "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\\\\proxy\")",
-      "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\")",
-      "Bash(grep -rn \"stream\\\\|proxy\\\\|SSE\\\\|duplicate\\\\|repeat\\\\|render\" F:LeanchyVirtuosAgentBingoCodesrc --include=*.ts -l)",
-      "Bash(xargs grep:*)"
+      "Read(//c/Users/qi.lin/.claude/**)",
+      "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\\\\proxy\")"
     ]
   }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bingocode",
-  "version": "1.1.131",
+  "version": "1.1.132",
   "type": "module",
   "bin": {
     "claude": "bin/claude-win.cjs",

package/src/server/proxy/streaming/openaiChatStreamToAnthropic.ts CHANGED Viewed

@@ -298,20 +298,40 @@ function extractReasoning(delta: DeltaEx): { thinking: string; signature: string
   return null
 }
-// ─── Main chunk processing ─────────────────────────────────
 /**
- * Process a single SSE chunk using dual-pass logic:
- *   Pass 1 — reasoning/thinking (if present)
- *   Pass 2 — text content (if present)
- *   Pass 3 — tool calls (if present; mutually exclusive with text/thinking)
- *
- * This avoids the single-return priority chain that caused spurious
- * close/open cycles when providers (Gemini via OpenRouter, DeepSeek, Qwen3, …)
- * send reasoning_content and content in the same chunk or in alternating chunks,
- * which previously produced multiple text content_block_start events and
- * duplicate rendering in Claude Code's Ink terminal UI.
+ * Determine what block type this chunk carries and whether it's a new block.
+ * Priority (matches LiteLLM): tool_calls > text > reasoning > ignore
  */
+function detectBlockTransition(
+  delta: DeltaEx,
+  state: StreamState,
+): { type: ContentBlockType; isNew: boolean } | null {
+  // Priority 1: Tool calls
+  if (delta.tool_calls && delta.tool_calls.length > 0) {
+    const tc = delta.tool_calls[0]
+    // A tool call with function.name signals a NEW tool block
+    const isNew = state.currentBlockType !== 'tool_use' || !!(tc.function?.name)
+    return { type: 'tool_use', isNew }
+  }
+  // Priority 2: Text content
+  if (delta.content != null && delta.content !== '') {
+    const isNew = state.currentBlockType !== 'text' || !state.blockStartSent
+    return { type: 'text', isNew }
+  }
+  // Priority 3: Reasoning/thinking
+  const reasoning = extractReasoning(delta)
+  if (reasoning) {
+    const isNew = state.currentBlockType !== 'thinking' || !state.blockStartSent
+    return { type: 'thinking', isNew }
+  }
+  return null
+}
+// ─── Main chunk processing ─────────────────────────────────
 function processChunk(chunk: OpenAIChatStreamChunk, state: StreamState): void {
   const choice = chunk.choices?.[0]
@@ -330,33 +350,31 @@ function processChunk(chunk: OpenAIChatStreamChunk, state: StreamState): void {
   const delta = choice.delta as DeltaEx
-  // Tool calls are mutually exclusive with text/thinking — handle separately
-  if (delta.tool_calls && delta.tool_calls.length > 0) {
-    // Close any open text/thinking block before entering tool_use
-    if (state.currentBlockType !== 'tool_use' && state.blockStartSent && !state.blockStopSent) {
-      closeCurrentBlock(state)
-    }
-    handleToolCalls(delta, state)
-  } else {
-    // Pass 1: reasoning/thinking
-    const reasoning = extractReasoning(delta)
-    if (reasoning) {
-      // If currently in a text block, close it before opening thinking
-      if (state.currentBlockType === 'text' && state.blockStartSent && !state.blockStopSent) {
+  // Detect what this chunk carries
+  const transition = detectBlockTransition(delta, state)
+  if (transition) {
+    // Handle block transition: close previous block if type changed
+    if (transition.isNew && state.blockStartSent && !state.blockStopSent) {
+      if (transition.type !== 'tool_use') {
+        // For text/thinking, close the current block
+        closeCurrentBlock(state)
+      } else if (state.currentBlockType !== 'tool_use') {
+        // Switching TO tool_use from text/thinking: close current
         closeCurrentBlock(state)
       }
-      handleThinking(delta, state)
     }
-    // Pass 2: text content
-    // After thinking is handled, resume/open text block independently.
-    // This is the key fix: text is NOT skipped when reasoning was also present.
-    if (delta.content != null && delta.content !== '') {
-      // If currently in a thinking block, close it before opening text
-      if (state.currentBlockType === 'thinking' && state.blockStartSent && !state.blockStopSent) {
-        closeCurrentBlock(state)
-      }
-      handleText(delta, state)
+    switch (transition.type) {
+      case 'thinking':
+        handleThinking(delta, state)
+        break
+      case 'text':
+        handleText(delta, state)
+        break
+      case 'tool_use':
+        handleToolCalls(delta, state)
+        break
     }
   }

package/src/server/proxy/transform/anthropicToOpenaiChat.ts CHANGED Viewed

@@ -45,9 +45,13 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
   // max_tokens — cap to avoid upstream 400 errors from Claude's high defaults (e.g. 64k).
   // DeepSeek: tools/thinking fail above 8192. Other providers: 32768 covers most upstreams.
+  // GPT models (gpt-*): use max_completion_tokens instead of max_tokens (required by newer GPT models).
   if (body.max_tokens !== undefined) {
-    if (body.model.toLowerCase().includes('deepseek')) {
+    const modelLower = body.model.toLowerCase()
+    if (modelLower.includes('deepseek')) {
       result.max_tokens = Math.min(body.max_tokens, 8192)
+    } else if (modelLower.startsWith('gpt-') || modelLower.startsWith('o1') || modelLower.startsWith('o3') || modelLower.startsWith('o4')) {
+      result.max_completion_tokens = body.max_tokens
     } else {
       result.max_tokens = Math.min(body.max_tokens, 32768)
     }