bingocode 1.1.131 → 1.1.132

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,8 @@
1
1
  {
2
2
  "permissions": {
3
3
  "allow": [
4
- "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\\\\proxy\")",
5
- "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\")",
6
- "Bash(grep -rn \"stream\\\\|proxy\\\\|SSE\\\\|duplicate\\\\|repeat\\\\|render\" F:LeanchyVirtuosAgentBingoCodesrc --include=*.ts -l)",
7
- "Bash(xargs grep:*)"
4
+ "Read(//c/Users/qi.lin/.claude/**)",
5
+ "Bash(dir \"F:\\\\Leanchy\\\\VirtuosAgent\\\\BingoCode\\\\src\\\\server\\\\proxy\")"
8
6
  ]
9
7
  }
10
8
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bingocode",
3
- "version": "1.1.131",
3
+ "version": "1.1.132",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "claude": "bin/claude-win.cjs",
@@ -298,20 +298,40 @@ function extractReasoning(delta: DeltaEx): { thinking: string; signature: string
298
298
  return null
299
299
  }
300
300
 
301
- // ─── Main chunk processing ─────────────────────────────────
302
-
303
301
  /**
304
- * Process a single SSE chunk using dual-pass logic:
305
- * Pass 1 reasoning/thinking (if present)
306
- * Pass 2 — text content (if present)
307
- * Pass 3 — tool calls (if present; mutually exclusive with text/thinking)
308
- *
309
- * This avoids the single-return priority chain that caused spurious
310
- * close/open cycles when providers (Gemini via OpenRouter, DeepSeek, Qwen3, …)
311
- * send reasoning_content and content in the same chunk or in alternating chunks,
312
- * which previously produced multiple text content_block_start events and
313
- * duplicate rendering in Claude Code's Ink terminal UI.
302
+ * Determine what block type this chunk carries and whether it's a new block.
303
+ * Priority (matches LiteLLM): tool_calls > text > reasoning > ignore
314
304
  */
305
+ function detectBlockTransition(
306
+ delta: DeltaEx,
307
+ state: StreamState,
308
+ ): { type: ContentBlockType; isNew: boolean } | null {
309
+ // Priority 1: Tool calls
310
+ if (delta.tool_calls && delta.tool_calls.length > 0) {
311
+ const tc = delta.tool_calls[0]
312
+ // A tool call with function.name signals a NEW tool block
313
+ const isNew = state.currentBlockType !== 'tool_use' || !!(tc.function?.name)
314
+ return { type: 'tool_use', isNew }
315
+ }
316
+
317
+ // Priority 2: Text content
318
+ if (delta.content != null && delta.content !== '') {
319
+ const isNew = state.currentBlockType !== 'text' || !state.blockStartSent
320
+ return { type: 'text', isNew }
321
+ }
322
+
323
+ // Priority 3: Reasoning/thinking
324
+ const reasoning = extractReasoning(delta)
325
+ if (reasoning) {
326
+ const isNew = state.currentBlockType !== 'thinking' || !state.blockStartSent
327
+ return { type: 'thinking', isNew }
328
+ }
329
+
330
+ return null
331
+ }
332
+
333
+ // ─── Main chunk processing ─────────────────────────────────
334
+
315
335
  function processChunk(chunk: OpenAIChatStreamChunk, state: StreamState): void {
316
336
  const choice = chunk.choices?.[0]
317
337
 
@@ -330,33 +350,31 @@ function processChunk(chunk: OpenAIChatStreamChunk, state: StreamState): void {
330
350
 
331
351
  const delta = choice.delta as DeltaEx
332
352
 
333
- // Tool calls are mutually exclusive with text/thinking — handle separately
334
- if (delta.tool_calls && delta.tool_calls.length > 0) {
335
- // Close any open text/thinking block before entering tool_use
336
- if (state.currentBlockType !== 'tool_use' && state.blockStartSent && !state.blockStopSent) {
337
- closeCurrentBlock(state)
338
- }
339
- handleToolCalls(delta, state)
340
- } else {
341
- // Pass 1: reasoning/thinking
342
- const reasoning = extractReasoning(delta)
343
- if (reasoning) {
344
- // If currently in a text block, close it before opening thinking
345
- if (state.currentBlockType === 'text' && state.blockStartSent && !state.blockStopSent) {
353
+ // Detect what this chunk carries
354
+ const transition = detectBlockTransition(delta, state)
355
+
356
+ if (transition) {
357
+ // Handle block transition: close previous block if type changed
358
+ if (transition.isNew && state.blockStartSent && !state.blockStopSent) {
359
+ if (transition.type !== 'tool_use') {
360
+ // For text/thinking, close the current block
361
+ closeCurrentBlock(state)
362
+ } else if (state.currentBlockType !== 'tool_use') {
363
+ // Switching TO tool_use from text/thinking: close current
346
364
  closeCurrentBlock(state)
347
365
  }
348
- handleThinking(delta, state)
349
366
  }
350
367
 
351
- // Pass 2: text content
352
- // After thinking is handled, resume/open text block independently.
353
- // This is the key fix: text is NOT skipped when reasoning was also present.
354
- if (delta.content != null && delta.content !== '') {
355
- // If currently in a thinking block, close it before opening text
356
- if (state.currentBlockType === 'thinking' && state.blockStartSent && !state.blockStopSent) {
357
- closeCurrentBlock(state)
358
- }
359
- handleText(delta, state)
368
+ switch (transition.type) {
369
+ case 'thinking':
370
+ handleThinking(delta, state)
371
+ break
372
+ case 'text':
373
+ handleText(delta, state)
374
+ break
375
+ case 'tool_use':
376
+ handleToolCalls(delta, state)
377
+ break
360
378
  }
361
379
  }
362
380
 
@@ -45,9 +45,13 @@ export function anthropicToOpenaiChat(body: AnthropicRequest): OpenAIChatRequest
45
45
 
46
46
  // max_tokens — cap to avoid upstream 400 errors from Claude's high defaults (e.g. 64k).
47
47
  // DeepSeek: tools/thinking fail above 8192. Other providers: 32768 covers most upstreams.
48
+ // GPT models (gpt-*): use max_completion_tokens instead of max_tokens (required by newer GPT models).
48
49
  if (body.max_tokens !== undefined) {
49
- if (body.model.toLowerCase().includes('deepseek')) {
50
+ const modelLower = body.model.toLowerCase()
51
+ if (modelLower.includes('deepseek')) {
50
52
  result.max_tokens = Math.min(body.max_tokens, 8192)
53
+ } else if (modelLower.startsWith('gpt-') || modelLower.startsWith('o1') || modelLower.startsWith('o3') || modelLower.startsWith('o4')) {
54
+ result.max_completion_tokens = body.max_tokens
51
55
  } else {
52
56
  result.max_tokens = Math.min(body.max_tokens, 32768)
53
57
  }