npm - @mmmbuto/zai-codex-bridge - Versions diffs - 0.2.1 → 0.3.1 - Mend

@mmmbuto/zai-codex-bridge 0.2.1 → 0.3.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +1 -1
package/src/server.js +285 -127

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mmmbuto/zai-codex-bridge",
-  "version": "0.2.1",
+  "version": "0.3.1",
   "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
   "main": "src/server.js",
   "bin": {

package/src/server.js CHANGED Viewed

@@ -11,6 +11,7 @@
  */
 const http = require('http');
+const crypto = require('crypto');
 // Configuration from environment
 const PORT = parseInt(process.env.PORT || '31415', 10);
@@ -192,55 +193,39 @@ function translateResponsesToChat(request) {
 /**
  * Translate Chat Completions response to Responses format
+ * Handles both output_text and reasoning_text content
  */
-function translateChatToResponses(chatResponse) {
-  let text = '';
-  // Extract content from Chat format
-  if (chatResponse.choices && chatResponse.choices.length > 0) {
-    const choice = chatResponse.choices[0];
-    if (choice.message && choice.message.content) {
-      text = choice.message.content;
-    }
-  }
-  // Map usage
-  const usage = {};
-  if (chatResponse.usage) {
-    if (chatResponse.usage.prompt_tokens) {
-      usage.input_tokens = chatResponse.usage.prompt_tokens;
-    }
-    if (chatResponse.usage.completion_tokens) {
-      usage.output_tokens = chatResponse.usage.completion_tokens;
-    }
-    if (chatResponse.usage.total_tokens) {
-      usage.total_tokens = chatResponse.usage.total_tokens;
-    }
+function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
+  const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
+  const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
+  const createdAt = Math.floor(Date.now() / 1000);
+  const content = [];
+  if (reasoningText) {
+    content.push({ type: 'reasoning_text', text: reasoningText });
   }
+  content.push({ type: 'output_text', text: outputText });
-  const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
-  // OpenAI Responses API format
   const response = {
-    id: responseId,
+    id: rid,
     object: 'response',
-    created_at: Math.floor(Date.now() / 1000),
+    created_at: createdAt,
+    model,
     status: 'completed',
-    model: chatResponse.model || 'glm-4.7',
-    output: [{
-      type: 'message',
-      role: 'assistant',
-      content: [{
-        type: 'output_text',
-        text: text
-      }]
-    }],
-    usage: Object.keys(usage).length > 0 ? usage : undefined
+    output: [
+      {
+        type: 'message',
+        id: mid,
+        role: 'assistant',
+        content
+      }
+    ]
   };
   log('debug', 'Translated Chat->Responses:', {
     id: response.id,
-    outputLength: text.length,
+    outputLength: outputText.length,
+    reasoningLength: reasoningText.length,
     status: response.status
   });
@@ -310,124 +295,293 @@ async function makeUpstreamRequest(path, body, headers) {
 }
 /**
- * Handle streaming response from Z.AI
+ * Handle streaming response from Z.AI with proper Responses API event format
+ * Separates reasoning_content, content, and tool_calls into distinct events
  */
-async function streamChatToResponses(stream, res, responseId, itemId) {
-  const decoder = new TextDecoder();
+async function streamChatToResponses(stream, res, responseId, messageItemId) {
   let buffer = '';
-  let chunkCount = 0;
-  let deltaCount = 0;
-  let lastParsed = null;
-  let didComplete = false;
+  let seq = 0;
+  let outputText = '';
+  let reasoningText = '';
+  // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
+  const toolCalls = new Map();
+  let nextOutputIndex = 1; // 0 is the message item
+  const createdAt = Math.floor(Date.now() / 1000);
+  function send(ev) {
+    // Responses streaming: only "data: {json}\n\n"
+    res.write(`data: ${JSON.stringify(ev)}\n\n`);
+  }
+  // 1) response.created
+  send({
+    type: 'response.created',
+    sequence_number: seq++,
+    response: {
+      id: responseId,
+      object: 'response',
+      created_at: createdAt,
+      status: 'in_progress',
+      output: [],
+    },
+  });
-  log('debug', 'Starting to process stream');
+  // 2) response.in_progress
+  send({
+    type: 'response.in_progress',
+    sequence_number: seq++,
+    response: {
+      id: responseId,
+      object: 'response',
+      created_at: createdAt,
+      status: 'in_progress',
+      output: [],
+    },
+  });
-  // Send initial event to create the output item - using "added" not "add"
-  const addEvent = {
+  // 3) message item added (output_index=0)
+  send({
     type: 'response.output_item.added',
+    sequence_number: seq++,
+    output_index: 0,
     item: {
       type: 'message',
+      id: messageItemId,
       role: 'assistant',
-      content: [{ type: 'output_text', text: '' }],
-      id: itemId
+      content: [],
     },
-    output_index: 0,
-    response_id: responseId
-  };
-  res.write(`data: ${JSON.stringify(addEvent)}\n\n`);
-  log('debug', 'Sent output_item.added event');
+  });
+  async function finalizeAndClose() {
+    // done events (if we received deltas)
+    if (reasoningText) {
+      send({
+        type: 'response.reasoning_text.done',
+        sequence_number: seq++,
+        item_id: messageItemId,
+        output_index: 0,
+        content_index: 0,
+        text: reasoningText,
+      });
+    }
+    send({
+      type: 'response.output_text.done',
+      sequence_number: seq++,
+      item_id: messageItemId,
+      output_index: 0,
+      content_index: reasoningText ? 1 : 0,
+      text: outputText,
+    });
+    // close any tool call items
+    for (const [callId, st] of toolCalls.entries()) {
+      send({
+        type: 'response.function_call_arguments.done',
+        sequence_number: seq++,
+        item_id: st.itemId,
+        output_index: st.outputIndex,
+        arguments: st.args,
+      });
+      send({
+        type: 'response.output_item.done',
+        sequence_number: seq++,
+        output_index: st.outputIndex,
+        item: {
+          type: 'function_call',
+          id: st.itemId,
+          call_id: callId,
+          name: st.name,
+          arguments: st.args,
+        },
+      });
+    }
+    // output_item.done for message
+    const messageContent = [];
+    if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
+    messageContent.push({ type: 'output_text', text: outputText });
+    send({
+      type: 'response.output_item.done',
+      sequence_number: seq++,
+      output_index: 0,
+      item: {
+        type: 'message',
+        id: messageItemId,
+        role: 'assistant',
+        content: messageContent,
+      },
+    });
+    // response.completed
+    const outputItems = [
+      {
+        type: 'message',
+        id: messageItemId,
+        role: 'assistant',
+        content: messageContent,
+      },
+      ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
+        type: 'function_call',
+        id: st.itemId,
+        call_id: callId,
+        name: st.name,
+        arguments: st.args,
+      })),
+    ];
+    send({
+      type: 'response.completed',
+      sequence_number: seq++,
+      response: {
+        id: responseId,
+        object: 'response',
+        created_at: createdAt,
+        status: 'completed',
+        output: outputItems,
+      },
+    });
+    // SSE terminator
+    res.write('data: [DONE]\n\n');
+    res.end();
+    log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
+  }
   try {
     for await (const chunk of stream) {
-      buffer += decoder.decode(chunk, { stream: true });
+      buffer += chunk.toString('utf8');
+      // Z.ai stream: SSE lines "data: {...}\n"
+      // Split by newline and process each complete line
       const lines = buffer.split('\n');
+      // Keep the last line if it's incomplete (doesn't end with data pattern)
       buffer = lines.pop() || '';
-      chunkCount++;
       for (const line of lines) {
-        if (!line.trim() || !line.startsWith('data: ')) {
+        if (!line.trim() || !line.startsWith('data:')) {
+          // Skip empty lines and comments (starting with :)
           if (line.trim() && !line.startsWith(':')) {
             log('debug', 'Non-data line:', line.substring(0, 50));
           }
           continue;
         }
-        const data = line.slice(6).trim();
-        log('debug', 'SSE data:', data.substring(0, 100));
-        // Check for stream end
-        if (data === '[DONE]') {
-          log('info', `Stream end received - wrote ${deltaCount} deltas total`);
-          didComplete = true;
-          break;
+        const payload = line.slice(5).trim();
+        if (payload === '[DONE]') {
+          log('info', 'Stream received [DONE]');
+          await finalizeAndClose();
+          return;
         }
+        if (!payload) continue;
+        let json;
         try {
-          const parsed = JSON.parse(data);
-          lastParsed = parsed;
-          log('debug', 'Parsed SSE:', JSON.stringify(parsed).substring(0, 150));
-          const delta = parsed.choices?.[0]?.delta;
-          const content = delta?.content || delta?.reasoning_content || '';
-          if (content) {
-            deltaCount++;
-            log('debug', 'Writing delta:', content.substring(0, 30));
-            // OpenAI Responses API format for text delta
-            const deltaEvent = {
-              type: 'response.output_text.delta',
-              delta: content,
-              output_index: 0,
-              item_id: itemId,
-              sequence_number: deltaCount - 1
-            };
-            res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
-          }
+          json = JSON.parse(payload);
         } catch (e) {
-          log('warn', 'Failed to parse SSE chunk:', e.message, 'data:', data.substring(0, 100));
+          log('warn', 'Failed to parse SSE payload:', e.message, 'payload:', payload.substring(0, 100));
+          continue;
         }
-      }
-      if (didComplete) break;
+        const choice = json?.choices?.[0];
+        const delta = choice?.delta ?? {};
+        // 1) reasoning
+        if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
+          reasoningText += delta.reasoning_content;
+          send({
+            type: 'response.reasoning_text.delta',
+            sequence_number: seq++,
+            item_id: messageItemId,
+            output_index: 0,
+            content_index: 0,
+            delta: delta.reasoning_content,
+          });
+          log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
+        }
+        // 2) normal output
+        if (typeof delta.content === 'string' && delta.content.length) {
+          outputText += delta.content;
+          send({
+            type: 'response.output_text.delta',
+            sequence_number: seq++,
+            item_id: messageItemId,
+            output_index: 0,
+            content_index: reasoningText ? 1 : 0,
+            delta: delta.content,
+          });
+          log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
+        }
-      if (chunkCount > 1000) {
-        log('warn', 'Too many chunks, possible loop');
-        break;
+        // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
+        if (Array.isArray(delta.tool_calls)) {
+          for (const tc of delta.tool_calls) {
+            // tc: {id, type:"function", function:{name, arguments}}
+            const callId = tc.id || `call_${tc.index ?? 0}`;
+            const name = tc.function?.name || 'unknown';
+            const argsDelta = tc.function?.arguments || '';
+            let st = toolCalls.get(callId);
+            if (!st) {
+              st = {
+                itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
+                outputIndex: nextOutputIndex++,
+                name,
+                args: '',
+              };
+              toolCalls.set(callId, st);
+              send({
+                type: 'response.output_item.added',
+                sequence_number: seq++,
+                output_index: st.outputIndex,
+                item: {
+                  type: 'function_call',
+                  id: st.itemId,
+                  call_id: callId,
+                  name: st.name,
+                  arguments: '',
+                },
+              });
+              log('debug', `Tool call added: ${name} (${callId})`);
+            }
+            if (argsDelta) {
+              st.args += argsDelta;
+              send({
+                type: 'response.function_call_arguments.delta',
+                sequence_number: seq++,
+                item_id: st.itemId,
+                output_index: st.outputIndex,
+                delta: argsDelta,
+              });
+            }
+          }
+        }
+        // 4) finish
+        if (choice?.finish_reason) {
+          log('info', `Stream finish_reason: ${choice.finish_reason}`);
+          await finalizeAndClose();
+          return;
+        }
       }
     }
   } catch (e) {
     log('error', 'Stream processing error:', e);
   }
-  // ALWAYS send response.completed event (even if stream ended without [DONE])
-  const zaiUsage = lastParsed?.usage;
-  const completedEvent = {
-    type: 'response.completed',
-    response: {
-      id: responseId,
-      status: 'completed',
-      output: [{
-        type: 'message',
-        role: 'assistant',
-        content: [{ type: 'output_text', text: '' }]
-      }],
-      usage: zaiUsage ? {
-        input_tokens: zaiUsage.prompt_tokens || 0,
-        output_tokens: zaiUsage.completion_tokens || 0,
-        total_tokens: zaiUsage.total_tokens || 0
-      } : {
-        input_tokens: 0,
-        output_tokens: 0,
-        total_tokens: 0
-      }
-    },
-    sequence_number: deltaCount + 1
-  };
-  log('info', 'Sending response.completed event');
-  res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
-  log('info', `Stream ended - wrote ${deltaCount} deltas total`);
+  // fallback (stream finished without finish_reason)
+  log('warn', 'Stream ended without finish_reason, finalizing anyway');
+  await finalizeAndClose();
 }
 /**
@@ -509,8 +663,8 @@ async function handlePostRequest(req, res) {
     // Handle streaming response
     if (upstreamBody.stream) {
-      const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
-      const itemId = 'item_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
+      const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
+      const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
       log('info', 'Starting streaming response');
       res.writeHead(200, {
         'Content-Type': 'text/event-stream; charset=utf-8',
@@ -519,16 +673,20 @@ async function handlePostRequest(req, res) {
       });
       try {
-        await streamChatToResponses(upstreamResponse.body, res, responseId, itemId);
+        await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
         log('info', 'Streaming completed');
       } catch (e) {
         log('error', 'Streaming error:', e);
       }
-      res.end();
     } else {
       // Non-streaming response
       const chatResponse = await upstreamResponse.json();
-      const response = translateChatToResponses(chatResponse);
+      const msg = chatResponse?.choices?.[0]?.message ?? {};
+      const outputText = msg.content ?? '';
+      const reasoningText = msg.reasoning_content ?? '';
+      const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';
+      const response = translateChatToResponses(outputText, reasoningText, null, null, model);
       res.writeHead(200, { 'Content-Type': 'application/json' });
       res.end(JSON.stringify(response));