@mmmbuto/zai-codex-bridge 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/server.js +276 -134
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mmmbuto/zai-codex-bridge",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
5
5
  "main": "src/server.js",
6
6
  "bin": {
package/src/server.js CHANGED
@@ -11,6 +11,7 @@
11
11
  */
12
12
 
13
13
  const http = require('http');
14
+ const crypto = require('crypto');
14
15
 
15
16
  // Configuration from environment
16
17
  const PORT = parseInt(process.env.PORT || '31415', 10);
@@ -192,55 +193,39 @@ function translateResponsesToChat(request) {
192
193
 
193
194
  /**
194
195
  * Translate Chat Completions response to Responses format
196
+ * Handles both output_text and reasoning_text content
195
197
  */
196
- function translateChatToResponses(chatResponse) {
197
- let text = '';
198
-
199
- // Extract content from Chat format
200
- if (chatResponse.choices && chatResponse.choices.length > 0) {
201
- const choice = chatResponse.choices[0];
202
- if (choice.message && choice.message.content) {
203
- text = choice.message.content;
204
- }
198
+ function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
199
+ const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
200
+ const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
201
+ const createdAt = Math.floor(Date.now() / 1000);
202
+
203
+ const content = [];
204
+ if (reasoningText) {
205
+ content.push({ type: 'reasoning_text', text: reasoningText });
205
206
  }
207
+ content.push({ type: 'output_text', text: outputText });
206
208
 
207
- // Map usage
208
- const usage = {};
209
- if (chatResponse.usage) {
210
- if (chatResponse.usage.prompt_tokens) {
211
- usage.input_tokens = chatResponse.usage.prompt_tokens;
212
- }
213
- if (chatResponse.usage.completion_tokens) {
214
- usage.output_tokens = chatResponse.usage.completion_tokens;
215
- }
216
- if (chatResponse.usage.total_tokens) {
217
- usage.total_tokens = chatResponse.usage.total_tokens;
218
- }
219
- }
220
-
221
- const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
222
-
223
- // OpenAI Responses API format
224
209
  const response = {
225
- id: responseId,
210
+ id: rid,
226
211
  object: 'response',
227
- created_at: Math.floor(Date.now() / 1000),
212
+ created_at: createdAt,
213
+ model,
228
214
  status: 'completed',
229
- model: chatResponse.model || 'glm-4.7',
230
- output: [{
231
- type: 'message',
232
- role: 'assistant',
233
- content: [{
234
- type: 'output_text',
235
- text: text
236
- }]
237
- }],
238
- usage: Object.keys(usage).length > 0 ? usage : undefined
215
+ output: [
216
+ {
217
+ type: 'message',
218
+ id: mid,
219
+ role: 'assistant',
220
+ content
221
+ }
222
+ ]
239
223
  };
240
224
 
241
225
  log('debug', 'Translated Chat->Responses:', {
242
226
  id: response.id,
243
- outputLength: text.length,
227
+ outputLength: outputText.length,
228
+ reasoningLength: reasoningText.length,
244
229
  status: response.status
245
230
  });
246
231
 
@@ -310,124 +295,277 @@ async function makeUpstreamRequest(path, body, headers) {
310
295
  }
311
296
 
312
297
  /**
313
- * Handle streaming response from Z.AI
298
+ * Handle streaming response from Z.AI with proper Responses API event format
299
+ * Separates reasoning_content, content, and tool_calls into distinct events
314
300
  */
315
- async function streamChatToResponses(stream, res, responseId, itemId) {
316
- const decoder = new TextDecoder();
301
+ async function streamChatToResponses(stream, res, responseId, messageItemId) {
317
302
  let buffer = '';
318
- let chunkCount = 0;
319
- let deltaCount = 0;
320
- let lastParsed = null;
321
- let didComplete = false;
303
+ let seq = 0;
304
+
305
+ let outputText = '';
306
+ let reasoningText = '';
307
+
308
+ // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
309
+ const toolCalls = new Map();
310
+ let nextOutputIndex = 1; // 0 is the message item
322
311
 
323
- log('debug', 'Starting to process stream');
312
+ const createdAt = Math.floor(Date.now() / 1000);
324
313
 
325
- // Send initial event to create the output item - using "added" not "add"
326
- const addEvent = {
314
+ function send(ev) {
315
+ // Responses streaming: only "data: {json}\n\n"
316
+ res.write(`data: ${JSON.stringify(ev)}\n\n`);
317
+ }
318
+
319
+ // 1) response.created
320
+ send({
321
+ type: 'response.created',
322
+ sequence_number: seq++,
323
+ response: {
324
+ id: responseId,
325
+ object: 'response',
326
+ created_at: createdAt,
327
+ status: 'in_progress',
328
+ output: [],
329
+ },
330
+ });
331
+
332
+ // 2) response.in_progress
333
+ send({
334
+ type: 'response.in_progress',
335
+ sequence_number: seq++,
336
+ response: {
337
+ id: responseId,
338
+ object: 'response',
339
+ created_at: createdAt,
340
+ status: 'in_progress',
341
+ output: [],
342
+ },
343
+ });
344
+
345
+ // 3) message item added (output_index=0)
346
+ send({
327
347
  type: 'response.output_item.added',
348
+ sequence_number: seq++,
349
+ output_index: 0,
328
350
  item: {
329
351
  type: 'message',
352
+ id: messageItemId,
330
353
  role: 'assistant',
331
- content: [{ type: 'output_text', text: '' }],
332
- id: itemId
354
+ content: [],
333
355
  },
334
- output_index: 0,
335
- response_id: responseId
336
- };
337
- res.write(`data: ${JSON.stringify(addEvent)}\n\n`);
338
- log('debug', 'Sent output_item.added event');
356
+ });
339
357
 
340
- try {
341
- for await (const chunk of stream) {
342
- buffer += decoder.decode(chunk, { stream: true });
343
- const lines = buffer.split('\n');
344
- buffer = lines.pop() || '';
358
+ async function finalizeAndClose() {
359
+ // done events (if we received deltas)
360
+ if (reasoningText) {
361
+ send({
362
+ type: 'response.reasoning_text.done',
363
+ sequence_number: seq++,
364
+ item_id: messageItemId,
365
+ output_index: 0,
366
+ content_index: 0,
367
+ text: reasoningText,
368
+ });
369
+ }
370
+
371
+ send({
372
+ type: 'response.output_text.done',
373
+ sequence_number: seq++,
374
+ item_id: messageItemId,
375
+ output_index: 0,
376
+ content_index: reasoningText ? 1 : 0,
377
+ text: outputText,
378
+ });
345
379
 
346
- chunkCount++;
380
+ // close any tool call items
381
+ for (const [callId, st] of toolCalls.entries()) {
382
+ send({
383
+ type: 'response.function_call_arguments.done',
384
+ sequence_number: seq++,
385
+ item_id: st.itemId,
386
+ output_index: st.outputIndex,
387
+ arguments: st.args,
388
+ });
347
389
 
348
- for (const line of lines) {
349
- if (!line.trim() || !line.startsWith('data: ')) {
350
- if (line.trim() && !line.startsWith(':')) {
351
- log('debug', 'Non-data line:', line.substring(0, 50));
352
- }
353
- continue;
354
- }
390
+ send({
391
+ type: 'response.output_item.done',
392
+ sequence_number: seq++,
393
+ output_index: st.outputIndex,
394
+ item: {
395
+ type: 'function_call',
396
+ id: st.itemId,
397
+ call_id: callId,
398
+ name: st.name,
399
+ arguments: st.args,
400
+ },
401
+ });
402
+ }
355
403
 
356
- const data = line.slice(6).trim();
357
- log('debug', 'SSE data:', data.substring(0, 100));
404
+ // output_item.done for message
405
+ const messageContent = [];
406
+ if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
407
+ messageContent.push({ type: 'output_text', text: outputText });
358
408
 
359
- // Check for stream end
360
- if (data === '[DONE]') {
361
- log('info', `Stream end received - wrote ${deltaCount} deltas total`);
362
- didComplete = true;
363
- break;
364
- }
409
+ send({
410
+ type: 'response.output_item.done',
411
+ sequence_number: seq++,
412
+ output_index: 0,
413
+ item: {
414
+ type: 'message',
415
+ id: messageItemId,
416
+ role: 'assistant',
417
+ content: messageContent,
418
+ },
419
+ });
365
420
 
366
- try {
367
- const parsed = JSON.parse(data);
368
- lastParsed = parsed;
369
- log('debug', 'Parsed SSE:', JSON.stringify(parsed).substring(0, 150));
421
+ // response.completed
422
+ const outputItems = [
423
+ {
424
+ type: 'message',
425
+ id: messageItemId,
426
+ role: 'assistant',
427
+ content: messageContent,
428
+ },
429
+ ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
430
+ type: 'function_call',
431
+ id: st.itemId,
432
+ call_id: callId,
433
+ name: st.name,
434
+ arguments: st.args,
435
+ })),
436
+ ];
437
+
438
+ send({
439
+ type: 'response.completed',
440
+ sequence_number: seq++,
441
+ response: {
442
+ id: responseId,
443
+ object: 'response',
444
+ created_at: createdAt,
445
+ status: 'completed',
446
+ output: outputItems,
447
+ },
448
+ });
370
449
 
371
- const delta = parsed.choices?.[0]?.delta;
372
- const content = delta?.content || delta?.reasoning_content || '';
450
+ // SSE terminator
451
+ res.write('data: [DONE]\n\n');
452
+ res.end();
373
453
 
374
- if (content) {
375
- deltaCount++;
376
- log('debug', 'Writing delta:', content.substring(0, 30));
377
- // OpenAI Responses API format for text delta
378
- const deltaEvent = {
454
+ log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
455
+ }
456
+
457
+ try {
458
+ for await (const chunk of stream) {
459
+ buffer += chunk.toString('utf8');
460
+
461
+ // Z.ai stream: SSE lines "data: {...}\n\n"
462
+ let idx;
463
+ while ((idx = buffer.indexOf('\n\n')) !== -1) {
464
+ const raw = buffer.slice(0, idx);
465
+ buffer = buffer.slice(idx + 2);
466
+
467
+ const lines = raw.split('\n');
468
+ for (const line of lines) {
469
+ if (!line.startsWith('data:')) continue;
470
+ const payload = line.slice(5).trim();
471
+ if (!payload || payload === '[DONE]') continue;
472
+
473
+ let json;
474
+ try { json = JSON.parse(payload); } catch { continue; }
475
+
476
+ const choice = json?.choices?.[0];
477
+ const delta = choice?.delta ?? {};
478
+
479
+ // 1) reasoning
480
+ if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
481
+ reasoningText += delta.reasoning_content;
482
+ send({
483
+ type: 'response.reasoning_text.delta',
484
+ sequence_number: seq++,
485
+ item_id: messageItemId,
486
+ output_index: 0,
487
+ content_index: 0,
488
+ delta: delta.reasoning_content,
489
+ });
490
+ log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
491
+ }
492
+
493
+ // 2) normal output
494
+ if (typeof delta.content === 'string' && delta.content.length) {
495
+ outputText += delta.content;
496
+ send({
379
497
  type: 'response.output_text.delta',
380
- delta: content,
498
+ sequence_number: seq++,
499
+ item_id: messageItemId,
381
500
  output_index: 0,
382
- item_id: itemId,
383
- sequence_number: deltaCount - 1
384
- };
385
- res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
501
+ content_index: reasoningText ? 1 : 0,
502
+ delta: delta.content,
503
+ });
504
+ log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
386
505
  }
387
- } catch (e) {
388
- log('warn', 'Failed to parse SSE chunk:', e.message, 'data:', data.substring(0, 100));
389
- }
390
- }
391
506
 
392
- if (didComplete) break;
507
+ // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
508
+ if (Array.isArray(delta.tool_calls)) {
509
+ for (const tc of delta.tool_calls) {
510
+ // tc: {id, type:"function", function:{name, arguments}}
511
+ const callId = tc.id || `call_${tc.index ?? 0}`;
512
+ const name = tc.function?.name || 'unknown';
513
+ const argsDelta = tc.function?.arguments || '';
514
+
515
+ let st = toolCalls.get(callId);
516
+ if (!st) {
517
+ st = {
518
+ itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
519
+ outputIndex: nextOutputIndex++,
520
+ name,
521
+ args: '',
522
+ };
523
+ toolCalls.set(callId, st);
524
+
525
+ send({
526
+ type: 'response.output_item.added',
527
+ sequence_number: seq++,
528
+ output_index: st.outputIndex,
529
+ item: {
530
+ type: 'function_call',
531
+ id: st.itemId,
532
+ call_id: callId,
533
+ name: st.name,
534
+ arguments: '',
535
+ },
536
+ });
537
+ log('debug', `Tool call added: ${name} (${callId})`);
538
+ }
539
+
540
+ if (argsDelta) {
541
+ st.args += argsDelta;
542
+ send({
543
+ type: 'response.function_call_arguments.delta',
544
+ sequence_number: seq++,
545
+ item_id: st.itemId,
546
+ output_index: st.outputIndex,
547
+ delta: argsDelta,
548
+ });
549
+ }
550
+ }
551
+ }
393
552
 
394
- if (chunkCount > 1000) {
395
- log('warn', 'Too many chunks, possible loop');
396
- break;
553
+ // 4) finish
554
+ if (choice?.finish_reason) {
555
+ log('info', `Stream finish_reason: ${choice.finish_reason}`);
556
+ await finalizeAndClose();
557
+ return;
558
+ }
559
+ }
397
560
  }
398
561
  }
399
562
  } catch (e) {
400
563
  log('error', 'Stream processing error:', e);
401
564
  }
402
565
 
403
- // ALWAYS send response.completed event (even if stream ended without [DONE])
404
- const zaiUsage = lastParsed?.usage;
405
- const completedEvent = {
406
- type: 'response.completed',
407
- response: {
408
- id: responseId,
409
- status: 'completed',
410
- output: [{
411
- type: 'message',
412
- role: 'assistant',
413
- content: [{ type: 'output_text', text: '' }]
414
- }],
415
- usage: zaiUsage ? {
416
- input_tokens: zaiUsage.prompt_tokens || 0,
417
- output_tokens: zaiUsage.completion_tokens || 0,
418
- total_tokens: zaiUsage.total_tokens || 0
419
- } : {
420
- input_tokens: 0,
421
- output_tokens: 0,
422
- total_tokens: 0
423
- }
424
- },
425
- sequence_number: deltaCount + 1
426
- };
427
-
428
- log('info', 'Sending response.completed event');
429
- res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
430
- log('info', `Stream ended - wrote ${deltaCount} deltas total`);
566
+ // fallback (stream finished without finish_reason)
567
+ log('warn', 'Stream ended without finish_reason, finalizing anyway');
568
+ await finalizeAndClose();
431
569
  }
432
570
 
433
571
  /**
@@ -509,8 +647,8 @@ async function handlePostRequest(req, res) {
509
647
 
510
648
  // Handle streaming response
511
649
  if (upstreamBody.stream) {
512
- const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
513
- const itemId = 'item_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
650
+ const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
651
+ const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
514
652
  log('info', 'Starting streaming response');
515
653
  res.writeHead(200, {
516
654
  'Content-Type': 'text/event-stream; charset=utf-8',
@@ -519,16 +657,20 @@ async function handlePostRequest(req, res) {
519
657
  });
520
658
 
521
659
  try {
522
- await streamChatToResponses(upstreamResponse.body, res, responseId, itemId);
660
+ await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
523
661
  log('info', 'Streaming completed');
524
662
  } catch (e) {
525
663
  log('error', 'Streaming error:', e);
526
664
  }
527
- res.end();
528
665
  } else {
529
666
  // Non-streaming response
530
667
  const chatResponse = await upstreamResponse.json();
531
- const response = translateChatToResponses(chatResponse);
668
+ const msg = chatResponse?.choices?.[0]?.message ?? {};
669
+ const outputText = msg.content ?? '';
670
+ const reasoningText = msg.reasoning_content ?? '';
671
+ const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';
672
+
673
+ const response = translateChatToResponses(outputText, reasoningText, null, null, model);
532
674
 
533
675
  res.writeHead(200, { 'Content-Type': 'application/json' });
534
676
  res.end(JSON.stringify(response));