@mmmbuto/zai-codex-bridge 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/server.js +285 -127
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mmmbuto/zai-codex-bridge",
3
- "version": "0.2.1",
3
+ "version": "0.3.1",
4
4
  "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
5
5
  "main": "src/server.js",
6
6
  "bin": {
package/src/server.js CHANGED
@@ -11,6 +11,7 @@
11
11
  */
12
12
 
13
13
  const http = require('http');
14
+ const crypto = require('crypto');
14
15
 
15
16
  // Configuration from environment
16
17
  const PORT = parseInt(process.env.PORT || '31415', 10);
@@ -192,55 +193,39 @@ function translateResponsesToChat(request) {
192
193
 
193
194
  /**
194
195
  * Translate Chat Completions response to Responses format
196
+ * Handles both output_text and reasoning_text content
195
197
  */
196
- function translateChatToResponses(chatResponse) {
197
- let text = '';
198
-
199
- // Extract content from Chat format
200
- if (chatResponse.choices && chatResponse.choices.length > 0) {
201
- const choice = chatResponse.choices[0];
202
- if (choice.message && choice.message.content) {
203
- text = choice.message.content;
204
- }
205
- }
206
-
207
- // Map usage
208
- const usage = {};
209
- if (chatResponse.usage) {
210
- if (chatResponse.usage.prompt_tokens) {
211
- usage.input_tokens = chatResponse.usage.prompt_tokens;
212
- }
213
- if (chatResponse.usage.completion_tokens) {
214
- usage.output_tokens = chatResponse.usage.completion_tokens;
215
- }
216
- if (chatResponse.usage.total_tokens) {
217
- usage.total_tokens = chatResponse.usage.total_tokens;
218
- }
198
+ function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
199
+ const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
200
+ const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
201
+ const createdAt = Math.floor(Date.now() / 1000);
202
+
203
+ const content = [];
204
+ if (reasoningText) {
205
+ content.push({ type: 'reasoning_text', text: reasoningText });
219
206
  }
207
+ content.push({ type: 'output_text', text: outputText });
220
208
 
221
- const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
222
-
223
- // OpenAI Responses API format
224
209
  const response = {
225
- id: responseId,
210
+ id: rid,
226
211
  object: 'response',
227
- created_at: Math.floor(Date.now() / 1000),
212
+ created_at: createdAt,
213
+ model,
228
214
  status: 'completed',
229
- model: chatResponse.model || 'glm-4.7',
230
- output: [{
231
- type: 'message',
232
- role: 'assistant',
233
- content: [{
234
- type: 'output_text',
235
- text: text
236
- }]
237
- }],
238
- usage: Object.keys(usage).length > 0 ? usage : undefined
215
+ output: [
216
+ {
217
+ type: 'message',
218
+ id: mid,
219
+ role: 'assistant',
220
+ content
221
+ }
222
+ ]
239
223
  };
240
224
 
241
225
  log('debug', 'Translated Chat->Responses:', {
242
226
  id: response.id,
243
- outputLength: text.length,
227
+ outputLength: outputText.length,
228
+ reasoningLength: reasoningText.length,
244
229
  status: response.status
245
230
  });
246
231
 
@@ -310,124 +295,293 @@ async function makeUpstreamRequest(path, body, headers) {
310
295
  }
311
296
 
312
297
  /**
313
- * Handle streaming response from Z.AI
298
+ * Handle streaming response from Z.AI with proper Responses API event format
299
+ * Separates reasoning_content, content, and tool_calls into distinct events
314
300
  */
315
- async function streamChatToResponses(stream, res, responseId, itemId) {
316
- const decoder = new TextDecoder();
301
+ async function streamChatToResponses(stream, res, responseId, messageItemId) {
317
302
  let buffer = '';
318
- let chunkCount = 0;
319
- let deltaCount = 0;
320
- let lastParsed = null;
321
- let didComplete = false;
303
+ let seq = 0;
304
+
305
+ let outputText = '';
306
+ let reasoningText = '';
307
+
308
+ // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
309
+ const toolCalls = new Map();
310
+ let nextOutputIndex = 1; // 0 is the message item
311
+
312
+ const createdAt = Math.floor(Date.now() / 1000);
313
+
314
+ function send(ev) {
315
+ // Responses streaming: only "data: {json}\n\n"
316
+ res.write(`data: ${JSON.stringify(ev)}\n\n`);
317
+ }
318
+
319
+ // 1) response.created
320
+ send({
321
+ type: 'response.created',
322
+ sequence_number: seq++,
323
+ response: {
324
+ id: responseId,
325
+ object: 'response',
326
+ created_at: createdAt,
327
+ status: 'in_progress',
328
+ output: [],
329
+ },
330
+ });
322
331
 
323
- log('debug', 'Starting to process stream');
332
+ // 2) response.in_progress
333
+ send({
334
+ type: 'response.in_progress',
335
+ sequence_number: seq++,
336
+ response: {
337
+ id: responseId,
338
+ object: 'response',
339
+ created_at: createdAt,
340
+ status: 'in_progress',
341
+ output: [],
342
+ },
343
+ });
324
344
 
325
- // Send initial event to create the output item - using "added" not "add"
326
- const addEvent = {
345
+ // 3) message item added (output_index=0)
346
+ send({
327
347
  type: 'response.output_item.added',
348
+ sequence_number: seq++,
349
+ output_index: 0,
328
350
  item: {
329
351
  type: 'message',
352
+ id: messageItemId,
330
353
  role: 'assistant',
331
- content: [{ type: 'output_text', text: '' }],
332
- id: itemId
354
+ content: [],
333
355
  },
334
- output_index: 0,
335
- response_id: responseId
336
- };
337
- res.write(`data: ${JSON.stringify(addEvent)}\n\n`);
338
- log('debug', 'Sent output_item.added event');
356
+ });
357
+
358
+ async function finalizeAndClose() {
359
+ // done events (if we received deltas)
360
+ if (reasoningText) {
361
+ send({
362
+ type: 'response.reasoning_text.done',
363
+ sequence_number: seq++,
364
+ item_id: messageItemId,
365
+ output_index: 0,
366
+ content_index: 0,
367
+ text: reasoningText,
368
+ });
369
+ }
370
+
371
+ send({
372
+ type: 'response.output_text.done',
373
+ sequence_number: seq++,
374
+ item_id: messageItemId,
375
+ output_index: 0,
376
+ content_index: reasoningText ? 1 : 0,
377
+ text: outputText,
378
+ });
379
+
380
+ // close any tool call items
381
+ for (const [callId, st] of toolCalls.entries()) {
382
+ send({
383
+ type: 'response.function_call_arguments.done',
384
+ sequence_number: seq++,
385
+ item_id: st.itemId,
386
+ output_index: st.outputIndex,
387
+ arguments: st.args,
388
+ });
389
+
390
+ send({
391
+ type: 'response.output_item.done',
392
+ sequence_number: seq++,
393
+ output_index: st.outputIndex,
394
+ item: {
395
+ type: 'function_call',
396
+ id: st.itemId,
397
+ call_id: callId,
398
+ name: st.name,
399
+ arguments: st.args,
400
+ },
401
+ });
402
+ }
403
+
404
+ // output_item.done for message
405
+ const messageContent = [];
406
+ if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
407
+ messageContent.push({ type: 'output_text', text: outputText });
408
+
409
+ send({
410
+ type: 'response.output_item.done',
411
+ sequence_number: seq++,
412
+ output_index: 0,
413
+ item: {
414
+ type: 'message',
415
+ id: messageItemId,
416
+ role: 'assistant',
417
+ content: messageContent,
418
+ },
419
+ });
420
+
421
+ // response.completed
422
+ const outputItems = [
423
+ {
424
+ type: 'message',
425
+ id: messageItemId,
426
+ role: 'assistant',
427
+ content: messageContent,
428
+ },
429
+ ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
430
+ type: 'function_call',
431
+ id: st.itemId,
432
+ call_id: callId,
433
+ name: st.name,
434
+ arguments: st.args,
435
+ })),
436
+ ];
437
+
438
+ send({
439
+ type: 'response.completed',
440
+ sequence_number: seq++,
441
+ response: {
442
+ id: responseId,
443
+ object: 'response',
444
+ created_at: createdAt,
445
+ status: 'completed',
446
+ output: outputItems,
447
+ },
448
+ });
449
+
450
+ // SSE terminator
451
+ res.write('data: [DONE]\n\n');
452
+ res.end();
453
+
454
+ log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
455
+ }
339
456
 
340
457
  try {
341
458
  for await (const chunk of stream) {
342
- buffer += decoder.decode(chunk, { stream: true });
459
+ buffer += chunk.toString('utf8');
460
+
461
+ // Z.ai stream: SSE lines "data: {...}\n"
462
+ // Split by newline and process each complete line
343
463
  const lines = buffer.split('\n');
464
+ // Keep the last line if it's incomplete (doesn't end with data pattern)
344
465
  buffer = lines.pop() || '';
345
466
 
346
- chunkCount++;
347
-
348
467
  for (const line of lines) {
349
- if (!line.trim() || !line.startsWith('data: ')) {
468
+ if (!line.trim() || !line.startsWith('data:')) {
469
+ // Skip empty lines and comments (starting with :)
350
470
  if (line.trim() && !line.startsWith(':')) {
351
471
  log('debug', 'Non-data line:', line.substring(0, 50));
352
472
  }
353
473
  continue;
354
474
  }
355
475
 
356
- const data = line.slice(6).trim();
357
- log('debug', 'SSE data:', data.substring(0, 100));
358
-
359
- // Check for stream end
360
- if (data === '[DONE]') {
361
- log('info', `Stream end received - wrote ${deltaCount} deltas total`);
362
- didComplete = true;
363
- break;
476
+ const payload = line.slice(5).trim();
477
+ if (payload === '[DONE]') {
478
+ log('info', 'Stream received [DONE]');
479
+ await finalizeAndClose();
480
+ return;
364
481
  }
365
482
 
483
+ if (!payload) continue;
484
+
485
+ let json;
366
486
  try {
367
- const parsed = JSON.parse(data);
368
- lastParsed = parsed;
369
- log('debug', 'Parsed SSE:', JSON.stringify(parsed).substring(0, 150));
370
-
371
- const delta = parsed.choices?.[0]?.delta;
372
- const content = delta?.content || delta?.reasoning_content || '';
373
-
374
- if (content) {
375
- deltaCount++;
376
- log('debug', 'Writing delta:', content.substring(0, 30));
377
- // OpenAI Responses API format for text delta
378
- const deltaEvent = {
379
- type: 'response.output_text.delta',
380
- delta: content,
381
- output_index: 0,
382
- item_id: itemId,
383
- sequence_number: deltaCount - 1
384
- };
385
- res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
386
- }
487
+ json = JSON.parse(payload);
387
488
  } catch (e) {
388
- log('warn', 'Failed to parse SSE chunk:', e.message, 'data:', data.substring(0, 100));
489
+ log('warn', 'Failed to parse SSE payload:', e.message, 'payload:', payload.substring(0, 100));
490
+ continue;
389
491
  }
390
- }
391
492
 
392
- if (didComplete) break;
493
+ const choice = json?.choices?.[0];
494
+ const delta = choice?.delta ?? {};
495
+
496
+ // 1) reasoning
497
+ if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
498
+ reasoningText += delta.reasoning_content;
499
+ send({
500
+ type: 'response.reasoning_text.delta',
501
+ sequence_number: seq++,
502
+ item_id: messageItemId,
503
+ output_index: 0,
504
+ content_index: 0,
505
+ delta: delta.reasoning_content,
506
+ });
507
+ log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
508
+ }
509
+
510
+ // 2) normal output
511
+ if (typeof delta.content === 'string' && delta.content.length) {
512
+ outputText += delta.content;
513
+ send({
514
+ type: 'response.output_text.delta',
515
+ sequence_number: seq++,
516
+ item_id: messageItemId,
517
+ output_index: 0,
518
+ content_index: reasoningText ? 1 : 0,
519
+ delta: delta.content,
520
+ });
521
+ log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
522
+ }
393
523
 
394
- if (chunkCount > 1000) {
395
- log('warn', 'Too many chunks, possible loop');
396
- break;
524
+ // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
525
+ if (Array.isArray(delta.tool_calls)) {
526
+ for (const tc of delta.tool_calls) {
527
+ // tc: {id, type:"function", function:{name, arguments}}
528
+ const callId = tc.id || `call_${tc.index ?? 0}`;
529
+ const name = tc.function?.name || 'unknown';
530
+ const argsDelta = tc.function?.arguments || '';
531
+
532
+ let st = toolCalls.get(callId);
533
+ if (!st) {
534
+ st = {
535
+ itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
536
+ outputIndex: nextOutputIndex++,
537
+ name,
538
+ args: '',
539
+ };
540
+ toolCalls.set(callId, st);
541
+
542
+ send({
543
+ type: 'response.output_item.added',
544
+ sequence_number: seq++,
545
+ output_index: st.outputIndex,
546
+ item: {
547
+ type: 'function_call',
548
+ id: st.itemId,
549
+ call_id: callId,
550
+ name: st.name,
551
+ arguments: '',
552
+ },
553
+ });
554
+ log('debug', `Tool call added: ${name} (${callId})`);
555
+ }
556
+
557
+ if (argsDelta) {
558
+ st.args += argsDelta;
559
+ send({
560
+ type: 'response.function_call_arguments.delta',
561
+ sequence_number: seq++,
562
+ item_id: st.itemId,
563
+ output_index: st.outputIndex,
564
+ delta: argsDelta,
565
+ });
566
+ }
567
+ }
568
+ }
569
+
570
+ // 4) finish
571
+ if (choice?.finish_reason) {
572
+ log('info', `Stream finish_reason: ${choice.finish_reason}`);
573
+ await finalizeAndClose();
574
+ return;
575
+ }
397
576
  }
398
577
  }
399
578
  } catch (e) {
400
579
  log('error', 'Stream processing error:', e);
401
580
  }
402
581
 
403
- // ALWAYS send response.completed event (even if stream ended without [DONE])
404
- const zaiUsage = lastParsed?.usage;
405
- const completedEvent = {
406
- type: 'response.completed',
407
- response: {
408
- id: responseId,
409
- status: 'completed',
410
- output: [{
411
- type: 'message',
412
- role: 'assistant',
413
- content: [{ type: 'output_text', text: '' }]
414
- }],
415
- usage: zaiUsage ? {
416
- input_tokens: zaiUsage.prompt_tokens || 0,
417
- output_tokens: zaiUsage.completion_tokens || 0,
418
- total_tokens: zaiUsage.total_tokens || 0
419
- } : {
420
- input_tokens: 0,
421
- output_tokens: 0,
422
- total_tokens: 0
423
- }
424
- },
425
- sequence_number: deltaCount + 1
426
- };
427
-
428
- log('info', 'Sending response.completed event');
429
- res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
430
- log('info', `Stream ended - wrote ${deltaCount} deltas total`);
582
+ // fallback (stream finished without finish_reason)
583
+ log('warn', 'Stream ended without finish_reason, finalizing anyway');
584
+ await finalizeAndClose();
431
585
  }
432
586
 
433
587
  /**
@@ -509,8 +663,8 @@ async function handlePostRequest(req, res) {
509
663
 
510
664
  // Handle streaming response
511
665
  if (upstreamBody.stream) {
512
- const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
513
- const itemId = 'item_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
666
+ const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
667
+ const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
514
668
  log('info', 'Starting streaming response');
515
669
  res.writeHead(200, {
516
670
  'Content-Type': 'text/event-stream; charset=utf-8',
@@ -519,16 +673,20 @@ async function handlePostRequest(req, res) {
519
673
  });
520
674
 
521
675
  try {
522
- await streamChatToResponses(upstreamResponse.body, res, responseId, itemId);
676
+ await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
523
677
  log('info', 'Streaming completed');
524
678
  } catch (e) {
525
679
  log('error', 'Streaming error:', e);
526
680
  }
527
- res.end();
528
681
  } else {
529
682
  // Non-streaming response
530
683
  const chatResponse = await upstreamResponse.json();
531
- const response = translateChatToResponses(chatResponse);
684
+ const msg = chatResponse?.choices?.[0]?.message ?? {};
685
+ const outputText = msg.content ?? '';
686
+ const reasoningText = msg.reasoning_content ?? '';
687
+ const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';
688
+
689
+ const response = translateChatToResponses(outputText, reasoningText, null, null, model);
532
690
 
533
691
  res.writeHead(200, { 'Content-Type': 'application/json' });
534
692
  res.end(JSON.stringify(response));