@mmmbuto/zai-codex-bridge 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/server.js +308 -139
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mmmbuto/zai-codex-bridge",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
   "main": "src/server.js",
   "bin": {
package/src/server.js CHANGED
@@ -11,6 +11,7 @@
  */
 
 const http = require('http');
+const crypto = require('crypto');
 
 // Configuration from environment
 const PORT = parseInt(process.env.PORT || '31415', 10);
@@ -192,61 +193,69 @@ function translateResponsesToChat(request) {
 
 /**
  * Translate Chat Completions response to Responses format
+ * Handles both output_text and reasoning_text content
  */
-function translateChatToResponses(chatResponse) {
-  let text = '';
-
-  // Extract content from Chat format
-  if (chatResponse.choices && chatResponse.choices.length > 0) {
-    const choice = chatResponse.choices[0];
-    if (choice.message && choice.message.content) {
-      text = choice.message.content;
-    }
-  }
-
-  // Map usage
-  const usage = {};
-  if (chatResponse.usage) {
-    if (chatResponse.usage.prompt_tokens) {
-      usage.input_tokens = chatResponse.usage.prompt_tokens;
-    }
-    if (chatResponse.usage.completion_tokens) {
-      usage.output_tokens = chatResponse.usage.completion_tokens;
-    }
-    if (chatResponse.usage.total_tokens) {
-      usage.total_tokens = chatResponse.usage.total_tokens;
-    }
+function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
+  const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
+  const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
+  const createdAt = Math.floor(Date.now() / 1000);
+
+  const content = [];
+  if (reasoningText) {
+    content.push({ type: 'reasoning_text', text: reasoningText });
   }
+  content.push({ type: 'output_text', text: outputText });
 
-  const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
-
-  // OpenAI Responses API format
   const response = {
-    id: responseId,
+    id: rid,
     object: 'response',
-    created_at: Math.floor(Date.now() / 1000),
+    created_at: createdAt,
+    model,
     status: 'completed',
-    model: chatResponse.model || 'glm-4.7',
-    output: [{
-      type: 'message',
-      role: 'assistant',
-      content: [{
-        type: 'output_text',
-        text: text
-      }]
-    }],
-    usage: Object.keys(usage).length > 0 ? usage : undefined
+    output: [
+      {
+        type: 'message',
+        id: mid,
+        role: 'assistant',
+        content
+      }
+    ]
   };
 
   log('debug', 'Translated Chat->Responses:', {
     id: response.id,
-    outputLength: text.length,
+    outputLength: outputText.length,
+    reasoningLength: reasoningText.length,
     status: response.status
   });
 
   return response;
 }
 
+/**
+ * Extract and normalize Bearer token
+ */
+function getBearer(raw) {
+  if (!raw) return '';
+  let t = String(raw).trim();
+  if (!t) return '';
+  // If already "Bearer xxx" keep it, otherwise add it
+  if (!t.toLowerCase().startsWith('bearer ')) t = `Bearer ${t}`;
+  return t;
+}
+
+/**
+ * Pick auth token from env ZAI_API_KEY (priority) or incoming headers
+ */
+function pickAuth(incomingHeaders) {
+  // PRIORITY: env ZAI_API_KEY (force correct key) -> incoming header
+  const envTok = (process.env.ZAI_API_KEY || '').trim();
+  if (envTok) return getBearer(envTok);
+
+  const h = (incomingHeaders['authorization'] || incomingHeaders['Authorization'] || '').trim();
+  return getBearer(h);
+}
+
 /**
  * Make upstream request to Z.AI
  */
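
Note: the new auth precedence can be sanity-checked in a Node REPL. A minimal sketch using the functions defined in the hunk above (the sample keys are placeholders):

  getBearer('sk-123');                              // => 'Bearer sk-123'
  getBearer('Bearer sk-123');                       // => 'Bearer sk-123' (kept as-is)
  getBearer('   ');                                 // => ''

  process.env.ZAI_API_KEY = 'sk-env';
  pickAuth({ authorization: 'Bearer sk-client' });  // => 'Bearer sk-env' (env wins)

  delete process.env.ZAI_API_KEY;
  pickAuth({ authorization: 'Bearer sk-client' });  // => 'Bearer sk-client'
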
@@ -257,9 +266,10 @@ async function makeUpstreamRequest(path, body, headers) {
   const cleanPath = path.startsWith('/') ? path.slice(1) : path;
   const url = new URL(cleanPath, baseUrl);
 
+  const auth = pickAuth(headers);
   const upstreamHeaders = {
     'Content-Type': 'application/json',
-    'Authorization': headers['authorization'] || headers['Authorization'] || ''
+    'Authorization': auth
   };
 
   log('info', 'Upstream request:', {
@@ -267,7 +277,8 @@ async function makeUpstreamRequest(path, body, headers) {
     path: path,
     cleanPath: cleanPath,
     base: ZAI_BASE_URL,
-    hasAuth: !!upstreamHeaders.Authorization,
+    auth_len: auth.length,
+    auth_prefix: auth.slice(0, 14), // "Bearer xxxxxx"
     bodyKeys: Object.keys(body),
     bodyPreview: JSON.stringify(body).substring(0, 800),
     messagesCount: body.messages?.length || 0,
@@ -284,124 +295,277 @@ async function makeUpstreamRequest(path, body, headers) {
 }
 
 /**
- * Handle streaming response from Z.AI
+ * Handle streaming response from Z.AI with proper Responses API event format
+ * Separates reasoning_content, content, and tool_calls into distinct events
  */
-async function streamChatToResponses(stream, res, responseId, itemId) {
-  const decoder = new TextDecoder();
+async function streamChatToResponses(stream, res, responseId, messageItemId) {
   let buffer = '';
-  let chunkCount = 0;
-  let deltaCount = 0;
-  let lastParsed = null;
-  let didComplete = false;
+  let seq = 0;
+
+  let outputText = '';
+  let reasoningText = '';
+
+  // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
+  const toolCalls = new Map();
+  let nextOutputIndex = 1; // 0 is the message item
+
+  const createdAt = Math.floor(Date.now() / 1000);
+
+  function send(ev) {
+    // Responses streaming: only "data: {json}\n\n"
+    res.write(`data: ${JSON.stringify(ev)}\n\n`);
+  }
+
+  // 1) response.created
+  send({
+    type: 'response.created',
+    sequence_number: seq++,
+    response: {
+      id: responseId,
+      object: 'response',
+      created_at: createdAt,
+      status: 'in_progress',
+      output: [],
+    },
+  });
 
-  log('debug', 'Starting to process stream');
+  // 2) response.in_progress
+  send({
+    type: 'response.in_progress',
+    sequence_number: seq++,
+    response: {
+      id: responseId,
+      object: 'response',
+      created_at: createdAt,
+      status: 'in_progress',
+      output: [],
+    },
+  });
 
-  // Send initial event to create the output item - using "added" not "add"
-  const addEvent = {
+  // 3) message item added (output_index=0)
+  send({
     type: 'response.output_item.added',
+    sequence_number: seq++,
+    output_index: 0,
     item: {
       type: 'message',
+      id: messageItemId,
       role: 'assistant',
-      content: [{ type: 'output_text', text: '' }],
-      id: itemId
+      content: [],
     },
-    output_index: 0,
-    response_id: responseId
-  };
-  res.write(`data: ${JSON.stringify(addEvent)}\n\n`);
-  log('debug', 'Sent output_item.added event');
+  });
 
-  try {
-    for await (const chunk of stream) {
-      buffer += decoder.decode(chunk, { stream: true });
-      const lines = buffer.split('\n');
-      buffer = lines.pop() || '';
+  async function finalizeAndClose() {
+    // done events (if we received deltas)
+    if (reasoningText) {
+      send({
+        type: 'response.reasoning_text.done',
+        sequence_number: seq++,
+        item_id: messageItemId,
+        output_index: 0,
+        content_index: 0,
+        text: reasoningText,
+      });
+    }
 
-      chunkCount++;
+    send({
+      type: 'response.output_text.done',
+      sequence_number: seq++,
+      item_id: messageItemId,
+      output_index: 0,
+      content_index: reasoningText ? 1 : 0,
+      text: outputText,
+    });
 
-      for (const line of lines) {
-        if (!line.trim() || !line.startsWith('data: ')) {
-          if (line.trim() && !line.startsWith(':')) {
-            log('debug', 'Non-data line:', line.substring(0, 50));
-          }
-          continue;
-        }
+    // close any tool call items
+    for (const [callId, st] of toolCalls.entries()) {
+      send({
+        type: 'response.function_call_arguments.done',
+        sequence_number: seq++,
+        item_id: st.itemId,
+        output_index: st.outputIndex,
+        arguments: st.args,
+      });
+
+      send({
+        type: 'response.output_item.done',
+        sequence_number: seq++,
+        output_index: st.outputIndex,
+        item: {
+          type: 'function_call',
+          id: st.itemId,
+          call_id: callId,
+          name: st.name,
+          arguments: st.args,
+        },
+      });
+    }
 
-        const data = line.slice(6).trim();
-        log('debug', 'SSE data:', data.substring(0, 100));
+    // output_item.done for message
+    const messageContent = [];
+    if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
+    messageContent.push({ type: 'output_text', text: outputText });
 
-        // Check for stream end
-        if (data === '[DONE]') {
-          log('info', `Stream end received - wrote ${deltaCount} deltas total`);
-          didComplete = true;
-          break;
-        }
+    send({
+      type: 'response.output_item.done',
+      sequence_number: seq++,
+      output_index: 0,
+      item: {
+        type: 'message',
+        id: messageItemId,
+        role: 'assistant',
+        content: messageContent,
+      },
+    });
 
-        try {
-          const parsed = JSON.parse(data);
-          lastParsed = parsed;
-          log('debug', 'Parsed SSE:', JSON.stringify(parsed).substring(0, 150));
+    // response.completed
+    const outputItems = [
+      {
+        type: 'message',
+        id: messageItemId,
+        role: 'assistant',
+        content: messageContent,
+      },
+      ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
+        type: 'function_call',
+        id: st.itemId,
+        call_id: callId,
+        name: st.name,
+        arguments: st.args,
+      })),
+    ];
+
+    send({
+      type: 'response.completed',
+      sequence_number: seq++,
+      response: {
+        id: responseId,
+        object: 'response',
+        created_at: createdAt,
+        status: 'completed',
+        output: outputItems,
+      },
+    });
 
-          const delta = parsed.choices?.[0]?.delta;
-          const content = delta?.content || delta?.reasoning_content || '';
+    // SSE terminator
+    res.write('data: [DONE]\n\n');
+    res.end();
+
+    log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
+  }
 
-          if (content) {
-            deltaCount++;
-            log('debug', 'Writing delta:', content.substring(0, 30));
-            // OpenAI Responses API format for text delta
-            const deltaEvent = {
+  try {
+    for await (const chunk of stream) {
+      buffer += chunk.toString('utf8');
+
+      // Z.ai stream: SSE lines "data: {...}\n\n"
+      let idx;
+      while ((idx = buffer.indexOf('\n\n')) !== -1) {
+        const raw = buffer.slice(0, idx);
+        buffer = buffer.slice(idx + 2);
+
+        const lines = raw.split('\n');
+        for (const line of lines) {
+          if (!line.startsWith('data:')) continue;
+          const payload = line.slice(5).trim();
+          if (!payload || payload === '[DONE]') continue;
+
+          let json;
+          try { json = JSON.parse(payload); } catch { continue; }
+
+          const choice = json?.choices?.[0];
+          const delta = choice?.delta ?? {};
+
+          // 1) reasoning
+          if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
+            reasoningText += delta.reasoning_content;
+            send({
+              type: 'response.reasoning_text.delta',
+              sequence_number: seq++,
+              item_id: messageItemId,
+              output_index: 0,
+              content_index: 0,
+              delta: delta.reasoning_content,
+            });
+            log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
+          }
+
+          // 2) normal output
+          if (typeof delta.content === 'string' && delta.content.length) {
+            outputText += delta.content;
+            send({
               type: 'response.output_text.delta',
-              delta: content,
+              sequence_number: seq++,
+              item_id: messageItemId,
               output_index: 0,
-              item_id: itemId,
-              sequence_number: deltaCount - 1
-            };
-            res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
+              content_index: reasoningText ? 1 : 0,
+              delta: delta.content,
+            });
+            log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
           }
-        } catch (e) {
-          log('warn', 'Failed to parse SSE chunk:', e.message, 'data:', data.substring(0, 100));
-        }
-      }
 
-      if (didComplete) break;
+          // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
+          if (Array.isArray(delta.tool_calls)) {
+            for (const tc of delta.tool_calls) {
+              // tc: {id, type:"function", function:{name, arguments}}
+              const callId = tc.id || `call_${tc.index ?? 0}`;
+              const name = tc.function?.name || 'unknown';
+              const argsDelta = tc.function?.arguments || '';
+
+              let st = toolCalls.get(callId);
+              if (!st) {
+                st = {
+                  itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
+                  outputIndex: nextOutputIndex++,
+                  name,
+                  args: '',
+                };
+                toolCalls.set(callId, st);
+
+                send({
+                  type: 'response.output_item.added',
+                  sequence_number: seq++,
+                  output_index: st.outputIndex,
+                  item: {
+                    type: 'function_call',
+                    id: st.itemId,
+                    call_id: callId,
+                    name: st.name,
+                    arguments: '',
+                  },
+                });
+                log('debug', `Tool call added: ${name} (${callId})`);
+              }
+
+              if (argsDelta) {
+                st.args += argsDelta;
+                send({
+                  type: 'response.function_call_arguments.delta',
+                  sequence_number: seq++,
+                  item_id: st.itemId,
+                  output_index: st.outputIndex,
+                  delta: argsDelta,
+                });
+              }
+            }
+          }
 
-      if (chunkCount > 1000) {
-        log('warn', 'Too many chunks, possible loop');
-        break;
+          // 4) finish
+          if (choice?.finish_reason) {
+            log('info', `Stream finish_reason: ${choice.finish_reason}`);
+            await finalizeAndClose();
+            return;
+          }
+        }
       }
     }
   } catch (e) {
     log('error', 'Stream processing error:', e);
   }
 
-  // ALWAYS send response.completed event (even if stream ended without [DONE])
-  const zaiUsage = lastParsed?.usage;
-  const completedEvent = {
-    type: 'response.completed',
-    response: {
-      id: responseId,
-      status: 'completed',
-      output: [{
-        type: 'message',
-        role: 'assistant',
-        content: [{ type: 'output_text', text: '' }]
-      }],
-      usage: zaiUsage ? {
-        input_tokens: zaiUsage.prompt_tokens || 0,
-        output_tokens: zaiUsage.completion_tokens || 0,
-        total_tokens: zaiUsage.total_tokens || 0
-      } : {
-        input_tokens: 0,
-        output_tokens: 0,
-        total_tokens: 0
-      }
-    },
-    sequence_number: deltaCount + 1
-  };
-
-  log('info', 'Sending response.completed event');
-  res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
-  log('info', `Stream ended - wrote ${deltaCount} deltas total`);
+  // fallback (stream finished without finish_reason)
+  log('warn', 'Stream ended without finish_reason, finalizing anyway');
+  await finalizeAndClose();
 }
 
 /**
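
Note: for a turn containing reasoning, visible text, and a single tool call, the rewritten handler above emits SSE events on the wire in roughly this order (a sketch of its output; IDs and payloads abbreviated with "..."):

  data: {"type":"response.created","sequence_number":0,...}
  data: {"type":"response.in_progress","sequence_number":1,...}
  data: {"type":"response.output_item.added","output_index":0,"item":{"type":"message",...}}
  data: {"type":"response.reasoning_text.delta","content_index":0,"delta":"Thinking..."}
  data: {"type":"response.output_text.delta","content_index":1,"delta":"Hello"}
  data: {"type":"response.output_item.added","output_index":1,"item":{"type":"function_call",...}}
  data: {"type":"response.function_call_arguments.delta","delta":"{\"path\":..."}
  data: {"type":"response.reasoning_text.done",...}
  data: {"type":"response.output_text.done",...}
  data: {"type":"response.function_call_arguments.done",...}
  data: {"type":"response.output_item.done","output_index":1,...}
  data: {"type":"response.output_item.done","output_index":0,...}
  data: {"type":"response.completed",...}
  data: [DONE]
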
@@ -466,15 +630,16 @@ async function handlePostRequest(req, res) {
 
   if (!upstreamResponse.ok) {
     const errorBody = await upstreamResponse.text();
+    const status = upstreamResponse.status;
     log('error', 'Upstream error:', {
-      status: upstreamResponse.status,
+      status: status,
       body: errorBody.substring(0, 200)
     });
 
-    res.writeHead(502, { 'Content-Type': 'application/json' });
+    res.writeHead(status, { 'Content-Type': 'application/json' });
     res.end(JSON.stringify({
       error: 'Upstream request failed',
-      upstream_status: upstreamResponse.status,
+      upstream_status: status,
       upstream_body: errorBody
     }));
     return;
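
Note: with this change a client now sees the upstream status (e.g. 401 for a bad key) instead of a blanket 502. A minimal sketch of a check (Node 18+, run as ESM; assumes the bridge on its default port 31415, ZAI_API_KEY unset so the incoming header is forwarded, and a /v1/responses route, which this diff does not show):

  const res = await fetch('http://localhost:31415/v1/responses', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: 'Bearer bad-key' },
    body: JSON.stringify({ model: 'glm-4.7', input: 'hi' }),
  });
  console.log(res.status); // mirrors Z.AI's status (e.g. 401), no longer 502
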
@@ -482,8 +647,8 @@ async function handlePostRequest(req, res) {
 
   // Handle streaming response
   if (upstreamBody.stream) {
-    const responseId = 'resp_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
-    const itemId = 'item_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
+    const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
+    const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
     log('info', 'Starting streaming response');
     res.writeHead(200, {
       'Content-Type': 'text/event-stream; charset=utf-8',
@@ -492,16 +657,20 @@ async function handlePostRequest(req, res) {
     });
 
     try {
-      await streamChatToResponses(upstreamResponse.body, res, responseId, itemId);
+      await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
       log('info', 'Streaming completed');
     } catch (e) {
       log('error', 'Streaming error:', e);
     }
-    res.end();
   } else {
     // Non-streaming response
     const chatResponse = await upstreamResponse.json();
-    const response = translateChatToResponses(chatResponse);
+    const msg = chatResponse?.choices?.[0]?.message ?? {};
+    const outputText = msg.content ?? '';
+    const reasoningText = msg.reasoning_content ?? '';
+    const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';
+
+    const response = translateChatToResponses(outputText, reasoningText, null, null, model);
 
     res.writeHead(200, { 'Content-Type': 'application/json' });
     res.end(JSON.stringify(response));
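
Note: the new non-streaming path can be exercised end-to-end with a sketch like the following (Node 18+, run as ESM; same assumptions about port 31415 and the /v1/responses route; ZAI_API_KEY exported so pickAuth injects it):

  const res = await fetch('http://localhost:31415/v1/responses', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'glm-4.7', input: 'Say hi', stream: false }),
  });
  const body = await res.json();
  // output[0].content holds reasoning_text (if present) followed by output_text
  console.log(body.output[0].content.at(-1).text);
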