@mmmbuto/zai-codex-bridge 0.3.2 → 0.4.0

Files changed (2):
  1. package/package.json +1 -1
  2. package/src/server.js +212 -282
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mmmbuto/zai-codex-bridge",
-  "version": "0.3.2",
+  "version": "0.4.0",
   "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
   "main": "src/server.js",
   "bin": {
package/src/server.js CHANGED
@@ -11,20 +11,62 @@
  */

 const http = require('http');
-const crypto = require('crypto');
-const { createGunzip } = require('zlib');
-const { pipeline } = require('stream');
+const { randomUUID } = require('crypto');

 // Configuration from environment
 const PORT = parseInt(process.env.PORT || '31415', 10);
 const HOST = process.env.HOST || '127.0.0.1';
 const ZAI_BASE_URL = process.env.ZAI_BASE_URL || 'https://api.z.ai/api/coding/paas/v4';
 const LOG_LEVEL = process.env.LOG_LEVEL || 'info';
+const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'glm-4.7';

 // Env toggles for compatibility
 const ALLOW_SYSTEM = process.env.ALLOW_SYSTEM === '1';
 const ALLOW_TOOLS = process.env.ALLOW_TOOLS === '1';

+function nowSec() {
+  return Math.floor(Date.now() / 1000);
+}
+
+function buildResponseObject({
+  id,
+  model,
+  status,
+  created_at,
+  completed_at = null,
+  input = [],
+  output = [],
+  tools = [],
+}) {
+  // Responses API-compatible structure for Codex CLI
+  return {
+    id,
+    object: 'response',
+    created_at,
+    status,
+    completed_at,
+    error: null,
+    incomplete_details: null,
+    input,
+    instructions: null,
+    max_output_tokens: null,
+    model,
+    output,
+    previous_response_id: null,
+    reasoning_effort: null,
+    store: false,
+    temperature: 1,
+    text: { format: { type: 'text' } },
+    tool_choice: 'auto',
+    tools,
+    top_p: 1,
+    truncation: 'disabled',
+    usage: null,
+    user: null,
+    metadata: {},
+  };
+}
+
 /**
  * Logger
  */
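
The new buildResponseObject helper fills in the full Responses API envelope that Codex CLI expects, rather than the minimal object 0.3.2 emitted. A rough sketch of what a call produces — the id, model, and timestamp values below are invented for illustration, and the helper from the diff above is assumed to be in scope:

// Illustrative only: id/model/created_at are made-up values.
const resp = buildResponseObject({
  id: 'resp_0123456789abcdef',
  model: 'glm-4.7',
  status: 'in_progress',
  created_at: Math.floor(Date.now() / 1000),
});
console.log(resp.object);       // 'response'
console.log(resp.completed_at); // null until the run finishes
console.log(resp.text);         // { format: { type: 'text' } }
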
@@ -197,41 +239,39 @@ function translateResponsesToChat(request) {
  * Translate Chat Completions response to Responses format
  * Handles both output_text and reasoning_text content
  */
-function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
-  const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
-  const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
-  const createdAt = Math.floor(Date.now() / 1000);
+function translateChatToResponses(chatResponse, responsesRequest, ids) {
+  const msg = chatResponse.choices?.[0]?.message ?? {};
+  const outputText = msg.content ?? '';
+  const reasoningText = msg.reasoning_content ?? '';
+
+  const createdAt = ids?.createdAt ?? nowSec();
+  const responseId = ids?.responseId ?? `resp_${randomUUID().replace(/-/g, '')}`;
+  const msgId = ids?.msgId ?? `msg_${randomUUID().replace(/-/g, '')}`;

   const content = [];
   if (reasoningText) {
-    content.push({ type: 'reasoning_text', text: reasoningText });
+    content.push({ type: 'reasoning_text', text: reasoningText, annotations: [] });
   }
-  content.push({ type: 'output_text', text: outputText });
+  content.push({ type: 'output_text', text: outputText, annotations: [] });

-  const response = {
-    id: rid,
-    object: 'response',
-    created_at: createdAt,
-    model,
+  const msgItem = {
+    id: msgId,
+    type: 'message',
     status: 'completed',
-    output: [
-      {
-        type: 'message',
-        id: mid,
-        role: 'assistant',
-        content
-      }
-    ]
+    role: 'assistant',
+    content,
   };

-  log('debug', 'Translated Chat->Responses:', {
-    id: response.id,
-    outputLength: outputText.length,
-    reasoningLength: reasoningText.length,
-    status: response.status
+  return buildResponseObject({
+    id: responseId,
+    model: responsesRequest?.model || chatResponse.model || DEFAULT_MODEL,
+    status: 'completed',
+    created_at: createdAt,
+    completed_at: nowSec(),
+    input: responsesRequest?.input || [],
+    output: [msgItem],
+    tools: responsesRequest?.tools || [],
   });
-
-  return response;
 }

 /**
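
The translation now takes the whole upstream Chat Completions payload plus the original Responses request, so the model name and the echoed input/tools come along for free. A hedged example of the new call shape — the sample payloads are invented, and the function from the diff above is assumed in scope:

// Invented sample upstream reply and original request, for illustration.
const chatResponse = {
  model: 'glm-4.7',
  choices: [{ message: { content: 'Hello!', reasoning_content: 'Greet briefly.' } }],
};
const responsesRequest = { model: 'glm-4.7', input: [], tools: [] };
const ids = { createdAt: nowSec(), responseId: 'resp_demo', msgId: 'msg_demo' };

const out = translateChatToResponses(chatResponse, responsesRequest, ids);
// out.output[0].content is now:
//   [{ type: 'reasoning_text', text: 'Greet briefly.', annotations: [] },
//    { type: 'output_text', text: 'Hello!', annotations: [] }]
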
@@ -301,291 +341,175 @@ async function makeUpstreamRequest(path, body, headers) {
  * Handle streaming response from Z.AI with proper Responses API event format
  * Separates reasoning_content, content, and tool_calls into distinct events
  */
-async function streamChatToResponses(stream, res, responseId, messageItemId) {
+async function streamChatToResponses(upstreamBody, res, responsesRequest, ids) {
+  const decoder = new TextDecoder();
+  const reader = upstreamBody.getReader();
   let buffer = '';
-  let seq = 0;
-
-  let outputText = '';
-  let reasoningText = '';

-  // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
-  const toolCalls = new Map();
-  let nextOutputIndex = 1; // 0 is the message item
+  const createdAt = ids.createdAt;
+  const responseId = ids.responseId;
+  const msgId = ids.msgId;

-  const createdAt = Math.floor(Date.now() / 1000);
+  let seq = 1;
+  const OUTPUT_INDEX = 0;
+  const CONTENT_INDEX = 0;

-  function send(ev) {
-    // Responses streaming: only "data: {json}\n\n"
-    res.write(`data: ${JSON.stringify(ev)}\n\n`);
+  function sse(obj) {
+    if (obj.sequence_number == null) obj.sequence_number = seq++;
+    res.write(`data: ${JSON.stringify(obj)}\n\n`);
   }

-  // 1) response.created
-  send({
-    type: 'response.created',
-    sequence_number: seq++,
-    response: {
-      id: responseId,
-      object: 'response',
-      created_at: createdAt,
-      status: 'in_progress',
-      output: [],
-    },
+  // response.created / response.in_progress
+  const baseResp = buildResponseObject({
+    id: responseId,
+    model: responsesRequest?.model || DEFAULT_MODEL,
+    status: 'in_progress',
+    created_at: createdAt,
+    completed_at: null,
+    input: responsesRequest?.input || [],
+    output: [],
+    tools: responsesRequest?.tools || [],
   });

-  // 2) response.in_progress
-  send({
-    type: 'response.in_progress',
-    sequence_number: seq++,
-    response: {
-      id: responseId,
-      object: 'response',
-      created_at: createdAt,
-      status: 'in_progress',
-      output: [],
-    },
-  });
+  sse({ type: 'response.created', response: baseResp });
+  sse({ type: 'response.in_progress', response: baseResp });

-  // 3) message item added (output_index=0)
-  send({
+  // output_item.added + content_part.added (output_text)
+  const msgItemInProgress = {
+    id: msgId,
+    type: 'message',
+    status: 'in_progress',
+    role: 'assistant',
+    content: [],
+  };
+
+  sse({
     type: 'response.output_item.added',
-    sequence_number: seq++,
-    output_index: 0,
-    item: {
-      type: 'message',
-      id: messageItemId,
-      role: 'assistant',
-      content: [],
-    },
+    output_index: OUTPUT_INDEX,
+    item: msgItemInProgress,
   });

-  async function finalizeAndClose() {
-    // done events (if we received deltas)
-    if (reasoningText) {
-      send({
-        type: 'response.reasoning_text.done',
-        sequence_number: seq++,
-        item_id: messageItemId,
-        output_index: 0,
-        content_index: 0,
-        text: reasoningText,
-      });
-    }
-
-    send({
-      type: 'response.output_text.done',
-      sequence_number: seq++,
-      item_id: messageItemId,
-      output_index: 0,
-      content_index: reasoningText ? 1 : 0,
-      text: outputText,
-    });
-
-    // close any tool call items
-    for (const [callId, st] of toolCalls.entries()) {
-      send({
-        type: 'response.function_call_arguments.done',
-        sequence_number: seq++,
-        item_id: st.itemId,
-        output_index: st.outputIndex,
-        arguments: st.args,
-      });
-
-      send({
-        type: 'response.output_item.done',
-        sequence_number: seq++,
-        output_index: st.outputIndex,
-        item: {
-          type: 'function_call',
-          id: st.itemId,
-          call_id: callId,
-          name: st.name,
-          arguments: st.args,
-        },
-      });
-    }
-
-    // output_item.done for message
-    const messageContent = [];
-    if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
-    messageContent.push({ type: 'output_text', text: outputText });
-
-    send({
-      type: 'response.output_item.done',
-      sequence_number: seq++,
-      output_index: 0,
-      item: {
-        type: 'message',
-        id: messageItemId,
-        role: 'assistant',
-        content: messageContent,
-      },
-    });
-
-    // response.completed
-    const outputItems = [
-      {
-        type: 'message',
-        id: messageItemId,
-        role: 'assistant',
-        content: messageContent,
-      },
-      ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
-        type: 'function_call',
-        id: st.itemId,
-        call_id: callId,
-        name: st.name,
-        arguments: st.args,
-      })),
-    ];
-
-    send({
-      type: 'response.completed',
-      sequence_number: seq++,
-      response: {
-        id: responseId,
-        object: 'response',
-        created_at: createdAt,
-        status: 'completed',
-        output: outputItems,
-      },
-    });
-
-    // SSE terminator
-    res.write('data: [DONE]\n\n');
-    res.end();
+  sse({
+    type: 'response.content_part.added',
+    item_id: msgId,
+    output_index: OUTPUT_INDEX,
+    content_index: CONTENT_INDEX,
+    part: { type: 'output_text', text: '', annotations: [] },
+  });

-    log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
-  }
+  let out = '';
+  let reasoning = '';

-  try {
-    for await (const chunk of stream) {
-      const chunkStr = Buffer.from(chunk).toString('utf8');
-      buffer += chunkStr;
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;

-      // Z.ai stream: SSE lines "data: {...}\n"
-      // Split by newline and process each complete line
-      const lines = buffer.split('\n');
-      // Keep the last line if it's incomplete (doesn't end with data pattern)
-      buffer = lines.pop() || '';
+    buffer += decoder.decode(value, { stream: true });
+    const events = buffer.split('\n\n');
+    buffer = events.pop() || '';

+    for (const evt of events) {
+      const lines = evt.split('\n');
       for (const line of lines) {
-        if (!line.trim() || !line.startsWith('data:')) {
-          // Skip empty lines and comments (starting with :)
-          if (line.trim() && !line.startsWith(':')) {
-            log('debug', 'Non-data line:', line.substring(0, 50));
-          }
-          continue;
-        }
-
+        if (!line.startsWith('data:')) continue;
         const payload = line.slice(5).trim();
+        if (!payload) continue;
         if (payload === '[DONE]') {
-          log('info', 'Stream received [DONE]');
-          await finalizeAndClose();
-          return;
+          // upstream signals end of stream
+          continue;
         }

-        if (!payload) continue;
-
-        let json;
+        let chunk;
         try {
-          json = JSON.parse(payload);
-        } catch (e) {
-          log('warn', 'Failed to parse SSE payload:', e.message, 'payload:', payload.substring(0, 100));
+          chunk = JSON.parse(payload);
+        } catch {
           continue;
         }

-        const choice = json?.choices?.[0];
-        const delta = choice?.delta ?? {};
+        const delta = chunk.choices?.[0]?.delta || {};

-        // 1) reasoning
+        // do NOT mix reasoning into output_text
         if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
-          reasoningText += delta.reasoning_content;
-          send({
+          reasoning += delta.reasoning_content;
+          sse({
             type: 'response.reasoning_text.delta',
-            sequence_number: seq++,
-            item_id: messageItemId,
-            output_index: 0,
-            content_index: 0,
+            item_id: msgId,
+            output_index: OUTPUT_INDEX,
+            content_index: CONTENT_INDEX,
             delta: delta.reasoning_content,
           });
-          log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
         }

-        // 2) normal output
         if (typeof delta.content === 'string' && delta.content.length) {
-          outputText += delta.content;
-          send({
+          out += delta.content;
+          sse({
             type: 'response.output_text.delta',
-            sequence_number: seq++,
-            item_id: messageItemId,
-            output_index: 0,
-            content_index: reasoningText ? 1 : 0,
+            item_id: msgId,
+            output_index: OUTPUT_INDEX,
+            content_index: CONTENT_INDEX,
             delta: delta.content,
           });
-          log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
-        }
-
-        // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
-        if (Array.isArray(delta.tool_calls)) {
-          for (const tc of delta.tool_calls) {
-            // tc: {id, type:"function", function:{name, arguments}}
-            const callId = tc.id || `call_${tc.index ?? 0}`;
-            const name = tc.function?.name || 'unknown';
-            const argsDelta = tc.function?.arguments || '';
-
-            let st = toolCalls.get(callId);
-            if (!st) {
-              st = {
-                itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
-                outputIndex: nextOutputIndex++,
-                name,
-                args: '',
-              };
-              toolCalls.set(callId, st);
-
-              send({
-                type: 'response.output_item.added',
-                sequence_number: seq++,
-                output_index: st.outputIndex,
-                item: {
-                  type: 'function_call',
-                  id: st.itemId,
-                  call_id: callId,
-                  name: st.name,
-                  arguments: '',
-                },
-              });
-              log('debug', `Tool call added: ${name} (${callId})`);
-            }
-
-            if (argsDelta) {
-              st.args += argsDelta;
-              send({
-                type: 'response.function_call_arguments.delta',
-                sequence_number: seq++,
-                item_id: st.itemId,
-                output_index: st.outputIndex,
-                delta: argsDelta,
-              });
-            }
-          }
-        }
-
-        // 4) finish
-        if (choice?.finish_reason) {
-          log('info', `Stream finish_reason: ${choice.finish_reason}`);
-          await finalizeAndClose();
-          return;
         }
       }
     }
-  } catch (e) {
-    log('error', 'Stream processing error:', e);
   }

-  // fallback (stream finished without finish_reason)
-  log('warn', 'Stream ended without finish_reason, finalizing anyway');
-  await finalizeAndClose();
+  // done events
+  if (reasoning.length) {
+    sse({
+      type: 'response.reasoning_text.done',
+      item_id: msgId,
+      output_index: OUTPUT_INDEX,
+      content_index: CONTENT_INDEX,
+      text: reasoning,
+    });
+  }
+
+  sse({
+    type: 'response.output_text.done',
+    item_id: msgId,
+    output_index: OUTPUT_INDEX,
+    content_index: CONTENT_INDEX,
+    text: out,
+  });
+
+  sse({
+    type: 'response.content_part.done',
+    item_id: msgId,
+    output_index: OUTPUT_INDEX,
+    content_index: CONTENT_INDEX,
+    part: { type: 'output_text', text: out, annotations: [] },
+  });
+
+  const msgItemDone = {
+    id: msgId,
+    type: 'message',
+    status: 'completed',
+    role: 'assistant',
+    content: [{ type: 'output_text', text: out, annotations: [] }],
+  };
+
+  sse({
+    type: 'response.output_item.done',
+    output_index: OUTPUT_INDEX,
+    item: msgItemDone,
+  });
+
+  const completed = buildResponseObject({
+    id: responseId,
+    model: responsesRequest?.model || DEFAULT_MODEL,
+    status: 'completed',
+    created_at: createdAt,
+    completed_at: nowSec(),
+    input: responsesRequest?.input || [],
+    output: [msgItemDone],
+    tools: responsesRequest?.tools || [],
+  });
+
+  sse({ type: 'response.completed', response: completed });
+  res.end();
+
+  log('info', `Stream completed - ${out.length} output, ${reasoning.length} reasoning`);
 }

 /**
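
Read end to end, the rewritten streamer emits a fixed event sequence per turn: response.created, response.in_progress, response.output_item.added, response.content_part.added, then reasoning_text/output_text deltas, then the matching *.done events, response.output_item.done, and finally response.completed. A minimal sketch of a client that prints that sequence, assuming the bridge runs on the default host/port and serves a Responses-style endpoint at /v1/responses (the route is not shown in this diff):

const http = require('http');

const req = http.request(
  { host: '127.0.0.1', port: 31415, path: '/v1/responses', method: 'POST',
    headers: { 'Content-Type': 'application/json' } },
  (res) => {
    let buf = '';
    res.setEncoding('utf8');
    res.on('data', (chunk) => {
      buf += chunk;
      const frames = buf.split('\n\n'); // SSE frames end with a blank line
      buf = frames.pop() || '';
      for (const frame of frames) {
        const line = frame.split('\n').find((l) => l.startsWith('data:'));
        if (!line) continue;
        // Print just the event type of each frame.
        try { console.log(JSON.parse(line.slice(5).trim()).type); } catch {}
      }
    });
  }
);
req.end(JSON.stringify({
  model: 'glm-4.7',
  input: [{ role: 'user', content: [{ type: 'input_text', text: 'Hi' }] }],
  stream: true,
}));
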
@@ -667,17 +591,21 @@ async function handlePostRequest(req, res) {

   // Handle streaming response
   if (upstreamBody.stream) {
-    const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
-    const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
+    const ids = {
+      createdAt: nowSec(),
+      responseId: `resp_${randomUUID().replace(/-/g, '')}`,
+      msgId: `msg_${randomUUID().replace(/-/g, '')}`,
+    };
     log('info', 'Starting streaming response');
     res.writeHead(200, {
       'Content-Type': 'text/event-stream; charset=utf-8',
       'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive'
+      'Connection': 'keep-alive',
+      'X-Accel-Buffering': 'no',
     });

     try {
-      await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
+      await streamChatToResponses(upstreamResponse.body, res, request, ids);
       log('info', 'Streaming completed');
     } catch (e) {
       log('error', 'Streaming error:', e);
@@ -685,12 +613,14 @@ async function handlePostRequest(req, res) {
   } else {
     // Non-streaming response
     const chatResponse = await upstreamResponse.json();
-    const msg = chatResponse?.choices?.[0]?.message ?? {};
-    const outputText = msg.content ?? '';
-    const reasoningText = msg.reasoning_content ?? '';
-    const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';

-    const response = translateChatToResponses(outputText, reasoningText, null, null, model);
+    const ids = {
+      createdAt: nowSec(),
+      responseId: `resp_${randomUUID().replace(/-/g, '')}`,
+      msgId: `msg_${randomUUID().replace(/-/g, '')}`,
+    };
+
+    const response = translateChatToResponses(chatResponse, request, ids);

     res.writeHead(200, { 'Content-Type': 'application/json' });
     res.end(JSON.stringify(response));
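
On the non-streaming path the observable change is the envelope: 0.3.2 returned only id/object/created_at/model/status/output, while 0.4.0 returns the full buildResponseObject shape with completed_at, tools, and the echoed input. A quick check, under the same invented endpoint assumption as the streaming sketch above:

const http = require('http');

const req = http.request(
  { host: '127.0.0.1', port: 31415, path: '/v1/responses', method: 'POST',
    headers: { 'Content-Type': 'application/json' } },
  (res) => {
    let data = '';
    res.on('data', (c) => (data += c));
    res.on('end', () => {
      const r = JSON.parse(data);
      // Expect 'completed', a numeric completed_at, and the wider field set.
      console.log(r.status, r.completed_at, Object.keys(r).sort());
    });
  }
);
req.end(JSON.stringify({
  model: 'glm-4.7',
  input: [{ role: 'user', content: [{ type: 'input_text', text: 'Ping' }] }],
  stream: false,
}));
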