@mmmbuto/zai-codex-bridge 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/server.js +214 -280
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mmmbuto/zai-codex-bridge",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "Local proxy that translates OpenAI Responses API format to Z.AI Chat Completions format for Codex",
5
5
  "main": "src/server.js",
6
6
  "bin": {
package/src/server.js CHANGED
@@ -11,18 +11,62 @@
11
11
  */
12
12
 
13
13
  const http = require('http');
14
- const crypto = require('crypto');
14
+ const { randomUUID } = require('crypto');
15
15
 
16
16
  // Configuration from environment
17
17
  const PORT = parseInt(process.env.PORT || '31415', 10);
18
18
  const HOST = process.env.HOST || '127.0.0.1';
19
19
  const ZAI_BASE_URL = process.env.ZAI_BASE_URL || 'https://api.z.ai/api/coding/paas/v4';
20
20
  const LOG_LEVEL = process.env.LOG_LEVEL || 'info';
21
+ const DEFAULT_MODEL = process.env.DEFAULT_MODEL || 'glm-4.7';
21
22
 
22
23
  // Env toggles for compatibility
23
24
  const ALLOW_SYSTEM = process.env.ALLOW_SYSTEM === '1';
24
25
  const ALLOW_TOOLS = process.env.ALLOW_TOOLS === '1';
25
26
 
27
+ function nowSec() {
28
+ return Math.floor(Date.now() / 1000);
29
+ }
30
+
31
+ function buildResponseObject({
32
+ id,
33
+ model,
34
+ status,
35
+ created_at,
36
+ completed_at = null,
37
+ input = [],
38
+ output = [],
39
+ tools = [],
40
+ }) {
41
+ // Struttura compatibile con Responses API per Codex CLI
42
+ return {
43
+ id,
44
+ object: 'response',
45
+ created_at,
46
+ status,
47
+ completed_at,
48
+ error: null,
49
+ incomplete_details: null,
50
+ input,
51
+ instructions: null,
52
+ max_output_tokens: null,
53
+ model,
54
+ output,
55
+ previous_response_id: null,
56
+ reasoning_effort: null,
57
+ store: false,
58
+ temperature: 1,
59
+ text: { format: { type: 'text' } },
60
+ tool_choice: 'auto',
61
+ tools,
62
+ top_p: 1,
63
+ truncation: 'disabled',
64
+ usage: null,
65
+ user: null,
66
+ metadata: {},
67
+ };
68
+ }
69
+
26
70
  /**
27
71
  * Logger
28
72
  */
@@ -195,41 +239,39 @@ function translateResponsesToChat(request) {
195
239
  * Translate Chat Completions response to Responses format
196
240
  * Handles both output_text and reasoning_text content
197
241
  */
198
- function translateChatToResponses(outputText, reasoningText = '', responseId = null, messageItemId = null, model = 'unknown') {
199
- const rid = responseId || `resp_${crypto.randomUUID().replace(/-/g, '')}`;
200
- const mid = messageItemId || `msg_${crypto.randomUUID().replace(/-/g, '')}`;
201
- const createdAt = Math.floor(Date.now() / 1000);
242
+ function translateChatToResponses(chatResponse, responsesRequest, ids) {
243
+ const msg = chatResponse.choices?.[0]?.message ?? {};
244
+ const outputText = msg.content ?? '';
245
+ const reasoningText = msg.reasoning_content ?? '';
246
+
247
+ const createdAt = ids?.createdAt ?? nowSec();
248
+ const responseId = ids?.responseId ?? `resp_${randomUUID().replace(/-/g, '')}`;
249
+ const msgId = ids?.msgId ?? `msg_${randomUUID().replace(/-/g, '')}`;
202
250
 
203
251
  const content = [];
204
252
  if (reasoningText) {
205
- content.push({ type: 'reasoning_text', text: reasoningText });
253
+ content.push({ type: 'reasoning_text', text: reasoningText, annotations: [] });
206
254
  }
207
- content.push({ type: 'output_text', text: outputText });
255
+ content.push({ type: 'output_text', text: outputText, annotations: [] });
208
256
 
209
- const response = {
210
- id: rid,
211
- object: 'response',
212
- created_at: createdAt,
213
- model,
257
+ const msgItem = {
258
+ id: msgId,
259
+ type: 'message',
214
260
  status: 'completed',
215
- output: [
216
- {
217
- type: 'message',
218
- id: mid,
219
- role: 'assistant',
220
- content
221
- }
222
- ]
261
+ role: 'assistant',
262
+ content,
223
263
  };
224
264
 
225
- log('debug', 'Translated Chat->Responses:', {
226
- id: response.id,
227
- outputLength: outputText.length,
228
- reasoningLength: reasoningText.length,
229
- status: response.status
265
+ return buildResponseObject({
266
+ id: responseId,
267
+ model: responsesRequest?.model || chatResponse.model || DEFAULT_MODEL,
268
+ status: 'completed',
269
+ created_at: createdAt,
270
+ completed_at: nowSec(),
271
+ input: responsesRequest?.input || [],
272
+ output: [msgItem],
273
+ tools: responsesRequest?.tools || [],
230
274
  });
231
-
232
- return response;
233
275
  }
234
276
 
235
277
  /**
@@ -269,7 +311,8 @@ async function makeUpstreamRequest(path, body, headers) {
269
311
  const auth = pickAuth(headers);
270
312
  const upstreamHeaders = {
271
313
  'Content-Type': 'application/json',
272
- 'Authorization': auth
314
+ 'Authorization': auth,
315
+ 'Accept-Encoding': 'identity' // Disable compression to avoid gzip issues
273
316
  };
274
317
 
275
318
  log('info', 'Upstream request:', {
@@ -298,290 +341,175 @@ async function makeUpstreamRequest(path, body, headers) {
298
341
  * Handle streaming response from Z.AI with proper Responses API event format
299
342
  * Separates reasoning_content, content, and tool_calls into distinct events
300
343
  */
301
- async function streamChatToResponses(stream, res, responseId, messageItemId) {
344
+ async function streamChatToResponses(upstreamBody, res, responsesRequest, ids) {
345
+ const decoder = new TextDecoder();
346
+ const reader = upstreamBody.getReader();
302
347
  let buffer = '';
303
- let seq = 0;
304
-
305
- let outputText = '';
306
- let reasoningText = '';
307
348
 
308
- // tool call state: key = call_id (or id), value = { itemId, outputIndex, name, args }
309
- const toolCalls = new Map();
310
- let nextOutputIndex = 1; // 0 is the message item
349
+ const createdAt = ids.createdAt;
350
+ const responseId = ids.responseId;
351
+ const msgId = ids.msgId;
311
352
 
312
- const createdAt = Math.floor(Date.now() / 1000);
353
+ let seq = 1;
354
+ const OUTPUT_INDEX = 0;
355
+ const CONTENT_INDEX = 0;
313
356
 
314
- function send(ev) {
315
- // Responses streaming: only "data: {json}\n\n"
316
- res.write(`data: ${JSON.stringify(ev)}\n\n`);
357
+ function sse(obj) {
358
+ if (obj.sequence_number == null) obj.sequence_number = seq++;
359
+ res.write(`data: ${JSON.stringify(obj)}\n\n`);
317
360
  }
318
361
 
319
- // 1) response.created
320
- send({
321
- type: 'response.created',
322
- sequence_number: seq++,
323
- response: {
324
- id: responseId,
325
- object: 'response',
326
- created_at: createdAt,
327
- status: 'in_progress',
328
- output: [],
329
- },
362
+ // response.created / response.in_progress
363
+ const baseResp = buildResponseObject({
364
+ id: responseId,
365
+ model: responsesRequest?.model || DEFAULT_MODEL,
366
+ status: 'in_progress',
367
+ created_at: createdAt,
368
+ completed_at: null,
369
+ input: responsesRequest?.input || [],
370
+ output: [],
371
+ tools: responsesRequest?.tools || [],
330
372
  });
331
373
 
332
- // 2) response.in_progress
333
- send({
334
- type: 'response.in_progress',
335
- sequence_number: seq++,
336
- response: {
337
- id: responseId,
338
- object: 'response',
339
- created_at: createdAt,
340
- status: 'in_progress',
341
- output: [],
342
- },
343
- });
374
+ sse({ type: 'response.created', response: baseResp });
375
+ sse({ type: 'response.in_progress', response: baseResp });
344
376
 
345
- // 3) message item added (output_index=0)
346
- send({
377
+ // output_item.added + content_part.added (output_text)
378
+ const msgItemInProgress = {
379
+ id: msgId,
380
+ type: 'message',
381
+ status: 'in_progress',
382
+ role: 'assistant',
383
+ content: [],
384
+ };
385
+
386
+ sse({
347
387
  type: 'response.output_item.added',
348
- sequence_number: seq++,
349
- output_index: 0,
350
- item: {
351
- type: 'message',
352
- id: messageItemId,
353
- role: 'assistant',
354
- content: [],
355
- },
388
+ output_index: OUTPUT_INDEX,
389
+ item: msgItemInProgress,
356
390
  });
357
391
 
358
- async function finalizeAndClose() {
359
- // done events (if we received deltas)
360
- if (reasoningText) {
361
- send({
362
- type: 'response.reasoning_text.done',
363
- sequence_number: seq++,
364
- item_id: messageItemId,
365
- output_index: 0,
366
- content_index: 0,
367
- text: reasoningText,
368
- });
369
- }
370
-
371
- send({
372
- type: 'response.output_text.done',
373
- sequence_number: seq++,
374
- item_id: messageItemId,
375
- output_index: 0,
376
- content_index: reasoningText ? 1 : 0,
377
- text: outputText,
378
- });
379
-
380
- // close any tool call items
381
- for (const [callId, st] of toolCalls.entries()) {
382
- send({
383
- type: 'response.function_call_arguments.done',
384
- sequence_number: seq++,
385
- item_id: st.itemId,
386
- output_index: st.outputIndex,
387
- arguments: st.args,
388
- });
389
-
390
- send({
391
- type: 'response.output_item.done',
392
- sequence_number: seq++,
393
- output_index: st.outputIndex,
394
- item: {
395
- type: 'function_call',
396
- id: st.itemId,
397
- call_id: callId,
398
- name: st.name,
399
- arguments: st.args,
400
- },
401
- });
402
- }
403
-
404
- // output_item.done for message
405
- const messageContent = [];
406
- if (reasoningText) messageContent.push({ type: 'reasoning_text', text: reasoningText });
407
- messageContent.push({ type: 'output_text', text: outputText });
408
-
409
- send({
410
- type: 'response.output_item.done',
411
- sequence_number: seq++,
412
- output_index: 0,
413
- item: {
414
- type: 'message',
415
- id: messageItemId,
416
- role: 'assistant',
417
- content: messageContent,
418
- },
419
- });
420
-
421
- // response.completed
422
- const outputItems = [
423
- {
424
- type: 'message',
425
- id: messageItemId,
426
- role: 'assistant',
427
- content: messageContent,
428
- },
429
- ...Array.from(toolCalls.entries()).map(([callId, st]) => ({
430
- type: 'function_call',
431
- id: st.itemId,
432
- call_id: callId,
433
- name: st.name,
434
- arguments: st.args,
435
- })),
436
- ];
437
-
438
- send({
439
- type: 'response.completed',
440
- sequence_number: seq++,
441
- response: {
442
- id: responseId,
443
- object: 'response',
444
- created_at: createdAt,
445
- status: 'completed',
446
- output: outputItems,
447
- },
448
- });
449
-
450
- // SSE terminator
451
- res.write('data: [DONE]\n\n');
452
- res.end();
392
+ sse({
393
+ type: 'response.content_part.added',
394
+ item_id: msgId,
395
+ output_index: OUTPUT_INDEX,
396
+ content_index: CONTENT_INDEX,
397
+ part: { type: 'output_text', text: '', annotations: [] },
398
+ });
453
399
 
454
- log('info', `Stream completed - ${outputText.length} output, ${reasoningText.length} reasoning, ${toolCalls.size} tools`);
455
- }
400
+ let out = '';
401
+ let reasoning = '';
456
402
 
457
- try {
458
- for await (const chunk of stream) {
459
- buffer += chunk.toString('utf8');
403
+ while (true) {
404
+ const { done, value } = await reader.read();
405
+ if (done) break;
460
406
 
461
- // Z.ai stream: SSE lines "data: {...}\n"
462
- // Split by newline and process each complete line
463
- const lines = buffer.split('\n');
464
- // Keep the last line if it's incomplete (doesn't end with data pattern)
465
- buffer = lines.pop() || '';
407
+ buffer += decoder.decode(value, { stream: true });
408
+ const events = buffer.split('\n\n');
409
+ buffer = events.pop() || '';
466
410
 
411
+ for (const evt of events) {
412
+ const lines = evt.split('\n');
467
413
  for (const line of lines) {
468
- if (!line.trim() || !line.startsWith('data:')) {
469
- // Skip empty lines and comments (starting with :)
470
- if (line.trim() && !line.startsWith(':')) {
471
- log('debug', 'Non-data line:', line.substring(0, 50));
472
- }
473
- continue;
474
- }
475
-
414
+ if (!line.startsWith('data:')) continue;
476
415
  const payload = line.slice(5).trim();
416
+ if (!payload) continue;
477
417
  if (payload === '[DONE]') {
478
- log('info', 'Stream received [DONE]');
479
- await finalizeAndClose();
480
- return;
418
+ // termina upstream
419
+ continue;
481
420
  }
482
421
 
483
- if (!payload) continue;
484
-
485
- let json;
422
+ let chunk;
486
423
  try {
487
- json = JSON.parse(payload);
488
- } catch (e) {
489
- log('warn', 'Failed to parse SSE payload:', e.message, 'payload:', payload.substring(0, 100));
424
+ chunk = JSON.parse(payload);
425
+ } catch {
490
426
  continue;
491
427
  }
492
428
 
493
- const choice = json?.choices?.[0];
494
- const delta = choice?.delta ?? {};
429
+ const delta = chunk.choices?.[0]?.delta || {};
495
430
 
496
- // 1) reasoning
431
+ // NON mescolare reasoning in output_text
497
432
  if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length) {
498
- reasoningText += delta.reasoning_content;
499
- send({
433
+ reasoning += delta.reasoning_content;
434
+ sse({
500
435
  type: 'response.reasoning_text.delta',
501
- sequence_number: seq++,
502
- item_id: messageItemId,
503
- output_index: 0,
504
- content_index: 0,
436
+ item_id: msgId,
437
+ output_index: OUTPUT_INDEX,
438
+ content_index: CONTENT_INDEX,
505
439
  delta: delta.reasoning_content,
506
440
  });
507
- log('debug', `Reasoning delta: ${delta.reasoning_content.substring(0, 30)}...`);
508
441
  }
509
442
 
510
- // 2) normal output
511
443
  if (typeof delta.content === 'string' && delta.content.length) {
512
- outputText += delta.content;
513
- send({
444
+ out += delta.content;
445
+ sse({
514
446
  type: 'response.output_text.delta',
515
- sequence_number: seq++,
516
- item_id: messageItemId,
517
- output_index: 0,
518
- content_index: reasoningText ? 1 : 0,
447
+ item_id: msgId,
448
+ output_index: OUTPUT_INDEX,
449
+ content_index: CONTENT_INDEX,
519
450
  delta: delta.content,
520
451
  });
521
- log('debug', `Output delta: ${delta.content.substring(0, 30)}...`);
522
- }
523
-
524
- // 3) tool calls (OpenAI-style in chat.completions delta.tool_calls)
525
- if (Array.isArray(delta.tool_calls)) {
526
- for (const tc of delta.tool_calls) {
527
- // tc: {id, type:"function", function:{name, arguments}}
528
- const callId = tc.id || `call_${tc.index ?? 0}`;
529
- const name = tc.function?.name || 'unknown';
530
- const argsDelta = tc.function?.arguments || '';
531
-
532
- let st = toolCalls.get(callId);
533
- if (!st) {
534
- st = {
535
- itemId: `fc_${crypto.randomUUID().replace(/-/g, '')}`,
536
- outputIndex: nextOutputIndex++,
537
- name,
538
- args: '',
539
- };
540
- toolCalls.set(callId, st);
541
-
542
- send({
543
- type: 'response.output_item.added',
544
- sequence_number: seq++,
545
- output_index: st.outputIndex,
546
- item: {
547
- type: 'function_call',
548
- id: st.itemId,
549
- call_id: callId,
550
- name: st.name,
551
- arguments: '',
552
- },
553
- });
554
- log('debug', `Tool call added: ${name} (${callId})`);
555
- }
556
-
557
- if (argsDelta) {
558
- st.args += argsDelta;
559
- send({
560
- type: 'response.function_call_arguments.delta',
561
- sequence_number: seq++,
562
- item_id: st.itemId,
563
- output_index: st.outputIndex,
564
- delta: argsDelta,
565
- });
566
- }
567
- }
568
- }
569
-
570
- // 4) finish
571
- if (choice?.finish_reason) {
572
- log('info', `Stream finish_reason: ${choice.finish_reason}`);
573
- await finalizeAndClose();
574
- return;
575
452
  }
576
453
  }
577
454
  }
578
- } catch (e) {
579
- log('error', 'Stream processing error:', e);
580
455
  }
581
456
 
582
- // fallback (stream finished without finish_reason)
583
- log('warn', 'Stream ended without finish_reason, finalizing anyway');
584
- await finalizeAndClose();
457
+ // done events
458
+ if (reasoning.length) {
459
+ sse({
460
+ type: 'response.reasoning_text.done',
461
+ item_id: msgId,
462
+ output_index: OUTPUT_INDEX,
463
+ content_index: CONTENT_INDEX,
464
+ text: reasoning,
465
+ });
466
+ }
467
+
468
+ sse({
469
+ type: 'response.output_text.done',
470
+ item_id: msgId,
471
+ output_index: OUTPUT_INDEX,
472
+ content_index: CONTENT_INDEX,
473
+ text: out,
474
+ });
475
+
476
+ sse({
477
+ type: 'response.content_part.done',
478
+ item_id: msgId,
479
+ output_index: OUTPUT_INDEX,
480
+ content_index: CONTENT_INDEX,
481
+ part: { type: 'output_text', text: out, annotations: [] },
482
+ });
483
+
484
+ const msgItemDone = {
485
+ id: msgId,
486
+ type: 'message',
487
+ status: 'completed',
488
+ role: 'assistant',
489
+ content: [{ type: 'output_text', text: out, annotations: [] }],
490
+ };
491
+
492
+ sse({
493
+ type: 'response.output_item.done',
494
+ output_index: OUTPUT_INDEX,
495
+ item: msgItemDone,
496
+ });
497
+
498
+ const completed = buildResponseObject({
499
+ id: responseId,
500
+ model: responsesRequest?.model || DEFAULT_MODEL,
501
+ status: 'completed',
502
+ created_at: createdAt,
503
+ completed_at: nowSec(),
504
+ input: responsesRequest?.input || [],
505
+ output: [msgItemDone],
506
+ tools: responsesRequest?.tools || [],
507
+ });
508
+
509
+ sse({ type: 'response.completed', response: completed });
510
+ res.end();
511
+
512
+ log('info', `Stream completed - ${out.length} output, ${reasoning.length} reasoning`);
585
513
  }
586
514
 
587
515
  /**
@@ -663,17 +591,21 @@ async function handlePostRequest(req, res) {
663
591
 
664
592
  // Handle streaming response
665
593
  if (upstreamBody.stream) {
666
- const responseId = `resp_${crypto.randomUUID().replace(/-/g, '')}`;
667
- const messageItemId = `msg_${crypto.randomUUID().replace(/-/g, '')}`;
594
+ const ids = {
595
+ createdAt: nowSec(),
596
+ responseId: `resp_${randomUUID().replace(/-/g, '')}`,
597
+ msgId: `msg_${randomUUID().replace(/-/g, '')}`,
598
+ };
668
599
  log('info', 'Starting streaming response');
669
600
  res.writeHead(200, {
670
601
  'Content-Type': 'text/event-stream; charset=utf-8',
671
602
  'Cache-Control': 'no-cache',
672
- 'Connection': 'keep-alive'
603
+ 'Connection': 'keep-alive',
604
+ 'X-Accel-Buffering': 'no',
673
605
  });
674
606
 
675
607
  try {
676
- await streamChatToResponses(upstreamResponse.body, res, responseId, messageItemId);
608
+ await streamChatToResponses(upstreamResponse.body, res, request, ids);
677
609
  log('info', 'Streaming completed');
678
610
  } catch (e) {
679
611
  log('error', 'Streaming error:', e);
@@ -681,12 +613,14 @@ async function handlePostRequest(req, res) {
681
613
  } else {
682
614
  // Non-streaming response
683
615
  const chatResponse = await upstreamResponse.json();
684
- const msg = chatResponse?.choices?.[0]?.message ?? {};
685
- const outputText = msg.content ?? '';
686
- const reasoningText = msg.reasoning_content ?? '';
687
- const model = chatResponse?.model ?? upstreamBody.model ?? 'GLM';
688
616
 
689
- const response = translateChatToResponses(outputText, reasoningText, null, null, model);
617
+ const ids = {
618
+ createdAt: nowSec(),
619
+ responseId: `resp_${randomUUID().replace(/-/g, '')}`,
620
+ msgId: `msg_${randomUUID().replace(/-/g, '')}`,
621
+ };
622
+
623
+ const response = translateChatToResponses(chatResponse, request, ids);
690
624
 
691
625
  res.writeHead(200, { 'Content-Type': 'application/json' });
692
626
  res.end(JSON.stringify(response));