@semalt-ai/code 1.8.1 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/api.js CHANGED
@@ -4,6 +4,11 @@ const http = require('http');
4
4
  const https = require('https');
5
5
  const { URL } = require('url');
6
6
 
7
+ const { buildToolsSchema, isUIActive } = require('./tools');
8
+ const { TOOL_SPECS } = require('./tool_specs');
9
+ const writer = require('./ui/writer');
10
+ const messages = require('./ui/messages');
11
+
7
12
  function createApiClient({ getConfig, saveConfig, ui }) {
8
13
  const {
9
14
  BOLD,
@@ -14,7 +19,6 @@ function createApiClient({ getConfig, saveConfig, ui }) {
14
19
  FG_RED,
15
20
  FG_TEAL,
16
21
  RST,
17
- StatusBar,
18
22
  StreamRenderer,
19
23
  } = ui;
20
24
 
@@ -55,9 +59,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
55
59
  return Math.floor((text || '').length / 4);
56
60
  }
57
61
 
58
- // Discovered context limit for this process lifetime.
59
- // Set on the first context-overflow 400; used to proactively trim all subsequent calls.
60
- let _sessionInputLimit = null;
62
+ // Discovered context limit per model for this process lifetime.
63
+ // Keyed by resolved model name; set on the first context-overflow 400
64
+ // for that model and used to proactively trim subsequent calls.
65
+ const _sessionInputLimits = new Map();
61
66
 
62
67
  function httpRequest(urlStr, options, body) {
63
68
  return new Promise((resolve, reject) => {
@@ -71,7 +76,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
71
76
  headers: options.headers || {},
72
77
  };
73
78
 
74
- const req = lib.request(reqOpts, (res) => resolve(res));
79
+ const req = lib.request(reqOpts, (res) => {
80
+ if (options.onResponse) options.onResponse(res);
81
+ resolve(res);
82
+ });
75
83
  req.on('error', reject);
76
84
 
77
85
  if (options.timeout) {
@@ -80,6 +88,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
80
88
  });
81
89
  }
82
90
 
91
+ if (options.signal) {
92
+ if (options.signal.aborted) {
93
+ req.destroy(new Error('Aborted'));
94
+ return reject(new Error('Aborted'));
95
+ }
96
+ options.signal.addEventListener('abort', () => {
97
+ req.destroy(new Error('Aborted'));
98
+ });
99
+ }
100
+
101
+ if (options.onRequest) options.onRequest(req);
102
+
83
103
  if (body) req.write(body);
84
104
  req.end();
85
105
  });
@@ -224,17 +244,32 @@ function createApiClient({ getConfig, saveConfig, ui }) {
224
244
  });
225
245
  }
226
246
 
227
- async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
247
+ async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false, signal = null, onTrim = null, nativeTools = true } = {}) {
248
+ // nativeTools is plumbed through for downstream use (tools param + tool_calls parsing); no behavior change yet.
228
249
  const config = getConfig();
250
+ const resolvedModel = model || config.default_model;
251
+
252
+ if (signal && signal.aborted) throw new Error('Aborted');
253
+
254
+ let trimNotified = false;
255
+ function notifyTrim(info) {
256
+ if (trimNotified) return;
257
+ trimNotified = true;
258
+ if (typeof onTrim === 'function') {
259
+ try { onTrim(info); } catch {}
260
+ }
261
+ }
229
262
 
230
263
  // Fit messages into tokenBudget tokens.
231
- // Uses chars/3 — conservative for token-dense content (code, JSON, HTML).
264
+ // Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
265
+ // for token-dense content (code, JSON, HTML) but consistent across the
266
+ // codebase.
232
267
  //
233
268
  // Always keeps: system prompt + first non-system message (original task).
234
269
  // Drops intermediate messages oldest-first, then truncates the last tail
235
270
  // message (typically a large tool result) if still over budget.
236
271
  function trimToTokenBudget(msgs, tokenBudget) {
237
- const CHARS_PER_TOKEN = 3;
272
+ const CHARS_PER_TOKEN = 4;
238
273
  const system = msgs.filter((m) => m.role === 'system');
239
274
  const nonSystem = msgs.filter((m) => m.role !== 'system');
240
275
  if (nonSystem.length === 0) return [...system];
@@ -271,28 +306,62 @@ function createApiClient({ getConfig, saveConfig, ui }) {
271
306
  return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
272
307
  }
273
308
 
274
- // Proactive trim: apply the session input limit discovered from a prior 400.
309
+ // Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
310
+ // fall back to config.context_length (with a ~10% safety margin) as a hint.
311
+ // The fallback is not written to _sessionInputLimits so a real overflow
312
+ // always overrides the config hint.
275
313
  let trimmedMessages = messages;
276
- if (_sessionInputLimit !== null) {
277
- if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
278
- trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
314
+ let sessionLimit = _sessionInputLimits.get(resolvedModel);
315
+ if (sessionLimit == null &&
316
+ Number.isInteger(config.context_length) && config.context_length > 0) {
317
+ sessionLimit = Math.floor(config.context_length * 0.9);
318
+ }
319
+ if (sessionLimit != null) {
320
+ if (Math.floor(JSON.stringify(messages).length / 4) > sessionLimit) {
321
+ trimmedMessages = trimToTokenBudget(messages, sessionLimit);
322
+ const dropped = messages.length - trimmedMessages.length;
323
+ const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
324
+ notifyTrim({ reason: 'proactive', dropped, keptTokens, limit: sessionLimit });
279
325
  }
280
326
  }
281
327
 
328
+ // MiniMax supports `reasoning_split: true` which moves thinking content
329
+ // into a separate reasoning_details field on the response (and
330
+ // delta.reasoning_content during streaming) instead of embedding
331
+ // <think>...</think> inside message.content. Only send this flag to
332
+ // MiniMax — other providers may reject unknown fields.
333
+ const isMiniMax =
334
+ /api\.minimax\.io/i.test(config.api_base || '') ||
335
+ /^minimax[-\/]/i.test(resolvedModel || '');
336
+
282
337
  const payload = {
283
- model: model || config.default_model,
338
+ model: resolvedModel,
284
339
  messages: trimmedMessages,
285
340
  temperature: temperature !== undefined ? temperature : config.temperature,
286
341
  stream: true,
287
342
  stream_options: { include_usage: true },
288
343
  };
289
344
 
345
+ if (isMiniMax) payload.reasoning_split = true;
290
346
  if (maxTokens !== undefined) payload.max_tokens = maxTokens;
291
347
 
348
+ // Native function-calling: advertise the tool schema and let the model
349
+ // emit structured tool_calls. Wrappers are XML envelopes, not callable
350
+ // tools — filter them out per the TOOL_SPECS contract.
351
+ if (nativeTools) {
352
+ const callable = Object.fromEntries(
353
+ Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
354
+ );
355
+ payload.tools = buildToolsSchema(callable);
356
+ payload.tool_choice = 'auto';
357
+ }
358
+
359
+ const endpoint = apiUrl('/v1/chat/completions');
360
+
292
361
  async function doRequest(msgs) {
293
362
  const reqPayload = { ...payload, messages: msgs };
294
363
  const reqBody = JSON.stringify(reqPayload);
295
- const res = await httpRequest(apiUrl('/v1/chat/completions'), {
364
+ const res = await httpRequest(endpoint, {
296
365
  method: 'POST',
297
366
  timeout: config.request_timeout_ms,
298
367
  headers: {
@@ -300,6 +369,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
300
369
  'Authorization': `Bearer ${config.api_key}`,
301
370
  'Content-Length': Buffer.byteLength(reqBody),
302
371
  },
372
+ signal,
303
373
  }, reqBody);
304
374
 
305
375
  if (res.statusCode !== 200) {
@@ -322,6 +392,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
322
392
  err.detail = detail;
323
393
  err.rawBody = errBody;
324
394
  err.responseHeaders = res.headers;
395
+ err.endpoint = endpoint;
325
396
  throw err;
326
397
  }
327
398
  return res;
@@ -330,7 +401,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
330
401
  // On payload-too-large errors, trim and retry.
331
402
  // 400 with context-overflow detail → parse exact context window, budget = window/2
332
403
  // 413 Request Entity Too Large (Nginx/proxy) → no size hint, halve current estimate
333
- // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
404
+ // In both cases the per-model session input limit is set so all subsequent
405
+ // calls for this model are proactively trimmed.
334
406
  let res;
335
407
  try {
336
408
  res = await doRequest(trimmedMessages);
@@ -345,15 +417,41 @@ function createApiClient({ getConfig, saveConfig, ui }) {
345
417
  const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
346
418
  err.detail.match(/maximum.*?(\d+)\s*token/i);
347
419
  const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
348
- budget = contextWindow
349
- ? Math.floor(contextWindow / 2)
350
- : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
420
+ if (contextWindow) {
421
+ budget = Math.floor(contextWindow * 0.9);
422
+ // Persist the learned context window so future turns/runs trim
423
+ // proactively without needing a second 400. Must not block the
424
+ // retry if the write fails.
425
+ try {
426
+ const currentConfig = getConfig();
427
+ const next = { ...currentConfig, context_length: contextWindow };
428
+ if (Array.isArray(currentConfig.models)) {
429
+ next.models = currentConfig.models.map((m) =>
430
+ m && m.api_base === currentConfig.api_base && m.model === resolvedModel
431
+ ? { ...m, context_length: contextWindow }
432
+ : m
433
+ );
434
+ }
435
+ saveConfig(next);
436
+ } catch {}
437
+ } else {
438
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
439
+ }
351
440
  } else {
352
441
  // 413: no token info available — halve the estimated size of the current payload.
353
- budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
442
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
354
443
  }
355
- _sessionInputLimit = budget;
444
+ _sessionInputLimits.set(resolvedModel, budget);
445
+ const before = trimmedMessages;
356
446
  trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
447
+ const dropped = before.length - trimmedMessages.length;
448
+ const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
449
+ notifyTrim({
450
+ reason: is413 ? 'overflow-413' : 'overflow-400',
451
+ dropped,
452
+ keptTokens,
453
+ limit: budget,
454
+ });
357
455
  res = await doRequest(trimmedMessages);
358
456
  } else {
359
457
  throw err;
@@ -364,19 +462,21 @@ function createApiClient({ getConfig, saveConfig, ui }) {
364
462
  const startTime = Date.now();
365
463
  let fullText = '';
366
464
  let reasoningText = '';
465
+ let reasoningDetailsText = '';
367
466
  let tokenCount = 0;
368
467
  let inReasoning = false;
369
468
  let streamUsage = null;
469
+ let streamFinishReason = null;
370
470
  let resolved = false;
371
471
  // delta.tool_calls accumulator (OpenAI function-calling streaming format).
372
472
  // Keyed by `index` per the OpenAI spec.
373
473
  const toolCallAcc = [];
374
474
  const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
375
475
  if (!silent) {
476
+ // audit: allowed — non-TUI streaming setup, must interleave with StreamRenderer sync writes.
376
477
  process.stdout.write('\n');
377
478
  renderer._linesWritten = 1;
378
479
  }
379
- let firstContentToken = true;
380
480
  let lineBuffer = '';
381
481
 
382
482
  function escapeXml(s) {
@@ -407,16 +507,17 @@ function createApiClient({ getConfig, saveConfig, ui }) {
407
507
  function finalize() {
408
508
  if (resolved) return;
409
509
  resolved = true;
410
- appendToolCallsXml();
510
+ // Native mode: surface tool calls as structured data; skip XML serialization.
511
+ // Legacy mode: serialize into <minimax:tool_call> XML so extractToolCalls picks them up.
512
+ const validToolCalls = toolCallAcc
513
+ .filter((t) => t && t.name)
514
+ .map((t, i) => ({
515
+ id: t.id || `call_${i}`,
516
+ type: 'function',
517
+ function: { name: t.name, arguments: t.arguments || '{}' },
518
+ }));
519
+ if (!nativeTools) appendToolCallsXml();
411
520
  if (!silent) renderer.flush();
412
- const elapsed = (Date.now() - startTime) / 1000;
413
- const tps = tokenCount / (elapsed || 1);
414
- if (StatusBar.current) {
415
- let latency = `${Math.round(tps)} tok/s · ${elapsed.toFixed(1)}s`;
416
- if (reasoningText) latency += ` · ${estimateTokens(reasoningText)} think`;
417
- StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency });
418
- StatusBar.current.render();
419
- }
420
521
  // Fallback for endpoints that don't honor stream_options.include_usage:
421
522
  // estimate prompt/completion tokens locally so the status bar still updates.
422
523
  let usage = streamUsage;
@@ -426,7 +527,38 @@ function createApiClient({ getConfig, saveConfig, ui }) {
426
527
  completion_tokens: estimateTokens(fullText) + estimateTokens(reasoningText),
427
528
  };
428
529
  }
429
- resolve({ content: fullText, usage });
530
+ const elapsedMs = Date.now() - startTime;
531
+ resolve({
532
+ content: fullText,
533
+ toolCalls: nativeTools ? validToolCalls : [],
534
+ usage,
535
+ usage_from_provider: !!streamUsage,
536
+ tool_calls_count: validToolCalls.length,
537
+ finish_reason: streamFinishReason,
538
+ finishReason: streamFinishReason,
539
+ elapsed_ms: elapsedMs,
540
+ reasoning: reasoningText,
541
+ reasoning_details: reasoningDetailsText,
542
+ endpoint,
543
+ request: {
544
+ model: payload.model,
545
+ temperature: payload.temperature,
546
+ max_tokens: payload.max_tokens,
547
+ stream: payload.stream,
548
+ stop: payload.stop,
549
+ native_tools: nativeTools,
550
+ },
551
+ });
552
+ }
553
+
554
+ if (signal) {
555
+ signal.addEventListener('abort', () => {
556
+ try { res?.destroy(); } catch {}
557
+ if (!resolved) {
558
+ resolved = true;
559
+ reject(new Error('Aborted'));
560
+ }
561
+ });
430
562
  }
431
563
 
432
564
  res.setEncoding('utf8');
@@ -450,20 +582,39 @@ function createApiClient({ getConfig, saveConfig, ui }) {
450
582
  if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
451
583
  streamUsage = obj.usage;
452
584
  }
453
- const delta = ((obj.choices || [])[0] || {}).delta || {};
585
+ const choice = (obj.choices || [])[0] || {};
586
+ if (choice.finish_reason) streamFinishReason = choice.finish_reason;
587
+ const delta = choice.delta || {};
588
+
589
+ // MiniMax `reasoning_split: true` surfaces a structured
590
+ // reasoning_details field. It may arrive as a streaming delta
591
+ // (delta.reasoning_details) or as an authoritative final value
592
+ // on choice.message. Preserve it for debug output; not routed to
593
+ // the UI and not fed back into messages[] on subsequent turns.
594
+ const rdDelta = delta.reasoning_details;
595
+ if (rdDelta !== undefined && rdDelta !== null) {
596
+ reasoningDetailsText += typeof rdDelta === 'string' ? rdDelta : JSON.stringify(rdDelta);
597
+ }
598
+ const rdFinal = choice.message && choice.message.reasoning_details;
599
+ if (rdFinal !== undefined && rdFinal !== null) {
600
+ reasoningDetailsText = typeof rdFinal === 'string' ? rdFinal : JSON.stringify(rdFinal);
601
+ }
454
602
 
455
603
  const reasoning = delta.reasoning_content || '';
456
604
  if (reasoning) {
605
+ const uiActive = isUIActive();
457
606
  if (!inReasoning) {
458
607
  inReasoning = true;
459
- if (showThink) {
608
+ if (showThink && !uiActive) {
609
+ // audit: allowed — non-TUI thinking output, interleaves with StreamRenderer sync writes.
460
610
  process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
461
611
  renderer._linesWritten++;
462
612
  }
463
613
  }
464
614
  reasoningText += reasoning;
465
615
  tokenCount++;
466
- if (showThink) {
616
+ if (showThink && !uiActive) {
617
+ // audit: allowed — non-TUI thinking output, interleaves with StreamRenderer sync writes.
467
618
  process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
468
619
  }
469
620
  }
@@ -473,17 +624,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
473
624
  for (const tc of toolCallsDelta) {
474
625
  const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
475
626
  const isNew = !toolCallAcc[idx];
476
- if (isNew) toolCallAcc[idx] = { name: '', arguments: '' };
627
+ if (isNew) toolCallAcc[idx] = { id: '', name: '', arguments: '' };
628
+ if (tc.id) toolCallAcc[idx].id = tc.id;
477
629
  if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
478
630
  if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
479
- // When the model streams purely via delta.tool_calls (no
480
- // delta.content), firstContentToken never flips, so the status
481
- // bar stays on "Thinking…" for the entire tool-call stream.
482
- // Surface each new tool slot the moment its name is known so
483
- // the user sees "Using tool: <name>" instead of a frozen UI.
484
- if (isNew && StatusBar.current && toolCallAcc[idx].name) {
485
- StatusBar.current.update('tool', `Using tool: ${toolCallAcc[idx].name}`);
486
- }
487
631
  }
488
632
  }
489
633
 
@@ -492,28 +636,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
492
636
  if (inReasoning) {
493
637
  inReasoning = false;
494
638
  if (showThink && !silent) {
639
+ // audit: allowed — non-TUI thinking output, interleaves with StreamRenderer sync writes.
495
640
  process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
496
641
  renderer._linesWritten++;
497
642
  }
498
643
  }
499
644
  if (onToken) {
500
- if (firstContentToken) {
501
- firstContentToken = false;
502
- if (StatusBar.current) StatusBar.current.update({ status: 'streaming' });
503
- }
504
645
  onToken(content);
505
646
  } else {
506
647
  renderer.feed(content);
507
648
  }
508
649
  fullText += content;
509
650
  tokenCount++;
510
- if (tokenCount % 20 === 0 && StatusBar.current) {
511
- const elapsedSec = (Date.now() - startTime) / 1000 || 0.001;
512
- StatusBar.current.liveUpdate({
513
- tokens: `${tokenCount} tok`,
514
- latency: `${Math.round(tokenCount / elapsedSec)} tok/s`,
515
- });
516
- }
517
651
  }
518
652
  } catch {}
519
653
  }
@@ -555,7 +689,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
555
689
  },
556
690
  }, body);
557
691
  } catch (error) {
558
- console.log(` ${FG_RED}✗ ${error.message}${RST}`);
692
+ messages.netError(error.message);
559
693
  return '';
560
694
  }
561
695
 
@@ -567,7 +701,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
567
701
  });
568
702
  res.on('end', () => {
569
703
  if (res.statusCode !== 200) {
570
- console.log(` ${FG_RED}✗ Error: HTTP ${res.statusCode} — ${data}${RST}`);
704
+ messages.netError(`HTTP ${res.statusCode} — ${data}`);
571
705
  resolve('');
572
706
  return;
573
707
  }
@@ -575,15 +709,15 @@ function createApiClient({ getConfig, saveConfig, ui }) {
575
709
  try {
576
710
  const parsed = JSON.parse(data);
577
711
  const content = parsed.choices[0].message.content;
578
- console.log(content);
712
+ writer.scrollback(content);
579
713
  resolve(content);
580
714
  } catch (error) {
581
- console.log(` ${FG_RED}✗ Parse error: ${error.message}${RST}`);
715
+ messages.netError(`Parse error: ${error.message}`);
582
716
  resolve('');
583
717
  }
584
718
  });
585
719
  res.on('error', (error) => {
586
- console.log(` ${FG_RED}✗ ${error.message}${RST}`);
720
+ messages.netError(error.message);
587
721
  resolve('');
588
722
  });
589
723
  });