@semalt-ai/code 1.8.0 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/api.js CHANGED
@@ -4,6 +4,9 @@ const http = require('http');
4
4
  const https = require('https');
5
5
  const { URL } = require('url');
6
6
 
7
+ const { buildToolsSchema, isUIActive } = require('./tools');
8
+ const { TOOL_SPECS } = require('./tool_specs');
9
+
7
10
  function createApiClient({ getConfig, saveConfig, ui }) {
8
11
  const {
9
12
  BOLD,
@@ -55,9 +58,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
55
58
  return Math.floor((text || '').length / 4);
56
59
  }
57
60
 
58
- // Discovered context limit for this process lifetime.
59
- // Set on the first context-overflow 400; used to proactively trim all subsequent calls.
60
- let _sessionInputLimit = null;
61
+ // Discovered context limit per model for this process lifetime.
62
+ // Keyed by resolved model name; set on the first context-overflow 400
63
+ // for that model and used to proactively trim subsequent calls.
64
+ const _sessionInputLimits = new Map();
61
65
 
62
66
  function httpRequest(urlStr, options, body) {
63
67
  return new Promise((resolve, reject) => {
@@ -71,7 +75,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
71
75
  headers: options.headers || {},
72
76
  };
73
77
 
74
- const req = lib.request(reqOpts, (res) => resolve(res));
78
+ const req = lib.request(reqOpts, (res) => {
79
+ if (options.onResponse) options.onResponse(res);
80
+ resolve(res);
81
+ });
75
82
  req.on('error', reject);
76
83
 
77
84
  if (options.timeout) {
@@ -80,6 +87,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
80
87
  });
81
88
  }
82
89
 
90
+ if (options.signal) {
91
+ if (options.signal.aborted) {
92
+ req.destroy(new Error('Aborted'));
93
+ return reject(new Error('Aborted'));
94
+ }
95
+ options.signal.addEventListener('abort', () => {
96
+ req.destroy(new Error('Aborted'));
97
+ });
98
+ }
99
+
100
+ if (options.onRequest) options.onRequest(req);
101
+
83
102
  if (body) req.write(body);
84
103
  req.end();
85
104
  });
@@ -224,17 +243,32 @@ function createApiClient({ getConfig, saveConfig, ui }) {
224
243
  });
225
244
  }
226
245
 
227
- async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
246
+ async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false, signal = null, onTrim = null, nativeTools = true } = {}) {
247
+ // nativeTools is plumbed through for downstream use (tools param + tool_calls parsing); no behavior change yet.
228
248
  const config = getConfig();
249
+ const resolvedModel = model || config.default_model;
250
+
251
+ if (signal && signal.aborted) throw new Error('Aborted');
252
+
253
+ let trimNotified = false;
254
+ function notifyTrim(info) {
255
+ if (trimNotified) return;
256
+ trimNotified = true;
257
+ if (typeof onTrim === 'function') {
258
+ try { onTrim(info); } catch {}
259
+ }
260
+ }
229
261
 
230
262
  // Fit messages into tokenBudget tokens.
231
- // Uses chars/3 — conservative for token-dense content (code, JSON, HTML).
263
+ // Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
264
+ // for token-dense content (code, JSON, HTML) but consistent across the
265
+ // codebase.
232
266
  //
233
267
  // Always keeps: system prompt + first non-system message (original task).
234
268
  // Drops intermediate messages oldest-first, then truncates the last tail
235
269
  // message (typically a large tool result) if still over budget.
236
270
  function trimToTokenBudget(msgs, tokenBudget) {
237
- const CHARS_PER_TOKEN = 3;
271
+ const CHARS_PER_TOKEN = 4;
238
272
  const system = msgs.filter((m) => m.role === 'system');
239
273
  const nonSystem = msgs.filter((m) => m.role !== 'system');
240
274
  if (nonSystem.length === 0) return [...system];
@@ -271,27 +305,62 @@ function createApiClient({ getConfig, saveConfig, ui }) {
271
305
  return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
272
306
  }
273
307
 
274
- // Proactive trim: apply the session input limit discovered from a prior 400.
308
+ // Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
309
+ // fall back to config.context_length (with a ~10% safety margin) as a hint.
310
+ // The fallback is not written to _sessionInputLimits so a real overflow
311
+ // always overrides the config hint.
275
312
  let trimmedMessages = messages;
276
- if (_sessionInputLimit !== null) {
277
- if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
278
- trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
313
+ let sessionLimit = _sessionInputLimits.get(resolvedModel);
314
+ if (sessionLimit == null &&
315
+ Number.isInteger(config.context_length) && config.context_length > 0) {
316
+ sessionLimit = Math.floor(config.context_length * 0.9);
317
+ }
318
+ if (sessionLimit != null) {
319
+ if (Math.floor(JSON.stringify(messages).length / 4) > sessionLimit) {
320
+ trimmedMessages = trimToTokenBudget(messages, sessionLimit);
321
+ const dropped = messages.length - trimmedMessages.length;
322
+ const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
323
+ notifyTrim({ reason: 'proactive', dropped, keptTokens, limit: sessionLimit });
279
324
  }
280
325
  }
281
326
 
327
+ // MiniMax supports `reasoning_split: true` which moves thinking content
328
+ // into a separate reasoning_details field on the response (and
329
+ // delta.reasoning_content during streaming) instead of embedding
330
+ // <think>...</think> inside message.content. Only send this flag to
331
+ // MiniMax — other providers may reject unknown fields.
332
+ const isMiniMax =
333
+ /api\.minimax\.io/i.test(config.api_base || '') ||
334
+ /^minimax[-\/]/i.test(resolvedModel || '');
335
+
282
336
  const payload = {
283
- model: model || config.default_model,
337
+ model: resolvedModel,
284
338
  messages: trimmedMessages,
285
339
  temperature: temperature !== undefined ? temperature : config.temperature,
286
340
  stream: true,
341
+ stream_options: { include_usage: true },
287
342
  };
288
343
 
344
+ if (isMiniMax) payload.reasoning_split = true;
289
345
  if (maxTokens !== undefined) payload.max_tokens = maxTokens;
290
346
 
347
+ // Native function-calling: advertise the tool schema and let the model
348
+ // emit structured tool_calls. Wrappers are XML envelopes, not callable
349
+ // tools — filter them out per the TOOL_SPECS contract.
350
+ if (nativeTools) {
351
+ const callable = Object.fromEntries(
352
+ Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
353
+ );
354
+ payload.tools = buildToolsSchema(callable);
355
+ payload.tool_choice = 'auto';
356
+ }
357
+
358
+ const endpoint = apiUrl('/v1/chat/completions');
359
+
291
360
  async function doRequest(msgs) {
292
361
  const reqPayload = { ...payload, messages: msgs };
293
362
  const reqBody = JSON.stringify(reqPayload);
294
- const res = await httpRequest(apiUrl('/v1/chat/completions'), {
363
+ const res = await httpRequest(endpoint, {
295
364
  method: 'POST',
296
365
  timeout: config.request_timeout_ms,
297
366
  headers: {
@@ -299,6 +368,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
299
368
  'Authorization': `Bearer ${config.api_key}`,
300
369
  'Content-Length': Buffer.byteLength(reqBody),
301
370
  },
371
+ signal,
302
372
  }, reqBody);
303
373
 
304
374
  if (res.statusCode !== 200) {
@@ -319,6 +389,9 @@ function createApiClient({ getConfig, saveConfig, ui }) {
319
389
  err.statusCode = res.statusCode;
320
390
  err.parsedErr = parsedErr;
321
391
  err.detail = detail;
392
+ err.rawBody = errBody;
393
+ err.responseHeaders = res.headers;
394
+ err.endpoint = endpoint;
322
395
  throw err;
323
396
  }
324
397
  return res;
@@ -327,7 +400,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
327
400
  // On payload-too-large errors, trim and retry.
328
401
  // 400 with context-overflow detail → parse exact context window, budget = window/2
329
402
  // 413 Request Entity Too Large (Nginx/proxy) → no size hint, halve current estimate
330
- // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
403
+ // In both cases the per-model session input limit is set so all subsequent
404
+ // calls for this model are proactively trimmed.
331
405
  let res;
332
406
  try {
333
407
  res = await doRequest(trimmedMessages);
@@ -342,15 +416,41 @@ function createApiClient({ getConfig, saveConfig, ui }) {
342
416
  const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
343
417
  err.detail.match(/maximum.*?(\d+)\s*token/i);
344
418
  const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
345
- budget = contextWindow
346
- ? Math.floor(contextWindow / 2)
347
- : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
419
+ if (contextWindow) {
420
+ budget = Math.floor(contextWindow * 0.9);
421
+ // Persist the learned context window so future turns/runs trim
422
+ // proactively without needing a second 400. Must not block the
423
+ // retry if the write fails.
424
+ try {
425
+ const currentConfig = getConfig();
426
+ const next = { ...currentConfig, context_length: contextWindow };
427
+ if (Array.isArray(currentConfig.models)) {
428
+ next.models = currentConfig.models.map((m) =>
429
+ m && m.api_base === currentConfig.api_base && m.model === resolvedModel
430
+ ? { ...m, context_length: contextWindow }
431
+ : m
432
+ );
433
+ }
434
+ saveConfig(next);
435
+ } catch {}
436
+ } else {
437
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
438
+ }
348
439
  } else {
349
440
  // 413: no token info available — halve the estimated size of the current payload.
350
- budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
441
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
351
442
  }
352
- _sessionInputLimit = budget;
443
+ _sessionInputLimits.set(resolvedModel, budget);
444
+ const before = trimmedMessages;
353
445
  trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
446
+ const dropped = before.length - trimmedMessages.length;
447
+ const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
448
+ notifyTrim({
449
+ reason: is413 ? 'overflow-413' : 'overflow-400',
450
+ dropped,
451
+ keptTokens,
452
+ limit: budget,
453
+ });
354
454
  res = await doRequest(trimmedMessages);
355
455
  } else {
356
456
  throw err;
@@ -361,10 +461,15 @@ function createApiClient({ getConfig, saveConfig, ui }) {
361
461
  const startTime = Date.now();
362
462
  let fullText = '';
363
463
  let reasoningText = '';
464
+ let reasoningDetailsText = '';
364
465
  let tokenCount = 0;
365
466
  let inReasoning = false;
366
467
  let streamUsage = null;
468
+ let streamFinishReason = null;
367
469
  let resolved = false;
470
+ // delta.tool_calls accumulator (OpenAI function-calling streaming format).
471
+ // Keyed by `index` per the OpenAI spec.
472
+ const toolCallAcc = [];
368
473
  const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
369
474
  if (!silent) {
370
475
  process.stdout.write('\n');
@@ -373,9 +478,44 @@ function createApiClient({ getConfig, saveConfig, ui }) {
373
478
  let firstContentToken = true;
374
479
  let lineBuffer = '';
375
480
 
481
+ function escapeXml(s) {
482
+ return String(s)
483
+ .replace(/&/g, '&amp;')
484
+ .replace(/</g, '&lt;')
485
+ .replace(/>/g, '&gt;');
486
+ }
487
+
488
+ // Convert any accumulated tool_calls into a MiniMax XML block and
489
+ // append it to fullText so extractToolCalls() picks them up. Runs once
490
+ // at stream end.
491
+ function appendToolCallsXml() {
492
+ const valid = toolCallAcc.filter((t) => t && t.name);
493
+ if (valid.length === 0) return;
494
+ const invokes = valid.map((tc) => {
495
+ let args = {};
496
+ try { args = tc.arguments ? JSON.parse(tc.arguments) : {}; } catch {}
497
+ const params = Object.entries(args).map(([k, v]) => {
498
+ const val = typeof v === 'string' ? v : JSON.stringify(v);
499
+ return `<parameter name="${escapeXml(k)}">${val}</parameter>`;
500
+ }).join('\n');
501
+ return `<invoke name="${escapeXml(tc.name)}">\n${params}\n</invoke>`;
502
+ }).join('\n');
503
+ fullText += `\n<minimax:tool_call>\n${invokes}\n</minimax:tool_call>`;
504
+ }
505
+
376
506
  function finalize() {
377
507
  if (resolved) return;
378
508
  resolved = true;
509
+ // Native mode: surface tool calls as structured data; skip XML serialization.
510
+ // Legacy mode: serialize into <minimax:tool_call> XML so extractToolCalls picks them up.
511
+ const validToolCalls = toolCallAcc
512
+ .filter((t) => t && t.name)
513
+ .map((t, i) => ({
514
+ id: t.id || `call_${i}`,
515
+ type: 'function',
516
+ function: { name: t.name, arguments: t.arguments || '{}' },
517
+ }));
518
+ if (!nativeTools) appendToolCallsXml();
379
519
  if (!silent) renderer.flush();
380
520
  const elapsed = (Date.now() - startTime) / 1000;
381
521
  const tps = tokenCount / (elapsed || 1);
@@ -385,7 +525,47 @@ function createApiClient({ getConfig, saveConfig, ui }) {
385
525
  StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency });
386
526
  StatusBar.current.render();
387
527
  }
388
- resolve({ content: fullText, usage: streamUsage });
528
+ // Fallback for endpoints that don't honor stream_options.include_usage:
529
+ // estimate prompt/completion tokens locally so the status bar still updates.
530
+ let usage = streamUsage;
531
+ if (!usage) {
532
+ usage = {
533
+ prompt_tokens: estimateTokens(JSON.stringify(trimmedMessages)),
534
+ completion_tokens: estimateTokens(fullText) + estimateTokens(reasoningText),
535
+ };
536
+ }
537
+ const elapsedMs = Date.now() - startTime;
538
+ resolve({
539
+ content: fullText,
540
+ toolCalls: nativeTools ? validToolCalls : [],
541
+ usage,
542
+ usage_from_provider: !!streamUsage,
543
+ tool_calls_count: validToolCalls.length,
544
+ finish_reason: streamFinishReason,
545
+ finishReason: streamFinishReason,
546
+ elapsed_ms: elapsedMs,
547
+ reasoning: reasoningText,
548
+ reasoning_details: reasoningDetailsText,
549
+ endpoint,
550
+ request: {
551
+ model: payload.model,
552
+ temperature: payload.temperature,
553
+ max_tokens: payload.max_tokens,
554
+ stream: payload.stream,
555
+ stop: payload.stop,
556
+ native_tools: nativeTools,
557
+ },
558
+ });
559
+ }
560
+
561
+ if (signal) {
562
+ signal.addEventListener('abort', () => {
563
+ try { res?.destroy(); } catch {}
564
+ if (!resolved) {
565
+ resolved = true;
566
+ reject(new Error('Aborted'));
567
+ }
568
+ });
389
569
  }
390
570
 
391
571
  res.setEncoding('utf8');
@@ -409,24 +589,61 @@ function createApiClient({ getConfig, saveConfig, ui }) {
409
589
  if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
410
590
  streamUsage = obj.usage;
411
591
  }
412
- const delta = ((obj.choices || [])[0] || {}).delta || {};
592
+ const choice = (obj.choices || [])[0] || {};
593
+ if (choice.finish_reason) streamFinishReason = choice.finish_reason;
594
+ const delta = choice.delta || {};
595
+
596
+ // MiniMax `reasoning_split: true` surfaces a structured
597
+ // reasoning_details field. It may arrive as a streaming delta
598
+ // (delta.reasoning_details) or as an authoritative final value
599
+ // on choice.message. Preserve it for debug output; not routed to
600
+ // the UI and not fed back into messages[] on subsequent turns.
601
+ const rdDelta = delta.reasoning_details;
602
+ if (rdDelta !== undefined && rdDelta !== null) {
603
+ reasoningDetailsText += typeof rdDelta === 'string' ? rdDelta : JSON.stringify(rdDelta);
604
+ }
605
+ const rdFinal = choice.message && choice.message.reasoning_details;
606
+ if (rdFinal !== undefined && rdFinal !== null) {
607
+ reasoningDetailsText = typeof rdFinal === 'string' ? rdFinal : JSON.stringify(rdFinal);
608
+ }
413
609
 
414
610
  const reasoning = delta.reasoning_content || '';
415
611
  if (reasoning) {
612
+ const uiActive = isUIActive();
416
613
  if (!inReasoning) {
417
614
  inReasoning = true;
418
- if (showThink) {
615
+ if (showThink && !uiActive) {
419
616
  process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
420
617
  renderer._linesWritten++;
421
618
  }
422
619
  }
423
620
  reasoningText += reasoning;
424
621
  tokenCount++;
425
- if (showThink) {
622
+ if (showThink && !uiActive) {
426
623
  process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
427
624
  }
428
625
  }
429
626
 
627
+ const toolCallsDelta = delta.tool_calls;
628
+ if (Array.isArray(toolCallsDelta)) {
629
+ for (const tc of toolCallsDelta) {
630
+ const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
631
+ const isNew = !toolCallAcc[idx];
632
+ if (isNew) toolCallAcc[idx] = { id: '', name: '', arguments: '' };
633
+ if (tc.id) toolCallAcc[idx].id = tc.id;
634
+ if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
635
+ if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
636
+ // When the model streams purely via delta.tool_calls (no
637
+ // delta.content), firstContentToken never flips, so the status
638
+ // bar stays on "Thinking…" for the entire tool-call stream.
639
+ // Surface each new tool slot the moment its name is known so
640
+ // the user sees "Using tool: <name>" instead of a frozen UI.
641
+ if (isNew && StatusBar.current && toolCallAcc[idx].name) {
642
+ StatusBar.current.update('tool', `Using tool: ${toolCallAcc[idx].name}`);
643
+ }
644
+ }
645
+ }
646
+
430
647
  const content = delta.content || '';
431
648
  if (content) {
432
649
  if (inReasoning) {