@semalt-ai/code 1.7.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/api.js CHANGED
@@ -14,9 +14,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
14
14
  FG_RED,
15
15
  FG_TEAL,
16
16
  RST,
17
+ StatusBar,
17
18
  StreamRenderer,
18
- getCols,
19
- printStatusBar,
20
19
  } = ui;
21
20
 
22
21
  function apiUrl(urlPath) {
@@ -34,10 +33,6 @@ function createApiClient({ getConfig, saveConfig, ui }) {
34
33
  return `${base}${normalizedPath}`;
35
34
  }
36
35
 
37
- function describeModelProfile(profile) {
38
- return `${profile.model} @ ${profile.api_base}`;
39
- }
40
-
41
36
  function requireAuthToken() {
42
37
  const config = getConfig();
43
38
  if (!config.auth_token) {
@@ -56,46 +51,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
56
51
  saveConfig(config);
57
52
  }
58
53
 
59
- function chooseSavedModelProfile(rl, currentModel, cwd, onDone) {
60
- const config = getConfig();
61
- if (!config.models.length) {
62
- console.log(` ${FG_RED}✗${RST} ${FG_GRAY}No saved model profiles. Use semalt-code models add first.${RST}`);
63
- onDone(currentModel);
64
- return;
65
- }
66
-
67
- console.log();
68
- console.log(` ${FG_TEAL}${BOLD}◆ Saved Models${RST}`);
69
- console.log(` ${FG_DARK}${'─'.repeat(40)}${RST}`);
70
- config.models.forEach((profile, index) => {
71
- const active = profile.api_base === config.api_base &&
72
- profile.api_key === config.api_key &&
73
- profile.model === currentModel;
74
- const marker = active ? `${FG_GREEN}●${RST}` : `${FG_DARK}○${RST}`;
75
- console.log(` ${marker} ${ui.FG_CYAN}${index + 1}.${RST} ${describeModelProfile(profile)}`);
76
- });
77
- console.log();
78
-
79
- rl.question(` ${FG_TEAL}${BOLD}Select model>${RST} `, (answer) => {
80
- const selected = Number((answer || '').trim());
81
- if (!Number.isInteger(selected) || selected < 1 || selected > config.models.length) {
82
- console.log(` ${FG_RED}✗${RST} ${FG_GRAY}Invalid selection${RST}`);
83
- onDone(currentModel);
84
- return;
85
- }
86
-
87
- const profile = config.models[selected - 1];
88
- setActiveModelProfile(profile);
89
- console.log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Model profile → ${describeModelProfile(profile)}${RST}`);
90
- printStatusBar(profile.model, cwd);
91
- onDone(profile.model);
92
- });
93
- }
94
-
95
54
  function estimateTokens(text) {
96
55
  return Math.floor((text || '').length / 4);
97
56
  }
98
57
 
58
+ // Discovered context limit for this process lifetime.
59
+ // Set on the first context-overflow 400; used to proactively trim all subsequent calls.
60
+ let _sessionInputLimit = null;
61
+
99
62
  function httpRequest(urlStr, options, body) {
100
63
  return new Promise((resolve, reject) => {
101
64
  const url = new URL(urlStr);
@@ -223,50 +186,249 @@ function createApiClient({ getConfig, saveConfig, ui }) {
223
186
  });
224
187
  }
225
188
 
226
- async function chatStream(messages, { model, temperature, maxTokens } = {}) {
189
/**
 * Create a chat on the dashboard by POSTing to `/api/chats`.
 *
 * The request carries a bearer token obtained from requireAuthToken() and a
 * 15 second timeout.
 *
 * @param {string} title - Chat title, sent as `title`.
 * @param {*} modelDbId - Model identifier, sent as `model_id`.
 * @returns {*} Whatever requestJson() returns for this call.
 */
function dashboardCreateChat(title, modelDbId) {
  const token = requireAuthToken();
  const endpoint = dashboardUrl('/api/chats');
  const request = {
    method: 'POST',
    timeout: 15000,
    headers: { Authorization: `Bearer ${token}` },
    body: { title, model_id: modelDbId },
  };
  return requestJson(endpoint, request);
}
198
+
199
/**
 * Fetch the chat list from the dashboard with a GET to `/api/chats`.
 *
 * The request carries a bearer token obtained from requireAuthToken() and a
 * 15 second timeout.
 *
 * @returns {*} Whatever requestJson() returns for this call.
 */
function dashboardListChats() {
  const token = requireAuthToken();
  const endpoint = dashboardUrl('/api/chats');
  const request = {
    method: 'GET',
    timeout: 15000,
    headers: { Authorization: `Bearer ${token}` },
  };
  return requestJson(endpoint, request);
}
207
+
208
/**
 * Fetch a single chat from the dashboard with a GET to `/api/chats/<id>`.
 *
 * The id is stringified and URI-encoded before being placed in the path. The
 * request carries a bearer token obtained from requireAuthToken() and a
 * 15 second timeout.
 *
 * @param {*} id - Chat identifier.
 * @returns {*} Whatever requestJson() returns for this call.
 */
function dashboardGetChat(id) {
  const token = requireAuthToken();
  const chatPath = `/api/chats/${encodeURIComponent(String(id))}`;
  const request = {
    method: 'GET',
    timeout: 15000,
    headers: { Authorization: `Bearer ${token}` },
  };
  return requestJson(dashboardUrl(chatPath), request);
}
216
+
217
/**
 * Store a batch of messages on a dashboard chat by POSTing to
 * `/api/chats/<chatId>/messages/batch`.
 *
 * The chat id is stringified and URI-encoded before being placed in the path.
 * The request carries a bearer token obtained from requireAuthToken() and a
 * 15 second timeout.
 *
 * @param {*} chatId - Chat identifier.
 * @param {*} messages - Sent as the `messages` field of the request body.
 * @returns {*} Whatever requestJson() returns for this call.
 */
function dashboardSaveMessages(chatId, messages) {
  const token = requireAuthToken();
  const batchPath = `/api/chats/${encodeURIComponent(String(chatId))}/messages/batch`;
  const request = {
    method: 'POST',
    timeout: 15000,
    headers: { Authorization: `Bearer ${token}` },
    body: { messages },
  };
  return requestJson(dashboardUrl(batchPath), request);
}
226
+
227
+ async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
227
228
  const config = getConfig();
229
+
230
+ // Fit messages into tokenBudget tokens.
231
+ // Uses chars/3 — conservative for token-dense content (code, JSON, HTML).
232
+ //
233
+ // Always keeps: system prompt + first non-system message (original task).
234
+ // Drops intermediate messages oldest-first, then truncates the last tail
235
+ // message (typically a large tool result) if still over budget.
236
/**
 * Fit a chat transcript into an approximate token budget.
 *
 * Size is estimated as the JSON-serialised length of the message array
 * divided by 3 characters per token, rounded down.
 *
 * Strategy, in order:
 *   1. System messages and the first non-system message ("pinned") are
 *      always kept.
 *   2. The remaining messages are dropped oldest-first until the estimate
 *      fits or only one of them is left.
 *   3. If a single remaining message still does not fit and its `content`
 *      is a string, the content is cut down to its last characters and
 *      prefixed with a truncation notice.
 *   4. If there was nothing after the pinned message, the pinned message's
 *      string content is truncated the same way.
 *
 * The result may still exceed the budget: non-string content is never
 * truncated, and when the kept head (system + pinned) alone is over budget
 * the remaining message is left untouched.
 *
 * Each message is serialised once and a running total is maintained, so the
 * drop loop is linear in the number of messages rather than re-serialising
 * the whole transcript for every dropped message.
 *
 * @param {Array<object>} msgs - Chat messages; each is read for `role` and `content`.
 * @param {number} tokenBudget - Maximum estimated tokens for the result.
 * @returns {Array<object>} New array: system messages first, then the pinned
 *   message, then the surviving later messages. Input objects are not mutated.
 */
function trimToTokenBudget(msgs, tokenBudget) {
  const CHARS_PER_TOKEN = 3;
  const SAFETY_MARGIN_CHARS = 200;
  const TRUNCATION_NOTICE = '[…content truncated to fit model limit…]\n';

  // Inside an array JSON.stringify renders unserialisable values as `null`,
  // while on their own they yield undefined; mirror the in-array length.
  const jsonLength = (value) => (JSON.stringify(value) ?? 'null').length;

  const system = msgs.filter((m) => m.role === 'system');
  const nonSystem = msgs.filter((m) => m.role !== 'system');
  if (nonSystem.length === 0) return [...system];

  const [pinned, ...rest] = nonSystem; // pinned = original task, never dropped
  const head = [...system, pinned];
  const headChars = JSON.stringify(head).length;

  const tailSizes = rest.map(jsonLength);
  let tailChars = tailSizes.reduce((sum, size) => sum + size, 0);
  let firstKept = 0;
  const keptCount = () => rest.length - firstKept;

  // Every message appended to the non-empty head costs its own JSON length
  // plus one separating comma, so this equals
  // JSON.stringify([...head, ...keptTail]).length without re-serialising.
  const estimate = () => Math.floor((headChars + tailChars + keptCount()) / CHARS_PER_TOKEN);

  while (keptCount() > 1 && estimate() > tokenBudget) {
    tailChars -= tailSizes[firstKept];
    firstKept += 1;
  }
  const tail = rest.slice(firstKept);

  if (estimate() > tokenBudget) {
    if (tail.length === 1) {
      const [last] = tail;
      const available = tokenBudget * CHARS_PER_TOKEN - headChars - SAFETY_MARGIN_CHARS;
      if (available > 0 && typeof last.content === 'string' && last.content.length > available) {
        // Keep the end of the content.
        return [...head, { ...last, content: TRUNCATION_NOTICE + last.content.slice(-available) }];
      }
    } else if (tail.length === 0) {
      const systemChars = JSON.stringify(system).length;
      const available = tokenBudget * CHARS_PER_TOKEN - systemChars - SAFETY_MARGIN_CHARS;
      if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
        return [...system, { ...pinned, content: TRUNCATION_NOTICE + pinned.content.slice(-available) }];
      }
    }
  }

  return [...head, ...tail];
}
273
+
274
+ // Proactive trim: apply the session input limit discovered from a prior 400.
275
+ let trimmedMessages = messages;
276
+ if (_sessionInputLimit !== null) {
277
+ if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
278
+ trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
279
+ }
280
+ }
281
+
228
282
  const payload = {
229
283
  model: model || config.default_model,
230
- messages,
284
+ messages: trimmedMessages,
231
285
  temperature: temperature !== undefined ? temperature : config.temperature,
232
286
  stream: true,
287
+ stream_options: { include_usage: true },
233
288
  };
234
289
 
235
290
  if (maxTokens !== undefined) payload.max_tokens = maxTokens;
236
291
 
237
- const body = JSON.stringify(payload);
238
- let res;
239
-
240
- try {
241
- res = await httpRequest(apiUrl('/v1/chat/completions'), {
292
+ async function doRequest(msgs) {
293
+ const reqPayload = { ...payload, messages: msgs };
294
+ const reqBody = JSON.stringify(reqPayload);
295
+ const res = await httpRequest(apiUrl('/v1/chat/completions'), {
242
296
  method: 'POST',
243
297
  timeout: config.request_timeout_ms,
244
298
  headers: {
245
299
  'Content-Type': 'application/json',
246
300
  'Authorization': `Bearer ${config.api_key}`,
247
- 'Content-Length': Buffer.byteLength(body),
301
+ 'Content-Length': Buffer.byteLength(reqBody),
248
302
  },
249
- }, body);
250
- } catch (error) {
251
- process.stdout.write(`\n ${FG_RED}✗ ${error.message}${RST}\n`);
252
- return '';
303
+ }, reqBody);
304
+
305
+ if (res.statusCode !== 200) {
306
+ const errBody = await new Promise((resolve) => {
307
+ let d = '';
308
+ res.setEncoding('utf8');
309
+ res.on('data', (c) => { d += c; });
310
+ res.on('end', () => resolve(d));
311
+ res.on('error', () => resolve(''));
312
+ });
313
+ let detail = '';
314
+ let parsedErr = null;
315
+ try {
316
+ parsedErr = JSON.parse(errBody);
317
+ detail = (parsedErr && (parsedErr.error?.message || parsedErr.error || parsedErr.message)) || '';
318
+ } catch { detail = errBody.slice(0, 200); }
319
+ const err = new Error(`HTTP ${res.statusCode}${detail ? `: ${detail}` : ''}`);
320
+ err.statusCode = res.statusCode;
321
+ err.parsedErr = parsedErr;
322
+ err.detail = detail;
323
+ err.rawBody = errBody;
324
+ err.responseHeaders = res.headers;
325
+ throw err;
326
+ }
327
+ return res;
253
328
  }
254
329
 
255
- if (res.statusCode !== 200) {
256
- process.stdout.write(`\n ${FG_RED}✗ Error: HTTP ${res.statusCode}${RST}\n`);
257
- res.resume();
258
- return '';
330
+ // On payload-too-large errors, trim and retry.
331
+ // 400 with context-overflow detail → parse exact context window, budget = window/2
332
+ // 413 Request Entity Too Large (Nginx/proxy) → no size hint, halve current estimate
333
+ // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
334
+ let res;
335
+ try {
336
+ res = await doRequest(trimmedMessages);
337
+ } catch (err) {
338
+ const is400Overflow = err.statusCode === 400 && err.detail &&
339
+ /context.length|input.token|context_length|maximum.*token|token.*limit/i.test(err.detail);
340
+ const is413 = err.statusCode === 413;
341
+
342
+ if (is400Overflow || is413) {
343
+ let budget;
344
+ if (is400Overflow) {
345
+ const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
346
+ err.detail.match(/maximum.*?(\d+)\s*token/i);
347
+ const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
348
+ budget = contextWindow
349
+ ? Math.floor(contextWindow / 2)
350
+ : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
351
+ } else {
352
+ // 413: no token info available — halve the estimated size of the current payload.
353
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
354
+ }
355
+ _sessionInputLimit = budget;
356
+ trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
357
+ res = await doRequest(trimmedMessages);
358
+ } else {
359
+ throw err;
360
+ }
259
361
  }
260
362
 
261
- return new Promise((resolve) => {
363
+ return new Promise((resolve, reject) => {
262
364
  const startTime = Date.now();
263
365
  let fullText = '';
264
366
  let reasoningText = '';
265
367
  let tokenCount = 0;
266
368
  let inReasoning = false;
267
- const renderer = new StreamRenderer();
369
+ let streamUsage = null;
370
+ let resolved = false;
371
+ // delta.tool_calls accumulator (OpenAI function-calling streaming format).
372
+ // Keyed by `index` per the OpenAI spec.
373
+ const toolCallAcc = [];
374
+ const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
375
+ if (!silent) {
376
+ process.stdout.write('\n');
377
+ renderer._linesWritten = 1;
378
+ }
379
+ let firstContentToken = true;
268
380
  let lineBuffer = '';
269
381
 
382
/**
 * Escape a value for use in XML text or inside a double-quoted attribute.
 *
 * The value is stringified first. `&`, `<`, `>` and `"` are replaced by their
 * entities in one pass; the double quote is included because callers embed
 * the result in `name="…"` attributes, where a raw quote would end the
 * attribute early.
 *
 * @param {*} s - Value to escape.
 * @returns {string} Escaped string.
 */
function escapeXml(s) {
  const ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
  return String(s).replace(/[&<>"]/g, (ch) => ENTITIES[ch]);
}
388
+
389
+ // Convert any accumulated tool_calls into a MiniMax XML block and
390
+ // append it to fullText so extractToolCalls() picks them up. Runs once
391
+ // at stream end.
392
/**
 * Serialise the accumulated streamed tool calls into one
 * `<minimax:tool_call>` XML block and append it to `fullText`.
 *
 * Slots in `toolCallAcc` that are empty or have no name are skipped; if none
 * remain, `fullText` is left untouched. Each call's `arguments` string is
 * parsed as JSON and every top-level key becomes a `<parameter>` element.
 * String values are written as-is, other values as JSON. Parameter values
 * are not XML-escaped; only names are.
 *
 * Arguments that are missing, malformed, or that parse to something other
 * than an object (e.g. `null` or a bare string) produce an invoke with no
 * parameters. Without that guard `Object.entries(null)` would throw.
 */
function appendToolCallsXml() {
  const valid = toolCallAcc.filter((t) => t && t.name);
  if (valid.length === 0) return;

  const invokes = valid.map((tc) => {
    let args = {};
    try {
      const parsed = tc.arguments ? JSON.parse(tc.arguments) : {};
      // JSON.parse may legally return null or a primitive; only objects
      // carry named parameters.
      if (parsed !== null && typeof parsed === 'object') args = parsed;
    } catch {
      // Malformed or incomplete JSON: fall through with no parameters.
    }
    const params = Object.entries(args).map(([k, v]) => {
      const val = typeof v === 'string' ? v : JSON.stringify(v);
      return `<parameter name="${escapeXml(k)}">${val}</parameter>`;
    }).join('\n');
    return `<invoke name="${escapeXml(tc.name)}">\n${params}\n</invoke>`;
  }).join('\n');

  fullText += `\n<minimax:tool_call>\n${invokes}\n</minimax:tool_call>`;
}
406
+
407
/**
 * Complete the stream exactly once and resolve the surrounding promise.
 *
 * Guarded by the `resolved` flag, so later calls are no-ops. Steps, in order:
 *   1. append accumulated tool calls to `fullText`;
 *   2. flush the renderer unless running silently;
 *   3. push final token/latency figures to the active status bar, if any;
 *   4. resolve with `{ content, usage }`, using the usage reported by the
 *      stream or, when none was received, a local estimate.
 */
function finalize() {
  if (resolved) return;
  resolved = true;

  appendToolCallsXml();
  if (!silent) {
    renderer.flush();
  }

  const elapsed = (Date.now() - startTime) / 1000;
  // Guard against a zero elapsed time when computing throughput.
  const tps = tokenCount / (elapsed || 1);

  if (StatusBar.current) {
    const parts = [`${Math.round(tps)} tok/s`, `${elapsed.toFixed(1)}s`];
    if (reasoningText) {
      parts.push(`${estimateTokens(reasoningText)} think`);
    }
    StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency: parts.join(' · ') });
    StatusBar.current.render();
  }

  // Endpoints that ignore stream_options.include_usage never send a usage
  // object; estimate prompt/completion tokens locally in that case.
  const usage = streamUsage || {
    prompt_tokens: estimateTokens(JSON.stringify(trimmedMessages)),
    completion_tokens: estimateTokens(fullText) + estimateTokens(reasoningText),
  };

  resolve({ content: fullText, usage });
}
431
+
270
432
  res.setEncoding('utf8');
271
433
 
272
434
  res.on('data', (chunk) => {
@@ -277,53 +439,95 @@ function createApiClient({ getConfig, saveConfig, ui }) {
277
439
  for (const line of lines) {
278
440
  if (!line.startsWith('data: ')) continue;
279
441
  const data = line.slice(6).trim();
280
- if (data === '[DONE]') continue;
442
+ if (data === '[DONE]') {
443
+ finalize();
444
+ res.destroy();
445
+ return;
446
+ }
281
447
 
282
448
  try {
283
449
  const obj = JSON.parse(data);
450
+ if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
451
+ streamUsage = obj.usage;
452
+ }
284
453
  const delta = ((obj.choices || [])[0] || {}).delta || {};
285
454
 
286
455
  const reasoning = delta.reasoning_content || '';
287
456
  if (reasoning) {
288
457
  if (!inReasoning) {
289
458
  inReasoning = true;
290
- process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
459
+ if (showThink) {
460
+ process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
461
+ renderer._linesWritten++;
462
+ }
291
463
  }
292
464
  reasoningText += reasoning;
293
465
  tokenCount++;
294
- if (tokenCount % 20 === 0) process.stdout.write(`${FG_DARK}.${RST}`);
466
+ if (showThink) {
467
+ process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
468
+ }
469
+ }
470
+
471
+ const toolCallsDelta = delta.tool_calls;
472
+ if (Array.isArray(toolCallsDelta)) {
473
+ for (const tc of toolCallsDelta) {
474
+ const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
475
+ const isNew = !toolCallAcc[idx];
476
+ if (isNew) toolCallAcc[idx] = { name: '', arguments: '' };
477
+ if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
478
+ if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
479
+ // When the model streams purely via delta.tool_calls (no
480
+ // delta.content), firstContentToken never flips, so the status
481
+ // bar stays on "Thinking…" for the entire tool-call stream.
482
+ // Surface each new tool slot the moment its name is known so
483
+ // the user sees "Using tool: <name>" instead of a frozen UI.
484
+ if (isNew && StatusBar.current && toolCallAcc[idx].name) {
485
+ StatusBar.current.update('tool', `Using tool: ${toolCallAcc[idx].name}`);
486
+ }
487
+ }
295
488
  }
296
489
 
297
490
  const content = delta.content || '';
298
491
  if (content) {
299
492
  if (inReasoning) {
300
493
  inReasoning = false;
301
- process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
494
+ if (showThink && !silent) {
495
+ process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
496
+ renderer._linesWritten++;
497
+ }
498
+ }
499
+ if (onToken) {
500
+ if (firstContentToken) {
501
+ firstContentToken = false;
502
+ if (StatusBar.current) StatusBar.current.update({ status: 'streaming' });
503
+ }
504
+ onToken(content);
505
+ } else {
506
+ renderer.feed(content);
302
507
  }
303
- renderer.feed(content);
304
508
  fullText += content;
305
509
  tokenCount++;
510
+ if (tokenCount % 20 === 0 && StatusBar.current) {
511
+ const elapsedSec = (Date.now() - startTime) / 1000 || 0.001;
512
+ StatusBar.current.liveUpdate({
513
+ tokens: `${tokenCount} tok`,
514
+ latency: `${Math.round(tokenCount / elapsedSec)} tok/s`,
515
+ });
516
+ }
306
517
  }
307
518
  } catch {}
308
519
  }
309
520
  });
310
521
 
311
522
  res.on('end', () => {
312
- renderer.flush();
313
- const elapsed = (Date.now() - startTime) / 1000;
314
- const estTokens = estimateTokens(fullText + reasoningText);
315
- const tps = tokenCount / (elapsed || 1);
316
- const cols = getCols();
317
- process.stdout.write(`\n ${FG_DARK}${'─'.repeat(Math.min(cols, 60) - 4)}${RST}\n`);
318
- let costLine = `${FG_DARK}~${estTokens} tokens · ${elapsed.toFixed(1)}s · ${Math.round(tps)} tok/s${RST}`;
319
- if (reasoningText) costLine += ` ${FG_DARK}· ${estimateTokens(reasoningText)} thinking${RST}`;
320
- process.stdout.write(` ${costLine}\n`);
321
- resolve(fullText);
523
+ finalize();
322
524
  });
323
525
 
324
526
  res.on('error', (error) => {
325
- process.stdout.write(`\n ${FG_RED}✗ ${error.message}${RST}\n`);
326
- resolve('');
527
+ if (!resolved) {
528
+ resolved = true;
529
+ reject(error);
530
+ }
327
531
  });
328
532
  });
329
533
  }
@@ -388,12 +592,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
388
592
  return {
389
593
  chatStream,
390
594
  chatSync,
391
- chooseSavedModelProfile,
595
+ dashboardCreateChat,
596
+ dashboardGetChat,
392
597
  dashboardGetModelForCli,
598
+ dashboardListChats,
393
599
  dashboardListModels,
394
600
  dashboardLogout,
601
+ dashboardSaveMessages,
395
602
  dashboardWhoAmI,
396
- describeModelProfile,
397
603
  estimateTokens,
398
604
  getCliLoginStatus,
399
605
  requestCliLogin,
package/lib/args.js CHANGED
@@ -34,6 +34,37 @@ function parseArgs(argv) {
34
34
  case '--default-model':
35
35
  opts.defaultModel = argv[++i];
36
36
  break;
37
+ case '-r':
38
+ case '--resume':
39
+ opts.resume = argv[++i];
40
+ break;
41
+ case '--allow-fs':
42
+ (opts.allowedTiers = opts.allowedTiers || []).push('fs');
43
+ break;
44
+ case '--allow-exec':
45
+ (opts.allowedTiers = opts.allowedTiers || []).push('exec');
46
+ break;
47
+ case '--allow-net':
48
+ (opts.allowedTiers = opts.allowedTiers || []).push('net');
49
+ break;
50
+ case '--allow-all':
51
+ opts.allowedTiers = ['fs', 'exec', 'net', 'sys'];
52
+ break;
53
+ case '--readonly':
54
+ opts.readonly = true;
55
+ break;
56
+ case '--new':
57
+ opts.new = true;
58
+ break;
59
+ case '--show-think':
60
+ opts.showThink = true;
61
+ break;
62
+ case '--debug':
63
+ opts.debug = true;
64
+ break;
65
+ case '--system-prompt':
66
+ opts.systemPromptFile = argv[++i];
67
+ break;
37
68
  default:
38
69
  positional.push(argv[i]);
39
70
  }
package/lib/audit.js ADDED
@@ -0,0 +1,31 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const os = require('os');
5
+ const path = require('path');
6
+
7
+ const AUDIT_LOG = path.join(os.homedir(), '.semalt-ai', 'audit.log');
8
+
9
/**
 * Append one JSON line describing a tool call to the audit log.
 *
 * Best-effort: any failure (serialisation, filesystem) is swallowed so that
 * auditing can never break the caller.
 *
 * - For `write_file` calls whose input object has a `content` key, the
 *   content is replaced by a `<N bytes>` placeholder (UTF-8 byte length of a
 *   string content, 0 otherwise) so file bodies are not logged.
 * - The input is stringified and capped at 200 characters.
 * - The log directory is created if missing; otherwise the append would fail
 *   and the entry would be lost silently.
 *
 * @param {string} tag - Tool name.
 * @param {*} input - Tool input; strings are logged as-is, others as JSON.
 * @param {*} approved - Coerced to a boolean in the entry.
 * @param {*} resultStatus - Stored in the entry's `result` field.
 * @returns {void}
 */
function logToolCall(tag, input, approved, resultStatus) {
  try {
    let safeInput = input;
    if (tag === 'write_file' && input !== null && typeof input === 'object' && 'content' in input) {
      const n = typeof input.content === 'string' ? Buffer.byteLength(input.content, 'utf8') : 0;
      safeInput = { ...input, content: `<${n} bytes>` };
    }

    let inputStr = typeof safeInput === 'string' ? safeInput : JSON.stringify(safeInput);
    // JSON.stringify returns undefined for undefined/functions/symbols; still
    // record the call instead of throwing on `.length` below.
    if (typeof inputStr !== 'string') inputStr = String(safeInput);
    if (inputStr.length > 200) inputStr = inputStr.slice(0, 197) + '...';

    const entry = JSON.stringify({
      ts: new Date().toISOString(),
      tag,
      input: inputStr,
      approved: Boolean(approved),
      result: resultStatus,
    });

    fs.mkdirSync(path.dirname(AUDIT_LOG), { recursive: true });
    fs.appendFileSync(AUDIT_LOG, entry + '\n');
  } catch {
    // never throw
  }
}
30
+
31
+ module.exports = { AUDIT_LOG, logToolCall };