@semalt-ai/code 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/api.js CHANGED
@@ -14,9 +14,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
14
14
  FG_RED,
15
15
  FG_TEAL,
16
16
  RST,
17
+ StatusBar,
17
18
  StreamRenderer,
18
- getCols,
19
- printStatusBar,
20
19
  } = ui;
21
20
 
22
21
  function apiUrl(urlPath) {
@@ -34,10 +33,6 @@ function createApiClient({ getConfig, saveConfig, ui }) {
34
33
  return `${base}${normalizedPath}`;
35
34
  }
36
35
 
37
- function describeModelProfile(profile) {
38
- return `${profile.model} @ ${profile.api_base}`;
39
- }
40
-
41
36
  function requireAuthToken() {
42
37
  const config = getConfig();
43
38
  if (!config.auth_token) {
@@ -56,46 +51,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
56
51
  saveConfig(config);
57
52
  }
58
53
 
59
- function chooseSavedModelProfile(rl, currentModel, cwd, onDone) {
60
- const config = getConfig();
61
- if (!config.models.length) {
62
- console.log(` ${FG_RED}✗${RST} ${FG_GRAY}No saved model profiles. Use semalt-code models add first.${RST}`);
63
- onDone(currentModel);
64
- return;
65
- }
66
-
67
- console.log();
68
- console.log(` ${FG_TEAL}${BOLD}◆ Saved Models${RST}`);
69
- console.log(` ${FG_DARK}${'─'.repeat(40)}${RST}`);
70
- config.models.forEach((profile, index) => {
71
- const active = profile.api_base === config.api_base &&
72
- profile.api_key === config.api_key &&
73
- profile.model === currentModel;
74
- const marker = active ? `${FG_GREEN}●${RST}` : `${FG_DARK}○${RST}`;
75
- console.log(` ${marker} ${ui.FG_CYAN}${index + 1}.${RST} ${describeModelProfile(profile)}`);
76
- });
77
- console.log();
78
-
79
- rl.question(` ${FG_TEAL}${BOLD}Select model>${RST} `, (answer) => {
80
- const selected = Number((answer || '').trim());
81
- if (!Number.isInteger(selected) || selected < 1 || selected > config.models.length) {
82
- console.log(` ${FG_RED}✗${RST} ${FG_GRAY}Invalid selection${RST}`);
83
- onDone(currentModel);
84
- return;
85
- }
86
-
87
- const profile = config.models[selected - 1];
88
- setActiveModelProfile(profile);
89
- console.log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Model profile → ${describeModelProfile(profile)}${RST}`);
90
- printStatusBar(profile.model, cwd);
91
- onDone(profile.model);
92
- });
93
- }
94
-
95
54
  function estimateTokens(text) {
96
55
  return Math.floor((text || '').length / 4);
97
56
  }
98
57
 
58
+ // Input-token budget discovered during this process lifetime (null until a request is rejected as too large).
59
+ // Set when a request fails with a context-overflow 400 or a 413; used to proactively trim all subsequent calls.
60
+ let _sessionInputLimit = null;
61
+
99
62
  function httpRequest(urlStr, options, body) {
100
63
  return new Promise((resolve, reject) => {
101
64
  const url = new URL(urlStr);
@@ -223,50 +186,208 @@ function createApiClient({ getConfig, saveConfig, ui }) {
223
186
  });
224
187
  }
225
188
 
226
- async function chatStream(messages, { model, temperature, maxTokens } = {}) {
/**
 * Create a chat on the dashboard.
 *
 * Sends a POST to the dashboard `/api/chats` endpoint with a 15 s timeout,
 * authenticated with the bearer token obtained from requireAuthToken().
 *
 * @param {string} title - Title for the new chat.
 * @param {*} modelDbId - Sent as `model_id` in the request body.
 * @returns {*} Whatever requestJson() returns (presumably a promise of the
 *   parsed response — confirm against requestJson).
 */
function dashboardCreateChat(title, modelDbId) {
  const headers = { 'Authorization': `Bearer ${requireAuthToken()}` };
  const body = { title, model_id: modelDbId };
  return requestJson(dashboardUrl('/api/chats'), {
    method: 'POST',
    timeout: 15000,
    headers,
    body,
  });
}
198
+
/**
 * List chats from the dashboard.
 *
 * Sends a GET to the dashboard `/api/chats` endpoint with a 15 s timeout,
 * authenticated with the bearer token obtained from requireAuthToken().
 *
 * @returns {*} Whatever requestJson() returns (presumably a promise of the
 *   parsed response — confirm against requestJson).
 */
function dashboardListChats() {
  const headers = { 'Authorization': `Bearer ${requireAuthToken()}` };
  const options = { method: 'GET', timeout: 15000, headers };
  return requestJson(dashboardUrl('/api/chats'), options);
}
207
+
/**
 * Fetch a single chat from the dashboard.
 *
 * Sends a GET to `/api/chats/<id>` (the id is stringified and URL-encoded)
 * with a 15 s timeout and the bearer token obtained from requireAuthToken().
 *
 * @param {string|number} id - Chat identifier.
 * @returns {*} Whatever requestJson() returns (presumably a promise of the
 *   parsed response — confirm against requestJson).
 */
function dashboardGetChat(id) {
  const headers = { 'Authorization': `Bearer ${requireAuthToken()}` };
  const chatPath = `/api/chats/${encodeURIComponent(String(id))}`;
  return requestJson(dashboardUrl(chatPath), {
    method: 'GET',
    timeout: 15000,
    headers,
  });
}
216
+
/**
 * Save a batch of messages to a dashboard chat.
 *
 * Sends a POST to `/api/chats/<chatId>/messages/batch` (the id is stringified
 * and URL-encoded) with a 15 s timeout and the bearer token obtained from
 * requireAuthToken(). The request body is `{ messages }`.
 *
 * @param {string|number} chatId - Chat identifier.
 * @param {*} messages - Messages to store; passed through unchanged.
 * @returns {*} Whatever requestJson() returns (presumably a promise of the
 *   parsed response — confirm against requestJson).
 */
function dashboardSaveMessages(chatId, messages) {
  const headers = { 'Authorization': `Bearer ${requireAuthToken()}` };
  const batchPath = `/api/chats/${encodeURIComponent(String(chatId))}/messages/batch`;
  const body = { messages };
  return requestJson(dashboardUrl(batchPath), {
    method: 'POST',
    timeout: 15000,
    headers,
    body,
  });
}
226
+
227
+ async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
227
228
  const config = getConfig();
229
+
/**
 * Fit a chat transcript into an estimated token budget.
 *
 * Size is estimated as serialized-JSON characters / 3, which is conservative
 * for token-dense content (code, JSON, HTML).
 *
 * Policy, applied in order until the estimate is within budget:
 *   1. Drop messages after the first non-system message, oldest first, but
 *      always keep the most recent one.
 *   2. Shorten the most recent message (typically a large tool result),
 *      keeping its end.
 *   3. If the system messages plus the first non-system message (the original
 *      task) leave no room for that, shorten the task instead, and as a last
 *      resort shorten both (the task gets at most half of the budget).
 *
 * System messages are never dropped or shortened and are placed first in the
 * result. The first non-system message is never dropped. Input messages are
 * never mutated; shortened messages are shallow copies.
 *
 * The result can still be over budget when nothing can be shortened (system
 * messages alone are too large, or the oversized `content` is not a string).
 *
 * @param {Array<{role: string, content: *}>} msgs - Transcript to trim.
 * @param {number} tokenBudget - Maximum estimated tokens for the result.
 * @returns {Array<object>} New array: system messages, the first non-system
 *   message, then the surviving later messages.
 */
function trimToTokenBudget(msgs, tokenBudget) {
  const CHARS_PER_TOKEN = 3;
  // Head-room for the truncation marker and per-message JSON overhead.
  const SLACK_CHARS = 200;
  const TRUNCATION_MARKER = '[…content truncated to fit model limit…]\n';

  const system = msgs.filter((m) => m.role === 'system');
  const nonSystem = msgs.filter((m) => m.role !== 'system');
  if (nonSystem.length === 0) return [...system];

  const pinned = nonSystem[0]; // original task — never dropped
  const tail = nonSystem.slice(1);
  const head = [...system, pinned];

  const jsonChars = (value) => (JSON.stringify(value) ?? 'null').length;
  const exceeds = (chars, limit) => Math.floor(chars / CHARS_PER_TOKEN) > limit;

  // JSON.stringify([a, b, c]).length === 1 + Σ (length of each element + 1),
  // so the total can be maintained incrementally instead of re-serializing the
  // whole transcript every time one message is dropped.
  const headChars = 1 + head.reduce((sum, m) => sum + jsonChars(m) + 1, 0);
  const tailSizes = tail.map((m) => jsonChars(m));
  let tailChars = tailSizes.reduce((sum, size) => sum + size + 1, 0);
  let firstKept = 0;
  while (tail.length - firstKept > 1 && exceeds(headChars + tailChars, tokenBudget)) {
    tailChars -= tailSizes[firstKept] + 1;
    firstKept += 1;
  }
  const kept = tail.slice(firstKept);
  if (!exceeds(headChars + tailChars, tokenBudget)) return [...head, ...kept];

  // Still over budget: at most one message is left after the pinned one.

  // Last `count` UTF-16 units of `text`, without a leading half surrogate pair.
  const keepEnd = (text, count) => {
    const end = text.slice(-count);
    const first = end.charCodeAt(0);
    return first >= 0xdc00 && first <= 0xdfff ? end.slice(1) : end;
  };

  // Returns a copy of `msg` whose content keeps only its end, short enough
  // that [...before, copy, ...after] is estimated within `limit` tokens, or
  // null when that is impossible. The first guess counts raw characters; if
  // JSON escaping makes the serialized form longer, the guess is shrunk until
  // the measured size fits.
  const shorten = (before, msg, after, limit) => {
    if (typeof msg.content !== 'string') return null;
    const limitChars = limit * CHARS_PER_TOKEN;
    let keep = Math.floor(Math.min(
      limitChars - jsonChars([...before, ...after]) - SLACK_CHARS,
      msg.content.length - 1,
    ));
    while (keep > 0) {
      const text = keepEnd(msg.content, keep);
      const candidate = { ...msg, content: TRUNCATION_MARKER + text };
      const chars = jsonChars([...before, candidate, ...after]);
      if (!exceeds(chars, limit)) return candidate;
      // Serialized characters per raw character in the kept text (>= 1).
      const density = Math.max(1, (jsonChars(text) - 2) / Math.max(1, text.length));
      keep = Math.floor(keep - Math.max(1, (chars - limitChars) / density));
    }
    return null;
  };

  if (kept.length === 0) {
    const shortTask = shorten(system, pinned, [], tokenBudget);
    return shortTask ? [...system, shortTask] : head;
  }

  const latest = kept[0];

  const shortLatest = shorten(head, latest, [], tokenBudget);
  if (shortLatest) return [...head, shortLatest];

  // The pinned task leaves no room for the latest message: shorten the task.
  const shortTask = shorten(system, pinned, [latest], tokenBudget);
  if (shortTask) return [...system, shortTask, latest];

  // Both are too large: cap the task at half the budget, then fit the latest
  // message into what remains.
  const halfTask = shorten(system, pinned, [], Math.floor(tokenBudget / 2));
  if (halfTask) {
    const rest = shorten([...system, halfTask], latest, [], tokenBudget);
    if (rest) return [...system, halfTask, rest];
  }

  // Nothing can be shortened; return the smallest transcript we have.
  return [...head, latest];
}
273
+
274
+ // Proactive trim: apply the session input limit discovered from a prior 400.
275
+ let trimmedMessages = messages;
276
+ if (_sessionInputLimit !== null) {
277
+ if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
278
+ trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
279
+ }
280
+ }
281
+
228
282
  const payload = {
229
283
  model: model || config.default_model,
230
- messages,
284
+ messages: trimmedMessages,
231
285
  temperature: temperature !== undefined ? temperature : config.temperature,
232
286
  stream: true,
233
287
  };
234
288
 
235
289
  if (maxTokens !== undefined) payload.max_tokens = maxTokens;
236
290
 
237
- const body = JSON.stringify(payload);
238
- let res;
239
-
240
- try {
241
- res = await httpRequest(apiUrl('/v1/chat/completions'), {
291
+ async function doRequest(msgs) {
292
+ const reqPayload = { ...payload, messages: msgs };
293
+ const reqBody = JSON.stringify(reqPayload);
294
+ const res = await httpRequest(apiUrl('/v1/chat/completions'), {
242
295
  method: 'POST',
243
296
  timeout: config.request_timeout_ms,
244
297
  headers: {
245
298
  'Content-Type': 'application/json',
246
299
  'Authorization': `Bearer ${config.api_key}`,
247
- 'Content-Length': Buffer.byteLength(body),
300
+ 'Content-Length': Buffer.byteLength(reqBody),
248
301
  },
249
- }, body);
250
- } catch (error) {
251
- process.stdout.write(`\n ${FG_RED}✗ ${error.message}${RST}\n`);
252
- return '';
302
+ }, reqBody);
303
+
304
+ if (res.statusCode !== 200) {
305
+ const errBody = await new Promise((resolve) => {
306
+ let d = '';
307
+ res.setEncoding('utf8');
308
+ res.on('data', (c) => { d += c; });
309
+ res.on('end', () => resolve(d));
310
+ res.on('error', () => resolve(''));
311
+ });
312
+ let detail = '';
313
+ let parsedErr = null;
314
+ try {
315
+ parsedErr = JSON.parse(errBody);
316
+ detail = (parsedErr && (parsedErr.error?.message || parsedErr.error || parsedErr.message)) || '';
317
+ } catch { detail = errBody.slice(0, 200); }
318
+ const err = new Error(`HTTP ${res.statusCode}${detail ? `: ${detail}` : ''}`);
319
+ err.statusCode = res.statusCode;
320
+ err.parsedErr = parsedErr;
321
+ err.detail = detail;
322
+ throw err;
323
+ }
324
+ return res;
253
325
  }
254
326
 
255
- if (res.statusCode !== 200) {
256
- process.stdout.write(`\n ${FG_RED}✗ Error: HTTP ${res.statusCode}${RST}\n`);
257
- res.resume();
258
- return '';
327
+ // On payload-too-large errors, trim and retry.
328
+ // 400 with context-overflow detail → parse exact context window, budget = window/2
329
+ // 413 Request Entity Too Large (Nginx/proxy) → no size hint, halve current estimate
330
+ // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
331
+ let res;
332
+ try {
333
+ res = await doRequest(trimmedMessages);
334
+ } catch (err) {
335
+ const is400Overflow = err.statusCode === 400 && err.detail &&
336
+ /context.length|input.token|context_length|maximum.*token|token.*limit/i.test(err.detail);
337
+ const is413 = err.statusCode === 413;
338
+
339
+ if (is400Overflow || is413) {
340
+ let budget;
341
+ if (is400Overflow) {
342
+ const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
343
+ err.detail.match(/maximum.*?(\d+)\s*token/i);
344
+ const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
345
+ budget = contextWindow
346
+ ? Math.floor(contextWindow / 2)
347
+ : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
348
+ } else {
349
+ // 413: no token info available — halve the estimated size of the current payload.
350
+ budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
351
+ }
352
+ _sessionInputLimit = budget;
353
+ trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
354
+ res = await doRequest(trimmedMessages);
355
+ } else {
356
+ throw err;
357
+ }
259
358
  }
260
359
 
261
- return new Promise((resolve) => {
360
+ return new Promise((resolve, reject) => {
262
361
  const startTime = Date.now();
263
362
  let fullText = '';
264
363
  let reasoningText = '';
265
364
  let tokenCount = 0;
266
365
  let inReasoning = false;
267
- const renderer = new StreamRenderer();
366
+ let streamUsage = null;
367
+ let resolved = false;
368
+ const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
369
+ if (!silent) {
370
+ process.stdout.write('\n');
371
+ renderer._linesWritten = 1;
372
+ }
373
+ let firstContentToken = true;
268
374
  let lineBuffer = '';
269
375
 
376
+ function finalize() {
377
+ if (resolved) return;
378
+ resolved = true;
379
+ if (!silent) renderer.flush();
380
+ const elapsed = (Date.now() - startTime) / 1000;
381
+ const tps = tokenCount / (elapsed || 1);
382
+ if (StatusBar.current) {
383
+ let latency = `${Math.round(tps)} tok/s · ${elapsed.toFixed(1)}s`;
384
+ if (reasoningText) latency += ` · ${estimateTokens(reasoningText)} think`;
385
+ StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency });
386
+ StatusBar.current.render();
387
+ }
388
+ resolve({ content: fullText, usage: streamUsage });
389
+ }
390
+
270
391
  res.setEncoding('utf8');
271
392
 
272
393
  res.on('data', (chunk) => {
@@ -277,53 +398,76 @@ function createApiClient({ getConfig, saveConfig, ui }) {
277
398
  for (const line of lines) {
278
399
  if (!line.startsWith('data: ')) continue;
279
400
  const data = line.slice(6).trim();
280
- if (data === '[DONE]') continue;
401
+ if (data === '[DONE]') {
402
+ finalize();
403
+ res.destroy();
404
+ return;
405
+ }
281
406
 
282
407
  try {
283
408
  const obj = JSON.parse(data);
409
+ if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
410
+ streamUsage = obj.usage;
411
+ }
284
412
  const delta = ((obj.choices || [])[0] || {}).delta || {};
285
413
 
286
414
  const reasoning = delta.reasoning_content || '';
287
415
  if (reasoning) {
288
416
  if (!inReasoning) {
289
417
  inReasoning = true;
290
- process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
418
+ if (showThink) {
419
+ process.stdout.write(`\n ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
420
+ renderer._linesWritten++;
421
+ }
291
422
  }
292
423
  reasoningText += reasoning;
293
424
  tokenCount++;
294
- if (tokenCount % 20 === 0) process.stdout.write(`${FG_DARK}.${RST}`);
425
+ if (showThink) {
426
+ process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
427
+ }
295
428
  }
296
429
 
297
430
  const content = delta.content || '';
298
431
  if (content) {
299
432
  if (inReasoning) {
300
433
  inReasoning = false;
301
- process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
434
+ if (showThink && !silent) {
435
+ process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
436
+ renderer._linesWritten++;
437
+ }
438
+ }
439
+ if (onToken) {
440
+ if (firstContentToken) {
441
+ firstContentToken = false;
442
+ if (StatusBar.current) StatusBar.current.update({ status: 'streaming' });
443
+ }
444
+ onToken(content);
445
+ } else {
446
+ renderer.feed(content);
302
447
  }
303
- renderer.feed(content);
304
448
  fullText += content;
305
449
  tokenCount++;
450
+ if (tokenCount % 20 === 0 && StatusBar.current) {
451
+ const elapsedSec = (Date.now() - startTime) / 1000 || 0.001;
452
+ StatusBar.current.liveUpdate({
453
+ tokens: `${tokenCount} tok`,
454
+ latency: `${Math.round(tokenCount / elapsedSec)} tok/s`,
455
+ });
456
+ }
306
457
  }
307
458
  } catch {}
308
459
  }
309
460
  });
310
461
 
311
462
  res.on('end', () => {
312
- renderer.flush();
313
- const elapsed = (Date.now() - startTime) / 1000;
314
- const estTokens = estimateTokens(fullText + reasoningText);
315
- const tps = tokenCount / (elapsed || 1);
316
- const cols = getCols();
317
- process.stdout.write(`\n ${FG_DARK}${'─'.repeat(Math.min(cols, 60) - 4)}${RST}\n`);
318
- let costLine = `${FG_DARK}~${estTokens} tokens · ${elapsed.toFixed(1)}s · ${Math.round(tps)} tok/s${RST}`;
319
- if (reasoningText) costLine += ` ${FG_DARK}· ${estimateTokens(reasoningText)} thinking${RST}`;
320
- process.stdout.write(` ${costLine}\n`);
321
- resolve(fullText);
463
+ finalize();
322
464
  });
323
465
 
324
466
  res.on('error', (error) => {
325
- process.stdout.write(`\n ${FG_RED}✗ ${error.message}${RST}\n`);
326
- resolve('');
467
+ if (!resolved) {
468
+ resolved = true;
469
+ reject(error);
470
+ }
327
471
  });
328
472
  });
329
473
  }
@@ -388,12 +532,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
388
532
  return {
389
533
  chatStream,
390
534
  chatSync,
391
- chooseSavedModelProfile,
535
+ dashboardCreateChat,
536
+ dashboardGetChat,
392
537
  dashboardGetModelForCli,
538
+ dashboardListChats,
393
539
  dashboardListModels,
394
540
  dashboardLogout,
541
+ dashboardSaveMessages,
395
542
  dashboardWhoAmI,
396
- describeModelProfile,
397
543
  estimateTokens,
398
544
  getCliLoginStatus,
399
545
  requestCliLogin,
package/lib/args.js CHANGED
@@ -34,6 +34,37 @@ function parseArgs(argv) {
34
34
  case '--default-model':
35
35
  opts.defaultModel = argv[++i];
36
36
  break;
37
+ case '-r':
38
+ case '--resume':
39
+ opts.resume = argv[++i];
40
+ break;
41
+ case '--allow-fs':
42
+ (opts.allowedTiers = opts.allowedTiers || []).push('fs');
43
+ break;
44
+ case '--allow-exec':
45
+ (opts.allowedTiers = opts.allowedTiers || []).push('exec');
46
+ break;
47
+ case '--allow-net':
48
+ (opts.allowedTiers = opts.allowedTiers || []).push('net');
49
+ break;
50
+ case '--allow-all':
51
+ opts.allowedTiers = ['fs', 'exec', 'net', 'sys'];
52
+ break;
53
+ case '--readonly':
54
+ opts.readonly = true;
55
+ break;
56
+ case '--new':
57
+ opts.new = true;
58
+ break;
59
+ case '--show-think':
60
+ opts.showThink = true;
61
+ break;
62
+ case '--debug':
63
+ opts.debug = true;
64
+ break;
65
+ case '--system-prompt':
66
+ opts.systemPromptFile = argv[++i];
67
+ break;
37
68
  default:
38
69
  positional.push(argv[i]);
39
70
  }
package/lib/audit.js ADDED
@@ -0,0 +1,31 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const os = require('os');
5
+ const path = require('path');
6
+
7
+ const AUDIT_LOG = path.join(os.homedir(), '.semalt-ai', 'audit.log');
8
+
/**
 * Append one JSON line describing a tool invocation to the audit log.
 *
 * Best-effort by design: any failure (unserializable input, unwritable log
 * file) is swallowed so auditing can never break the tool call itself.
 *
 * Each line is an object with `ts` (ISO timestamp), `tag`, `input` (a string
 * of at most 200 characters), `approved` (boolean) and `result`.
 *
 * @param {string} tag - Tool name. For 'write_file', the `content` field of an
 *   object input is replaced by a `<N bytes>` placeholder before logging.
 * @param {*} input - Tool input; strings are logged as-is, everything else is
 *   JSON-serialized.
 * @param {*} approved - Coerced to a boolean.
 * @param {*} resultStatus - Logged as `result` (omitted by JSON when undefined).
 * @returns {void}
 */
function logToolCall(tag, input, approved, resultStatus) {
  try {
    let safeInput = input;
    if (tag === 'write_file' && input !== null && typeof input === 'object' && 'content' in input) {
      // Report the UTF-8 size, not the UTF-16 code-unit count, since the
      // placeholder is labelled "bytes".
      const n = typeof input.content === 'string' ? Buffer.byteLength(input.content, 'utf8') : 0;
      safeInput = { ...input, content: `<${n} bytes>` };
    }

    let inputStr;
    if (typeof safeInput === 'string') {
      inputStr = safeInput;
    } else {
      try {
        inputStr = JSON.stringify(safeInput);
      } catch {
        // Circular structures and BigInt values make JSON.stringify throw.
        inputStr = undefined;
      }
      // JSON.stringify returns undefined for undefined, functions and symbols;
      // fall back to a plain string so the call is still audited.
      if (typeof inputStr !== 'string') inputStr = String(safeInput);
    }
    if (inputStr.length > 200) inputStr = `${inputStr.slice(0, 197)}...`;

    const entry = JSON.stringify({
      ts: new Date().toISOString(),
      tag,
      input: inputStr,
      approved: Boolean(approved),
      result: resultStatus,
    });
    const line = `${entry}\n`;

    try {
      fs.appendFileSync(AUDIT_LOG, line);
    } catch (err) {
      // A missing log directory would otherwise drop every entry silently:
      // create it once and retry.
      if (!err || err.code !== 'ENOENT') throw err;
      fs.mkdirSync(path.dirname(AUDIT_LOG), { recursive: true });
      fs.appendFileSync(AUDIT_LOG, line);
    }
  } catch {
    // never throw
  }
}
30
+
31
+ module.exports = { AUDIT_LOG, logToolCall };