npm - @semalt-ai/code - Versions diffs - 1.7.0 → 1.8.1 - Mend

@semalt-ai/code 1.7.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/.claude/settings.local.json +8 -0
package/ARCHITECTURE.md +99 -0
package/CLAUDE.md +349 -0
package/index.js +69 -7
package/lib/agent.js +577 -39
package/lib/api.js +285 -79
package/lib/args.js +31 -0
package/lib/audit.js +31 -0
package/lib/commands.js +1006 -307
package/lib/config.js +51 -5
package/lib/constants.js +72 -0
package/lib/context.js +2 -6
package/lib/metrics.js +94 -0
package/lib/permissions.js +180 -49
package/lib/prompts.js +96 -13
package/lib/storage.js +96 -0
package/lib/tools.js +1009 -35
package/lib/ui/ansi.js +65 -0
package/lib/ui/chat-history.js +217 -0
package/lib/ui/create-ui.js +474 -0
package/lib/ui/diff.js +243 -0
package/lib/ui/input-field.js +1176 -0
package/lib/ui/layout.js +53 -0
package/lib/ui/legacy.js +130 -0
package/lib/ui/status-bar.js +131 -0
package/lib/ui/stream.js +158 -0
package/lib/ui/utils.js +45 -0
package/lib/ui.js +42 -598
package/package.json +1 -1
package/path +1 -0

package/lib/api.js CHANGED Viewed

@@ -14,9 +14,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     FG_RED,
     FG_TEAL,
     RST,
+    StatusBar,
     StreamRenderer,
-    getCols,
-    printStatusBar,
   } = ui;
   function apiUrl(urlPath) {
@@ -34,10 +33,6 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     return `${base}${normalizedPath}`;
   }
-  function describeModelProfile(profile) {
-    return `${profile.model} @ ${profile.api_base}`;
-  }
   function requireAuthToken() {
     const config = getConfig();
     if (!config.auth_token) {
@@ -56,46 +51,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     saveConfig(config);
   }
-  function chooseSavedModelProfile(rl, currentModel, cwd, onDone) {
-    const config = getConfig();
-    if (!config.models.length) {
-      console.log(`  ${FG_RED}✗${RST} ${FG_GRAY}No saved model profiles. Use semalt-code models add first.${RST}`);
-      onDone(currentModel);
-      return;
-    }
-    console.log();
-    console.log(`  ${FG_TEAL}${BOLD}◆ Saved Models${RST}`);
-    console.log(`  ${FG_DARK}${'─'.repeat(40)}${RST}`);
-    config.models.forEach((profile, index) => {
-      const active = profile.api_base === config.api_base &&
-        profile.api_key === config.api_key &&
-        profile.model === currentModel;
-      const marker = active ? `${FG_GREEN}●${RST}` : `${FG_DARK}○${RST}`;
-      console.log(`  ${marker} ${ui.FG_CYAN}${index + 1}.${RST} ${describeModelProfile(profile)}`);
-    });
-    console.log();
-    rl.question(`  ${FG_TEAL}${BOLD}Select model>${RST} `, (answer) => {
-      const selected = Number((answer || '').trim());
-      if (!Number.isInteger(selected) || selected < 1 || selected > config.models.length) {
-        console.log(`  ${FG_RED}✗${RST} ${FG_GRAY}Invalid selection${RST}`);
-        onDone(currentModel);
-        return;
-      }
-      const profile = config.models[selected - 1];
-      setActiveModelProfile(profile);
-      console.log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Model profile → ${describeModelProfile(profile)}${RST}`);
-      printStatusBar(profile.model, cwd);
-      onDone(profile.model);
-    });
-  }
   function estimateTokens(text) {
     return Math.floor((text || '').length / 4);
   }
+  // Discovered context limit for this process lifetime.
+  // Set on the first context-overflow 400; used to proactively trim all subsequent calls.
+  let _sessionInputLimit = null;
   function httpRequest(urlStr, options, body) {
     return new Promise((resolve, reject) => {
       const url = new URL(urlStr);
@@ -223,50 +186,249 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     });
   }
-  async function chatStream(messages, { model, temperature, maxTokens } = {}) {
+  function dashboardCreateChat(title, modelDbId) { // Create a chat on the dashboard; returns whatever requestJson() returns.
+    const authToken = requireAuthToken(); // presumably fails when no auth token is configured — confirm in requireAuthToken
+    return requestJson(dashboardUrl('/api/chats'), {
+      method: 'POST',
+      timeout: 15000, // presumably milliseconds — confirm against requestJson
+      headers: { 'Authorization': `Bearer ${authToken}` },
+      body: { title, model_id: modelDbId }, // modelDbId is sent under the key model_id
+    });
+  }
+  function dashboardListChats() { // List chats via GET on the dashboard chats endpoint; returns whatever requestJson() returns.
+    const authToken = requireAuthToken(); // presumably fails when no auth token is configured — confirm in requireAuthToken
+    return requestJson(dashboardUrl('/api/chats'), {
+      method: 'GET',
+      timeout: 15000, // presumably milliseconds — confirm against requestJson
+      headers: { 'Authorization': `Bearer ${authToken}` },
+    });
+  }
+  function dashboardGetChat(id) { // Fetch a single chat by id; returns whatever requestJson() returns.
+    const authToken = requireAuthToken(); // presumably fails when no auth token is configured — confirm in requireAuthToken
+    return requestJson(dashboardUrl(`/api/chats/${encodeURIComponent(String(id))}`), { // id is stringified and URL-encoded before being placed in the path
+      method: 'GET',
+      timeout: 15000, // presumably milliseconds — confirm against requestJson
+      headers: { 'Authorization': `Bearer ${authToken}` },
+    });
+  }
+  function dashboardSaveMessages(chatId, messages) { // POST a batch of messages to the given chat; returns whatever requestJson() returns.
+    const authToken = requireAuthToken(); // presumably fails when no auth token is configured — confirm in requireAuthToken
+    return requestJson(dashboardUrl(`/api/chats/${encodeURIComponent(String(chatId))}/messages/batch`), { // chatId is stringified and URL-encoded before being placed in the path
+      method: 'POST',
+      timeout: 15000, // presumably milliseconds — confirm against requestJson
+      headers: { 'Authorization': `Bearer ${authToken}` },
+      body: { messages }, // messages is forwarded as-is; its element shape is not visible here
+    });
+  }
+  async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
     const config = getConfig();
+    // Fit messages into tokenBudget tokens.
+    // Uses chars/3 — conservative for token-dense content (code, JSON, HTML).
+    //
+    // Always keeps: system prompt + first non-system message (original task).
+    // Drops intermediate messages oldest-first, then truncates the last tail
+    // message (typically a large tool result) if still over budget.
+    function trimToTokenBudget(msgs, tokenBudget) { // Returns a new array; msgs itself is not mutated (truncated messages are shallow copies).
+      const CHARS_PER_TOKEN = 3; // size estimate is JSON length divided by this value
+      const system = msgs.filter((m) => m.role === 'system');
+      const nonSystem = msgs.filter((m) => m.role !== 'system');
+      if (nonSystem.length === 0) return [...system]; // only system messages: returned untrimmed, even if over budget
+      const pinned = nonSystem[0]; // original task — never dropped
+      let tail = nonSystem.slice(1); // everything after the pinned message; candidates for dropping
+      const estimate = () => { // re-serialises the current candidate list on every call
+        const all = tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
+        return Math.floor(JSON.stringify(all).length / CHARS_PER_TOKEN);
+      };
+      while (tail.length > 1 && estimate() > tokenBudget) { // drop oldest tail messages, but always keep the most recent one
+        tail = tail.slice(1);
+      }
+      if (tail.length === 1 && estimate() > tokenBudget) { // still over budget with a single tail message: try truncating it
+        const msg = tail[0];
+        const otherChars = JSON.stringify([...system, pinned]).length;
+        const available = tokenBudget * CHARS_PER_TOKEN - otherChars - 200; // 200 is presumably headroom for the marker text and JSON overhead — confirm
+        if (available > 0 && typeof msg.content === 'string' && msg.content.length > available) { // NOTE(review): if this test fails the result stays over budget
+          tail = [{ ...msg, content: '[…content truncated to fit model limit…]\n' + msg.content.slice(-available) }]; // keeps the END of the content
+        }
+      }
+      if (tail.length === 0 && estimate() > tokenBudget) { // only reachable when there was a single non-system message (the loop never empties tail)
+        const systemChars = JSON.stringify(system).length;
+        const available = tokenBudget * CHARS_PER_TOKEN - systemChars - 200;
+        if (available > 0 && typeof pinned.content === 'string' && pinned.content.length > available) {
+          return [...system, { ...pinned, content: '[…content truncated to fit model limit…]\n' + pinned.content.slice(-available) }]; // keeps the END of the pinned content
+        }
+      }
+      return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned]; // system messages are placed first regardless of their original position
+    }
+    // Proactive trim: apply the session input limit discovered from a prior 400.
+    let trimmedMessages = messages;
+    if (_sessionInputLimit !== null) {
+      if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
+        trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
+      }
+    }
     const payload = {
       model: model || config.default_model,
-      messages,
+      messages: trimmedMessages,
       temperature: temperature !== undefined ? temperature : config.temperature,
       stream: true,
+      stream_options: { include_usage: true },
     };
     if (maxTokens !== undefined) payload.max_tokens = maxTokens;
-    const body = JSON.stringify(payload);
-    let res;
-    try {
-      res = await httpRequest(apiUrl('/v1/chat/completions'), {
+    async function doRequest(msgs) {
+      const reqPayload = { ...payload, messages: msgs };
+      const reqBody = JSON.stringify(reqPayload);
+      const res = await httpRequest(apiUrl('/v1/chat/completions'), {
         method: 'POST',
         timeout: config.request_timeout_ms,
         headers: {
           'Content-Type': 'application/json',
           'Authorization': `Bearer ${config.api_key}`,
-          'Content-Length': Buffer.byteLength(body),
+          'Content-Length': Buffer.byteLength(reqBody),
         },
-      }, body);
-    } catch (error) {
-      process.stdout.write(`\n  ${FG_RED}✗ ${error.message}${RST}\n`);
-      return '';
+      }, reqBody);
+      if (res.statusCode !== 200) {
+        const errBody = await new Promise((resolve) => {
+          let d = '';
+          res.setEncoding('utf8');
+          res.on('data', (c) => { d += c; });
+          res.on('end', () => resolve(d));
+          res.on('error', () => resolve(''));
+        });
+        let detail = '';
+        let parsedErr = null;
+        try {
+          parsedErr = JSON.parse(errBody);
+          detail = (parsedErr && (parsedErr.error?.message || parsedErr.error || parsedErr.message)) || '';
+        } catch { detail = errBody.slice(0, 200); }
+        const err = new Error(`HTTP ${res.statusCode}${detail ? `: ${detail}` : ''}`);
+        err.statusCode = res.statusCode;
+        err.parsedErr = parsedErr;
+        err.detail = detail;
+        err.rawBody = errBody;
+        err.responseHeaders = res.headers;
+        throw err;
+      }
+      return res;
     }
-    if (res.statusCode !== 200) {
-      process.stdout.write(`\n  ${FG_RED}✗ Error: HTTP ${res.statusCode}${RST}\n`);
-      res.resume();
-      return '';
+    // On payload-too-large errors, trim and retry.
+    //   400 with context-overflow detail → parse exact context window, budget = window/2
+    //   413 Request Entity Too Large (Nginx/proxy)  → no size hint, halve current estimate
+    // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
+    let res;
+    try {
+      res = await doRequest(trimmedMessages);
+    } catch (err) {
+      const is400Overflow = err.statusCode === 400 && err.detail &&
+        /context.length|input.token|context_length|maximum.*token|token.*limit/i.test(err.detail);
+      const is413 = err.statusCode === 413;
+      if (is400Overflow || is413) {
+        let budget;
+        if (is400Overflow) {
+          const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
+                             err.detail.match(/maximum.*?(\d+)\s*token/i);
+          const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
+          budget = contextWindow
+            ? Math.floor(contextWindow / 2)
+            : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
+        } else {
+          // 413: no token info available — halve the estimated size of the current payload.
+          budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
+        }
+        _sessionInputLimit = budget;
+        trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
+        res = await doRequest(trimmedMessages);
+      } else {
+        throw err;
+      }
     }
-    return new Promise((resolve) => {
+    return new Promise((resolve, reject) => {
       const startTime = Date.now();
       let fullText = '';
       let reasoningText = '';
       let tokenCount = 0;
       let inReasoning = false;
-      const renderer = new StreamRenderer();
+      let streamUsage = null;
+      let resolved = false;
+      // delta.tool_calls accumulator (OpenAI function-calling streaming format).
+      // Keyed by `index` per the OpenAI spec.
+      const toolCallAcc = [];
+      const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
+      if (!silent) {
+        process.stdout.write('\n');
+        renderer._linesWritten = 1;
+      }
+      let firstContentToken = true;
       let lineBuffer = '';
+      function escapeXml(s) { // Coerce s to a string and escape the three characters &, < and >.
+        return String(s)
+          .replace(/&/g, '&amp;') // ampersand first, so the entities produced below are not double-escaped
+          .replace(/</g, '&lt;')
+          .replace(/>/g, '&gt;'); // NOTE(review): quotes are not escaped, yet callers in this file place the result inside name="..." attributes
+      }
+      // Convert any accumulated tool_calls into a MiniMax XML block and
+      // append it to fullText so extractToolCalls() picks them up. Runs once
+      // at stream end.
+      function appendToolCallsXml() { // Serialises toolCallAcc into one tool_call XML block appended to fullText; no return value.
+        const valid = toolCallAcc.filter((t) => t && t.name); // skip empty slots and entries whose name never arrived
+        if (valid.length === 0) return; // nothing accumulated: fullText is left untouched
+        const invokes = valid.map((tc) => {
+          let args = {};
+          try { args = tc.arguments ? JSON.parse(tc.arguments) : {}; } catch {} // malformed JSON arguments are silently treated as no arguments
+          const params = Object.entries(args).map(([k, v]) => { // NOTE(review): Object.entries throws if the parsed JSON is null — confirm arguments are always objects
+            const val = typeof v === 'string' ? v : JSON.stringify(v); // non-string values are embedded as JSON text
+            return `<parameter name="${escapeXml(k)}">${val}</parameter>`; // NOTE(review): val is inserted without escapeXml — confirm the downstream parser expects raw values
+          }).join('\n');
+          return `<invoke name="${escapeXml(tc.name)}">\n${params}\n</invoke>`;
+        }).join('\n');
+        fullText += `\n<minimax:tool_call>\n${invokes}\n</minimax:tool_call>`; // mutates the enclosing fullText accumulator
+      }
+      function finalize() { // Settle the stream promise once: append tool calls, flush output, update the status bar, resolve.
+        if (resolved) return; // idempotent: later calls are no-ops once the promise has been settled
+        resolved = true;
+        appendToolCallsXml(); // must run before resolve() so the tool-call XML is part of the returned content
+        if (!silent) renderer.flush();
+        const elapsed = (Date.now() - startTime) / 1000; // seconds since the promise body started
+        const tps = tokenCount / (elapsed || 1); // a zero elapsed time falls back to 1 to avoid dividing by zero
+        if (StatusBar.current) { // status bar is optional; skipped when no current instance is set
+          let latency = `${Math.round(tps)} tok/s · ${elapsed.toFixed(1)}s`;
+          if (reasoningText) latency += ` · ${estimateTokens(reasoningText)} think`;
+          StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency }); // tokenCount counts received deltas, not tokenizer tokens
+          StatusBar.current.render();
+        }
+        // Fallback for endpoints that don't honor stream_options.include_usage:
+        // estimate prompt/completion tokens locally so the status bar still updates.
+        let usage = streamUsage; // usage object captured from the stream, if any chunk carried one
+        if (!usage) {
+          usage = {
+            prompt_tokens: estimateTokens(JSON.stringify(trimmedMessages)), // estimated from the messages as sent after trimming
+            completion_tokens: estimateTokens(fullText) + estimateTokens(reasoningText), // fullText already includes any appended tool-call XML
+          };
+        }
+        resolve({ content: fullText, usage }); // resolves with an object carrying both content and usage
+      }
       res.setEncoding('utf8');
       res.on('data', (chunk) => {
@@ -277,53 +439,95 @@ function createApiClient({ getConfig, saveConfig, ui }) {
         for (const line of lines) {
           if (!line.startsWith('data: ')) continue;
           const data = line.slice(6).trim();
-          if (data === '[DONE]') continue;
+          if (data === '[DONE]') {
+            finalize();
+            res.destroy();
+            return;
+          }
           try {
             const obj = JSON.parse(data);
+            if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
+              streamUsage = obj.usage;
+            }
             const delta = ((obj.choices || [])[0] || {}).delta || {};
             const reasoning = delta.reasoning_content || '';
             if (reasoning) {
               if (!inReasoning) {
                 inReasoning = true;
-                process.stdout.write(`\n  ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
+                if (showThink) {
+                  process.stdout.write(`\n  ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
+                  renderer._linesWritten++;
+                }
               }
               reasoningText += reasoning;
               tokenCount++;
-              if (tokenCount % 20 === 0) process.stdout.write(`${FG_DARK}.${RST}`);
+              if (showThink) {
+                process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
+              }
+            }
+            const toolCallsDelta = delta.tool_calls;
+            if (Array.isArray(toolCallsDelta)) {
+              for (const tc of toolCallsDelta) {
+                const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
+                const isNew = !toolCallAcc[idx];
+                if (isNew) toolCallAcc[idx] = { name: '', arguments: '' };
+                if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
+                if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
+                // When the model streams purely via delta.tool_calls (no
+                // delta.content), firstContentToken never flips, so the status
+                // bar stays on "Thinking…" for the entire tool-call stream.
+                // Surface each new tool slot the moment its name is known so
+                // the user sees "Using tool: <name>" instead of a frozen UI.
+                if (isNew && StatusBar.current && toolCallAcc[idx].name) {
+                  StatusBar.current.update('tool', `Using tool: ${toolCallAcc[idx].name}`);
+                }
+              }
             }
             const content = delta.content || '';
             if (content) {
               if (inReasoning) {
                 inReasoning = false;
-                process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
+                if (showThink && !silent) {
+                  process.stdout.write(`${FG_DARK}⟨/thinking⟩${RST}\n`);
+                  renderer._linesWritten++;
+                }
+              }
+              if (onToken) {
+                if (firstContentToken) {
+                  firstContentToken = false;
+                  if (StatusBar.current) StatusBar.current.update({ status: 'streaming' });
+                }
+                onToken(content);
+              } else {
+                renderer.feed(content);
               }
-              renderer.feed(content);
               fullText += content;
               tokenCount++;
+              if (tokenCount % 20 === 0 && StatusBar.current) {
+                const elapsedSec = (Date.now() - startTime) / 1000 || 0.001;
+                StatusBar.current.liveUpdate({
+                  tokens: `${tokenCount} tok`,
+                  latency: `${Math.round(tokenCount / elapsedSec)} tok/s`,
+                });
+              }
             }
           } catch {}
         }
       });
       res.on('end', () => {
-        renderer.flush();
-        const elapsed = (Date.now() - startTime) / 1000;
-        const estTokens = estimateTokens(fullText + reasoningText);
-        const tps = tokenCount / (elapsed || 1);
-        const cols = getCols();
-        process.stdout.write(`\n  ${FG_DARK}${'─'.repeat(Math.min(cols, 60) - 4)}${RST}\n`);
-        let costLine = `${FG_DARK}~${estTokens} tokens · ${elapsed.toFixed(1)}s · ${Math.round(tps)} tok/s${RST}`;
-        if (reasoningText) costLine += ` ${FG_DARK}· ${estimateTokens(reasoningText)} thinking${RST}`;
-        process.stdout.write(`  ${costLine}\n`);
-        resolve(fullText);
+        finalize();
       });
       res.on('error', (error) => {
-        process.stdout.write(`\n  ${FG_RED}✗ ${error.message}${RST}\n`);
-        resolve('');
+        if (!resolved) {
+          resolved = true;
+          reject(error);
+        }
       });
     });
   }
@@ -388,12 +592,14 @@ function createApiClient({ getConfig, saveConfig, ui }) {
   return {
     chatStream,
     chatSync,
-    chooseSavedModelProfile,
+    dashboardCreateChat,
+    dashboardGetChat,
     dashboardGetModelForCli,
+    dashboardListChats,
     dashboardListModels,
     dashboardLogout,
+    dashboardSaveMessages,
     dashboardWhoAmI,
-    describeModelProfile,
     estimateTokens,
     getCliLoginStatus,
     requestCliLogin,

package/lib/args.js CHANGED Viewed

@@ -34,6 +34,37 @@ function parseArgs(argv) {
       case '--default-model':
         opts.defaultModel = argv[++i];
         break;
+      case '-r':
+      case '--resume':
+        opts.resume = argv[++i];
+        break;
+      case '--allow-fs':
+        (opts.allowedTiers = opts.allowedTiers || []).push('fs');
+        break;
+      case '--allow-exec':
+        (opts.allowedTiers = opts.allowedTiers || []).push('exec');
+        break;
+      case '--allow-net':
+        (opts.allowedTiers = opts.allowedTiers || []).push('net');
+        break;
+      case '--allow-all':
+        opts.allowedTiers = ['fs', 'exec', 'net', 'sys'];
+        break;
+      case '--readonly':
+        opts.readonly = true;
+        break;
+      case '--new':
+        opts.new = true;
+        break;
+      case '--show-think':
+        opts.showThink = true;
+        break;
+      case '--debug':
+        opts.debug = true;
+        break;
+      case '--system-prompt':
+        opts.systemPromptFile = argv[++i];
+        break;
       default:
         positional.push(argv[i]);
     }

package/lib/audit.js ADDED Viewed

@@ -0,0 +1,31 @@
+'use strict';
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const AUDIT_LOG = path.join(os.homedir(), '.semalt-ai', 'audit.log');
+function logToolCall(tag, input, approved, resultStatus) { // Append one JSON line describing a tool call to AUDIT_LOG; never throws, returns nothing.
+  try {
+    let safeInput = input;
+    if (tag === 'write_file' && input !== null && typeof input === 'object' && 'content' in input) { // keep written file bodies out of the log
+      const n = typeof input.content === 'string' ? input.content.length : 0; // NOTE(review): length counts UTF-16 code units, although the label below says bytes
+      safeInput = { ...input, content: `<${n} bytes>` }; // shallow copy, so the caller's input object is not mutated
+    }
+    let inputStr = typeof safeInput === 'string' ? safeInput : JSON.stringify(safeInput);
+    if (inputStr.length > 200) inputStr = inputStr.slice(0, 197) + '...'; // cap at 200 characters including the ellipsis
+    const entry = JSON.stringify({
+      ts: new Date().toISOString(),
+      tag,
+      input: inputStr, // stored as a (possibly truncated) string, not as structured JSON
+      approved: Boolean(approved),
+      result: resultStatus,
+    });
+    fs.appendFileSync(AUDIT_LOG, entry + '\n'); // NOTE(review): the parent directory is not created here, so a missing directory silently drops the entry
+  } catch {
+    // never throw
+  }
+}
+module.exports = { AUDIT_LOG, logToolCall };