npm - @semalt-ai/code - Versions diffs - 1.8.0 → 1.8.3 - Mend

@semalt-ai/code 1.8.0 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/.claude/settings.local.json +14 -1
package/CLAUDE.md +2 -1
package/index.js +15 -1
package/lib/agent.js +607 -77
package/lib/api.js +240 -23
package/lib/commands.js +105 -81
package/lib/config.js +32 -4
package/lib/constants.js +67 -1
package/lib/metrics.js +16 -3
package/lib/permissions.js +66 -67
package/lib/prompts.js +97 -83
package/lib/tool_specs.js +499 -0
package/lib/tools.js +645 -319
package/lib/ui/ansi.js +17 -4
package/lib/ui/chat-history.js +201 -61
package/lib/ui/create-ui.js +116 -373
package/lib/ui/diff.js +87 -75
package/lib/ui/input-field.js +76 -58
package/lib/ui/status-bar.js +56 -25
package/lib/ui/terminal.js +58 -0
package/lib/ui/theme.js +78 -0
package/lib/ui/utils.js +63 -1
package/lib/ui/writer.js +255 -0
package/lib/ui.js +5 -0
package/package.json +1 -1

package/lib/api.js CHANGED

@@ -4,6 +4,9 @@ const http = require('http');
 const https = require('https');
 const { URL } = require('url');
+const { buildToolsSchema, isUIActive } = require('./tools');
+const { TOOL_SPECS } = require('./tool_specs');
 function createApiClient({ getConfig, saveConfig, ui }) {
   const {
     BOLD,
@@ -55,9 +58,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     return Math.floor((text || '').length / 4);
   }
-  // Discovered context limit for this process lifetime.
-  // Set on the first context-overflow 400; used to proactively trim all subsequent calls.
-  let _sessionInputLimit = null;
+  // Discovered context limit per model for this process lifetime.
+  // Keyed by resolved model name; set on the first context-overflow 400
+  // for that model and used to proactively trim subsequent calls.
+  const _sessionInputLimits = new Map();
   function httpRequest(urlStr, options, body) {
     return new Promise((resolve, reject) => {
@@ -71,7 +75,10 @@ function createApiClient({ getConfig, saveConfig, ui }) {
         headers: options.headers || {},
       };
-      const req = lib.request(reqOpts, (res) => resolve(res));
+      const req = lib.request(reqOpts, (res) => {
+        if (options.onResponse) options.onResponse(res);
+        resolve(res);
+      });
       req.on('error', reject);
       if (options.timeout) {
@@ -80,6 +87,18 @@ function createApiClient({ getConfig, saveConfig, ui }) {
         });
       }
+      if (options.signal) {
+        if (options.signal.aborted) {
+          req.destroy(new Error('Aborted'));
+          return reject(new Error('Aborted'));
+        }
+        options.signal.addEventListener('abort', () => {
+          req.destroy(new Error('Aborted'));
+        });
+      }
+      if (options.onRequest) options.onRequest(req);
       if (body) req.write(body);
       req.end();
     });
@@ -224,17 +243,32 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     });
   }
-  async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false } = {}) {
+  async function chatStream(messages, { model, temperature, maxTokens, linePrefix = '', showThink = false, onToken = null, silent = false, signal = null, onTrim = null, nativeTools = true } = {}) {
+    // nativeTools gates native function-calling: when true, `tools`/`tool_choice` are sent and streamed tool_calls are returned as structured data; when false, they are serialized to XML appended to the content.
     const config = getConfig();
+    const resolvedModel = model || config.default_model;
+    if (signal && signal.aborted) throw new Error('Aborted');
+    let trimNotified = false;
+    function notifyTrim(info) {
+      if (trimNotified) return;
+      trimNotified = true;
+      if (typeof onTrim === 'function') {
+        try { onTrim(info); } catch {}
+      }
+    }
     // Fit messages into tokenBudget tokens.
-    // Uses chars/3 — conservative for token-dense content (code, JSON, HTML).
+    // Uses chars/4 — aligned with estimateTokens; a deliberate under-estimate
+    // for token-dense content (code, JSON, HTML) but consistent across the
+    // codebase.
     //
     // Always keeps: system prompt + first non-system message (original task).
     // Drops intermediate messages oldest-first, then truncates the last tail
     // message (typically a large tool result) if still over budget.
     function trimToTokenBudget(msgs, tokenBudget) {
-      const CHARS_PER_TOKEN = 3;
+      const CHARS_PER_TOKEN = 4;
       const system = msgs.filter((m) => m.role === 'system');
       const nonSystem = msgs.filter((m) => m.role !== 'system');
       if (nonSystem.length === 0) return [...system];
@@ -271,27 +305,62 @@ function createApiClient({ getConfig, saveConfig, ui }) {
       return tail.length > 0 ? [...system, pinned, ...tail] : [...system, pinned];
     }
-    // Proactive trim: apply the session input limit discovered from a prior 400.
+    // Proactive trim: prefer a limit learned from a prior 400 overflow; otherwise
+    // fall back to config.context_length (with a ~10% safety margin) as a hint.
+    // The fallback is not written to _sessionInputLimits so a real overflow
+    // always overrides the config hint.
     let trimmedMessages = messages;
-    if (_sessionInputLimit !== null) {
-      if (Math.floor(JSON.stringify(messages).length / 3) > _sessionInputLimit) {
-        trimmedMessages = trimToTokenBudget(messages, _sessionInputLimit);
+    let sessionLimit = _sessionInputLimits.get(resolvedModel);
+    if (sessionLimit == null &&
+        Number.isInteger(config.context_length) && config.context_length > 0) {
+      sessionLimit = Math.floor(config.context_length * 0.9);
+    }
+    if (sessionLimit != null) {
+      if (Math.floor(JSON.stringify(messages).length / 4) > sessionLimit) {
+        trimmedMessages = trimToTokenBudget(messages, sessionLimit);
+        const dropped = messages.length - trimmedMessages.length;
+        const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
+        notifyTrim({ reason: 'proactive', dropped, keptTokens, limit: sessionLimit });
       }
     }
+    // MiniMax supports `reasoning_split: true` which moves thinking content
+    // into a separate reasoning_details field on the response (and
+    // delta.reasoning_content during streaming) instead of embedding
+    // <think>...</think> inside message.content. Only send this flag to
+    // MiniMax — other providers may reject unknown fields.
+    const isMiniMax =
+      /api\.minimax\.io/i.test(config.api_base || '') ||
+      /^minimax[-\/]/i.test(resolvedModel || '');
     const payload = {
-      model: model || config.default_model,
+      model: resolvedModel,
       messages: trimmedMessages,
       temperature: temperature !== undefined ? temperature : config.temperature,
       stream: true,
+      stream_options: { include_usage: true },
     };
+    if (isMiniMax) payload.reasoning_split = true;
     if (maxTokens !== undefined) payload.max_tokens = maxTokens;
+    // Native function-calling: advertise the tool schema and let the model
+    // emit structured tool_calls. Wrappers are XML envelopes, not callable
+    // tools — filter them out per the TOOL_SPECS contract.
+    if (nativeTools) {
+      const callable = Object.fromEntries(
+        Object.entries(TOOL_SPECS).filter(([, spec]) => !spec.wrapper)
+      );
+      payload.tools = buildToolsSchema(callable);
+      payload.tool_choice = 'auto';
+    }
+    const endpoint = apiUrl('/v1/chat/completions');
     async function doRequest(msgs) {
       const reqPayload = { ...payload, messages: msgs };
       const reqBody = JSON.stringify(reqPayload);
-      const res = await httpRequest(apiUrl('/v1/chat/completions'), {
+      const res = await httpRequest(endpoint, {
         method: 'POST',
         timeout: config.request_timeout_ms,
         headers: {
@@ -299,6 +368,7 @@ function createApiClient({ getConfig, saveConfig, ui }) {
           'Authorization': `Bearer ${config.api_key}`,
           'Content-Length': Buffer.byteLength(reqBody),
         },
+        signal,
       }, reqBody);
       if (res.statusCode !== 200) {
@@ -319,6 +389,9 @@ function createApiClient({ getConfig, saveConfig, ui }) {
         err.statusCode = res.statusCode;
         err.parsedErr = parsedErr;
         err.detail = detail;
+        err.rawBody = errBody;
+        err.responseHeaders = res.headers;
+        err.endpoint = endpoint;
         throw err;
       }
       return res;
@@ -327,7 +400,8 @@ function createApiClient({ getConfig, saveConfig, ui }) {
     // On payload-too-large errors, trim and retry.
     //   400 with context-overflow detail → parse exact context window, budget = 90% of window (else halve current estimate)
     //   413 Request Entity Too Large (Nginx/proxy)  → no size hint, halve current estimate
-    // In both cases _sessionInputLimit is set so all subsequent calls are proactively trimmed.
+    // In both cases the per-model session input limit is set so all subsequent
+    // calls for this model are proactively trimmed.
     let res;
     try {
       res = await doRequest(trimmedMessages);
@@ -342,15 +416,41 @@ function createApiClient({ getConfig, saveConfig, ui }) {
           const limitMatch = err.detail.match(/context length is only (\d+)/i) ||
                              err.detail.match(/maximum.*?(\d+)\s*token/i);
           const contextWindow = limitMatch ? parseInt(limitMatch[1], 10) : null;
-          budget = contextWindow
-            ? Math.floor(contextWindow / 2)
-            : Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
+          if (contextWindow) {
+            budget = Math.floor(contextWindow * 0.9);
+            // Persist the learned context window so future turns/runs trim
+            // proactively without needing a second 400. Must not block the
+            // retry if the write fails.
+            try {
+              const currentConfig = getConfig();
+              const next = { ...currentConfig, context_length: contextWindow };
+              if (Array.isArray(currentConfig.models)) {
+                next.models = currentConfig.models.map((m) =>
+                  m && m.api_base === currentConfig.api_base && m.model === resolvedModel
+                    ? { ...m, context_length: contextWindow }
+                    : m
+                );
+              }
+              saveConfig(next);
+            } catch {}
+          } else {
+            budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
+          }
         } else {
           // 413: no token info available — halve the estimated size of the current payload.
-          budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 3) * 0.5);
+          budget = Math.floor(Math.floor(JSON.stringify(trimmedMessages).length / 4) * 0.5);
         }
-        _sessionInputLimit = budget;
+        _sessionInputLimits.set(resolvedModel, budget);
+        const before = trimmedMessages;
         trimmedMessages = trimToTokenBudget(trimmedMessages, budget);
+        const dropped = before.length - trimmedMessages.length;
+        const keptTokens = Math.floor(JSON.stringify(trimmedMessages).length / 4);
+        notifyTrim({
+          reason: is413 ? 'overflow-413' : 'overflow-400',
+          dropped,
+          keptTokens,
+          limit: budget,
+        });
         res = await doRequest(trimmedMessages);
       } else {
         throw err;
@@ -361,10 +461,15 @@ function createApiClient({ getConfig, saveConfig, ui }) {
       const startTime = Date.now();
       let fullText = '';
       let reasoningText = '';
+      let reasoningDetailsText = '';
       let tokenCount = 0;
       let inReasoning = false;
       let streamUsage = null;
+      let streamFinishReason = null;
       let resolved = false;
+      // delta.tool_calls accumulator (OpenAI function-calling streaming format).
+      // Keyed by `index` per the OpenAI spec.
+      const toolCallAcc = [];
       const renderer = new StreamRenderer({ firstLinePrefix: linePrefix, showThink });
       if (!silent) {
         process.stdout.write('\n');
@@ -373,9 +478,44 @@ function createApiClient({ getConfig, saveConfig, ui }) {
       let firstContentToken = true;
       let lineBuffer = '';
+      function escapeXml(s) {
+        return String(s)
+          .replace(/&/g, '&amp;')
+          .replace(/</g, '&lt;')
+          .replace(/>/g, '&gt;');
+      }
+      // Convert any accumulated tool_calls into a MiniMax XML block and
+      // append it to fullText so extractToolCalls() picks them up. Runs once
+      // at stream end.
+      function appendToolCallsXml() {
+        const valid = toolCallAcc.filter((t) => t && t.name);
+        if (valid.length === 0) return;
+        const invokes = valid.map((tc) => {
+          let args = {};
+          try { args = tc.arguments ? JSON.parse(tc.arguments) : {}; } catch {}
+          const params = Object.entries(args).map(([k, v]) => {
+            const val = typeof v === 'string' ? v : JSON.stringify(v);
+            return `<parameter name="${escapeXml(k)}">${val}</parameter>`;
+          }).join('\n');
+          return `<invoke name="${escapeXml(tc.name)}">\n${params}\n</invoke>`;
+        }).join('\n');
+        fullText += `\n<minimax:tool_call>\n${invokes}\n</minimax:tool_call>`;
+      }
       function finalize() {
         if (resolved) return;
         resolved = true;
+        // Native mode: surface tool calls as structured data; skip XML serialization.
+        // Legacy mode: serialize into <minimax:tool_call> XML so extractToolCalls picks them up.
+        const validToolCalls = toolCallAcc
+          .filter((t) => t && t.name)
+          .map((t, i) => ({
+            id: t.id || `call_${i}`,
+            type: 'function',
+            function: { name: t.name, arguments: t.arguments || '{}' },
+          }));
+        if (!nativeTools) appendToolCallsXml();
         if (!silent) renderer.flush();
         const elapsed = (Date.now() - startTime) / 1000;
         const tps = tokenCount / (elapsed || 1);
@@ -385,7 +525,47 @@ function createApiClient({ getConfig, saveConfig, ui }) {
           StatusBar.current.liveUpdate({ tokens: `${tokenCount} tok`, latency });
           StatusBar.current.render();
         }
-        resolve({ content: fullText, usage: streamUsage });
+        // Fallback for endpoints that don't honor stream_options.include_usage:
+        // estimate prompt/completion tokens locally so the status bar still updates.
+        let usage = streamUsage;
+        if (!usage) {
+          usage = {
+            prompt_tokens: estimateTokens(JSON.stringify(trimmedMessages)),
+            completion_tokens: estimateTokens(fullText) + estimateTokens(reasoningText),
+          };
+        }
+        const elapsedMs = Date.now() - startTime;
+        resolve({
+          content: fullText,
+          toolCalls: nativeTools ? validToolCalls : [],
+          usage,
+          usage_from_provider: !!streamUsage,
+          tool_calls_count: validToolCalls.length,
+          finish_reason: streamFinishReason,
+          finishReason: streamFinishReason,
+          elapsed_ms: elapsedMs,
+          reasoning: reasoningText,
+          reasoning_details: reasoningDetailsText,
+          endpoint,
+          request: {
+            model: payload.model,
+            temperature: payload.temperature,
+            max_tokens: payload.max_tokens,
+            stream: payload.stream,
+            stop: payload.stop,
+            native_tools: nativeTools,
+          },
+        });
+      }
+      if (signal) {
+        signal.addEventListener('abort', () => {
+          try { res?.destroy(); } catch {}
+          if (!resolved) {
+            resolved = true;
+            reject(new Error('Aborted'));
+          }
+        });
       }
       res.setEncoding('utf8');
@@ -409,24 +589,61 @@ function createApiClient({ getConfig, saveConfig, ui }) {
             if (obj.usage && (obj.usage.prompt_tokens !== undefined || obj.usage.completion_tokens !== undefined)) {
               streamUsage = obj.usage;
             }
-            const delta = ((obj.choices || [])[0] || {}).delta || {};
+            const choice = (obj.choices || [])[0] || {};
+            if (choice.finish_reason) streamFinishReason = choice.finish_reason;
+            const delta = choice.delta || {};
+            // MiniMax `reasoning_split: true` surfaces a structured
+            // reasoning_details field. It may arrive as a streaming delta
+            // (delta.reasoning_details) or as an authoritative final value
+            // on choice.message. Preserve it for debug output; not routed to
+            // the UI and not fed back into messages[] on subsequent turns.
+            const rdDelta = delta.reasoning_details;
+            if (rdDelta !== undefined && rdDelta !== null) {
+              reasoningDetailsText += typeof rdDelta === 'string' ? rdDelta : JSON.stringify(rdDelta);
+            }
+            const rdFinal = choice.message && choice.message.reasoning_details;
+            if (rdFinal !== undefined && rdFinal !== null) {
+              reasoningDetailsText = typeof rdFinal === 'string' ? rdFinal : JSON.stringify(rdFinal);
+            }
             const reasoning = delta.reasoning_content || '';
             if (reasoning) {
+              const uiActive = isUIActive();
               if (!inReasoning) {
                 inReasoning = true;
-                if (showThink) {
+                if (showThink && !uiActive) {
                   process.stdout.write(`\n  ${FG_DARK}${DIM}⟨thinking⟩${RST}`);
                   renderer._linesWritten++;
                 }
               }
               reasoningText += reasoning;
               tokenCount++;
-              if (showThink) {
+              if (showThink && !uiActive) {
                 process.stdout.write(`${FG_DARK}${DIM}${reasoning}${RST}`);
               }
             }
+            const toolCallsDelta = delta.tool_calls;
+            if (Array.isArray(toolCallsDelta)) {
+              for (const tc of toolCallsDelta) {
+                const idx = typeof tc.index === 'number' ? tc.index : toolCallAcc.length;
+                const isNew = !toolCallAcc[idx];
+                if (isNew) toolCallAcc[idx] = { id: '', name: '', arguments: '' };
+                if (tc.id) toolCallAcc[idx].id = tc.id;
+                if (tc.function?.name) toolCallAcc[idx].name += tc.function.name;
+                if (tc.function?.arguments) toolCallAcc[idx].arguments += tc.function.arguments;
+                // When the model streams purely via delta.tool_calls (no
+                // delta.content), firstContentToken never flips, so the status
+                // bar stays on "Thinking…" for the entire tool-call stream.
+                // Surface each new tool slot the moment its name is known so
+                // the user sees "Using tool: <name>" instead of a frozen UI.
+                if (isNew && StatusBar.current && toolCallAcc[idx].name) {
+                  StatusBar.current.update('tool', `Using tool: ${toolCallAcc[idx].name}`);
+                }
+              }
+            }
             const content = delta.content || '';
             if (content) {
               if (inReasoning) {