reasonix 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -859,6 +859,25 @@ function encode(text) {
859
859
  function countTokens(text) {
860
860
  return encode(text).length;
861
861
  }
862
/**
 * Estimate the total token footprint of a conversation log.
 *
 * Counts tokens for each message's string content, plus the JSON-serialized
 * form of any tool calls attached to assistant messages (the serialized shape
 * approximates what the API request will actually carry).
 *
 * @param {Array<object>} messages - chat messages; each may have a string
 *   `content` and/or a `tool_calls` array.
 * @returns {number} estimated token count for the whole conversation.
 */
function estimateConversationTokens(messages) {
  return messages.reduce((runningTotal, message) => {
    let subtotal = runningTotal;
    // Only non-empty string content is tokenized; null/array content is skipped.
    if (typeof message.content === "string" && message.content) {
      subtotal += countTokens(message.content);
    }
    // Tool calls are billed as their JSON wire representation.
    if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
      subtotal += countTokens(JSON.stringify(message.tool_calls));
    }
    return subtotal;
  }, 0);
}
874
/**
 * Estimate the token footprint of a full API request: the conversation
 * messages plus the tool specifications sent alongside them.
 *
 * @param {Array<object>} messages - chat messages (see estimateConversationTokens).
 * @param {Array<object>|undefined} toolSpecs - tool/function specs attached to
 *   the request; tokenized via their JSON serialization when present.
 * @returns {number} estimated token count for the request payload.
 */
function estimateRequestTokens(messages, toolSpecs) {
  const conversationTokens = estimateConversationTokens(messages);
  const includeTools = toolSpecs && toolSpecs.length > 0;
  const toolTokens = includeTools ? countTokens(JSON.stringify(toolSpecs)) : 0;
  return conversationTokens + toolTokens;
}
862
881
 
863
882
  // src/repair/flatten.ts
864
883
  function analyzeSchema(schema) {
@@ -1816,20 +1835,26 @@ var CacheFirstLoop = class {
1816
1835
  }
1817
1836
  /**
1818
1837
  * Shrink the log by re-truncating oversized tool results to a tighter
1819
- * cap, and persist the result back to disk so the next launch doesn't
1820
- * re-inherit a fat session file. Returns a summary the TUI can
1821
- * display.
1838
+ * token cap, and persist the result back to disk so the next launch
1839
+ * doesn't re-inherit a fat session file. Returns a summary the TUI
1840
+ * can display.
1841
+ *
1842
+ * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
1843
+ * capped at the same effective context footprint as English instead
1844
+ * of slipping past a char cap at 2× the token cost. Default 4000
1845
+ * tokens, matching the token-aware dispatch cap from 0.5.2.
1822
1846
  *
1823
1847
  * Only tool-role messages are touched (same rationale as
1824
1848
  * {@link healLoadedMessages}). User and assistant messages carry
1825
1849
  * authored intent we can't mechanically shrink without losing
1826
1850
  * meaning.
1827
1851
  */
1828
- compact(tightCapChars = 4e3) {
1852
+ compact(maxTokens = 4e3) {
1829
1853
  const before = this.log.toMessages();
1830
- const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
1831
- const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
1832
- const charsSaved = healedFrom - afterBytes;
1854
+ const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
1855
+ before,
1856
+ maxTokens
1857
+ );
1833
1858
  if (healedCount > 0) {
1834
1859
  this.log.compactInPlace(messages);
1835
1860
  if (this.sessionName) {
@@ -1839,7 +1864,7 @@ var CacheFirstLoop = class {
1839
1864
  }
1840
1865
  }
1841
1866
  }
1842
- return { healedCount, charsSaved };
1867
+ return { healedCount, tokensSaved, charsSaved };
1843
1868
  }
1844
1869
  appendAndPersist(message) {
1845
1870
  this.log.append(message);
@@ -1996,7 +2021,32 @@ var CacheFirstLoop = class {
1996
2021
  content: `${iter}/${this.maxToolIters} tool calls used \u2014 approaching budget. Press Esc to force a summary now.`
1997
2022
  };
1998
2023
  }
1999
- const messages = this.buildMessages(pendingUser);
2024
+ let messages = this.buildMessages(pendingUser);
2025
+ {
2026
+ const ctxMax2 = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
2027
+ const estimate = estimateRequestTokens(messages, this.prefix.toolSpecs);
2028
+ if (estimate / ctxMax2 > 0.95) {
2029
+ const result = this.compact(1e3);
2030
+ if (result.healedCount > 0) {
2031
+ yield {
2032
+ turn: this._turn,
2033
+ role: "warning",
2034
+ content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
2035
+ estimate / ctxMax2 * 100
2036
+ )}%) \u2014 pre-compacted ${result.healedCount} tool result(s), saved ${result.tokensSaved.toLocaleString()} tokens. Sending.`
2037
+ };
2038
+ messages = this.buildMessages(pendingUser);
2039
+ } else {
2040
+ yield {
2041
+ turn: this._turn,
2042
+ role: "warning",
2043
+ content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
2044
+ estimate / ctxMax2 * 100
2045
+ )}%) and nothing to auto-compact \u2014 DeepSeek will likely 400. Run /forget or /clear to start fresh.`
2046
+ };
2047
+ }
2048
+ }
2049
+ }
2000
2050
  let assistantContent = "";
2001
2051
  let reasoningContent = "";
2002
2052
  let toolCalls = [];
@@ -2203,30 +2253,28 @@ var CacheFirstLoop = class {
2203
2253
  const ratio = usage.promptTokens / ctxMax;
2204
2254
  if (ratio > 0.6 && ratio <= 0.8) {
2205
2255
  const before = usage.promptTokens;
2206
- const soft = this.compact(16e3);
2256
+ const soft = this.compact(4e3);
2207
2257
  if (soft.healedCount > 0) {
2208
- const approxSaved = Math.round(soft.charsSaved / 4);
2209
- const after = Math.max(0, before - approxSaved);
2258
+ const after = Math.max(0, before - soft.tokensSaved);
2210
2259
  yield {
2211
2260
  turn: this._turn,
2212
2261
  role: "warning",
2213
2262
  content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
2214
2263
  ratio * 100
2215
- )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 16k, saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
2264
+ )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
2216
2265
  };
2217
2266
  }
2218
2267
  }
2219
2268
  }
2220
2269
  if (usage && usage.promptTokens / ctxMax > 0.8) {
2221
2270
  const before = usage.promptTokens;
2222
- const compactResult = this.compact(4e3);
2271
+ const compactResult = this.compact(1e3);
2223
2272
  if (compactResult.healedCount > 0) {
2224
- const approxSaved = Math.round(compactResult.charsSaved / 4);
2225
- const after = before - approxSaved;
2273
+ const after = Math.max(0, before - compactResult.tokensSaved);
2226
2274
  yield {
2227
2275
  turn: this._turn,
2228
2276
  role: "warning",
2229
- content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2277
+ content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2230
2278
  };
2231
2279
  } else {
2232
2280
  yield {
@@ -2427,6 +2475,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
2427
2475
  });
2428
2476
  return { messages: out, healedCount, healedFrom };
2429
2477
  }
2478
/**
 * Re-truncate oversized tool-result messages to a token cap, leaving every
 * other message untouched. Returns a new message array plus a summary of what
 * was shrunk; the input array and its messages are never mutated.
 *
 * @param {Array<object>} messages - full conversation log.
 * @param {number} maxTokens - per-result token cap.
 * @returns {{messages: Array<object>, healedCount: number, tokensSaved: number, charsSaved: number}}
 */
function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
  let healedCount = 0;
  let tokensSaved = 0;
  let charsSaved = 0;
  const shrunk = messages.map((message) => {
    // Only tool-role messages are candidates for shrinking.
    if (message.role !== "tool") return message;
    const text = typeof message.content === "string" ? message.content : "";
    // Cheap pre-filter before tokenizing — presumably a token is never shorter
    // than one char, so a string of <= maxTokens chars can't exceed the cap.
    if (text.length <= maxTokens) return message;
    const originalTokens = countTokens(text);
    if (originalTokens <= maxTokens) return message;
    const capped = truncateForModelByTokens(text, maxTokens);
    healedCount += 1;
    tokensSaved += Math.max(0, originalTokens - countTokens(capped));
    charsSaved += Math.max(0, text.length - capped.length);
    // Copy-on-write: replace content on a shallow clone, original untouched.
    return { ...message, content: capped };
  });
  return { messages: shrunk, healedCount, tokensSaved, charsSaved };
}
2430
2497
  function healLoadedMessages(messages, maxChars) {
2431
2498
  const shrunk = shrinkOversizedToolResults(messages, maxChars);
2432
2499
  let healedCount = shrunk.healedCount;
@@ -6877,7 +6944,11 @@ var SLASH_COMMANDS = [
6877
6944
  summary: "break down where context tokens are going: system / tools / per-turn log"
6878
6945
  },
6879
6946
  { cmd: "retry", summary: "truncate & resend your last message (fresh sample)" },
6880
- { cmd: "compact", argsHint: "[cap]", summary: "shrink oversized tool results in the log" },
6947
+ {
6948
+ cmd: "compact",
6949
+ argsHint: "[tokens]",
6950
+ summary: "shrink oversized tool results in the log (cap in tokens, default 4000)"
6951
+ },
6881
6952
  { cmd: "sessions", summary: "list saved sessions (current marked with \u25B8)" },
6882
6953
  { cmd: "forget", summary: "delete the current session from disk" },
6883
6954
  { cmd: "setup", summary: "reminds you to exit and run `reasonix setup`" },
@@ -6951,7 +7022,7 @@ function handleSlash(cmd, args, loop, ctx = {}) {
6951
7022
  " /branch <N|off> run N parallel samples (N>=2), pick most confident",
6952
7023
  " /mcp list MCP servers + tools attached to this session",
6953
7024
  " /setup (exit + reconfigure) \u2192 run `reasonix setup`",
6954
- " /compact [cap] shrink large tool results in history (default 4k/result)",
7025
+ " /compact [tokens] shrink large tool results in history (default 4000 tokens/result)",
6955
7026
  " /think dump the most recent turn's full R1 reasoning (reasoner only)",
6956
7027
  " /tool [N] list tool calls (or dump full output of #N, 1=most recent)",
6957
7028
  " /memory [sub] show pinned memory (REASONIX.md + ~/.reasonix/memory).",
@@ -7179,15 +7250,15 @@ ${entry.text}`
7179
7250
  }
7180
7251
  case "compact": {
7181
7252
  const tight = Number.parseInt(args[0] ?? "", 10);
7182
- const cap = Number.isFinite(tight) && tight >= 500 ? tight : 4e3;
7183
- const { healedCount, charsSaved } = loop.compact(cap);
7253
+ const cap = Number.isFinite(tight) && tight >= 100 ? tight : 4e3;
7254
+ const { healedCount, tokensSaved, charsSaved } = loop.compact(cap);
7184
7255
  if (healedCount === 0) {
7185
7256
  return {
7186
- info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} chars.`
7257
+ info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} tokens.`
7187
7258
  };
7188
7259
  }
7189
7260
  return {
7190
- info: `\u25B8 compacted ${healedCount} tool result(s), saved ${charsSaved.toLocaleString()} chars (~${Math.round(charsSaved / 4).toLocaleString()} tokens). Session file rewritten.`
7261
+ info: `\u25B8 compacted ${healedCount} tool result(s) to ${cap.toLocaleString()} tokens each, saved ${tokensSaved.toLocaleString()} tokens (${charsSaved.toLocaleString()} chars). Session file rewritten.`
7191
7262
  };
7192
7263
  }
7193
7264
  case "sessions": {