reasonix 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -903,17 +903,23 @@ declare class CacheFirstLoop {
     constructor(opts: CacheFirstLoopOptions);
     /**
      * Shrink the log by re-truncating oversized tool results to a tighter
-     * cap, and persist the result back to disk so the next launch doesn't
-     * re-inherit a fat session file. Returns a summary the TUI can
-     * display.
+     * token cap, and persist the result back to disk so the next launch
+     * doesn't re-inherit a fat session file. Returns a summary the TUI
+     * can display.
+     *
+     * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
+     * capped at the same effective context footprint as English instead
+     * of slipping past a char cap at 2× the token cost. Default 4000
+     * tokens, matching the token-aware dispatch cap from 0.5.2.
      *
      * Only tool-role messages are touched (same rationale as
      * {@link healLoadedMessages}). User and assistant messages carry
      * authored intent we can't mechanically shrink without losing
      * meaning.
      */
-    compact(tightCapChars?: number): {
+    compact(maxTokens?: number): {
         healedCount: number;
+        tokensSaved: number;
         charsSaved: number;
     };
     private appendAndPersist;
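
Note on the API change: compact()'s argument is now a token budget rather than a character budget, and the returned summary gains a tokensSaved field alongside charsSaved. A minimal caller sketch in TypeScript (the loop binding and the 4000 figure are illustrative; only the shape of compact() comes from the diff above):

    // Hypothetical caller; assumes an already-constructed CacheFirstLoop.
    declare const loop: {
      compact(maxTokens?: number): { healedCount: number; tokensSaved: number; charsSaved: number };
    };

    const { healedCount, tokensSaved, charsSaved } = loop.compact(4000);
    if (healedCount > 0) {
      console.log(`compacted ${healedCount} tool result(s): -${tokensSaved} tokens, -${charsSaved} chars`);
    }
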
package/dist/index.js CHANGED
@@ -780,6 +780,25 @@ function encode(text) {
 function countTokens(text) {
   return encode(text).length;
 }
+function estimateConversationTokens(messages) {
+  let total = 0;
+  for (const m of messages) {
+    if (typeof m.content === "string" && m.content) {
+      total += countTokens(m.content);
+    }
+    if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
+      total += countTokens(JSON.stringify(m.tool_calls));
+    }
+  }
+  return total;
+}
+function estimateRequestTokens(messages, toolSpecs) {
+  let total = estimateConversationTokens(messages);
+  if (toolSpecs && toolSpecs.length > 0) {
+    total += countTokens(JSON.stringify(toolSpecs));
+  }
+  return total;
+}
 
 // src/repair/flatten.ts
 function analyzeSchema(schema) {
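
These helpers estimate request size from what actually gets serialized: string message content, the JSON of any tool_calls, and the JSON of the tool specs. Per-message chat-template framing is not counted, so the figure is a slight underestimate, which is presumably why the preflight below triggers at 95% rather than 100%. The same logic with explicit types, as a sketch (the Msg and ToolSpec shapes are assumptions, not the package's exported types):

    // Sketch only; mirrors the compiled helpers above under assumed shapes.
    type Msg = { role: string; content?: string | null; tool_calls?: unknown[] };
    type ToolSpec = Record<string, unknown>;
    declare function countTokens(text: string): number; // the package's tokenizer

    function estimateRequestTokens(messages: Msg[], toolSpecs?: ToolSpec[]): number {
      let total = 0;
      for (const m of messages) {
        if (typeof m.content === "string" && m.content) total += countTokens(m.content);
        if (Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
          total += countTokens(JSON.stringify(m.tool_calls));
        }
      }
      if (toolSpecs && toolSpecs.length > 0) total += countTokens(JSON.stringify(toolSpecs));
      return total;
    }
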
@@ -1737,20 +1756,26 @@ var CacheFirstLoop = class {
   }
   /**
    * Shrink the log by re-truncating oversized tool results to a tighter
-   * cap, and persist the result back to disk so the next launch doesn't
-   * re-inherit a fat session file. Returns a summary the TUI can
-   * display.
+   * token cap, and persist the result back to disk so the next launch
+   * doesn't re-inherit a fat session file. Returns a summary the TUI
+   * can display.
+   *
+   * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
+   * capped at the same effective context footprint as English instead
+   * of slipping past a char cap at 2× the token cost. Default 4000
+   * tokens, matching the token-aware dispatch cap from 0.5.2.
    *
    * Only tool-role messages are touched (same rationale as
    * {@link healLoadedMessages}). User and assistant messages carry
    * authored intent we can't mechanically shrink without losing
    * meaning.
    */
-  compact(tightCapChars = 4e3) {
+  compact(maxTokens = 4e3) {
     const before = this.log.toMessages();
-    const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
-    const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
-    const charsSaved = healedFrom - afterBytes;
+    const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
+      before,
+      maxTokens
+    );
     if (healedCount > 0) {
       this.log.compactInPlace(messages);
       if (this.sessionName) {
@@ -1760,7 +1785,7 @@ var CacheFirstLoop = class {
         }
       }
     }
-    return { healedCount, charsSaved };
+    return { healedCount, tokensSaved, charsSaved };
   }
   appendAndPersist(message) {
     this.log.append(message);
@@ -1917,7 +1942,32 @@ var CacheFirstLoop = class {
         content: `${iter}/${this.maxToolIters} tool calls used \u2014 approaching budget. Press Esc to force a summary now.`
       };
     }
-    const messages = this.buildMessages(pendingUser);
+    let messages = this.buildMessages(pendingUser);
+    {
+      const ctxMax2 = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
+      const estimate = estimateRequestTokens(messages, this.prefix.toolSpecs);
+      if (estimate / ctxMax2 > 0.95) {
+        const result = this.compact(1e3);
+        if (result.healedCount > 0) {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
+              estimate / ctxMax2 * 100
+            )}%) \u2014 pre-compacted ${result.healedCount} tool result(s), saved ${result.tokensSaved.toLocaleString()} tokens. Sending.`
+          };
+          messages = this.buildMessages(pendingUser);
+        } else {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
+              estimate / ctxMax2 * 100
+            )}%) and nothing to auto-compact \u2014 DeepSeek will likely 400. Run /forget or /clear to start fresh.`
+          };
+        }
+      }
+    }
     let assistantContent = "";
     let reasoningContent = "";
     let toolCalls = [];
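
One subtlety in this block: compact() rewrites the log in place, so the already-built messages array is stale after a successful pre-compaction and must be rebuilt before sending; that is why the binding changed from const to let. The control flow, condensed into a sketch (free functions and declarations stand in for the class members):

    // Sketch of the preflight flow; declarations stand in for class members.
    declare function buildMessages(pendingUser?: unknown): unknown[];
    declare function estimateRequestTokens(messages: unknown[], toolSpecs?: unknown[]): number;
    declare function compact(maxTokens?: number): { healedCount: number };
    declare const pendingUser: unknown, toolSpecs: unknown[], ctxMax: number;

    let messages = buildMessages(pendingUser);
    if (estimateRequestTokens(messages, toolSpecs) / ctxMax > 0.95) {
      if (compact(1000).healedCount > 0) {
        messages = buildMessages(pendingUser); // re-read the compacted log
      }
      // otherwise nothing was shrinkable; warn and let the user /forget or /clear
    }
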
@@ -2124,30 +2174,28 @@ var CacheFirstLoop = class {
       const ratio = usage.promptTokens / ctxMax;
       if (ratio > 0.6 && ratio <= 0.8) {
         const before = usage.promptTokens;
-        const soft = this.compact(16e3);
+        const soft = this.compact(4e3);
         if (soft.healedCount > 0) {
-          const approxSaved = Math.round(soft.charsSaved / 4);
-          const after = Math.max(0, before - approxSaved);
+          const after = Math.max(0, before - soft.tokensSaved);
           yield {
             turn: this._turn,
             role: "warning",
             content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
               ratio * 100
-            )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 16k, saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
+            )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
           };
         }
       }
     }
     if (usage && usage.promptTokens / ctxMax > 0.8) {
       const before = usage.promptTokens;
-      const compactResult = this.compact(4e3);
+      const compactResult = this.compact(1e3);
       if (compactResult.healedCount > 0) {
-        const approxSaved = Math.round(compactResult.charsSaved / 4);
-        const after = before - approxSaved;
+        const after = Math.max(0, before - compactResult.tokensSaved);
         yield {
           turn: this._turn,
           role: "warning",
-          content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
+          content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
         };
       } else {
         yield {
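
Taken together, 0.5.4 leaves three token-denominated compaction tiers where 0.5.2 had char-denominated ones: a soft pass at 60-80% of context compacting tool results to 4k tokens (previously 16k chars), a hard pass above 80% compacting to 1k tokens (previously 4k chars), and the new pre-send preflight above 95%, also at 1k tokens. As a sketch of the tiering only (the real checks run at different points: the preflight on an estimate before sending, the other two on reported promptTokens after a response):

    // Illustrative tier selection; returns the compact() token cap, or null.
    function tierCap(ratio: number): number | null {
      if (ratio > 0.95) return 1000; // preflight, before the request is sent
      if (ratio > 0.8) return 1000;  // hard guard, after a usage report
      if (ratio > 0.6) return 4000;  // soft guard, staying ahead of 80%
      return null;                   // no compaction needed
    }
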
@@ -2348,6 +2396,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
   });
   return { messages: out, healedCount, healedFrom };
 }
+function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
+  let healedCount = 0;
+  let tokensSaved = 0;
+  let charsSaved = 0;
+  const out = messages.map((msg) => {
+    if (msg.role !== "tool") return msg;
+    const content = typeof msg.content === "string" ? msg.content : "";
+    if (content.length <= maxTokens) return msg;
+    const beforeTokens = countTokens(content);
+    if (beforeTokens <= maxTokens) return msg;
+    const truncated = truncateForModelByTokens(content, maxTokens);
+    const afterTokens = countTokens(truncated);
+    healedCount += 1;
+    tokensSaved += Math.max(0, beforeTokens - afterTokens);
+    charsSaved += Math.max(0, content.length - truncated.length);
+    return { ...msg, content: truncated };
+  });
+  return { messages: out, healedCount, tokensSaved, charsSaved };
+}
 function healLoadedMessages(messages, maxChars) {
   const shrunk = shrinkOversizedToolResults(messages, maxChars);
   let healedCount = shrunk.healedCount;
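
Two details worth noting in shrinkOversizedToolResultsByTokens. First, the content.length check is a fast path that skips the tokenizer entirely when the string is short enough that it is very unlikely to exceed the budget (for typical text, token count does not exceed character count). Second, the actual cut is delegated to truncateForModelByTokens, which is not part of this diff; token-budget truncation is typically encode, slice, decode. A minimal sketch under that assumption (the name truncateByTokens, the head/tail split, and the marker text are invented here, not reasonix's):

    // Hypothetical stand-in for truncateForModelByTokens; sketch only.
    declare function encode(text: string): number[];   // the package's tokenizer
    declare function decode(tokens: number[]): string;

    function truncateByTokens(text: string, maxTokens: number): string {
      const toks = encode(text);
      if (toks.length <= maxTokens) return text;
      const keep = Math.max(0, maxTokens - 8); // reserve a few tokens for the marker
      const head = decode(toks.slice(0, Math.ceil(keep / 2)));
      const tail = decode(toks.slice(toks.length - Math.floor(keep / 2)));
      return `${head}\n[... truncated ...]\n${tail}`;
    }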