reasonix 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -903,17 +903,23 @@ declare class CacheFirstLoop {
     constructor(opts: CacheFirstLoopOptions);
     /**
      * Shrink the log by re-truncating oversized tool results to a tighter
-     * cap, and persist the result back to disk so the next launch doesn't
-     * re-inherit a fat session file. Returns a summary the TUI can
-     * display.
+     * token cap, and persist the result back to disk so the next launch
+     * doesn't re-inherit a fat session file. Returns a summary the TUI
+     * can display.
+     *
+     * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
+     * capped at the same effective context footprint as English instead
+     * of slipping past a char cap at 2× the token cost. Default 4000
+     * tokens, matching the token-aware dispatch cap from 0.5.2.
      *
      * Only tool-role messages are touched (same rationale as
      * {@link healLoadedMessages}). User and assistant messages carry
      * authored intent we can't mechanically shrink without losing
      * meaning.
      */
-    compact(tightCapChars?: number): {
+    compact(maxTokens?: number): {
         healedCount: number;
+        tokensSaved: number;
         charsSaved: number;
     };
     private appendAndPersist;
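
Note on the API change: compact()'s argument is now a token budget rather than a character budget, and the returned summary gains a tokensSaved field alongside charsSaved. A minimal caller sketch in TypeScript (the loop binding and the 4000 figure are illustrative; only the shape of compact() comes from the diff above):

    // Hypothetical caller; assumes an already-constructed CacheFirstLoop.
    declare const loop: {
      compact(maxTokens?: number): { healedCount: number; tokensSaved: number; charsSaved: number };
    };

    const { healedCount, tokensSaved, charsSaved } = loop.compact(4000);
    if (healedCount > 0) {
      console.log(`compacted ${healedCount} tool result(s): -${tokensSaved} tokens, -${charsSaved} chars`);
    }
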
package/dist/index.js CHANGED
@@ -780,6 +780,25 @@ function encode(text) {
 function countTokens(text) {
   return encode(text).length;
 }
+function estimateConversationTokens(messages) {
+  let total = 0;
+  for (const m of messages) {
+    if (typeof m.content === "string" && m.content) {
+      total += countTokens(m.content);
+    }
+    if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
+      total += countTokens(JSON.stringify(m.tool_calls));
+    }
+  }
+  return total;
+}
+function estimateRequestTokens(messages, toolSpecs) {
+  let total = estimateConversationTokens(messages);
+  if (toolSpecs && toolSpecs.length > 0) {
+    total += countTokens(JSON.stringify(toolSpecs));
+  }
+  return total;
+}
 
 // src/repair/flatten.ts
 function analyzeSchema(schema) {
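
These helpers estimate request size from what actually gets serialized: string message content, the JSON of any tool_calls, and the JSON of the tool specs. Per-message chat-template framing is not counted, so the figure is a slight underestimate, which is presumably why the preflight below triggers at 95% rather than 100%. The same logic with explicit types, as a sketch (the Msg and ToolSpec shapes are assumptions, not the package's exported types):

    // Sketch only; mirrors the compiled helpers above under assumed shapes.
    type Msg = { role: string; content?: string | null; tool_calls?: unknown[] };
    type ToolSpec = Record<string, unknown>;
    declare function countTokens(text: string): number; // the package's tokenizer

    function estimateRequestTokens(messages: Msg[], toolSpecs?: ToolSpec[]): number {
      let total = 0;
      for (const m of messages) {
        if (typeof m.content === "string" && m.content) total += countTokens(m.content);
        if (Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
          total += countTokens(JSON.stringify(m.tool_calls));
        }
      }
      if (toolSpecs && toolSpecs.length > 0) total += countTokens(JSON.stringify(toolSpecs));
      return total;
    }
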
@@ -1737,20 +1756,26 @@ var CacheFirstLoop = class {
   }
   /**
    * Shrink the log by re-truncating oversized tool results to a tighter
-   * cap, and persist the result back to disk so the next launch doesn't
-   * re-inherit a fat session file. Returns a summary the TUI can
-   * display.
+   * token cap, and persist the result back to disk so the next launch
+   * doesn't re-inherit a fat session file. Returns a summary the TUI
+   * can display.
+   *
+   * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
+   * capped at the same effective context footprint as English instead
+   * of slipping past a char cap at 2× the token cost. Default 4000
+   * tokens, matching the token-aware dispatch cap from 0.5.2.
    *
    * Only tool-role messages are touched (same rationale as
    * {@link healLoadedMessages}). User and assistant messages carry
    * authored intent we can't mechanically shrink without losing
    * meaning.
    */
-  compact(tightCapChars = 4e3) {
+  compact(maxTokens = 4e3) {
     const before = this.log.toMessages();
-    const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
-    const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
-    const charsSaved = healedFrom - afterBytes;
+    const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
+      before,
+      maxTokens
+    );
     if (healedCount > 0) {
       this.log.compactInPlace(messages);
       if (this.sessionName) {
@@ -1760,7 +1785,7 @@ var CacheFirstLoop = class {
         }
       }
     }
-    return { healedCount, charsSaved };
+    return { healedCount, tokensSaved, charsSaved };
   }
   appendAndPersist(message) {
     this.log.append(message);
@@ -1917,7 +1942,32 @@ var CacheFirstLoop = class {
         content: `${iter}/${this.maxToolIters} tool calls used \u2014 approaching budget. Press Esc to force a summary now.`
       };
     }
-    const messages = this.buildMessages(pendingUser);
+    let messages = this.buildMessages(pendingUser);
+    {
+      const ctxMax2 = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
+      const estimate = estimateRequestTokens(messages, this.prefix.toolSpecs);
+      if (estimate / ctxMax2 > 0.95) {
+        const result = this.compact(1e3);
+        if (result.healedCount > 0) {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
+              estimate / ctxMax2 * 100
+            )}%) \u2014 pre-compacted ${result.healedCount} tool result(s), saved ${result.tokensSaved.toLocaleString()} tokens. Sending.`
+          };
+          messages = this.buildMessages(pendingUser);
+        } else {
+          yield {
+            turn: this._turn,
+            role: "warning",
+            content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
+              estimate / ctxMax2 * 100
+            )}%) and nothing to auto-compact \u2014 DeepSeek will likely 400. Run /forget or /clear to start fresh.`
+          };
+        }
+      }
+    }
     let assistantContent = "";
     let reasoningContent = "";
     let toolCalls = [];
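
One subtlety in this block: compact() rewrites the log in place, so the already-built messages array is stale after a successful pre-compaction and must be rebuilt before sending; that is why the binding changed from const to let. The control flow, condensed into a sketch (free functions and declarations stand in for the class members):

    // Sketch of the preflight flow; declarations stand in for class members.
    declare function buildMessages(pendingUser?: unknown): unknown[];
    declare function estimateRequestTokens(messages: unknown[], toolSpecs?: unknown[]): number;
    declare function compact(maxTokens?: number): { healedCount: number };
    declare const pendingUser: unknown, toolSpecs: unknown[], ctxMax: number;

    let messages = buildMessages(pendingUser);
    if (estimateRequestTokens(messages, toolSpecs) / ctxMax > 0.95) {
      if (compact(1000).healedCount > 0) {
        messages = buildMessages(pendingUser); // re-read the compacted log
      }
      // otherwise nothing was shrinkable; warn and let the user /forget or /clear
    }
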
@@ -2124,30 +2174,28 @@ var CacheFirstLoop = class {
       const ratio = usage.promptTokens / ctxMax;
       if (ratio > 0.6 && ratio <= 0.8) {
         const before = usage.promptTokens;
-        const soft = this.compact(16e3);
+        const soft = this.compact(4e3);
         if (soft.healedCount > 0) {
-          const approxSaved = Math.round(soft.charsSaved / 4);
-          const after = Math.max(0, before - approxSaved);
+          const after = Math.max(0, before - soft.tokensSaved);
           yield {
             turn: this._turn,
             role: "warning",
             content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
               ratio * 100
-            )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 16k, saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
+            )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
           };
         }
       }
     }
     if (usage && usage.promptTokens / ctxMax > 0.8) {
       const before = usage.promptTokens;
-      const compactResult = this.compact(4e3);
+      const compactResult = this.compact(1e3);
       if (compactResult.healedCount > 0) {
-        const approxSaved = Math.round(compactResult.charsSaved / 4);
-        const after = before - approxSaved;
+        const after = Math.max(0, before - compactResult.tokensSaved);
         yield {
           turn: this._turn,
           role: "warning",
-          content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
+          content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
         };
       } else {
         yield {
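
Taken together, 0.5.4 leaves three token-denominated compaction tiers where 0.5.2 had char-denominated ones: a soft pass at 60-80% of context compacting tool results to 4k tokens (previously 16k chars), a hard pass above 80% compacting to 1k tokens (previously 4k chars), and the new pre-send preflight above 95%, also at 1k tokens. As a sketch of the tiering only (the real checks run at different points: the preflight on an estimate before sending, the other two on reported promptTokens after a response):

    // Illustrative tier selection; returns the compact() token cap, or null.
    function tierCap(ratio: number): number | null {
      if (ratio > 0.95) return 1000; // preflight, before the request is sent
      if (ratio > 0.8) return 1000;  // hard guard, after a usage report
      if (ratio > 0.6) return 4000;  // soft guard, staying ahead of 80%
      return null;                   // no compaction needed
    }
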
@@ -2348,6 +2396,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
   });
   return { messages: out, healedCount, healedFrom };
 }
+function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
+  let healedCount = 0;
+  let tokensSaved = 0;
+  let charsSaved = 0;
+  const out = messages.map((msg) => {
+    if (msg.role !== "tool") return msg;
+    const content = typeof msg.content === "string" ? msg.content : "";
+    if (content.length <= maxTokens) return msg;
+    const beforeTokens = countTokens(content);
+    if (beforeTokens <= maxTokens) return msg;
+    const truncated = truncateForModelByTokens(content, maxTokens);
+    const afterTokens = countTokens(truncated);
+    healedCount += 1;
+    tokensSaved += Math.max(0, beforeTokens - afterTokens);
+    charsSaved += Math.max(0, content.length - truncated.length);
+    return { ...msg, content: truncated };
+  });
+  return { messages: out, healedCount, tokensSaved, charsSaved };
+}
 function healLoadedMessages(messages, maxChars) {
   const shrunk = shrinkOversizedToolResults(messages, maxChars);
   let healedCount = shrunk.healedCount;
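
Two details worth noting in shrinkOversizedToolResultsByTokens. First, the content.length check is a fast path that skips the tokenizer entirely when the string is short enough that it is very unlikely to exceed the budget (for typical text, token count does not exceed character count). Second, the actual cut is delegated to truncateForModelByTokens, which is not part of this diff; token-budget truncation is typically encode, slice, decode. A minimal sketch under that assumption (the name truncateByTokens, the head/tail split, and the marker text are invented here, not reasonix's):

    // Hypothetical stand-in for truncateForModelByTokens; sketch only.
    declare function encode(text: string): number[];   // the package's tokenizer
    declare function decode(tokens: number[]): string;

    function truncateByTokens(text: string, maxTokens: number): string {
      const toks = encode(text);
      if (toks.length <= maxTokens) return text;
      const keep = Math.max(0, maxTokens - 8); // reserve a few tokens for the marker
      const head = decode(toks.slice(0, Math.ceil(keep / 2)));
      const tail = decode(toks.slice(toks.length - Math.floor(keep / 2)));
      return `${head}\n[... truncated ...]\n${tail}`;
    }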