reasonix 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -859,6 +859,25 @@ function encode(text) {
859
859
  function countTokens(text) {
860
860
  return encode(text).length;
861
861
  }
862
/**
 * Estimate the total token footprint of a conversation log.
 *
 * Counts tokens for each message's string content, plus the JSON-serialized
 * form of any tool calls attached to assistant messages (the serialized shape
 * approximates what the API request will actually carry).
 *
 * @param {Array<object>} messages - chat messages; each may have a string
 *   `content` and/or a `tool_calls` array.
 * @returns {number} estimated token count for the whole conversation.
 */
function estimateConversationTokens(messages) {
  return messages.reduce((runningTotal, message) => {
    let subtotal = runningTotal;
    // Only non-empty string content is tokenized; null/array content is skipped.
    if (typeof message.content === "string" && message.content) {
      subtotal += countTokens(message.content);
    }
    // Tool calls are billed as their JSON wire representation.
    if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
      subtotal += countTokens(JSON.stringify(message.tool_calls));
    }
    return subtotal;
  }, 0);
}
874
/**
 * Estimate the token footprint of a full API request: the conversation
 * messages plus the tool specifications sent alongside them.
 *
 * @param {Array<object>} messages - chat messages (see estimateConversationTokens).
 * @param {Array<object>|undefined} toolSpecs - tool/function specs attached to
 *   the request; tokenized via their JSON serialization when present.
 * @returns {number} estimated token count for the request payload.
 */
function estimateRequestTokens(messages, toolSpecs) {
  const conversationTokens = estimateConversationTokens(messages);
  const includeTools = toolSpecs && toolSpecs.length > 0;
  const toolTokens = includeTools ? countTokens(JSON.stringify(toolSpecs)) : 0;
  return conversationTokens + toolTokens;
}
862
881
 
863
882
  // src/repair/flatten.ts
864
883
  function analyzeSchema(schema) {
@@ -1816,20 +1835,26 @@ var CacheFirstLoop = class {
1816
1835
  }
1817
1836
  /**
1818
1837
  * Shrink the log by re-truncating oversized tool results to a tighter
1819
- * cap, and persist the result back to disk so the next launch doesn't
1820
- * re-inherit a fat session file. Returns a summary the TUI can
1821
- * display.
1838
+ * token cap, and persist the result back to disk so the next launch
1839
+ * doesn't re-inherit a fat session file. Returns a summary the TUI
1840
+ * can display.
1841
+ *
1842
+ * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
1843
+ * capped at the same effective context footprint as English instead
1844
+ * of slipping past a char cap at 2× the token cost. Default 4000
1845
+ * tokens, matching the token-aware dispatch cap from 0.5.2.
1822
1846
  *
1823
1847
  * Only tool-role messages are touched (same rationale as
1824
1848
  * {@link healLoadedMessages}). User and assistant messages carry
1825
1849
  * authored intent we can't mechanically shrink without losing
1826
1850
  * meaning.
1827
1851
  */
1828
- compact(tightCapChars = 4e3) {
1852
+ compact(maxTokens = 4e3) {
1829
1853
  const before = this.log.toMessages();
1830
- const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
1831
- const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
1832
- const charsSaved = healedFrom - afterBytes;
1854
+ const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
1855
+ before,
1856
+ maxTokens
1857
+ );
1833
1858
  if (healedCount > 0) {
1834
1859
  this.log.compactInPlace(messages);
1835
1860
  if (this.sessionName) {
@@ -1839,7 +1864,7 @@ var CacheFirstLoop = class {
1839
1864
  }
1840
1865
  }
1841
1866
  }
1842
- return { healedCount, charsSaved };
1867
+ return { healedCount, tokensSaved, charsSaved };
1843
1868
  }
1844
1869
  appendAndPersist(message) {
1845
1870
  this.log.append(message);
@@ -1996,7 +2021,32 @@ var CacheFirstLoop = class {
1996
2021
  content: `${iter}/${this.maxToolIters} tool calls used \u2014 approaching budget. Press Esc to force a summary now.`
1997
2022
  };
1998
2023
  }
1999
- const messages = this.buildMessages(pendingUser);
2024
+ let messages = this.buildMessages(pendingUser);
2025
+ {
2026
+ const ctxMax2 = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
2027
+ const estimate = estimateRequestTokens(messages, this.prefix.toolSpecs);
2028
+ if (estimate / ctxMax2 > 0.95) {
2029
+ const result = this.compact(1e3);
2030
+ if (result.healedCount > 0) {
2031
+ yield {
2032
+ turn: this._turn,
2033
+ role: "warning",
2034
+ content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
2035
+ estimate / ctxMax2 * 100
2036
+ )}%) \u2014 pre-compacted ${result.healedCount} tool result(s), saved ${result.tokensSaved.toLocaleString()} tokens. Sending.`
2037
+ };
2038
+ messages = this.buildMessages(pendingUser);
2039
+ } else {
2040
+ yield {
2041
+ turn: this._turn,
2042
+ role: "warning",
2043
+ content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
2044
+ estimate / ctxMax2 * 100
2045
+ )}%) and nothing to auto-compact \u2014 DeepSeek will likely 400. Run /forget or /clear to start fresh.`
2046
+ };
2047
+ }
2048
+ }
2049
+ }
2000
2050
  let assistantContent = "";
2001
2051
  let reasoningContent = "";
2002
2052
  let toolCalls = [];
@@ -2203,30 +2253,28 @@ var CacheFirstLoop = class {
2203
2253
  const ratio = usage.promptTokens / ctxMax;
2204
2254
  if (ratio > 0.6 && ratio <= 0.8) {
2205
2255
  const before = usage.promptTokens;
2206
- const soft = this.compact(16e3);
2256
+ const soft = this.compact(4e3);
2207
2257
  if (soft.healedCount > 0) {
2208
- const approxSaved = Math.round(soft.charsSaved / 4);
2209
- const after = Math.max(0, before - approxSaved);
2258
+ const after = Math.max(0, before - soft.tokensSaved);
2210
2259
  yield {
2211
2260
  turn: this._turn,
2212
2261
  role: "warning",
2213
2262
  content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
2214
2263
  ratio * 100
2215
- )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 16k, saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
2264
+ )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
2216
2265
  };
2217
2266
  }
2218
2267
  }
2219
2268
  }
2220
2269
  if (usage && usage.promptTokens / ctxMax > 0.8) {
2221
2270
  const before = usage.promptTokens;
2222
- const compactResult = this.compact(4e3);
2271
+ const compactResult = this.compact(1e3);
2223
2272
  if (compactResult.healedCount > 0) {
2224
- const approxSaved = Math.round(compactResult.charsSaved / 4);
2225
- const after = before - approxSaved;
2273
+ const after = Math.max(0, before - compactResult.tokensSaved);
2226
2274
  yield {
2227
2275
  turn: this._turn,
2228
2276
  role: "warning",
2229
- content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2277
+ content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2230
2278
  };
2231
2279
  } else {
2232
2280
  yield {
@@ -2427,6 +2475,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
2427
2475
  });
2428
2476
  return { messages: out, healedCount, healedFrom };
2429
2477
  }
2478
/**
 * Re-truncate oversized tool-result messages to a token cap, leaving every
 * other message untouched. Returns a new message array plus a summary of what
 * was shrunk; the input array and its messages are never mutated.
 *
 * @param {Array<object>} messages - full conversation log.
 * @param {number} maxTokens - per-result token cap.
 * @returns {{messages: Array<object>, healedCount: number, tokensSaved: number, charsSaved: number}}
 */
function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
  let healedCount = 0;
  let tokensSaved = 0;
  let charsSaved = 0;
  const shrunk = messages.map((message) => {
    // Only tool-role messages are candidates for shrinking.
    if (message.role !== "tool") return message;
    const text = typeof message.content === "string" ? message.content : "";
    // Cheap pre-filter before tokenizing — presumably a token is never shorter
    // than one char, so a string of <= maxTokens chars can't exceed the cap.
    if (text.length <= maxTokens) return message;
    const originalTokens = countTokens(text);
    if (originalTokens <= maxTokens) return message;
    const capped = truncateForModelByTokens(text, maxTokens);
    healedCount += 1;
    tokensSaved += Math.max(0, originalTokens - countTokens(capped));
    charsSaved += Math.max(0, text.length - capped.length);
    // Copy-on-write: replace content on a shallow clone, original untouched.
    return { ...message, content: capped };
  });
  return { messages: shrunk, healedCount, tokensSaved, charsSaved };
}
2430
2497
  function healLoadedMessages(messages, maxChars) {
2431
2498
  const shrunk = shrinkOversizedToolResults(messages, maxChars);
2432
2499
  let healedCount = shrunk.healedCount;
@@ -6877,7 +6944,11 @@ var SLASH_COMMANDS = [
6877
6944
  summary: "break down where context tokens are going: system / tools / per-turn log"
6878
6945
  },
6879
6946
  { cmd: "retry", summary: "truncate & resend your last message (fresh sample)" },
6880
- { cmd: "compact", argsHint: "[cap]", summary: "shrink oversized tool results in the log" },
6947
+ {
6948
+ cmd: "compact",
6949
+ argsHint: "[tokens]",
6950
+ summary: "shrink oversized tool results in the log (cap in tokens, default 4000)"
6951
+ },
6881
6952
  { cmd: "sessions", summary: "list saved sessions (current marked with \u25B8)" },
6882
6953
  { cmd: "forget", summary: "delete the current session from disk" },
6883
6954
  { cmd: "setup", summary: "reminds you to exit and run `reasonix setup`" },
@@ -6951,7 +7022,7 @@ function handleSlash(cmd, args, loop, ctx = {}) {
6951
7022
  " /branch <N|off> run N parallel samples (N>=2), pick most confident",
6952
7023
  " /mcp list MCP servers + tools attached to this session",
6953
7024
  " /setup (exit + reconfigure) \u2192 run `reasonix setup`",
6954
- " /compact [cap] shrink large tool results in history (default 4k/result)",
7025
+ " /compact [tokens] shrink large tool results in history (default 4000 tokens/result)",
6955
7026
  " /think dump the most recent turn's full R1 reasoning (reasoner only)",
6956
7027
  " /tool [N] list tool calls (or dump full output of #N, 1=most recent)",
6957
7028
  " /memory [sub] show pinned memory (REASONIX.md + ~/.reasonix/memory).",
@@ -7179,15 +7250,15 @@ ${entry.text}`
7179
7250
  }
7180
7251
  case "compact": {
7181
7252
  const tight = Number.parseInt(args[0] ?? "", 10);
7182
- const cap = Number.isFinite(tight) && tight >= 500 ? tight : 4e3;
7183
- const { healedCount, charsSaved } = loop.compact(cap);
7253
+ const cap = Number.isFinite(tight) && tight >= 100 ? tight : 4e3;
7254
+ const { healedCount, tokensSaved, charsSaved } = loop.compact(cap);
7184
7255
  if (healedCount === 0) {
7185
7256
  return {
7186
- info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} chars.`
7257
+ info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} tokens.`
7187
7258
  };
7188
7259
  }
7189
7260
  return {
7190
- info: `\u25B8 compacted ${healedCount} tool result(s), saved ${charsSaved.toLocaleString()} chars (~${Math.round(charsSaved / 4).toLocaleString()} tokens). Session file rewritten.`
7261
+ info: `\u25B8 compacted ${healedCount} tool result(s) to ${cap.toLocaleString()} tokens each, saved ${tokensSaved.toLocaleString()} tokens (${charsSaved.toLocaleString()} chars). Session file rewritten.`
7191
7262
  };
7192
7263
  }
7193
7264
  case "sessions": {