reasonix 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +94 -23
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +10 -4
- package/dist/index.js +84 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -903,17 +903,23 @@ declare class CacheFirstLoop {
|
|
|
903
903
|
constructor(opts: CacheFirstLoopOptions);
|
|
904
904
|
/**
|
|
905
905
|
* Shrink the log by re-truncating oversized tool results to a tighter
|
|
906
|
-
* cap, and persist the result back to disk so the next launch
|
|
907
|
-
* re-inherit a fat session file. Returns a summary the TUI
|
|
908
|
-
* display.
|
|
906
|
+
* token cap, and persist the result back to disk so the next launch
|
|
907
|
+
* doesn't re-inherit a fat session file. Returns a summary the TUI
|
|
908
|
+
* can display.
|
|
909
|
+
*
|
|
910
|
+
* The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
|
|
911
|
+
* capped at the same effective context footprint as English instead
|
|
912
|
+
* of slipping past a char cap at 2× the token cost. Default 4000
|
|
913
|
+
* tokens, matching the token-aware dispatch cap from 0.5.2.
|
|
909
914
|
*
|
|
910
915
|
* Only tool-role messages are touched (same rationale as
|
|
911
916
|
* {@link healLoadedMessages}). User and assistant messages carry
|
|
912
917
|
* authored intent we can't mechanically shrink without losing
|
|
913
918
|
* meaning.
|
|
914
919
|
*/
|
|
915
|
-
compact(
|
|
920
|
+
compact(maxTokens?: number): {
|
|
916
921
|
healedCount: number;
|
|
922
|
+
tokensSaved: number;
|
|
917
923
|
charsSaved: number;
|
|
918
924
|
};
|
|
919
925
|
private appendAndPersist;
|
package/dist/index.js
CHANGED
|
@@ -780,6 +780,25 @@ function encode(text) {
|
|
|
780
780
|
/**
 * Count how many tokens `text` occupies.
 *
 * Delegates to `encode` and reports the length of whatever it returns.
 *
 * @param text Text to measure.
 * @returns The `length` of the encoded form of `text`.
 */
function countTokens(text) {
  const encoded = encode(text);
  return encoded.length;
}
|
|
783
|
+
/**
 * Estimate the token footprint of a list of chat messages.
 *
 * For every message, two things are counted via `countTokens`:
 *   - `content`, when it is a non-empty string;
 *   - `tool_calls`, serialized with `JSON.stringify`, when it is a
 *     non-empty array.
 * Any other fields on the message are ignored.
 *
 * @param messages Iterable of message objects.
 * @returns Sum of the counted tokens across all messages.
 */
function estimateConversationTokens(messages) {
  let sum = 0;
  for (const message of messages) {
    const text = message.content;
    // Empty strings and non-string content contribute nothing.
    if (typeof text === "string" && text !== "") {
      sum += countTokens(text);
    }
    const calls = message.tool_calls;
    // Array.isArray already rejects null/undefined, so no separate truthiness check.
    if (Array.isArray(calls) && calls.length > 0) {
      sum += countTokens(JSON.stringify(calls));
    }
  }
  return sum;
}
|
|
795
|
+
/**
 * Estimate the token footprint of a full request: the conversation plus the
 * tool specifications sent alongside it.
 *
 * @param messages  Messages passed through to `estimateConversationTokens`.
 * @param toolSpecs Optional list of tool specs; when present and non-empty
 *   its `JSON.stringify` form is counted with `countTokens`.
 * @returns Conversation tokens plus tool-spec tokens (if any).
 */
function estimateRequestTokens(messages, toolSpecs) {
  const conversationTokens = estimateConversationTokens(messages);
  const hasToolSpecs = Boolean(toolSpecs) && toolSpecs.length > 0;
  if (!hasToolSpecs) {
    return conversationTokens;
  }
  return conversationTokens + countTokens(JSON.stringify(toolSpecs));
}
|
|
783
802
|
|
|
784
803
|
// src/repair/flatten.ts
|
|
785
804
|
function analyzeSchema(schema) {
|
|
@@ -1737,20 +1756,26 @@ var CacheFirstLoop = class {
|
|
|
1737
1756
|
}
|
|
1738
1757
|
/**
|
|
1739
1758
|
* Shrink the log by re-truncating oversized tool results to a tighter
|
|
1740
|
-
* cap, and persist the result back to disk so the next launch
|
|
1741
|
-
* re-inherit a fat session file. Returns a summary the TUI
|
|
1742
|
-
* display.
|
|
1759
|
+
* token cap, and persist the result back to disk so the next launch
|
|
1760
|
+
* doesn't re-inherit a fat session file. Returns a summary the TUI
|
|
1761
|
+
* can display.
|
|
1762
|
+
*
|
|
1763
|
+
* The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
|
|
1764
|
+
* capped at the same effective context footprint as English instead
|
|
1765
|
+
* of slipping past a char cap at 2× the token cost. Default 4000
|
|
1766
|
+
* tokens, matching the token-aware dispatch cap from 0.5.2.
|
|
1743
1767
|
*
|
|
1744
1768
|
* Only tool-role messages are touched (same rationale as
|
|
1745
1769
|
* {@link healLoadedMessages}). User and assistant messages carry
|
|
1746
1770
|
* authored intent we can't mechanically shrink without losing
|
|
1747
1771
|
* meaning.
|
|
1748
1772
|
*/
|
|
1749
|
-
compact(
|
|
1773
|
+
compact(maxTokens = 4e3) {
|
|
1750
1774
|
const before = this.log.toMessages();
|
|
1751
|
-
const { messages, healedCount,
|
|
1752
|
-
|
|
1753
|
-
|
|
1775
|
+
const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
|
|
1776
|
+
before,
|
|
1777
|
+
maxTokens
|
|
1778
|
+
);
|
|
1754
1779
|
if (healedCount > 0) {
|
|
1755
1780
|
this.log.compactInPlace(messages);
|
|
1756
1781
|
if (this.sessionName) {
|
|
@@ -1760,7 +1785,7 @@ var CacheFirstLoop = class {
|
|
|
1760
1785
|
}
|
|
1761
1786
|
}
|
|
1762
1787
|
}
|
|
1763
|
-
return { healedCount, charsSaved };
|
|
1788
|
+
return { healedCount, tokensSaved, charsSaved };
|
|
1764
1789
|
}
|
|
1765
1790
|
appendAndPersist(message) {
|
|
1766
1791
|
this.log.append(message);
|
|
@@ -1917,7 +1942,32 @@ var CacheFirstLoop = class {
|
|
|
1917
1942
|
content: `${iter}/${this.maxToolIters} tool calls used \u2014 approaching budget. Press Esc to force a summary now.`
|
|
1918
1943
|
};
|
|
1919
1944
|
}
|
|
1920
|
-
|
|
1945
|
+
let messages = this.buildMessages(pendingUser);
|
|
1946
|
+
{
|
|
1947
|
+
const ctxMax2 = DEEPSEEK_CONTEXT_TOKENS[this.model] ?? DEFAULT_CONTEXT_TOKENS;
|
|
1948
|
+
const estimate = estimateRequestTokens(messages, this.prefix.toolSpecs);
|
|
1949
|
+
if (estimate / ctxMax2 > 0.95) {
|
|
1950
|
+
const result = this.compact(1e3);
|
|
1951
|
+
if (result.healedCount > 0) {
|
|
1952
|
+
yield {
|
|
1953
|
+
turn: this._turn,
|
|
1954
|
+
role: "warning",
|
|
1955
|
+
content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
|
|
1956
|
+
estimate / ctxMax2 * 100
|
|
1957
|
+
)}%) \u2014 pre-compacted ${result.healedCount} tool result(s), saved ${result.tokensSaved.toLocaleString()} tokens. Sending.`
|
|
1958
|
+
};
|
|
1959
|
+
messages = this.buildMessages(pendingUser);
|
|
1960
|
+
} else {
|
|
1961
|
+
yield {
|
|
1962
|
+
turn: this._turn,
|
|
1963
|
+
role: "warning",
|
|
1964
|
+
content: `preflight: request ~${estimate.toLocaleString()}/${ctxMax2.toLocaleString()} tokens (${Math.round(
|
|
1965
|
+
estimate / ctxMax2 * 100
|
|
1966
|
+
)}%) and nothing to auto-compact \u2014 DeepSeek will likely 400. Run /forget or /clear to start fresh.`
|
|
1967
|
+
};
|
|
1968
|
+
}
|
|
1969
|
+
}
|
|
1970
|
+
}
|
|
1921
1971
|
let assistantContent = "";
|
|
1922
1972
|
let reasoningContent = "";
|
|
1923
1973
|
let toolCalls = [];
|
|
@@ -2124,30 +2174,28 @@ var CacheFirstLoop = class {
|
|
|
2124
2174
|
const ratio = usage.promptTokens / ctxMax;
|
|
2125
2175
|
if (ratio > 0.6 && ratio <= 0.8) {
|
|
2126
2176
|
const before = usage.promptTokens;
|
|
2127
|
-
const soft = this.compact(
|
|
2177
|
+
const soft = this.compact(4e3);
|
|
2128
2178
|
if (soft.healedCount > 0) {
|
|
2129
|
-
const
|
|
2130
|
-
const after = Math.max(0, before - approxSaved);
|
|
2179
|
+
const after = Math.max(0, before - soft.tokensSaved);
|
|
2131
2180
|
yield {
|
|
2132
2181
|
turn: this._turn,
|
|
2133
2182
|
role: "warning",
|
|
2134
2183
|
content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
|
|
2135
2184
|
ratio * 100
|
|
2136
|
-
)}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to
|
|
2185
|
+
)}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
|
|
2137
2186
|
};
|
|
2138
2187
|
}
|
|
2139
2188
|
}
|
|
2140
2189
|
}
|
|
2141
2190
|
if (usage && usage.promptTokens / ctxMax > 0.8) {
|
|
2142
2191
|
const before = usage.promptTokens;
|
|
2143
|
-
const compactResult = this.compact(
|
|
2192
|
+
const compactResult = this.compact(1e3);
|
|
2144
2193
|
if (compactResult.healedCount > 0) {
|
|
2145
|
-
const
|
|
2146
|
-
const after = before - approxSaved;
|
|
2194
|
+
const after = Math.max(0, before - compactResult.tokensSaved);
|
|
2147
2195
|
yield {
|
|
2148
2196
|
turn: this._turn,
|
|
2149
2197
|
role: "warning",
|
|
2150
|
-
content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved
|
|
2198
|
+
content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
|
|
2151
2199
|
};
|
|
2152
2200
|
} else {
|
|
2153
2201
|
yield {
|
|
@@ -2348,6 +2396,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
|
|
|
2348
2396
|
});
|
|
2349
2397
|
return { messages: out, healedCount, healedFrom };
|
|
2350
2398
|
}
|
|
2399
|
+
/**
 * Re-truncate oversized tool results so each one fits a token budget.
 *
 * Only messages whose `role` is "tool" are examined; every other message is
 * passed through by reference. Non-string tool content is treated as empty
 * and therefore never truncated. A tool message whose content measures more
 * than `maxTokens` (per `countTokens`) is replaced by a shallow copy whose
 * `content` is the result of `truncateForModelByTokens(content, maxTokens)`.
 * The input array and its message objects are not mutated.
 *
 * @param messages  Array of message objects to scan.
 * @param maxTokens Per-message token budget for tool results.
 * @returns An object with:
 *   - `messages`: new array with truncated copies substituted in place;
 *   - `healedCount`: number of tool messages that were truncated;
 *   - `tokensSaved`: total token reduction, clamped at 0 per message;
 *   - `charsSaved`: total `length` reduction, clamped at 0 per message.
 */
function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
  // Cheap pre-filter so small results skip tokenization entirely. Comparing
  // `content.length` directly against `maxTokens` is only sound if a UTF-16
  // code unit never costs more than one token; with a byte-level BPE encoder
  // a rare character can cost one token per UTF-8 byte (at most 3 bytes per
  // code unit), so use that conservative bound instead.
  // NOTE(review): assumes `encode` is byte-level BPE and emits no more tokens
  // than input bytes — confirm against the tokenizer implementation.
  const MAX_TOKENS_PER_CODE_UNIT = 3;
  let healedCount = 0;
  let tokensSaved = 0;
  let charsSaved = 0;
  const out = messages.map((msg) => {
    if (msg.role !== "tool") return msg;
    const content = typeof msg.content === "string" ? msg.content : "";
    if (content.length * MAX_TOKENS_PER_CODE_UNIT <= maxTokens) return msg;
    // Authoritative check: the real token count decides whether to truncate.
    const beforeTokens = countTokens(content);
    if (beforeTokens <= maxTokens) return msg;
    const truncated = truncateForModelByTokens(content, maxTokens);
    const afterTokens = countTokens(truncated);
    healedCount += 1;
    // Clamp so a truncation that does not shrink the text (for example if the
    // helper appends a notice) never reports negative savings.
    tokensSaved += Math.max(0, beforeTokens - afterTokens);
    charsSaved += Math.max(0, content.length - truncated.length);
    return { ...msg, content: truncated };
  });
  return { messages: out, healedCount, tokensSaved, charsSaved };
}
|
|
2351
2418
|
function healLoadedMessages(messages, maxChars) {
|
|
2352
2419
|
const shrunk = shrinkOversizedToolResults(messages, maxChars);
|
|
2353
2420
|
let healedCount = shrunk.healedCount;
|