@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -1,8 +1,331 @@
1
1
  'use strict';
2
2
 
3
3
  var messages = require('@langchain/core/messages');
4
+ var truncation = require('../utils/truncation.cjs');
5
+ var contextPruningSettings = require('./contextPruningSettings.cjs');
4
6
  var _enum = require('../common/enum.cjs');
7
+ var contextPruning = require('./contextPruning.cjs');
5
8
 
9
+ function sumTokenCounts(tokenMap, count) {
10
+ let total = 0;
11
+ for (let i = 0; i < count; i++) {
12
+ total += tokenMap[i] ?? 0;
13
+ }
14
+ return total;
15
+ }
16
+ /** Default fraction of the token budget reserved as headroom (5 %). */
17
+ const DEFAULT_RESERVE_RATIO = 0.05;
18
+ /** Context pressure at which observation masking and context fading activate. */
19
+ const PRESSURE_THRESHOLD_MASKING = 0.8;
20
+ /** Pressure band thresholds paired with budget factors for progressive context fading. */
21
+ const PRESSURE_BANDS = [
22
+ [0.99, 0.05],
23
+ [0.9, 0.2],
24
+ [0.85, 0.5],
25
+ [0.8, 1.0],
26
+ ];
27
+ /** Maximum character length for masked (consumed) tool results. */
28
+ const MASKED_RESULT_MAX_CHARS = 300;
29
+ /** Hard cap for the originalToolContent store (~2 MB estimated from char length). */
30
+ const ORIGINAL_CONTENT_MAX_CHARS = 2_000_000;
31
+ /** Minimum cumulative calibration ratio — provider can't count fewer tokens
32
+ * than our raw estimate (within reason). Prevents divide-by-zero edge cases. */
33
+ const CALIBRATION_RATIO_MIN = 0.5;
34
+ /** Maximum cumulative calibration ratio — sanity cap for the running ratio. */
35
+ const CALIBRATION_RATIO_MAX = 5;
36
+ function getToolCallIds(message) {
37
+ if (message.getType() !== 'ai') {
38
+ return new Set();
39
+ }
40
+ const ids = new Set();
41
+ const aiMessage = message;
42
+ for (const toolCall of aiMessage.tool_calls ?? []) {
43
+ if (typeof toolCall.id === 'string' && toolCall.id.length > 0) {
44
+ ids.add(toolCall.id);
45
+ }
46
+ }
47
+ if (Array.isArray(aiMessage.content)) {
48
+ for (const part of aiMessage.content) {
49
+ if (typeof part !== 'object') {
50
+ continue;
51
+ }
52
+ const record = part;
53
+ if ((record.type === 'tool_use' || record.type === 'tool_call') &&
54
+ typeof record.id === 'string' &&
55
+ record.id.length > 0) {
56
+ ids.add(record.id);
57
+ }
58
+ }
59
+ }
60
+ return ids;
61
+ }
62
+ function getToolResultId(message) {
63
+ if (message.getType() !== 'tool') {
64
+ return null;
65
+ }
66
+ const toolMessage = message;
67
+ if (typeof toolMessage.tool_call_id === 'string' &&
68
+ toolMessage.tool_call_id.length > 0) {
69
+ return toolMessage.tool_call_id;
70
+ }
71
+ if (typeof toolMessage.toolCallId === 'string' &&
72
+ toolMessage.toolCallId.length > 0) {
73
+ return toolMessage.toolCallId;
74
+ }
75
+ return null;
76
+ }
77
+ function resolveTokenCountForMessage({ message, messageIndexMap, tokenCounter, indexTokenCountMap, }) {
78
+ const originalIndex = messageIndexMap.get(message) ?? -1;
79
+ if (originalIndex > -1 && indexTokenCountMap[originalIndex] != null) {
80
+ return indexTokenCountMap[originalIndex];
81
+ }
82
+ return tokenCounter(message);
83
+ }
84
+ function repairOrphanedToolMessages({ context, allMessages, tokenCounter, indexTokenCountMap, }) {
85
+ const messageIndexMap = new Map();
86
+ for (let i = 0; i < allMessages.length; i++) {
87
+ messageIndexMap.set(allMessages[i], i);
88
+ }
89
+ const validToolCallIds = new Set();
90
+ const presentToolResultIds = new Set();
91
+ for (const message of context) {
92
+ for (const id of getToolCallIds(message)) {
93
+ validToolCallIds.add(id);
94
+ }
95
+ const resultId = getToolResultId(message);
96
+ if (resultId != null) {
97
+ presentToolResultIds.add(resultId);
98
+ }
99
+ }
100
+ let reclaimedTokens = 0;
101
+ let droppedOrphanCount = 0;
102
+ const repairedContext = [];
103
+ const droppedMessages = [];
104
+ for (const message of context) {
105
+ if (message.getType() === 'tool') {
106
+ const toolResultId = getToolResultId(message);
107
+ if (toolResultId == null || !validToolCallIds.has(toolResultId)) {
108
+ droppedOrphanCount += 1;
109
+ reclaimedTokens += resolveTokenCountForMessage({
110
+ message,
111
+ tokenCounter,
112
+ messageIndexMap,
113
+ indexTokenCountMap,
114
+ });
115
+ droppedMessages.push(message);
116
+ continue;
117
+ }
118
+ repairedContext.push(message);
119
+ continue;
120
+ }
121
+ if (message.getType() === 'ai' && message instanceof messages.AIMessage) {
122
+ const toolCallIds = getToolCallIds(message);
123
+ if (toolCallIds.size > 0) {
124
+ let hasOrphanToolCalls = false;
125
+ for (const id of toolCallIds) {
126
+ if (!presentToolResultIds.has(id)) {
127
+ hasOrphanToolCalls = true;
128
+ break;
129
+ }
130
+ }
131
+ if (hasOrphanToolCalls) {
132
+ const originalTokens = resolveTokenCountForMessage({
133
+ message,
134
+ messageIndexMap,
135
+ tokenCounter,
136
+ indexTokenCountMap,
137
+ });
138
+ const stripped = stripOrphanToolUseBlocks(message, presentToolResultIds);
139
+ if (stripped != null) {
140
+ const strippedTokens = tokenCounter(stripped);
141
+ reclaimedTokens += originalTokens - strippedTokens;
142
+ repairedContext.push(stripped);
143
+ }
144
+ else {
145
+ droppedOrphanCount += 1;
146
+ reclaimedTokens += originalTokens;
147
+ droppedMessages.push(message);
148
+ }
149
+ continue;
150
+ }
151
+ }
152
+ }
153
+ repairedContext.push(message);
154
+ }
155
+ return {
156
+ context: repairedContext,
157
+ reclaimedTokens,
158
+ droppedOrphanCount,
159
+ droppedMessages,
160
+ };
161
+ }
162
+ /**
163
+ * Strips tool_use content blocks and tool_calls entries from an AI message
164
+ * when their corresponding ToolMessages are not in the context.
165
+ * Returns null if the message has no content left after stripping.
166
+ */
167
+ function stripOrphanToolUseBlocks(message, presentToolResultIds) {
168
+ const keptToolCalls = (message.tool_calls ?? []).filter((tc) => typeof tc.id === 'string' && presentToolResultIds.has(tc.id));
169
+ let keptContent;
170
+ if (Array.isArray(message.content)) {
171
+ const filtered = message.content.filter((block) => {
172
+ if (typeof block !== 'object') {
173
+ return true;
174
+ }
175
+ const record = block;
176
+ if ((record.type === 'tool_use' || record.type === 'tool_call') &&
177
+ typeof record.id === 'string') {
178
+ return presentToolResultIds.has(record.id);
179
+ }
180
+ return true;
181
+ });
182
+ if (filtered.length === 0) {
183
+ return null;
184
+ }
185
+ keptContent = filtered;
186
+ }
187
+ else {
188
+ keptContent = message.content;
189
+ }
190
+ return new messages.AIMessage({
191
+ ...message,
192
+ content: keptContent,
193
+ tool_calls: keptToolCalls.length > 0 ? keptToolCalls : undefined,
194
+ });
195
+ }
196
+ /**
197
+ * Lightweight structural cleanup: strips orphan tool_use blocks from AI messages
198
+ * and drops orphan ToolMessages whose AI counterpart is missing.
199
+ *
200
+ * Unlike `repairOrphanedToolMessages`, this does NOT track tokens — it is
201
+ * intended as a final safety net in Graph.ts right before model invocation
202
+ * to prevent Anthropic/Bedrock structural validation errors.
203
+ *
204
+ * Uses duck-typing instead of `getType()` because messages at this stage
205
+ * may be plain objects (from LangGraph state serialization) rather than
206
+ * proper BaseMessage class instances.
207
+ *
208
+ * Includes a fast-path: if every tool_call has a matching tool_result and
209
+ * vice-versa, the original array is returned immediately with zero allocation.
210
+ */
211
+ function sanitizeOrphanToolBlocks(messages$1) {
212
+ const allToolCallIds = new Set();
213
+ const allToolResultIds = new Set();
214
+ for (const msg of messages$1) {
215
+ const msgAny = msg;
216
+ const toolCalls = msgAny.tool_calls;
217
+ if (Array.isArray(toolCalls)) {
218
+ for (const tc of toolCalls) {
219
+ if (typeof tc.id === 'string' &&
220
+ tc.id.length > 0 &&
221
+ !tc.id.startsWith(_enum.Constants.ANTHROPIC_SERVER_TOOL_PREFIX)) {
222
+ allToolCallIds.add(tc.id);
223
+ }
224
+ }
225
+ }
226
+ if (Array.isArray(msgAny.content)) {
227
+ for (const block of msgAny.content) {
228
+ if (typeof block === 'object' &&
229
+ (block.type === 'tool_use' || block.type === 'tool_call') &&
230
+ typeof block.id === 'string' &&
231
+ !block.id.startsWith(_enum.Constants.ANTHROPIC_SERVER_TOOL_PREFIX)) {
232
+ allToolCallIds.add(block.id);
233
+ }
234
+ }
235
+ }
236
+ const toolCallId = msgAny.tool_call_id;
237
+ if (typeof toolCallId === 'string' && toolCallId.length > 0) {
238
+ allToolResultIds.add(toolCallId);
239
+ }
240
+ }
241
+ let hasOrphans = false;
242
+ for (const id of allToolCallIds) {
243
+ if (!allToolResultIds.has(id)) {
244
+ hasOrphans = true;
245
+ break;
246
+ }
247
+ }
248
+ if (!hasOrphans) {
249
+ for (const id of allToolResultIds) {
250
+ if (!allToolCallIds.has(id)) {
251
+ hasOrphans = true;
252
+ break;
253
+ }
254
+ }
255
+ }
256
+ if (!hasOrphans) {
257
+ return messages$1;
258
+ }
259
+ const result = [];
260
+ const strippedAiIndices = new Set();
261
+ for (const msg of messages$1) {
262
+ const msgAny = msg;
263
+ const msgType = typeof msg.getType === 'function'
264
+ ? msg.getType()
265
+ : (msgAny.role ??
266
+ msgAny._type);
267
+ const toolCallId = msgAny.tool_call_id;
268
+ if ((msgType === 'tool' || msg instanceof messages.ToolMessage) &&
269
+ typeof toolCallId === 'string' &&
270
+ !allToolCallIds.has(toolCallId)) {
271
+ continue;
272
+ }
273
+ const toolCalls = msgAny.tool_calls;
274
+ if ((msgType === 'ai' ||
275
+ msgType === 'assistant' ||
276
+ msg instanceof messages.AIMessage) &&
277
+ Array.isArray(toolCalls) &&
278
+ toolCalls.length > 0) {
279
+ const hasOrphanCalls = toolCalls.some((tc) => typeof tc.id === 'string' && !allToolResultIds.has(tc.id));
280
+ if (hasOrphanCalls) {
281
+ if (msg instanceof messages.AIMessage) {
282
+ const stripped = stripOrphanToolUseBlocks(msg, allToolResultIds);
283
+ if (stripped != null) {
284
+ strippedAiIndices.add(result.length);
285
+ result.push(stripped);
286
+ }
287
+ continue;
288
+ }
289
+ const keptToolCalls = toolCalls.filter((tc) => typeof tc.id === 'string' && allToolResultIds.has(tc.id));
290
+ const keptContent = Array.isArray(msgAny.content)
291
+ ? msgAny.content.filter((block) => {
292
+ if (typeof block !== 'object')
293
+ return true;
294
+ if ((block.type === 'tool_use' || block.type === 'tool_call') &&
295
+ typeof block.id === 'string') {
296
+ return allToolResultIds.has(block.id);
297
+ }
298
+ return true;
299
+ })
300
+ : msgAny.content;
301
+ if (keptToolCalls.length === 0 &&
302
+ Array.isArray(keptContent) &&
303
+ keptContent.length === 0) {
304
+ continue;
305
+ }
306
+ strippedAiIndices.add(result.length);
307
+ const patched = Object.create(Object.getPrototypeOf(msg), Object.getOwnPropertyDescriptors(msg));
308
+ patched.tool_calls = keptToolCalls.length > 0 ? keptToolCalls : [];
309
+ patched.content = keptContent;
310
+ result.push(patched);
311
+ continue;
312
+ }
313
+ }
314
+ result.push(msg);
315
+ }
316
+ // Bedrock/Anthropic require the conversation to end with a user message;
317
+ // a stripped AI message (tool_use removed) represents a dead-end exchange.
318
+ while (result.length > 0 && strippedAiIndices.has(result.length - 1)) {
319
+ result.pop();
320
+ }
321
+ return result;
322
+ }
323
+ /**
324
+ * Truncates an oversized tool_use `input` field using head+tail, preserving
325
+ * it as a valid JSON object. Head gets ~70%, tail gets ~30% so the model
326
+ * sees both the beginning (what was called) and end (closing structure/values).
327
+ * Falls back to head-only when the budget is too small for a meaningful tail.
328
+ */
6
329
  function isIndexInContext(arrayA, arrayB, targetIndex) {
7
330
  const startingIndexInA = arrayA.length - arrayB.length;
8
331
  return targetIndex >= startingIndexInA;
@@ -36,8 +359,14 @@ function calculateTotalTokens(usage) {
36
359
  const baseInputTokens = Number(usage.input_tokens) || 0;
37
360
  const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
38
361
  const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
39
- const totalInputTokens = baseInputTokens + cacheCreation + cacheRead;
40
362
  const totalOutputTokens = Number(usage.output_tokens) || 0;
363
+ const cacheSum = cacheCreation + cacheRead;
364
+ // Anthropic: input_tokens excludes cache, cache_read can be much larger than input_tokens.
365
+ // OpenAI: input_tokens includes cache, cache_read is always <= input_tokens.
366
+ const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
367
+ const totalInputTokens = cacheIsAdditive
368
+ ? baseInputTokens + cacheSum
369
+ : baseInputTokens;
41
370
  return {
42
371
  input_tokens: totalInputTokens,
43
372
  output_tokens: totalOutputTokens,
@@ -51,12 +380,12 @@ function calculateTotalTokens(usage) {
51
380
  * @param options Configuration options for processing messages
52
381
  * @returns Object containing the message context, remaining tokens, messages not included, and summary index
53
382
  */
54
- function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex = -1, reasoningType = _enum.ContentTypes.THINKING, }) {
383
+ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex = -1, reasoningType = _enum.ContentTypes.THINKING, instructionTokens: _instructionTokens = 0, }) {
55
384
  // Every reply is primed with <|start|>assistant<|message|>, so we
56
385
  // start with 3 tokens for the label after all messages have been counted.
57
386
  let currentTokenCount = 3;
58
387
  const instructions = _messages[0]?.getType() === 'system' ? _messages[0] : undefined;
59
- const instructionsTokenCount = instructions != null ? (indexTokenCountMap[0] ?? 0) : 0;
388
+ const instructionsTokenCount = instructions != null ? (indexTokenCountMap[0] ?? 0) : _instructionTokens;
60
389
  const initialContextTokens = maxContextTokens - instructionsTokenCount;
61
390
  let remainingContextTokens = initialContextTokens;
62
391
  let startType = _startType;
@@ -154,6 +483,16 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
154
483
  context.push(_messages[0]);
155
484
  messages$1.shift();
156
485
  }
486
+ // The backward iteration pushed messages in reverse chronological order
487
+ // (newest first). Restore correct chronological order before prepending
488
+ // the remaining (older) messages so that messagesToRefine is always
489
+ // ordered oldest → newest. Without this, callers that rely on
490
+ // messagesToRefine order (e.g. the summarization node extracting the
491
+ // latest turn) would see tool_use/tool_result pairs in the wrong order.
492
+ prunedMemory.reverse();
493
+ if (messages$1.length > 0) {
494
+ prunedMemory.unshift(...messages$1);
495
+ }
157
496
  remainingContextTokens -= currentTokenCount;
158
497
  const result = {
159
498
  remainingContextTokens,
@@ -167,7 +506,6 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
167
506
  thinkingEndIndex < 0 ||
168
507
  (thinkingStartIndex > -1 &&
169
508
  isIndexInContext(_messages, context, thinkingStartIndex))) {
170
- // we reverse at this step to ensure the context is in the correct order for the model, and we need to work backwards
171
509
  result.context = context.reverse();
172
510
  return result;
173
511
  }
@@ -177,9 +515,6 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
177
515
  if (!thinkingBlock) {
178
516
  throw new Error('The payload is malformed. There is a thinking sequence but no thinking block found.');
179
517
  }
180
- // Since we have a thinking sequence, we need to find the last assistant message
181
- // in the latest AI/tool sequence to add the thinking block that falls outside of the current context
182
- // Latest messages are ordered first.
183
518
  let assistantIndex = -1;
184
519
  for (let i = 0; i < context.length; i++) {
185
520
  const currentMessage = context[i];
@@ -192,7 +527,10 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
192
527
  }
193
528
  }
194
529
  if (assistantIndex === -1) {
195
- throw new Error('Context window exceeded: aggressive pruning removed all AI messages (likely due to an oversized tool response). Increase max context tokens or reduce tool output size.');
530
+ // No AI messages survived pruning skip thinking block reattachment.
531
+ // The caller handles empty/insufficient context via overflow recovery.
532
+ result.context = context.reverse();
533
+ return result;
196
534
  }
197
535
  thinkingStartIndex = originalLength - 1 - assistantIndex;
198
536
  const thinkingTokenCount = tokenCounter(new messages.AIMessage({ content: [thinkingBlock] }));
@@ -204,7 +542,6 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
204
542
  return result;
205
543
  }
206
544
  const thinkingMessage = context[assistantIndex];
207
- // now we need to an additional round of pruning but making the thinking block fit
208
545
  const newThinkingMessageTokenCount = (indexTokenCountMap[thinkingStartIndex] ?? 0) + thinkingTokenCount;
209
546
  remainingContextTokens = initialContextTokens - newThinkingMessageTokenCount;
210
547
  currentTokenCount = 3;
@@ -269,13 +606,271 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
269
606
/**
 * Validates that a value is a usable token count: a finite number
 * strictly greater than zero.
 *
 * @param {unknown} value - Candidate token count (e.g. from provider usage metadata).
 * @returns {boolean} `true` for positive finite numbers; `false` for
 *   non-numbers, NaN, Infinity, zero, and negatives.
 */
function checkValidNumber(value) {
    // Number.isFinite rejects non-numbers, NaN, and ±Infinity in a single
    // check; the previous `typeof … && !isNaN(…)` wrongly accepted Infinity,
    // which is never a meaningful token count.
    return Number.isFinite(value) && value > 0;
}
609
/**
 * Observation masking: swaps the content of already-consumed ToolMessages
 * for compact head+tail excerpts that act as informative placeholders.
 *
 * A tool result counts as "consumed" once a later AI message carries
 * substantive text (not just tool calls) — i.e. the model has already read
 * the result and responded to it. The most recent, not-yet-consumed tool
 * outputs are left untouched so the model can still act on them.
 *
 * AI messages themselves are never masked: they hold the model's own
 * reasoning and conclusions, which keeps it from redoing work after the
 * underlying tool output has been masked away.
 *
 * @param params - Carries the live message array (mutated in place), the
 *   indexTokenCountMap (recounted for masked entries), the tokenCounter,
 *   an optional availableRawBudget (raw tokens, converted to chars at
 *   ~4 chars/token), and an optional originalContentStore/onContentStored
 *   pair for preserving pre-mask content for the summarizer.
 * @returns The number of tool messages that were masked.
 */
function maskConsumedToolResults(params) {
    const { messages: msgList, indexTokenCountMap, tokenCounter } = params;
    // True when an AI message contains real prose, not only tool-call blocks.
    const hasSubstantiveText = (aiMsg) => {
        if (typeof aiMsg.content === 'string') {
            return aiMsg.content.trim().length > 0;
        }
        return (Array.isArray(aiMsg.content) &&
            aiMsg.content.some((part) => typeof part === 'object' &&
                part.type === 'text' &&
                typeof part.text === 'string' &&
                part.text.trim().length > 0));
    };
    // Pass 1 (newest → oldest): a tool message is consumed once any newer
    // substantive AI message has been seen.
    const consumedIndices = [];
    let textAISeen = false;
    for (let idx = msgList.length - 1; idx >= 0; idx--) {
        const kind = msgList[idx].getType();
        if (kind === 'ai' && hasSubstantiveText(msgList[idx])) {
            textAISeen = true;
        }
        else if (kind === 'tool' && textAISeen) {
            consumedIndices.push(idx);
        }
    }
    if (consumedIndices.length === 0) {
        return 0;
    }
    // Flip to oldest-first so the recency ramp below weights newer results higher.
    consumedIndices.reverse();
    const { availableRawBudget } = params;
    const budgetChars = availableRawBudget != null && availableRawBudget > 0
        ? availableRawBudget * 4
        : 0;
    const total = consumedIndices.length;
    let masked = 0;
    for (const [order, idx] of consumedIndices.entries()) {
        const original = msgList[idx];
        const text = original.content;
        if (typeof text !== 'string') {
            continue;
        }
        let cap = MASKED_RESULT_MAX_CHARS;
        if (budgetChars > 0) {
            // Linear recency ramp: oldest gets weight 0.2, newest 1.0.
            const position = total > 1 ? order / (total - 1) : 1;
            const weight = 0.2 + 0.8 * position;
            // The ramp's weights average 0.6, so they sum to 0.6 * total.
            const weightSum = total > 1 ? 0.6 * total : 1;
            cap = Math.max(MASKED_RESULT_MAX_CHARS, Math.floor((weight / weightSum) * budgetChars));
        }
        if (text.length <= cap) {
            continue;
        }
        // Preserve the pre-mask content once per index for the summarizer.
        const store = params.originalContentStore;
        if (store && !store.has(idx)) {
            store.set(idx, text);
            params.onContentStored?.(text.length);
        }
        const replacement = new messages.ToolMessage({
            content: truncation.truncateToolResultContent(text, cap),
            tool_call_id: original.tool_call_id,
            name: original.name,
            id: original.id,
            additional_kwargs: original.additional_kwargs,
            response_metadata: original.response_metadata,
        });
        msgList[idx] = replacement;
        indexTokenCountMap[idx] = tokenCounter(replacement);
        masked++;
    }
    return masked;
}
702
/**
 * Pre-flight truncation: shrinks oversized ToolMessage content before the
 * main backward-iteration pruning runs. Unlike the ingestion guard (which
 * caps results at tool-execution time), this applies per turn against the
 * current context budget, which shrinks as the conversation grows.
 *
 * Each truncated message is recounted via tokenCounter and its entry in
 * indexTokenCountMap refreshed, so downstream pruning sees accurate counts.
 *
 * @param params - Carries the live message array (mutated in place),
 *   maxContextTokens for the base character cap, the indexTokenCountMap,
 *   and the tokenCounter used to recount truncated messages.
 * @returns The number of tool messages that were truncated.
 */
function preFlightTruncateToolResults(params) {
    const { messages: msgList, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
    const baseMaxChars = truncation.calculateMaxToolResultChars(maxContextTokens);
    // Collect tool-message positions in conversation order.
    const toolIndices = msgList.reduce((acc, msg, i) => {
        if (msg.getType() === 'tool') {
            acc.push(i);
        }
        return acc;
    }, []);
    let truncated = 0;
    const lastSlot = toolIndices.length - 1;
    toolIndices.forEach((idx, slot) => {
        const original = msgList[idx];
        const text = original.content;
        if (typeof text !== 'string') {
            return;
        }
        // Recency ramp: the oldest result keeps 20% of the base cap, the newest 100%.
        const position = lastSlot > 0 ? slot / lastSlot : 1;
        const cap = Math.max(200, Math.floor(baseMaxChars * (0.2 + 0.8 * position)));
        if (text.length <= cap) {
            return;
        }
        const replacement = new messages.ToolMessage({
            content: truncation.truncateToolResultContent(text, cap),
            tool_call_id: original.tool_call_id,
            name: original.name,
            id: original.id,
            additional_kwargs: original.additional_kwargs,
            response_metadata: original.response_metadata,
        });
        msgList[idx] = replacement;
        indexTokenCountMap[idx] = tokenCounter(replacement);
        truncated++;
    });
    return truncated;
}
751
/**
 * Pre-flight truncation for oversized `tool_use` inputs inside AI messages.
 *
 * Tool-call arguments can be enormous — e.g. code-evaluation payloads from
 * MCP tools like chrome-devtools. Because those calls already executed, the
 * model only needs a sketch of what was invoked, not the full arguments, and
 * trimming them here can keep whole messages from being pruned later.
 *
 * The per-input cap is 15% of the context window in estimated characters
 * (~4 chars/token), never exceeding 200K chars.
 *
 * @param params - Carries the live message array (mutated in place),
 *   maxContextTokens for deriving the per-input cap, the indexTokenCountMap,
 *   and the tokenCounter used to recount rewritten messages.
 * @returns The number of AI messages whose tool_use inputs were truncated.
 */
function preFlightTruncateToolCallInputs(params) {
    const { messages: msgList, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
    const inputCharCap = Math.min(Math.floor(maxContextTokens * 0.15) * 4, 200_000);
    let truncated = 0;
    for (let idx = 0; idx < msgList.length; idx++) {
        const msg = msgList[idx];
        if (msg.getType() !== 'ai' || !Array.isArray(msg.content)) {
            continue;
        }
        let changed = false;
        const nextContent = msg.content.map((block) => {
            if (typeof block !== 'object') {
                return block;
            }
            if (block.type !== 'tool_use' && block.type !== 'tool_call') {
                return block;
            }
            const input = block.input;
            if (input == null) {
                return block;
            }
            const raw = typeof input === 'string' ? input : JSON.stringify(input);
            if (raw.length <= inputCharCap) {
                return block;
            }
            changed = true;
            // Replaces original input with { _truncated, _originalChars } —
            // safe because the tool call already executed in a prior turn.
            return {
                ...block,
                input: truncation.truncateToolInput(raw, inputCharCap),
            };
        });
        if (!changed) {
            continue;
        }
        // Mirror the truncation onto the structured tool_calls array.
        const nextToolCalls = (msg.tool_calls ?? []).map((call) => {
            const rawArgs = JSON.stringify(call.args);
            if (rawArgs.length <= inputCharCap) {
                return call;
            }
            // Replaces original args with { _truncated, _originalChars } —
            // safe because the tool call already executed in a prior turn.
            return {
                ...call,
                args: truncation.truncateToolInput(rawArgs, inputCharCap),
            };
        });
        msgList[idx] = new messages.AIMessage({
            ...msg,
            content: nextContent,
            tool_calls: nextToolCalls.length > 0 ? nextToolCalls : undefined,
        });
        indexTokenCountMap[idx] = tokenCounter(msgList[idx]);
        truncated++;
    }
    return truncated;
}
272
828
  function createPruneMessages(factoryParams) {
273
829
  const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
274
830
  let lastTurnStartIndex = factoryParams.startIndex;
275
831
  let lastCutOffIndex = 0;
276
- let totalTokens = Object.values(indexTokenCountMap).reduce((a = 0, b = 0) => a + b, 0);
832
+ let totalTokens = 0;
833
+ for (const key in indexTokenCountMap) {
834
+ totalTokens += indexTokenCountMap[key] ?? 0;
835
+ }
277
836
  let runThinkingStartIndex = -1;
837
+ /** Cumulative raw tiktoken tokens we've sent to the provider (messages only,
838
+ * excludes instruction overhead and new outputs not yet seen by provider). */
839
+ let cumulativeRawSent = 0;
840
+ /** Cumulative provider-reported message tokens (providerInput - instructionOverhead). */
841
+ let cumulativeProviderReported = 0;
842
+ /** Stable calibration ratio = cumulativeProviderReported / cumulativeRawSent.
843
+ * Converges monotonically as data accumulates. Falls back to seeded value. */
844
+ let calibrationRatio = factoryParams.calibrationRatio != null && factoryParams.calibrationRatio > 0
845
+ ? factoryParams.calibrationRatio
846
+ : 1;
847
+ /** Best observed instruction overhead from a near-zero variance turn.
848
+ * Self-seeds from provider observations within the run. */
849
+ let bestInstructionOverhead;
850
+ let bestVarianceAbs = Infinity;
851
+ /** Local estimate at the time bestInstructionOverhead was observed.
852
+ * Used to invalidate the cached overhead when instructions change
853
+ * mid-run (e.g. tool discovery adds tools to the bound set). */
854
+ let bestInstructionEstimate;
855
+ /** Original (pre-masking) tool result content keyed by message index.
856
+ * Allows the summarizer to see full tool outputs even after masking
857
+ * has truncated them in the live message array. Cleared when the
858
+ * pruner is recreated after summarization. */
859
+ const originalToolContent = new Map();
860
+ let originalToolContentSize = 0;
861
+ const contextPruningSettings$1 = contextPruningSettings.resolveContextPruningSettings(factoryParams.contextPruningConfig);
278
862
  return function pruneMessages(params) {
863
+ if (params.messages.length === 0) {
864
+ return {
865
+ context: [],
866
+ indexTokenCountMap,
867
+ messagesToRefine: [],
868
+ prePruneContextTokens: 0,
869
+ remainingContextTokens: factoryParams.maxTokens,
870
+ calibrationRatio,
871
+ resolvedInstructionOverhead: bestInstructionOverhead,
872
+ };
873
+ }
279
874
  if (factoryParams.provider === _enum.Providers.OPENAI &&
280
875
  factoryParams.thinkingEnabled === true) {
281
876
  for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
@@ -312,70 +907,309 @@ function createPruneMessages(factoryParams) {
312
907
  checkValidNumber(params.usageMetadata.input_token_details.cache_read)))) &&
313
908
  checkValidNumber(params.usageMetadata.output_tokens)) {
314
909
  currentUsage = calculateTotalTokens(params.usageMetadata);
315
- totalTokens = currentUsage.total_tokens;
316
910
  }
317
911
  const newOutputs = new Set();
912
+ let outputTokensAssigned = false;
318
913
  for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
319
914
  const message = params.messages[i];
320
- if (i === lastTurnStartIndex &&
321
- indexTokenCountMap[i] === undefined &&
322
- currentUsage) {
915
+ if (indexTokenCountMap[i] !== undefined) {
916
+ continue;
917
+ }
918
+ // Assign output_tokens to the first uncounted AI message — this is the
919
+ // model's response. Previous code blindly targeted lastTurnStartIndex
920
+ // which could hit a pre-counted HumanMessage or miss the AI entirely.
921
+ if (!outputTokensAssigned && currentUsage && message.getType() === 'ai') {
323
922
  indexTokenCountMap[i] = currentUsage.output_tokens;
923
+ newOutputs.add(i);
924
+ outputTokensAssigned = true;
324
925
  }
325
- else if (indexTokenCountMap[i] === undefined) {
926
+ else {
927
+ // Always store raw tiktoken count — the map stays in raw space.
928
+ // Budget decisions multiply by calibrationRatio on the fly.
326
929
  indexTokenCountMap[i] = factoryParams.tokenCounter(message);
327
930
  if (currentUsage) {
328
931
  newOutputs.add(i);
329
932
  }
330
- totalTokens += indexTokenCountMap[i] ?? 0;
331
933
  }
934
+ totalTokens += indexTokenCountMap[i] ?? 0;
332
935
  }
333
- // If `currentUsage` is defined, we need to distribute the current total tokens to our `indexTokenCountMap`,
334
- // We must distribute it in a weighted manner, so that the total token count is equal to `currentUsage.total_tokens`,
335
- // relative the manually counted tokens in `indexTokenCountMap`.
336
- // EDGE CASE: when the resulting context gets pruned, we should not distribute the usage for messages that are not in the context.
337
- if (currentUsage) {
338
- let totalIndexTokens = 0;
339
- if (params.messages[0].getType() === 'system') {
340
- totalIndexTokens += indexTokenCountMap[0] ?? 0;
936
+ // Cumulative calibration: accumulate raw tiktoken tokens and provider-
937
+ // reported tokens across turns. The ratio of the two running totals
938
+ // converges monotonically to the true provider multiplier — no EMA,
939
+ // no per-turn oscillation, no map mutation.
940
+ if (currentUsage && params.totalTokensFresh !== false) {
941
+ const instructionOverhead = factoryParams.getInstructionTokens?.() ?? 0;
942
+ const providerInputTokens = params.lastCallUsage?.inputTokens ?? currentUsage.input_tokens;
943
+ // Sum raw tiktoken counts for messages the provider saw (excludes
944
+ // new outputs from this turn — the provider hasn't seen them yet).
945
+ let rawSentThisTurn = 0;
946
+ const firstIsSystem = params.messages.length > 0 && params.messages[0].getType() === 'system';
947
+ if (firstIsSystem) {
948
+ rawSentThisTurn += indexTokenCountMap[0] ?? 0;
341
949
  }
342
950
  for (let i = lastCutOffIndex; i < params.messages.length; i++) {
343
- if (i === 0 && params.messages[0].getType() === 'system') {
951
+ if ((i === 0 && firstIsSystem) || newOutputs.has(i)) {
344
952
  continue;
345
953
  }
346
- if (newOutputs.has(i)) {
347
- continue;
348
- }
349
- totalIndexTokens += indexTokenCountMap[i] ?? 0;
350
- }
351
- // Calculate ratio based only on messages that remain in the context
352
- const ratio = currentUsage.total_tokens / totalIndexTokens;
353
- const isRatioSafe = ratio >= 1 / 3 && ratio <= 2.5;
354
- // Apply the ratio adjustment only to messages at or after lastCutOffIndex, and only if the ratio is safe
355
- if (isRatioSafe) {
356
- if (params.messages[0].getType() === 'system' &&
357
- lastCutOffIndex !== 0) {
358
- indexTokenCountMap[0] = Math.round((indexTokenCountMap[0] ?? 0) * ratio);
359
- }
360
- for (let i = lastCutOffIndex; i < params.messages.length; i++) {
361
- if (newOutputs.has(i)) {
362
- continue;
954
+ rawSentThisTurn += indexTokenCountMap[i] ?? 0;
955
+ }
956
+ const providerMessageTokens = Math.max(0, providerInputTokens - instructionOverhead);
957
+ if (rawSentThisTurn > 0 && providerMessageTokens > 0) {
958
+ cumulativeRawSent += rawSentThisTurn;
959
+ cumulativeProviderReported += providerMessageTokens;
960
+ const newRatio = cumulativeProviderReported / cumulativeRawSent;
961
+ calibrationRatio = Math.max(CALIBRATION_RATIO_MIN, Math.min(CALIBRATION_RATIO_MAX, newRatio));
962
+ }
963
+ const calibratedOurTotal = instructionOverhead + rawSentThisTurn * calibrationRatio;
964
+ const overallRatio = calibratedOurTotal > 0 ? providerInputTokens / calibratedOurTotal : 0;
965
+ const variancePct = Math.round((overallRatio - 1) * 100);
966
+ const absVariance = Math.abs(overallRatio - 1);
967
+ if (absVariance < bestVarianceAbs && rawSentThisTurn > 0) {
968
+ bestVarianceAbs = absVariance;
969
+ bestInstructionOverhead = Math.max(0, Math.round(providerInputTokens - rawSentThisTurn * calibrationRatio));
970
+ bestInstructionEstimate = factoryParams.getInstructionTokens?.() ?? 0;
971
+ }
972
+ factoryParams.log?.('debug', 'Calibration observed', {
973
+ providerInputTokens,
974
+ calibratedEstimate: Math.round(calibratedOurTotal),
975
+ variance: `${variancePct > 0 ? '+' : ''}${variancePct}%`,
976
+ calibrationRatio: Math.round(calibrationRatio * 100) / 100,
977
+ instructionOverhead,
978
+ cumulativeRawSent,
979
+ cumulativeProviderReported,
980
+ });
981
+ }
982
+ // Computed BEFORE pre-flight truncation so the effective budget can drive
983
+ // truncation thresholds — without this, thresholds based on maxTokens are
984
+ // too generous and leave individual messages larger than the actual budget.
985
+ const estimatedInstructionTokens = factoryParams.getInstructionTokens?.() ?? 0;
986
+ const estimateStable = bestInstructionEstimate != null &&
987
+ bestInstructionEstimate > 0 &&
988
+ Math.abs(estimatedInstructionTokens - bestInstructionEstimate) /
989
+ bestInstructionEstimate <
990
+ 0.1;
991
+ const currentInstructionTokens = bestInstructionOverhead != null &&
992
+ bestInstructionOverhead <= estimatedInstructionTokens &&
993
+ estimateStable
994
+ ? bestInstructionOverhead
995
+ : estimatedInstructionTokens;
996
+ const reserveRatio = factoryParams.reserveRatio ?? DEFAULT_RESERVE_RATIO;
997
+ const reserveTokens = reserveRatio > 0 && reserveRatio < 1
998
+ ? Math.round(factoryParams.maxTokens * reserveRatio)
999
+ : 0;
1000
+ const pruningBudget = factoryParams.maxTokens - reserveTokens;
1001
+ const effectiveMaxTokens = Math.max(0, pruningBudget - currentInstructionTokens);
1002
+ let calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1003
+ factoryParams.log?.('debug', 'Budget', {
1004
+ maxTokens: factoryParams.maxTokens,
1005
+ pruningBudget,
1006
+ effectiveMax: effectiveMaxTokens,
1007
+ instructionTokens: currentInstructionTokens,
1008
+ messageCount: params.messages.length,
1009
+ calibratedTotalTokens,
1010
+ calibrationRatio: Math.round(calibrationRatio * 100) / 100,
1011
+ });
1012
+ // When instructions alone consume the entire budget, no message can
1013
+ // fit regardless of truncation. Short-circuit: yield all messages for
1014
+ // summarization and return an empty context so the Graph can route to
1015
+ // the summarize node immediately instead of falling through to the
1016
+ // emergency path that would reach the same outcome more expensively.
1017
+ if (effectiveMaxTokens === 0 &&
1018
+ factoryParams.summarizationEnabled === true &&
1019
+ params.messages.length > 0) {
1020
+ factoryParams.log?.('warn', 'Instructions consume entire budget — yielding all messages for summarization', {
1021
+ instructionTokens: currentInstructionTokens,
1022
+ pruningBudget,
1023
+ messageCount: params.messages.length,
1024
+ });
1025
+ lastTurnStartIndex = params.messages.length;
1026
+ return {
1027
+ context: [],
1028
+ indexTokenCountMap,
1029
+ messagesToRefine: [...params.messages],
1030
+ prePruneContextTokens: calibratedTotalTokens,
1031
+ remainingContextTokens: 0,
1032
+ contextPressure: pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0,
1033
+ calibrationRatio,
1034
+ resolvedInstructionOverhead: bestInstructionOverhead,
1035
+ };
1036
+ }
1037
+ // ---------------------------------------------------------------------------
1038
+ // Progressive context fading — inspired by Claude Code's staged compaction.
1039
+ // Below 80%: no modifications, tool results retain full size.
1040
+ // Above 80%: graduated truncation with increasing aggression per pressure band.
1041
+ // Recency weighting ensures older results fade first, newer results last.
1042
+ //
1043
+ // At the gentlest level, truncation preserves most content (head+tail).
1044
+ // At the most aggressive level, the result is effectively a one-line placeholder.
1045
+ //
1046
+ // 80%: gentle — budget factor 1.0, oldest get light truncation
1047
+ // 85%: moderate — budget factor 0.50, older results shrink significantly
1048
+ // 90%: aggressive — budget factor 0.20, most results heavily truncated
1049
+ // 99%: emergency — budget factor 0.05, effectively placeholders for old results
1050
+ // ---------------------------------------------------------------------------
1051
+ totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
1052
+ calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1053
+ const contextPressure = pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0;
1054
+ let preFlightResultCount = 0;
1055
+ let preFlightInputCount = 0;
1056
+ // -----------------------------------------------------------------------
1057
+ // Observation masking (80%+ pressure, both paths):
1058
+ // Replace consumed ToolMessage content with tight head+tail placeholders.
1059
+ // AI messages stay intact so the model can read its own prior reasoning
1060
+ // and won't repeat work. Unconsumed results (latest tool outputs the
1061
+ // model hasn't acted on yet) stay full.
1062
+ //
1063
+ // When summarization is enabled, snapshot messages first so the
1064
+ // summarizer can see the full originals when compaction fires.
1065
+ // -----------------------------------------------------------------------
1066
+ let observationsMasked = 0;
1067
+ if (contextPressure >= PRESSURE_THRESHOLD_MASKING) {
1068
+ const rawMessageBudget = calibrationRatio > 0
1069
+ ? Math.floor(effectiveMaxTokens / calibrationRatio)
1070
+ : effectiveMaxTokens;
1071
+ // When summarization is enabled, use half the reserve ratio as extra
1072
+ // masking headroom — the LLM keeps more context while the summarizer
1073
+ // gets full content from originalToolContent regardless. The remaining
1074
+ // half of the reserve covers estimation errors.
1075
+ const reserveHeadroom = factoryParams.summarizationEnabled === true
1076
+ ? Math.floor(rawMessageBudget *
1077
+ (factoryParams.reserveRatio ?? DEFAULT_RESERVE_RATIO) *
1078
+ 0.5)
1079
+ : 0;
1080
+ observationsMasked = maskConsumedToolResults({
1081
+ messages: params.messages,
1082
+ indexTokenCountMap,
1083
+ tokenCounter: factoryParams.tokenCounter,
1084
+ availableRawBudget: rawMessageBudget + reserveHeadroom,
1085
+ originalContentStore: factoryParams.summarizationEnabled === true
1086
+ ? originalToolContent
1087
+ : undefined,
1088
+ onContentStored: factoryParams.summarizationEnabled === true
1089
+ ? (charLen) => {
1090
+ originalToolContentSize += charLen;
1091
+ while (originalToolContentSize > ORIGINAL_CONTENT_MAX_CHARS &&
1092
+ originalToolContent.size > 0) {
1093
+ const oldest = originalToolContent.keys().next();
1094
+ if (oldest.done === true) {
1095
+ break;
1096
+ }
1097
+ const removed = originalToolContent.get(oldest.value);
1098
+ if (removed != null) {
1099
+ originalToolContentSize -= removed.length;
1100
+ }
1101
+ originalToolContent.delete(oldest.value);
1102
+ }
363
1103
  }
364
- indexTokenCountMap[i] = Math.round((indexTokenCountMap[i] ?? 0) * ratio);
365
- }
1104
+ : undefined,
1105
+ });
1106
+ if (observationsMasked > 0) {
1107
+ cumulativeRawSent = 0;
1108
+ cumulativeProviderReported = 0;
366
1109
  }
367
1110
  }
1111
+ if (contextPressure >= PRESSURE_THRESHOLD_MASKING &&
1112
+ factoryParams.summarizationEnabled !== true) {
1113
+ const budgetFactor = PRESSURE_BANDS.find(([threshold]) => contextPressure >= threshold)?.[1] ?? 1.0;
1114
+ const baseBudget = Math.max(1024, Math.floor(effectiveMaxTokens * budgetFactor));
1115
+ preFlightResultCount = preFlightTruncateToolResults({
1116
+ messages: params.messages,
1117
+ maxContextTokens: baseBudget,
1118
+ indexTokenCountMap,
1119
+ tokenCounter: factoryParams.tokenCounter,
1120
+ });
1121
+ preFlightInputCount = preFlightTruncateToolCallInputs({
1122
+ messages: params.messages,
1123
+ maxContextTokens: baseBudget,
1124
+ indexTokenCountMap,
1125
+ tokenCounter: factoryParams.tokenCounter,
1126
+ });
1127
+ }
1128
+ if (factoryParams.contextPruningConfig?.enabled === true &&
1129
+ factoryParams.summarizationEnabled !== true) {
1130
+ contextPruning.applyContextPruning({
1131
+ messages: params.messages,
1132
+ indexTokenCountMap,
1133
+ tokenCounter: factoryParams.tokenCounter,
1134
+ resolvedSettings: contextPruningSettings$1,
1135
+ });
1136
+ }
1137
+ // Fit-to-budget: when summarization is enabled and individual messages
1138
+ // exceed the effective budget, truncate them so every message can fit in
1139
+ // a single context slot. Without this, oversized tool results (e.g.
1140
+ // take_snapshot at 9K chars) cause empty context → emergency truncation
1141
+ // → immediate re-summarization after just one tool call.
1142
+ //
1143
+ // This is NOT the lossy position-based fading above — it only targets
1144
+ // messages that individually exceed the budget, using the full effective
1145
+ // budget as the cap (not a pressure-scaled fraction).
1146
+ // Fit-to-budget caps are in raw space (divide by ratio) so that after
1147
+ // calibration the truncated results actually fit within the budget.
1148
+ const rawSpaceEffectiveMax = calibrationRatio > 0
1149
+ ? Math.round(effectiveMaxTokens / calibrationRatio)
1150
+ : effectiveMaxTokens;
1151
+ if (factoryParams.summarizationEnabled === true &&
1152
+ rawSpaceEffectiveMax > 0) {
1153
+ preFlightResultCount = preFlightTruncateToolResults({
1154
+ messages: params.messages,
1155
+ maxContextTokens: rawSpaceEffectiveMax,
1156
+ indexTokenCountMap,
1157
+ tokenCounter: factoryParams.tokenCounter,
1158
+ });
1159
+ preFlightInputCount = preFlightTruncateToolCallInputs({
1160
+ messages: params.messages,
1161
+ maxContextTokens: rawSpaceEffectiveMax,
1162
+ indexTokenCountMap,
1163
+ tokenCounter: factoryParams.tokenCounter,
1164
+ });
1165
+ }
1166
+ const preTruncationTotalTokens = totalTokens;
1167
+ totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
1168
+ calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1169
+ const anyAdjustment = observationsMasked > 0 ||
1170
+ preFlightResultCount > 0 ||
1171
+ preFlightInputCount > 0 ||
1172
+ totalTokens !== preTruncationTotalTokens;
1173
+ if (anyAdjustment) {
1174
+ factoryParams.log?.('debug', 'Context adjusted', {
1175
+ contextPressure: Math.round(contextPressure * 100),
1176
+ observationsMasked,
1177
+ toolOutputsTruncated: preFlightResultCount,
1178
+ toolInputsTruncated: preFlightInputCount,
1179
+ tokensBefore: preTruncationTotalTokens,
1180
+ tokensAfter: totalTokens,
1181
+ tokensSaved: preTruncationTotalTokens - totalTokens,
1182
+ });
1183
+ }
368
1184
  lastTurnStartIndex = params.messages.length;
369
- if (lastCutOffIndex === 0 && totalTokens <= factoryParams.maxTokens) {
370
- return { context: params.messages, indexTokenCountMap };
1185
+ if (lastCutOffIndex === 0 &&
1186
+ calibratedTotalTokens + currentInstructionTokens <= pruningBudget) {
1187
+ return {
1188
+ context: params.messages,
1189
+ indexTokenCountMap,
1190
+ messagesToRefine: [],
1191
+ prePruneContextTokens: calibratedTotalTokens,
1192
+ remainingContextTokens: pruningBudget - calibratedTotalTokens - currentInstructionTokens,
1193
+ contextPressure,
1194
+ originalToolContent: originalToolContent.size > 0 ? originalToolContent : undefined,
1195
+ calibrationRatio,
1196
+ resolvedInstructionOverhead: bestInstructionOverhead,
1197
+ };
371
1198
  }
372
- const { context, thinkingStartIndex } = getMessagesWithinTokenLimit({
373
- maxContextTokens: factoryParams.maxTokens,
1199
+ const rawSpaceBudget = calibrationRatio > 0
1200
+ ? Math.round(pruningBudget / calibrationRatio)
1201
+ : pruningBudget;
1202
+ const rawSpaceInstructionTokens = calibrationRatio > 0
1203
+ ? Math.round(currentInstructionTokens / calibrationRatio)
1204
+ : currentInstructionTokens;
1205
+ const { context: initialContext, thinkingStartIndex, messagesToRefine, remainingContextTokens: initialRemainingContextTokens, } = getMessagesWithinTokenLimit({
1206
+ maxContextTokens: rawSpaceBudget,
374
1207
  messages: params.messages,
375
1208
  indexTokenCountMap,
376
1209
  startType: params.startType,
377
1210
  thinkingEnabled: factoryParams.thinkingEnabled,
378
1211
  tokenCounter: factoryParams.tokenCounter,
1212
+ instructionTokens: rawSpaceInstructionTokens,
379
1213
  reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
380
1214
  ? _enum.ContentTypes.REASONING_CONTENT
381
1215
  : _enum.ContentTypes.THINKING,
@@ -383,16 +1217,294 @@ function createPruneMessages(factoryParams) {
383
1217
  ? runThinkingStartIndex
384
1218
  : undefined,
385
1219
  });
1220
+ const { context: repairedContext, reclaimedTokens: initialReclaimedTokens, droppedMessages, } = repairOrphanedToolMessages({
1221
+ context: initialContext,
1222
+ allMessages: params.messages,
1223
+ tokenCounter: factoryParams.tokenCounter,
1224
+ indexTokenCountMap,
1225
+ });
1226
+ const contextBreakdown = repairedContext.map((msg) => {
1227
+ const type = msg.getType();
1228
+ const name = type === 'tool' ? (msg.name ?? 'unknown') : '';
1229
+ return name !== '' ? `${type}(${name})` : type;
1230
+ });
1231
+ factoryParams.log?.('debug', 'Pruning complete', {
1232
+ contextLength: repairedContext.length,
1233
+ contextTypes: contextBreakdown.join(', '),
1234
+ messagesToRefineCount: messagesToRefine.length,
1235
+ droppedOrphans: droppedMessages.length,
1236
+ remainingTokens: initialRemainingContextTokens,
1237
+ });
1238
+ let context = repairedContext;
1239
+ let reclaimedTokens = initialReclaimedTokens;
1240
+ // Orphan repair may drop ToolMessages whose parent AI was pruned.
1241
+ // Append them to messagesToRefine so summarization can still see the
1242
+ // tool results (otherwise the summary says "in progress" for a tool
1243
+ // call that already completed, causing the model to repeat it).
1244
+ if (droppedMessages.length > 0) {
1245
+ messagesToRefine.push(...droppedMessages);
1246
+ }
1247
+ // ---------------------------------------------------------------
1248
+ // Fallback fading: when summarization skipped fading earlier and
1249
+ // pruning still produced an empty context, apply lossy pressure-band
1250
+ // fading and retry. This is a last resort before emergency truncation
1251
+ // — the summarizer already saw the full messages, so fading the
1252
+ // surviving context for the LLM is acceptable.
1253
+ // ---------------------------------------------------------------
1254
+ if (context.length === 0 &&
1255
+ params.messages.length > 0 &&
1256
+ effectiveMaxTokens > 0 &&
1257
+ factoryParams.summarizationEnabled === true) {
1258
+ const fadingBudget = Math.max(1024, effectiveMaxTokens);
1259
+ factoryParams.log?.('debug', 'Fallback fading — empty context with summarization', {
1260
+ messageCount: params.messages.length,
1261
+ effectiveMaxTokens,
1262
+ fadingBudget,
1263
+ });
1264
+ const fadedMessages = [...params.messages];
1265
+ const preFadingTokenCounts = {};
1266
+ for (let i = 0; i < params.messages.length; i++) {
1267
+ preFadingTokenCounts[i] = indexTokenCountMap[i];
1268
+ }
1269
+ preFlightTruncateToolResults({
1270
+ messages: fadedMessages,
1271
+ maxContextTokens: fadingBudget,
1272
+ indexTokenCountMap,
1273
+ tokenCounter: factoryParams.tokenCounter,
1274
+ });
1275
+ preFlightTruncateToolCallInputs({
1276
+ messages: fadedMessages,
1277
+ maxContextTokens: fadingBudget,
1278
+ indexTokenCountMap,
1279
+ tokenCounter: factoryParams.tokenCounter,
1280
+ });
1281
+ const fadingRetry = getMessagesWithinTokenLimit({
1282
+ maxContextTokens: pruningBudget,
1283
+ messages: fadedMessages,
1284
+ indexTokenCountMap,
1285
+ startType: params.startType,
1286
+ thinkingEnabled: factoryParams.thinkingEnabled,
1287
+ tokenCounter: factoryParams.tokenCounter,
1288
+ instructionTokens: currentInstructionTokens,
1289
+ reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
1290
+ ? _enum.ContentTypes.REASONING_CONTENT
1291
+ : _enum.ContentTypes.THINKING,
1292
+ thinkingStartIndex: factoryParams.thinkingEnabled === true
1293
+ ? runThinkingStartIndex
1294
+ : undefined,
1295
+ });
1296
+ const fadingRepaired = repairOrphanedToolMessages({
1297
+ context: fadingRetry.context,
1298
+ allMessages: fadedMessages,
1299
+ tokenCounter: factoryParams.tokenCounter,
1300
+ indexTokenCountMap,
1301
+ });
1302
+ if (fadingRepaired.context.length > 0) {
1303
+ context = fadingRepaired.context;
1304
+ reclaimedTokens = fadingRepaired.reclaimedTokens;
1305
+ messagesToRefine.push(...fadingRetry.messagesToRefine);
1306
+ if (fadingRepaired.droppedMessages.length > 0) {
1307
+ messagesToRefine.push(...fadingRepaired.droppedMessages);
1308
+ }
1309
+ factoryParams.log?.('debug', 'Fallback fading recovered context', {
1310
+ contextLength: context.length,
1311
+ messagesToRefineCount: messagesToRefine.length,
1312
+ remainingTokens: fadingRetry.remainingContextTokens,
1313
+ });
1314
+ for (const [key, value] of Object.entries(preFadingTokenCounts)) {
1315
+ indexTokenCountMap[key] = value;
1316
+ }
1317
+ }
1318
+ else {
1319
+ for (const [key, value] of Object.entries(preFadingTokenCounts)) {
1320
+ indexTokenCountMap[key] = value;
1321
+ }
1322
+ }
1323
+ }
1324
+ // ---------------------------------------------------------------
1325
+ // Emergency truncation: if pruning produced an empty context but
1326
+ // messages exist, aggressively truncate all tool_call inputs and
1327
+ // tool results, then retry. Budget is proportional to the
1328
+ // effective token limit (~4 chars/token, spread across messages)
1329
+ // with a floor of 200 chars so content is never completely blank.
1330
+ // Uses head+tail so the model sees both what was called and the
1331
+ // final outcome (e.g., return value at the end of a script eval).
1332
+ // ---------------------------------------------------------------
1333
+ if (context.length === 0 &&
1334
+ params.messages.length > 0 &&
1335
+ effectiveMaxTokens > 0) {
1336
+ const perMessageTokenBudget = Math.floor(effectiveMaxTokens / Math.max(1, params.messages.length));
1337
+ const emergencyMaxChars = Math.max(200, perMessageTokenBudget * 4);
1338
+ factoryParams.log?.('warn', 'Empty context, entering emergency truncation', {
1339
+ messageCount: params.messages.length,
1340
+ effectiveMax: effectiveMaxTokens,
1341
+ emergencyMaxChars,
1342
+ });
1343
+ // Clone the messages array so emergency truncation doesn't permanently
1344
+ // mutate graph state. The originals remain intact for future turns
1345
+ // where more budget may be available. Also snapshot indexTokenCountMap
1346
+ // entries so the closure doesn't retain stale (too-small) counts for
1347
+ // the original un-truncated messages on the next turn.
1348
+ const emergencyMessages = [...params.messages];
1349
+ const preEmergencyTokenCounts = {};
1350
+ for (let i = 0; i < params.messages.length; i++) {
1351
+ preEmergencyTokenCounts[i] = indexTokenCountMap[i];
1352
+ }
1353
+ try {
1354
+ let emergencyTruncatedCount = 0;
1355
+ for (let i = 0; i < emergencyMessages.length; i++) {
1356
+ const message = emergencyMessages[i];
1357
+ if (message.getType() === 'tool') {
1358
+ const content = message.content;
1359
+ if (typeof content === 'string' &&
1360
+ content.length > emergencyMaxChars) {
1361
+ const cloned = new messages.ToolMessage({
1362
+ content: truncation.truncateToolResultContent(content, emergencyMaxChars),
1363
+ tool_call_id: message.tool_call_id,
1364
+ name: message.name,
1365
+ id: message.id,
1366
+ additional_kwargs: message.additional_kwargs,
1367
+ response_metadata: message.response_metadata,
1368
+ });
1369
+ emergencyMessages[i] = cloned;
1370
+ indexTokenCountMap[i] = factoryParams.tokenCounter(cloned);
1371
+ emergencyTruncatedCount++;
1372
+ }
1373
+ }
1374
+ if (message.getType() === 'ai' && Array.isArray(message.content)) {
1375
+ const aiMsg = message;
1376
+ const contentBlocks = aiMsg.content;
1377
+ const needsTruncation = contentBlocks.some((block) => {
1378
+ if (typeof block !== 'object')
1379
+ return false;
1380
+ const record = block;
1381
+ if ((record.type === 'tool_use' || record.type === 'tool_call') &&
1382
+ record.input != null) {
1383
+ const serialized = typeof record.input === 'string'
1384
+ ? record.input
1385
+ : JSON.stringify(record.input);
1386
+ return serialized.length > emergencyMaxChars;
1387
+ }
1388
+ return false;
1389
+ });
1390
+ if (needsTruncation) {
1391
+ const newContent = contentBlocks.map((block) => {
1392
+ if (typeof block !== 'object')
1393
+ return block;
1394
+ const record = block;
1395
+ if ((record.type === 'tool_use' || record.type === 'tool_call') &&
1396
+ record.input != null) {
1397
+ const serialized = typeof record.input === 'string'
1398
+ ? record.input
1399
+ : JSON.stringify(record.input);
1400
+ if (serialized.length > emergencyMaxChars) {
1401
+ // Replaces original input with { _truncated, _originalChars } —
1402
+ // safe because the tool call already executed in a prior turn.
1403
+ return {
1404
+ ...record,
1405
+ input: truncation.truncateToolInput(serialized, emergencyMaxChars),
1406
+ };
1407
+ }
1408
+ }
1409
+ return block;
1410
+ });
1411
+ const newToolCalls = (aiMsg.tool_calls ?? []).map((tc) => {
1412
+ const serializedArgs = JSON.stringify(tc.args);
1413
+ if (serializedArgs.length > emergencyMaxChars) {
1414
+ // Replaces original args with { _truncated, _originalChars } —
1415
+ // safe because the tool call already executed in a prior turn.
1416
+ return {
1417
+ ...tc,
1418
+ args: truncation.truncateToolInput(serializedArgs, emergencyMaxChars),
1419
+ };
1420
+ }
1421
+ return tc;
1422
+ });
1423
+ emergencyMessages[i] = new messages.AIMessage({
1424
+ ...aiMsg,
1425
+ content: newContent,
1426
+ tool_calls: newToolCalls.length > 0 ? newToolCalls : undefined,
1427
+ });
1428
+ indexTokenCountMap[i] = factoryParams.tokenCounter(emergencyMessages[i]);
1429
+ emergencyTruncatedCount++;
1430
+ }
1431
+ }
1432
+ }
1433
+ factoryParams.log?.('info', 'Emergency truncation complete');
1434
+ factoryParams.log?.('debug', 'Emergency truncation details', {
1435
+ truncatedCount: emergencyTruncatedCount,
1436
+ emergencyMaxChars,
1437
+ });
1438
+ const retryResult = getMessagesWithinTokenLimit({
1439
+ maxContextTokens: pruningBudget,
1440
+ messages: emergencyMessages,
1441
+ indexTokenCountMap,
1442
+ startType: params.startType,
1443
+ thinkingEnabled: factoryParams.thinkingEnabled,
1444
+ tokenCounter: factoryParams.tokenCounter,
1445
+ instructionTokens: currentInstructionTokens,
1446
+ reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
1447
+ ? _enum.ContentTypes.REASONING_CONTENT
1448
+ : _enum.ContentTypes.THINKING,
1449
+ thinkingStartIndex: factoryParams.thinkingEnabled === true
1450
+ ? runThinkingStartIndex
1451
+ : undefined,
1452
+ });
1453
+ const repaired = repairOrphanedToolMessages({
1454
+ context: retryResult.context,
1455
+ allMessages: emergencyMessages,
1456
+ tokenCounter: factoryParams.tokenCounter,
1457
+ indexTokenCountMap,
1458
+ });
1459
+ context = repaired.context;
1460
+ reclaimedTokens = repaired.reclaimedTokens;
1461
+ messagesToRefine.push(...retryResult.messagesToRefine);
1462
+ if (repaired.droppedMessages.length > 0) {
1463
+ messagesToRefine.push(...repaired.droppedMessages);
1464
+ }
1465
+ factoryParams.log?.('debug', 'Emergency truncation retry result', {
1466
+ contextLength: context.length,
1467
+ messagesToRefineCount: messagesToRefine.length,
1468
+ remainingTokens: retryResult.remainingContextTokens,
1469
+ });
1470
+ }
1471
+ finally {
1472
+ // Restore the closure's indexTokenCountMap to pre-emergency values so the
1473
+ // next turn counts old messages at their original (un-truncated) size.
1474
+ // The emergency-truncated counts were only needed for this turn's
1475
+ // getMessagesWithinTokenLimit retry.
1476
+ for (const [key, value] of Object.entries(preEmergencyTokenCounts)) {
1477
+ indexTokenCountMap[key] = value;
1478
+ }
1479
+ }
1480
+ }
1481
+ const remainingContextTokens = Math.max(0, Math.min(pruningBudget, initialRemainingContextTokens + reclaimedTokens));
386
1482
  runThinkingStartIndex = thinkingStartIndex ?? -1;
387
1483
  /** The index is the first value of `context`, index relative to `params.messages` */
388
1484
  lastCutOffIndex = Math.max(params.messages.length -
389
1485
  (context.length - (context[0]?.getType() === 'system' ? 1 : 0)), 0);
390
- return { context, indexTokenCountMap };
1486
+ return {
1487
+ context,
1488
+ indexTokenCountMap,
1489
+ messagesToRefine,
1490
+ prePruneContextTokens: calibratedTotalTokens,
1491
+ remainingContextTokens,
1492
+ contextPressure,
1493
+ originalToolContent: originalToolContent.size > 0 ? originalToolContent : undefined,
1494
+ calibrationRatio,
1495
+ resolvedInstructionOverhead: bestInstructionOverhead,
1496
+ };
391
1497
  };
392
1498
  }
393
1499
 
1500
+ exports.DEFAULT_RESERVE_RATIO = DEFAULT_RESERVE_RATIO;
394
1501
  exports.calculateTotalTokens = calculateTotalTokens;
395
1502
  exports.checkValidNumber = checkValidNumber;
396
1503
  exports.createPruneMessages = createPruneMessages;
397
1504
  exports.getMessagesWithinTokenLimit = getMessagesWithinTokenLimit;
1505
+ exports.maskConsumedToolResults = maskConsumedToolResults;
1506
+ exports.preFlightTruncateToolCallInputs = preFlightTruncateToolCallInputs;
1507
+ exports.preFlightTruncateToolResults = preFlightTruncateToolResults;
1508
+ exports.repairOrphanedToolMessages = repairOrphanedToolMessages;
1509
+ exports.sanitizeOrphanToolBlocks = sanitizeOrphanToolBlocks;
398
1510
  //# sourceMappingURL=prune.cjs.map