@librechat/agents 3.2.32 → 3.2.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (573) hide show
  1. package/dist/cjs/_virtual/_rolldown/runtime.cjs +23 -0
  2. package/dist/cjs/agents/AgentContext.cjs +844 -1046
  3. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  4. package/dist/cjs/common/constants.cjs +13 -13
  5. package/dist/cjs/common/constants.cjs.map +1 -1
  6. package/dist/cjs/common/enum.cjs +233 -240
  7. package/dist/cjs/common/enum.cjs.map +1 -1
  8. package/dist/cjs/common/index.cjs +2 -0
  9. package/dist/cjs/events.cjs +121 -169
  10. package/dist/cjs/events.cjs.map +1 -1
  11. package/dist/cjs/graphs/Graph.cjs +1389 -1807
  12. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  13. package/dist/cjs/graphs/MultiAgentGraph.cjs +713 -945
  14. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  15. package/dist/cjs/graphs/index.cjs +2 -0
  16. package/dist/cjs/hitl/askUserQuestion.cjs +60 -62
  17. package/dist/cjs/hitl/askUserQuestion.cjs.map +1 -1
  18. package/dist/cjs/hitl/index.cjs +1 -0
  19. package/dist/cjs/hooks/HookRegistry.cjs +176 -202
  20. package/dist/cjs/hooks/HookRegistry.cjs.map +1 -1
  21. package/dist/cjs/hooks/createToolPolicyHook.cjs +71 -101
  22. package/dist/cjs/hooks/createToolPolicyHook.cjs.map +1 -1
  23. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +170 -273
  24. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -1
  25. package/dist/cjs/hooks/executeHooks.cjs +227 -282
  26. package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
  27. package/dist/cjs/hooks/index.cjs +6 -0
  28. package/dist/cjs/hooks/matchers.cjs +196 -230
  29. package/dist/cjs/hooks/matchers.cjs.map +1 -1
  30. package/dist/cjs/hooks/types.cjs +24 -24
  31. package/dist/cjs/hooks/types.cjs.map +1 -1
  32. package/dist/cjs/instrumentation.cjs +110 -137
  33. package/dist/cjs/instrumentation.cjs.map +1 -1
  34. package/dist/cjs/langchain/google-common.cjs +0 -3
  35. package/dist/cjs/langchain/index.cjs +80 -43
  36. package/dist/cjs/langchain/language_models/chat_models.cjs +0 -3
  37. package/dist/cjs/langchain/messages/tool.cjs +0 -3
  38. package/dist/cjs/langchain/messages.cjs +35 -18
  39. package/dist/cjs/langchain/openai.cjs +0 -3
  40. package/dist/cjs/langchain/prompts.cjs +5 -8
  41. package/dist/cjs/langchain/runnables.cjs +11 -10
  42. package/dist/cjs/langchain/tools.cjs +14 -11
  43. package/dist/cjs/langchain/utils/env.cjs +5 -8
  44. package/dist/cjs/langfuse.cjs +60 -79
  45. package/dist/cjs/langfuse.cjs.map +1 -1
  46. package/dist/cjs/langfuseToolOutputTracing.cjs +267 -399
  47. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  48. package/dist/cjs/llm/anthropic/index.cjs +432 -562
  49. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  50. package/dist/cjs/llm/anthropic/types.cjs +23 -47
  51. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  52. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +441 -731
  53. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  54. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +171 -256
  55. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  56. package/dist/cjs/llm/anthropic/utils/output_parsers.cjs +2 -0
  57. package/dist/cjs/llm/anthropic/utils/tools.cjs +12 -26
  58. package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
  59. package/dist/cjs/llm/bedrock/index.cjs +195 -240
  60. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  61. package/dist/cjs/llm/bedrock/toolCache.cjs +84 -106
  62. package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
  63. package/dist/cjs/llm/bedrock/utils/index.cjs +2 -0
  64. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +357 -620
  65. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  66. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +105 -149
  67. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  68. package/dist/cjs/llm/fake.cjs +86 -96
  69. package/dist/cjs/llm/fake.cjs.map +1 -1
  70. package/dist/cjs/llm/google/index.cjs +183 -237
  71. package/dist/cjs/llm/google/index.cjs.map +1 -1
  72. package/dist/cjs/llm/google/utils/common.cjs +398 -674
  73. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  74. package/dist/cjs/llm/google/utils/zod_to_genai_parameters.cjs +2 -0
  75. package/dist/cjs/llm/init.cjs +44 -53
  76. package/dist/cjs/llm/init.cjs.map +1 -1
  77. package/dist/cjs/llm/invoke.cjs +142 -182
  78. package/dist/cjs/llm/invoke.cjs.map +1 -1
  79. package/dist/cjs/llm/openai/index.cjs +991 -1276
  80. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  81. package/dist/cjs/llm/openai/utils/index.cjs +189 -316
  82. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  83. package/dist/cjs/llm/openrouter/index.cjs +102 -153
  84. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  85. package/dist/cjs/llm/openrouter/toolCache.cjs +35 -44
  86. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
  87. package/dist/cjs/llm/providers.cjs +29 -37
  88. package/dist/cjs/llm/providers.cjs.map +1 -1
  89. package/dist/cjs/llm/request.cjs +20 -33
  90. package/dist/cjs/llm/request.cjs.map +1 -1
  91. package/dist/cjs/llm/vertexai/index.cjs +427 -453
  92. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  93. package/dist/cjs/main.cjs +547 -528
  94. package/dist/cjs/messages/anthropicToolCache.cjs +68 -119
  95. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
  96. package/dist/cjs/messages/cache.cjs +305 -418
  97. package/dist/cjs/messages/cache.cjs.map +1 -1
  98. package/dist/cjs/messages/content.cjs +36 -49
  99. package/dist/cjs/messages/content.cjs.map +1 -1
  100. package/dist/cjs/messages/contextPruning.cjs +112 -145
  101. package/dist/cjs/messages/contextPruning.cjs.map +1 -1
  102. package/dist/cjs/messages/contextPruningSettings.cjs +36 -46
  103. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -1
  104. package/dist/cjs/messages/core.cjs +256 -397
  105. package/dist/cjs/messages/core.cjs.map +1 -1
  106. package/dist/cjs/messages/format.cjs +904 -1387
  107. package/dist/cjs/messages/format.cjs.map +1 -1
  108. package/dist/cjs/messages/ids.cjs +16 -20
  109. package/dist/cjs/messages/ids.cjs.map +1 -1
  110. package/dist/cjs/messages/index.cjs +12 -0
  111. package/dist/cjs/messages/langchain.cjs +18 -18
  112. package/dist/cjs/messages/langchain.cjs.map +1 -1
  113. package/dist/cjs/messages/prune.cjs +1054 -1517
  114. package/dist/cjs/messages/prune.cjs.map +1 -1
  115. package/dist/cjs/messages/recency.cjs +77 -95
  116. package/dist/cjs/messages/recency.cjs.map +1 -1
  117. package/dist/cjs/messages/reducer.cjs +63 -78
  118. package/dist/cjs/messages/reducer.cjs.map +1 -1
  119. package/dist/cjs/messages/tools.cjs +51 -79
  120. package/dist/cjs/messages/tools.cjs.map +1 -1
  121. package/dist/cjs/openai/index.cjs +171 -217
  122. package/dist/cjs/openai/index.cjs.map +1 -1
  123. package/dist/cjs/responses/index.cjs +302 -391
  124. package/dist/cjs/responses/index.cjs.map +1 -1
  125. package/dist/cjs/run.cjs +903 -1113
  126. package/dist/cjs/run.cjs.map +1 -1
  127. package/dist/cjs/session/AgentSession.cjs +805 -986
  128. package/dist/cjs/session/AgentSession.cjs.map +1 -1
  129. package/dist/cjs/session/JsonlSessionStore.cjs +327 -410
  130. package/dist/cjs/session/JsonlSessionStore.cjs.map +1 -1
  131. package/dist/cjs/session/handlers.cjs +192 -208
  132. package/dist/cjs/session/handlers.cjs.map +1 -1
  133. package/dist/cjs/session/ids.cjs +9 -10
  134. package/dist/cjs/session/ids.cjs.map +1 -1
  135. package/dist/cjs/session/index.cjs +4 -0
  136. package/dist/cjs/session/messageSerialization.cjs +94 -156
  137. package/dist/cjs/session/messageSerialization.cjs.map +1 -1
  138. package/dist/cjs/splitStream.cjs +147 -206
  139. package/dist/cjs/splitStream.cjs.map +1 -1
  140. package/dist/cjs/stream.cjs +856 -1344
  141. package/dist/cjs/stream.cjs.map +1 -1
  142. package/dist/cjs/summarization/index.cjs +57 -101
  143. package/dist/cjs/summarization/index.cjs.map +1 -1
  144. package/dist/cjs/summarization/node.cjs +643 -796
  145. package/dist/cjs/summarization/node.cjs.map +1 -1
  146. package/dist/cjs/tools/BashExecutor.cjs +110 -136
  147. package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
  148. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs +165 -245
  149. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs.map +1 -1
  150. package/dist/cjs/tools/Calculator.cjs +36 -57
  151. package/dist/cjs/tools/Calculator.cjs.map +1 -1
  152. package/dist/cjs/tools/CodeExecutor.cjs +126 -168
  153. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  154. package/dist/cjs/tools/CodeSessionFileSummary.cjs +36 -46
  155. package/dist/cjs/tools/CodeSessionFileSummary.cjs.map +1 -1
  156. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +459 -649
  157. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  158. package/dist/cjs/tools/ReadFile.cjs +17 -20
  159. package/dist/cjs/tools/ReadFile.cjs.map +1 -1
  160. package/dist/cjs/tools/SkillTool.cjs +26 -27
  161. package/dist/cjs/tools/SkillTool.cjs.map +1 -1
  162. package/dist/cjs/tools/SubagentTool.cjs +59 -61
  163. package/dist/cjs/tools/SubagentTool.cjs.map +1 -1
  164. package/dist/cjs/tools/ToolNode.cjs +2109 -2686
  165. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  166. package/dist/cjs/tools/ToolSearch.cjs +663 -825
  167. package/dist/cjs/tools/ToolSearch.cjs.map +1 -1
  168. package/dist/cjs/tools/cloudflare/CloudflareBridgeRuntime.cjs +248 -340
  169. package/dist/cjs/tools/cloudflare/CloudflareBridgeRuntime.cjs.map +1 -1
  170. package/dist/cjs/tools/cloudflare/CloudflareProgrammaticToolCalling.cjs +170 -197
  171. package/dist/cjs/tools/cloudflare/CloudflareProgrammaticToolCalling.cjs.map +1 -1
  172. package/dist/cjs/tools/cloudflare/CloudflareSandboxExecutionEngine.cjs +425 -520
  173. package/dist/cjs/tools/cloudflare/CloudflareSandboxExecutionEngine.cjs.map +1 -1
  174. package/dist/cjs/tools/cloudflare/CloudflareSandboxTools.cjs +91 -124
  175. package/dist/cjs/tools/cloudflare/CloudflareSandboxTools.cjs.map +1 -1
  176. package/dist/cjs/tools/cloudflare/index.cjs +4 -0
  177. package/dist/cjs/tools/eagerEventExecution.cjs +75 -99
  178. package/dist/cjs/tools/eagerEventExecution.cjs.map +1 -1
  179. package/dist/cjs/tools/handlers.cjs +200 -262
  180. package/dist/cjs/tools/handlers.cjs.map +1 -1
  181. package/dist/cjs/tools/local/CompileCheckTool.cjs +150 -212
  182. package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -1
  183. package/dist/cjs/tools/local/FileCheckpointer.cjs +77 -85
  184. package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -1
  185. package/dist/cjs/tools/local/LocalCodingTools.cjs +763 -1022
  186. package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -1
  187. package/dist/cjs/tools/local/LocalExecutionEngine.cjs +666 -941
  188. package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -1
  189. package/dist/cjs/tools/local/LocalExecutionTools.cjs +49 -92
  190. package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -1
  191. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +286 -354
  192. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -1
  193. package/dist/cjs/tools/local/attachments.cjs +108 -165
  194. package/dist/cjs/tools/local/attachments.cjs.map +1 -1
  195. package/dist/cjs/tools/local/bashAst.cjs +99 -113
  196. package/dist/cjs/tools/local/bashAst.cjs.map +1 -1
  197. package/dist/cjs/tools/local/editStrategies.cjs +126 -169
  198. package/dist/cjs/tools/local/editStrategies.cjs.map +1 -1
  199. package/dist/cjs/tools/local/index.cjs +12 -0
  200. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +136 -218
  201. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -1
  202. package/dist/cjs/tools/local/syntaxCheck.cjs +142 -161
  203. package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -1
  204. package/dist/cjs/tools/local/textEncoding.cjs +25 -23
  205. package/dist/cjs/tools/local/textEncoding.cjs.map +1 -1
  206. package/dist/cjs/tools/local/workspaceFS.cjs +38 -46
  207. package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -1
  208. package/dist/cjs/tools/ptcTimeout.cjs +27 -47
  209. package/dist/cjs/tools/ptcTimeout.cjs.map +1 -1
  210. package/dist/cjs/tools/schema.cjs +24 -23
  211. package/dist/cjs/tools/schema.cjs.map +1 -1
  212. package/dist/cjs/tools/search/anthropic.cjs +24 -33
  213. package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
  214. package/dist/cjs/tools/search/content.cjs +95 -137
  215. package/dist/cjs/tools/search/content.cjs.map +1 -1
  216. package/dist/cjs/tools/search/firecrawl.cjs +141 -172
  217. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  218. package/dist/cjs/tools/search/format.cjs +128 -196
  219. package/dist/cjs/tools/search/format.cjs.map +1 -1
  220. package/dist/cjs/tools/search/highlights.cjs +165 -232
  221. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  222. package/dist/cjs/tools/search/index.cjs +2 -0
  223. package/dist/cjs/tools/search/rerankers.cjs +151 -174
  224. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  225. package/dist/cjs/tools/search/schema.cjs +40 -39
  226. package/dist/cjs/tools/search/schema.cjs.map +1 -1
  227. package/dist/cjs/tools/search/search.cjs +428 -530
  228. package/dist/cjs/tools/search/search.cjs.map +1 -1
  229. package/dist/cjs/tools/search/serper-scraper.cjs +106 -127
  230. package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
  231. package/dist/cjs/tools/search/tavily-scraper.cjs +129 -181
  232. package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -1
  233. package/dist/cjs/tools/search/tavily-search.cjs +295 -359
  234. package/dist/cjs/tools/search/tavily-search.cjs.map +1 -1
  235. package/dist/cjs/tools/search/tool.cjs +260 -299
  236. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  237. package/dist/cjs/tools/search/utils.cjs +74 -117
  238. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  239. package/dist/cjs/tools/skillCatalog.cjs +54 -72
  240. package/dist/cjs/tools/skillCatalog.cjs.map +1 -1
  241. package/dist/cjs/tools/streamedToolCallSeals.cjs +19 -36
  242. package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
  243. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +612 -771
  244. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  245. package/dist/cjs/tools/subagent/index.cjs +1 -0
  246. package/dist/cjs/tools/toolOutputReferences.cjs +523 -630
  247. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
  248. package/dist/cjs/utils/callbacks.cjs +11 -21
  249. package/dist/cjs/utils/callbacks.cjs.map +1 -1
  250. package/dist/cjs/utils/errors.cjs +70 -95
  251. package/dist/cjs/utils/errors.cjs.map +1 -1
  252. package/dist/cjs/utils/events.cjs +32 -42
  253. package/dist/cjs/utils/events.cjs.map +1 -1
  254. package/dist/cjs/utils/graph.cjs +8 -12
  255. package/dist/cjs/utils/graph.cjs.map +1 -1
  256. package/dist/cjs/utils/handlers.cjs +60 -82
  257. package/dist/cjs/utils/handlers.cjs.map +1 -1
  258. package/dist/cjs/utils/index.cjs +9 -0
  259. package/dist/cjs/utils/llm.cjs +19 -27
  260. package/dist/cjs/utils/llm.cjs.map +1 -1
  261. package/dist/cjs/utils/misc.cjs +30 -46
  262. package/dist/cjs/utils/misc.cjs.map +1 -1
  263. package/dist/cjs/utils/run.cjs +50 -66
  264. package/dist/cjs/utils/run.cjs.map +1 -1
  265. package/dist/cjs/utils/schema.cjs +11 -19
  266. package/dist/cjs/utils/schema.cjs.map +1 -1
  267. package/dist/cjs/utils/title.cjs +71 -106
  268. package/dist/cjs/utils/title.cjs.map +1 -1
  269. package/dist/cjs/utils/tokens.cjs +186 -283
  270. package/dist/cjs/utils/tokens.cjs.map +1 -1
  271. package/dist/cjs/utils/truncation.cjs +95 -114
  272. package/dist/cjs/utils/truncation.cjs.map +1 -1
  273. package/dist/esm/agents/AgentContext.mjs +844 -1044
  274. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  275. package/dist/esm/common/constants.mjs +13 -11
  276. package/dist/esm/common/constants.mjs.map +1 -1
  277. package/dist/esm/common/enum.mjs +221 -238
  278. package/dist/esm/common/enum.mjs.map +1 -1
  279. package/dist/esm/common/index.mjs +3 -0
  280. package/dist/esm/events.mjs +121 -167
  281. package/dist/esm/events.mjs.map +1 -1
  282. package/dist/esm/graphs/Graph.mjs +1388 -1804
  283. package/dist/esm/graphs/Graph.mjs.map +1 -1
  284. package/dist/esm/graphs/MultiAgentGraph.mjs +713 -943
  285. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  286. package/dist/esm/graphs/index.mjs +3 -0
  287. package/dist/esm/hitl/askUserQuestion.mjs +60 -60
  288. package/dist/esm/hitl/askUserQuestion.mjs.map +1 -1
  289. package/dist/esm/hitl/index.mjs +2 -0
  290. package/dist/esm/hooks/HookRegistry.mjs +176 -200
  291. package/dist/esm/hooks/HookRegistry.mjs.map +1 -1
  292. package/dist/esm/hooks/createToolPolicyHook.mjs +71 -99
  293. package/dist/esm/hooks/createToolPolicyHook.mjs.map +1 -1
  294. package/dist/esm/hooks/createWorkspacePolicyHook.mjs +170 -271
  295. package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -1
  296. package/dist/esm/hooks/executeHooks.mjs +227 -280
  297. package/dist/esm/hooks/executeHooks.mjs.map +1 -1
  298. package/dist/esm/hooks/index.mjs +7 -0
  299. package/dist/esm/hooks/matchers.mjs +196 -228
  300. package/dist/esm/hooks/matchers.mjs.map +1 -1
  301. package/dist/esm/hooks/types.mjs +24 -22
  302. package/dist/esm/hooks/types.mjs.map +1 -1
  303. package/dist/esm/instrumentation.mjs +109 -132
  304. package/dist/esm/instrumentation.mjs.map +1 -1
  305. package/dist/esm/langchain/google-common.mjs +1 -2
  306. package/dist/esm/langchain/index.mjs +5 -5
  307. package/dist/esm/langchain/language_models/chat_models.mjs +1 -2
  308. package/dist/esm/langchain/messages/tool.mjs +1 -2
  309. package/dist/esm/langchain/messages.mjs +2 -2
  310. package/dist/esm/langchain/openai.mjs +1 -2
  311. package/dist/esm/langchain/prompts.mjs +2 -2
  312. package/dist/esm/langchain/runnables.mjs +2 -2
  313. package/dist/esm/langchain/tools.mjs +2 -2
  314. package/dist/esm/langchain/utils/env.mjs +2 -2
  315. package/dist/esm/langfuse.mjs +60 -76
  316. package/dist/esm/langfuse.mjs.map +1 -1
  317. package/dist/esm/langfuseToolOutputTracing.mjs +267 -395
  318. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  319. package/dist/esm/llm/anthropic/index.mjs +432 -559
  320. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  321. package/dist/esm/llm/anthropic/types.mjs +23 -45
  322. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  323. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +439 -725
  324. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  325. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +171 -253
  326. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  327. package/dist/esm/llm/anthropic/utils/output_parsers.mjs +3 -0
  328. package/dist/esm/llm/anthropic/utils/tools.mjs +12 -24
  329. package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
  330. package/dist/esm/llm/bedrock/index.mjs +195 -238
  331. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  332. package/dist/esm/llm/bedrock/toolCache.mjs +84 -104
  333. package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
  334. package/dist/esm/llm/bedrock/utils/index.mjs +3 -0
  335. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +357 -618
  336. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  337. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +105 -147
  338. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  339. package/dist/esm/llm/fake.mjs +86 -94
  340. package/dist/esm/llm/fake.mjs.map +1 -1
  341. package/dist/esm/llm/google/index.mjs +183 -235
  342. package/dist/esm/llm/google/index.mjs.map +1 -1
  343. package/dist/esm/llm/google/utils/common.mjs +397 -666
  344. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  345. package/dist/esm/llm/google/utils/zod_to_genai_parameters.mjs +3 -0
  346. package/dist/esm/llm/init.mjs +44 -51
  347. package/dist/esm/llm/init.mjs.map +1 -1
  348. package/dist/esm/llm/invoke.mjs +142 -180
  349. package/dist/esm/llm/invoke.mjs.map +1 -1
  350. package/dist/esm/llm/openai/index.mjs +991 -1271
  351. package/dist/esm/llm/openai/index.mjs.map +1 -1
  352. package/dist/esm/llm/openai/utils/index.mjs +188 -312
  353. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  354. package/dist/esm/llm/openrouter/index.mjs +102 -151
  355. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  356. package/dist/esm/llm/openrouter/toolCache.mjs +35 -42
  357. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
  358. package/dist/esm/llm/providers.mjs +29 -34
  359. package/dist/esm/llm/providers.mjs.map +1 -1
  360. package/dist/esm/llm/request.mjs +20 -31
  361. package/dist/esm/llm/request.mjs.map +1 -1
  362. package/dist/esm/llm/vertexai/index.mjs +427 -449
  363. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  364. package/dist/esm/main.mjs +99 -87
  365. package/dist/esm/messages/anthropicToolCache.mjs +68 -117
  366. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
  367. package/dist/esm/messages/cache.mjs +305 -416
  368. package/dist/esm/messages/cache.mjs.map +1 -1
  369. package/dist/esm/messages/content.mjs +36 -47
  370. package/dist/esm/messages/content.mjs.map +1 -1
  371. package/dist/esm/messages/contextPruning.mjs +112 -143
  372. package/dist/esm/messages/contextPruning.mjs.map +1 -1
  373. package/dist/esm/messages/contextPruningSettings.mjs +36 -44
  374. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -1
  375. package/dist/esm/messages/core.mjs +254 -393
  376. package/dist/esm/messages/core.mjs.map +1 -1
  377. package/dist/esm/messages/format.mjs +902 -1383
  378. package/dist/esm/messages/format.mjs.map +1 -1
  379. package/dist/esm/messages/ids.mjs +16 -18
  380. package/dist/esm/messages/ids.mjs.map +1 -1
  381. package/dist/esm/messages/index.mjs +13 -0
  382. package/dist/esm/messages/langchain.mjs +18 -16
  383. package/dist/esm/messages/langchain.mjs.map +1 -1
  384. package/dist/esm/messages/prune.mjs +1053 -1514
  385. package/dist/esm/messages/prune.mjs.map +1 -1
  386. package/dist/esm/messages/recency.mjs +77 -93
  387. package/dist/esm/messages/recency.mjs.map +1 -1
  388. package/dist/esm/messages/reducer.mjs +63 -76
  389. package/dist/esm/messages/reducer.mjs.map +1 -1
  390. package/dist/esm/messages/tools.mjs +49 -75
  391. package/dist/esm/messages/tools.mjs.map +1 -1
  392. package/dist/esm/openai/index.mjs +170 -215
  393. package/dist/esm/openai/index.mjs.map +1 -1
  394. package/dist/esm/responses/index.mjs +301 -389
  395. package/dist/esm/responses/index.mjs.map +1 -1
  396. package/dist/esm/run.mjs +903 -1111
  397. package/dist/esm/run.mjs.map +1 -1
  398. package/dist/esm/session/AgentSession.mjs +806 -985
  399. package/dist/esm/session/AgentSession.mjs.map +1 -1
  400. package/dist/esm/session/JsonlSessionStore.mjs +326 -407
  401. package/dist/esm/session/JsonlSessionStore.mjs.map +1 -1
  402. package/dist/esm/session/handlers.mjs +192 -206
  403. package/dist/esm/session/handlers.mjs.map +1 -1
  404. package/dist/esm/session/ids.mjs +9 -8
  405. package/dist/esm/session/ids.mjs.map +1 -1
  406. package/dist/esm/session/index.mjs +5 -0
  407. package/dist/esm/session/messageSerialization.mjs +94 -154
  408. package/dist/esm/session/messageSerialization.mjs.map +1 -1
  409. package/dist/esm/splitStream.mjs +147 -204
  410. package/dist/esm/splitStream.mjs.map +1 -1
  411. package/dist/esm/stream.mjs +854 -1341
  412. package/dist/esm/stream.mjs.map +1 -1
  413. package/dist/esm/summarization/index.mjs +57 -99
  414. package/dist/esm/summarization/index.mjs.map +1 -1
  415. package/dist/esm/summarization/node.mjs +640 -790
  416. package/dist/esm/summarization/node.mjs.map +1 -1
  417. package/dist/esm/tools/BashExecutor.mjs +103 -129
  418. package/dist/esm/tools/BashExecutor.mjs.map +1 -1
  419. package/dist/esm/tools/BashProgrammaticToolCalling.mjs +162 -239
  420. package/dist/esm/tools/BashProgrammaticToolCalling.mjs.map +1 -1
  421. package/dist/esm/tools/Calculator.mjs +34 -36
  422. package/dist/esm/tools/Calculator.mjs.map +1 -1
  423. package/dist/esm/tools/CodeExecutor.mjs +123 -164
  424. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  425. package/dist/esm/tools/CodeSessionFileSummary.mjs +36 -44
  426. package/dist/esm/tools/CodeSessionFileSummary.mjs.map +1 -1
  427. package/dist/esm/tools/ProgrammaticToolCalling.mjs +454 -644
  428. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  429. package/dist/esm/tools/ReadFile.mjs +17 -18
  430. package/dist/esm/tools/ReadFile.mjs.map +1 -1
  431. package/dist/esm/tools/SkillTool.mjs +26 -25
  432. package/dist/esm/tools/SkillTool.mjs.map +1 -1
  433. package/dist/esm/tools/SubagentTool.mjs +59 -59
  434. package/dist/esm/tools/SubagentTool.mjs.map +1 -1
  435. package/dist/esm/tools/ToolNode.mjs +2107 -2684
  436. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  437. package/dist/esm/tools/ToolSearch.mjs +659 -804
  438. package/dist/esm/tools/ToolSearch.mjs.map +1 -1
  439. package/dist/esm/tools/cloudflare/CloudflareBridgeRuntime.mjs +248 -338
  440. package/dist/esm/tools/cloudflare/CloudflareBridgeRuntime.mjs.map +1 -1
  441. package/dist/esm/tools/cloudflare/CloudflareProgrammaticToolCalling.mjs +170 -195
  442. package/dist/esm/tools/cloudflare/CloudflareProgrammaticToolCalling.mjs.map +1 -1
  443. package/dist/esm/tools/cloudflare/CloudflareSandboxExecutionEngine.mjs +424 -517
  444. package/dist/esm/tools/cloudflare/CloudflareSandboxExecutionEngine.mjs.map +1 -1
  445. package/dist/esm/tools/cloudflare/CloudflareSandboxTools.mjs +91 -122
  446. package/dist/esm/tools/cloudflare/CloudflareSandboxTools.mjs.map +1 -1
  447. package/dist/esm/tools/cloudflare/index.mjs +5 -0
  448. package/dist/esm/tools/eagerEventExecution.mjs +75 -96
  449. package/dist/esm/tools/eagerEventExecution.mjs.map +1 -1
  450. package/dist/esm/tools/handlers.mjs +200 -260
  451. package/dist/esm/tools/handlers.mjs.map +1 -1
  452. package/dist/esm/tools/local/CompileCheckTool.mjs +150 -210
  453. package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -1
  454. package/dist/esm/tools/local/FileCheckpointer.mjs +77 -83
  455. package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -1
  456. package/dist/esm/tools/local/LocalCodingTools.mjs +760 -1017
  457. package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -1
  458. package/dist/esm/tools/local/LocalExecutionEngine.mjs +663 -936
  459. package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -1
  460. package/dist/esm/tools/local/LocalExecutionTools.mjs +49 -90
  461. package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -1
  462. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +283 -349
  463. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -1
  464. package/dist/esm/tools/local/attachments.mjs +108 -163
  465. package/dist/esm/tools/local/attachments.mjs.map +1 -1
  466. package/dist/esm/tools/local/bashAst.mjs +99 -111
  467. package/dist/esm/tools/local/bashAst.mjs.map +1 -1
  468. package/dist/esm/tools/local/editStrategies.mjs +126 -167
  469. package/dist/esm/tools/local/editStrategies.mjs.map +1 -1
  470. package/dist/esm/tools/local/index.mjs +13 -0
  471. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +136 -216
  472. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -1
  473. package/dist/esm/tools/local/syntaxCheck.mjs +138 -155
  474. package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -1
  475. package/dist/esm/tools/local/textEncoding.mjs +25 -21
  476. package/dist/esm/tools/local/textEncoding.mjs.map +1 -1
  477. package/dist/esm/tools/local/workspaceFS.mjs +38 -44
  478. package/dist/esm/tools/local/workspaceFS.mjs.map +1 -1
  479. package/dist/esm/tools/ptcTimeout.mjs +27 -42
  480. package/dist/esm/tools/ptcTimeout.mjs.map +1 -1
  481. package/dist/esm/tools/schema.mjs +24 -21
  482. package/dist/esm/tools/schema.mjs.map +1 -1
  483. package/dist/esm/tools/search/anthropic.mjs +24 -31
  484. package/dist/esm/tools/search/anthropic.mjs.map +1 -1
  485. package/dist/esm/tools/search/content.mjs +93 -116
  486. package/dist/esm/tools/search/content.mjs.map +1 -1
  487. package/dist/esm/tools/search/firecrawl.mjs +139 -169
  488. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  489. package/dist/esm/tools/search/format.mjs +128 -194
  490. package/dist/esm/tools/search/format.mjs.map +1 -1
  491. package/dist/esm/tools/search/highlights.mjs +165 -230
  492. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  493. package/dist/esm/tools/search/index.mjs +3 -0
  494. package/dist/esm/tools/search/rerankers.mjs +149 -168
  495. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  496. package/dist/esm/tools/search/schema.mjs +39 -37
  497. package/dist/esm/tools/search/schema.mjs.map +1 -1
  498. package/dist/esm/tools/search/search.mjs +426 -528
  499. package/dist/esm/tools/search/search.mjs.map +1 -1
  500. package/dist/esm/tools/search/serper-scraper.mjs +104 -124
  501. package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
  502. package/dist/esm/tools/search/tavily-scraper.mjs +127 -178
  503. package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -1
  504. package/dist/esm/tools/search/tavily-search.mjs +293 -357
  505. package/dist/esm/tools/search/tavily-search.mjs.map +1 -1
  506. package/dist/esm/tools/search/tool.mjs +259 -297
  507. package/dist/esm/tools/search/tool.mjs.map +1 -1
  508. package/dist/esm/tools/search/utils.mjs +74 -115
  509. package/dist/esm/tools/search/utils.mjs.map +1 -1
  510. package/dist/esm/tools/skillCatalog.mjs +54 -70
  511. package/dist/esm/tools/skillCatalog.mjs.map +1 -1
  512. package/dist/esm/tools/streamedToolCallSeals.mjs +19 -31
  513. package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
  514. package/dist/esm/tools/subagent/SubagentExecutor.mjs +612 -768
  515. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  516. package/dist/esm/tools/subagent/index.mjs +2 -0
  517. package/dist/esm/tools/toolOutputReferences.mjs +523 -624
  518. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
  519. package/dist/esm/utils/callbacks.mjs +11 -19
  520. package/dist/esm/utils/callbacks.mjs.map +1 -1
  521. package/dist/esm/utils/errors.mjs +70 -93
  522. package/dist/esm/utils/errors.mjs.map +1 -1
  523. package/dist/esm/utils/events.mjs +32 -40
  524. package/dist/esm/utils/events.mjs.map +1 -1
  525. package/dist/esm/utils/graph.mjs +8 -10
  526. package/dist/esm/utils/graph.mjs.map +1 -1
  527. package/dist/esm/utils/handlers.mjs +60 -80
  528. package/dist/esm/utils/handlers.mjs.map +1 -1
  529. package/dist/esm/utils/index.mjs +10 -0
  530. package/dist/esm/utils/llm.mjs +19 -25
  531. package/dist/esm/utils/llm.mjs.map +1 -1
  532. package/dist/esm/utils/misc.mjs +30 -44
  533. package/dist/esm/utils/misc.mjs.map +1 -1
  534. package/dist/esm/utils/run.mjs +50 -64
  535. package/dist/esm/utils/run.mjs.map +1 -1
  536. package/dist/esm/utils/schema.mjs +11 -17
  537. package/dist/esm/utils/schema.mjs.map +1 -1
  538. package/dist/esm/utils/title.mjs +71 -104
  539. package/dist/esm/utils/title.mjs.map +1 -1
  540. package/dist/esm/utils/tokens.mjs +186 -281
  541. package/dist/esm/utils/tokens.mjs.map +1 -1
  542. package/dist/esm/utils/truncation.mjs +95 -112
  543. package/dist/esm/utils/truncation.mjs.map +1 -1
  544. package/dist/types/tools/search/tool.d.ts +17 -0
  545. package/dist/types/tools/search/types.d.ts +4 -0
  546. package/package.json +4 -10
  547. package/src/tools/search/highlights.ts +9 -1
  548. package/src/tools/search/search.ts +41 -3
  549. package/src/tools/search/source-processing.test.ts +373 -0
  550. package/src/tools/search/tool.ts +22 -2
  551. package/src/tools/search/types.ts +4 -0
  552. package/dist/cjs/langchain/google-common.cjs.map +0 -1
  553. package/dist/cjs/langchain/index.cjs.map +0 -1
  554. package/dist/cjs/langchain/language_models/chat_models.cjs.map +0 -1
  555. package/dist/cjs/langchain/messages/tool.cjs.map +0 -1
  556. package/dist/cjs/langchain/messages.cjs.map +0 -1
  557. package/dist/cjs/langchain/openai.cjs.map +0 -1
  558. package/dist/cjs/langchain/prompts.cjs.map +0 -1
  559. package/dist/cjs/langchain/runnables.cjs.map +0 -1
  560. package/dist/cjs/langchain/tools.cjs.map +0 -1
  561. package/dist/cjs/langchain/utils/env.cjs.map +0 -1
  562. package/dist/cjs/main.cjs.map +0 -1
  563. package/dist/esm/langchain/google-common.mjs.map +0 -1
  564. package/dist/esm/langchain/index.mjs.map +0 -1
  565. package/dist/esm/langchain/language_models/chat_models.mjs.map +0 -1
  566. package/dist/esm/langchain/messages/tool.mjs.map +0 -1
  567. package/dist/esm/langchain/messages.mjs.map +0 -1
  568. package/dist/esm/langchain/openai.mjs.map +0 -1
  569. package/dist/esm/langchain/prompts.mjs.map +0 -1
  570. package/dist/esm/langchain/runnables.mjs.map +0 -1
  571. package/dist/esm/langchain/tools.mjs.map +0 -1
  572. package/dist/esm/langchain/utils/env.mjs.map +0 -1
  573. package/dist/esm/main.mjs.map +0 -1
@@ -1,1572 +1,1108 @@
1
- 'use strict';
2
-
3
- var messages = require('@langchain/core/messages');
4
- var truncation = require('../utils/truncation.cjs');
5
- var contextPruningSettings = require('./contextPruningSettings.cjs');
6
- var _enum = require('../common/enum.cjs');
7
- var contextPruning = require('./contextPruning.cjs');
8
- var langchain = require('./langchain.cjs');
9
-
1
+ require("../common/enum.cjs");
2
+ require("../common/index.cjs");
3
+ const require_langchain = require("./langchain.cjs");
4
+ const require_truncation = require("../utils/truncation.cjs");
5
+ const require_contextPruningSettings = require("./contextPruningSettings.cjs");
6
+ const require_contextPruning = require("./contextPruning.cjs");
7
+ let _langchain_core_messages = require("@langchain/core/messages");
8
+ //#region src/messages/prune.ts
10
9
  function sumTokenCounts(tokenMap, count) {
11
- let total = 0;
12
- for (let i = 0; i < count; i++) {
13
- total += tokenMap[i] ?? 0;
14
- }
15
- return total;
10
+ let total = 0;
11
+ for (let i = 0; i < count; i++) total += tokenMap[i] ?? 0;
12
+ return total;
16
13
  }
17
14
  /** Default fraction of the token budget reserved as headroom (5 %). */
18
- const DEFAULT_RESERVE_RATIO = 0.05;
15
+ const DEFAULT_RESERVE_RATIO = .05;
19
16
  /** Context pressure at which observation masking and context fading activate. */
20
- const PRESSURE_THRESHOLD_MASKING = 0.8;
17
+ const PRESSURE_THRESHOLD_MASKING = .8;
21
18
  /** Pressure band thresholds paired with budget factors for progressive context fading. */
22
19
  const PRESSURE_BANDS = [
23
- [0.99, 0.05],
24
- [0.9, 0.2],
25
- [0.85, 0.5],
26
- [0.8, 1.0],
20
+ [.99, .05],
21
+ [.9, .2],
22
+ [.85, .5],
23
+ [.8, 1]
27
24
  ];
28
25
  /** Maximum character length for masked (consumed) tool results. */
29
26
  const MASKED_RESULT_MAX_CHARS = 300;
30
27
  /** Hard cap for the originalToolContent store (~2 MB estimated from char length). */
31
- const ORIGINAL_CONTENT_MAX_CHARS = 2_000_000;
28
+ const ORIGINAL_CONTENT_MAX_CHARS = 2e6;
32
29
  /**
33
- * Evicts oldest entries from `map` (in Map-iteration / insertion order) until
34
- * the cumulative char length of remaining values fits within
35
- * `ORIGINAL_CONTENT_MAX_CHARS`. Used by the recency-window carry-over merge
36
- * path in Graph.ts to bound long-running session memory: the pruner enforces
37
- * the cap inside its own `originalToolContent` map, but a key-wise union with
38
- * recency carry-over bypasses that cap unless re-applied here.
39
- */
30
+ * Evicts oldest entries from `map` (in Map-iteration / insertion order) until
31
+ * the cumulative char length of remaining values fits within
32
+ * `ORIGINAL_CONTENT_MAX_CHARS`. Used by the recency-window carry-over merge
33
+ * path in Graph.ts to bound long-running session memory: the pruner enforces
34
+ * the cap inside its own `originalToolContent` map, but a key-wise union with
35
+ * recency carry-over bypasses that cap unless re-applied here.
36
+ */
40
37
  function enforceOriginalContentCap(map) {
41
- let total = 0;
42
- for (const v of map.values()) {
43
- total += v.length;
44
- }
45
- while (total > ORIGINAL_CONTENT_MAX_CHARS && map.size > 0) {
46
- const oldest = map.keys().next();
47
- if (oldest.done === true) {
48
- break;
49
- }
50
- const removed = map.get(oldest.value);
51
- if (removed != null) {
52
- total -= removed.length;
53
- }
54
- map.delete(oldest.value);
55
- }
38
+ let total = 0;
39
+ for (const v of map.values()) total += v.length;
40
+ while (total > 2e6 && map.size > 0) {
41
+ const oldest = map.keys().next();
42
+ if (oldest.done === true) break;
43
+ const removed = map.get(oldest.value);
44
+ if (removed != null) total -= removed.length;
45
+ map.delete(oldest.value);
46
+ }
56
47
  }
57
48
  /** Minimum cumulative calibration ratio — provider can't count fewer tokens
58
- * than our raw estimate (within reason). Prevents divide-by-zero edge cases. */
59
- const CALIBRATION_RATIO_MIN = 0.5;
49
+ * than our raw estimate (within reason). Prevents divide-by-zero edge cases. */
50
+ const CALIBRATION_RATIO_MIN = .5;
60
51
  /** Maximum cumulative calibration ratio — sanity cap for the running ratio. */
61
52
  const CALIBRATION_RATIO_MAX = 5;
62
53
  function getToolCallIds(message) {
63
- if (message.getType() !== 'ai') {
64
- return new Set();
65
- }
66
- const ids = new Set();
67
- const aiMessage = message;
68
- for (const toolCall of aiMessage.tool_calls ?? []) {
69
- if (typeof toolCall.id === 'string' && toolCall.id.length > 0) {
70
- ids.add(toolCall.id);
71
- }
72
- }
73
- if (Array.isArray(aiMessage.content)) {
74
- for (const part of aiMessage.content) {
75
- if (typeof part !== 'object') {
76
- continue;
77
- }
78
- const record = part;
79
- if ((record.type === 'tool_use' || record.type === 'tool_call') &&
80
- typeof record.id === 'string' &&
81
- record.id.length > 0) {
82
- ids.add(record.id);
83
- }
84
- }
85
- }
86
- return ids;
54
+ if (message.getType() !== "ai") return /* @__PURE__ */ new Set();
55
+ const ids = /* @__PURE__ */ new Set();
56
+ const aiMessage = message;
57
+ for (const toolCall of aiMessage.tool_calls ?? []) if (typeof toolCall.id === "string" && toolCall.id.length > 0) ids.add(toolCall.id);
58
+ if (Array.isArray(aiMessage.content)) for (const part of aiMessage.content) {
59
+ if (typeof part !== "object") continue;
60
+ const record = part;
61
+ if ((record.type === "tool_use" || record.type === "tool_call") && typeof record.id === "string" && record.id.length > 0) ids.add(record.id);
62
+ }
63
+ return ids;
87
64
  }
88
65
  function getToolResultId(message) {
89
- if (message.getType() !== 'tool') {
90
- return null;
91
- }
92
- const toolMessage = message;
93
- if (typeof toolMessage.tool_call_id === 'string' &&
94
- toolMessage.tool_call_id.length > 0) {
95
- return toolMessage.tool_call_id;
96
- }
97
- if (typeof toolMessage.toolCallId === 'string' &&
98
- toolMessage.toolCallId.length > 0) {
99
- return toolMessage.toolCallId;
100
- }
101
- return null;
66
+ if (message.getType() !== "tool") return null;
67
+ const toolMessage = message;
68
+ if (typeof toolMessage.tool_call_id === "string" && toolMessage.tool_call_id.length > 0) return toolMessage.tool_call_id;
69
+ if (typeof toolMessage.toolCallId === "string" && toolMessage.toolCallId.length > 0) return toolMessage.toolCallId;
70
+ return null;
102
71
  }
103
- function resolveTokenCountForMessage({ message, messageIndexMap, tokenCounter, indexTokenCountMap, }) {
104
- const originalIndex = messageIndexMap.get(message) ?? -1;
105
- if (originalIndex > -1 && indexTokenCountMap[originalIndex] != null) {
106
- return indexTokenCountMap[originalIndex];
107
- }
108
- return tokenCounter(message);
72
+ function resolveTokenCountForMessage({ message, messageIndexMap, tokenCounter, indexTokenCountMap }) {
73
+ const originalIndex = messageIndexMap.get(message) ?? -1;
74
+ if (originalIndex > -1 && indexTokenCountMap[originalIndex] != null) return indexTokenCountMap[originalIndex];
75
+ return tokenCounter(message);
109
76
  }
110
- function repairOrphanedToolMessages({ context, allMessages, tokenCounter, indexTokenCountMap, }) {
111
- const messageIndexMap = new Map();
112
- for (let i = 0; i < allMessages.length; i++) {
113
- messageIndexMap.set(allMessages[i], i);
114
- }
115
- const validToolCallIds = new Set();
116
- const presentToolResultIds = new Set();
117
- for (const message of context) {
118
- for (const id of getToolCallIds(message)) {
119
- validToolCallIds.add(id);
120
- }
121
- const resultId = getToolResultId(message);
122
- if (resultId != null) {
123
- presentToolResultIds.add(resultId);
124
- }
125
- }
126
- let reclaimedTokens = 0;
127
- let droppedOrphanCount = 0;
128
- const repairedContext = [];
129
- const droppedMessages = [];
130
- for (const message of context) {
131
- if (message.getType() === 'tool') {
132
- const toolResultId = getToolResultId(message);
133
- if (toolResultId == null || !validToolCallIds.has(toolResultId)) {
134
- droppedOrphanCount += 1;
135
- reclaimedTokens += resolveTokenCountForMessage({
136
- message,
137
- tokenCounter,
138
- messageIndexMap,
139
- indexTokenCountMap,
140
- });
141
- droppedMessages.push(message);
142
- continue;
143
- }
144
- repairedContext.push(message);
145
- continue;
146
- }
147
- if (message.getType() === 'ai' && message instanceof messages.AIMessage) {
148
- const toolCallIds = getToolCallIds(message);
149
- if (toolCallIds.size > 0) {
150
- let hasOrphanToolCalls = false;
151
- for (const id of toolCallIds) {
152
- if (!presentToolResultIds.has(id)) {
153
- hasOrphanToolCalls = true;
154
- break;
155
- }
156
- }
157
- if (hasOrphanToolCalls) {
158
- const originalTokens = resolveTokenCountForMessage({
159
- message,
160
- messageIndexMap,
161
- tokenCounter,
162
- indexTokenCountMap,
163
- });
164
- const stripped = stripOrphanToolUseBlocks(message, presentToolResultIds);
165
- if (stripped != null) {
166
- const strippedTokens = tokenCounter(stripped);
167
- reclaimedTokens += originalTokens - strippedTokens;
168
- repairedContext.push(stripped);
169
- }
170
- else {
171
- droppedOrphanCount += 1;
172
- reclaimedTokens += originalTokens;
173
- droppedMessages.push(message);
174
- }
175
- continue;
176
- }
177
- }
178
- }
179
- repairedContext.push(message);
180
- }
181
- return {
182
- context: repairedContext,
183
- reclaimedTokens,
184
- droppedOrphanCount,
185
- droppedMessages,
186
- };
77
+ function repairOrphanedToolMessages({ context, allMessages, tokenCounter, indexTokenCountMap }) {
78
+ const messageIndexMap = /* @__PURE__ */ new Map();
79
+ for (let i = 0; i < allMessages.length; i++) messageIndexMap.set(allMessages[i], i);
80
+ const validToolCallIds = /* @__PURE__ */ new Set();
81
+ const presentToolResultIds = /* @__PURE__ */ new Set();
82
+ for (const message of context) {
83
+ for (const id of getToolCallIds(message)) validToolCallIds.add(id);
84
+ const resultId = getToolResultId(message);
85
+ if (resultId != null) presentToolResultIds.add(resultId);
86
+ }
87
+ let reclaimedTokens = 0;
88
+ let droppedOrphanCount = 0;
89
+ const repairedContext = [];
90
+ const droppedMessages = [];
91
+ for (const message of context) {
92
+ if (message.getType() === "tool") {
93
+ const toolResultId = getToolResultId(message);
94
+ if (toolResultId == null || !validToolCallIds.has(toolResultId)) {
95
+ droppedOrphanCount += 1;
96
+ reclaimedTokens += resolveTokenCountForMessage({
97
+ message,
98
+ tokenCounter,
99
+ messageIndexMap,
100
+ indexTokenCountMap
101
+ });
102
+ droppedMessages.push(message);
103
+ continue;
104
+ }
105
+ repairedContext.push(message);
106
+ continue;
107
+ }
108
+ if (message.getType() === "ai" && message instanceof _langchain_core_messages.AIMessage) {
109
+ const toolCallIds = getToolCallIds(message);
110
+ if (toolCallIds.size > 0) {
111
+ let hasOrphanToolCalls = false;
112
+ for (const id of toolCallIds) if (!presentToolResultIds.has(id)) {
113
+ hasOrphanToolCalls = true;
114
+ break;
115
+ }
116
+ if (hasOrphanToolCalls) {
117
+ const originalTokens = resolveTokenCountForMessage({
118
+ message,
119
+ messageIndexMap,
120
+ tokenCounter,
121
+ indexTokenCountMap
122
+ });
123
+ const stripped = stripOrphanToolUseBlocks(message, presentToolResultIds);
124
+ if (stripped != null) {
125
+ const strippedTokens = tokenCounter(stripped);
126
+ reclaimedTokens += originalTokens - strippedTokens;
127
+ repairedContext.push(stripped);
128
+ } else {
129
+ droppedOrphanCount += 1;
130
+ reclaimedTokens += originalTokens;
131
+ droppedMessages.push(message);
132
+ }
133
+ continue;
134
+ }
135
+ }
136
+ }
137
+ repairedContext.push(message);
138
+ }
139
+ return {
140
+ context: repairedContext,
141
+ reclaimedTokens,
142
+ droppedOrphanCount,
143
+ droppedMessages
144
+ };
187
145
  }
188
146
  /**
189
- * Strips tool_use content blocks and tool_calls entries from an AI message
190
- * when their corresponding ToolMessages are not in the context.
191
- * Returns null if the message has no content left after stripping.
192
- */
147
+ * Strips tool_use content blocks and tool_calls entries from an AI message
148
+ * when their corresponding ToolMessages are not in the context.
149
+ * Returns null if the message has no content left after stripping.
150
+ */
193
151
  function stripOrphanToolUseBlocks(message, presentToolResultIds) {
194
- const keptToolCalls = (message.tool_calls ?? []).filter((tc) => typeof tc.id === 'string' && presentToolResultIds.has(tc.id));
195
- let keptContent;
196
- if (Array.isArray(message.content)) {
197
- const filtered = message.content.filter((block) => {
198
- if (typeof block !== 'object') {
199
- return true;
200
- }
201
- const record = block;
202
- if ((record.type === 'tool_use' || record.type === 'tool_call') &&
203
- typeof record.id === 'string') {
204
- return presentToolResultIds.has(record.id);
205
- }
206
- return true;
207
- });
208
- if (filtered.length === 0) {
209
- return null;
210
- }
211
- keptContent = filtered;
212
- }
213
- else {
214
- keptContent = message.content;
215
- }
216
- return new messages.AIMessage({
217
- ...message,
218
- content: langchain.toLangChainContent(keptContent),
219
- tool_calls: keptToolCalls.length > 0 ? keptToolCalls : undefined,
220
- });
152
+ const keptToolCalls = (message.tool_calls ?? []).filter((tc) => typeof tc.id === "string" && presentToolResultIds.has(tc.id));
153
+ let keptContent;
154
+ if (Array.isArray(message.content)) {
155
+ const filtered = message.content.filter((block) => {
156
+ if (typeof block !== "object") return true;
157
+ const record = block;
158
+ if ((record.type === "tool_use" || record.type === "tool_call") && typeof record.id === "string") return presentToolResultIds.has(record.id);
159
+ return true;
160
+ });
161
+ if (filtered.length === 0) return null;
162
+ keptContent = filtered;
163
+ } else keptContent = message.content;
164
+ return new _langchain_core_messages.AIMessage({
165
+ ...message,
166
+ content: require_langchain.toLangChainContent(keptContent),
167
+ tool_calls: keptToolCalls.length > 0 ? keptToolCalls : void 0
168
+ });
221
169
  }
222
170
  /**
223
- * Lightweight structural cleanup: strips orphan tool_use blocks from AI messages
224
- * and drops orphan ToolMessages whose AI counterpart is missing.
225
- *
226
- * Unlike `repairOrphanedToolMessages`, this does NOT track tokens — it is
227
- * intended as a final safety net in Graph.ts right before model invocation
228
- * to prevent Anthropic/Bedrock structural validation errors.
229
- *
230
- * Uses duck-typing instead of `getType()` because messages at this stage
231
- * may be plain objects (from LangGraph state serialization) rather than
232
- * proper BaseMessage class instances.
233
- *
234
- * Includes a fast-path: if every tool_call has a matching tool_result and
235
- * vice-versa, the original array is returned immediately with zero allocation.
236
- */
237
- function sanitizeOrphanToolBlocks(messages$1) {
238
- const allToolCallIds = new Set();
239
- const allToolResultIds = new Set();
240
- for (const msg of messages$1) {
241
- const msgAny = msg;
242
- const toolCalls = msgAny.tool_calls;
243
- if (Array.isArray(toolCalls)) {
244
- for (const tc of toolCalls) {
245
- if (typeof tc.id === 'string' &&
246
- tc.id.length > 0 &&
247
- !tc.id.startsWith(_enum.Constants.ANTHROPIC_SERVER_TOOL_PREFIX)) {
248
- allToolCallIds.add(tc.id);
249
- }
250
- }
251
- }
252
- if (Array.isArray(msgAny.content)) {
253
- for (const block of msgAny.content) {
254
- if (typeof block === 'object' &&
255
- (block.type === 'tool_use' || block.type === 'tool_call') &&
256
- typeof block.id === 'string' &&
257
- !block.id.startsWith(_enum.Constants.ANTHROPIC_SERVER_TOOL_PREFIX)) {
258
- allToolCallIds.add(block.id);
259
- }
260
- }
261
- }
262
- const toolCallId = msgAny.tool_call_id;
263
- if (typeof toolCallId === 'string' && toolCallId.length > 0) {
264
- allToolResultIds.add(toolCallId);
265
- }
266
- }
267
- let hasOrphans = false;
268
- for (const id of allToolCallIds) {
269
- if (!allToolResultIds.has(id)) {
270
- hasOrphans = true;
271
- break;
272
- }
273
- }
274
- if (!hasOrphans) {
275
- for (const id of allToolResultIds) {
276
- if (!allToolCallIds.has(id)) {
277
- hasOrphans = true;
278
- break;
279
- }
280
- }
281
- }
282
- if (!hasOrphans) {
283
- return messages$1;
284
- }
285
- const result = [];
286
- const strippedAiIndices = new Set();
287
- for (const msg of messages$1) {
288
- const msgAny = msg;
289
- const msgType = typeof msg.getType === 'function'
290
- ? msg.getType()
291
- : (msgAny.role ??
292
- msgAny._type);
293
- const toolCallId = msgAny.tool_call_id;
294
- if ((msgType === 'tool' || msg instanceof messages.ToolMessage) &&
295
- typeof toolCallId === 'string' &&
296
- !allToolCallIds.has(toolCallId)) {
297
- continue;
298
- }
299
- const toolCalls = msgAny.tool_calls;
300
- if ((msgType === 'ai' ||
301
- msgType === 'assistant' ||
302
- msg instanceof messages.AIMessage) &&
303
- Array.isArray(toolCalls) &&
304
- toolCalls.length > 0) {
305
- const hasOrphanCalls = toolCalls.some((tc) => typeof tc.id === 'string' && !allToolResultIds.has(tc.id));
306
- if (hasOrphanCalls) {
307
- if (msg instanceof messages.AIMessage) {
308
- const stripped = stripOrphanToolUseBlocks(msg, allToolResultIds);
309
- if (stripped != null) {
310
- strippedAiIndices.add(result.length);
311
- result.push(stripped);
312
- }
313
- continue;
314
- }
315
- const keptToolCalls = toolCalls.filter((tc) => typeof tc.id === 'string' && allToolResultIds.has(tc.id));
316
- const keptContent = Array.isArray(msgAny.content)
317
- ? msgAny.content.filter((block) => {
318
- if (typeof block !== 'object')
319
- return true;
320
- if ((block.type === 'tool_use' || block.type === 'tool_call') &&
321
- typeof block.id === 'string') {
322
- return allToolResultIds.has(block.id);
323
- }
324
- return true;
325
- })
326
- : msgAny.content;
327
- if (keptToolCalls.length === 0 &&
328
- Array.isArray(keptContent) &&
329
- keptContent.length === 0) {
330
- continue;
331
- }
332
- strippedAiIndices.add(result.length);
333
- const patched = Object.create(Object.getPrototypeOf(msg), Object.getOwnPropertyDescriptors(msg));
334
- patched.tool_calls = keptToolCalls.length > 0 ? keptToolCalls : [];
335
- patched.content = keptContent;
336
- result.push(patched);
337
- continue;
338
- }
339
- }
340
- result.push(msg);
341
- }
342
- // Bedrock/Anthropic require the conversation to end with a user message;
343
- // a stripped AI message (tool_use removed) represents a dead-end exchange.
344
- while (result.length > 0 && strippedAiIndices.has(result.length - 1)) {
345
- result.pop();
346
- }
347
- return result;
171
+ * Lightweight structural cleanup: strips orphan tool_use blocks from AI messages
172
+ * and drops orphan ToolMessages whose AI counterpart is missing.
173
+ *
174
+ * Unlike `repairOrphanedToolMessages`, this does NOT track tokens — it is
175
+ * intended as a final safety net in Graph.ts right before model invocation
176
+ * to prevent Anthropic/Bedrock structural validation errors.
177
+ *
178
+ * Uses duck-typing instead of `getType()` because messages at this stage
179
+ * may be plain objects (from LangGraph state serialization) rather than
180
+ * proper BaseMessage class instances.
181
+ *
182
+ * Includes a fast-path: if every tool_call has a matching tool_result and
183
+ * vice-versa, the original array is returned immediately with zero allocation.
184
+ */
185
+ function sanitizeOrphanToolBlocks(messages) {
186
+ const allToolCallIds = /* @__PURE__ */ new Set();
187
+ const allToolResultIds = /* @__PURE__ */ new Set();
188
+ for (const msg of messages) {
189
+ const msgAny = msg;
190
+ const toolCalls = msgAny.tool_calls;
191
+ if (Array.isArray(toolCalls)) {
192
+ for (const tc of toolCalls) if (typeof tc.id === "string" && tc.id.length > 0 && !tc.id.startsWith("srvtoolu_")) allToolCallIds.add(tc.id);
193
+ }
194
+ if (Array.isArray(msgAny.content)) {
195
+ for (const block of msgAny.content) if (typeof block === "object" && (block.type === "tool_use" || block.type === "tool_call") && typeof block.id === "string" && !block.id.startsWith("srvtoolu_")) allToolCallIds.add(block.id);
196
+ }
197
+ const toolCallId = msgAny.tool_call_id;
198
+ if (typeof toolCallId === "string" && toolCallId.length > 0) allToolResultIds.add(toolCallId);
199
+ }
200
+ let hasOrphans = false;
201
+ for (const id of allToolCallIds) if (!allToolResultIds.has(id)) {
202
+ hasOrphans = true;
203
+ break;
204
+ }
205
+ if (!hasOrphans) {
206
+ for (const id of allToolResultIds) if (!allToolCallIds.has(id)) {
207
+ hasOrphans = true;
208
+ break;
209
+ }
210
+ }
211
+ if (!hasOrphans) return messages;
212
+ const result = [];
213
+ const strippedAiIndices = /* @__PURE__ */ new Set();
214
+ for (const msg of messages) {
215
+ const msgAny = msg;
216
+ const msgType = typeof msg.getType === "function" ? msg.getType() : msgAny.role ?? msgAny._type;
217
+ const toolCallId = msgAny.tool_call_id;
218
+ if ((msgType === "tool" || msg instanceof _langchain_core_messages.ToolMessage) && typeof toolCallId === "string" && !allToolCallIds.has(toolCallId)) continue;
219
+ const toolCalls = msgAny.tool_calls;
220
+ if ((msgType === "ai" || msgType === "assistant" || msg instanceof _langchain_core_messages.AIMessage) && Array.isArray(toolCalls) && toolCalls.length > 0) {
221
+ if (toolCalls.some((tc) => typeof tc.id === "string" && !allToolResultIds.has(tc.id))) {
222
+ if (msg instanceof _langchain_core_messages.AIMessage) {
223
+ const stripped = stripOrphanToolUseBlocks(msg, allToolResultIds);
224
+ if (stripped != null) {
225
+ strippedAiIndices.add(result.length);
226
+ result.push(stripped);
227
+ }
228
+ continue;
229
+ }
230
+ const keptToolCalls = toolCalls.filter((tc) => typeof tc.id === "string" && allToolResultIds.has(tc.id));
231
+ const keptContent = Array.isArray(msgAny.content) ? msgAny.content.filter((block) => {
232
+ if (typeof block !== "object") return true;
233
+ if ((block.type === "tool_use" || block.type === "tool_call") && typeof block.id === "string") return allToolResultIds.has(block.id);
234
+ return true;
235
+ }) : msgAny.content;
236
+ if (keptToolCalls.length === 0 && Array.isArray(keptContent) && keptContent.length === 0) continue;
237
+ strippedAiIndices.add(result.length);
238
+ const patched = Object.create(Object.getPrototypeOf(msg), Object.getOwnPropertyDescriptors(msg));
239
+ patched.tool_calls = keptToolCalls.length > 0 ? keptToolCalls : [];
240
+ patched.content = keptContent;
241
+ result.push(patched);
242
+ continue;
243
+ }
244
+ }
245
+ result.push(msg);
246
+ }
247
+ while (result.length > 0 && strippedAiIndices.has(result.length - 1)) result.pop();
248
+ return result;
348
249
  }
349
250
  /**
350
- * Truncates an oversized tool_use `input` field using head+tail, preserving
351
- * it as a valid JSON object. Head gets ~70%, tail gets ~30% so the model
352
- * sees both the beginning (what was called) and end (closing structure/values).
353
- * Falls back to head-only when the budget is too small for a meaningful tail.
354
- */
251
+ * Truncates an oversized tool_use `input` field using head+tail, preserving
252
+ * it as a valid JSON object. Head gets ~70%, tail gets ~30% so the model
253
+ * sees both the beginning (what was called) and end (closing structure/values).
254
+ * Falls back to head-only when the budget is too small for a meaningful tail.
255
+ */
355
256
  function isIndexInContext(arrayA, arrayB, targetIndex) {
356
- const startingIndexInA = arrayA.length - arrayB.length;
357
- return targetIndex >= startingIndexInA;
257
+ return targetIndex >= arrayA.length - arrayB.length;
358
258
  }
359
259
  function addThinkingBlock(message, thinkingBlock) {
360
- const content = Array.isArray(message.content)
361
- ? message.content
362
- : [
363
- {
364
- type: _enum.ContentTypes.TEXT,
365
- text: message.content,
366
- },
367
- ];
368
- /** Edge case, the message already has the thinking block */
369
- if (content[0]?.type === thinkingBlock.type) {
370
- return message;
371
- }
372
- content.unshift(thinkingBlock);
373
- return new messages.AIMessage({
374
- ...message,
375
- content: langchain.toLangChainContent(content),
376
- });
260
+ const content = Array.isArray(message.content) ? message.content : [{
261
+ type: "text",
262
+ text: message.content
263
+ }];
264
+ /** Edge case, the message already has the thinking block */
265
+ if (content[0]?.type === thinkingBlock.type) return message;
266
+ content.unshift(thinkingBlock);
267
+ return new _langchain_core_messages.AIMessage({
268
+ ...message,
269
+ content: require_langchain.toLangChainContent(content)
270
+ });
377
271
  }
378
272
  /**
379
- * Calculates the total tokens from a single usage object
380
- *
381
- * @param usage The usage metadata object containing token information
382
- * @returns An object containing the total input and output tokens
383
- */
273
+ * Calculates the total tokens from a single usage object
274
+ *
275
+ * @param usage The usage metadata object containing token information
276
+ * @returns An object containing the total input and output tokens
277
+ */
384
278
  function calculateTotalTokens(usage) {
385
- const baseInputTokens = Number(usage.input_tokens) || 0;
386
- const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
387
- const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
388
- const totalOutputTokens = Number(usage.output_tokens) || 0;
389
- const cacheSum = cacheCreation + cacheRead;
390
- // Anthropic: input_tokens excludes cache, cache_read can be much larger than input_tokens.
391
- // OpenAI: input_tokens includes cache, cache_read is always <= input_tokens.
392
- const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
393
- const totalInputTokens = cacheIsAdditive
394
- ? baseInputTokens + cacheSum
395
- : baseInputTokens;
396
- return {
397
- input_tokens: totalInputTokens,
398
- output_tokens: totalOutputTokens,
399
- total_tokens: totalInputTokens + totalOutputTokens,
400
- };
279
+ const baseInputTokens = Number(usage.input_tokens) || 0;
280
+ const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
281
+ const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
282
+ const totalOutputTokens = Number(usage.output_tokens) || 0;
283
+ const cacheSum = cacheCreation + cacheRead;
284
+ const totalInputTokens = cacheSum > 0 && cacheSum > baseInputTokens ? baseInputTokens + cacheSum : baseInputTokens;
285
+ return {
286
+ input_tokens: totalInputTokens,
287
+ output_tokens: totalOutputTokens,
288
+ total_tokens: totalInputTokens + totalOutputTokens
289
+ };
401
290
  }
402
291
  /**
403
- * Locates a reasoning block in assistant content. Reasoning blocks carry
404
- * provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
405
- * OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
406
- * emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
407
- * even though their blocks are `reasoning_content` and aren't normalized
408
- * upstream, so for the `THINKING` case we also accept `reasoning_content` — this
409
- * is what fixes issue #191.
410
- *
411
- * The broadening is intentionally one-directional. A Bedrock run
412
- * (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
413
- * Bedrock input converter rejects `thinking` blocks outright
414
- * (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
415
- * surviving message would make the request fail before it is sent.
416
- */
292
+ * Locates a reasoning block in assistant content. Reasoning blocks carry
293
+ * provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
294
+ * OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
295
+ * emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
296
+ * even though their blocks are `reasoning_content` and aren't normalized
297
+ * upstream, so for the `THINKING` case we also accept `reasoning_content` — this
298
+ * is what fixes issue #191.
299
+ *
300
+ * The broadening is intentionally one-directional. A Bedrock run
301
+ * (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
302
+ * Bedrock input converter rejects `thinking` blocks outright
303
+ * (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
304
+ * surviving message would make the request fail before it is sent.
305
+ */
417
306
  function findReasoningBlock(content, reasoningType) {
418
- return content.find((part) => part.type === reasoningType ||
419
- (reasoningType === _enum.ContentTypes.THINKING &&
420
- part.type === _enum.ContentTypes.REASONING_CONTENT));
307
+ return content.find((part) => part.type === reasoningType || reasoningType === "thinking" && part.type === "reasoning_content");
421
308
  }
422
309
  /**
423
- * Processes an array of messages and returns a context of messages that fit within a specified token limit.
424
- * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
425
- *
426
- * @param options Configuration options for processing messages
427
- * @returns Object containing the message context, remaining tokens, messages not included, and summary index
428
- */
429
- function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex = -1, reasoningType = _enum.ContentTypes.THINKING, instructionTokens: _instructionTokens = 0, }) {
430
- // Every reply is primed with <|start|>assistant<|message|>, so we
431
- // start with 3 tokens for the label after all messages have been counted.
432
- let currentTokenCount = 3;
433
- const instructions = _messages[0]?.getType() === 'system' ? _messages[0] : undefined;
434
- const instructionsTokenCount = instructions != null ? (indexTokenCountMap[0] ?? 0) : _instructionTokens;
435
- const initialContextTokens = maxContextTokens - instructionsTokenCount;
436
- let remainingContextTokens = initialContextTokens;
437
- let startType = _startType;
438
- const originalLength = _messages.length;
439
- const messages$1 = [..._messages];
440
- /**
441
- * IMPORTANT: this context array gets reversed at the end, since the latest messages get pushed first.
442
- *
443
- * This may be confusing to read, but it is done to ensure the context is in the correct order for the model.
444
- * */
445
- let context = [];
446
- let thinkingStartIndex = _thinkingStartIndex;
447
- let thinkingEndIndex = -1;
448
- let thinkingBlock;
449
- const endIndex = instructions != null ? 1 : 0;
450
- const prunedMemory = [];
451
- if (_thinkingStartIndex > -1) {
452
- const thinkingMessageContent = messages$1[_thinkingStartIndex]?.content;
453
- if (Array.isArray(thinkingMessageContent)) {
454
- thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
455
- }
456
- }
457
- if (currentTokenCount < remainingContextTokens) {
458
- let currentIndex = messages$1.length;
459
- while (messages$1.length > 0 &&
460
- currentTokenCount < remainingContextTokens &&
461
- currentIndex > endIndex) {
462
- currentIndex--;
463
- if (messages$1.length === 1 && instructions) {
464
- break;
465
- }
466
- const poppedMessage = messages$1.pop();
467
- if (!poppedMessage)
468
- continue;
469
- const messageType = poppedMessage.getType();
470
- if (thinkingEnabled === true &&
471
- thinkingEndIndex === -1 &&
472
- currentIndex === originalLength - 1 &&
473
- (messageType === 'ai' || messageType === 'tool')) {
474
- thinkingEndIndex = currentIndex;
475
- }
476
- if (thinkingEndIndex > -1 &&
477
- !thinkingBlock &&
478
- thinkingStartIndex < 0 &&
479
- messageType === 'ai' &&
480
- Array.isArray(poppedMessage.content)) {
481
- thinkingBlock = findReasoningBlock(poppedMessage.content, reasoningType);
482
- thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
483
- }
484
- /**
485
- * Exited the trailing assistant/tool sequence without finding a
486
- * thinking block. Anthropic does not require Claude to emit a
487
- * thinking block before every tool call, so the absence of one is
488
- * a valid sequence — clear thinkingEndIndex so the pruner does not
489
- * treat it as malformed.
490
- */
491
- if (thinkingEndIndex > -1 &&
492
- thinkingStartIndex < 0 &&
493
- !thinkingBlock &&
494
- messageType !== 'ai' &&
495
- messageType !== 'tool') {
496
- thinkingEndIndex = -1;
497
- }
498
- const tokenCount = indexTokenCountMap[currentIndex] ?? 0;
499
- if (prunedMemory.length === 0 &&
500
- currentTokenCount + tokenCount <= remainingContextTokens) {
501
- context.push(poppedMessage);
502
- currentTokenCount += tokenCount;
503
- }
504
- else {
505
- prunedMemory.push(poppedMessage);
506
- if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
507
- continue;
508
- }
509
- break;
510
- }
511
- }
512
- if (context[context.length - 1]?.getType() === 'tool') {
513
- startType = ['ai', 'human'];
514
- }
515
- if (startType != null && startType.length > 0 && context.length > 0) {
516
- let requiredTypeIndex = -1;
517
- let totalTokens = 0;
518
- for (let i = context.length - 1; i >= 0; i--) {
519
- const currentType = context[i]?.getType() ?? '';
520
- if (Array.isArray(startType)
521
- ? startType.includes(currentType)
522
- : currentType === startType) {
523
- requiredTypeIndex = i + 1;
524
- break;
525
- }
526
- const originalIndex = originalLength - 1 - i;
527
- totalTokens += indexTokenCountMap[originalIndex] ?? 0;
528
- }
529
- if (requiredTypeIndex > 0) {
530
- currentTokenCount -= totalTokens;
531
- context = context.slice(0, requiredTypeIndex);
532
- }
533
- }
534
- }
535
- if (instructions && originalLength > 0) {
536
- context.push(_messages[0]);
537
- messages$1.shift();
538
- }
539
- // The backward iteration pushed messages in reverse chronological order
540
- // (newest first). Restore correct chronological order before prepending
541
- // the remaining (older) messages so that messagesToRefine is always
542
- // ordered oldest → newest. Without this, callers that rely on
543
- // messagesToRefine order (e.g. the summarization node extracting the
544
- // latest turn) would see tool_use/tool_result pairs in the wrong order.
545
- prunedMemory.reverse();
546
- if (messages$1.length > 0) {
547
- prunedMemory.unshift(...messages$1);
548
- }
549
- remainingContextTokens -= currentTokenCount;
550
- const result = {
551
- remainingContextTokens,
552
- context: [],
553
- messagesToRefine: prunedMemory,
554
- };
555
- if (thinkingStartIndex > -1) {
556
- result.thinkingStartIndex = thinkingStartIndex;
557
- }
558
- if (prunedMemory.length === 0 ||
559
- thinkingEndIndex < 0 ||
560
- (thinkingStartIndex > -1 &&
561
- isIndexInContext(_messages, context, thinkingStartIndex))) {
562
- result.context = context.reverse();
563
- return result;
564
- }
565
- /**
566
- * A trailing reasoning sequence was detected but its block could not be
567
- * located in the surviving context. Rather than throw — which permanently
568
- * bricks the conversation, re-firing on every retry of the same thread (see
569
- * issue #191) — return the partially-pruned context and let the provider
570
- * surface a real, recoverable error if the payload is genuinely malformed.
571
- * Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
572
- * Qwen) proceed. The pruner cannot know which applies, so it must not be the
573
- * one to make the failure fatal.
574
- */
575
- if ((thinkingEndIndex > -1 && thinkingStartIndex < 0) || !thinkingBlock) {
576
- /**
577
- * No block was located, so any `thinkingStartIndex` set above came from a
578
- * stale carried-over index pointing at a block-less message. Drop it:
579
- * `createPruneMessages` persists the returned index as
580
- * `runThinkingStartIndex`, and a stale value would suppress the trailing
581
- * scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
582
- * block to be missed and never reattached.
583
- */
584
- delete result.thinkingStartIndex;
585
- result.context = context.reverse();
586
- return result;
587
- }
588
- let assistantIndex = -1;
589
- for (let i = 0; i < context.length; i++) {
590
- const currentMessage = context[i];
591
- const type = currentMessage?.getType();
592
- if (type === 'ai') {
593
- assistantIndex = i;
594
- }
595
- if (assistantIndex > -1 && (type === 'human' || type === 'system')) {
596
- break;
597
- }
598
- }
599
- if (assistantIndex === -1) {
600
- // No AI messages survived pruning — skip thinking block reattachment.
601
- // The caller handles empty/insufficient context via overflow recovery.
602
- result.context = context.reverse();
603
- return result;
604
- }
605
- thinkingStartIndex = originalLength - 1 - assistantIndex;
606
- const thinkingTokenCount = tokenCounter(new messages.AIMessage({ content: langchain.toLangChainContent([thinkingBlock]) }));
607
- const newRemainingCount = remainingContextTokens - thinkingTokenCount;
608
- const newMessage = addThinkingBlock(context[assistantIndex], thinkingBlock);
609
- context[assistantIndex] = newMessage;
610
- if (newRemainingCount > 0) {
611
- result.context = context.reverse();
612
- return result;
613
- }
614
- const thinkingMessage = context[assistantIndex];
615
- const newThinkingMessageTokenCount = (indexTokenCountMap[thinkingStartIndex] ?? 0) + thinkingTokenCount;
616
- remainingContextTokens = initialContextTokens - newThinkingMessageTokenCount;
617
- currentTokenCount = 3;
618
- let newContext = [];
619
- const secondRoundMessages = [..._messages];
620
- let currentIndex = secondRoundMessages.length;
621
- while (secondRoundMessages.length > 0 &&
622
- currentTokenCount < remainingContextTokens &&
623
- currentIndex > thinkingStartIndex) {
624
- currentIndex--;
625
- const poppedMessage = secondRoundMessages.pop();
626
- if (!poppedMessage)
627
- continue;
628
- const tokenCount = indexTokenCountMap[currentIndex] ?? 0;
629
- if (currentTokenCount + tokenCount <= remainingContextTokens) {
630
- newContext.push(poppedMessage);
631
- currentTokenCount += tokenCount;
632
- }
633
- else {
634
- messages$1.push(poppedMessage);
635
- break;
636
- }
637
- }
638
- const firstMessage = newContext[newContext.length - 1];
639
- const firstMessageType = newContext[newContext.length - 1].getType();
640
- if (firstMessageType === 'tool') {
641
- startType = ['ai', 'human'];
642
- }
643
- if (startType != null && startType.length > 0 && newContext.length > 0) {
644
- let requiredTypeIndex = -1;
645
- let totalTokens = 0;
646
- for (let i = newContext.length - 1; i >= 0; i--) {
647
- const currentType = newContext[i]?.getType() ?? '';
648
- if (Array.isArray(startType)
649
- ? startType.includes(currentType)
650
- : currentType === startType) {
651
- requiredTypeIndex = i + 1;
652
- break;
653
- }
654
- const originalIndex = originalLength - 1 - i;
655
- totalTokens += indexTokenCountMap[originalIndex] ?? 0;
656
- }
657
- if (requiredTypeIndex > 0) {
658
- currentTokenCount -= totalTokens;
659
- newContext = newContext.slice(0, requiredTypeIndex);
660
- }
661
- }
662
- if (firstMessageType === 'ai') {
663
- const newMessage = addThinkingBlock(firstMessage, thinkingBlock);
664
- newContext[newContext.length - 1] = newMessage;
665
- }
666
- else {
667
- newContext.push(thinkingMessage);
668
- }
669
- if (instructions && originalLength > 0) {
670
- newContext.push(_messages[0]);
671
- secondRoundMessages.shift();
672
- }
673
- result.context = newContext.reverse();
674
- return result;
310
+ * Processes an array of messages and returns a context of messages that fit within a specified token limit.
311
+ * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
312
+ *
313
+ * @param options Configuration options for processing messages
314
+ * @returns Object containing the message context, remaining tokens, messages not included, and summary index
315
+ */
316
+ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, indexTokenCountMap, startType: _startType, thinkingEnabled, tokenCounter, thinkingStartIndex: _thinkingStartIndex = -1, reasoningType = "thinking", instructionTokens: _instructionTokens = 0 }) {
317
+ let currentTokenCount = 3;
318
+ const instructions = _messages[0]?.getType() === "system" ? _messages[0] : void 0;
319
+ const initialContextTokens = maxContextTokens - (instructions != null ? indexTokenCountMap[0] ?? 0 : _instructionTokens);
320
+ let remainingContextTokens = initialContextTokens;
321
+ let startType = _startType;
322
+ const originalLength = _messages.length;
323
+ const messages = [..._messages];
324
+ /**
325
+ * IMPORTANT: this context array gets reversed at the end, since the latest messages get pushed first.
326
+ *
327
+ * This may be confusing to read, but it is done to ensure the context is in the correct order for the model.
328
+ * */
329
+ let context = [];
330
+ let thinkingStartIndex = _thinkingStartIndex;
331
+ let thinkingEndIndex = -1;
332
+ let thinkingBlock;
333
+ const endIndex = instructions != null ? 1 : 0;
334
+ const prunedMemory = [];
335
+ if (_thinkingStartIndex > -1) {
336
+ const thinkingMessageContent = messages[_thinkingStartIndex]?.content;
337
+ if (Array.isArray(thinkingMessageContent)) thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
338
+ }
339
+ if (currentTokenCount < remainingContextTokens) {
340
+ let currentIndex = messages.length;
341
+ while (messages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > endIndex) {
342
+ currentIndex--;
343
+ if (messages.length === 1 && instructions) break;
344
+ const poppedMessage = messages.pop();
345
+ if (!poppedMessage) continue;
346
+ const messageType = poppedMessage.getType();
347
+ if (thinkingEnabled === true && thinkingEndIndex === -1 && currentIndex === originalLength - 1 && (messageType === "ai" || messageType === "tool")) thinkingEndIndex = currentIndex;
348
+ if (thinkingEndIndex > -1 && !thinkingBlock && thinkingStartIndex < 0 && messageType === "ai" && Array.isArray(poppedMessage.content)) {
349
+ thinkingBlock = findReasoningBlock(poppedMessage.content, reasoningType);
350
+ thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
351
+ }
352
+ /**
353
+ * Exited the trailing assistant/tool sequence without finding a
354
+ * thinking block. Anthropic does not require Claude to emit a
355
+ * thinking block before every tool call, so the absence of one is
356
+ * a valid sequence — clear thinkingEndIndex so the pruner does not
357
+ * treat it as malformed.
358
+ */
359
+ if (thinkingEndIndex > -1 && thinkingStartIndex < 0 && !thinkingBlock && messageType !== "ai" && messageType !== "tool") thinkingEndIndex = -1;
360
+ const tokenCount = indexTokenCountMap[currentIndex] ?? 0;
361
+ if (prunedMemory.length === 0 && currentTokenCount + tokenCount <= remainingContextTokens) {
362
+ context.push(poppedMessage);
363
+ currentTokenCount += tokenCount;
364
+ } else {
365
+ prunedMemory.push(poppedMessage);
366
+ if (thinkingEndIndex > -1 && thinkingStartIndex < 0) continue;
367
+ break;
368
+ }
369
+ }
370
+ if (context[context.length - 1]?.getType() === "tool") startType = ["ai", "human"];
371
+ if (startType != null && startType.length > 0 && context.length > 0) {
372
+ let requiredTypeIndex = -1;
373
+ let totalTokens = 0;
374
+ for (let i = context.length - 1; i >= 0; i--) {
375
+ const currentType = context[i]?.getType() ?? "";
376
+ if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
377
+ requiredTypeIndex = i + 1;
378
+ break;
379
+ }
380
+ const originalIndex = originalLength - 1 - i;
381
+ totalTokens += indexTokenCountMap[originalIndex] ?? 0;
382
+ }
383
+ if (requiredTypeIndex > 0) {
384
+ currentTokenCount -= totalTokens;
385
+ context = context.slice(0, requiredTypeIndex);
386
+ }
387
+ }
388
+ }
389
+ if (instructions && originalLength > 0) {
390
+ context.push(_messages[0]);
391
+ messages.shift();
392
+ }
393
+ prunedMemory.reverse();
394
+ if (messages.length > 0) prunedMemory.unshift(...messages);
395
+ remainingContextTokens -= currentTokenCount;
396
+ const result = {
397
+ remainingContextTokens,
398
+ context: [],
399
+ messagesToRefine: prunedMemory
400
+ };
401
+ if (thinkingStartIndex > -1) result.thinkingStartIndex = thinkingStartIndex;
402
+ if (prunedMemory.length === 0 || thinkingEndIndex < 0 || thinkingStartIndex > -1 && isIndexInContext(_messages, context, thinkingStartIndex)) {
403
+ result.context = context.reverse();
404
+ return result;
405
+ }
406
+ /**
407
+ * A trailing reasoning sequence was detected but its block could not be
408
+ * located in the surviving context. Rather than throw — which permanently
409
+ * bricks the conversation, re-firing on every retry of the same thread (see
410
+ * issue #191) — return the partially-pruned context and let the provider
411
+ * surface a real, recoverable error if the payload is genuinely malformed.
412
+ * Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
413
+ * Qwen) proceed. The pruner cannot know which applies, so it must not be the
414
+ * one to make the failure fatal.
415
+ */
416
+ if (thinkingEndIndex > -1 && thinkingStartIndex < 0 || !thinkingBlock) {
417
+ /**
418
+ * No block was located, so any `thinkingStartIndex` set above came from a
419
+ * stale carried-over index pointing at a block-less message. Drop it:
420
+ * `createPruneMessages` persists the returned index as
421
+ * `runThinkingStartIndex`, and a stale value would suppress the trailing
422
+ * scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
423
+ * block to be missed and never reattached.
424
+ */
425
+ delete result.thinkingStartIndex;
426
+ result.context = context.reverse();
427
+ return result;
428
+ }
429
+ let assistantIndex = -1;
430
+ for (let i = 0; i < context.length; i++) {
431
+ const type = context[i]?.getType();
432
+ if (type === "ai") assistantIndex = i;
433
+ if (assistantIndex > -1 && (type === "human" || type === "system")) break;
434
+ }
435
+ if (assistantIndex === -1) {
436
+ result.context = context.reverse();
437
+ return result;
438
+ }
439
+ thinkingStartIndex = originalLength - 1 - assistantIndex;
440
+ const thinkingTokenCount = tokenCounter(new _langchain_core_messages.AIMessage({ content: require_langchain.toLangChainContent([thinkingBlock]) }));
441
+ const newRemainingCount = remainingContextTokens - thinkingTokenCount;
442
+ const newMessage = addThinkingBlock(context[assistantIndex], thinkingBlock);
443
+ context[assistantIndex] = newMessage;
444
+ if (newRemainingCount > 0) {
445
+ result.context = context.reverse();
446
+ return result;
447
+ }
448
+ const thinkingMessage = context[assistantIndex];
449
+ remainingContextTokens = initialContextTokens - ((indexTokenCountMap[thinkingStartIndex] ?? 0) + thinkingTokenCount);
450
+ currentTokenCount = 3;
451
+ let newContext = [];
452
+ const secondRoundMessages = [..._messages];
453
+ let currentIndex = secondRoundMessages.length;
454
+ while (secondRoundMessages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > thinkingStartIndex) {
455
+ currentIndex--;
456
+ const poppedMessage = secondRoundMessages.pop();
457
+ if (!poppedMessage) continue;
458
+ const tokenCount = indexTokenCountMap[currentIndex] ?? 0;
459
+ if (currentTokenCount + tokenCount <= remainingContextTokens) {
460
+ newContext.push(poppedMessage);
461
+ currentTokenCount += tokenCount;
462
+ } else {
463
+ messages.push(poppedMessage);
464
+ break;
465
+ }
466
+ }
467
+ const firstMessage = newContext[newContext.length - 1];
468
+ const firstMessageType = newContext[newContext.length - 1].getType();
469
+ if (firstMessageType === "tool") startType = ["ai", "human"];
470
+ if (startType != null && startType.length > 0 && newContext.length > 0) {
471
+ let requiredTypeIndex = -1;
472
+ let totalTokens = 0;
473
+ for (let i = newContext.length - 1; i >= 0; i--) {
474
+ const currentType = newContext[i]?.getType() ?? "";
475
+ if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
476
+ requiredTypeIndex = i + 1;
477
+ break;
478
+ }
479
+ const originalIndex = originalLength - 1 - i;
480
+ totalTokens += indexTokenCountMap[originalIndex] ?? 0;
481
+ }
482
+ if (requiredTypeIndex > 0) {
483
+ currentTokenCount -= totalTokens;
484
+ newContext = newContext.slice(0, requiredTypeIndex);
485
+ }
486
+ }
487
+ if (firstMessageType === "ai") {
488
+ const newMessage = addThinkingBlock(firstMessage, thinkingBlock);
489
+ newContext[newContext.length - 1] = newMessage;
490
+ } else newContext.push(thinkingMessage);
491
+ if (instructions && originalLength > 0) {
492
+ newContext.push(_messages[0]);
493
+ secondRoundMessages.shift();
494
+ }
495
+ result.context = newContext.reverse();
496
+ return result;
675
497
  }
676
498
  function checkValidNumber(value) {
677
- return typeof value === 'number' && !isNaN(value) && value > 0;
499
+ return typeof value === "number" && !isNaN(value) && value > 0;
678
500
  }
679
501
  /**
680
- * Observation masking: replaces consumed ToolMessage content with tight
681
- * head+tail truncations that serve as informative placeholders.
682
- *
683
- * A ToolMessage is "consumed" when a subsequent AI message exists that is NOT
684
- * purely tool calls — meaning the model has already read and acted on the
685
- * result. Unconsumed results (the latest tool outputs the model hasn't
686
- * responded to yet) are left intact so the model can still use them.
687
- *
688
- * AI messages are never masked — they contain the model's own reasoning and
689
- * conclusions, which is what prevents the model from repeating work after
690
- * its tool results are masked.
691
- *
692
- * @returns The number of tool messages that were masked.
693
- */
502
+ * Observation masking: replaces consumed ToolMessage content with tight
503
+ * head+tail truncations that serve as informative placeholders.
504
+ *
505
+ * A ToolMessage is "consumed" when a subsequent AI message exists that is NOT
506
+ * purely tool calls — meaning the model has already read and acted on the
507
+ * result. Unconsumed results (the latest tool outputs the model hasn't
508
+ * responded to yet) are left intact so the model can still use them.
509
+ *
510
+ * AI messages are never masked — they contain the model's own reasoning and
511
+ * conclusions, which is what prevents the model from repeating work after
512
+ * its tool results are masked.
513
+ *
514
+ * @returns The number of tool messages that were masked.
515
+ */
694
516
  function maskConsumedToolResults(params) {
695
- const { messages: messages$1, indexTokenCountMap, tokenCounter } = params;
696
- let maskedCount = 0;
697
- // Pass 1 (backward): identify consumed tool message indices.
698
- // A ToolMessage is "consumed" once we've seen a subsequent AI message with
699
- // substantive text content (not just tool calls).
700
- // Collected in forward order (oldest first) for recency weighting.
701
- let seenNonToolCallAI = false;
702
- const consumedIndices = [];
703
- for (let i = messages$1.length - 1; i >= 0; i--) {
704
- const msg = messages$1[i];
705
- const type = msg.getType();
706
- if (type === 'ai') {
707
- const hasText = typeof msg.content === 'string'
708
- ? msg.content.trim().length > 0
709
- : Array.isArray(msg.content) &&
710
- msg.content.some((b) => typeof b === 'object' &&
711
- b.type === 'text' &&
712
- typeof b.text === 'string' &&
713
- b.text.trim().length >
714
- 0);
715
- if (hasText) {
716
- seenNonToolCallAI = true;
717
- }
718
- }
719
- else if (type === 'tool' && seenNonToolCallAI) {
720
- consumedIndices.push(i);
721
- }
722
- }
723
- if (consumedIndices.length === 0) {
724
- return 0;
725
- }
726
- consumedIndices.reverse();
727
- const totalBudgetChars = params.availableRawBudget != null && params.availableRawBudget > 0
728
- ? params.availableRawBudget * 4
729
- : 0;
730
- const count = consumedIndices.length;
731
- for (let c = 0; c < count; c++) {
732
- const i = consumedIndices[c];
733
- const message = messages$1[i];
734
- const content = message.content;
735
- if (typeof content !== 'string') {
736
- continue;
737
- }
738
- let maxChars;
739
- if (totalBudgetChars > 0) {
740
- const position = count > 1 ? c / (count - 1) : 1;
741
- const weight = 0.2 + 0.8 * position;
742
- const totalWeight = count > 1 ? 0.6 * count : 1;
743
- const share = (weight / totalWeight) * totalBudgetChars;
744
- maxChars = Math.max(MASKED_RESULT_MAX_CHARS, Math.floor(share));
745
- }
746
- else {
747
- maxChars = MASKED_RESULT_MAX_CHARS;
748
- }
749
- if (content.length <= maxChars) {
750
- continue;
751
- }
752
- if (params.originalContentStore && !params.originalContentStore.has(i)) {
753
- params.originalContentStore.set(i, content);
754
- if (params.onContentStored) {
755
- params.onContentStored(content.length);
756
- }
757
- }
758
- const cloned = new messages.ToolMessage({
759
- content: truncation.truncateToolResultContent(content, maxChars),
760
- tool_call_id: message.tool_call_id,
761
- name: message.name,
762
- id: message.id,
763
- additional_kwargs: message.additional_kwargs,
764
- response_metadata: message.response_metadata,
765
- });
766
- messages$1[i] = cloned;
767
- indexTokenCountMap[i] = tokenCounter(cloned);
768
- maskedCount++;
769
- }
770
- return maskedCount;
517
+ const { messages, indexTokenCountMap, tokenCounter } = params;
518
+ let maskedCount = 0;
519
+ let seenNonToolCallAI = false;
520
+ const consumedIndices = [];
521
+ for (let i = messages.length - 1; i >= 0; i--) {
522
+ const msg = messages[i];
523
+ const type = msg.getType();
524
+ if (type === "ai") {
525
+ if (typeof msg.content === "string" ? msg.content.trim().length > 0 : Array.isArray(msg.content) && msg.content.some((b) => typeof b === "object" && b.type === "text" && typeof b.text === "string" && b.text.trim().length > 0)) seenNonToolCallAI = true;
526
+ } else if (type === "tool" && seenNonToolCallAI) consumedIndices.push(i);
527
+ }
528
+ if (consumedIndices.length === 0) return 0;
529
+ consumedIndices.reverse();
530
+ const totalBudgetChars = params.availableRawBudget != null && params.availableRawBudget > 0 ? params.availableRawBudget * 4 : 0;
531
+ const count = consumedIndices.length;
532
+ for (let c = 0; c < count; c++) {
533
+ const i = consumedIndices[c];
534
+ const message = messages[i];
535
+ const content = message.content;
536
+ if (typeof content !== "string") continue;
537
+ let maxChars;
538
+ if (totalBudgetChars > 0) {
539
+ const share = (.2 + .8 * (count > 1 ? c / (count - 1) : 1)) / (count > 1 ? .6 * count : 1) * totalBudgetChars;
540
+ maxChars = Math.max(MASKED_RESULT_MAX_CHARS, Math.floor(share));
541
+ } else maxChars = MASKED_RESULT_MAX_CHARS;
542
+ if (content.length <= maxChars) continue;
543
+ if (params.originalContentStore && !params.originalContentStore.has(i)) {
544
+ params.originalContentStore.set(i, content);
545
+ if (params.onContentStored) params.onContentStored(content.length);
546
+ }
547
+ const cloned = new _langchain_core_messages.ToolMessage({
548
+ content: require_truncation.truncateToolResultContent(content, maxChars),
549
+ tool_call_id: message.tool_call_id,
550
+ name: message.name,
551
+ id: message.id,
552
+ additional_kwargs: message.additional_kwargs,
553
+ response_metadata: message.response_metadata
554
+ });
555
+ messages[i] = cloned;
556
+ indexTokenCountMap[i] = tokenCounter(cloned);
557
+ maskedCount++;
558
+ }
559
+ return maskedCount;
771
560
  }
772
561
  /**
773
- * Pre-flight truncation: truncates oversized ToolMessage content before the
774
- * main backward-iteration pruning runs. Unlike the ingestion guard (which caps
775
- * at tool-execution time), pre-flight truncation applies per-turn based on the
776
- * current context window budget (which may have shrunk due to growing conversation).
777
- *
778
- * After truncation, recounts tokens via tokenCounter and updates indexTokenCountMap
779
- * so subsequent pruning works with accurate counts.
780
- *
781
- * @returns The number of tool messages that were truncated.
782
- */
562
+ * Pre-flight truncation: truncates oversized ToolMessage content before the
563
+ * main backward-iteration pruning runs. Unlike the ingestion guard (which caps
564
+ * at tool-execution time), pre-flight truncation applies per-turn based on the
565
+ * current context window budget (which may have shrunk due to growing conversation).
566
+ *
567
+ * After truncation, recounts tokens via tokenCounter and updates indexTokenCountMap
568
+ * so subsequent pruning works with accurate counts.
569
+ *
570
+ * @returns The number of tool messages that were truncated.
571
+ */
783
572
  function preFlightTruncateToolResults(params) {
784
- const { messages: messages$1, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
785
- const baseMaxChars = truncation.calculateMaxToolResultChars(maxContextTokens);
786
- let truncatedCount = 0;
787
- const toolIndices = [];
788
- for (let i = 0; i < messages$1.length; i++) {
789
- if (messages$1[i].getType() === 'tool') {
790
- toolIndices.push(i);
791
- }
792
- }
793
- for (let t = 0; t < toolIndices.length; t++) {
794
- const i = toolIndices[t];
795
- const message = messages$1[i];
796
- const content = message.content;
797
- if (typeof content !== 'string') {
798
- continue;
799
- }
800
- const position = toolIndices.length > 1 ? t / (toolIndices.length - 1) : 1;
801
- const recencyFactor = 0.2 + 0.8 * position;
802
- const maxChars = Math.max(200, Math.floor(baseMaxChars * recencyFactor));
803
- if (content.length <= maxChars) {
804
- continue;
805
- }
806
- const truncated = truncation.truncateToolResultContent(content, maxChars);
807
- const cloned = new messages.ToolMessage({
808
- content: truncated,
809
- tool_call_id: message.tool_call_id,
810
- name: message.name,
811
- id: message.id,
812
- additional_kwargs: message.additional_kwargs,
813
- response_metadata: message.response_metadata,
814
- });
815
- messages$1[i] = cloned;
816
- indexTokenCountMap[i] = tokenCounter(cloned);
817
- truncatedCount++;
818
- }
819
- return truncatedCount;
573
+ const { messages, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
574
+ const baseMaxChars = require_truncation.calculateMaxToolResultChars(maxContextTokens);
575
+ let truncatedCount = 0;
576
+ const toolIndices = [];
577
+ for (let i = 0; i < messages.length; i++) if (messages[i].getType() === "tool") toolIndices.push(i);
578
+ for (let t = 0; t < toolIndices.length; t++) {
579
+ const i = toolIndices[t];
580
+ const message = messages[i];
581
+ const content = message.content;
582
+ if (typeof content !== "string") continue;
583
+ const recencyFactor = .2 + .8 * (toolIndices.length > 1 ? t / (toolIndices.length - 1) : 1);
584
+ const maxChars = Math.max(200, Math.floor(baseMaxChars * recencyFactor));
585
+ if (content.length <= maxChars) continue;
586
+ const cloned = new _langchain_core_messages.ToolMessage({
587
+ content: require_truncation.truncateToolResultContent(content, maxChars),
588
+ tool_call_id: message.tool_call_id,
589
+ name: message.name,
590
+ id: message.id,
591
+ additional_kwargs: message.additional_kwargs,
592
+ response_metadata: message.response_metadata
593
+ });
594
+ messages[i] = cloned;
595
+ indexTokenCountMap[i] = tokenCounter(cloned);
596
+ truncatedCount++;
597
+ }
598
+ return truncatedCount;
820
599
  }
821
600
  /**
822
- * Pre-flight truncation: truncates oversized `tool_use` input fields in AI messages.
823
- *
824
- * Tool call inputs (arguments) can be very large — e.g., code evaluation payloads from
825
- * MCP tools like chrome-devtools. Since these tool calls have already been executed,
826
- * the model only needs a summary of what was called, not the full arguments. Truncating
827
- * them before pruning can prevent entire messages from being dropped.
828
- *
829
- * Uses 15% of the context window (in estimated characters, ~4 chars/token) as the
830
- * per-input cap, capped at 200K chars.
831
- *
832
- * @returns The number of AI messages that had tool_use inputs truncated.
833
- */
601
+ * Pre-flight truncation: truncates oversized `tool_use` input fields in AI messages.
602
+ *
603
+ * Tool call inputs (arguments) can be very large — e.g., code evaluation payloads from
604
+ * MCP tools like chrome-devtools. Since these tool calls have already been executed,
605
+ * the model only needs a summary of what was called, not the full arguments. Truncating
606
+ * them before pruning can prevent entire messages from being dropped.
607
+ *
608
+ * Uses 15% of the context window (in estimated characters, ~4 chars/token) as the
609
+ * per-input cap, capped at 200K chars.
610
+ *
611
+ * @returns The number of AI messages that had tool_use inputs truncated.
612
+ */
834
613
  function preFlightTruncateToolCallInputs(params) {
835
- const { messages: messages$1, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
836
- const maxInputChars = Math.min(Math.floor(maxContextTokens * 0.15) * 4, 200_000);
837
- let truncatedCount = 0;
838
- for (let i = 0; i < messages$1.length; i++) {
839
- const message = messages$1[i];
840
- if (message.getType() !== 'ai') {
841
- continue;
842
- }
843
- if (!Array.isArray(message.content)) {
844
- continue;
845
- }
846
- const originalContent = message.content;
847
- const state = { changed: false };
848
- const newContent = originalContent.map((block) => {
849
- if (typeof block !== 'object') {
850
- return block;
851
- }
852
- const record = block;
853
- if (record.type !== 'tool_use' && record.type !== 'tool_call') {
854
- return block;
855
- }
856
- const input = record.input;
857
- if (input == null) {
858
- return block;
859
- }
860
- const serialized = typeof input === 'string' ? input : JSON.stringify(input);
861
- if (serialized.length <= maxInputChars) {
862
- return block;
863
- }
864
- state.changed = true;
865
- // Replaces original input with { _truncated, _originalChars } —
866
- // safe because the tool call already executed in a prior turn.
867
- return {
868
- ...record,
869
- input: truncation.truncateToolInput(serialized, maxInputChars),
870
- };
871
- });
872
- if (!state.changed) {
873
- continue;
874
- }
875
- const aiMsg = message;
876
- const newToolCalls = (aiMsg.tool_calls ?? []).map((tc) => {
877
- const serializedArgs = JSON.stringify(tc.args);
878
- if (serializedArgs.length <= maxInputChars) {
879
- return tc;
880
- }
881
- // Replaces original args with { _truncated, _originalChars } —
882
- // safe because the tool call already executed in a prior turn.
883
- return {
884
- ...tc,
885
- args: truncation.truncateToolInput(serializedArgs, maxInputChars),
886
- };
887
- });
888
- messages$1[i] = new messages.AIMessage({
889
- ...aiMsg,
890
- content: langchain.toLangChainContent(newContent),
891
- tool_calls: newToolCalls.length > 0 ? newToolCalls : undefined,
892
- });
893
- indexTokenCountMap[i] = tokenCounter(messages$1[i]);
894
- truncatedCount++;
895
- }
896
- return truncatedCount;
614
+ const { messages, maxContextTokens, indexTokenCountMap, tokenCounter } = params;
615
+ const maxInputChars = Math.min(Math.floor(maxContextTokens * .15) * 4, 2e5);
616
+ let truncatedCount = 0;
617
+ for (let i = 0; i < messages.length; i++) {
618
+ const message = messages[i];
619
+ if (message.getType() !== "ai") continue;
620
+ if (!Array.isArray(message.content)) continue;
621
+ const originalContent = message.content;
622
+ const state = { changed: false };
623
+ const newContent = originalContent.map((block) => {
624
+ if (typeof block !== "object") return block;
625
+ const record = block;
626
+ if (record.type !== "tool_use" && record.type !== "tool_call") return block;
627
+ const input = record.input;
628
+ if (input == null) return block;
629
+ const serialized = typeof input === "string" ? input : JSON.stringify(input);
630
+ if (serialized.length <= maxInputChars) return block;
631
+ state.changed = true;
632
+ return {
633
+ ...record,
634
+ input: require_truncation.truncateToolInput(serialized, maxInputChars)
635
+ };
636
+ });
637
+ if (!state.changed) continue;
638
+ const aiMsg = message;
639
+ const newToolCalls = (aiMsg.tool_calls ?? []).map((tc) => {
640
+ const serializedArgs = JSON.stringify(tc.args);
641
+ if (serializedArgs.length <= maxInputChars) return tc;
642
+ return {
643
+ ...tc,
644
+ args: require_truncation.truncateToolInput(serializedArgs, maxInputChars)
645
+ };
646
+ });
647
+ messages[i] = new _langchain_core_messages.AIMessage({
648
+ ...aiMsg,
649
+ content: require_langchain.toLangChainContent(newContent),
650
+ tool_calls: newToolCalls.length > 0 ? newToolCalls : void 0
651
+ });
652
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
653
+ truncatedCount++;
654
+ }
655
+ return truncatedCount;
897
656
  }
898
657
  function createPruneMessages(factoryParams) {
899
- const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
900
- let lastTurnStartIndex = factoryParams.startIndex;
901
- let lastCutOffIndex = 0;
902
- let totalTokens = 0;
903
- for (const key in indexTokenCountMap) {
904
- totalTokens += indexTokenCountMap[key] ?? 0;
905
- }
906
- let runThinkingStartIndex = -1;
907
- /** Cumulative raw tiktoken tokens we've sent to the provider (messages only,
908
- * excludes instruction overhead and new outputs not yet seen by provider). */
909
- let cumulativeRawSent = 0;
910
- /** Cumulative provider-reported message tokens (providerInput - instructionOverhead). */
911
- let cumulativeProviderReported = 0;
912
- /** Stable calibration ratio = cumulativeProviderReported / cumulativeRawSent.
913
- * Converges monotonically as data accumulates. Falls back to seeded value. */
914
- let calibrationRatio = factoryParams.calibrationRatio != null && factoryParams.calibrationRatio > 0
915
- ? factoryParams.calibrationRatio
916
- : 1;
917
- /** Best observed instruction overhead from a near-zero variance turn.
918
- * Self-seeds from provider observations within the run. */
919
- let bestInstructionOverhead;
920
- let bestVarianceAbs = Infinity;
921
- /** Local estimate at the time bestInstructionOverhead was observed.
922
- * Used to invalidate the cached overhead when instructions change
923
- * mid-run (e.g. tool discovery adds tools to the bound set). */
924
- let bestInstructionEstimate;
925
- /** Original (pre-masking) tool result content keyed by message index.
926
- * Allows the summarizer to see full tool outputs even after masking
927
- * has truncated them in the live message array. Cleared when the
928
- * pruner is recreated after summarization. */
929
- const originalToolContent = new Map();
930
- let originalToolContentSize = 0;
931
- const contextPruningSettings$1 = contextPruningSettings.resolveContextPruningSettings(factoryParams.contextPruningConfig);
932
- return function pruneMessages(params) {
933
- if (params.messages.length === 0) {
934
- return {
935
- context: [],
936
- indexTokenCountMap,
937
- messagesToRefine: [],
938
- prePruneContextTokens: 0,
939
- remainingContextTokens: factoryParams.maxTokens,
940
- calibrationRatio,
941
- resolvedInstructionOverhead: bestInstructionOverhead,
942
- };
943
- }
944
- if (factoryParams.provider === _enum.Providers.OPENAI &&
945
- factoryParams.thinkingEnabled === true) {
946
- for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
947
- const m = params.messages[i];
948
- if (m.getType() === 'ai' &&
949
- typeof m.additional_kwargs.reasoning_content === 'string' &&
950
- Array.isArray(m.additional_kwargs.provider_specific_fields?.thinking_blocks) &&
951
- m.tool_calls &&
952
- (m.tool_calls?.length ?? 0) > 0) {
953
- const message = m;
954
- const thinkingBlocks = message.additional_kwargs.provider_specific_fields.thinking_blocks;
955
- const signature = thinkingBlocks?.[thinkingBlocks.length - 1].signature;
956
- const thinkingBlock = {
957
- signature,
958
- type: _enum.ContentTypes.THINKING,
959
- thinking: message.additional_kwargs.reasoning_content,
960
- };
961
- params.messages[i] = new messages.AIMessage({
962
- ...message,
963
- content: langchain.toLangChainContent([thinkingBlock]),
964
- additional_kwargs: {
965
- ...message.additional_kwargs,
966
- reasoning_content: undefined,
967
- },
968
- });
969
- }
970
- }
971
- }
972
- let currentUsage;
973
- if (params.usageMetadata &&
974
- (checkValidNumber(params.usageMetadata.input_tokens) ||
975
- (checkValidNumber(params.usageMetadata.input_token_details) &&
976
- (checkValidNumber(params.usageMetadata.input_token_details.cache_creation) ||
977
- checkValidNumber(params.usageMetadata.input_token_details.cache_read)))) &&
978
- checkValidNumber(params.usageMetadata.output_tokens)) {
979
- currentUsage = calculateTotalTokens(params.usageMetadata);
980
- }
981
- const newOutputs = new Set();
982
- let outputTokensAssigned = false;
983
- for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
984
- const message = params.messages[i];
985
- if (indexTokenCountMap[i] !== undefined) {
986
- continue;
987
- }
988
- // Assign output_tokens to the first uncounted AI message — this is the
989
- // model's response. Previous code blindly targeted lastTurnStartIndex
990
- // which could hit a pre-counted HumanMessage or miss the AI entirely.
991
- if (!outputTokensAssigned && currentUsage && message.getType() === 'ai') {
992
- indexTokenCountMap[i] = currentUsage.output_tokens;
993
- newOutputs.add(i);
994
- outputTokensAssigned = true;
995
- }
996
- else {
997
- // Always store raw tiktoken count — the map stays in raw space.
998
- // Budget decisions multiply by calibrationRatio on the fly.
999
- indexTokenCountMap[i] = factoryParams.tokenCounter(message);
1000
- if (currentUsage) {
1001
- newOutputs.add(i);
1002
- }
1003
- }
1004
- totalTokens += indexTokenCountMap[i] ?? 0;
1005
- }
1006
- // Cumulative calibration: accumulate raw tiktoken tokens and provider-
1007
- // reported tokens across turns. The ratio of the two running totals
1008
- // converges monotonically to the true provider multiplier — no EMA,
1009
- // no per-turn oscillation, no map mutation.
1010
- if (currentUsage && params.totalTokensFresh !== false) {
1011
- const instructionOverhead = factoryParams.getInstructionTokens?.() ?? 0;
1012
- const providerInputTokens = params.lastCallUsage?.inputTokens ?? currentUsage.input_tokens;
1013
- // Sum raw tiktoken counts for messages the provider saw (excludes
1014
- // new outputs from this turn — the provider hasn't seen them yet).
1015
- let rawSentThisTurn = 0;
1016
- const firstIsSystem = params.messages.length > 0 && params.messages[0].getType() === 'system';
1017
- if (firstIsSystem) {
1018
- rawSentThisTurn += indexTokenCountMap[0] ?? 0;
1019
- }
1020
- for (let i = lastCutOffIndex; i < params.messages.length; i++) {
1021
- if ((i === 0 && firstIsSystem) || newOutputs.has(i)) {
1022
- continue;
1023
- }
1024
- rawSentThisTurn += indexTokenCountMap[i] ?? 0;
1025
- }
1026
- const providerMessageTokens = Math.max(0, providerInputTokens - instructionOverhead);
1027
- if (rawSentThisTurn > 0 && providerMessageTokens > 0) {
1028
- cumulativeRawSent += rawSentThisTurn;
1029
- cumulativeProviderReported += providerMessageTokens;
1030
- const newRatio = cumulativeProviderReported / cumulativeRawSent;
1031
- calibrationRatio = Math.max(CALIBRATION_RATIO_MIN, Math.min(CALIBRATION_RATIO_MAX, newRatio));
1032
- }
1033
- const calibratedOurTotal = instructionOverhead + rawSentThisTurn * calibrationRatio;
1034
- const overallRatio = calibratedOurTotal > 0 ? providerInputTokens / calibratedOurTotal : 0;
1035
- const variancePct = Math.round((overallRatio - 1) * 100);
1036
- const absVariance = Math.abs(overallRatio - 1);
1037
- if (absVariance < bestVarianceAbs && rawSentThisTurn > 0) {
1038
- bestVarianceAbs = absVariance;
1039
- bestInstructionOverhead = Math.max(0, Math.round(providerInputTokens - rawSentThisTurn * calibrationRatio));
1040
- bestInstructionEstimate = factoryParams.getInstructionTokens?.() ?? 0;
1041
- }
1042
- factoryParams.log?.('debug', 'Calibration observed', {
1043
- providerInputTokens,
1044
- calibratedEstimate: Math.round(calibratedOurTotal),
1045
- variance: `${variancePct > 0 ? '+' : ''}${variancePct}%`,
1046
- calibrationRatio: Math.round(calibrationRatio * 100) / 100,
1047
- instructionOverhead,
1048
- cumulativeRawSent,
1049
- cumulativeProviderReported,
1050
- });
1051
- }
1052
- // Computed BEFORE pre-flight truncation so the effective budget can drive
1053
- // truncation thresholds — without this, thresholds based on maxTokens are
1054
- // too generous and leave individual messages larger than the actual budget.
1055
- const estimatedInstructionTokens = factoryParams.getInstructionTokens?.() ?? 0;
1056
- const estimateStable = bestInstructionEstimate != null &&
1057
- bestInstructionEstimate > 0 &&
1058
- Math.abs(estimatedInstructionTokens - bestInstructionEstimate) /
1059
- bestInstructionEstimate <
1060
- 0.1;
1061
- const currentInstructionTokens = bestInstructionOverhead != null &&
1062
- bestInstructionOverhead <= estimatedInstructionTokens &&
1063
- estimateStable
1064
- ? bestInstructionOverhead
1065
- : estimatedInstructionTokens;
1066
- const reserveRatio = factoryParams.reserveRatio ?? DEFAULT_RESERVE_RATIO;
1067
- const reserveTokens = reserveRatio > 0 && reserveRatio < 1
1068
- ? Math.round(factoryParams.maxTokens * reserveRatio)
1069
- : 0;
1070
- const pruningBudget = factoryParams.maxTokens - reserveTokens;
1071
- const effectiveMaxTokens = Math.max(0, pruningBudget - currentInstructionTokens);
1072
- let calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1073
- factoryParams.log?.('debug', 'Budget', {
1074
- maxTokens: factoryParams.maxTokens,
1075
- pruningBudget,
1076
- effectiveMax: effectiveMaxTokens,
1077
- instructionTokens: currentInstructionTokens,
1078
- messageCount: params.messages.length,
1079
- calibratedTotalTokens,
1080
- calibrationRatio: Math.round(calibrationRatio * 100) / 100,
1081
- });
1082
- // When instructions alone consume the entire budget, no message can
1083
- // fit regardless of truncation. Short-circuit: yield all messages for
1084
- // summarization and return an empty context so the Graph can route to
1085
- // the summarize node immediately instead of falling through to the
1086
- // emergency path that would reach the same outcome more expensively.
1087
- if (effectiveMaxTokens === 0 &&
1088
- factoryParams.summarizationEnabled === true &&
1089
- params.messages.length > 0) {
1090
- factoryParams.log?.('warn', 'Instructions consume entire budget — yielding all messages for summarization', {
1091
- instructionTokens: currentInstructionTokens,
1092
- pruningBudget,
1093
- messageCount: params.messages.length,
1094
- });
1095
- lastTurnStartIndex = params.messages.length;
1096
- return {
1097
- context: [],
1098
- indexTokenCountMap,
1099
- messagesToRefine: [...params.messages],
1100
- prePruneContextTokens: calibratedTotalTokens,
1101
- remainingContextTokens: 0,
1102
- contextPressure: pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0,
1103
- calibrationRatio,
1104
- resolvedInstructionOverhead: bestInstructionOverhead,
1105
- };
1106
- }
1107
- // ---------------------------------------------------------------------------
1108
- // Progressive context fading — inspired by Claude Code's staged compaction.
1109
- // Below 80%: no modifications, tool results retain full size.
1110
- // Above 80%: graduated truncation with increasing aggression per pressure band.
1111
- // Recency weighting ensures older results fade first, newer results last.
1112
- //
1113
- // At the gentlest level, truncation preserves most content (head+tail).
1114
- // At the most aggressive level, the result is effectively a one-line placeholder.
1115
- //
1116
- // 80%: gentle — budget factor 1.0, oldest get light truncation
1117
- // 85%: moderate — budget factor 0.50, older results shrink significantly
1118
- // 90%: aggressive — budget factor 0.20, most results heavily truncated
1119
- // 99%: emergency — budget factor 0.05, effectively placeholders for old results
1120
- // ---------------------------------------------------------------------------
1121
- totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
1122
- calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1123
- const contextPressure = pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0;
1124
- let preFlightResultCount = 0;
1125
- let preFlightInputCount = 0;
1126
- // -----------------------------------------------------------------------
1127
- // Observation masking (80%+ pressure, both paths):
1128
- // Replace consumed ToolMessage content with tight head+tail placeholders.
1129
- // AI messages stay intact so the model can read its own prior reasoning
1130
- // and won't repeat work. Unconsumed results (latest tool outputs the
1131
- // model hasn't acted on yet) stay full.
1132
- //
1133
- // When summarization is enabled, snapshot messages first so the
1134
- // summarizer can see the full originals when compaction fires.
1135
- // -----------------------------------------------------------------------
1136
- let observationsMasked = 0;
1137
- if (contextPressure >= PRESSURE_THRESHOLD_MASKING) {
1138
- const rawMessageBudget = calibrationRatio > 0
1139
- ? Math.floor(effectiveMaxTokens / calibrationRatio)
1140
- : effectiveMaxTokens;
1141
- // When summarization is enabled, use half the reserve ratio as extra
1142
- // masking headroom — the LLM keeps more context while the summarizer
1143
- // gets full content from originalToolContent regardless. The remaining
1144
- // half of the reserve covers estimation errors.
1145
- const reserveHeadroom = factoryParams.summarizationEnabled === true
1146
- ? Math.floor(rawMessageBudget *
1147
- (factoryParams.reserveRatio ?? DEFAULT_RESERVE_RATIO) *
1148
- 0.5)
1149
- : 0;
1150
- observationsMasked = maskConsumedToolResults({
1151
- messages: params.messages,
1152
- indexTokenCountMap,
1153
- tokenCounter: factoryParams.tokenCounter,
1154
- availableRawBudget: rawMessageBudget + reserveHeadroom,
1155
- originalContentStore: factoryParams.summarizationEnabled === true
1156
- ? originalToolContent
1157
- : undefined,
1158
- onContentStored: factoryParams.summarizationEnabled === true
1159
- ? (charLen) => {
1160
- originalToolContentSize += charLen;
1161
- while (originalToolContentSize > ORIGINAL_CONTENT_MAX_CHARS &&
1162
- originalToolContent.size > 0) {
1163
- const oldest = originalToolContent.keys().next();
1164
- if (oldest.done === true) {
1165
- break;
1166
- }
1167
- const removed = originalToolContent.get(oldest.value);
1168
- if (removed != null) {
1169
- originalToolContentSize -= removed.length;
1170
- }
1171
- originalToolContent.delete(oldest.value);
1172
- }
1173
- }
1174
- : undefined,
1175
- });
1176
- if (observationsMasked > 0) {
1177
- cumulativeRawSent = 0;
1178
- cumulativeProviderReported = 0;
1179
- }
1180
- }
1181
- if (contextPressure >= PRESSURE_THRESHOLD_MASKING &&
1182
- factoryParams.summarizationEnabled !== true) {
1183
- const budgetFactor = PRESSURE_BANDS.find(([threshold]) => contextPressure >= threshold)?.[1] ?? 1.0;
1184
- const baseBudget = Math.max(1024, Math.floor(effectiveMaxTokens * budgetFactor));
1185
- preFlightResultCount = preFlightTruncateToolResults({
1186
- messages: params.messages,
1187
- maxContextTokens: baseBudget,
1188
- indexTokenCountMap,
1189
- tokenCounter: factoryParams.tokenCounter,
1190
- });
1191
- preFlightInputCount = preFlightTruncateToolCallInputs({
1192
- messages: params.messages,
1193
- maxContextTokens: baseBudget,
1194
- indexTokenCountMap,
1195
- tokenCounter: factoryParams.tokenCounter,
1196
- });
1197
- }
1198
- if (factoryParams.contextPruningConfig?.enabled === true &&
1199
- factoryParams.summarizationEnabled !== true) {
1200
- contextPruning.applyContextPruning({
1201
- messages: params.messages,
1202
- indexTokenCountMap,
1203
- tokenCounter: factoryParams.tokenCounter,
1204
- resolvedSettings: contextPruningSettings$1,
1205
- });
1206
- }
1207
- // Fit-to-budget: when summarization is enabled and individual messages
1208
- // exceed the effective budget, truncate them so every message can fit in
1209
- // a single context slot. Without this, oversized tool results (e.g.
1210
- // take_snapshot at 9K chars) cause empty context → emergency truncation →
1211
- // immediate re-summarization after just one tool call.
1212
- //
1213
- // This is NOT the lossy position-based fading above — it only targets
1214
- // messages that individually exceed the budget, using the full effective
1215
- // budget as the cap (not a pressure-scaled fraction).
1216
- // Fit-to-budget caps are in raw space (divide by ratio) so that after
1217
- // calibration the truncated results actually fit within the budget.
1218
- const rawSpaceEffectiveMax = calibrationRatio > 0
1219
- ? Math.round(effectiveMaxTokens / calibrationRatio)
1220
- : effectiveMaxTokens;
1221
- if (factoryParams.summarizationEnabled === true &&
1222
- rawSpaceEffectiveMax > 0) {
1223
- preFlightResultCount = preFlightTruncateToolResults({
1224
- messages: params.messages,
1225
- maxContextTokens: rawSpaceEffectiveMax,
1226
- indexTokenCountMap,
1227
- tokenCounter: factoryParams.tokenCounter,
1228
- });
1229
- preFlightInputCount = preFlightTruncateToolCallInputs({
1230
- messages: params.messages,
1231
- maxContextTokens: rawSpaceEffectiveMax,
1232
- indexTokenCountMap,
1233
- tokenCounter: factoryParams.tokenCounter,
1234
- });
1235
- }
1236
- const preTruncationTotalTokens = totalTokens;
1237
- totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
1238
- calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
1239
- const anyAdjustment = observationsMasked > 0 ||
1240
- preFlightResultCount > 0 ||
1241
- preFlightInputCount > 0 ||
1242
- totalTokens !== preTruncationTotalTokens;
1243
- if (anyAdjustment) {
1244
- factoryParams.log?.('debug', 'Context adjusted', {
1245
- contextPressure: Math.round(contextPressure * 100),
1246
- observationsMasked,
1247
- toolOutputsTruncated: preFlightResultCount,
1248
- toolInputsTruncated: preFlightInputCount,
1249
- tokensBefore: preTruncationTotalTokens,
1250
- tokensAfter: totalTokens,
1251
- tokensSaved: preTruncationTotalTokens - totalTokens,
1252
- });
1253
- }
1254
- lastTurnStartIndex = params.messages.length;
1255
- if (lastCutOffIndex === 0 &&
1256
- calibratedTotalTokens + currentInstructionTokens <= pruningBudget) {
1257
- return {
1258
- context: params.messages,
1259
- indexTokenCountMap,
1260
- messagesToRefine: [],
1261
- prePruneContextTokens: calibratedTotalTokens,
1262
- remainingContextTokens: pruningBudget - calibratedTotalTokens - currentInstructionTokens,
1263
- contextPressure,
1264
- originalToolContent: originalToolContent.size > 0 ? originalToolContent : undefined,
1265
- calibrationRatio,
1266
- resolvedInstructionOverhead: bestInstructionOverhead,
1267
- };
1268
- }
1269
- const rawSpaceBudget = calibrationRatio > 0
1270
- ? Math.round(pruningBudget / calibrationRatio)
1271
- : pruningBudget;
1272
- const rawSpaceInstructionTokens = calibrationRatio > 0
1273
- ? Math.round(currentInstructionTokens / calibrationRatio)
1274
- : currentInstructionTokens;
1275
- const { context: initialContext, thinkingStartIndex, messagesToRefine, remainingContextTokens: initialRemainingContextTokens, } = getMessagesWithinTokenLimit({
1276
- maxContextTokens: rawSpaceBudget,
1277
- messages: params.messages,
1278
- indexTokenCountMap,
1279
- startType: params.startType,
1280
- thinkingEnabled: factoryParams.thinkingEnabled,
1281
- tokenCounter: factoryParams.tokenCounter,
1282
- instructionTokens: rawSpaceInstructionTokens,
1283
- reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
1284
- ? _enum.ContentTypes.REASONING_CONTENT
1285
- : _enum.ContentTypes.THINKING,
1286
- thinkingStartIndex: factoryParams.thinkingEnabled === true
1287
- ? runThinkingStartIndex
1288
- : undefined,
1289
- });
1290
- const { context: repairedContext, reclaimedTokens: initialReclaimedTokens, droppedMessages, } = repairOrphanedToolMessages({
1291
- context: initialContext,
1292
- allMessages: params.messages,
1293
- tokenCounter: factoryParams.tokenCounter,
1294
- indexTokenCountMap,
1295
- });
1296
- const contextBreakdown = repairedContext.map((msg) => {
1297
- const type = msg.getType();
1298
- const name = type === 'tool' ? (msg.name ?? 'unknown') : '';
1299
- return name !== '' ? `${type}(${name})` : type;
1300
- });
1301
- factoryParams.log?.('debug', 'Pruning complete', {
1302
- contextLength: repairedContext.length,
1303
- contextTypes: contextBreakdown.join(', '),
1304
- messagesToRefineCount: messagesToRefine.length,
1305
- droppedOrphans: droppedMessages.length,
1306
- remainingTokens: initialRemainingContextTokens,
1307
- });
1308
- let context = repairedContext;
1309
- let reclaimedTokens = initialReclaimedTokens;
1310
- // Orphan repair may drop ToolMessages whose parent AI was pruned.
1311
- // Append them to messagesToRefine so summarization can still see the
1312
- // tool results (otherwise the summary says "in progress" for a tool
1313
- // call that already completed, causing the model to repeat it).
1314
- if (droppedMessages.length > 0) {
1315
- messagesToRefine.push(...droppedMessages);
1316
- }
1317
- // ---------------------------------------------------------------
1318
- // Fallback fading: when summarization skipped fading earlier and
1319
- // pruning still produced an empty context, apply lossy pressure-band
1320
- // fading and retry. This is a last resort before emergency truncation
1321
- // — the summarizer already saw the full messages, so fading the
1322
- // surviving context for the LLM is acceptable.
1323
- // ---------------------------------------------------------------
1324
- if (context.length === 0 &&
1325
- params.messages.length > 0 &&
1326
- effectiveMaxTokens > 0 &&
1327
- factoryParams.summarizationEnabled === true) {
1328
- const fadingBudget = Math.max(1024, effectiveMaxTokens);
1329
- factoryParams.log?.('debug', 'Fallback fading empty context with summarization', {
1330
- messageCount: params.messages.length,
1331
- effectiveMaxTokens,
1332
- fadingBudget,
1333
- });
1334
- const fadedMessages = [...params.messages];
1335
- const preFadingTokenCounts = {};
1336
- for (let i = 0; i < params.messages.length; i++) {
1337
- preFadingTokenCounts[i] = indexTokenCountMap[i];
1338
- }
1339
- preFlightTruncateToolResults({
1340
- messages: fadedMessages,
1341
- maxContextTokens: fadingBudget,
1342
- indexTokenCountMap,
1343
- tokenCounter: factoryParams.tokenCounter,
1344
- });
1345
- preFlightTruncateToolCallInputs({
1346
- messages: fadedMessages,
1347
- maxContextTokens: fadingBudget,
1348
- indexTokenCountMap,
1349
- tokenCounter: factoryParams.tokenCounter,
1350
- });
1351
- const fadingRetry = getMessagesWithinTokenLimit({
1352
- maxContextTokens: pruningBudget,
1353
- messages: fadedMessages,
1354
- indexTokenCountMap,
1355
- startType: params.startType,
1356
- thinkingEnabled: factoryParams.thinkingEnabled,
1357
- tokenCounter: factoryParams.tokenCounter,
1358
- instructionTokens: currentInstructionTokens,
1359
- reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
1360
- ? _enum.ContentTypes.REASONING_CONTENT
1361
- : _enum.ContentTypes.THINKING,
1362
- thinkingStartIndex: factoryParams.thinkingEnabled === true
1363
- ? runThinkingStartIndex
1364
- : undefined,
1365
- });
1366
- const fadingRepaired = repairOrphanedToolMessages({
1367
- context: fadingRetry.context,
1368
- allMessages: fadedMessages,
1369
- tokenCounter: factoryParams.tokenCounter,
1370
- indexTokenCountMap,
1371
- });
1372
- if (fadingRepaired.context.length > 0) {
1373
- context = fadingRepaired.context;
1374
- reclaimedTokens = fadingRepaired.reclaimedTokens;
1375
- messagesToRefine.push(...fadingRetry.messagesToRefine);
1376
- if (fadingRepaired.droppedMessages.length > 0) {
1377
- messagesToRefine.push(...fadingRepaired.droppedMessages);
1378
- }
1379
- factoryParams.log?.('debug', 'Fallback fading recovered context', {
1380
- contextLength: context.length,
1381
- messagesToRefineCount: messagesToRefine.length,
1382
- remainingTokens: fadingRetry.remainingContextTokens,
1383
- });
1384
- for (const [key, value] of Object.entries(preFadingTokenCounts)) {
1385
- indexTokenCountMap[key] = value;
1386
- }
1387
- }
1388
- else {
1389
- for (const [key, value] of Object.entries(preFadingTokenCounts)) {
1390
- indexTokenCountMap[key] = value;
1391
- }
1392
- }
1393
- }
1394
- // ---------------------------------------------------------------
1395
- // Emergency truncation: if pruning produced an empty context but
1396
- // messages exist, aggressively truncate all tool_call inputs and
1397
- // tool results, then retry. Budget is proportional to the
1398
- // effective token limit (~4 chars/token, spread across messages)
1399
- // with a floor of 200 chars so content is never completely blank.
1400
- // Uses head+tail so the model sees both what was called and the
1401
- // final outcome (e.g., return value at the end of a script eval).
1402
- // ---------------------------------------------------------------
1403
- if (context.length === 0 &&
1404
- params.messages.length > 0 &&
1405
- effectiveMaxTokens > 0) {
1406
- const perMessageTokenBudget = Math.floor(effectiveMaxTokens / Math.max(1, params.messages.length));
1407
- const emergencyMaxChars = Math.max(200, perMessageTokenBudget * 4);
1408
- factoryParams.log?.('warn', 'Empty context, entering emergency truncation', {
1409
- messageCount: params.messages.length,
1410
- effectiveMax: effectiveMaxTokens,
1411
- emergencyMaxChars,
1412
- });
1413
- // Clone the messages array so emergency truncation doesn't permanently
1414
- // mutate graph state. The originals remain intact for future turns
1415
- // where more budget may be available. Also snapshot indexTokenCountMap
1416
- // entries so the closure doesn't retain stale (too-small) counts for
1417
- // the original un-truncated messages on the next turn.
1418
- const emergencyMessages = [...params.messages];
1419
- const preEmergencyTokenCounts = {};
1420
- for (let i = 0; i < params.messages.length; i++) {
1421
- preEmergencyTokenCounts[i] = indexTokenCountMap[i];
1422
- }
1423
- try {
1424
- let emergencyTruncatedCount = 0;
1425
- for (let i = 0; i < emergencyMessages.length; i++) {
1426
- const message = emergencyMessages[i];
1427
- if (message.getType() === 'tool') {
1428
- const content = message.content;
1429
- if (typeof content === 'string' &&
1430
- content.length > emergencyMaxChars) {
1431
- const cloned = new messages.ToolMessage({
1432
- content: truncation.truncateToolResultContent(content, emergencyMaxChars),
1433
- tool_call_id: message.tool_call_id,
1434
- name: message.name,
1435
- id: message.id,
1436
- additional_kwargs: message.additional_kwargs,
1437
- response_metadata: message.response_metadata,
1438
- });
1439
- emergencyMessages[i] = cloned;
1440
- indexTokenCountMap[i] = factoryParams.tokenCounter(cloned);
1441
- emergencyTruncatedCount++;
1442
- }
1443
- }
1444
- if (message.getType() === 'ai' && Array.isArray(message.content)) {
1445
- const aiMsg = message;
1446
- const contentBlocks = aiMsg.content;
1447
- const needsTruncation = contentBlocks.some((block) => {
1448
- if (typeof block !== 'object')
1449
- return false;
1450
- const record = block;
1451
- if ((record.type === 'tool_use' || record.type === 'tool_call') &&
1452
- record.input != null) {
1453
- const serialized = typeof record.input === 'string'
1454
- ? record.input
1455
- : JSON.stringify(record.input);
1456
- return serialized.length > emergencyMaxChars;
1457
- }
1458
- return false;
1459
- });
1460
- if (needsTruncation) {
1461
- const newContent = contentBlocks.map((block) => {
1462
- if (typeof block !== 'object')
1463
- return block;
1464
- const record = block;
1465
- if ((record.type === 'tool_use' || record.type === 'tool_call') &&
1466
- record.input != null) {
1467
- const serialized = typeof record.input === 'string'
1468
- ? record.input
1469
- : JSON.stringify(record.input);
1470
- if (serialized.length > emergencyMaxChars) {
1471
- // Replaces original input with { _truncated, _originalChars } —
1472
- // safe because the tool call already executed in a prior turn.
1473
- return {
1474
- ...record,
1475
- input: truncation.truncateToolInput(serialized, emergencyMaxChars),
1476
- };
1477
- }
1478
- }
1479
- return block;
1480
- });
1481
- const newToolCalls = (aiMsg.tool_calls ?? []).map((tc) => {
1482
- const serializedArgs = JSON.stringify(tc.args);
1483
- if (serializedArgs.length > emergencyMaxChars) {
1484
- // Replaces original args with { _truncated, _originalChars } —
1485
- // safe because the tool call already executed in a prior turn.
1486
- return {
1487
- ...tc,
1488
- args: truncation.truncateToolInput(serializedArgs, emergencyMaxChars),
1489
- };
1490
- }
1491
- return tc;
1492
- });
1493
- emergencyMessages[i] = new messages.AIMessage({
1494
- ...aiMsg,
1495
- content: langchain.toLangChainContent(newContent),
1496
- tool_calls: newToolCalls.length > 0 ? newToolCalls : undefined,
1497
- });
1498
- indexTokenCountMap[i] = factoryParams.tokenCounter(emergencyMessages[i]);
1499
- emergencyTruncatedCount++;
1500
- }
1501
- }
1502
- }
1503
- factoryParams.log?.('info', 'Emergency truncation complete');
1504
- factoryParams.log?.('debug', 'Emergency truncation details', {
1505
- truncatedCount: emergencyTruncatedCount,
1506
- emergencyMaxChars,
1507
- });
1508
- const retryResult = getMessagesWithinTokenLimit({
1509
- maxContextTokens: pruningBudget,
1510
- messages: emergencyMessages,
1511
- indexTokenCountMap,
1512
- startType: params.startType,
1513
- thinkingEnabled: factoryParams.thinkingEnabled,
1514
- tokenCounter: factoryParams.tokenCounter,
1515
- instructionTokens: currentInstructionTokens,
1516
- reasoningType: factoryParams.provider === _enum.Providers.BEDROCK
1517
- ? _enum.ContentTypes.REASONING_CONTENT
1518
- : _enum.ContentTypes.THINKING,
1519
- thinkingStartIndex: factoryParams.thinkingEnabled === true
1520
- ? runThinkingStartIndex
1521
- : undefined,
1522
- });
1523
- const repaired = repairOrphanedToolMessages({
1524
- context: retryResult.context,
1525
- allMessages: emergencyMessages,
1526
- tokenCounter: factoryParams.tokenCounter,
1527
- indexTokenCountMap,
1528
- });
1529
- context = repaired.context;
1530
- reclaimedTokens = repaired.reclaimedTokens;
1531
- messagesToRefine.push(...retryResult.messagesToRefine);
1532
- if (repaired.droppedMessages.length > 0) {
1533
- messagesToRefine.push(...repaired.droppedMessages);
1534
- }
1535
- factoryParams.log?.('debug', 'Emergency truncation retry result', {
1536
- contextLength: context.length,
1537
- messagesToRefineCount: messagesToRefine.length,
1538
- remainingTokens: retryResult.remainingContextTokens,
1539
- });
1540
- }
1541
- finally {
1542
- // Restore the closure's indexTokenCountMap to pre-emergency values so the
1543
- // next turn counts old messages at their original (un-truncated) size.
1544
- // The emergency-truncated counts were only needed for this turn's
1545
- // getMessagesWithinTokenLimit retry.
1546
- for (const [key, value] of Object.entries(preEmergencyTokenCounts)) {
1547
- indexTokenCountMap[key] = value;
1548
- }
1549
- }
1550
- }
1551
- const remainingContextTokens = Math.max(0, Math.min(pruningBudget, initialRemainingContextTokens + reclaimedTokens));
1552
- runThinkingStartIndex = thinkingStartIndex ?? -1;
1553
- /** The index is the first value of `context`, index relative to `params.messages` */
1554
- lastCutOffIndex = Math.max(params.messages.length -
1555
- (context.length - (context[0]?.getType() === 'system' ? 1 : 0)), 0);
1556
- return {
1557
- context,
1558
- indexTokenCountMap,
1559
- messagesToRefine,
1560
- prePruneContextTokens: calibratedTotalTokens,
1561
- remainingContextTokens,
1562
- contextPressure,
1563
- originalToolContent: originalToolContent.size > 0 ? originalToolContent : undefined,
1564
- calibrationRatio,
1565
- resolvedInstructionOverhead: bestInstructionOverhead,
1566
- };
1567
- };
658
+ const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
659
+ let lastTurnStartIndex = factoryParams.startIndex;
660
+ let lastCutOffIndex = 0;
661
+ let totalTokens = 0;
662
+ for (const key in indexTokenCountMap) totalTokens += indexTokenCountMap[key] ?? 0;
663
+ let runThinkingStartIndex = -1;
664
+ /** Cumulative raw tiktoken tokens we've sent to the provider (messages only,
665
+ * excludes instruction overhead and new outputs not yet seen by provider). */
666
+ let cumulativeRawSent = 0;
667
+ /** Cumulative provider-reported message tokens (providerInput - instructionOverhead). */
668
+ let cumulativeProviderReported = 0;
669
+ /** Stable calibration ratio = cumulativeProviderReported / cumulativeRawSent.
670
+ * Converges monotonically as data accumulates. Falls back to seeded value. */
671
+ let calibrationRatio = factoryParams.calibrationRatio != null && factoryParams.calibrationRatio > 0 ? factoryParams.calibrationRatio : 1;
672
+ /** Best observed instruction overhead from a near-zero variance turn.
673
+ * Self-seeds from provider observations within the run. */
674
+ let bestInstructionOverhead;
675
+ let bestVarianceAbs = Infinity;
676
+ /** Local estimate at the time bestInstructionOverhead was observed.
677
+ * Used to invalidate the cached overhead when instructions change
678
+ * mid-run (e.g. tool discovery adds tools to the bound set). */
679
+ let bestInstructionEstimate;
680
+ /** Original (pre-masking) tool result content keyed by message index.
681
+ * Allows the summarizer to see full tool outputs even after masking
682
+ * has truncated them in the live message array. Cleared when the
683
+ * pruner is recreated after summarization. */
684
+ const originalToolContent = /* @__PURE__ */ new Map();
685
+ let originalToolContentSize = 0;
686
+ const contextPruningSettings = require_contextPruningSettings.resolveContextPruningSettings(factoryParams.contextPruningConfig);
687
+ return function pruneMessages(params) {
688
+ if (params.messages.length === 0) return {
689
+ context: [],
690
+ indexTokenCountMap,
691
+ messagesToRefine: [],
692
+ prePruneContextTokens: 0,
693
+ remainingContextTokens: factoryParams.maxTokens,
694
+ calibrationRatio,
695
+ resolvedInstructionOverhead: bestInstructionOverhead
696
+ };
697
+ if (factoryParams.provider === "openAI" && factoryParams.thinkingEnabled === true) for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
698
+ const m = params.messages[i];
699
+ if (m.getType() === "ai" && typeof m.additional_kwargs.reasoning_content === "string" && Array.isArray(m.additional_kwargs.provider_specific_fields?.thinking_blocks) && m.tool_calls && (m.tool_calls?.length ?? 0) > 0) {
700
+ const message = m;
701
+ const thinkingBlocks = message.additional_kwargs.provider_specific_fields.thinking_blocks;
702
+ const thinkingBlock = {
703
+ signature: thinkingBlocks?.[thinkingBlocks.length - 1].signature,
704
+ type: "thinking",
705
+ thinking: message.additional_kwargs.reasoning_content
706
+ };
707
+ params.messages[i] = new _langchain_core_messages.AIMessage({
708
+ ...message,
709
+ content: require_langchain.toLangChainContent([thinkingBlock]),
710
+ additional_kwargs: {
711
+ ...message.additional_kwargs,
712
+ reasoning_content: void 0
713
+ }
714
+ });
715
+ }
716
+ }
717
+ let currentUsage;
718
+ if (params.usageMetadata && (checkValidNumber(params.usageMetadata.input_tokens) || checkValidNumber(params.usageMetadata.input_token_details) && (checkValidNumber(params.usageMetadata.input_token_details.cache_creation) || checkValidNumber(params.usageMetadata.input_token_details.cache_read))) && checkValidNumber(params.usageMetadata.output_tokens)) currentUsage = calculateTotalTokens(params.usageMetadata);
719
+ const newOutputs = /* @__PURE__ */ new Set();
720
+ let outputTokensAssigned = false;
721
+ for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
722
+ const message = params.messages[i];
723
+ if (indexTokenCountMap[i] !== void 0) continue;
724
+ if (!outputTokensAssigned && currentUsage && message.getType() === "ai") {
725
+ indexTokenCountMap[i] = currentUsage.output_tokens;
726
+ newOutputs.add(i);
727
+ outputTokensAssigned = true;
728
+ } else {
729
+ indexTokenCountMap[i] = factoryParams.tokenCounter(message);
730
+ if (currentUsage) newOutputs.add(i);
731
+ }
732
+ totalTokens += indexTokenCountMap[i] ?? 0;
733
+ }
734
+ if (currentUsage && params.totalTokensFresh !== false) {
735
+ const instructionOverhead = factoryParams.getInstructionTokens?.() ?? 0;
736
+ const providerInputTokens = params.lastCallUsage?.inputTokens ?? currentUsage.input_tokens;
737
+ let rawSentThisTurn = 0;
738
+ const firstIsSystem = params.messages.length > 0 && params.messages[0].getType() === "system";
739
+ if (firstIsSystem) rawSentThisTurn += indexTokenCountMap[0] ?? 0;
740
+ for (let i = lastCutOffIndex; i < params.messages.length; i++) {
741
+ if (i === 0 && firstIsSystem || newOutputs.has(i)) continue;
742
+ rawSentThisTurn += indexTokenCountMap[i] ?? 0;
743
+ }
744
+ const providerMessageTokens = Math.max(0, providerInputTokens - instructionOverhead);
745
+ if (rawSentThisTurn > 0 && providerMessageTokens > 0) {
746
+ cumulativeRawSent += rawSentThisTurn;
747
+ cumulativeProviderReported += providerMessageTokens;
748
+ const newRatio = cumulativeProviderReported / cumulativeRawSent;
749
+ calibrationRatio = Math.max(CALIBRATION_RATIO_MIN, Math.min(CALIBRATION_RATIO_MAX, newRatio));
750
+ }
751
+ const calibratedOurTotal = instructionOverhead + rawSentThisTurn * calibrationRatio;
752
+ const overallRatio = calibratedOurTotal > 0 ? providerInputTokens / calibratedOurTotal : 0;
753
+ const variancePct = Math.round((overallRatio - 1) * 100);
754
+ const absVariance = Math.abs(overallRatio - 1);
755
+ if (absVariance < bestVarianceAbs && rawSentThisTurn > 0) {
756
+ bestVarianceAbs = absVariance;
757
+ bestInstructionOverhead = Math.max(0, Math.round(providerInputTokens - rawSentThisTurn * calibrationRatio));
758
+ bestInstructionEstimate = factoryParams.getInstructionTokens?.() ?? 0;
759
+ }
760
+ factoryParams.log?.("debug", "Calibration observed", {
761
+ providerInputTokens,
762
+ calibratedEstimate: Math.round(calibratedOurTotal),
763
+ variance: `${variancePct > 0 ? "+" : ""}${variancePct}%`,
764
+ calibrationRatio: Math.round(calibrationRatio * 100) / 100,
765
+ instructionOverhead,
766
+ cumulativeRawSent,
767
+ cumulativeProviderReported
768
+ });
769
+ }
770
+ const estimatedInstructionTokens = factoryParams.getInstructionTokens?.() ?? 0;
771
+ const estimateStable = bestInstructionEstimate != null && bestInstructionEstimate > 0 && Math.abs(estimatedInstructionTokens - bestInstructionEstimate) / bestInstructionEstimate < .1;
772
+ const currentInstructionTokens = bestInstructionOverhead != null && bestInstructionOverhead <= estimatedInstructionTokens && estimateStable ? bestInstructionOverhead : estimatedInstructionTokens;
773
+ const reserveRatio = factoryParams.reserveRatio ?? .05;
774
+ const reserveTokens = reserveRatio > 0 && reserveRatio < 1 ? Math.round(factoryParams.maxTokens * reserveRatio) : 0;
775
+ const pruningBudget = factoryParams.maxTokens - reserveTokens;
776
+ const effectiveMaxTokens = Math.max(0, pruningBudget - currentInstructionTokens);
777
+ let calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
778
+ factoryParams.log?.("debug", "Budget", {
779
+ maxTokens: factoryParams.maxTokens,
780
+ pruningBudget,
781
+ effectiveMax: effectiveMaxTokens,
782
+ instructionTokens: currentInstructionTokens,
783
+ messageCount: params.messages.length,
784
+ calibratedTotalTokens,
785
+ calibrationRatio: Math.round(calibrationRatio * 100) / 100
786
+ });
787
+ if (effectiveMaxTokens === 0 && factoryParams.summarizationEnabled === true && params.messages.length > 0) {
788
+ factoryParams.log?.("warn", "Instructions consume entire budget — yielding all messages for summarization", {
789
+ instructionTokens: currentInstructionTokens,
790
+ pruningBudget,
791
+ messageCount: params.messages.length
792
+ });
793
+ lastTurnStartIndex = params.messages.length;
794
+ return {
795
+ context: [],
796
+ indexTokenCountMap,
797
+ messagesToRefine: [...params.messages],
798
+ prePruneContextTokens: calibratedTotalTokens,
799
+ remainingContextTokens: 0,
800
+ contextPressure: pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0,
801
+ calibrationRatio,
802
+ resolvedInstructionOverhead: bestInstructionOverhead
803
+ };
804
+ }
805
+ totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
806
+ calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
807
+ const contextPressure = pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0;
808
+ let preFlightResultCount = 0;
809
+ let preFlightInputCount = 0;
810
+ let observationsMasked = 0;
811
+ if (contextPressure >= PRESSURE_THRESHOLD_MASKING) {
812
+ const rawMessageBudget = calibrationRatio > 0 ? Math.floor(effectiveMaxTokens / calibrationRatio) : effectiveMaxTokens;
813
+ const reserveHeadroom = factoryParams.summarizationEnabled === true ? Math.floor(rawMessageBudget * (factoryParams.reserveRatio ?? .05) * .5) : 0;
814
+ observationsMasked = maskConsumedToolResults({
815
+ messages: params.messages,
816
+ indexTokenCountMap,
817
+ tokenCounter: factoryParams.tokenCounter,
818
+ availableRawBudget: rawMessageBudget + reserveHeadroom,
819
+ originalContentStore: factoryParams.summarizationEnabled === true ? originalToolContent : void 0,
820
+ onContentStored: factoryParams.summarizationEnabled === true ? (charLen) => {
821
+ originalToolContentSize += charLen;
822
+ while (originalToolContentSize > 2e6 && originalToolContent.size > 0) {
823
+ const oldest = originalToolContent.keys().next();
824
+ if (oldest.done === true) break;
825
+ const removed = originalToolContent.get(oldest.value);
826
+ if (removed != null) originalToolContentSize -= removed.length;
827
+ originalToolContent.delete(oldest.value);
828
+ }
829
+ } : void 0
830
+ });
831
+ if (observationsMasked > 0) {
832
+ cumulativeRawSent = 0;
833
+ cumulativeProviderReported = 0;
834
+ }
835
+ }
836
+ if (contextPressure >= PRESSURE_THRESHOLD_MASKING && factoryParams.summarizationEnabled !== true) {
837
+ const budgetFactor = PRESSURE_BANDS.find(([threshold]) => contextPressure >= threshold)?.[1] ?? 1;
838
+ const baseBudget = Math.max(1024, Math.floor(effectiveMaxTokens * budgetFactor));
839
+ preFlightResultCount = preFlightTruncateToolResults({
840
+ messages: params.messages,
841
+ maxContextTokens: baseBudget,
842
+ indexTokenCountMap,
843
+ tokenCounter: factoryParams.tokenCounter
844
+ });
845
+ preFlightInputCount = preFlightTruncateToolCallInputs({
846
+ messages: params.messages,
847
+ maxContextTokens: baseBudget,
848
+ indexTokenCountMap,
849
+ tokenCounter: factoryParams.tokenCounter
850
+ });
851
+ }
852
+ if (factoryParams.contextPruningConfig?.enabled === true && factoryParams.summarizationEnabled !== true) require_contextPruning.applyContextPruning({
853
+ messages: params.messages,
854
+ indexTokenCountMap,
855
+ tokenCounter: factoryParams.tokenCounter,
856
+ resolvedSettings: contextPruningSettings
857
+ });
858
+ const rawSpaceEffectiveMax = calibrationRatio > 0 ? Math.round(effectiveMaxTokens / calibrationRatio) : effectiveMaxTokens;
859
+ if (factoryParams.summarizationEnabled === true && rawSpaceEffectiveMax > 0) {
860
+ preFlightResultCount = preFlightTruncateToolResults({
861
+ messages: params.messages,
862
+ maxContextTokens: rawSpaceEffectiveMax,
863
+ indexTokenCountMap,
864
+ tokenCounter: factoryParams.tokenCounter
865
+ });
866
+ preFlightInputCount = preFlightTruncateToolCallInputs({
867
+ messages: params.messages,
868
+ maxContextTokens: rawSpaceEffectiveMax,
869
+ indexTokenCountMap,
870
+ tokenCounter: factoryParams.tokenCounter
871
+ });
872
+ }
873
+ const preTruncationTotalTokens = totalTokens;
874
+ totalTokens = sumTokenCounts(indexTokenCountMap, params.messages.length);
875
+ calibratedTotalTokens = Math.round(totalTokens * calibrationRatio);
876
+ if (observationsMasked > 0 || preFlightResultCount > 0 || preFlightInputCount > 0 || totalTokens !== preTruncationTotalTokens) factoryParams.log?.("debug", "Context adjusted", {
877
+ contextPressure: Math.round(contextPressure * 100),
878
+ observationsMasked,
879
+ toolOutputsTruncated: preFlightResultCount,
880
+ toolInputsTruncated: preFlightInputCount,
881
+ tokensBefore: preTruncationTotalTokens,
882
+ tokensAfter: totalTokens,
883
+ tokensSaved: preTruncationTotalTokens - totalTokens
884
+ });
885
+ lastTurnStartIndex = params.messages.length;
886
+ if (lastCutOffIndex === 0 && calibratedTotalTokens + currentInstructionTokens <= pruningBudget) return {
887
+ context: params.messages,
888
+ indexTokenCountMap,
889
+ messagesToRefine: [],
890
+ prePruneContextTokens: calibratedTotalTokens,
891
+ remainingContextTokens: pruningBudget - calibratedTotalTokens - currentInstructionTokens,
892
+ contextPressure,
893
+ originalToolContent: originalToolContent.size > 0 ? originalToolContent : void 0,
894
+ calibrationRatio,
895
+ resolvedInstructionOverhead: bestInstructionOverhead
896
+ };
897
+ const rawSpaceBudget = calibrationRatio > 0 ? Math.round(pruningBudget / calibrationRatio) : pruningBudget;
898
+ const rawSpaceInstructionTokens = calibrationRatio > 0 ? Math.round(currentInstructionTokens / calibrationRatio) : currentInstructionTokens;
899
+ const { context: initialContext, thinkingStartIndex, messagesToRefine, remainingContextTokens: initialRemainingContextTokens } = getMessagesWithinTokenLimit({
900
+ maxContextTokens: rawSpaceBudget,
901
+ messages: params.messages,
902
+ indexTokenCountMap,
903
+ startType: params.startType,
904
+ thinkingEnabled: factoryParams.thinkingEnabled,
905
+ tokenCounter: factoryParams.tokenCounter,
906
+ instructionTokens: rawSpaceInstructionTokens,
907
+ reasoningType: factoryParams.provider === "bedrock" ? "reasoning_content" : "thinking",
908
+ thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : void 0
909
+ });
910
+ const { context: repairedContext, reclaimedTokens: initialReclaimedTokens, droppedMessages } = repairOrphanedToolMessages({
911
+ context: initialContext,
912
+ allMessages: params.messages,
913
+ tokenCounter: factoryParams.tokenCounter,
914
+ indexTokenCountMap
915
+ });
916
+ const contextBreakdown = repairedContext.map((msg) => {
917
+ const type = msg.getType();
918
+ const name = type === "tool" ? msg.name ?? "unknown" : "";
919
+ return name !== "" ? `${type}(${name})` : type;
920
+ });
921
+ factoryParams.log?.("debug", "Pruning complete", {
922
+ contextLength: repairedContext.length,
923
+ contextTypes: contextBreakdown.join(", "),
924
+ messagesToRefineCount: messagesToRefine.length,
925
+ droppedOrphans: droppedMessages.length,
926
+ remainingTokens: initialRemainingContextTokens
927
+ });
928
+ let context = repairedContext;
929
+ let reclaimedTokens = initialReclaimedTokens;
930
+ if (droppedMessages.length > 0) messagesToRefine.push(...droppedMessages);
931
+ if (context.length === 0 && params.messages.length > 0 && effectiveMaxTokens > 0 && factoryParams.summarizationEnabled === true) {
932
+ const fadingBudget = Math.max(1024, effectiveMaxTokens);
933
+ factoryParams.log?.("debug", "Fallback fading — empty context with summarization", {
934
+ messageCount: params.messages.length,
935
+ effectiveMaxTokens,
936
+ fadingBudget
937
+ });
938
+ const fadedMessages = [...params.messages];
939
+ const preFadingTokenCounts = {};
940
+ for (let i = 0; i < params.messages.length; i++) preFadingTokenCounts[i] = indexTokenCountMap[i];
941
+ preFlightTruncateToolResults({
942
+ messages: fadedMessages,
943
+ maxContextTokens: fadingBudget,
944
+ indexTokenCountMap,
945
+ tokenCounter: factoryParams.tokenCounter
946
+ });
947
+ preFlightTruncateToolCallInputs({
948
+ messages: fadedMessages,
949
+ maxContextTokens: fadingBudget,
950
+ indexTokenCountMap,
951
+ tokenCounter: factoryParams.tokenCounter
952
+ });
953
+ const fadingRetry = getMessagesWithinTokenLimit({
954
+ maxContextTokens: pruningBudget,
955
+ messages: fadedMessages,
956
+ indexTokenCountMap,
957
+ startType: params.startType,
958
+ thinkingEnabled: factoryParams.thinkingEnabled,
959
+ tokenCounter: factoryParams.tokenCounter,
960
+ instructionTokens: currentInstructionTokens,
961
+ reasoningType: factoryParams.provider === "bedrock" ? "reasoning_content" : "thinking",
962
+ thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : void 0
963
+ });
964
+ const fadingRepaired = repairOrphanedToolMessages({
965
+ context: fadingRetry.context,
966
+ allMessages: fadedMessages,
967
+ tokenCounter: factoryParams.tokenCounter,
968
+ indexTokenCountMap
969
+ });
970
+ if (fadingRepaired.context.length > 0) {
971
+ context = fadingRepaired.context;
972
+ reclaimedTokens = fadingRepaired.reclaimedTokens;
973
+ messagesToRefine.push(...fadingRetry.messagesToRefine);
974
+ if (fadingRepaired.droppedMessages.length > 0) messagesToRefine.push(...fadingRepaired.droppedMessages);
975
+ factoryParams.log?.("debug", "Fallback fading recovered context", {
976
+ contextLength: context.length,
977
+ messagesToRefineCount: messagesToRefine.length,
978
+ remainingTokens: fadingRetry.remainingContextTokens
979
+ });
980
+ for (const [key, value] of Object.entries(preFadingTokenCounts)) indexTokenCountMap[key] = value;
981
+ } else for (const [key, value] of Object.entries(preFadingTokenCounts)) indexTokenCountMap[key] = value;
982
+ }
983
+ if (context.length === 0 && params.messages.length > 0 && effectiveMaxTokens > 0) {
984
+ const perMessageTokenBudget = Math.floor(effectiveMaxTokens / Math.max(1, params.messages.length));
985
+ const emergencyMaxChars = Math.max(200, perMessageTokenBudget * 4);
986
+ factoryParams.log?.("warn", "Empty context, entering emergency truncation", {
987
+ messageCount: params.messages.length,
988
+ effectiveMax: effectiveMaxTokens,
989
+ emergencyMaxChars
990
+ });
991
+ const emergencyMessages = [...params.messages];
992
+ const preEmergencyTokenCounts = {};
993
+ for (let i = 0; i < params.messages.length; i++) preEmergencyTokenCounts[i] = indexTokenCountMap[i];
994
+ try {
995
+ let emergencyTruncatedCount = 0;
996
+ for (let i = 0; i < emergencyMessages.length; i++) {
997
+ const message = emergencyMessages[i];
998
+ if (message.getType() === "tool") {
999
+ const content = message.content;
1000
+ if (typeof content === "string" && content.length > emergencyMaxChars) {
1001
+ const cloned = new _langchain_core_messages.ToolMessage({
1002
+ content: require_truncation.truncateToolResultContent(content, emergencyMaxChars),
1003
+ tool_call_id: message.tool_call_id,
1004
+ name: message.name,
1005
+ id: message.id,
1006
+ additional_kwargs: message.additional_kwargs,
1007
+ response_metadata: message.response_metadata
1008
+ });
1009
+ emergencyMessages[i] = cloned;
1010
+ indexTokenCountMap[i] = factoryParams.tokenCounter(cloned);
1011
+ emergencyTruncatedCount++;
1012
+ }
1013
+ }
1014
+ if (message.getType() === "ai" && Array.isArray(message.content)) {
1015
+ const aiMsg = message;
1016
+ const contentBlocks = aiMsg.content;
1017
+ if (contentBlocks.some((block) => {
1018
+ if (typeof block !== "object") return false;
1019
+ const record = block;
1020
+ if ((record.type === "tool_use" || record.type === "tool_call") && record.input != null) return (typeof record.input === "string" ? record.input : JSON.stringify(record.input)).length > emergencyMaxChars;
1021
+ return false;
1022
+ })) {
1023
+ const newContent = contentBlocks.map((block) => {
1024
+ if (typeof block !== "object") return block;
1025
+ const record = block;
1026
+ if ((record.type === "tool_use" || record.type === "tool_call") && record.input != null) {
1027
+ const serialized = typeof record.input === "string" ? record.input : JSON.stringify(record.input);
1028
+ if (serialized.length > emergencyMaxChars) return {
1029
+ ...record,
1030
+ input: require_truncation.truncateToolInput(serialized, emergencyMaxChars)
1031
+ };
1032
+ }
1033
+ return block;
1034
+ });
1035
+ const newToolCalls = (aiMsg.tool_calls ?? []).map((tc) => {
1036
+ const serializedArgs = JSON.stringify(tc.args);
1037
+ if (serializedArgs.length > emergencyMaxChars) return {
1038
+ ...tc,
1039
+ args: require_truncation.truncateToolInput(serializedArgs, emergencyMaxChars)
1040
+ };
1041
+ return tc;
1042
+ });
1043
+ emergencyMessages[i] = new _langchain_core_messages.AIMessage({
1044
+ ...aiMsg,
1045
+ content: require_langchain.toLangChainContent(newContent),
1046
+ tool_calls: newToolCalls.length > 0 ? newToolCalls : void 0
1047
+ });
1048
+ indexTokenCountMap[i] = factoryParams.tokenCounter(emergencyMessages[i]);
1049
+ emergencyTruncatedCount++;
1050
+ }
1051
+ }
1052
+ }
1053
+ factoryParams.log?.("info", "Emergency truncation complete");
1054
+ factoryParams.log?.("debug", "Emergency truncation details", {
1055
+ truncatedCount: emergencyTruncatedCount,
1056
+ emergencyMaxChars
1057
+ });
1058
+ const retryResult = getMessagesWithinTokenLimit({
1059
+ maxContextTokens: pruningBudget,
1060
+ messages: emergencyMessages,
1061
+ indexTokenCountMap,
1062
+ startType: params.startType,
1063
+ thinkingEnabled: factoryParams.thinkingEnabled,
1064
+ tokenCounter: factoryParams.tokenCounter,
1065
+ instructionTokens: currentInstructionTokens,
1066
+ reasoningType: factoryParams.provider === "bedrock" ? "reasoning_content" : "thinking",
1067
+ thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : void 0
1068
+ });
1069
+ const repaired = repairOrphanedToolMessages({
1070
+ context: retryResult.context,
1071
+ allMessages: emergencyMessages,
1072
+ tokenCounter: factoryParams.tokenCounter,
1073
+ indexTokenCountMap
1074
+ });
1075
+ context = repaired.context;
1076
+ reclaimedTokens = repaired.reclaimedTokens;
1077
+ messagesToRefine.push(...retryResult.messagesToRefine);
1078
+ if (repaired.droppedMessages.length > 0) messagesToRefine.push(...repaired.droppedMessages);
1079
+ factoryParams.log?.("debug", "Emergency truncation retry result", {
1080
+ contextLength: context.length,
1081
+ messagesToRefineCount: messagesToRefine.length,
1082
+ remainingTokens: retryResult.remainingContextTokens
1083
+ });
1084
+ } finally {
1085
+ for (const [key, value] of Object.entries(preEmergencyTokenCounts)) indexTokenCountMap[key] = value;
1086
+ }
1087
+ }
1088
+ const remainingContextTokens = Math.max(0, Math.min(pruningBudget, initialRemainingContextTokens + reclaimedTokens));
1089
+ runThinkingStartIndex = thinkingStartIndex ?? -1;
1090
+ /** The index is the first value of `context`, index relative to `params.messages` */
1091
+ lastCutOffIndex = Math.max(params.messages.length - (context.length - (context[0]?.getType() === "system" ? 1 : 0)), 0);
1092
+ return {
1093
+ context,
1094
+ indexTokenCountMap,
1095
+ messagesToRefine,
1096
+ prePruneContextTokens: calibratedTotalTokens,
1097
+ remainingContextTokens,
1098
+ contextPressure,
1099
+ originalToolContent: originalToolContent.size > 0 ? originalToolContent : void 0,
1100
+ calibrationRatio,
1101
+ resolvedInstructionOverhead: bestInstructionOverhead
1102
+ };
1103
+ };
1568
1104
  }
1569
-
1105
+ //#endregion
1570
1106
  exports.DEFAULT_RESERVE_RATIO = DEFAULT_RESERVE_RATIO;
1571
1107
  exports.ORIGINAL_CONTENT_MAX_CHARS = ORIGINAL_CONTENT_MAX_CHARS;
1572
1108
  exports.calculateTotalTokens = calculateTotalTokens;
@@ -1579,4 +1115,5 @@ exports.preFlightTruncateToolCallInputs = preFlightTruncateToolCallInputs;
1579
1115
  exports.preFlightTruncateToolResults = preFlightTruncateToolResults;
1580
1116
  exports.repairOrphanedToolMessages = repairOrphanedToolMessages;
1581
1117
  exports.sanitizeOrphanToolBlocks = sanitizeOrphanToolBlocks;
1582
- //# sourceMappingURL=prune.cjs.map
1118
+
1119
+ //# sourceMappingURL=prune.cjs.map