@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Ingestion-time and pre-flight truncation utilities for tool results.
3
+ *
4
+ * Prevents oversized tool outputs from entering the message array and
5
+ * consuming the entire context window.
6
+ */
7
/**
 * Absolute hard cap on tool result length (characters).
 * Even if the model has a 1M-token context, a single tool result
 * larger than this is almost certainly a bug (e.g., dumping a binary file).
 */
const HARD_MAX_TOOL_RESULT_CHARS = 400_000;
/**
 * Computes the dynamic max tool result size based on the model's context window.
 * Uses 30% of the context window (in estimated characters, ~4 chars/token)
 * capped at HARD_MAX_TOOL_RESULT_CHARS.
 *
 * Falls back to HARD_MAX_TOOL_RESULT_CHARS when the context window is
 * missing, non-positive, or not a number, so the result is always a usable
 * numeric limit.
 *
 * @param contextWindowTokens - The model's max context tokens (optional).
 * @returns Maximum allowed characters for a single tool result.
 */
function calculateMaxToolResultChars(contextWindowTokens) {
  if (contextWindowTokens == null || contextWindowTokens <= 0) {
    return HARD_MAX_TOOL_RESULT_CHARS;
  }
  // 30% of the window in tokens, converted to characters at ~4 chars/token.
  const dynamicLimit = Math.floor(contextWindowTokens * 0.3) * 4;
  // NaN slips past the `<= 0` guard above and Math.min would propagate it.
  // A NaN limit makes every `length <= limit` comparison false, so treat it
  // like a missing context window instead of returning it.
  if (Number.isNaN(dynamicLimit)) {
    return HARD_MAX_TOOL_RESULT_CHARS;
  }
  return Math.min(dynamicLimit, HARD_MAX_TOOL_RESULT_CHARS);
}
27
/**
 * Truncates a tool-call input (the arguments/payload of a tool_use block)
 * using head+tail strategy. Returns an object with `_truncated` (the
 * truncated string) and `_originalChars` (for diagnostics).
 *
 * Accepts any type — strings are used as-is, everything else is
 * JSON-serialized before truncation. Values that JSON cannot represent
 * (`undefined`, functions, symbols) are converted with `String()` instead.
 *
 * The returned `_truncated` string never exceeds `maxChars`:
 * - budget >= 100 chars after the indicator: ~70% head + indicator + ~30% tail
 * - budget < 100 chars after the indicator: head + indicator (no tail)
 * - indicator does not fit at all: plain head cut of `maxChars` characters
 *
 * @param input - The tool input (string, object, etc.).
 * @param maxChars - Maximum allowed characters.
 * @returns `_truncated` (possibly shortened serialization) and
 *   `_originalChars` (length of the full serialization).
 */
function truncateToolInput(input, maxChars) {
  // JSON.stringify yields undefined (not a string) for undefined, functions
  // and symbols; fall back to String() so `.length` below is always safe.
  const serialized =
    typeof input === 'string' ? input : JSON.stringify(input) ?? String(input);
  if (serialized.length <= maxChars) {
    return { _truncated: serialized, _originalChars: serialized.length };
  }
  const indicator = `\n… [truncated: ${serialized.length} chars exceeded ${maxChars} limit] …\n`;
  const available = maxChars - indicator.length;
  if (available <= 0) {
    // The indicator alone would blow the budget: hard-cut the head.
    // Clamp at 0 so a negative limit is not read as an offset from the end.
    return {
      _truncated: serialized.slice(0, Math.max(0, maxChars)),
      _originalChars: serialized.length,
    };
  }
  if (available < 100) {
    // Too small for a meaningful tail: head-only, sized so that head plus
    // indicator stays within maxChars.
    return {
      _truncated: serialized.slice(0, available) + indicator.trimEnd(),
      _originalChars: serialized.length,
    };
  }
  const headSize = Math.ceil(available * 0.7);
  const tailSize = available - headSize;
  return {
    _truncated:
      serialized.slice(0, headSize) +
      indicator +
      serialized.slice(serialized.length - tailSize),
    _originalChars: serialized.length,
  };
}
59
/**
 * Truncates tool result content that exceeds `maxChars` using a head+tail
 * strategy. Keeps the beginning (structure/headers) and end (return value /
 * conclusion) of the content so the model retains both the opening context
 * and the final outcome.
 *
 * Head gets ~70% of the budget, tail gets ~30%. Falls back to head-only
 * when the budget is too small for a meaningful tail, and to a plain head
 * cut (no indicator) when the indicator itself does not fit.
 *
 * @param content - The tool result string content.
 * @param maxChars - Maximum allowed characters.
 * @returns The (possibly truncated) content string.
 */
function truncateToolResultContent(content, maxChars) {
  if (content.length <= maxChars) {
    return content;
  }
  const indicator = `\n\n… [truncated: ${content.length} chars exceeded ${maxChars} limit] …\n\n`;
  const available = maxChars - indicator.length;
  if (available <= 0) {
    // No room for the indicator. Clamp at 0: String.prototype.slice treats a
    // negative end as an offset from the end of the string, which would
    // return almost the entire content for a negative limit.
    return content.slice(0, Math.max(0, maxChars));
  }
  // When budget is too small for a meaningful tail, fall back to head-only
  if (available < 200) {
    return content.slice(0, available) + indicator.trimEnd();
  }
  const headSize = Math.ceil(available * 0.7);
  const tailSize = available - headSize;
  // Try to break at newline boundaries for cleaner output. Both adjustments
  // only ever shrink the kept text, so the result stays within maxChars.
  let headEnd = headSize;
  const headNewline = content.lastIndexOf('\n', headSize);
  // Only snap back to a newline that is less than 200 chars before the cut.
  if (headNewline > headSize - 200 && headNewline > 0) {
    headEnd = headNewline;
  }
  let tailStart = content.length - tailSize;
  const tailNewline = content.indexOf('\n', tailStart);
  // Only snap forward to a newline that is less than 200 chars after the cut;
  // the tail then starts on the character following that newline.
  if (tailNewline > 0 && tailNewline < tailStart + 200) {
    tailStart = tailNewline + 1;
  }
  return content.slice(0, headEnd) + indicator + content.slice(tailStart);
}
100
+
101
+ export { HARD_MAX_TOOL_RESULT_CHARS, calculateMaxToolResultChars, truncateToolInput, truncateToolResultContent };
102
+ //# sourceMappingURL=truncation.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"truncation.mjs","sources":["../../../src/utils/truncation.ts"],"sourcesContent":["/**\n * Ingestion-time and pre-flight truncation utilities for tool results.\n *\n * Prevents oversized tool outputs from entering the message array and\n * consuming the entire context window.\n */\n\n/**\n * Absolute hard cap on tool result length (characters).\n * Even if the model has a 1M-token context, a single tool result\n * larger than this is almost certainly a bug (e.g., dumping a binary file).\n */\nexport const HARD_MAX_TOOL_RESULT_CHARS = 400_000;\n\n/**\n * Computes the dynamic max tool result size based on the model's context window.\n * Uses 30% of the context window (in estimated characters, ~4 chars/token)\n * capped at HARD_MAX_TOOL_RESULT_CHARS.\n *\n * @param contextWindowTokens - The model's max context tokens (optional).\n * @returns Maximum allowed characters for a single tool result.\n */\nexport function calculateMaxToolResultChars(\n contextWindowTokens?: number\n): number {\n if (contextWindowTokens == null || contextWindowTokens <= 0) {\n return HARD_MAX_TOOL_RESULT_CHARS;\n }\n return Math.min(\n Math.floor(contextWindowTokens * 0.3) * 4,\n HARD_MAX_TOOL_RESULT_CHARS\n );\n}\n\n/**\n * Truncates a tool-call input (the arguments/payload of a tool_use block)\n * using head+tail strategy. Returns an object with `_truncated` (the\n * truncated string) and `_originalChars` (for diagnostics).\n *\n * Accepts any type — objects are JSON-serialized before truncation.\n *\n * @param input - The tool input (string, object, etc.).\n * @param maxChars - Maximum allowed characters.\n */\nexport function truncateToolInput(\n input: unknown,\n maxChars: number\n): { _truncated: string; _originalChars: number } {\n const serialized = typeof input === 'string' ? 
input : JSON.stringify(input);\n if (serialized.length <= maxChars) {\n return { _truncated: serialized, _originalChars: serialized.length };\n }\n const indicator = `\\n… [truncated: ${serialized.length} chars exceeded ${maxChars} limit] …\\n`;\n const available = maxChars - indicator.length;\n\n if (available < 100) {\n return {\n _truncated: serialized.slice(0, maxChars) + indicator.trimEnd(),\n _originalChars: serialized.length,\n };\n }\n\n const headSize = Math.ceil(available * 0.7);\n const tailSize = available - headSize;\n\n return {\n _truncated:\n serialized.slice(0, headSize) +\n indicator +\n serialized.slice(serialized.length - tailSize),\n _originalChars: serialized.length,\n };\n}\n\n/**\n * Truncates tool result content that exceeds `maxChars` using a head+tail\n * strategy. Keeps the beginning (structure/headers) and end (return value /\n * conclusion) of the content so the model retains both the opening context\n * and the final outcome.\n *\n * Head gets ~70% of the budget, tail gets ~30%. 
Falls back to head-only\n * when the budget is too small for a meaningful tail.\n *\n * @param content - The tool result string content.\n * @param maxChars - Maximum allowed characters.\n * @returns The (possibly truncated) content string.\n */\nexport function truncateToolResultContent(\n content: string,\n maxChars: number\n): string {\n if (content.length <= maxChars) {\n return content;\n }\n\n const indicator = `\\n\\n… [truncated: ${content.length} chars exceeded ${maxChars} limit] …\\n\\n`;\n const available = maxChars - indicator.length;\n if (available <= 0) {\n return content.slice(0, maxChars);\n }\n\n // When budget is too small for a meaningful tail, fall back to head-only\n if (available < 200) {\n return content.slice(0, available) + indicator.trimEnd();\n }\n\n const headSize = Math.ceil(available * 0.7);\n const tailSize = available - headSize;\n\n // Try to break at newline boundaries for cleaner output\n let headEnd = headSize;\n const headNewline = content.lastIndexOf('\\n', headSize);\n if (headNewline > headSize - 200 && headNewline > 0) {\n headEnd = headNewline;\n }\n\n let tailStart = content.length - tailSize;\n const tailNewline = content.indexOf('\\n', tailStart);\n if (tailNewline > 0 && tailNewline < tailStart + 200) {\n tailStart = tailNewline + 1;\n }\n\n return content.slice(0, headEnd) + indicator + 
content.slice(tailStart);\n}\n"],"names":[],"mappings":"AAAA;;;;;AAKG;AAEH;;;;AAIG;AACI,MAAM,0BAA0B,GAAG;AAE1C;;;;;;;AAOG;AACG,SAAU,2BAA2B,CACzC,mBAA4B,EAAA;IAE5B,IAAI,mBAAmB,IAAI,IAAI,IAAI,mBAAmB,IAAI,CAAC,EAAE;AAC3D,QAAA,OAAO,0BAA0B;IACnC;AACA,IAAA,OAAO,IAAI,CAAC,GAAG,CACb,IAAI,CAAC,KAAK,CAAC,mBAAmB,GAAG,GAAG,CAAC,GAAG,CAAC,EACzC,0BAA0B,CAC3B;AACH;AAEA;;;;;;;;;AASG;AACG,SAAU,iBAAiB,CAC/B,KAAc,EACd,QAAgB,EAAA;AAEhB,IAAA,MAAM,UAAU,GAAG,OAAO,KAAK,KAAK,QAAQ,GAAG,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;AAC5E,IAAA,IAAI,UAAU,CAAC,MAAM,IAAI,QAAQ,EAAE;QACjC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,CAAC,MAAM,EAAE;IACtE;IACA,MAAM,SAAS,GAAG,CAAA,gBAAA,EAAmB,UAAU,CAAC,MAAM,CAAA,gBAAA,EAAmB,QAAQ,CAAA,WAAA,CAAa;AAC9F,IAAA,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM;AAE7C,IAAA,IAAI,SAAS,GAAG,GAAG,EAAE;QACnB,OAAO;AACL,YAAA,UAAU,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE;YAC/D,cAAc,EAAE,UAAU,CAAC,MAAM;SAClC;IACH;IAEA,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC;AAC3C,IAAA,MAAM,QAAQ,GAAG,SAAS,GAAG,QAAQ;IAErC,OAAO;QACL,UAAU,EACR,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;YAC7B,SAAS;YACT,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,QAAQ,CAAC;QAChD,cAAc,EAAE,UAAU,CAAC,MAAM;KAClC;AACH;AAEA;;;;;;;;;;;;AAYG;AACG,SAAU,yBAAyB,CACvC,OAAe,EACf,QAAgB,EAAA;AAEhB,IAAA,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE;AAC9B,QAAA,OAAO,OAAO;IAChB;IAEA,MAAM,SAAS,GAAG,CAAA,kBAAA,EAAqB,OAAO,CAAC,MAAM,CAAA,gBAAA,EAAmB,QAAQ,CAAA,aAAA,CAAe;AAC/F,IAAA,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM;AAC7C,IAAA,IAAI,SAAS,IAAI,CAAC,EAAE;QAClB,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;IACnC;;AAGA,IAAA,IAAI,SAAS,GAAG,GAAG,EAAE;AACnB,QAAA,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE;IAC1D;IAEA,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC;AAC3C,IAAA,MAAM,QAAQ,GAAG,SAAS,GAAG,QAAQ;;IAGrC,IAAI,OAAO,GAAG,QAAQ;IACtB,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC;IACvD,IAAI,WAAW,GAAG,QAAQ,GAAG,GAAG,IAAI,WAAW,GAAG,CAAC,EAAE;QACnD,OAAO,GAAG,WAAW;IACvB;AAEA,IAAA,IAAI,SA
AS,GAAG,OAAO,CAAC,MAAM,GAAG,QAAQ;IACzC,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC;IACpD,IAAI,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,SAAS,GAAG,GAAG,EAAE;AACpD,QAAA,SAAS,GAAG,WAAW,GAAG,CAAC;IAC7B;AAEA,IAAA,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,GAAG,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;AACzE;;;;"}
@@ -28,12 +28,42 @@ export declare class AgentContext {
28
28
  maxContextTokens?: number;
29
29
  /** Current usage metadata for this agent */
30
30
  currentUsage?: Partial<UsageMetadata>;
31
+ /**
32
+ * Usage from the most recent LLM call only (not accumulated).
33
+ * Used for accurate provider calibration in pruning.
34
+ */
35
+ lastCallUsage?: {
36
+ inputTokens: number;
37
+ outputTokens: number;
38
+ totalTokens: number;
39
+ cacheRead?: number;
40
+ cacheCreation?: number;
41
+ };
42
+ /**
43
+ * Whether totalTokens data is fresh (set true when provider usage arrives,
44
+ * false at the start of each turn before the LLM responds).
45
+ * Prevents stale token data from driving pruning/trigger decisions.
46
+ */
47
+ totalTokensFresh: boolean;
48
+ /** Context pruning configuration. */
49
+ contextPruningConfig?: t.ContextPruningConfig;
50
+ maxToolResultChars?: number;
31
51
  /** Prune messages function configured for this agent */
32
52
  pruneMessages?: ReturnType<typeof createPruneMessages>;
33
53
  /** Token counter function for this agent */
34
54
  tokenCounter?: t.TokenCounter;
35
- /** Instructions/system message token count */
36
- instructionTokens: number;
55
+ /** Token count for the system message (instructions text). */
56
+ systemMessageTokens: number;
57
+ /** Token count for tool schemas only. */
58
+ toolSchemaTokens: number;
59
+ /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
60
+ calibrationRatio: number;
61
+ /** Provider-observed instruction overhead from the pruner's best-variance turn. */
62
+ resolvedInstructionOverhead?: number;
63
+ /** Pre-masking tool content keyed by message index, consumed by the summarize node. */
64
+ pendingOriginalToolContent?: Map<number, string>;
65
+ /** Total instruction overhead: system message + tool schemas + pending summary. */
66
+ get instructionTokens(): number;
37
67
  /** The amount of time that should pass before another consecutive API call */
38
68
  streamBuffer?: number;
39
69
  /** Last stream call timestamp for rate limiting */
@@ -76,12 +106,41 @@ export declare class AgentContext {
76
106
  private cachedSystemRunnable?;
77
107
  /** Whether system runnable needs rebuild (set when discovered tools change) */
78
108
  private systemRunnableStale;
79
- /** Cached system message token count (separate from tool tokens) */
80
- private systemMessageTokens;
81
109
  /** Promise for token calculation initialization */
82
110
  tokenCalculationPromise?: Promise<void>;
83
111
  /** Format content blocks as strings (for legacy compatibility) */
84
112
  useLegacyContent: boolean;
113
+ /** Enables graph-level summarization for this agent */
114
+ summarizationEnabled?: boolean;
115
+ /** Summarization runtime settings used by graph pruning hooks */
116
+ summarizationConfig?: t.SummarizationConfig;
117
+ /** Current summary text produced by the summarize node, integrated into system message */
118
+ private summaryText?;
119
+ /** Token count of the current summary (tracked for token accounting) */
120
+ private summaryTokenCount;
121
+ /**
122
+ * Where the summary should be injected:
123
+ * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
124
+ * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
125
+ * - `'none'`: no summary present
126
+ */
127
+ private _summaryLocation;
128
+ /**
129
+ * Durable summary that survives reset() calls. Set from initialSummary
130
+ * during fromConfig() and updated by setSummary() so that the latest
131
+ * summary (whether cross-run or intra-run) is always restored after
132
+ * processStream's resetValues() cycle.
133
+ */
134
+ private _durableSummaryText?;
135
+ private _durableSummaryTokenCount;
136
+ /** Number of summarization cycles that have occurred for this agent context */
137
+ private _summaryVersion;
138
+ /**
139
+ * Message count at the time summarization was last triggered.
140
+ * Used to prevent re-summarizing the same unchanged message set.
141
+ * Summarization is allowed to fire again only when new messages appear.
142
+ */
143
+ private _lastSummarizationMsgCount;
85
144
  /**
86
145
  * Handoff context when this agent receives control via handoff.
87
146
  * Contains source and parallel execution info for system message context.
@@ -92,7 +151,7 @@ export declare class AgentContext {
92
151
  /** Names of sibling agents executing in parallel (empty if sequential) */
93
152
  parallelSiblings: string[];
94
153
  };
95
- constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }: {
154
+ constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }: {
96
155
  agentId: string;
97
156
  name?: string;
98
157
  provider: Providers;
@@ -111,6 +170,10 @@ export declare class AgentContext {
111
170
  instructionTokens?: number;
112
171
  useLegacyContent?: boolean;
113
172
  discoveredTools?: string[];
173
+ summarizationEnabled?: boolean;
174
+ summarizationConfig?: t.SummarizationConfig;
175
+ contextPruningConfig?: t.ContextPruningConfig;
176
+ maxToolResultChars?: number;
114
177
  });
115
178
  /**
116
179
  * Builds instructions text for tools that are ONLY callable via programmatic code execution.
@@ -153,7 +216,18 @@ export declare class AgentContext {
153
216
  */
154
217
  reset(): void;
155
218
  /**
156
- * Update the token count map with instruction tokens
219
+ * Update the token count map from a base map.
220
+ *
221
+ * Previously this inflated index 0 with instructionTokens to indirectly
222
+ * reserve budget for the system prompt. That approach was imprecise: with
223
+ * large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
224
+ * conversation message appeared enormous and was always pruned, while the
225
+ * real available budget was never explicitly computed.
226
+ *
227
+ * Now instruction tokens are passed to getMessagesWithinTokenLimit via
228
+ * the `getInstructionTokens` factory param so the pruner subtracts them
229
+ * from the budget directly. The token map contains only real per-message
230
+ * token counts.
157
231
  */
158
232
  updateTokenMapWithInstructions(baseTokenMap: Record<string, number>): void;
159
233
  /**
@@ -180,6 +254,50 @@ export declare class AgentContext {
180
254
  * Call this when resetting the agent or when handoff context is no longer relevant.
181
255
  */
182
256
  clearHandoffContext(): void;
257
+ setSummary(text: string, tokenCount: number): void;
258
+ /** Sets a cross-run summary that is injected into the system prompt. */
259
+ setInitialSummary(text: string, tokenCount: number): void;
260
+ /**
261
+ * Replaces the indexTokenCountMap with a fresh map keyed to the surviving
262
+ * context messages after summarization. Called by the summarize node after
263
+ * it emits RemoveMessage operations that shift message indices.
264
+ */
265
+ rebuildTokenMapAfterSummarization(newTokenMap: Record<string, number>): void;
266
+ hasSummary(): boolean;
267
+ /** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
268
+ hasPendingCompactionSummary(): boolean;
269
+ getSummaryText(): string | undefined;
270
+ get summaryVersion(): number;
271
+ /**
272
+ * Returns true when the message count hasn't changed since the last
273
+ * summarization — re-summarizing would produce an identical result.
274
+ * Oversized individual messages are handled by fit-to-budget truncation
275
+ * in the pruner, which keeps them in context without triggering overflow.
276
+ */
277
+ shouldSkipSummarization(currentMsgCount: number): boolean;
278
+ /**
279
+ * Records the message count at which summarization was triggered,
280
+ * so subsequent calls with the same count are suppressed.
281
+ */
282
+ markSummarizationTriggered(msgCount: number): void;
283
+ clearSummary(): void;
284
+ /**
285
+ * Returns a structured breakdown of how the context token budget is consumed.
286
+ * Useful for diagnostics when context overflow or pruning issues occur.
287
+ */
288
+ getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown;
289
+ /**
290
+ * Returns a human-readable string of the token budget breakdown
291
+ * for inclusion in error messages and diagnostics.
292
+ */
293
+ formatTokenBudgetBreakdown(messages?: BaseMessage[]): string;
294
+ /**
295
+ * Updates the last-call usage with data from the most recent LLM response.
296
+ * Unlike `currentUsage` which accumulates, this captures only the single call.
297
+ */
298
+ updateLastCallUsage(usage: Partial<UsageMetadata>): void;
299
+ /** Marks token data as stale before a new LLM call. */
300
+ markTokensStale(): void;
183
301
  /**
184
302
  * Marks tools as discovered via tool search.
185
303
  * Discovered tools will be included in the next model binding.
@@ -19,6 +19,14 @@ export declare enum GraphEvents {
19
19
  ON_REASONING_DELTA = "on_reasoning_delta",
20
20
  /** [Custom] Request to execute tools - dispatched by ToolNode, handled by host */
21
21
  ON_TOOL_EXECUTE = "on_tool_execute",
22
+ /** [Custom] Emitted when the summarize node begins generating a summary */
23
+ ON_SUMMARIZE_START = "on_summarize_start",
24
+ /** [Custom] Delta event carrying the completed summary content */
25
+ ON_SUMMARIZE_DELTA = "on_summarize_delta",
26
+ /** [Custom] Emitted when the summarize node completes with the final summary */
27
+ ON_SUMMARIZE_COMPLETE = "on_summarize_complete",
28
+ /** [Custom] Diagnostic logging event for context management observability */
29
+ ON_AGENT_LOG = "on_agent_log",
22
30
  /** Custom event, emitted by system */
23
31
  ON_CUSTOM_EVENT = "on_custom_event",
24
32
  /** Emitted when a chat model starts processing. */
@@ -69,6 +77,7 @@ export declare enum Providers {
69
77
  export declare enum GraphNodeKeys {
70
78
  TOOLS = "tools=",
71
79
  AGENT = "agent=",
80
+ SUMMARIZE = "summarize=",
72
81
  ROUTER = "router",
73
82
  PRE_TOOLS = "pre_tools",
74
83
  POST_TOOLS = "post_tools"
@@ -98,6 +107,8 @@ export declare enum ContentTypes {
98
107
  REASONING = "reasoning",
99
108
  /** Multi-Agent Switch */
100
109
  AGENT_UPDATE = "agent_update",
110
+ /** Framework-level conversation summary block */
111
+ SUMMARY = "summary",
101
112
  /** Bedrock */
102
113
  REASONING_CONTENT = "reasoning_content"
103
114
  }
@@ -123,7 +134,9 @@ export declare enum Constants {
123
134
  CONTENT_AND_ARTIFACT = "content_and_artifact",
124
135
  LC_TRANSFER_TO_ = "lc_transfer_to_",
125
136
  /** Delimiter for MCP tools: toolName_mcp_serverName */
126
- MCP_DELIMITER = "_mcp_"
137
+ MCP_DELIMITER = "_mcp_",
138
+ /** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
139
+ ANTHROPIC_SERVER_TOOL_PREFIX = "srvtoolu_"
127
140
  }
128
141
  export declare enum TitleMethod {
129
142
  STRUCTURED = "structured",
@@ -1,9 +1,8 @@
1
1
  import { ToolNode } from '@langchain/langgraph/prebuilt';
2
- import { Runnable, RunnableConfig } from '@langchain/core/runnables';
2
+ import { RunnableConfig } from '@langchain/core/runnables';
3
3
  import type { UsageMetadata, BaseMessage } from '@langchain/core/messages';
4
4
  import type { ToolCall } from '@langchain/core/messages/tool';
5
5
  import type * as t from '@/types';
6
- import { Providers } from '@/common';
7
6
  import { ToolNode as CustomToolNode } from '@/tools/ToolNode';
8
7
  import { AgentContext } from '@/agents/AgentContext';
9
8
  import { HandlerRegistry } from '@/events';
@@ -13,11 +12,6 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
13
12
  currentTools?: t.GraphTools;
14
13
  currentToolMap?: t.ToolMap;
15
14
  }): CustomToolNode<T> | ToolNode<T>;
16
- abstract initializeModel({ currentModel, tools, clientOptions, }: {
17
- currentModel?: t.ChatModel;
18
- tools?: t.GraphTools;
19
- clientOptions?: t.ClientOptions;
20
- }): Runnable;
21
15
  abstract getRunMessages(): BaseMessage[] | undefined;
22
16
  abstract getContentParts(): t.MessageContentComplex[] | undefined;
23
17
  abstract generateStepId(stepKey: string): [string, number];
@@ -30,7 +24,7 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
30
24
  abstract dispatchRunStepDelta(id: string, delta: t.ToolCallDelta): Promise<void>;
31
25
  abstract dispatchMessageDelta(id: string, delta: t.MessageDelta): Promise<void>;
32
26
  abstract dispatchReasoningDelta(stepId: string, delta: t.ReasoningDelta): Promise<void>;
33
- abstract createCallModel(agentId?: string, currentModel?: t.ChatModel): (state: T, config?: RunnableConfig) => Promise<Partial<T>>;
27
+ abstract createCallModel(agentId?: string, currentModel?: t.ChatModel): (state: t.AgentSubgraphState, config?: RunnableConfig) => Promise<Partial<t.AgentSubgraphState>>;
34
28
  messageStepHasToolCalls: Map<string, boolean>;
35
29
  messageIdsByStepKey: Map<string, string>;
36
30
  prelimMessageIdsByStepKey: Map<string, string>;
@@ -39,6 +33,12 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
39
33
  stepKeyIds: Map<string, string[]>;
40
34
  contentIndexMap: Map<string, number>;
41
35
  toolCallStepIds: Map<string, string>;
36
+ /**
37
+ * Step IDs that have been dispatched via handler registry directly
38
+ * (in dispatchRunStep). Used by the custom event callback to skip
39
+ * duplicate dispatch through the LangGraph callback chain.
40
+ */
41
+ handlerDispatchedStepIds: Set<string>;
42
42
  signal?: AbortSignal;
43
43
  /** Set of invoked tool call IDs from non-message run steps completed mid-run, if any */
44
44
  invokedToolIds?: Set<string>;
@@ -61,14 +61,23 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
61
61
  /** Optional compile options passed into workflow.compile() */
62
62
  compileOptions?: t.CompileOptions | undefined;
63
63
  messages: BaseMessage[];
64
+ /** Cached run messages preserved before clearHeavyState() so getRunMessages() works after cleanup. */
65
+ private cachedRunMessages?;
64
66
  runId: string | undefined;
67
+ /**
68
+ * Boundary between historical messages (loaded from conversation state)
69
+ * and messages produced during the current run. Set once in the state
70
+ * reducer when messages first arrive. Used by `getRunMessages()` and
71
+ * multi-agent message filtering — NOT for pruner token counting (the
72
+ * pruner maintains its own `lastTurnStartIndex` in its closure).
73
+ */
65
74
  startIndex: number;
66
75
  signal?: AbortSignal;
67
76
  /** Map of agent contexts by agent ID */
68
77
  agentContexts: Map<string, AgentContext>;
69
78
  /** Default agent ID to use */
70
79
  defaultAgentId: string;
71
- constructor({ runId, signal, agents, tokenCounter, indexTokenCountMap, }: t.StandardGraphInput);
80
+ constructor({ runId, signal, agents, tokenCounter, indexTokenCountMap, calibrationRatio, }: t.StandardGraphInput);
72
81
  resetValues(keepContent?: boolean): void;
73
82
  clearHeavyState(): void;
74
83
  getRunStep(stepId: string): t.RunStep | undefined;
@@ -80,6 +89,9 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
80
89
  checkKeyList(keyList: (string | number | undefined)[]): boolean;
81
90
  getRunMessages(): BaseMessage[] | undefined;
82
91
  getContentParts(): t.MessageContentComplex[] | undefined;
92
+ getCalibrationRatio(): number;
93
+ getResolvedInstructionOverhead(): number | undefined;
94
+ getToolCount(): number;
83
95
  /**
84
96
  * Get all run steps, optionally filtered by agent ID
85
97
  */
@@ -97,32 +109,15 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
97
109
  * Returns a map where key is the contentPart index and value is the agentId
98
110
  */
99
111
  getContentPartAgentMap(): Map<number, string>;
100
- createSystemRunnable({ provider, clientOptions, instructions, additional_instructions, }: {
101
- provider?: Providers;
102
- clientOptions?: t.ClientOptions;
103
- instructions?: string;
104
- additional_instructions?: string;
105
- }): t.SystemRunnable | undefined;
106
112
  initializeTools({ currentTools, currentToolMap, agentContext, }: {
107
113
  currentTools?: t.GraphTools;
108
114
  currentToolMap?: t.ToolMap;
109
115
  agentContext?: AgentContext;
110
116
  }): CustomToolNode<t.BaseGraphState> | ToolNode<t.BaseGraphState>;
111
- initializeModel({ provider, tools, clientOptions, }: {
112
- provider: Providers;
113
- tools?: t.GraphTools;
114
- clientOptions?: t.ClientOptions;
115
- }): Runnable;
116
117
  overrideTestModel(responses: string[], sleep?: number, toolCalls?: ToolCall[]): void;
117
- getNewModel({ provider, clientOptions, }: {
118
- provider: Providers;
119
- clientOptions?: t.ClientOptions;
120
- }): t.ChatModelInstance;
121
118
  getUsageMetadata(finalMessage?: BaseMessage): Partial<UsageMetadata> | undefined;
122
- /** Execute model invocation with streaming support */
123
- private attemptInvoke;
124
119
  cleanupSignalListener(currentModel?: t.ChatModel): void;
125
- createCallModel(agentId?: string): (state: t.BaseGraphState, config?: RunnableConfig) => Promise<Partial<t.BaseGraphState>>;
120
+ createCallModel(agentId?: string): (state: t.AgentSubgraphState, config?: RunnableConfig) => Promise<Partial<t.AgentSubgraphState>>;
126
121
  createAgentNode(agentId: string): t.CompiledAgentWorfklow;
127
122
  createWorkflow(): t.CompiledStateWorkflow;
128
123
  /**
@@ -4,6 +4,7 @@ export * from './splitStream';
4
4
  export * from './events';
5
5
  export * from './messages';
6
6
  export * from './graphs';
7
+ export * from './summarization';
7
8
  export * from './tools/Calculator';
8
9
  export * from './tools/CodeExecutor';
9
10
  export * from './tools/ProgrammaticToolCalling';
@@ -18,3 +19,7 @@ export type * from './types';
18
19
  export { CustomOpenAIClient } from './llm/openai';
19
20
  export { ChatOpenRouter } from './llm/openrouter';
20
21
  export type { OpenRouterReasoning, OpenRouterReasoningEffort, ChatOpenRouterCallOptions, } from './llm/openrouter';
22
+ export { getChatModelClass } from './llm/providers';
23
+ export { initializeModel } from './llm/init';
24
+ export { attemptInvoke, tryFallbackProviders } from './llm/invoke';
25
+ export { isThinkingEnabled, getMaxOutputTokensKey } from './llm/request';
@@ -0,0 +1,18 @@
1
+ import type { Runnable } from '@langchain/core/runnables';
2
+ import type * as t from '@/types';
3
+ import { Providers } from '@/common';
4
+ /**
5
+ * Creates a chat model instance for a given provider, applies provider-specific
6
+ * field assignments, and optionally binds tools.
7
+ *
8
+ * This is the single entry point for model creation across the codebase — used
9
+ * by both the agent graph (main LLM) and the summarization node (compaction LLM).
10
+ * An optional `override` model can be passed to skip construction entirely
11
+ * (useful for cached/reused model instances or test fakes).
12
+ */
13
+ export declare function initializeModel({ provider, clientOptions, tools, override, }: {
14
+ provider: Providers;
15
+ clientOptions?: t.ClientOptions;
16
+ tools?: t.GraphTools;
17
+ override?: t.ChatModelInstance;
18
+ }): Runnable;
@@ -0,0 +1,48 @@
1
+ import { AIMessageChunk } from '@langchain/core/messages';
2
+ import type { RunnableConfig } from '@langchain/core/runnables';
3
+ import type { BaseMessage } from '@langchain/core/messages';
4
+ import type * as t from '@/types';
5
+ import { ChatModelStreamHandler } from '@/stream';
6
+ import { Providers } from '@/common';
7
+ /**
8
+ * Context passed to `attemptInvoke` for the default stream handler.
9
+ * Matches the subset of Graph that `ChatModelStreamHandler.handle` needs.
10
+ */
11
+ export type InvokeContext = Parameters<ChatModelStreamHandler['handle']>[3];
12
+ /**
13
+ * Per-chunk callback for custom stream processing.
14
+ * When provided, replaces the default `ChatModelStreamHandler`.
15
+ */
16
+ export type OnChunk = (chunk: AIMessageChunk) => void | Promise<void>;
17
+ /**
18
+ * Invokes a chat model with the given messages, handling both streaming and
19
+ * non-streaming paths.
20
+ *
21
+ * By default, stream chunks are processed through a `ChatModelStreamHandler`
22
+ * that dispatches run steps (MESSAGE_CREATION, TOOL_CALLS) for the graph.
23
+ * Pass an `onChunk` callback to override this with custom chunk processing
24
+ * (e.g. summarization delta events).
25
+ */
26
+ export declare function attemptInvoke({ model, messages, provider, context, onChunk, }: {
27
+ model: t.ChatModel;
28
+ messages: BaseMessage[];
29
+ provider: Providers;
30
+ context?: InvokeContext;
31
+ onChunk?: OnChunk;
32
+ }, config?: RunnableConfig): Promise<Partial<t.BaseGraphState>>;
33
+ /**
34
+ * Attempts each fallback provider in order until one succeeds.
35
+ * Throws the last error if all fallbacks fail.
36
+ */
37
+ export declare function tryFallbackProviders({ fallbacks, tools, messages, config, primaryError, context, onChunk, }: {
38
+ fallbacks: Array<{
39
+ provider: Providers;
40
+ clientOptions?: t.ClientOptions;
41
+ }>;
42
+ tools?: t.GraphTools;
43
+ messages: BaseMessage[];
44
+ config?: RunnableConfig;
45
+ primaryError: unknown;
46
+ context?: InvokeContext;
47
+ onChunk?: OnChunk;
48
+ }): Promise<Partial<t.BaseGraphState> | undefined>;
@@ -0,0 +1,14 @@
1
+ import type * as t from '@/types';
2
+ import { Providers } from '@/common';
3
+ /**
4
+ * Returns true when the provider + clientOptions indicate extended thinking
5
+ * is enabled. Works across Anthropic (direct), Bedrock (additionalModelRequestFields),
6
+ * and OpenAI-compat (modelKwargs.thinking).
7
+ */
8
+ export declare function isThinkingEnabled(provider: Providers, clientOptions?: t.ClientOptions): boolean;
9
+ /**
10
+ * Returns the correct key for setting max output tokens on the model
11
+ * constructor options. Google/Vertex use `maxOutputTokens`, all others
12
+ * use `maxTokens`.
13
+ */
14
+ export declare function getMaxOutputTokensKey(provider: Providers | string): 'maxOutputTokens' | 'maxTokens';
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Position-based context pruning for tool results.
3
+ *
4
+ * Uses position-based age: the distance of a message
5
+ * from the conversation end as a fraction of total messages.
6
+ *
7
+ * Two degradation levels:
8
+ * - Soft-trim: Keep head + tail of tool result content, drop middle.
9
+ * - Hard-clear: Replace entire content with a placeholder.
10
+ *
11
+ * Messages in the "protected zone" (recent assistant turns, system/pre-first-human
12
+ * messages, and messages with image content) are never pruned.
13
+ */
14
+ import { type BaseMessage } from '@langchain/core/messages';
15
+ import type { ContextPruningConfig } from '@/types/graph';
16
+ import type { TokenCounter } from '@/types/run';
17
+ import type { ContextPruningSettings } from './contextPruningSettings';
18
+ export interface ContextPruningResult {
19
+ /** Number of messages that were soft-trimmed. */
20
+ softTrimmed: number;
21
+ /** Number of messages that were hard-cleared. */
22
+ hardCleared: number;
23
+ }
24
+ /**
25
+ * Applies position-based context pruning to tool result messages.
26
+ *
27
+ * Modifies messages in-place and updates indexTokenCountMap with recounted
28
+ * token values for modified messages.
29
+ *
30
+ * @param params.messages - The full message array (modified in-place).
31
+ * @param params.indexTokenCountMap - Token count map (updated in-place).
32
+ * @param params.tokenCounter - Function to recount tokens after modification.
33
+ * @param params.config - Partial context pruning config (merged with defaults).
34
+ * @returns Counts of soft-trimmed and hard-cleared messages.
35
+ */
36
+ export declare function applyContextPruning(params: {
37
+ messages: BaseMessage[];
38
+ indexTokenCountMap: Record<string, number | undefined>;
39
+ tokenCounter: TokenCounter;
40
+ config?: ContextPruningConfig;
41
+ resolvedSettings?: ContextPruningSettings;
42
+ }): ContextPruningResult;
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Default settings for position-based context pruning.
3
+ *
4
+ * These are merged with user-provided overrides so any subset can be customized.
5
+ */
6
+ export interface ContextPruningSettings {
7
+ /** Whether position-based pruning is enabled. Default: false (opt-in). */
8
+ enabled: boolean;
9
+ /** Number of recent assistant turns to protect from pruning. Default: 3 */
10
+ keepLastAssistants: number;
11
+ /** Age ratio (0-1) at which soft-trim fires. Default: 0.3 */
12
+ softTrimRatio: number;
13
+ /** Age ratio (0-1) at which hard-clear fires. Default: 0.5 */
14
+ hardClearRatio: number;
15
+ /** Minimum tool result size (chars) before pruning applies. Default: 50000 */
16
+ minPrunableToolChars: number;
17
+ softTrim: {
18
+ /** Maximum total chars after soft-trim. Default: 4000 */
19
+ maxChars: number;
20
+ /** Head portion to keep. Default: 1500 */
21
+ headChars: number;
22
+ /** Tail portion to keep. Default: 1500 */
23
+ tailChars: number;
24
+ };
25
+ hardClear: {
26
+ /** Whether hard-clear is enabled. Default: true */
27
+ enabled: boolean;
28
+ /** Placeholder text for hard-cleared content. */
29
+ placeholder: string;
30
+ };
31
+ }
32
+ export declare const DEFAULT_CONTEXT_PRUNING_SETTINGS: ContextPruningSettings;
33
+ /**
34
+ * Merges user-provided partial overrides with the defaults.
35
+ */
36
+ export declare function resolveContextPruningSettings(overrides?: Partial<{
37
+ enabled?: boolean;
38
+ keepLastAssistants?: number;
39
+ softTrimRatio?: number;
40
+ hardClearRatio?: number;
41
+ minPrunableToolChars?: number;
42
+ softTrim?: Partial<ContextPruningSettings['softTrim']>;
43
+ hardClear?: Partial<ContextPruningSettings['hardClear']>;
44
+ }>): ContextPruningSettings;
@@ -1,4 +1,4 @@
1
- import { AIMessageChunk, HumanMessage, ToolMessage, AIMessage, BaseMessage } from '@langchain/core/messages';
1
+ import { AIMessage, BaseMessage, ToolMessage, HumanMessage, AIMessageChunk } from '@langchain/core/messages';
2
2
  import type * as t from '@/types';
3
3
  import { Providers } from '@/common';
4
4
  export declare function getConverseOverrideMessage({ userMessage, lastMessageX, lastMessageY, }: {