@librechat/agents 3.1.57 → 3.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +3 -3
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3827 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
package/dist/esm/summarization/node.mjs (new file)
@@ -0,0 +1,659 @@
+ import { ToolMessage, HumanMessage, SystemMessage, AIMessage } from '@langchain/core/messages';
+ import { ContentTypes, StepTypes, GraphEvents } from '../common/enum.mjs';
+ import { emitAgentLog, safeDispatchCustomEvent } from '../utils/events.mjs';
+ import { tryFallbackProviders, attemptInvoke } from '../llm/invoke.mjs';
+ import { createRemoveAllMessage } from '../messages/reducer.mjs';
+ import { getMaxOutputTokensKey } from '../llm/request.mjs';
+ import { addCacheControl } from '../messages/cache.mjs';
+ import { initializeModel } from '../llm/init.mjs';
+ import { getChunkContent } from '../stream.mjs';
+
+ const SUMMARIZATION_PARAM_KEYS = new Set(['maxSummaryTokens']);
+ /**
+ * Token overhead of the XML wrapper + instruction text added around the
+ * summary at injection time in AgentContext.buildSystemRunnable:
+ * `<summary>\n${text}\n</summary>\n\nYour context window was compacted...`
+ * ~33 tokens on Anthropic, ~24-27 on OpenAI. Using 33 as a safe ceiling.
+ */
+ const SUMMARY_WRAPPER_OVERHEAD_TOKENS = 33;
+ /** Structured checkpoint prompt for fresh summarization (no prior summary). */
+ const DEFAULT_SUMMARIZATION_PROMPT = `Hold on, before you continue I need you to write me a checkpoint of everything so far. Your context window is filling up and this checkpoint replaces the messages above, so capture everything you need to pick right back up.
+
+ Don't second-guess or fact-check anything you did, your tool results reflect exactly what happened. Just record what you did and what you observed. Only the checkpoint, don't respond to me or continue the conversation.
+
+ ## Checkpoint
+
+ ## Goal
+ What I asked you to do and any sub-goals you identified.
+
+ ## Constraints & Preferences
+ Any rules, preferences, or configuration I established.
+
+ ## Progress
+ ### Done
+ - What you completed and the outcomes
+
+ ### In Progress
+ - What you're currently working on
+
+ ## Key Decisions
+ Decisions you made and why.
+
+ ## Next Steps
+ Concrete task actions remaining, in priority order.
+
+ ## Critical Context
+ Exact identifiers, names, error messages, URLs, and details you need to preserve verbatim.
+
+ Rules:
+ - Record what you did and observed, don't judge or re-evaluate it
+ - For each tool call: the tool name, key inputs, and the outcome
+ - Preserve exact identifiers, names, errors, and references verbatim
+ - Short declarative sentences
+ - Skip empty sections`;
+ /** Prompt for re-compaction when a prior summary exists. */
+ const DEFAULT_UPDATE_SUMMARIZATION_PROMPT = `Hold on again, update your checkpoint. Merge the new messages into your existing checkpoint and give me a single consolidated replacement.
+
+ Keep it roughly the same length as your last checkpoint. Compress older details to make room for what's new, don't just append. Give recent actions more detail, compress older items to one-liners.
+
+ Don't fact-check or second-guess anything, your tool results are ground truth. Only the checkpoint, don't respond to me or continue the conversation.
+
+ Rules:
+ - Merge new progress into existing sections, don't duplicate headers
+ - Compress older completed items into one-line entries
+ - Move items from "In Progress" to "Done" when you completed them
+ - Update "Next Steps" to reflect current task priorities.
+ - For each new tool call: the tool name, key inputs, and the outcome
+ - Preserve exact identifiers, names, errors, and references verbatim
+ - Skip empty sections`;
+ function separateParameters(parameters) {
+ const llmParams = {};
+ let maxSummaryTokens;
+ for (const [key, value] of Object.entries(parameters)) {
+ if (SUMMARIZATION_PARAM_KEYS.has(key)) {
+ if (key === 'maxSummaryTokens' &&
+ typeof value === 'number' &&
+ value > 0) {
+ maxSummaryTokens = value;
+ }
+ }
+ else {
+ llmParams[key] = value;
+ }
+ }
+ return { llmParams, maxSummaryTokens };
+ }
+ /**
+ * Generates a structural metadata summary without making an LLM call.
+ * Used as a last-resort fallback when all summarization attempts fail.
+ * Preserves tool names and message counts so the agent retains basic context.
+ */
+ function generateMetadataStub(messages) {
+ const counts = {};
+ const toolNames = new Set();
+ for (const msg of messages) {
+ const role = msg.getType();
+ counts[role] = (counts[role] ?? 0) + 1;
+ if (role === 'tool' && msg.name != null && msg.name !== '') {
+ toolNames.add(msg.name);
+ }
+ if (role === 'ai' &&
+ msg instanceof AIMessage &&
+ msg.tool_calls &&
+ msg.tool_calls.length > 0) {
+ for (const tc of msg.tool_calls) {
+ toolNames.add(tc.name);
+ }
+ }
+ }
+ const countParts = Object.entries(counts)
+ .map(([role, count]) => `${count} ${role}`)
+ .join(', ');
+ const lines = [
+ `[Metadata summary: ${messages.length} messages (${countParts})]`,
+ ];
+ if (toolNames.size > 0) {
+ lines.push(`[Tools used: ${Array.from(toolNames).join(', ')}]`);
+ }
+ return lines.join('\n');
+ }
+ /** Maximum number of tool failures to include in the enrichment section. */
+ const MAX_TOOL_FAILURES = 8;
+ /** Maximum chars per failure summary line. */
+ const MAX_TOOL_FAILURE_CHARS = 240;
+ /**
+ * Extracts failed tool results from messages and formats them as a structured
+ * section. LLMs often omit specific failure details (exit codes, error messages)
+ * from their summaries, this mechanical enrichment guarantees they survive.
+ */
+ function extractToolFailuresSection(messages) {
+ const failures = [];
+ const seen = new Set();
+ for (const msg of messages) {
+ if (msg.getType() !== 'tool') {
+ continue;
+ }
+ const toolMsg = msg;
+ if (toolMsg.status !== 'error') {
+ continue;
+ }
+ // Deduplicate by tool_call_id
+ const callId = toolMsg.tool_call_id;
+ if (callId && seen.has(callId)) {
+ continue;
+ }
+ if (callId) {
+ seen.add(callId);
+ }
+ const toolName = toolMsg.name ?? 'tool';
+ const content = typeof toolMsg.content === 'string'
+ ? toolMsg.content
+ : JSON.stringify(toolMsg.content);
+ const normalized = content.replace(/\s+/g, ' ').trim();
+ const summary = normalized.length > MAX_TOOL_FAILURE_CHARS
+ ? `${normalized.slice(0, MAX_TOOL_FAILURE_CHARS - 3)}...`
+ : normalized;
+ failures.push({ toolName, summary });
+ }
+ if (failures.length === 0) {
+ return '';
+ }
+ const lines = failures
+ .slice(0, MAX_TOOL_FAILURES)
+ .map((f) => `- ${f.toolName}: ${f.summary}`);
+ if (failures.length > MAX_TOOL_FAILURES) {
+ lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
+ }
+ return `\n\n## Tool Failures\n${lines.join('\n')}`;
+ }
+ /**
+ * Appends mechanical enrichment sections to an LLM-generated summary.
+ * Tool failures are appended verbatim because LLMs often omit specific
+ * error details from their summaries.
+ */
+ function enrichSummary(summaryText, messages) {
+ return summaryText + extractToolFailuresSection(messages);
+ }
+ /**
+ * Restores pre-masking tool content onto the messages array using
+ * `pendingOriginalToolContent` stored on AgentContext. Only allocates
+ * a new array when there are entries to restore; otherwise returns the
+ * input reference unchanged.
+ */
+ function restoreOriginalToolContent(messages, originalToolContent) {
+ if (originalToolContent == null || originalToolContent.size === 0) {
+ return messages;
+ }
+ const restored = [...messages];
+ for (const [idx, content] of originalToolContent) {
+ const msg = restored[idx];
+ if (msg instanceof ToolMessage) {
+ restored[idx] = new ToolMessage({
+ content,
+ tool_call_id: msg.tool_call_id,
+ name: msg.name,
+ id: msg.id,
+ additional_kwargs: msg.additional_kwargs,
+ response_metadata: msg.response_metadata,
+ });
+ }
+ }
+ return restored;
+ }
+ /** Assembles the summarization model's client options from agent and config. */
+ function buildSummarizationClientConfig(agentContext, summarizationConfig) {
+ const provider = (summarizationConfig?.provider ??
+ agentContext.provider);
+ const modelName = summarizationConfig?.model;
+ const parameters = summarizationConfig?.parameters ?? {};
+ const promptText = summarizationConfig?.prompt ?? DEFAULT_SUMMARIZATION_PROMPT;
+ const updatePromptText = summarizationConfig?.updatePrompt ?? DEFAULT_UPDATE_SUMMARIZATION_PROMPT;
+ const { llmParams, maxSummaryTokens: paramMaxSummaryTokens } = separateParameters(parameters);
+ const isSelfSummarize = provider === agentContext.provider;
+ const baseOptions = isSelfSummarize && agentContext.clientOptions
+ ? { ...agentContext.clientOptions }
+ : {};
+ const clientOptions = {
+ ...baseOptions,
+ ...llmParams,
+ };
+ if (modelName != null && modelName !== '') {
+ clientOptions.model = modelName;
+ clientOptions.modelName = modelName;
+ }
+ const effectiveMaxSummaryTokens = paramMaxSummaryTokens ?? summarizationConfig?.maxSummaryTokens;
+ if (effectiveMaxSummaryTokens != null) {
+ clientOptions[getMaxOutputTokensKey(provider)] = effectiveMaxSummaryTokens;
+ }
+ return {
+ provider,
+ modelName,
+ clientOptions,
+ effectiveMaxSummaryTokens,
+ promptText,
+ updatePromptText,
+ };
+ }
+ /** Computes the token count for a summary, preferring provider output tokens when available. */
+ function computeSummaryTokenCount(summaryText, summaryUsage, tokenCounter) {
+ const providerOutputTokens = Number(summaryUsage?.output_tokens) || 0;
+ if (providerOutputTokens > 0) {
+ return providerOutputTokens + SUMMARY_WRAPPER_OVERHEAD_TOKENS;
+ }
+ if (tokenCounter) {
+ return (tokenCounter(new SystemMessage(summaryText)) +
+ SUMMARY_WRAPPER_OVERHEAD_TOKENS);
+ }
+ return 0;
+ }
+ /** Constructs the SummaryContentBlock persisted in the run step and dispatched to events. */
+ function buildSummaryBlock(params) {
+ return {
+ type: ContentTypes.SUMMARY,
+ content: [
+ {
+ type: ContentTypes.TEXT,
+ text: params.summaryText,
+ },
+ ],
+ tokenCount: params.tokenCount,
+ summaryVersion: params.summaryVersion,
+ boundary: {
+ messageId: params.stepId,
+ contentIndex: params.stepIndex,
+ },
+ model: params.modelName,
+ provider: params.provider,
+ createdAt: new Date().toISOString(),
+ };
+ }
+ /**
+ * Runs the summarization LLM call with primary + fallback providers,
+ * falling back to a metadata stub when all calls fail.
+ */
+ async function executeSummarizationWithFallback(params) {
+ const { agentContext, messages, clientConfig, summarizeConfig, stepId, usePromptCache, log, } = params;
+ const summarizationModel = initializeModel({
+ provider: clientConfig.provider,
+ clientOptions: clientConfig.clientOptions,
+ tools: agentContext.getToolsForBinding(),
+ });
+ const priorSummaryText = agentContext.getSummaryText()?.trim() ?? '';
+ let summaryText = '';
+ let summaryUsage;
+ try {
+ const result = await summarizeWithCacheHit({
+ model: summarizationModel,
+ messages,
+ promptText: clientConfig.promptText,
+ updatePromptText: clientConfig.updatePromptText,
+ priorSummaryText,
+ config: summarizeConfig,
+ stepId,
+ provider: clientConfig.provider,
+ reasoningKey: agentContext.reasoningKey,
+ usePromptCache,
+ log,
+ });
+ summaryText = result.text;
+ summaryUsage = result.usage;
+ }
+ catch (primaryError) {
+ log('error', 'Summarization LLM call failed', {
+ error: primaryError instanceof Error
+ ? primaryError.message
+ : String(primaryError),
+ provider: clientConfig.provider,
+ model: clientConfig.modelName,
+ messagesToRefineCount: messages.length,
+ });
+ const fallbacks = clientConfig.clientOptions
+ ?.fallbacks ?? [];
+ if (fallbacks.length > 0) {
+ try {
+ const onChunk = createSummarizationChunkHandler({
+ stepId,
+ config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
+ provider: clientConfig.provider,
+ reasoningKey: agentContext.reasoningKey,
+ });
+ const fbResult = await tryFallbackProviders({
+ fallbacks,
+ tools: agentContext.getToolsForBinding(),
+ messages: [
+ ...messages,
+ new HumanMessage(buildSummarizationInstruction(clientConfig.promptText, clientConfig.updatePromptText, priorSummaryText)),
+ ],
+ config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
+ primaryError,
+ onChunk,
+ });
+ const fbMsg = fbResult?.messages?.[0];
+ if (fbMsg) {
+ summaryText = extractResponseText(fbMsg);
+ }
+ }
+ catch (fbErr) {
+ log('warn', 'Fallback providers also failed', {
+ error: fbErr instanceof Error ? fbErr.message : String(fbErr),
+ });
+ }
+ }
+ if (!summaryText) {
+ log('warn', 'Summarization failed, falling back to metadata stub', {
+ error: primaryError instanceof Error
+ ? primaryError.message
+ : String(primaryError),
+ });
+ summaryText = generateMetadataStub(messages);
+ }
+ }
+ return { text: summaryText, usage: summaryUsage };
+ }
+ /** Dispatches run step completion, ON_SUMMARIZE_COMPLETE, and rebuilds token map. */
+ async function dispatchCompletionEvents(params) {
+ const { graph, runnableConfig, stepId, summaryBlock, agentContext, runStep, summaryUsage, agentId, } = params;
+ runStep.summary = summaryBlock;
+ if (summaryUsage) {
+ runStep.usage = {
+ prompt_tokens: Number(summaryUsage.input_tokens) || 0,
+ completion_tokens: Number(summaryUsage.output_tokens) || 0,
+ total_tokens: (Number(summaryUsage.input_tokens) || 0) +
+ (Number(summaryUsage.output_tokens) || 0),
+ };
+ }
+ await graph.dispatchRunStepCompleted(stepId, { type: 'summary', summary: summaryBlock }, runnableConfig);
+ if (runnableConfig) {
+ await safeDispatchCustomEvent(GraphEvents.ON_SUMMARIZE_COMPLETE, {
+ id: stepId,
+ agentId,
+ summary: summaryBlock,
+ }, runnableConfig);
+ }
+ agentContext.rebuildTokenMapAfterSummarization({});
+ }
+ function createSummarizeNode({ agentContext, graph, generateStepId, }) {
+ return async (state, config) => {
+ const request = state.summarizationRequest;
+ if (request == null) {
+ return { summarizationRequest: undefined };
+ }
+ const maxCtx = agentContext.maxContextTokens ?? 0;
+ if (maxCtx > 0 && agentContext.instructionTokens >= maxCtx) {
+ emitAgentLog(config, 'warn', 'summarize', 'Summarization skipped, instructions exceed context budget. Reduce the number of tools or increase maxContextTokens.', {
+ instructionTokens: agentContext.instructionTokens,
+ maxContextTokens: maxCtx,
+ breakdown: agentContext.formatTokenBudgetBreakdown(),
+ }, { runId: graph.runId, agentId: request.agentId });
+ return { summarizationRequest: undefined };
+ }
+ const messagesToRefine = restoreOriginalToolContent(state.messages, agentContext.pendingOriginalToolContent);
+ agentContext.pendingOriginalToolContent = undefined;
+ const clientConfig = buildSummarizationClientConfig(agentContext, agentContext.summarizationConfig);
+ const runnableConfig = config ?? graph.config;
+ const stepKey = `summarize-${request.agentId}`;
+ const [stepId, stepIndex] = generateStepId(stepKey);
+ const placeholderSummary = {
+ type: ContentTypes.SUMMARY,
+ model: clientConfig.modelName,
+ provider: clientConfig.provider,
+ };
+ const runStep = {
+ stepIndex,
+ id: stepId,
+ type: StepTypes.MESSAGE_CREATION,
+ index: graph.contentData.length,
+ stepDetails: {
+ type: StepTypes.MESSAGE_CREATION,
+ message_creation: { message_id: stepId },
+ },
+ summary: placeholderSummary,
+ usage: null,
+ };
+ if (graph.runId != null && graph.runId !== '') {
+ runStep.runId = graph.runId;
+ }
+ if (graph.isMultiAgent && agentContext.agentId) {
+ runStep.agentId = agentContext.agentId;
+ }
+ await graph.dispatchRunStep(runStep, runnableConfig);
+ if (runnableConfig) {
+ await safeDispatchCustomEvent(GraphEvents.ON_SUMMARIZE_START, {
+ agentId: request.agentId,
+ provider: clientConfig.provider,
+ model: clientConfig.modelName,
+ messagesToRefineCount: messagesToRefine.length,
+ summaryVersion: agentContext.summaryVersion + 1,
+ }, runnableConfig);
+ }
+ const isSelfSummarizeModel = clientConfig.provider === agentContext.provider;
+ const hasPromptCache = isSelfSummarizeModel &&
+ agentContext.clientOptions
+ ?.promptCache === true;
+ const log = (level, message, data) => {
+ emitAgentLog(runnableConfig, level, 'summarize', message, data, {
+ runId: graph.runId,
+ agentId: request.agentId,
+ });
+ };
+ log('debug', 'Summarization starting', {
+ messagesToRefineCount: messagesToRefine.length,
+ hasPriorSummary: (agentContext.getSummaryText()?.trim() ?? '') !== '',
+ summaryVersion: agentContext.summaryVersion + 1,
+ isSelfSummarize: isSelfSummarizeModel,
+ hasPromptCache,
+ provider: clientConfig.provider,
+ });
+ const summarizeConfig = config
+ ? {
+ ...config,
+ metadata: {
+ ...config.metadata,
+ agent_id: request.agentId,
+ summarization_provider: clientConfig.provider,
+ summarization_model: clientConfig.modelName,
+ },
+ }
+ : undefined;
+ const { text: rawText, usage: summaryUsage } = await executeSummarizationWithFallback({
+ agentContext,
+ messages: messagesToRefine,
+ clientConfig,
+ summarizeConfig,
+ stepId,
+ usePromptCache: isSelfSummarizeModel && hasPromptCache,
+ log,
+ });
+ if (!rawText) {
+ agentContext.markSummarizationTriggered(0);
+ if (runnableConfig) {
+ await safeDispatchCustomEvent(GraphEvents.ON_SUMMARIZE_COMPLETE, {
+ id: stepId,
+ agentId: request.agentId,
+ error: 'Summarization produced empty output',
+ }, runnableConfig);
+ }
+ return { summarizationRequest: undefined };
+ }
+ const summaryText = enrichSummary(rawText, messagesToRefine);
+ const tokenCount = computeSummaryTokenCount(summaryText, summaryUsage, agentContext.tokenCounter);
+ agentContext.setSummary(summaryText, tokenCount);
+ log('info', 'Summary persisted');
+ log('debug', 'Summary details', {
+ summaryTokens: tokenCount,
+ textLength: summaryText.length,
+ messagesCompacted: messagesToRefine.length,
+ summaryVersion: agentContext.summaryVersion,
+ ...(summaryUsage != null
+ ? {
+ input_tokens: summaryUsage.input_tokens,
+ output_tokens: summaryUsage.output_tokens,
+ cache_read: summaryUsage.input_token_details?.cache_read,
+ cache_creation: summaryUsage.input_token_details?.cache_creation,
+ }
+ : {}),
+ });
+ const summaryBlock = buildSummaryBlock({
+ summaryText,
+ tokenCount,
+ stepId,
+ stepIndex: runStep.index,
+ modelName: clientConfig.modelName,
+ provider: clientConfig.provider,
+ summaryVersion: agentContext.summaryVersion,
+ });
+ await dispatchCompletionEvents({
+ graph,
+ runnableConfig,
+ stepId,
+ summaryBlock,
+ agentContext,
+ runStep,
+ summaryUsage,
+ agentId: request.agentId,
+ });
+ return {
+ summarizationRequest: undefined,
+ messages: [createRemoveAllMessage()],
+ };
+ };
+ }
+ /** Extracts text from an LLM response, skipping reasoning/thinking blocks. */
+ function extractResponseText(response) {
+ const { content } = response;
+ if (typeof content === 'string') {
+ return content.trim();
+ }
+ if (!Array.isArray(content)) {
+ return '';
+ }
+ const parts = [];
+ for (const block of content) {
+ if (typeof block === 'string') {
+ parts.push(block);
+ continue;
+ }
+ if (block == null || typeof block !== 'object') {
+ continue;
+ }
+ const rec = block;
+ if (rec.type === ContentTypes.THINKING ||
+ rec.type === ContentTypes.REASONING_CONTENT ||
+ rec.type === 'redacted_thinking') {
+ continue;
+ }
+ if (rec.type === 'text' && typeof rec.text === 'string') {
+ parts.push(rec.text);
+ }
+ }
+ return parts.join('').trim();
+ }
+ function buildSummarizationInstruction(promptText, updatePromptText, priorSummaryText) {
+ const effectivePrompt = priorSummaryText
+ ? (updatePromptText ?? promptText)
+ : promptText;
+ const parts = [effectivePrompt];
+ if (priorSummaryText) {
+ parts.push(`\n\n<previous-summary>\n${priorSummaryText}\n</previous-summary>`);
+ }
+ return parts.join('');
+ }
+ /** Creates an `onChunk` callback that dispatches `ON_SUMMARIZE_DELTA` events for streaming. */
+ function createSummarizationChunkHandler({ stepId, config, provider, reasoningKey = 'reasoning_content', }) {
+ if (stepId == null || stepId === '' || !config) {
+ return undefined;
+ }
+ return (chunk) => {
+ const chunkAny = chunk;
+ const raw = getChunkContent({ chunk: chunkAny, provider, reasoningKey });
+ if (raw == null || (typeof raw === 'string' && !raw)) {
+ return;
+ }
+ const contentBlocks = typeof raw === 'string'
+ ? [{ type: ContentTypes.TEXT, text: raw }]
+ : raw;
+ safeDispatchCustomEvent(GraphEvents.ON_SUMMARIZE_DELTA, {
+ id: stepId,
+ delta: {
+ summary: {
+ type: ContentTypes.SUMMARY,
+ content: contentBlocks,
+ provider: String(config.metadata?.summarization_provider ?? ''),
+ model: String(config.metadata?.summarization_model ?? ''),
+ },
+ },
+ }, config);
+ };
+ }
+ function traceConfig(config, stage) {
+ if (!config) {
+ return undefined;
+ }
+ return {
+ ...config,
+ runName: `summarization:${stage}`,
+ metadata: { ...config.metadata, summarization: true, stage },
+ };
+ }
+ /**
+ * Cache-friendly compaction: sends raw conversation messages with the
+ * summarization instruction appended as the final HumanMessage.
+ * Providers with prompt caching get a cache hit on the system prompt +
+ * tool definitions prefix.
+ */
+ async function summarizeWithCacheHit({ model, messages, promptText, updatePromptText, priorSummaryText, config, stepId, provider, reasoningKey, usePromptCache, log, }) {
+ const instruction = buildSummarizationInstruction(promptText, updatePromptText, priorSummaryText);
+ const fullMessages = [...messages, new HumanMessage(instruction)];
+ const invokeMessages = usePromptCache === true ? addCacheControl(fullMessages) : fullMessages;
+ const result = await attemptInvoke({
+ model,
+ messages: invokeMessages,
+ provider,
+ onChunk: createSummarizationChunkHandler({
+ stepId,
+ config: traceConfig(config, 'cache_hit_compaction'),
+ provider,
+ reasoningKey,
+ }),
+ }, traceConfig(config, 'cache_hit_compaction'));
+ const responseMsg = result.messages?.[0];
+ const text = responseMsg
+ ? extractResponseText(responseMsg)
+ : '';
+ let usage;
+ let usageSource = 'none';
+ if (responseMsg != null &&
+ 'usage_metadata' in responseMsg &&
+ responseMsg.usage_metadata != null) {
+ usage = responseMsg.usage_metadata;
+ usageSource = 'usage_metadata';
+ }
+ else if (responseMsg != null) {
+ const respMeta = responseMsg.response_metadata;
+ const raw = respMeta?.metadata
+ ?.usage;
+ if (raw != null) {
+ usage = {
+ input_tokens: Number(raw.inputTokens) || undefined,
+ output_tokens: Number(raw.outputTokens) || undefined,
+ };
+ usageSource = 'response_metadata';
+ }
+ }
+ const cacheDetails = usage?.input_token_details;
+ log?.('debug', 'Summarization LLM usage', {
+ source: usageSource,
+ input_tokens: usage?.input_tokens,
+ output_tokens: usage?.output_tokens,
+ ...(cacheDetails?.cache_read != null || cacheDetails?.cache_creation != null
+ ? {
+ 'input_token_details.cache_read': cacheDetails.cache_read,
+ 'input_token_details.cache_creation': cacheDetails.cache_creation,
+ }
+ : {}),
+ });
+ return { text, usage };
+ }
+
+ export { DEFAULT_SUMMARIZATION_PROMPT, DEFAULT_UPDATE_SUMMARIZATION_PROMPT, createSummarizeNode };
+ //# sourceMappingURL=node.mjs.map
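
For orientation, a minimal TypeScript sketch of how the newly exported createSummarizeNode might be wired up. Only the call shape is taken from the diff above ({ agentContext, graph, generateStepId } in, an async (state, config) node function out); the root import path and the placeholder dependency object are assumptions, not documented API of this release.

// Sketch only, under the assumptions stated above.
import { createSummarizeNode } from '@librechat/agents'; // assumed re-export from the package root

// Hypothetical stand-in for the dependencies the Graph normally supplies
// internally: { agentContext, graph, generateStepId }.
declare const deps: Parameters<typeof createSummarizeNode>[0];

// Returns an async graph node: (state, config) => Promise<partial state update>.
const summarizeNode = createSummarizeNode(deps);

// Per the code in this diff: when state.summarizationRequest is set, the node
// runs the summarization LLM call (with provider fallbacks and a metadata-stub
// last resort), stores the summary on the AgentContext, emits
// ON_SUMMARIZE_START/DELTA/COMPLETE events, and clears prior messages via
// createRemoveAllMessage(); otherwise it simply clears the request.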