@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -0,0 +1,663 @@
1
+ 'use strict';
2
+
3
+ var messages = require('@langchain/core/messages');
4
+ var _enum = require('../common/enum.cjs');
5
+ var events = require('../utils/events.cjs');
6
+ var invoke = require('../llm/invoke.cjs');
7
+ var reducer = require('../messages/reducer.cjs');
8
+ var request = require('../llm/request.cjs');
9
+ var cache = require('../messages/cache.cjs');
10
+ var init = require('../llm/init.cjs');
11
+ var stream = require('../stream.cjs');
12
+
13
+ const SUMMARIZATION_PARAM_KEYS = new Set(['maxSummaryTokens']);
14
+ /**
15
+ * Token overhead of the XML wrapper + instruction text added around the
16
+ * summary at injection time in AgentContext.buildSystemRunnable:
17
+ * `<summary>\n${text}\n</summary>\n\nYour context window was compacted...`
18
+ * ~33 tokens on Anthropic, ~24-27 on OpenAI. Using 33 as a safe ceiling.
19
+ */
20
+ const SUMMARY_WRAPPER_OVERHEAD_TOKENS = 33;
21
+ /** Structured checkpoint prompt for fresh summarization (no prior summary). */
22
+ const DEFAULT_SUMMARIZATION_PROMPT = `Hold on, before you continue I need you to write me a checkpoint of everything so far. Your context window is filling up and this checkpoint replaces the messages above, so capture everything you need to pick right back up.
23
+
24
+ Don't second-guess or fact-check anything you did, your tool results reflect exactly what happened. Just record what you did and what you observed. Only the checkpoint, don't respond to me or continue the conversation.
25
+
26
+ ## Checkpoint
27
+
28
+ ## Goal
29
+ What I asked you to do and any sub-goals you identified.
30
+
31
+ ## Constraints & Preferences
32
+ Any rules, preferences, or configuration I established.
33
+
34
+ ## Progress
35
+ ### Done
36
+ - What you completed and the outcomes
37
+
38
+ ### In Progress
39
+ - What you're currently working on
40
+
41
+ ## Key Decisions
42
+ Decisions you made and why.
43
+
44
+ ## Next Steps
45
+ Concrete task actions remaining, in priority order.
46
+
47
+ ## Critical Context
48
+ Exact identifiers, names, error messages, URLs, and details you need to preserve verbatim.
49
+
50
+ Rules:
51
+ - Record what you did and observed, don't judge or re-evaluate it
52
+ - For each tool call: the tool name, key inputs, and the outcome
53
+ - Preserve exact identifiers, names, errors, and references verbatim
54
+ - Short declarative sentences
55
+ - Skip empty sections`;
56
+ /** Prompt for re-compaction when a prior summary exists. */
57
+ const DEFAULT_UPDATE_SUMMARIZATION_PROMPT = `Hold on again, update your checkpoint. Merge the new messages into your existing checkpoint and give me a single consolidated replacement.
58
+
59
+ Keep it roughly the same length as your last checkpoint. Compress older details to make room for what's new, don't just append. Give recent actions more detail, compress older items to one-liners.
60
+
61
+ Don't fact-check or second-guess anything, your tool results are ground truth. Only the checkpoint, don't respond to me or continue the conversation.
62
+
63
+ Rules:
64
+ - Merge new progress into existing sections, don't duplicate headers
65
+ - Compress older completed items into one-line entries
66
+ - Move items from "In Progress" to "Done" when you completed them
67
+ - Update "Next Steps" to reflect current task priorities.
68
+ - For each new tool call: the tool name, key inputs, and the outcome
69
+ - Preserve exact identifiers, names, errors, and references verbatim
70
+ - Skip empty sections`;
71
+ function separateParameters(parameters) {
72
+ const llmParams = {};
73
+ let maxSummaryTokens;
74
+ for (const [key, value] of Object.entries(parameters)) {
75
+ if (SUMMARIZATION_PARAM_KEYS.has(key)) {
76
+ if (key === 'maxSummaryTokens' &&
77
+ typeof value === 'number' &&
78
+ value > 0) {
79
+ maxSummaryTokens = value;
80
+ }
81
+ }
82
+ else {
83
+ llmParams[key] = value;
84
+ }
85
+ }
86
+ return { llmParams, maxSummaryTokens };
87
+ }
88
+ /**
89
+ * Generates a structural metadata summary without making an LLM call.
90
+ * Used as a last-resort fallback when all summarization attempts fail.
91
+ * Preserves tool names and message counts so the agent retains basic context.
92
+ */
93
+ function generateMetadataStub(messages$1) {
94
+ const counts = {};
95
+ const toolNames = new Set();
96
+ for (const msg of messages$1) {
97
+ const role = msg.getType();
98
+ counts[role] = (counts[role] ?? 0) + 1;
99
+ if (role === 'tool' && msg.name != null && msg.name !== '') {
100
+ toolNames.add(msg.name);
101
+ }
102
+ if (role === 'ai' &&
103
+ msg instanceof messages.AIMessage &&
104
+ msg.tool_calls &&
105
+ msg.tool_calls.length > 0) {
106
+ for (const tc of msg.tool_calls) {
107
+ toolNames.add(tc.name);
108
+ }
109
+ }
110
+ }
111
+ const countParts = Object.entries(counts)
112
+ .map(([role, count]) => `${count} ${role}`)
113
+ .join(', ');
114
+ const lines = [
115
+ `[Metadata summary: ${messages$1.length} messages (${countParts})]`,
116
+ ];
117
+ if (toolNames.size > 0) {
118
+ lines.push(`[Tools used: ${Array.from(toolNames).join(', ')}]`);
119
+ }
120
+ return lines.join('\n');
121
+ }
122
+ /** Maximum number of tool failures to include in the enrichment section. */
123
+ const MAX_TOOL_FAILURES = 8;
124
+ /** Maximum chars per failure summary line. */
125
+ const MAX_TOOL_FAILURE_CHARS = 240;
126
+ /**
127
+ * Extracts failed tool results from messages and formats them as a structured
128
+ * section. LLMs often omit specific failure details (exit codes, error messages)
129
+ * from their summaries, this mechanical enrichment guarantees they survive.
130
+ */
131
+ function extractToolFailuresSection(messages) {
132
+ const failures = [];
133
+ const seen = new Set();
134
+ for (const msg of messages) {
135
+ if (msg.getType() !== 'tool') {
136
+ continue;
137
+ }
138
+ const toolMsg = msg;
139
+ if (toolMsg.status !== 'error') {
140
+ continue;
141
+ }
142
+ // Deduplicate by tool_call_id
143
+ const callId = toolMsg.tool_call_id;
144
+ if (callId && seen.has(callId)) {
145
+ continue;
146
+ }
147
+ if (callId) {
148
+ seen.add(callId);
149
+ }
150
+ const toolName = toolMsg.name ?? 'tool';
151
+ const content = typeof toolMsg.content === 'string'
152
+ ? toolMsg.content
153
+ : JSON.stringify(toolMsg.content);
154
+ const normalized = content.replace(/\s+/g, ' ').trim();
155
+ const summary = normalized.length > MAX_TOOL_FAILURE_CHARS
156
+ ? `${normalized.slice(0, MAX_TOOL_FAILURE_CHARS - 3)}...`
157
+ : normalized;
158
+ failures.push({ toolName, summary });
159
+ }
160
+ if (failures.length === 0) {
161
+ return '';
162
+ }
163
+ const lines = failures
164
+ .slice(0, MAX_TOOL_FAILURES)
165
+ .map((f) => `- ${f.toolName}: ${f.summary}`);
166
+ if (failures.length > MAX_TOOL_FAILURES) {
167
+ lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
168
+ }
169
+ return `\n\n## Tool Failures\n${lines.join('\n')}`;
170
+ }
171
+ /**
172
+ * Appends mechanical enrichment sections to an LLM-generated summary.
173
+ * Tool failures are appended verbatim because LLMs often omit specific
174
+ * error details from their summaries.
175
+ */
176
+ function enrichSummary(summaryText, messages) {
177
+ return summaryText + extractToolFailuresSection(messages);
178
+ }
179
+ /**
180
+ * Restores pre-masking tool content onto the messages array using
181
+ * `pendingOriginalToolContent` stored on AgentContext. Only allocates
182
+ * a new array when there are entries to restore; otherwise returns the
183
+ * input reference unchanged.
184
+ */
185
+ function restoreOriginalToolContent(messages$1, originalToolContent) {
186
+ if (originalToolContent == null || originalToolContent.size === 0) {
187
+ return messages$1;
188
+ }
189
+ const restored = [...messages$1];
190
+ for (const [idx, content] of originalToolContent) {
191
+ const msg = restored[idx];
192
+ if (msg instanceof messages.ToolMessage) {
193
+ restored[idx] = new messages.ToolMessage({
194
+ content,
195
+ tool_call_id: msg.tool_call_id,
196
+ name: msg.name,
197
+ id: msg.id,
198
+ additional_kwargs: msg.additional_kwargs,
199
+ response_metadata: msg.response_metadata,
200
+ });
201
+ }
202
+ }
203
+ return restored;
204
+ }
205
+ /** Assembles the summarization model's client options from agent and config. */
206
+ function buildSummarizationClientConfig(agentContext, summarizationConfig) {
207
+ const provider = (summarizationConfig?.provider ??
208
+ agentContext.provider);
209
+ const modelName = summarizationConfig?.model;
210
+ const parameters = summarizationConfig?.parameters ?? {};
211
+ const promptText = summarizationConfig?.prompt ?? DEFAULT_SUMMARIZATION_PROMPT;
212
+ const updatePromptText = summarizationConfig?.updatePrompt ?? DEFAULT_UPDATE_SUMMARIZATION_PROMPT;
213
+ const { llmParams, maxSummaryTokens: paramMaxSummaryTokens } = separateParameters(parameters);
214
+ const isSelfSummarize = provider === agentContext.provider;
215
+ const baseOptions = isSelfSummarize && agentContext.clientOptions
216
+ ? { ...agentContext.clientOptions }
217
+ : {};
218
+ const clientOptions = {
219
+ ...baseOptions,
220
+ ...llmParams,
221
+ };
222
+ if (modelName != null && modelName !== '') {
223
+ clientOptions.model = modelName;
224
+ clientOptions.modelName = modelName;
225
+ }
226
+ const effectiveMaxSummaryTokens = paramMaxSummaryTokens ?? summarizationConfig?.maxSummaryTokens;
227
+ if (effectiveMaxSummaryTokens != null) {
228
+ clientOptions[request.getMaxOutputTokensKey(provider)] = effectiveMaxSummaryTokens;
229
+ }
230
+ return {
231
+ provider,
232
+ modelName,
233
+ clientOptions,
234
+ effectiveMaxSummaryTokens,
235
+ promptText,
236
+ updatePromptText,
237
+ };
238
+ }
239
+ /** Computes the token count for a summary, preferring provider output tokens when available. */
240
+ function computeSummaryTokenCount(summaryText, summaryUsage, tokenCounter) {
241
+ const providerOutputTokens = Number(summaryUsage?.output_tokens) || 0;
242
+ if (providerOutputTokens > 0) {
243
+ return providerOutputTokens + SUMMARY_WRAPPER_OVERHEAD_TOKENS;
244
+ }
245
+ if (tokenCounter) {
246
+ return (tokenCounter(new messages.SystemMessage(summaryText)) +
247
+ SUMMARY_WRAPPER_OVERHEAD_TOKENS);
248
+ }
249
+ return 0;
250
+ }
251
+ /** Constructs the SummaryContentBlock persisted in the run step and dispatched to events. */
252
+ function buildSummaryBlock(params) {
253
+ return {
254
+ type: _enum.ContentTypes.SUMMARY,
255
+ content: [
256
+ {
257
+ type: _enum.ContentTypes.TEXT,
258
+ text: params.summaryText,
259
+ },
260
+ ],
261
+ tokenCount: params.tokenCount,
262
+ summaryVersion: params.summaryVersion,
263
+ boundary: {
264
+ messageId: params.stepId,
265
+ contentIndex: params.stepIndex,
266
+ },
267
+ model: params.modelName,
268
+ provider: params.provider,
269
+ createdAt: new Date().toISOString(),
270
+ };
271
+ }
272
+ /**
273
+ * Runs the summarization LLM call with primary + fallback providers,
274
+ * falling back to a metadata stub when all calls fail.
275
+ */
276
+ async function executeSummarizationWithFallback(params) {
277
+ const { agentContext, messages: messages$1, clientConfig, summarizeConfig, stepId, usePromptCache, log, } = params;
278
+ const summarizationModel = init.initializeModel({
279
+ provider: clientConfig.provider,
280
+ clientOptions: clientConfig.clientOptions,
281
+ tools: agentContext.getToolsForBinding(),
282
+ });
283
+ const priorSummaryText = agentContext.getSummaryText()?.trim() ?? '';
284
+ let summaryText = '';
285
+ let summaryUsage;
286
+ try {
287
+ const result = await summarizeWithCacheHit({
288
+ model: summarizationModel,
289
+ messages: messages$1,
290
+ promptText: clientConfig.promptText,
291
+ updatePromptText: clientConfig.updatePromptText,
292
+ priorSummaryText,
293
+ config: summarizeConfig,
294
+ stepId,
295
+ provider: clientConfig.provider,
296
+ reasoningKey: agentContext.reasoningKey,
297
+ usePromptCache,
298
+ log,
299
+ });
300
+ summaryText = result.text;
301
+ summaryUsage = result.usage;
302
+ }
303
+ catch (primaryError) {
304
+ log('error', 'Summarization LLM call failed', {
305
+ error: primaryError instanceof Error
306
+ ? primaryError.message
307
+ : String(primaryError),
308
+ provider: clientConfig.provider,
309
+ model: clientConfig.modelName,
310
+ messagesToRefineCount: messages$1.length,
311
+ });
312
+ const fallbacks = clientConfig.clientOptions
313
+ ?.fallbacks ?? [];
314
+ if (fallbacks.length > 0) {
315
+ try {
316
+ const onChunk = createSummarizationChunkHandler({
317
+ stepId,
318
+ config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
319
+ provider: clientConfig.provider,
320
+ reasoningKey: agentContext.reasoningKey,
321
+ });
322
+ const fbResult = await invoke.tryFallbackProviders({
323
+ fallbacks,
324
+ tools: agentContext.getToolsForBinding(),
325
+ messages: [
326
+ ...messages$1,
327
+ new messages.HumanMessage(buildSummarizationInstruction(clientConfig.promptText, clientConfig.updatePromptText, priorSummaryText)),
328
+ ],
329
+ config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
330
+ primaryError,
331
+ onChunk,
332
+ });
333
+ const fbMsg = fbResult?.messages?.[0];
334
+ if (fbMsg) {
335
+ summaryText = extractResponseText(fbMsg);
336
+ }
337
+ }
338
+ catch (fbErr) {
339
+ log('warn', 'Fallback providers also failed', {
340
+ error: fbErr instanceof Error ? fbErr.message : String(fbErr),
341
+ });
342
+ }
343
+ }
344
+ if (!summaryText) {
345
+ log('warn', 'Summarization failed, falling back to metadata stub', {
346
+ error: primaryError instanceof Error
347
+ ? primaryError.message
348
+ : String(primaryError),
349
+ });
350
+ summaryText = generateMetadataStub(messages$1);
351
+ }
352
+ }
353
+ return { text: summaryText, usage: summaryUsage };
354
+ }
355
+ /** Dispatches run step completion, ON_SUMMARIZE_COMPLETE, and rebuilds token map. */
356
+ async function dispatchCompletionEvents(params) {
357
+ const { graph, runnableConfig, stepId, summaryBlock, agentContext, runStep, summaryUsage, agentId, } = params;
358
+ runStep.summary = summaryBlock;
359
+ if (summaryUsage) {
360
+ runStep.usage = {
361
+ prompt_tokens: Number(summaryUsage.input_tokens) || 0,
362
+ completion_tokens: Number(summaryUsage.output_tokens) || 0,
363
+ total_tokens: (Number(summaryUsage.input_tokens) || 0) +
364
+ (Number(summaryUsage.output_tokens) || 0),
365
+ };
366
+ }
367
+ await graph.dispatchRunStepCompleted(stepId, { type: 'summary', summary: summaryBlock }, runnableConfig);
368
+ if (runnableConfig) {
369
+ await events.safeDispatchCustomEvent(_enum.GraphEvents.ON_SUMMARIZE_COMPLETE, {
370
+ id: stepId,
371
+ agentId,
372
+ summary: summaryBlock,
373
+ }, runnableConfig);
374
+ }
375
+ agentContext.rebuildTokenMapAfterSummarization({});
376
+ }
377
+ function createSummarizeNode({ agentContext, graph, generateStepId, }) {
378
+ return async (state, config) => {
379
+ const request = state.summarizationRequest;
380
+ if (request == null) {
381
+ return { summarizationRequest: undefined };
382
+ }
383
+ const maxCtx = agentContext.maxContextTokens ?? 0;
384
+ if (maxCtx > 0 && agentContext.instructionTokens >= maxCtx) {
385
+ events.emitAgentLog(config, 'warn', 'summarize', 'Summarization skipped, instructions exceed context budget. Reduce the number of tools or increase maxContextTokens.', {
386
+ instructionTokens: agentContext.instructionTokens,
387
+ maxContextTokens: maxCtx,
388
+ breakdown: agentContext.formatTokenBudgetBreakdown(),
389
+ }, { runId: graph.runId, agentId: request.agentId });
390
+ return { summarizationRequest: undefined };
391
+ }
392
+ const messagesToRefine = restoreOriginalToolContent(state.messages, agentContext.pendingOriginalToolContent);
393
+ agentContext.pendingOriginalToolContent = undefined;
394
+ const clientConfig = buildSummarizationClientConfig(agentContext, agentContext.summarizationConfig);
395
+ const runnableConfig = config ?? graph.config;
396
+ const stepKey = `summarize-${request.agentId}`;
397
+ const [stepId, stepIndex] = generateStepId(stepKey);
398
+ const placeholderSummary = {
399
+ type: _enum.ContentTypes.SUMMARY,
400
+ model: clientConfig.modelName,
401
+ provider: clientConfig.provider,
402
+ };
403
+ const runStep = {
404
+ stepIndex,
405
+ id: stepId,
406
+ type: _enum.StepTypes.MESSAGE_CREATION,
407
+ index: graph.contentData.length,
408
+ stepDetails: {
409
+ type: _enum.StepTypes.MESSAGE_CREATION,
410
+ message_creation: { message_id: stepId },
411
+ },
412
+ summary: placeholderSummary,
413
+ usage: null,
414
+ };
415
+ if (graph.runId != null && graph.runId !== '') {
416
+ runStep.runId = graph.runId;
417
+ }
418
+ if (graph.isMultiAgent && agentContext.agentId) {
419
+ runStep.agentId = agentContext.agentId;
420
+ }
421
+ await graph.dispatchRunStep(runStep, runnableConfig);
422
+ if (runnableConfig) {
423
+ await events.safeDispatchCustomEvent(_enum.GraphEvents.ON_SUMMARIZE_START, {
424
+ agentId: request.agentId,
425
+ provider: clientConfig.provider,
426
+ model: clientConfig.modelName,
427
+ messagesToRefineCount: messagesToRefine.length,
428
+ summaryVersion: agentContext.summaryVersion + 1,
429
+ }, runnableConfig);
430
+ }
431
+ const isSelfSummarizeModel = clientConfig.provider === agentContext.provider;
432
+ const hasPromptCache = isSelfSummarizeModel &&
433
+ agentContext.clientOptions
434
+ ?.promptCache === true;
435
+ const log = (level, message, data) => {
436
+ events.emitAgentLog(runnableConfig, level, 'summarize', message, data, {
437
+ runId: graph.runId,
438
+ agentId: request.agentId,
439
+ });
440
+ };
441
+ log('debug', 'Summarization starting', {
442
+ messagesToRefineCount: messagesToRefine.length,
443
+ hasPriorSummary: (agentContext.getSummaryText()?.trim() ?? '') !== '',
444
+ summaryVersion: agentContext.summaryVersion + 1,
445
+ isSelfSummarize: isSelfSummarizeModel,
446
+ hasPromptCache,
447
+ provider: clientConfig.provider,
448
+ });
449
+ const summarizeConfig = config
450
+ ? {
451
+ ...config,
452
+ metadata: {
453
+ ...config.metadata,
454
+ agent_id: request.agentId,
455
+ summarization_provider: clientConfig.provider,
456
+ summarization_model: clientConfig.modelName,
457
+ },
458
+ }
459
+ : undefined;
460
+ const { text: rawText, usage: summaryUsage } = await executeSummarizationWithFallback({
461
+ agentContext,
462
+ messages: messagesToRefine,
463
+ clientConfig,
464
+ summarizeConfig,
465
+ stepId,
466
+ usePromptCache: isSelfSummarizeModel && hasPromptCache,
467
+ log,
468
+ });
469
+ if (!rawText) {
470
+ agentContext.markSummarizationTriggered(0);
471
+ if (runnableConfig) {
472
+ await events.safeDispatchCustomEvent(_enum.GraphEvents.ON_SUMMARIZE_COMPLETE, {
473
+ id: stepId,
474
+ agentId: request.agentId,
475
+ error: 'Summarization produced empty output',
476
+ }, runnableConfig);
477
+ }
478
+ return { summarizationRequest: undefined };
479
+ }
480
+ const summaryText = enrichSummary(rawText, messagesToRefine);
481
+ const tokenCount = computeSummaryTokenCount(summaryText, summaryUsage, agentContext.tokenCounter);
482
+ agentContext.setSummary(summaryText, tokenCount);
483
+ log('info', 'Summary persisted');
484
+ log('debug', 'Summary details', {
485
+ summaryTokens: tokenCount,
486
+ textLength: summaryText.length,
487
+ messagesCompacted: messagesToRefine.length,
488
+ summaryVersion: agentContext.summaryVersion,
489
+ ...(summaryUsage != null
490
+ ? {
491
+ input_tokens: summaryUsage.input_tokens,
492
+ output_tokens: summaryUsage.output_tokens,
493
+ cache_read: summaryUsage.input_token_details?.cache_read,
494
+ cache_creation: summaryUsage.input_token_details?.cache_creation,
495
+ }
496
+ : {}),
497
+ });
498
+ const summaryBlock = buildSummaryBlock({
499
+ summaryText,
500
+ tokenCount,
501
+ stepId,
502
+ stepIndex: runStep.index,
503
+ modelName: clientConfig.modelName,
504
+ provider: clientConfig.provider,
505
+ summaryVersion: agentContext.summaryVersion,
506
+ });
507
+ await dispatchCompletionEvents({
508
+ graph,
509
+ runnableConfig,
510
+ stepId,
511
+ summaryBlock,
512
+ agentContext,
513
+ runStep,
514
+ summaryUsage,
515
+ agentId: request.agentId,
516
+ });
517
+ return {
518
+ summarizationRequest: undefined,
519
+ messages: [reducer.createRemoveAllMessage()],
520
+ };
521
+ };
522
+ }
523
/**
 * Extracts the plain-text portion of an LLM response message.
 * String content is returned trimmed; array content is flattened by
 * concatenating string blocks and `text` blocks, while reasoning /
 * thinking / redacted-thinking blocks are dropped. Anything else
 * yields the empty string.
 */
function extractResponseText(response) {
    const { content } = response;
    if (typeof content === 'string') {
        return content.trim();
    }
    if (!Array.isArray(content)) {
        return '';
    }
    // Maps a single content block to its contributing text ('' when skipped).
    const textOf = (block) => {
        if (typeof block === 'string') {
            return block;
        }
        if (block == null || typeof block !== 'object') {
            return '';
        }
        const rec = block;
        const isReasoning = rec.type === _enum.ContentTypes.THINKING ||
            rec.type === _enum.ContentTypes.REASONING_CONTENT ||
            rec.type === 'redacted_thinking';
        if (isReasoning) {
            return '';
        }
        return rec.type === 'text' && typeof rec.text === 'string' ? rec.text : '';
    };
    return content.map(textOf).join('').trim();
}
553
/**
 * Builds the summarization instruction sent to the model.
 * When a prior summary exists, the update prompt is preferred (falling
 * back to the base prompt) and the prior summary is appended inside a
 * `<previous-summary>` tag; otherwise the base prompt is used alone.
 */
function buildSummarizationInstruction(promptText, updatePromptText, priorSummaryText) {
    const header = priorSummaryText
        ? (updatePromptText ?? promptText)
        : promptText;
    const tail = priorSummaryText
        ? `\n\n<previous-summary>\n${priorSummaryText}\n</previous-summary>`
        : '';
    return [header, tail].join('');
}
563
/** Creates an `onChunk` callback that dispatches `ON_SUMMARIZE_DELTA` events for streaming. */
function createSummarizationChunkHandler({ stepId, config, provider, reasoningKey = 'reasoning_content', }) {
    // Without a step id and a runnable config there is nowhere to route deltas.
    if (stepId == null || stepId === '' || !config) {
        return undefined;
    }
    return (chunk) => {
        const incoming = chunk;
        const extracted = stream.getChunkContent({ chunk: incoming, provider, reasoningKey });
        // Skip empty chunks (null/undefined or the empty string).
        if (extracted == null || (typeof extracted === 'string' && !extracted)) {
            return;
        }
        // Normalize plain strings into a single text content block.
        const contentBlocks = typeof extracted === 'string'
            ? [{ type: _enum.ContentTypes.TEXT, text: extracted }]
            : extracted;
        const payload = {
            id: stepId,
            delta: {
                summary: {
                    type: _enum.ContentTypes.SUMMARY,
                    content: contentBlocks,
                    provider: String(config.metadata?.summarization_provider ?? ''),
                    model: String(config.metadata?.summarization_model ?? ''),
                },
            },
        };
        events.safeDispatchCustomEvent(_enum.GraphEvents.ON_SUMMARIZE_DELTA, payload, config);
    };
}
590
/**
 * Derives a runnable config for a summarization stage: copies `config`,
 * stamps a stage-specific `runName`, and merges `summarization`/`stage`
 * markers into its metadata. Returns `undefined` when no config is given.
 */
function traceConfig(config, stage) {
    if (!config) {
        return undefined;
    }
    const runName = `summarization:${stage}`;
    const metadata = { ...config.metadata, summarization: true, stage };
    return { ...config, runName, metadata };
}
600
/**
 * Cache-friendly compaction: sends raw conversation messages with the
 * summarization instruction appended as the final HumanMessage.
 * Providers with prompt caching get a cache hit on the system prompt +
 * tool definitions prefix.
 *
 * Returns `{ text, usage }` where `text` is the extracted summary text
 * ('' when no response message came back) and `usage` is the token-usage
 * record when one could be recovered from the response, else undefined.
 */
async function summarizeWithCacheHit({ model, messages: messages$1, promptText, updatePromptText, priorSummaryText, config, stepId, provider, reasoningKey, usePromptCache, log, }) {
    const instruction = buildSummarizationInstruction(promptText, updatePromptText, priorSummaryText);
    // Append the instruction LAST so the existing message prefix stays
    // byte-stable and remains eligible for a provider-side cache hit.
    const fullMessages = [...messages$1, new messages.HumanMessage(instruction)];
    const invokeMessages = usePromptCache === true ? cache.addCacheControl(fullMessages) : fullMessages;
    const result = await invoke.attemptInvoke({
        model,
        messages: invokeMessages,
        provider,
        // Streams partial summary deltas; undefined when stepId/config are absent.
        onChunk: createSummarizationChunkHandler({
            stepId,
            config: traceConfig(config, 'cache_hit_compaction'),
            provider,
            reasoningKey,
        }),
    }, traceConfig(config, 'cache_hit_compaction'));
    const responseMsg = result.messages?.[0];
    const text = responseMsg
        ? extractResponseText(responseMsg)
        : '';
    // Prefer the normalized `usage_metadata`; fall back to provider-raw
    // usage nested under `response_metadata.metadata.usage`.
    let usage;
    let usageSource = 'none';
    if (responseMsg != null &&
        'usage_metadata' in responseMsg &&
        responseMsg.usage_metadata != null) {
        usage = responseMsg.usage_metadata;
        usageSource = 'usage_metadata';
    }
    else if (responseMsg != null) {
        const respMeta = responseMsg.response_metadata;
        // NOTE(review): assumes the raw usage record exposes camelCase
        // `inputTokens`/`outputTokens` — confirm against the provider shape.
        const raw = respMeta?.metadata
            ?.usage;
        if (raw != null) {
            usage = {
                // `|| undefined` collapses NaN (and a literal 0) to undefined.
                input_tokens: Number(raw.inputTokens) || undefined,
                output_tokens: Number(raw.outputTokens) || undefined,
            };
            usageSource = 'response_metadata';
        }
    }
    // Cache read/creation counts only exist on the usage_metadata path.
    const cacheDetails = usage?.input_token_details;
    log?.('debug', 'Summarization LLM usage', {
        source: usageSource,
        input_tokens: usage?.input_tokens,
        output_tokens: usage?.output_tokens,
        ...(cacheDetails?.cache_read != null || cacheDetails?.cache_creation != null
            ? {
                'input_token_details.cache_read': cacheDetails.cache_read,
                'input_token_details.cache_creation': cacheDetails.cache_creation,
            }
            : {}),
    });
    return { text, usage };
}
659
+
660
// Public CommonJS surface of this module: the default prompts and the
// summarize-node factory defined earlier in the file.
exports.DEFAULT_SUMMARIZATION_PROMPT = DEFAULT_SUMMARIZATION_PROMPT;
exports.DEFAULT_UPDATE_SUMMARIZATION_PROMPT = DEFAULT_UPDATE_SUMMARIZATION_PROMPT;
exports.createSummarizeNode = createSummarizeNode;
//# sourceMappingURL=node.cjs.map