@librechat/agents 3.1.57 → 3.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +3 -3
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3827 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -0,0 +1,982 @@
1
+ import {
2
+ AIMessage,
3
+ ToolMessage,
4
+ HumanMessage,
5
+ SystemMessage,
6
+ } from '@langchain/core/messages';
7
+ import type { RunnableConfig } from '@langchain/core/runnables';
8
+ import type { UsageMetadata, BaseMessage } from '@langchain/core/messages';
9
+ import type { AgentContext } from '@/agents/AgentContext';
10
+ import type { OnChunk } from '@/llm/invoke';
11
+ import type * as t from '@/types';
12
+ import { ContentTypes, GraphEvents, StepTypes, Providers } from '@/common';
13
+ import { safeDispatchCustomEvent, emitAgentLog } from '@/utils/events';
14
+ import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
15
+ import { createRemoveAllMessage } from '@/messages/reducer';
16
+ import { getMaxOutputTokensKey } from '@/llm/request';
17
+ import { addCacheControl } from '@/messages/cache';
18
+ import { initializeModel } from '@/llm/init';
19
+ import { getChunkContent } from '@/stream';
20
+
21
/**
 * Parameter names that the summarization node consumes itself.
 * `separateParameters` keeps these out of the options forwarded to the LLM
 * client.
 */
const SUMMARIZATION_PARAM_KEYS: ReadonlySet<string> = new Set<string>([
  'maxSummaryTokens',
]);
22
+
23
/**
 * Fixed token allowance for the text wrapped around a summary when it is
 * injected (in AgentContext.buildSystemRunnable):
 * `<summary>\n${text}\n</summary>\n\nYour context window was compacted...`
 *
 * Measured at roughly 33 tokens on Anthropic and 24-27 on OpenAI; the higher
 * figure is used so the estimate never undercounts.
 */
const SUMMARY_WRAPPER_OVERHEAD_TOKENS = 33;
30
+
31
+ /** Structured checkpoint prompt for fresh summarization (no prior summary). */
32
+ export const DEFAULT_SUMMARIZATION_PROMPT = `Hold on, before you continue I need you to write me a checkpoint of everything so far. Your context window is filling up and this checkpoint replaces the messages above, so capture everything you need to pick right back up.
33
+
34
+ Don't second-guess or fact-check anything you did, your tool results reflect exactly what happened. Just record what you did and what you observed. Only the checkpoint, don't respond to me or continue the conversation.
35
+
36
+ ## Checkpoint
37
+
38
+ ## Goal
39
+ What I asked you to do and any sub-goals you identified.
40
+
41
+ ## Constraints & Preferences
42
+ Any rules, preferences, or configuration I established.
43
+
44
+ ## Progress
45
+ ### Done
46
+ - What you completed and the outcomes
47
+
48
+ ### In Progress
49
+ - What you're currently working on
50
+
51
+ ## Key Decisions
52
+ Decisions you made and why.
53
+
54
+ ## Next Steps
55
+ Concrete task actions remaining, in priority order.
56
+
57
+ ## Critical Context
58
+ Exact identifiers, names, error messages, URLs, and details you need to preserve verbatim.
59
+
60
+ Rules:
61
+ - Record what you did and observed, don't judge or re-evaluate it
62
+ - For each tool call: the tool name, key inputs, and the outcome
63
+ - Preserve exact identifiers, names, errors, and references verbatim
64
+ - Short declarative sentences
65
+ - Skip empty sections`;
66
+
67
+ /** Prompt for re-compaction when a prior summary exists. */
68
+ export const DEFAULT_UPDATE_SUMMARIZATION_PROMPT = `Hold on again, update your checkpoint. Merge the new messages into your existing checkpoint and give me a single consolidated replacement.
69
+
70
+ Keep it roughly the same length as your last checkpoint. Compress older details to make room for what's new, don't just append. Give recent actions more detail, compress older items to one-liners.
71
+
72
+ Don't fact-check or second-guess anything, your tool results are ground truth. Only the checkpoint, don't respond to me or continue the conversation.
73
+
74
+ Rules:
75
+ - Merge new progress into existing sections, don't duplicate headers
76
+ - Compress older completed items into one-line entries
77
+ - Move items from "In Progress" to "Done" when you completed them
78
+ - Update "Next Steps" to reflect current task priorities.
79
+ - For each new tool call: the tool name, key inputs, and the outcome
80
+ - Preserve exact identifiers, names, errors, and references verbatim
81
+ - Skip empty sections`;
82
+
83
/**
 * Splits summarization parameters into the options meant for the LLM client
 * and the settings handled by the summarization node itself.
 *
 * @param parameters Raw key/value parameters from the summarization config.
 * @returns `llmParams` with every key not listed in
 * `SUMMARIZATION_PARAM_KEYS`, plus `maxSummaryTokens` when it was supplied as
 * a positive number (otherwise `undefined`).
 */
function separateParameters(parameters: Record<string, unknown>): {
  llmParams: Record<string, unknown>;
  maxSummaryTokens?: number;
} {
  const llmParams: Record<string, unknown> = {};
  let maxSummaryTokens: number | undefined;

  for (const [name, raw] of Object.entries(parameters)) {
    if (!SUMMARIZATION_PARAM_KEYS.has(name)) {
      llmParams[name] = raw;
      continue;
    }
    // Reserved keys never reach the LLM client, even when their value is
    // rejected below.
    if (name !== 'maxSummaryTokens') {
      continue;
    }
    if (typeof raw === 'number' && raw > 0) {
      maxSummaryTokens = raw;
    }
  }

  return { llmParams, maxSummaryTokens };
}
106
+
107
/**
 * Builds a purely structural summary without calling an LLM.
 *
 * Serves as the last-resort fallback when every summarization attempt has
 * failed: it records how many messages of each type were present and which
 * tool names appeared, either as tool results or as tool calls on AI messages.
 *
 * @param messages Messages that were supposed to be summarized.
 * @returns One or two bracketed lines joined by a newline; the "Tools used"
 * line is omitted when no tool name was found.
 */
function generateMetadataStub(messages: BaseMessage[]): string {
  const tally: Record<string, number> = {};
  const toolsSeen = new Set<string>();

  for (const message of messages) {
    const kind = message.getType();
    tally[kind] = (tally[kind] ?? 0) + 1;

    if (kind === 'tool') {
      if (message.name != null && message.name !== '') {
        toolsSeen.add(message.name);
      }
      continue;
    }

    if (kind !== 'ai' || !(message instanceof AIMessage)) {
      continue;
    }
    const calls = message.tool_calls;
    if (!calls || calls.length === 0) {
      continue;
    }
    for (const call of calls) {
      toolsSeen.add(call.name);
    }
  }

  const breakdown = Object.entries(tally)
    .map(([kind, total]) => `${total} ${kind}`)
    .join(', ');
  const header = `[Metadata summary: ${messages.length} messages (${breakdown})]`;

  if (toolsSeen.size === 0) {
    return header;
  }
  return [header, `[Tools used: ${Array.from(toolsSeen).join(', ')}]`].join(
    '\n'
  );
}
150
+
151
/**
 * Upper bound on the number of failed tool results listed individually in
 * the "Tool Failures" section; any beyond this are folded into a single
 * "...and N more" line.
 */
const MAX_TOOL_FAILURES = 8;

/**
 * Upper bound on the length (in characters, including the trailing "...")
 * of one failure summary line.
 */
const MAX_TOOL_FAILURE_CHARS = 240;
155
+
156
/**
 * Collects failed tool results and renders them as a "## Tool Failures"
 * markdown section.
 *
 * LLM-written summaries often drop concrete failure details (exit codes,
 * error messages); appending them mechanically guarantees they survive.
 *
 * Only messages whose type is `tool` and whose `status` is `'error'` are
 * considered. Results sharing a `tool_call_id` are reported once. Each entry
 * is whitespace-normalized and truncated to `MAX_TOOL_FAILURE_CHARS`, and at
 * most `MAX_TOOL_FAILURES` entries are listed before a "...and N more" line.
 *
 * @param messages Messages to scan.
 * @returns The section text prefixed with two newlines, or an empty string
 * when no failed tool result is present.
 */
function extractToolFailuresSection(messages: BaseMessage[]): string {
  const failures: Array<{ toolName: string; summary: string }> = [];
  const seen = new Set<string>();

  for (const msg of messages) {
    if (msg.getType() !== 'tool') {
      continue;
    }
    const toolMsg = msg as ToolMessage;
    if (toolMsg.status !== 'error') {
      continue;
    }

    // Deduplicate by tool_call_id; results without an id are always kept.
    const callId = toolMsg.tool_call_id;
    if (callId) {
      if (seen.has(callId)) {
        continue;
      }
      seen.add(callId);
    }

    // An empty name is treated like a missing one so the bullet never
    // renders without a label ("- : ...").
    const toolName =
      toolMsg.name != null && toolMsg.name !== '' ? toolMsg.name : 'tool';
    const content =
      typeof toolMsg.content === 'string'
        ? toolMsg.content
        : JSON.stringify(toolMsg.content);
    // Collapse whitespace runs so each failure stays on a single line.
    const normalized = content.replace(/\s+/g, ' ').trim();
    const summary =
      normalized.length > MAX_TOOL_FAILURE_CHARS
        ? `${normalized.slice(0, MAX_TOOL_FAILURE_CHARS - 3)}...`
        : normalized;

    failures.push({ toolName, summary });
  }

  if (failures.length === 0) {
    return '';
  }

  const lines = failures
    .slice(0, MAX_TOOL_FAILURES)
    .map((f) => `- ${f.toolName}: ${f.summary}`);
  if (failures.length > MAX_TOOL_FAILURES) {
    lines.push(`- ...and ${failures.length - MAX_TOOL_FAILURES} more`);
  }

  return `\n\n## Tool Failures\n${lines.join('\n')}`;
}
209
+
210
/**
 * Adds mechanically derived sections to an LLM-written summary.
 *
 * Currently this is the tool-failure section built by
 * `extractToolFailuresSection`, which is an empty string when nothing
 * failed, so the summary is returned unchanged in that case.
 *
 * @param summaryText Summary text produced by the model.
 * @param messages Messages the summary was generated from.
 * @returns The summary followed by any enrichment sections.
 */
function enrichSummary(summaryText: string, messages: BaseMessage[]): string {
  const toolFailures = extractToolFailuresSection(messages);
  return `${summaryText}${toolFailures}`;
}
218
+
219
/**
 * Puts pre-masking tool content back onto a copy of the messages array.
 *
 * Each entry of `originalToolContent` maps a message index to the content
 * that message held before masking. A new array is allocated only when there
 * is something to restore; otherwise the input reference is returned as is.
 * Indices that do not point at a `ToolMessage` are left untouched.
 *
 * The rebuilt message carries over `status` and `artifact` along with the
 * other fields. Without `status`, a restored error result would stop being
 * recognized as a failure by code that filters on `status === 'error'`
 * (such as `extractToolFailuresSection` in this file).
 *
 * @param messages Current messages; never mutated.
 * @param originalToolContent Index-to-content map, or `undefined` when no
 * content was masked.
 * @returns `messages` itself when nothing needs restoring, otherwise a
 * shallow copy with the affected tool messages replaced.
 */
function restoreOriginalToolContent(
  messages: BaseMessage[],
  originalToolContent: Map<number, string> | undefined
): BaseMessage[] {
  if (originalToolContent == null || originalToolContent.size === 0) {
    return messages;
  }
  const restored = [...messages];
  for (const [idx, content] of originalToolContent) {
    const msg = restored[idx];
    if (!(msg instanceof ToolMessage)) {
      continue;
    }
    restored[idx] = new ToolMessage({
      content,
      tool_call_id: msg.tool_call_id,
      name: msg.name,
      id: msg.id,
      status: msg.status,
      artifact: msg.artifact,
      additional_kwargs: msg.additional_kwargs,
      response_metadata: msg.response_metadata,
    });
  }
  return restored;
}
248
+
249
+ // ---------------------------------------------------------------------------
250
+ // Extracted helpers for createSummarizeNode
251
+ // ---------------------------------------------------------------------------
252
+
253
/**
 * Resolved settings for one summarization call, as produced by
 * `buildSummarizationClientConfig`.
 */
interface SummarizationClientConfig {
  /** Provider used for the call: the configured one, else the agent's own. */
  provider: string;
  /** Model override from the summarization config, when one was given. */
  modelName?: string;
  /** Options handed to `initializeModel` for the summarization model. */
  clientOptions: Record<string, unknown>;
  /**
   * Output-token cap for the summary: `parameters.maxSummaryTokens` when
   * valid, otherwise the config-level `maxSummaryTokens`.
   */
  effectiveMaxSummaryTokens?: number;
  /** Prompt used when no prior summary exists. */
  promptText: string;
  /** Prompt used when a prior summary is being updated. */
  updatePromptText: string;
}
261
+
262
/**
 * Resolves provider, model, prompts and client options for summarization.
 *
 * Client options are layered in this order, later entries winning:
 * 1. the agent's own `clientOptions`, only when the summarization provider
 *    equals the agent's provider;
 * 2. LLM parameters from `summarizationConfig.parameters`;
 * 3. the model override (written to both `model` and `modelName`);
 * 4. the output-token cap, stored under the provider-specific key returned
 *    by `getMaxOutputTokensKey`.
 *
 * @param agentContext Agent whose provider and options act as defaults.
 * @param summarizationConfig Optional user overrides.
 * @returns The resolved configuration for the summarization call.
 */
function buildSummarizationClientConfig(
  agentContext: AgentContext,
  summarizationConfig?: t.SummarizationConfig
): SummarizationClientConfig {
  const provider = (summarizationConfig?.provider ??
    agentContext.provider) as string;
  const modelName = summarizationConfig?.model;
  const promptText =
    summarizationConfig?.prompt ?? DEFAULT_SUMMARIZATION_PROMPT;
  const updatePromptText =
    summarizationConfig?.updatePrompt ?? DEFAULT_UPDATE_SUMMARIZATION_PROMPT;

  const separated = separateParameters(summarizationConfig?.parameters ?? {});

  // Agent options are inherited only when summarizing with the same provider;
  // another provider's client may not understand them.
  const sharesAgentProvider = provider === (agentContext.provider as string);
  const inherited =
    sharesAgentProvider && agentContext.clientOptions
      ? { ...agentContext.clientOptions }
      : {};

  const clientOptions: Record<string, unknown> = {
    ...inherited,
    ...separated.llmParams,
  };

  const hasModelOverride = modelName != null && modelName !== '';
  if (hasModelOverride) {
    clientOptions.model = modelName;
    clientOptions.modelName = modelName;
  }

  const effectiveMaxSummaryTokens =
    separated.maxSummaryTokens ?? summarizationConfig?.maxSummaryTokens;
  if (effectiveMaxSummaryTokens != null) {
    const maxTokensKey = getMaxOutputTokensKey(provider);
    clientOptions[maxTokensKey] = effectiveMaxSummaryTokens;
  }

  return {
    provider,
    modelName,
    clientOptions,
    effectiveMaxSummaryTokens,
    promptText,
    updatePromptText,
  };
}
311
+
312
/**
 * Estimates how many tokens the injected summary will occupy.
 *
 * The provider-reported `output_tokens` is preferred when it is a positive
 * number; otherwise the summary is measured with `tokenCounter` as a system
 * message. In both cases `SUMMARY_WRAPPER_OVERHEAD_TOKENS` is added.
 *
 * @param summaryText Summary text to measure.
 * @param summaryUsage Usage metadata from the summarization call, if any.
 * @param tokenCounter Optional per-message token counter.
 * @returns The estimated token count, or `0` when neither a usable provider
 * figure nor a counter is available.
 */
function computeSummaryTokenCount(
  summaryText: string,
  summaryUsage: Partial<UsageMetadata> | undefined,
  tokenCounter?: (message: BaseMessage) => number
): number {
  const reportedOutput = Number(summaryUsage?.output_tokens) || 0;
  if (reportedOutput > 0) {
    return reportedOutput + SUMMARY_WRAPPER_OVERHEAD_TOKENS;
  }
  if (!tokenCounter) {
    return 0;
  }
  const counted = tokenCounter(new SystemMessage(summaryText));
  return counted + SUMMARY_WRAPPER_OVERHEAD_TOKENS;
}
330
+
331
/**
 * Builds the `SummaryContentBlock` that is stored on the run step and sent
 * with the summarization events.
 *
 * The summary text becomes a single text content part; `stepId` and
 * `stepIndex` are recorded as the block's boundary, and `createdAt` is set to
 * the current time in ISO-8601 form.
 *
 * @param params Summary text, token count, step identity, model/provider
 * labels and the summary version.
 * @returns The assembled summary block.
 */
function buildSummaryBlock(params: {
  summaryText: string;
  tokenCount: number;
  stepId: string;
  stepIndex: number;
  modelName?: string;
  provider: string;
  summaryVersion: number;
}): t.SummaryContentBlock {
  const {
    summaryText,
    tokenCount,
    stepId,
    stepIndex,
    modelName,
    provider,
    summaryVersion,
  } = params;

  const textPart = {
    type: ContentTypes.TEXT,
    text: summaryText,
  } as t.MessageContentComplex;

  return {
    type: ContentTypes.SUMMARY,
    content: [textPart],
    tokenCount,
    summaryVersion,
    boundary: {
      messageId: stepId,
      contentIndex: stepIndex,
    },
    model: modelName,
    provider,
    createdAt: new Date().toISOString(),
  };
}
360
+
361
/**
 * Logging callback passed to the summarization helpers.
 *
 * @param level Severity of the entry.
 * @param message Human-readable log message.
 * @param data Optional structured details attached to the entry.
 */
type LogFn = (
  level: 'debug' | 'info' | 'warn' | 'error',
  message: string,
  data?: Record<string, unknown>
) => void;
366
+
367
/**
 * Produces summary text for `messages`, degrading step by step on failure.
 *
 * 1. Calls the primary summarization model through `summarizeWithCacheHit`.
 * 2. If that throws and the client options list `fallbacks`, retries through
 *    `tryFallbackProviders` with the summarization instruction appended as a
 *    human message.
 * 3. If there is still no text, returns the LLM-free metadata stub from
 *    `generateMetadataStub`.
 *
 * Model initialization happens before the guarded section, so an error there
 * propagates to the caller. `usage` is populated only by the primary path.
 *
 * @param params Agent context, messages to summarize, resolved client
 * config, runnable config, step id, prompt-cache flag and logger.
 * @returns The summary text and, when available, the primary call's usage.
 */
async function executeSummarizationWithFallback(params: {
  agentContext: AgentContext;
  messages: BaseMessage[];
  clientConfig: SummarizationClientConfig;
  summarizeConfig?: RunnableConfig;
  stepId: string;
  usePromptCache: boolean;
  log: LogFn;
}): Promise<{ text: string; usage?: Partial<UsageMetadata> }> {
  const {
    agentContext,
    messages,
    clientConfig,
    summarizeConfig,
    stepId,
    usePromptCache,
    log,
  } = params;

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const summarizationModel = initializeModel({
    provider: clientConfig.provider as Providers,
    clientOptions: clientConfig.clientOptions as t.ClientOptions,
    tools: agentContext.getToolsForBinding(),
  }) as t.ChatModel;

  const priorSummaryText = agentContext.getSummaryText()?.trim() ?? '';

  let summaryText = '';
  let summaryUsage: Partial<UsageMetadata> | undefined;

  try {
    const primary = await summarizeWithCacheHit({
      model: summarizationModel,
      messages,
      promptText: clientConfig.promptText,
      updatePromptText: clientConfig.updatePromptText,
      priorSummaryText,
      config: summarizeConfig,
      stepId,
      provider: clientConfig.provider as Providers,
      reasoningKey: agentContext.reasoningKey,
      usePromptCache,
      log,
    });
    summaryText = primary.text;
    summaryUsage = primary.usage;
  } catch (primaryError) {
    const primaryErrorMessage = describeError(primaryError);
    log('error', 'Summarization LLM call failed', {
      error: primaryErrorMessage,
      provider: clientConfig.provider,
      model: clientConfig.modelName,
      messagesToRefineCount: messages.length,
    });

    const fallbacks =
      (clientConfig.clientOptions as unknown as t.LLMConfig | undefined)
        ?.fallbacks ?? [];

    if (fallbacks.length > 0) {
      try {
        const onChunk = createSummarizationChunkHandler({
          stepId,
          config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
          provider: clientConfig.provider as Providers,
          reasoningKey: agentContext.reasoningKey,
        });
        const tools = agentContext.getToolsForBinding();
        const instruction = new HumanMessage(
          buildSummarizationInstruction(
            clientConfig.promptText,
            clientConfig.updatePromptText,
            priorSummaryText
          )
        );
        const fallbackResult = await tryFallbackProviders({
          fallbacks,
          tools,
          messages: [...messages, instruction],
          config: traceConfig(summarizeConfig, 'cache_hit_compaction'),
          primaryError,
          onChunk,
        });
        const firstMessage = fallbackResult?.messages?.[0];
        if (firstMessage) {
          summaryText = extractResponseText(
            firstMessage as { content: string | object }
          );
        }
      } catch (fbErr) {
        // Best effort: a failed fallback falls through to the metadata stub.
        log('warn', 'Fallback providers also failed', {
          error: describeError(fbErr),
        });
      }
    }

    if (!summaryText) {
      log('warn', 'Summarization failed, falling back to metadata stub', {
        error: primaryErrorMessage,
      });
      summaryText = generateMetadataStub(messages);
    }
  }

  return { text: summaryText, usage: summaryUsage };
}
481
+
482
/**
 * Finalizes a summarization step.
 *
 * In order: stores the summary (and token usage, when provided) on the run
 * step, dispatches the run-step-completed notification, emits
 * `ON_SUMMARIZE_COMPLETE` when a runnable config is available, and finally
 * asks the agent context to rebuild its token map.
 *
 * @param params Graph dispatchers, runnable config, step id, summary block,
 * agent context, the run step to update, optional usage and the agent id.
 */
async function dispatchCompletionEvents(params: {
  graph: CreateSummarizeNodeParams['graph'];
  runnableConfig?: RunnableConfig;
  stepId: string;
  summaryBlock: t.SummaryContentBlock;
  agentContext: AgentContext;
  runStep: t.RunStep;
  summaryUsage?: Partial<UsageMetadata>;
  agentId: string;
}): Promise<void> {
  const { graph, runnableConfig, stepId, summaryBlock, runStep } = params;

  runStep.summary = summaryBlock;

  const usage = params.summaryUsage;
  if (usage) {
    // Missing or non-numeric counts are reported as zero.
    const promptTokens = Number(usage.input_tokens) || 0;
    const completionTokens = Number(usage.output_tokens) || 0;
    runStep.usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens,
    };
  }

  await graph.dispatchRunStepCompleted(
    stepId,
    { type: 'summary', summary: summaryBlock } satisfies t.SummaryCompleted,
    runnableConfig
  );

  if (runnableConfig) {
    await safeDispatchCustomEvent(
      GraphEvents.ON_SUMMARIZE_COMPLETE,
      {
        id: stepId,
        agentId: params.agentId,
        summary: summaryBlock,
      } satisfies t.SummarizeCompleteEvent,
      runnableConfig
    );
  }

  params.agentContext.rebuildTokenMapAfterSummarization({});
}
535
+
536
+ // ---------------------------------------------------------------------------
537
+ // createSummarizeNode
538
+ // ---------------------------------------------------------------------------
539
+
540
/** Dependencies required to build the summarize graph node. */
interface CreateSummarizeNodeParams {
  /** Agent whose conversation is being summarized. */
  agentContext: AgentContext;
  /** The subset of the owning graph that the summarize node relies on. */
  graph: {
    /** Run steps recorded so far; its length becomes the new step's `index`. */
    contentData: t.RunStep[];
    /** NOTE(review): presumably maps step ids to content indices; confirm. */
    contentIndexMap: Map<string, number>;
    /** Graph-level config, used when the node is invoked without one. */
    config?: RunnableConfig;
    /** Run identifier; copied onto the run step when non-empty. */
    runId?: string;
    /** When true, the agent id is attached to the run step. */
    isMultiAgent: boolean;
    /** Announces a newly created run step. */
    dispatchRunStep: (
      runStep: t.RunStep,
      config?: RunnableConfig
    ) => Promise<void>;
    /** Announces that the step with the given id has completed. */
    dispatchRunStepCompleted: (
      stepId: string,
      result: t.StepCompleted,
      config?: RunnableConfig
    ) => Promise<void>;
  };
  /** Returns the `[stepId, stepIndex]` pair for the given step key. */
  generateStepId: (stepKey: string) => [string, number];
}
560
+
561
/**
 * Creates the graph node that compacts conversation history into a summary.
 *
 * The returned async node:
 * - returns immediately when the state carries no `summarizationRequest`;
 * - logs a warning and skips when `agentContext.instructionTokens` already
 *   reaches `agentContext.maxContextTokens`;
 * - otherwise dispatches a run step and `ON_SUMMARIZE_START`, runs
 *   `executeSummarizationWithFallback`, stores the result through
 *   `agentContext.setSummary`, dispatches the completion events, and returns
 *   the message produced by `createRemoveAllMessage()`.
 *
 * Every path clears `summarizationRequest` in the returned state update.
 */
export function createSummarizeNode({
  agentContext,
  graph,
  generateStepId,
}: CreateSummarizeNodeParams) {
  return async (
    state: {
      messages: BaseMessage[];
      summarizationRequest?: t.SummarizationNodeInput;
    },
    config?: RunnableConfig
  ): Promise<{ summarizationRequest: undefined; messages?: BaseMessage[] }> => {
    const request = state.summarizationRequest;
    if (request == null) {
      return { summarizationRequest: undefined };
    }
    const { agentId } = request;

    // Nothing can be summarized if the instructions alone fill the budget.
    const contextLimit = agentContext.maxContextTokens ?? 0;
    const instructionsOverflow =
      contextLimit > 0 && agentContext.instructionTokens >= contextLimit;
    if (instructionsOverflow) {
      emitAgentLog(
        config,
        'warn',
        'summarize',
        'Summarization skipped, instructions exceed context budget. Reduce the number of tools or increase maxContextTokens.',
        {
          instructionTokens: agentContext.instructionTokens,
          maxContextTokens: contextLimit,
          breakdown: agentContext.formatTokenBudgetBreakdown(),
        },
        { runId: graph.runId, agentId }
      );
      return { summarizationRequest: undefined };
    }

    // Consume the pending original tool content exactly once.
    const messagesToRefine = restoreOriginalToolContent(
      state.messages,
      agentContext.pendingOriginalToolContent
    );
    agentContext.pendingOriginalToolContent = undefined;

    const clientConfig = buildSummarizationClientConfig(
      agentContext,
      agentContext.summarizationConfig
    );
    const { provider: summaryProvider, modelName: summaryModel } = clientConfig;

    const runnableConfig = config ?? graph.config;

    const [stepId, stepIndex] = generateStepId(`summarize-${agentId}`);

    const pendingSummary: t.SummaryContentBlock = {
      type: ContentTypes.SUMMARY,
      model: summaryModel,
      provider: summaryProvider,
    };

    const runStep: t.RunStep = {
      stepIndex,
      id: stepId,
      type: StepTypes.MESSAGE_CREATION,
      index: graph.contentData.length,
      stepDetails: {
        type: StepTypes.MESSAGE_CREATION,
        message_creation: { message_id: stepId },
      },
      summary: pendingSummary,
      usage: null,
    };

    if (graph.runId != null && graph.runId !== '') {
      runStep.runId = graph.runId;
    }
    if (graph.isMultiAgent && agentContext.agentId) {
      runStep.agentId = agentContext.agentId;
    }

    await graph.dispatchRunStep(runStep, runnableConfig);

    if (runnableConfig) {
      await safeDispatchCustomEvent(
        GraphEvents.ON_SUMMARIZE_START,
        {
          agentId,
          provider: summaryProvider,
          model: summaryModel,
          messagesToRefineCount: messagesToRefine.length,
          summaryVersion: agentContext.summaryVersion + 1,
        } satisfies t.SummarizeStartEvent,
        runnableConfig
      );
    }

    // Prompt caching only applies when summarizing with the agent's own provider.
    const isSelfSummarizeModel =
      summaryProvider === (agentContext.provider as string);
    const hasPromptCache =
      isSelfSummarizeModel &&
      (agentContext.clientOptions as Record<string, unknown> | undefined)
        ?.promptCache === true;

    const log: LogFn = (level, message, data) => {
      emitAgentLog(runnableConfig, level, 'summarize', message, data, {
        runId: graph.runId,
        agentId,
      });
    };

    log('debug', 'Summarization starting', {
      messagesToRefineCount: messagesToRefine.length,
      hasPriorSummary: (agentContext.getSummaryText()?.trim() ?? '') !== '',
      summaryVersion: agentContext.summaryVersion + 1,
      isSelfSummarize: isSelfSummarizeModel,
      hasPromptCache,
      provider: summaryProvider,
    });

    // The summarization call only receives a config when the node itself did.
    let summarizeConfig: RunnableConfig | undefined;
    if (config) {
      summarizeConfig = {
        ...config,
        metadata: {
          ...config.metadata,
          agent_id: agentId,
          summarization_provider: summaryProvider,
          summarization_model: summaryModel,
        },
      };
    }

    const { text: rawText, usage: summaryUsage } =
      await executeSummarizationWithFallback({
        agentContext,
        messages: messagesToRefine,
        clientConfig,
        summarizeConfig,
        stepId,
        // `hasPromptCache` is already false unless `isSelfSummarizeModel` holds.
        usePromptCache: hasPromptCache,
        log,
      });

    if (!rawText) {
      agentContext.markSummarizationTriggered(0);
      if (runnableConfig) {
        await safeDispatchCustomEvent(
          GraphEvents.ON_SUMMARIZE_COMPLETE,
          {
            id: stepId,
            agentId,
            error: 'Summarization produced empty output',
          } satisfies t.SummarizeCompleteEvent,
          runnableConfig
        );
      }
      return { summarizationRequest: undefined };
    }

    const summaryText = enrichSummary(rawText, messagesToRefine);
    const tokenCount = computeSummaryTokenCount(
      summaryText,
      summaryUsage,
      agentContext.tokenCounter
    );

    agentContext.setSummary(summaryText, tokenCount);

    log('info', 'Summary persisted');

    const usageFields =
      summaryUsage != null
        ? {
          input_tokens: summaryUsage.input_tokens,
          output_tokens: summaryUsage.output_tokens,
          cache_read: summaryUsage.input_token_details?.cache_read,
          cache_creation: summaryUsage.input_token_details?.cache_creation,
        }
        : {};
    log('debug', 'Summary details', {
      summaryTokens: tokenCount,
      textLength: summaryText.length,
      messagesCompacted: messagesToRefine.length,
      summaryVersion: agentContext.summaryVersion,
      ...usageFields,
    });

    const summaryBlock = buildSummaryBlock({
      summaryText,
      tokenCount,
      stepId,
      stepIndex: runStep.index,
      modelName: summaryModel,
      provider: summaryProvider,
      summaryVersion: agentContext.summaryVersion,
    });

    await dispatchCompletionEvents({
      graph,
      runnableConfig,
      stepId,
      summaryBlock,
      agentContext,
      runStep,
      summaryUsage,
      agentId,
    });

    return {
      summarizationRequest: undefined,
      messages: [createRemoveAllMessage()],
    };
  };
}
768
+
769
/**
 * Pulls the plain text out of an LLM response.
 *
 * String content is returned trimmed. Array content is concatenated from its
 * bare-string entries and `text` blocks, in order, then trimmed;
 * thinking/reasoning blocks and anything else are ignored. Any other content
 * shape yields an empty string.
 */
function extractResponseText(response: { content: string | object }): string {
  const body = response.content;
  if (typeof body === 'string') {
    return body.trim();
  }
  if (!Array.isArray(body)) {
    return '';
  }

  let collected = '';
  for (const entry of body) {
    if (typeof entry === 'string') {
      collected += entry;
      continue;
    }
    if (entry == null || typeof entry !== 'object') {
      continue;
    }
    const block = entry as Record<string, unknown>;
    switch (block.type) {
    // Reasoning output is never part of the summary text.
    case ContentTypes.THINKING:
    case ContentTypes.REASONING_CONTENT:
    case 'redacted_thinking':
      break;
    case 'text':
      if (typeof block.text === 'string') {
        collected += block.text;
      }
      break;
    default:
      break;
    }
  }
  return collected.trim();
}
801
+
802
/**
 * Assembles the instruction text sent to the summarization model.
 *
 * When a prior summary exists, the update prompt is preferred (falling back to
 * the base prompt) and the prior summary is appended inside a
 * `<previous-summary>` block. Otherwise only the base prompt is returned.
 *
 * A prior summary that is empty or whitespace-only is treated as absent, so
 * the model is never asked to "update" an empty `<previous-summary>` block.
 *
 * @param promptText - Base summarization prompt.
 * @param updatePromptText - Optional prompt used when refining an existing summary.
 * @param priorSummaryText - Text of the previous summary, or an empty string.
 * @returns The full instruction string.
 */
function buildSummarizationInstruction(
  promptText: string,
  updatePromptText: string | undefined,
  priorSummaryText: string
): string {
  // Presence is decided on trimmed content; the text itself is embedded as given.
  const hasPriorSummary = priorSummaryText.trim() !== '';
  if (!hasPriorSummary) {
    return promptText;
  }
  const effectivePrompt = updatePromptText ?? promptText;
  return `${effectivePrompt}\n\n<previous-summary>\n${priorSummaryText}\n</previous-summary>`;
}
818
+
819
/**
 * Builds the streaming `onChunk` callback that forwards each chunk's content
 * as an `ON_SUMMARIZE_DELTA` event.
 *
 * Returns `undefined` when there is no step id or no config to dispatch
 * against. Chunks whose content is null/undefined or an empty string are
 * dropped. The dispatch is fire-and-forget: the callback does not wait for it.
 */
function createSummarizationChunkHandler({
  stepId,
  config,
  provider,
  reasoningKey = 'reasoning_content',
}: {
  stepId?: string;
  config?: RunnableConfig;
  provider?: Providers;
  reasoningKey?: 'reasoning_content' | 'reasoning';
}): OnChunk | undefined {
  if (stepId == null || stepId === '' || !config) {
    return undefined;
  }
  // Capture the narrowed values so the closure sees non-optional types.
  const deltaId = stepId;
  const dispatchConfig = config;

  return (chunk) => {
    const content = getChunkContent({
      chunk: chunk as Parameters<typeof getChunkContent>[0]['chunk'],
      provider,
      reasoningKey,
    });
    if (content == null || content === '') {
      return;
    }

    let blocks: t.MessageContentComplex[];
    if (typeof content === 'string') {
      blocks = [
        { type: ContentTypes.TEXT, text: content } as t.MessageContentComplex,
      ];
    } else {
      blocks = content;
    }

    void safeDispatchCustomEvent(
      GraphEvents.ON_SUMMARIZE_DELTA,
      {
        id: deltaId,
        delta: {
          summary: {
            type: ContentTypes.SUMMARY,
            content: blocks,
            provider: String(
              dispatchConfig.metadata?.summarization_provider ?? ''
            ),
            model: String(dispatchConfig.metadata?.summarization_model ?? ''),
          },
        },
      } satisfies t.SummarizeDeltaEvent,
      dispatchConfig
    );
  };
}
862
+
863
/**
 * Returns a copy of `config` labelled for a summarization stage: `runName`
 * becomes `summarization:<stage>` and the metadata gains
 * `summarization: true` plus the `stage`. The input object is not modified.
 * Yields `undefined` when no config is supplied.
 */
function traceConfig(
  config: RunnableConfig | undefined,
  stage: string
): RunnableConfig | undefined {
  if (!config) {
    return undefined;
  }
  const taggedMetadata = { ...config.metadata, summarization: true, stage };
  return {
    ...config,
    runName: `summarization:${stage}`,
    metadata: taggedMetadata,
  };
}
876
+
877
/**
 * Cache-friendly compaction: the raw conversation messages are sent as-is and
 * the summarization instruction is appended as the final HumanMessage, so
 * providers with prompt caching can get a cache hit on the system prompt +
 * tool definitions prefix.
 *
 * When `usePromptCache` is `true` the message list is passed through
 * `addCacheControl` before invocation. Usage is read from the response's
 * `usage_metadata` when present, otherwise from
 * `response_metadata.metadata.usage` (`inputTokens` / `outputTokens`).
 *
 * @returns The extracted summary text (empty when no response message was
 * returned) and whatever usage figures could be found.
 */
async function summarizeWithCacheHit({
  model,
  messages,
  promptText,
  updatePromptText,
  priorSummaryText,
  config,
  stepId,
  provider,
  reasoningKey,
  usePromptCache,
  log,
}: {
  model: t.ChatModel;
  messages: BaseMessage[];
  promptText: string;
  updatePromptText?: string;
  priorSummaryText: string;
  config?: RunnableConfig;
  stepId?: string;
  provider: Providers;
  reasoningKey?: 'reasoning_content' | 'reasoning';
  usePromptCache?: boolean;
  log?: LogFn;
}): Promise<{ text: string; usage?: Partial<UsageMetadata> }> {
  const stage = 'cache_hit_compaction';

  const instruction = buildSummarizationInstruction(
    promptText,
    updatePromptText,
    priorSummaryText
  );

  // Instruction goes last so the cached conversation prefix stays untouched.
  const conversation = [...messages, new HumanMessage(instruction)];
  let invokeMessages = conversation;
  if (usePromptCache === true) {
    invokeMessages = addCacheControl(conversation);
  }

  const onChunk = createSummarizationChunkHandler({
    stepId,
    config: traceConfig(config, stage),
    provider,
    reasoningKey,
  });
  const result = await attemptInvoke(
    { model, messages: invokeMessages, provider, onChunk },
    traceConfig(config, stage)
  );

  const reply = result.messages?.[0];
  const text = reply
    ? extractResponseText(reply as { content: string | object })
    : '';

  let usage: Partial<UsageMetadata> | undefined;
  let usageSource = 'none';
  if (reply != null) {
    if ('usage_metadata' in reply && reply.usage_metadata != null) {
      usage = reply.usage_metadata as Partial<UsageMetadata>;
      usageSource = 'usage_metadata';
    } else {
      // Fallback: usage nested under response_metadata.metadata.usage.
      const responseMeta = reply.response_metadata as
        | Record<string, unknown>
        | undefined;
      const nestedMeta = responseMeta?.metadata as
        | Record<string, unknown>
        | undefined;
      const rawUsage = nestedMeta?.usage as Record<string, unknown> | undefined;
      if (rawUsage != null) {
        usage = {
          input_tokens: Number(rawUsage.inputTokens) || undefined,
          output_tokens: Number(rawUsage.outputTokens) || undefined,
        } as Partial<UsageMetadata>;
        usageSource = 'response_metadata';
      }
    }
  }

  const cacheDetails = (
    usage as
      | {
          input_token_details?: {
            cache_read?: number;
            cache_creation?: number;
          };
        }
      | undefined
  )?.input_token_details;
  const hasCacheFigures =
    cacheDetails?.cache_read != null || cacheDetails?.cache_creation != null;

  log?.('debug', 'Summarization LLM usage', {
    source: usageSource,
    input_tokens: usage?.input_tokens,
    output_tokens: usage?.output_tokens,
    ...(hasCacheFigures
      ? {
        'input_token_details.cache_read': cacheDetails?.cache_read,
        'input_token_details.cache_creation': cacheDetails?.cache_creation,
      }
      : {}),
  });

  return { text, usage };
}