illuma-agents 1.0.37 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/dist/cjs/agents/AgentContext.cjs +69 -14
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +3 -1
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +50 -8
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
  8. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/index.cjs +128 -61
  10. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  11. package/dist/cjs/main.cjs +16 -7
  12. package/dist/cjs/main.cjs.map +1 -1
  13. package/dist/cjs/messages/cache.cjs +1 -0
  14. package/dist/cjs/messages/cache.cjs.map +1 -1
  15. package/dist/cjs/messages/core.cjs +1 -1
  16. package/dist/cjs/messages/core.cjs.map +1 -1
  17. package/dist/cjs/messages/tools.cjs +2 -2
  18. package/dist/cjs/messages/tools.cjs.map +1 -1
  19. package/dist/cjs/stream.cjs +4 -2
  20. package/dist/cjs/stream.cjs.map +1 -1
  21. package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
  22. package/dist/cjs/tools/CodeExecutor.cjs +22 -21
  23. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  24. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
  25. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  26. package/dist/cjs/tools/ToolNode.cjs +101 -2
  27. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  28. package/dist/cjs/tools/ToolSearch.cjs +862 -0
  29. package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
  30. package/dist/esm/agents/AgentContext.mjs +69 -14
  31. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  32. package/dist/esm/common/enum.mjs +3 -1
  33. package/dist/esm/common/enum.mjs.map +1 -1
  34. package/dist/esm/graphs/Graph.mjs +51 -9
  35. package/dist/esm/graphs/Graph.mjs.map +1 -1
  36. package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
  37. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  38. package/dist/esm/llm/bedrock/index.mjs +127 -60
  39. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  40. package/dist/esm/main.mjs +1 -1
  41. package/dist/esm/messages/cache.mjs +1 -0
  42. package/dist/esm/messages/cache.mjs.map +1 -1
  43. package/dist/esm/messages/core.mjs +1 -1
  44. package/dist/esm/messages/core.mjs.map +1 -1
  45. package/dist/esm/messages/tools.mjs +2 -2
  46. package/dist/esm/messages/tools.mjs.map +1 -1
  47. package/dist/esm/stream.mjs +4 -2
  48. package/dist/esm/stream.mjs.map +1 -1
  49. package/dist/esm/tools/BrowserTools.mjs.map +1 -1
  50. package/dist/esm/tools/CodeExecutor.mjs +22 -21
  51. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  52. package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
  53. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  54. package/dist/esm/tools/ToolNode.mjs +102 -3
  55. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  56. package/dist/esm/tools/ToolSearch.mjs +827 -0
  57. package/dist/esm/tools/ToolSearch.mjs.map +1 -0
  58. package/dist/types/agents/AgentContext.d.ts +33 -1
  59. package/dist/types/common/enum.d.ts +4 -2
  60. package/dist/types/graphs/Graph.d.ts +6 -0
  61. package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
  62. package/dist/types/index.d.ts +1 -1
  63. package/dist/types/llm/bedrock/index.d.ts +89 -11
  64. package/dist/types/llm/bedrock/types.d.ts +27 -0
  65. package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
  66. package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
  67. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
  68. package/dist/types/tools/CodeExecutor.d.ts +0 -3
  69. package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
  70. package/dist/types/tools/ToolNode.d.ts +3 -1
  71. package/dist/types/tools/ToolSearch.d.ts +148 -0
  72. package/dist/types/types/graph.d.ts +2 -0
  73. package/dist/types/types/llm.d.ts +3 -1
  74. package/dist/types/types/tools.d.ts +42 -2
  75. package/package.json +12 -5
  76. package/src/agents/AgentContext.ts +88 -16
  77. package/src/common/enum.ts +3 -1
  78. package/src/graphs/Graph.ts +64 -13
  79. package/src/graphs/MultiAgentGraph.ts +350 -13
  80. package/src/index.ts +1 -1
  81. package/src/llm/bedrock/index.ts +221 -99
  82. package/src/llm/bedrock/llm.spec.ts +616 -0
  83. package/src/llm/bedrock/types.ts +51 -0
  84. package/src/llm/bedrock/utils/index.ts +18 -0
  85. package/src/llm/bedrock/utils/message_inputs.ts +563 -0
  86. package/src/llm/bedrock/utils/message_outputs.ts +310 -0
  87. package/src/messages/__tests__/tools.test.ts +21 -21
  88. package/src/messages/cache.test.ts +259 -0
  89. package/src/messages/cache.ts +104 -1
  90. package/src/messages/core.ts +1 -1
  91. package/src/messages/tools.ts +2 -2
  92. package/src/scripts/caching.ts +27 -19
  93. package/src/scripts/code_exec_files.ts +58 -15
  94. package/src/scripts/code_exec_multi_session.ts +241 -0
  95. package/src/scripts/code_exec_session.ts +282 -0
  96. package/src/scripts/multi-agent-conditional.ts +1 -0
  97. package/src/scripts/multi-agent-supervisor.ts +1 -0
  98. package/src/scripts/programmatic_exec_agent.ts +4 -4
  99. package/src/scripts/test-handoff-preamble.ts +277 -0
  100. package/src/scripts/test-parallel-handoffs.ts +291 -0
  101. package/src/scripts/test-tools-before-handoff.ts +8 -4
  102. package/src/scripts/test_code_api.ts +361 -0
  103. package/src/scripts/thinking-bedrock.ts +159 -0
  104. package/src/scripts/thinking.ts +39 -18
  105. package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
  106. package/src/scripts/tools.ts +7 -3
  107. package/src/stream.ts +4 -2
  108. package/src/tools/BrowserTools.ts +39 -17
  109. package/src/tools/CodeExecutor.ts +26 -23
  110. package/src/tools/ProgrammaticToolCalling.ts +18 -14
  111. package/src/tools/ToolNode.ts +114 -1
  112. package/src/tools/ToolSearch.ts +1041 -0
  113. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
  114. package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
  115. package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
  116. package/src/types/graph.ts +2 -0
  117. package/src/types/llm.ts +3 -1
  118. package/src/types/tools.ts +51 -2
  119. package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
  120. package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
  121. package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
  122. package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
  123. package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
  124. package/src/tools/ToolSearchRegex.ts +0 -535
  125. package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232
@@ -1,6 +1,10 @@
1
1
  /**
2
2
  * Optimized ChatBedrockConverse wrapper that fixes contentBlockIndex conflicts
3
- * and adds prompt caching support for Bedrock Converse API.
3
+ * and adds support for:
4
+ *
5
+ * - Prompt caching support for Bedrock Converse API (Illuma feature)
6
+ * - Application Inference Profiles (PR #9129)
7
+ * - Service Tiers (Priority/Default/Flex/Reserved) (PR #9785) - requires AWS SDK 3.966.0+
4
8
  *
5
9
  * Bedrock sends the same contentBlockIndex for both text and tool_use content blocks,
6
10
  * causing LangChain's merge logic to fail with "field[contentBlockIndex] already exists"
@@ -22,24 +26,81 @@
22
26
  */
23
27
 
24
28
  import { ChatBedrockConverse } from '@langchain/aws';
25
- import type { ChatBedrockConverseInput } from '@langchain/aws';
26
29
  import { AIMessageChunk } from '@langchain/core/messages';
27
30
  import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
28
- import { ChatGenerationChunk } from '@langchain/core/outputs';
31
+ import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
29
32
  import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
33
+ import type { ChatBedrockConverseInput } from '@langchain/aws';
34
+
35
+ /**
36
+ * Service tier type for Bedrock invocations.
37
+ * Requires AWS SDK >= 3.966.0 to actually work.
38
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
39
+ */
40
+ export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
30
41
 
31
- /** Extended input type with promptCache option */
42
+ /**
43
+ * Extended input interface with additional features:
44
+ * - promptCache: Enable Bedrock prompt caching for tool definitions
45
+ * - applicationInferenceProfile: Use an inference profile ARN instead of model ID
46
+ * - serviceTier: Specify service tier (Priority, Default, Flex, Reserved)
47
+ */
32
48
  export interface CustomChatBedrockConverseInput
33
49
  extends ChatBedrockConverseInput {
50
+ /**
51
+ * Enable Bedrock prompt caching for tool definitions.
52
+ * When true, adds cachePoint markers to tools array.
53
+ */
34
54
  promptCache?: boolean;
55
+
56
+ /**
57
+ * Application Inference Profile ARN to use for the model.
58
+ * For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
59
+ * When provided, this ARN will be used for the actual inference calls instead of the model ID.
60
+ * The `model` field must still be set to the normal model ID so that model metadata remains available.
61
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-create.html
62
+ */
63
+ applicationInferenceProfile?: string;
64
+
65
+ /**
66
+ * Service tier for model invocation.
67
+ * Specifies the processing tier type used for serving the request.
68
+ * Supported values are 'priority', 'default', 'flex', and 'reserved'.
69
+ *
70
+ * - 'priority': Prioritized processing for lower latency
71
+ * - 'default': Standard processing tier
72
+ * - 'flex': Flexible processing tier with lower cost
73
+ * - 'reserved': Reserved capacity for consistent performance
74
+ *
75
+ * If not provided, AWS uses the default tier.
76
+ * Note: Requires AWS SDK >= 3.966.0 to work.
77
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
78
+ */
79
+ serviceTier?: ServiceTierType;
80
+ }
81
+
82
+ /**
83
+ * Extended call options with serviceTier override support.
84
+ */
85
+ export interface CustomChatBedrockConverseCallOptions {
86
+ serviceTier?: ServiceTierType;
35
87
  }
36
88
 
37
89
  export class CustomChatBedrockConverse extends ChatBedrockConverse {
90
+ /** Enable Bedrock prompt caching for tool definitions */
38
91
  promptCache: boolean;
39
92
 
93
+ /** Application Inference Profile ARN to use instead of model ID */
94
+ applicationInferenceProfile?: string;
95
+
96
+ /** Service tier for model invocation */
97
+ serviceTier?: ServiceTierType;
98
+
40
99
  constructor(fields?: CustomChatBedrockConverseInput) {
41
100
  super(fields);
42
101
  this.promptCache = fields?.promptCache ?? false;
102
+ this.applicationInferenceProfile = fields?.applicationInferenceProfile;
103
+ this.serviceTier = fields?.serviceTier;
43
104
  }
44
105
 
45
106
  static lc_name(): string {
@@ -47,19 +108,30 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
47
108
  }
48
109
 
49
110
  /**
50
- * Override invocationParams to add cachePoint to tools when promptCache is enabled.
51
- * This enables Bedrock prompt caching for tool definitions.
111
+ * Get the model ID to use for API calls.
112
+ * Returns applicationInferenceProfile if set, otherwise returns this.model.
113
+ */
114
+ protected getModelId(): string {
115
+ return this.applicationInferenceProfile ?? this.model;
116
+ }
117
+
118
+ /**
119
+ * Override invocationParams to:
120
+ * 1. Add cachePoint to tools when promptCache is enabled
121
+ * 2. Add serviceTier support
52
122
  *
53
- * STRATEGY: Separate cachePoints for core tools and MCP tools
123
+ * CACHING STRATEGY: Separate cachePoints for core tools and MCP tools
54
124
  * - Core tools (web_search, execute_code, etc.) are stable → cache first
55
125
  * - MCP tools (have '_mcp_' in name) are dynamic → cache separately after
56
126
  * - This allows core tools to stay cached when MCP selection changes
57
127
  *
58
128
  * NOTE: Only Claude models support cachePoint - Nova and other models will reject it.
59
129
  */
60
- invocationParams(
61
- options?: this['ParsedCallOptions']
62
- ): ReturnType<ChatBedrockConverse['invocationParams']> {
130
+ override invocationParams(
131
+ options?: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions
132
+ ): ReturnType<ChatBedrockConverse['invocationParams']> & {
133
+ serviceTier?: { type: ServiceTierType };
134
+ } {
63
135
  const params = super.invocationParams(options);
64
136
 
65
137
  // Add cachePoint to tools array if promptCache is enabled and tools exist
@@ -79,8 +151,6 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
79
151
  // MCP tools have '_mcp_' in their name (e.g., 'search_emails_mcp_Google-Workspace')
80
152
  const coreTools: typeof params.toolConfig.tools = [];
81
153
  const mcpTools: typeof params.toolConfig.tools = [];
82
- const coreToolNames: string[] = [];
83
- const mcpToolNames: string[] = [];
84
154
 
85
155
  for (const tool of params.toolConfig.tools) {
86
156
  // Check if tool has a name property with '_mcp_' pattern
@@ -88,10 +158,8 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
88
158
  (tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
89
159
  if (toolName.includes('_mcp_')) {
90
160
  mcpTools.push(tool);
91
- mcpToolNames.push(toolName);
92
161
  } else {
93
162
  coreTools.push(tool);
94
- coreToolNames.push(toolName);
95
163
  }
96
164
  }
97
165
 
@@ -119,104 +187,158 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
119
187
  params.toolConfig.tools = toolsWithCache;
120
188
  }
121
189
 
122
- return params;
190
+ // Add serviceTier support
191
+ const serviceTierType = options?.serviceTier ?? this.serviceTier;
192
+
193
+ return {
194
+ ...params,
195
+ serviceTier: serviceTierType ? { type: serviceTierType } : undefined,
196
+ };
197
+ }
198
+
199
+ /**
200
+ * Override _generateNonStreaming to use applicationInferenceProfile as modelId.
201
+ * Uses the same model-swapping pattern as streaming for consistency.
202
+ */
203
+ override async _generateNonStreaming(
204
+ messages: BaseMessage[],
205
+ options: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions,
206
+ runManager?: CallbackManagerForLLMRun
207
+ ): Promise<ChatResult> {
208
+ // Temporarily swap model for applicationInferenceProfile support
209
+ const originalModel = this.model;
210
+ if (
211
+ this.applicationInferenceProfile != null &&
212
+ this.applicationInferenceProfile !== ''
213
+ ) {
214
+ this.model = this.applicationInferenceProfile;
215
+ }
216
+
217
+ try {
218
+ return await super._generateNonStreaming(messages, options, runManager);
219
+ } finally {
220
+ // Restore original model
221
+ this.model = originalModel;
222
+ }
123
223
  }
124
224
 
125
225
  /**
126
226
  * Override _streamResponseChunks to:
127
- * 1. Strip contentBlockIndex from response_metadata to prevent merge conflicts
128
- * 2. Extract cacheReadInputTokens/cacheWriteInputTokens and add to usage_metadata
227
+ * 1. Use applicationInferenceProfile as modelId (by temporarily swapping this.model)
228
+ * 2. Strip contentBlockIndex from response_metadata to prevent merge conflicts
229
+ * 3. Extract cacheReadInputTokens/cacheWriteInputTokens and add to usage_metadata
230
+ *
231
+ * Note: We delegate to super._streamResponseChunks() to preserve @langchain/aws's
232
+ * internal chunk handling which correctly preserves array content for reasoning blocks.
129
233
  */
130
- async *_streamResponseChunks(
234
+ override async *_streamResponseChunks(
131
235
  messages: BaseMessage[],
132
- options: this['ParsedCallOptions'],
236
+ options: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions,
133
237
  runManager?: CallbackManagerForLLMRun
134
238
  ): AsyncGenerator<ChatGenerationChunk> {
135
- const baseStream = super._streamResponseChunks(
136
- messages,
137
- options,
138
- runManager
139
- );
140
-
141
- for await (const chunk of baseStream) {
142
- // Only process if we have response_metadata
143
- if (
144
- chunk.message instanceof AIMessageChunk &&
145
- (chunk.message as Partial<AIMessageChunk>).response_metadata &&
146
- typeof chunk.message.response_metadata === 'object'
147
- ) {
148
- const responseMetadata = chunk.message.response_metadata as Record<
149
- string,
150
- unknown
151
- >;
152
- let needsModification = false;
153
- let cleanedMetadata = responseMetadata;
154
-
155
- // Check if contentBlockIndex exists anywhere in response_metadata
156
- const hasContentBlockIndex =
157
- this.hasContentBlockIndex(responseMetadata);
158
- if (hasContentBlockIndex) {
159
- cleanedMetadata = this.removeContentBlockIndex(
160
- responseMetadata
161
- ) as Record<string, unknown>;
162
- needsModification = true;
163
- }
239
+ // Temporarily swap model for applicationInferenceProfile support
240
+ const originalModel = this.model;
241
+ if (
242
+ this.applicationInferenceProfile != null &&
243
+ this.applicationInferenceProfile !== ''
244
+ ) {
245
+ this.model = this.applicationInferenceProfile;
246
+ }
164
247
 
165
- // Extract cache tokens from metadata.usage (Bedrock streaming format)
166
- // The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
167
- const metadata = responseMetadata.metadata as
168
- | Record<string, unknown>
169
- | undefined;
170
- const usage = (metadata?.usage ?? responseMetadata.usage) as
171
- | Record<string, unknown>
172
- | undefined;
173
-
174
- let enhancedUsageMetadata: UsageMetadata | undefined =
175
- chunk.message.usage_metadata;
176
-
177
- if (usage) {
178
- const cacheRead =
179
- (usage.cacheReadInputTokens as number | undefined) ?? 0;
180
- const cacheWrite =
181
- (usage.cacheWriteInputTokens as number | undefined) ?? 0;
182
- const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
183
- const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
184
-
185
- if (cacheRead > 0 || cacheWrite > 0) {
186
- needsModification = true;
187
- enhancedUsageMetadata = {
188
- input_tokens:
189
- chunk.message.usage_metadata?.input_tokens ?? inputTokens,
190
- output_tokens:
191
- chunk.message.usage_metadata?.output_tokens ?? outputTokens,
192
- total_tokens:
193
- chunk.message.usage_metadata?.total_tokens ??
194
- (usage.totalTokens as number | undefined) ??
195
- 0,
196
- input_token_details: {
197
- cache_read: cacheRead,
198
- cache_creation: cacheWrite,
199
- },
200
- };
201
- }
202
- }
248
+ try {
249
+ // Use parent's streaming logic which correctly handles reasoning content
250
+ const baseStream = super._streamResponseChunks(
251
+ messages,
252
+ options,
253
+ runManager
254
+ );
255
+
256
+ for await (const chunk of baseStream) {
257
+ // Clean and enhance chunk
258
+ yield this.processChunk(chunk);
259
+ }
260
+ } finally {
261
+ // Restore original model
262
+ this.model = originalModel;
263
+ }
264
+ }
203
265
 
204
- if (needsModification) {
205
- yield new ChatGenerationChunk({
206
- text: chunk.text,
207
- message: new AIMessageChunk({
208
- ...chunk.message,
209
- response_metadata: cleanedMetadata,
210
- usage_metadata: enhancedUsageMetadata,
211
- }),
212
- generationInfo: chunk.generationInfo,
213
- });
214
- continue;
215
- }
266
+ /**
267
+ * Process a chunk by:
268
+ * 1. Removing contentBlockIndex from response_metadata
269
+ * 2. Extracting cache token information from Bedrock's usage data
270
+ */
271
+ private processChunk(chunk: ChatGenerationChunk): ChatGenerationChunk {
272
+ const message = chunk.message;
273
+ if (!(message instanceof AIMessageChunk)) {
274
+ return chunk;
275
+ }
276
+
277
+ const responseMetadata = message.response_metadata as Record<
278
+ string,
279
+ unknown
280
+ >;
281
+ let needsModification = false;
282
+ let cleanedMetadata = responseMetadata;
283
+
284
+ // Check if contentBlockIndex exists anywhere in response_metadata
285
+ const hasContentBlockIndex = this.hasContentBlockIndex(responseMetadata);
286
+ if (hasContentBlockIndex) {
287
+ cleanedMetadata = this.removeContentBlockIndex(
288
+ responseMetadata
289
+ ) as Record<string, unknown>;
290
+ needsModification = true;
291
+ }
292
+
293
+ // Extract cache tokens from metadata.usage (Bedrock streaming format)
294
+ // The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
295
+ const metadata = responseMetadata.metadata as
296
+ | Record<string, unknown>
297
+ | undefined;
298
+ const usage = (metadata?.usage ?? responseMetadata.usage) as
299
+ | Record<string, unknown>
300
+ | undefined;
301
+
302
+ let enhancedUsageMetadata: UsageMetadata | undefined =
303
+ message.usage_metadata;
304
+
305
+ if (usage) {
306
+ const cacheRead = (usage.cacheReadInputTokens as number | undefined) ?? 0;
307
+ const cacheWrite =
308
+ (usage.cacheWriteInputTokens as number | undefined) ?? 0;
309
+ const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
310
+ const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
311
+
312
+ if (cacheRead > 0 || cacheWrite > 0) {
313
+ needsModification = true;
314
+ enhancedUsageMetadata = {
315
+ input_tokens: message.usage_metadata?.input_tokens ?? inputTokens,
316
+ output_tokens: message.usage_metadata?.output_tokens ?? outputTokens,
317
+ total_tokens:
318
+ message.usage_metadata?.total_tokens ??
319
+ (usage.totalTokens as number | undefined) ??
320
+ 0,
321
+ input_token_details: {
322
+ cache_read: cacheRead,
323
+ cache_creation: cacheWrite,
324
+ },
325
+ };
216
326
  }
327
+ }
217
328
 
218
- yield chunk;
329
+ if (needsModification) {
330
+ return new ChatGenerationChunk({
331
+ text: chunk.text,
332
+ message: new AIMessageChunk({
333
+ ...message,
334
+ response_metadata: cleanedMetadata,
335
+ usage_metadata: enhancedUsageMetadata,
336
+ }),
337
+ generationInfo: chunk.generationInfo,
338
+ });
219
339
  }
340
+
341
+ return chunk;
220
342
  }
221
343
 
222
344
  /**