illuma-agents 1.0.37 → 1.0.39
This diff compares the contents of publicly available package versions as published to their public registry. It is provided for informational purposes only.
- package/dist/cjs/agents/AgentContext.cjs +112 -14
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +5 -1
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +148 -8
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
- package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +128 -61
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -7
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +140 -46
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +1 -1
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/tools.cjs +2 -2
- package/dist/cjs/messages/tools.cjs.map +1 -1
- package/dist/cjs/schemas/validate.cjs +173 -0
- package/dist/cjs/schemas/validate.cjs.map +1 -0
- package/dist/cjs/stream.cjs +4 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/cjs/tools/CodeExecutor.cjs +22 -21
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +101 -2
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/ToolSearch.cjs +862 -0
- package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +112 -14
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +5 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +149 -9
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
- package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +127 -60
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +140 -46
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +1 -1
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/tools.mjs +2 -2
- package/dist/esm/messages/tools.mjs.map +1 -1
- package/dist/esm/schemas/validate.mjs +167 -0
- package/dist/esm/schemas/validate.mjs.map +1 -0
- package/dist/esm/stream.mjs +4 -2
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/esm/tools/CodeExecutor.mjs +22 -21
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +102 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/ToolSearch.mjs +827 -0
- package/dist/esm/tools/ToolSearch.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +51 -1
- package/dist/types/common/enum.d.ts +6 -2
- package/dist/types/graphs/Graph.d.ts +12 -0
- package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
- package/dist/types/index.d.ts +2 -1
- package/dist/types/llm/bedrock/index.d.ts +89 -11
- package/dist/types/llm/bedrock/types.d.ts +27 -0
- package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
- package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/schemas/index.d.ts +1 -0
- package/dist/types/schemas/validate.d.ts +36 -0
- package/dist/types/tools/CodeExecutor.d.ts +0 -3
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/tools/ToolSearch.d.ts +148 -0
- package/dist/types/types/graph.d.ts +71 -0
- package/dist/types/types/llm.d.ts +3 -1
- package/dist/types/types/tools.d.ts +42 -2
- package/package.json +13 -6
- package/src/agents/AgentContext.test.ts +312 -0
- package/src/agents/AgentContext.ts +144 -16
- package/src/common/enum.ts +5 -1
- package/src/graphs/Graph.ts +214 -13
- package/src/graphs/MultiAgentGraph.ts +350 -13
- package/src/index.ts +4 -1
- package/src/llm/bedrock/index.ts +221 -99
- package/src/llm/bedrock/llm.spec.ts +616 -0
- package/src/llm/bedrock/types.ts +51 -0
- package/src/llm/bedrock/utils/index.ts +18 -0
- package/src/llm/bedrock/utils/message_inputs.ts +563 -0
- package/src/llm/bedrock/utils/message_outputs.ts +310 -0
- package/src/messages/__tests__/tools.test.ts +21 -21
- package/src/messages/cache.test.ts +304 -0
- package/src/messages/cache.ts +183 -53
- package/src/messages/core.ts +1 -1
- package/src/messages/tools.ts +2 -2
- package/src/schemas/index.ts +2 -0
- package/src/schemas/validate.test.ts +358 -0
- package/src/schemas/validate.ts +238 -0
- package/src/scripts/caching.ts +27 -19
- package/src/scripts/code_exec_files.ts +58 -15
- package/src/scripts/code_exec_multi_session.ts +241 -0
- package/src/scripts/code_exec_session.ts +282 -0
- package/src/scripts/multi-agent-conditional.ts +1 -0
- package/src/scripts/multi-agent-supervisor.ts +1 -0
- package/src/scripts/programmatic_exec_agent.ts +4 -4
- package/src/scripts/test-handoff-preamble.ts +277 -0
- package/src/scripts/test-parallel-handoffs.ts +291 -0
- package/src/scripts/test-tools-before-handoff.ts +8 -4
- package/src/scripts/test_code_api.ts +361 -0
- package/src/scripts/thinking-bedrock.ts +159 -0
- package/src/scripts/thinking.ts +39 -18
- package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
- package/src/scripts/tools.ts +7 -3
- package/src/specs/cache.simple.test.ts +396 -0
- package/src/stream.ts +4 -2
- package/src/tools/BrowserTools.ts +39 -17
- package/src/tools/CodeExecutor.ts +26 -23
- package/src/tools/ProgrammaticToolCalling.ts +18 -14
- package/src/tools/ToolNode.ts +114 -1
- package/src/tools/ToolSearch.ts +1041 -0
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
- package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
- package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
- package/src/types/graph.test.ts +183 -0
- package/src/types/graph.ts +73 -0
- package/src/types/llm.ts +3 -1
- package/src/types/tools.ts +51 -2
- package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
- package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
- package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
- package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
- package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
- package/src/tools/ToolSearchRegex.ts +0 -535
- package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232
package/src/llm/bedrock/index.ts
CHANGED
@@ -1,6 +1,10 @@
 /**
  * Optimized ChatBedrockConverse wrapper that fixes contentBlockIndex conflicts
- * and adds
+ * and adds support for:
+ *
+ * - Prompt caching support for Bedrock Converse API (Illuma feature)
+ * - Application Inference Profiles (PR #9129)
+ * - Service Tiers (Priority/Standard/Flex) (PR #9785) - requires AWS SDK 3.966.0+
  *
  * Bedrock sends the same contentBlockIndex for both text and tool_use content blocks,
  * causing LangChain's merge logic to fail with "field[contentBlockIndex] already exists"
@@ -22,24 +26,81 @@
  */
 
 import { ChatBedrockConverse } from '@langchain/aws';
-import type { ChatBedrockConverseInput } from '@langchain/aws';
 import { AIMessageChunk } from '@langchain/core/messages';
 import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
-import { ChatGenerationChunk } from '@langchain/core/outputs';
+import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
 import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
+import type { ChatBedrockConverseInput } from '@langchain/aws';
+
+/**
+ * Service tier type for Bedrock invocations.
+ * Requires AWS SDK >= 3.966.0 to actually work.
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+ */
+export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
 
-/**
+/**
+ * Extended input interface with additional features:
+ * - promptCache: Enable Bedrock prompt caching for tool definitions
+ * - applicationInferenceProfile: Use an inference profile ARN instead of model ID
+ * - serviceTier: Specify service tier (Priority, Standard, Flex, Reserved)
+ */
 export interface CustomChatBedrockConverseInput
   extends ChatBedrockConverseInput {
+  /**
+   * Enable Bedrock prompt caching for tool definitions.
+   * When true, adds cachePoint markers to tools array.
+   */
   promptCache?: boolean;
+
+  /**
+   * Application Inference Profile ARN to use for the model.
+   * For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
+   * When provided, this ARN will be used for the actual inference calls instead of the model ID.
+   * Must still provide `model` as normal modelId to benefit from all the metadata.
+   * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-create.html
+   */
+  applicationInferenceProfile?: string;
+
+  /**
+   * Service tier for model invocation.
+   * Specifies the processing tier type used for serving the request.
+   * Supported values are 'priority', 'default', 'flex', and 'reserved'.
+   *
+   * - 'priority': Prioritized processing for lower latency
+   * - 'default': Standard processing tier
+   * - 'flex': Flexible processing tier with lower cost
+   * - 'reserved': Reserved capacity for consistent performance
+   *
+   * If not provided, AWS uses the default tier.
+   * Note: Requires AWS SDK >= 3.966.0 to work.
+   * @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
+   */
+  serviceTier?: ServiceTierType;
+}
+
+/**
+ * Extended call options with serviceTier override support.
+ */
+export interface CustomChatBedrockConverseCallOptions {
+  serviceTier?: ServiceTierType;
 }
 
 export class CustomChatBedrockConverse extends ChatBedrockConverse {
+  /** Enable Bedrock prompt caching for tool definitions */
   promptCache: boolean;
 
+  /** Application Inference Profile ARN to use instead of model ID */
+  applicationInferenceProfile?: string;
+
+  /** Service tier for model invocation */
+  serviceTier?: ServiceTierType;
+
   constructor(fields?: CustomChatBedrockConverseInput) {
     super(fields);
     this.promptCache = fields?.promptCache ?? false;
+    this.applicationInferenceProfile = fields?.applicationInferenceProfile;
+    this.serviceTier = fields?.serviceTier;
   }
 
   static lc_name(): string {
@@ -47,19 +108,30 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
   }
 
   /**
-   *
-   *
+   * Get the model ID to use for API calls.
+   * Returns applicationInferenceProfile if set, otherwise returns this.model.
+   */
+  protected getModelId(): string {
+    return this.applicationInferenceProfile ?? this.model;
+  }
+
+  /**
+   * Override invocationParams to:
+   * 1. Add cachePoint to tools when promptCache is enabled
+   * 2. Add serviceTier support
    *
-   * STRATEGY: Separate cachePoints for core tools and MCP tools
+   * CACHING STRATEGY: Separate cachePoints for core tools and MCP tools
   * - Core tools (web_search, execute_code, etc.) are stable → cache first
   * - MCP tools (have '_mcp_' in name) are dynamic → cache separately after
   * - This allows core tools to stay cached when MCP selection changes
   *
   * NOTE: Only Claude models support cachePoint - Nova and other models will reject it.
   */
-  invocationParams(
-    options?: this['ParsedCallOptions']
-  ): ReturnType<ChatBedrockConverse['invocationParams']> {
+  override invocationParams(
+    options?: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions
+  ): ReturnType<ChatBedrockConverse['invocationParams']> & {
+    serviceTier?: { type: ServiceTierType };
+  } {
     const params = super.invocationParams(options);
 
     // Add cachePoint to tools array if promptCache is enabled and tools exist
@@ -79,8 +151,6 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
       // MCP tools have '_mcp_' in their name (e.g., 'search_emails_mcp_Google-Workspace')
       const coreTools: typeof params.toolConfig.tools = [];
       const mcpTools: typeof params.toolConfig.tools = [];
-      const coreToolNames: string[] = [];
-      const mcpToolNames: string[] = [];
 
       for (const tool of params.toolConfig.tools) {
         // Check if tool has a name property with '_mcp_' pattern
@@ -88,10 +158,8 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
           (tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
         if (toolName.includes('_mcp_')) {
           mcpTools.push(tool);
-          mcpToolNames.push(toolName);
         } else {
           coreTools.push(tool);
-          coreToolNames.push(toolName);
         }
       }
 
@@ -119,104 +187,158 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
       params.toolConfig.tools = toolsWithCache;
     }
 
-
+    // Add serviceTier support
+    const serviceTierType = options?.serviceTier ?? this.serviceTier;
+
+    return {
+      ...params,
+      serviceTier: serviceTierType ? { type: serviceTierType } : undefined,
+    };
+  }
+
+  /**
+   * Override _generateNonStreaming to use applicationInferenceProfile as modelId.
+   * Uses the same model-swapping pattern as streaming for consistency.
+   */
+  override async _generateNonStreaming(
+    messages: BaseMessage[],
+    options: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions,
+    runManager?: CallbackManagerForLLMRun
+  ): Promise<ChatResult> {
+    // Temporarily swap model for applicationInferenceProfile support
+    const originalModel = this.model;
+    if (
+      this.applicationInferenceProfile != null &&
+      this.applicationInferenceProfile !== ''
+    ) {
+      this.model = this.applicationInferenceProfile;
+    }
+
+    try {
+      return await super._generateNonStreaming(messages, options, runManager);
+    } finally {
+      // Restore original model
+      this.model = originalModel;
+    }
   }
 
   /**
    * Override _streamResponseChunks to:
-   * 1.
-   * 2.
+   * 1. Use applicationInferenceProfile as modelId (by temporarily swapping this.model)
+   * 2. Strip contentBlockIndex from response_metadata to prevent merge conflicts
+   * 3. Extract cacheReadInputTokens/cacheWriteInputTokens and add to usage_metadata
+   *
+   * Note: We delegate to super._streamResponseChunks() to preserve @langchain/aws's
+   * internal chunk handling which correctly preserves array content for reasoning blocks.
    */
-  async *_streamResponseChunks(
+  override async *_streamResponseChunks(
     messages: BaseMessage[],
-    options: this['ParsedCallOptions'],
+    options: this['ParsedCallOptions'] & CustomChatBedrockConverseCallOptions,
     runManager?: CallbackManagerForLLMRun
   ): AsyncGenerator<ChatGenerationChunk> {
-    [8 removed lines (old 135-142) not rendered in the original diff view]
-        if (
-          chunk.message instanceof AIMessageChunk &&
-          (chunk.message as Partial<AIMessageChunk>).response_metadata &&
-          typeof chunk.message.response_metadata === 'object'
-        ) {
-          const responseMetadata = chunk.message.response_metadata as Record<
-            string,
-            unknown
-          >;
-          let needsModification = false;
-          let cleanedMetadata = responseMetadata;
-
-          // Check if contentBlockIndex exists anywhere in response_metadata
-          const hasContentBlockIndex =
-            this.hasContentBlockIndex(responseMetadata);
-          if (hasContentBlockIndex) {
-            cleanedMetadata = this.removeContentBlockIndex(
-              responseMetadata
-            ) as Record<string, unknown>;
-            needsModification = true;
-          }
+    // Temporarily swap model for applicationInferenceProfile support
+    const originalModel = this.model;
+    if (
+      this.applicationInferenceProfile != null &&
+      this.applicationInferenceProfile !== ''
+    ) {
+      this.model = this.applicationInferenceProfile;
+    }
 
-    [17 removed lines (old 165-181) not rendered in the original diff view]
-          const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
-          const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
-
-          if (cacheRead > 0 || cacheWrite > 0) {
-            needsModification = true;
-            enhancedUsageMetadata = {
-              input_tokens:
-                chunk.message.usage_metadata?.input_tokens ?? inputTokens,
-              output_tokens:
-                chunk.message.usage_metadata?.output_tokens ?? outputTokens,
-              total_tokens:
-                chunk.message.usage_metadata?.total_tokens ??
-                (usage.totalTokens as number | undefined) ??
-                0,
-              input_token_details: {
-                cache_read: cacheRead,
-                cache_creation: cacheWrite,
-              },
-            };
-          }
-        }
+    try {
+      // Use parent's streaming logic which correctly handles reasoning content
+      const baseStream = super._streamResponseChunks(
+        messages,
+        options,
+        runManager
+      );
+
+      for await (const chunk of baseStream) {
+        // Clean and enhance chunk
+        yield this.processChunk(chunk);
+      }
+    } finally {
+      // Restore original model
+      this.model = originalModel;
+    }
+  }
 
-    [12 removed lines (old 204-215) not rendered in the original diff view]
+  /**
+   * Process a chunk by:
+   * 1. Removing contentBlockIndex from response_metadata
+   * 2. Extracting cache token information from Bedrock's usage data
+   */
+  private processChunk(chunk: ChatGenerationChunk): ChatGenerationChunk {
+    const message = chunk.message;
+    if (!(message instanceof AIMessageChunk)) {
+      return chunk;
+    }
+
+    const responseMetadata = message.response_metadata as Record<
+      string,
+      unknown
+    >;
+    let needsModification = false;
+    let cleanedMetadata = responseMetadata;
+
+    // Check if contentBlockIndex exists anywhere in response_metadata
+    const hasContentBlockIndex = this.hasContentBlockIndex(responseMetadata);
+    if (hasContentBlockIndex) {
+      cleanedMetadata = this.removeContentBlockIndex(
+        responseMetadata
+      ) as Record<string, unknown>;
+      needsModification = true;
+    }
+
+    // Extract cache tokens from metadata.usage (Bedrock streaming format)
+    // The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
+    const metadata = responseMetadata.metadata as
+      | Record<string, unknown>
+      | undefined;
+    const usage = (metadata?.usage ?? responseMetadata.usage) as
+      | Record<string, unknown>
+      | undefined;
+
+    let enhancedUsageMetadata: UsageMetadata | undefined =
+      message.usage_metadata;
+
+    if (usage) {
+      const cacheRead = (usage.cacheReadInputTokens as number | undefined) ?? 0;
+      const cacheWrite =
+        (usage.cacheWriteInputTokens as number | undefined) ?? 0;
+      const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
+      const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
+
+      if (cacheRead > 0 || cacheWrite > 0) {
+        needsModification = true;
+        enhancedUsageMetadata = {
+          input_tokens: message.usage_metadata?.input_tokens ?? inputTokens,
+          output_tokens: message.usage_metadata?.output_tokens ?? outputTokens,
+          total_tokens:
            message.usage_metadata?.total_tokens ??
            (usage.totalTokens as number | undefined) ??
            0,
+          input_token_details: {
+            cache_read: cacheRead,
+            cache_creation: cacheWrite,
+          },
+        };
      }
+    }
 
-
+    if (needsModification) {
+      return new ChatGenerationChunk({
+        text: chunk.text,
+        message: new AIMessageChunk({
+          ...message,
+          response_metadata: cleanedMetadata,
+          usage_metadata: enhancedUsageMetadata,
+        }),
+        generationInfo: chunk.generationInfo,
+      });
    }
+
+    return chunk;
  }
 
  /**
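
Usage sketch: a minimal example of how the new constructor options above might be wired up. This assumes CustomChatBedrockConverse is re-exported from the package entry point; the model ID and region are illustrative placeholders, and the inference-profile ARN is the example ARN from the JSDoc above.

import { HumanMessage } from '@langchain/core/messages';
// Assumption: the wrapper is re-exported from the package root.
import { CustomChatBedrockConverse } from 'illuma-agents';

const llm = new CustomChatBedrockConverse({
  model: 'anthropic.claude-3-5-sonnet-20240620-v1:0', // illustrative model ID
  region: 'eu-west-1',
  // Adds cachePoint markers to the tools array (Claude models only)
  promptCache: true,
  // Inference calls use this ARN instead of the model ID (example ARN from the JSDoc above)
  applicationInferenceProfile:
    'arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx',
  // Requires AWS SDK >= 3.966.0; omit to use the default tier
  serviceTier: 'flex',
});

const reply = await llm.invoke([new HumanMessage('Hello from Bedrock')]);
console.log(reply.content);

Note that invocationParams resolves the tier as options?.serviceTier ?? this.serviceTier, so a serviceTier supplied at call time via CustomChatBedrockConverseCallOptions takes precedence over the constructor value.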