illuma-agents 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +3 -1
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +18 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +79 -32
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +5 -3
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +1 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +10 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +7 -8
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +15 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +11 -6
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +16 -8
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +9 -2
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/tools.cjs +17 -10
- package/dist/cjs/messages/tools.cjs.map +1 -1
- package/dist/cjs/stream.cjs +30 -16
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +73 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +1 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +3 -1
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/utils/contextAnalytics.cjs +66 -0
- package/dist/cjs/utils/contextAnalytics.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/toonFormat.cjs +388 -0
- package/dist/cjs/utils/toonFormat.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +3 -1
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +19 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +81 -34
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +5 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +1 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +10 -1
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +7 -8
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +11 -6
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +18 -10
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +10 -3
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/tools.mjs +19 -12
- package/dist/esm/messages/tools.mjs.map +1 -1
- package/dist/esm/stream.mjs +30 -16
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +73 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +1 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +3 -1
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/utils/contextAnalytics.mjs +64 -0
- package/dist/esm/utils/contextAnalytics.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/toonFormat.mjs +381 -0
- package/dist/esm/utils/toonFormat.mjs.map +1 -0
- package/dist/types/common/enum.d.ts +17 -0
- package/dist/types/graphs/Graph.d.ts +8 -0
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
- package/dist/types/types/tools.d.ts +3 -1
- package/dist/types/utils/contextAnalytics.d.ts +37 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/toonFormat.d.ts +111 -0
- package/package.json +3 -2
- package/src/agents/AgentContext.ts +28 -20
- package/src/common/enum.ts +18 -0
- package/src/graphs/Graph.ts +152 -62
- package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
- package/src/llm/bedrock/index.ts +47 -35
- package/src/llm/openrouter/index.ts +11 -1
- package/src/llm/vertexai/index.ts +9 -10
- package/src/messages/cache.ts +104 -55
- package/src/messages/core.ts +29 -19
- package/src/messages/format.ts +14 -3
- package/src/messages/tools.ts +20 -13
- package/src/scripts/simple.ts +1 -1
- package/src/specs/emergency-prune.test.ts +407 -355
- package/src/stream.ts +28 -20
- package/src/tools/ProgrammaticToolCalling.ts +246 -52
- package/src/tools/ToolNode.ts +78 -5
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
- package/src/tools/search/jina-reranker.test.ts +32 -28
- package/src/tools/search/search.ts +3 -1
- package/src/tools/search/tool.ts +16 -7
- package/src/types/tools.ts +3 -1
- package/src/utils/contextAnalytics.ts +103 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llmConfig.ts +8 -1
- package/src/utils/run.ts +5 -4
- package/src/utils/toonFormat.ts +475 -0
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -29,7 +29,8 @@ import { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
|
29
29
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
30
30
|
|
|
31
31
|
/** Extended input type with promptCache option */
|
|
32
|
-
export interface CustomChatBedrockConverseInput
|
|
32
|
+
export interface CustomChatBedrockConverseInput
|
|
33
|
+
extends ChatBedrockConverseInput {
|
|
33
34
|
promptCache?: boolean;
|
|
34
35
|
}
|
|
35
36
|
|
|
@@ -48,12 +49,12 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
48
49
|
/**
|
|
49
50
|
* Override invocationParams to add cachePoint to tools when promptCache is enabled.
|
|
50
51
|
* This enables Bedrock prompt caching for tool definitions.
|
|
51
|
-
*
|
|
52
|
+
*
|
|
52
53
|
* STRATEGY: Separate cachePoints for core tools and MCP tools
|
|
53
54
|
* - Core tools (web_search, execute_code, etc.) are stable → cache first
|
|
54
55
|
* - MCP tools (have '_mcp_' in name) are dynamic → cache separately after
|
|
55
56
|
* - This allows core tools to stay cached when MCP selection changes
|
|
56
|
-
*
|
|
57
|
+
*
|
|
57
58
|
* NOTE: Only Claude models support cachePoint - Nova and other models will reject it.
|
|
58
59
|
*/
|
|
59
60
|
invocationParams(
|
|
@@ -63,8 +64,9 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
63
64
|
|
|
64
65
|
// Add cachePoint to tools array if promptCache is enabled and tools exist
|
|
65
66
|
// Only Claude models support cachePoint - check model name
|
|
66
|
-
const modelId = this.model
|
|
67
|
-
const isClaudeModel =
|
|
67
|
+
const modelId = this.model.toLowerCase();
|
|
68
|
+
const isClaudeModel =
|
|
69
|
+
modelId.includes('claude') || modelId.includes('anthropic');
|
|
68
70
|
|
|
69
71
|
if (
|
|
70
72
|
this.promptCache &&
|
|
@@ -79,10 +81,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
79
81
|
const mcpTools: typeof params.toolConfig.tools = [];
|
|
80
82
|
const coreToolNames: string[] = [];
|
|
81
83
|
const mcpToolNames: string[] = [];
|
|
82
|
-
|
|
84
|
+
|
|
83
85
|
for (const tool of params.toolConfig.tools) {
|
|
84
86
|
// Check if tool has a name property with '_mcp_' pattern
|
|
85
|
-
const toolName =
|
|
87
|
+
const toolName =
|
|
88
|
+
(tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
|
|
86
89
|
if (toolName.includes('_mcp_')) {
|
|
87
90
|
mcpTools.push(tool);
|
|
88
91
|
mcpToolNames.push(toolName);
|
|
@@ -92,35 +95,27 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
92
95
|
}
|
|
93
96
|
}
|
|
94
97
|
|
|
95
|
-
|
|
96
|
-
|
|
97
98
|
// Build tools array with strategic cachePoints:
|
|
98
99
|
// [CoreTool1, CoreTool2, cachePoint] + [MCPTool1, MCPTool2, cachePoint]
|
|
99
100
|
const toolsWithCache: typeof params.toolConfig.tools = [];
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
|
|
102
102
|
// Add core tools with cachePoint (if any)
|
|
103
103
|
if (coreTools.length > 0) {
|
|
104
104
|
toolsWithCache.push(...coreTools);
|
|
105
105
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
106
|
-
cachePointCount++;
|
|
107
106
|
}
|
|
108
|
-
|
|
107
|
+
|
|
109
108
|
// Add MCP tools with their own cachePoint (if any)
|
|
110
109
|
if (mcpTools.length > 0) {
|
|
111
110
|
toolsWithCache.push(...mcpTools);
|
|
112
111
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
113
|
-
cachePointCount++;
|
|
114
112
|
}
|
|
115
|
-
|
|
113
|
+
|
|
116
114
|
// If no tools at all (shouldn't happen but safety check)
|
|
117
115
|
if (toolsWithCache.length === 0) {
|
|
118
116
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
119
|
-
cachePointCount++;
|
|
120
117
|
}
|
|
121
|
-
|
|
122
118
|
|
|
123
|
-
|
|
124
119
|
params.toolConfig.tools = toolsWithCache;
|
|
125
120
|
}
|
|
126
121
|
|
|
@@ -150,37 +145,54 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
150
145
|
(chunk.message as Partial<AIMessageChunk>).response_metadata &&
|
|
151
146
|
typeof chunk.message.response_metadata === 'object'
|
|
152
147
|
) {
|
|
153
|
-
const responseMetadata = chunk.message.response_metadata as Record<
|
|
148
|
+
const responseMetadata = chunk.message.response_metadata as Record<
|
|
149
|
+
string,
|
|
150
|
+
unknown
|
|
151
|
+
>;
|
|
154
152
|
let needsModification = false;
|
|
155
153
|
let cleanedMetadata = responseMetadata;
|
|
156
154
|
|
|
157
155
|
// Check if contentBlockIndex exists anywhere in response_metadata
|
|
158
|
-
const hasContentBlockIndex =
|
|
156
|
+
const hasContentBlockIndex =
|
|
157
|
+
this.hasContentBlockIndex(responseMetadata);
|
|
159
158
|
if (hasContentBlockIndex) {
|
|
160
|
-
cleanedMetadata = this.removeContentBlockIndex(
|
|
159
|
+
cleanedMetadata = this.removeContentBlockIndex(
|
|
160
|
+
responseMetadata
|
|
161
|
+
) as Record<string, unknown>;
|
|
161
162
|
needsModification = true;
|
|
162
163
|
}
|
|
163
164
|
|
|
164
165
|
// Extract cache tokens from metadata.usage (Bedrock streaming format)
|
|
165
166
|
// The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
|
|
166
|
-
const metadata = responseMetadata.metadata as
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
167
|
+
const metadata = responseMetadata.metadata as
|
|
168
|
+
| Record<string, unknown>
|
|
169
|
+
| undefined;
|
|
170
|
+
const usage = (metadata?.usage ?? responseMetadata.usage) as
|
|
171
|
+
| Record<string, unknown>
|
|
172
|
+
| undefined;
|
|
173
|
+
|
|
174
|
+
let enhancedUsageMetadata: UsageMetadata | undefined =
|
|
175
|
+
chunk.message.usage_metadata;
|
|
176
|
+
|
|
171
177
|
if (usage) {
|
|
172
|
-
const cacheRead =
|
|
173
|
-
|
|
174
|
-
const
|
|
175
|
-
|
|
176
|
-
|
|
178
|
+
const cacheRead =
|
|
179
|
+
(usage.cacheReadInputTokens as number | undefined) ?? 0;
|
|
180
|
+
const cacheWrite =
|
|
181
|
+
(usage.cacheWriteInputTokens as number | undefined) ?? 0;
|
|
182
|
+
const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
|
|
183
|
+
const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
|
|
184
|
+
|
|
177
185
|
if (cacheRead > 0 || cacheWrite > 0) {
|
|
178
|
-
|
|
179
186
|
needsModification = true;
|
|
180
187
|
enhancedUsageMetadata = {
|
|
181
|
-
input_tokens:
|
|
182
|
-
|
|
183
|
-
|
|
188
|
+
input_tokens:
|
|
189
|
+
chunk.message.usage_metadata?.input_tokens ?? inputTokens,
|
|
190
|
+
output_tokens:
|
|
191
|
+
chunk.message.usage_metadata?.output_tokens ?? outputTokens,
|
|
192
|
+
total_tokens:
|
|
193
|
+
chunk.message.usage_metadata?.total_tokens ??
|
|
194
|
+
(usage.totalTokens as number | undefined) ??
|
|
195
|
+
0,
|
|
184
196
|
input_token_details: {
|
|
185
197
|
cache_read: cacheRead,
|
|
186
198
|
cache_creation: cacheWrite,
|
package/src/llm/openrouter/index.ts
CHANGED
|
@@ -128,6 +128,8 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
128
128
|
// Accumulate reasoning_details from each delta
|
|
129
129
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
130
130
|
const deltaAny = delta as Record<string, any>;
|
|
131
|
+
// Extract current chunk's reasoning text for streaming (before accumulation)
|
|
132
|
+
let currentChunkReasoningText = '';
|
|
131
133
|
if (
|
|
132
134
|
deltaAny.reasoning_details != null &&
|
|
133
135
|
Array.isArray(deltaAny.reasoning_details)
|
|
@@ -143,7 +145,9 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
143
145
|
index: detail.index,
|
|
144
146
|
});
|
|
145
147
|
} else if (detail.type === 'reasoning.text') {
|
|
146
|
-
//
|
|
148
|
+
// Extract current chunk's text for streaming
|
|
149
|
+
currentChunkReasoningText += detail.text || '';
|
|
150
|
+
// For text reasoning, accumulate text by index for final message
|
|
147
151
|
const idx = detail.index ?? 0;
|
|
148
152
|
const existing = reasoningTextByIndex.get(idx);
|
|
149
153
|
if (existing) {
|
|
@@ -167,6 +171,12 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
167
171
|
defaultRole
|
|
168
172
|
);
|
|
169
173
|
|
|
174
|
+
// For models that send reasoning_details (Gemini style) instead of reasoning (DeepSeek style),
|
|
175
|
+
// set the current chunk's reasoning text to additional_kwargs.reasoning for streaming
|
|
176
|
+
if (currentChunkReasoningText && !chunk.additional_kwargs.reasoning) {
|
|
177
|
+
chunk.additional_kwargs.reasoning = currentChunkReasoningText;
|
|
178
|
+
}
|
|
179
|
+
|
|
170
180
|
// IMPORTANT: Only set reasoning_details on the FINAL chunk to prevent
|
|
171
181
|
// LangChain's chunk concatenation from corrupting the array
|
|
172
182
|
// Check if this is the final chunk (has finish_reason)
|
package/src/llm/vertexai/index.ts
CHANGED
|
@@ -17,11 +17,14 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
|
|
|
17
17
|
input,
|
|
18
18
|
parameters
|
|
19
19
|
)) as GeminiRequest;
|
|
20
|
-
if (
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
if (formattedData.generationConfig?.thinkingConfig?.thinkingBudget === -1) {
|
|
21
|
+
// -1 means "let the model decide" - delete the property so the API doesn't receive an invalid value
|
|
22
|
+
if (
|
|
23
|
+
formattedData.generationConfig.thinkingConfig.includeThoughts === false
|
|
24
|
+
) {
|
|
25
|
+
formattedData.generationConfig.thinkingConfig.includeThoughts = true;
|
|
26
|
+
}
|
|
27
|
+
delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
|
|
25
28
|
}
|
|
26
29
|
return formattedData;
|
|
27
30
|
}
|
|
@@ -318,11 +321,7 @@ export class ChatVertexAI extends ChatGoogle {
|
|
|
318
321
|
}
|
|
319
322
|
|
|
320
323
|
constructor(fields?: VertexAIClientOptions) {
|
|
321
|
-
|
|
322
|
-
if (fields?.thinkingBudget === -1) {
|
|
323
|
-
dynamicThinkingBudget = true;
|
|
324
|
-
fields.thinkingBudget = 1;
|
|
325
|
-
}
|
|
324
|
+
const dynamicThinkingBudget = fields?.thinkingBudget === -1;
|
|
326
325
|
super({
|
|
327
326
|
...fields,
|
|
328
327
|
platformType: 'gcp',
|
package/src/messages/cache.ts
CHANGED
|
@@ -8,10 +8,13 @@ type MessageWithContent = {
|
|
|
8
8
|
};
|
|
9
9
|
|
|
10
10
|
/** Debug logger for cache operations - set ILLUMA_DEBUG_CACHE=true to enable */
|
|
11
|
-
const debugCache = (message: string, data?: unknown) => {
|
|
11
|
+
const debugCache = (message: string, data?: unknown): void => {
|
|
12
12
|
if (process.env.ILLUMA_DEBUG_CACHE === 'true') {
|
|
13
13
|
// eslint-disable-next-line no-console
|
|
14
|
-
console.log(
|
|
14
|
+
console.log(
|
|
15
|
+
`[Cache] ${message}`,
|
|
16
|
+
data !== undefined ? JSON.stringify(data, null, 2) : ''
|
|
17
|
+
);
|
|
15
18
|
}
|
|
16
19
|
};
|
|
17
20
|
|
|
@@ -148,26 +151,26 @@ export function stripBedrockCacheControl<T extends MessageWithContent>(
|
|
|
148
151
|
|
|
149
152
|
/**
|
|
150
153
|
* Adds Bedrock Converse API cache points using "Stable Prefix Caching" strategy.
|
|
151
|
-
*
|
|
154
|
+
*
|
|
152
155
|
* STRATEGY: Place cache point after the LAST ASSISTANT message only.
|
|
153
156
|
* This ensures the prefix (everything before the cache point) remains STABLE
|
|
154
157
|
* as the conversation grows, maximizing cache hits.
|
|
155
|
-
*
|
|
158
|
+
*
|
|
156
159
|
* Why this works:
|
|
157
160
|
* - System message has its own cachePoint (added in AgentContext)
|
|
158
161
|
* - Tools have their own cachePoint (added in CustomChatBedrockConverse)
|
|
159
162
|
* - Conversation history grows, but the PREFIX stays the same
|
|
160
163
|
* - Only the NEW user message is uncached (it's always different)
|
|
161
|
-
*
|
|
164
|
+
*
|
|
162
165
|
* Example conversation flow:
|
|
163
166
|
* Request 1: [System+cachePoint][Tools+cachePoint][User1] → No conversation cache yet
|
|
164
167
|
* Request 2: [System][Tools][User1][Assistant1+cachePoint][User2] → Cache User1+Assistant1
|
|
165
168
|
* Request 3: [System][Tools][User1][Assistant1][User2][Assistant2+cachePoint][User3]
|
|
166
169
|
* → Cache reads User1+A1+User2+A2, cache writes new portion
|
|
167
|
-
*
|
|
170
|
+
*
|
|
168
171
|
* Claude's "Simplified Cache Management" automatically looks back up to 20 content
|
|
169
172
|
* blocks from the cache checkpoint to find the longest matching prefix.
|
|
170
|
-
*
|
|
173
|
+
*
|
|
171
174
|
* @param messages - The array of message objects (excluding system message).
|
|
172
175
|
* @returns - The updated array with a single cache point after the last assistant message.
|
|
173
176
|
*/
|
|
@@ -175,16 +178,21 @@ export function addBedrockCacheControl<
|
|
|
175
178
|
T extends Partial<BaseMessage> & MessageWithContent,
|
|
176
179
|
>(messages: T[]): T[] {
|
|
177
180
|
if (!Array.isArray(messages) || messages.length < 1) {
|
|
178
|
-
debugCache('addBedrockCacheControl: Skipping - no messages', {
|
|
181
|
+
debugCache('addBedrockCacheControl: Skipping - no messages', {
|
|
182
|
+
count: messages.length,
|
|
183
|
+
});
|
|
179
184
|
return messages;
|
|
180
185
|
}
|
|
181
186
|
|
|
182
|
-
debugCache(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
187
|
+
debugCache(
|
|
188
|
+
'addBedrockCacheControl: Processing messages with stable prefix strategy',
|
|
189
|
+
{
|
|
190
|
+
count: messages.length,
|
|
191
|
+
}
|
|
192
|
+
);
|
|
193
|
+
|
|
186
194
|
const updatedMessages: T[] = messages.slice();
|
|
187
|
-
|
|
195
|
+
|
|
188
196
|
// First pass: Remove ALL existing cache points to ensure clean state
|
|
189
197
|
// This prevents accumulation of stale cache points
|
|
190
198
|
for (const message of updatedMessages) {
|
|
@@ -214,7 +222,7 @@ export function addBedrockCacheControl<
|
|
|
214
222
|
const type = (block as { type?: string }).type;
|
|
215
223
|
// Check for all reasoning/thinking block types:
|
|
216
224
|
// - reasoning_content: Bedrock Anthropic extended thinking
|
|
217
|
-
// - reasoning: Generic reasoning format
|
|
225
|
+
// - reasoning: Generic reasoning format
|
|
218
226
|
// - thinking: Anthropic direct API thinking
|
|
219
227
|
// - redacted_thinking: Anthropic redacted thinking blocks
|
|
220
228
|
if (
|
|
@@ -233,27 +241,32 @@ export function addBedrockCacheControl<
|
|
|
233
241
|
// Messages with reasoning/thinking blocks cannot have cache points after them (Bedrock limitation)
|
|
234
242
|
let lastAssistantIndex = -1;
|
|
235
243
|
let skippedWithReasoning = 0;
|
|
236
|
-
|
|
244
|
+
|
|
237
245
|
// Count message types for logging
|
|
238
246
|
const messageTypes: Record<string, number> = {};
|
|
239
247
|
for (const message of updatedMessages) {
|
|
240
|
-
const msgType =
|
|
241
|
-
|
|
242
|
-
|
|
248
|
+
const msgType =
|
|
249
|
+
'getType' in message && typeof message.getType === 'function'
|
|
250
|
+
? message.getType()
|
|
251
|
+
: 'unknown';
|
|
243
252
|
messageTypes[msgType] = (messageTypes[msgType] || 0) + 1;
|
|
244
253
|
}
|
|
245
|
-
|
|
254
|
+
|
|
246
255
|
for (let i = updatedMessages.length - 1; i >= 0; i--) {
|
|
247
256
|
const message = updatedMessages[i];
|
|
248
|
-
const messageType =
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
257
|
+
const messageType =
|
|
258
|
+
'getType' in message && typeof message.getType === 'function'
|
|
259
|
+
? message.getType()
|
|
260
|
+
: 'unknown';
|
|
261
|
+
|
|
252
262
|
if (messageType === 'ai') {
|
|
253
263
|
// Skip assistant messages with reasoning blocks - cache points not allowed after them
|
|
254
264
|
if (hasReasoningBlock(message)) {
|
|
255
265
|
skippedWithReasoning++;
|
|
256
|
-
debugCache(
|
|
266
|
+
debugCache(
|
|
267
|
+
'addBedrockCacheControl: Skipping assistant message with reasoning block',
|
|
268
|
+
{ index: i }
|
|
269
|
+
);
|
|
257
270
|
continue;
|
|
258
271
|
}
|
|
259
272
|
lastAssistantIndex = i;
|
|
@@ -262,12 +275,20 @@ export function addBedrockCacheControl<
|
|
|
262
275
|
}
|
|
263
276
|
|
|
264
277
|
// Log message summary
|
|
265
|
-
debugCache(
|
|
278
|
+
debugCache(
|
|
279
|
+
`📨 Messages | total=${updatedMessages.length} | ${Object.entries(
|
|
280
|
+
messageTypes
|
|
281
|
+
)
|
|
282
|
+
.map(([k, v]) => `${k}:${v}`)
|
|
283
|
+
.join(' ')} | skippedReasoning=${skippedWithReasoning}`
|
|
284
|
+
);
|
|
266
285
|
|
|
267
286
|
// If no suitable assistant message found, skip conversation caching
|
|
268
287
|
// (System and Tools caching are still handled separately)
|
|
269
288
|
if (lastAssistantIndex === -1) {
|
|
270
|
-
debugCache(
|
|
289
|
+
debugCache(
|
|
290
|
+
'📨 Messages | No suitable assistant message for cachePoint (first turn or all have reasoning)'
|
|
291
|
+
);
|
|
271
292
|
return updatedMessages;
|
|
272
293
|
}
|
|
273
294
|
|
|
@@ -280,22 +301,35 @@ export function addBedrockCacheControl<
|
|
|
280
301
|
{ type: ContentTypes.TEXT, text: content },
|
|
281
302
|
{ cachePoint: { type: 'default' } },
|
|
282
303
|
] as MessageContentComplex[];
|
|
283
|
-
debugCache(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
304
|
+
debugCache(
|
|
305
|
+
`📍 Message cachePoint at index ${lastAssistantIndex} (string, ${content.length} chars)`
|
|
306
|
+
);
|
|
307
|
+
debugCache(
|
|
308
|
+
'addBedrockCacheControl: Added cachePoint to assistant message (string content)',
|
|
309
|
+
{
|
|
310
|
+
index: lastAssistantIndex,
|
|
311
|
+
contentLength: content.length,
|
|
312
|
+
}
|
|
313
|
+
);
|
|
314
|
+
} else if (
|
|
315
|
+
Array.isArray(assistantMessage.content) &&
|
|
316
|
+
assistantMessage.content.length > 0
|
|
317
|
+
) {
|
|
289
318
|
// Double-check: If this message has reasoning blocks, skip adding cache point entirely
|
|
290
319
|
// This handles edge cases where the initial skip check might have missed it
|
|
291
320
|
if (hasReasoningBlock(assistantMessage)) {
|
|
292
|
-
debugCache(
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
321
|
+
debugCache(
|
|
322
|
+
`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (has reasoning blocks)`
|
|
323
|
+
);
|
|
324
|
+
debugCache(
|
|
325
|
+
'addBedrockCacheControl: Skipping - assistant message has reasoning blocks (safety check)',
|
|
326
|
+
{
|
|
327
|
+
index: lastAssistantIndex,
|
|
328
|
+
}
|
|
329
|
+
);
|
|
296
330
|
return updatedMessages;
|
|
297
331
|
}
|
|
298
|
-
|
|
332
|
+
|
|
299
333
|
// Find the last text block and insert cache point after it
|
|
300
334
|
let inserted = false;
|
|
301
335
|
for (let j = assistantMessage.content.length - 1; j >= 0; j--) {
|
|
@@ -303,37 +337,52 @@ export function addBedrockCacheControl<
|
|
|
303
337
|
const type = (block as { type?: string }).type;
|
|
304
338
|
if (type === ContentTypes.TEXT || type === 'text') {
|
|
305
339
|
const text = (block as { text?: string }).text;
|
|
306
|
-
if (text && text !== '') {
|
|
340
|
+
if (text != null && text !== '') {
|
|
307
341
|
assistantMessage.content.splice(j + 1, 0, {
|
|
308
342
|
cachePoint: { type: 'default' },
|
|
309
343
|
} as MessageContentComplex);
|
|
310
344
|
inserted = true;
|
|
311
|
-
debugCache(
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
345
|
+
debugCache(
|
|
346
|
+
`📍 Message cachePoint at index ${lastAssistantIndex} (array, block ${j}, ${text.length} chars)`
|
|
347
|
+
);
|
|
348
|
+
debugCache(
|
|
349
|
+
'addBedrockCacheControl: Added cachePoint after text block in assistant message',
|
|
350
|
+
{
|
|
351
|
+
index: lastAssistantIndex,
|
|
352
|
+
textBlockIndex: j,
|
|
353
|
+
contentLength: text.length,
|
|
354
|
+
}
|
|
355
|
+
);
|
|
317
356
|
break;
|
|
318
357
|
}
|
|
319
358
|
}
|
|
320
359
|
}
|
|
321
|
-
|
|
360
|
+
|
|
322
361
|
// If no text block found, don't append cache point as the message structure is unexpected
|
|
323
362
|
if (!inserted) {
|
|
324
|
-
const contentTypes = assistantMessage.content.map(
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
contentTypes,
|
|
329
|
-
|
|
363
|
+
const contentTypes = assistantMessage.content.map(
|
|
364
|
+
(b) => (b as { type?: string }).type
|
|
365
|
+
);
|
|
366
|
+
debugCache(
|
|
367
|
+
`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (no text block, types: ${contentTypes.join(',')})`
|
|
368
|
+
);
|
|
369
|
+
debugCache(
|
|
370
|
+
'addBedrockCacheControl: No suitable text block found, skipping cache point',
|
|
371
|
+
{
|
|
372
|
+
index: lastAssistantIndex,
|
|
373
|
+
contentTypes,
|
|
374
|
+
}
|
|
375
|
+
);
|
|
330
376
|
}
|
|
331
377
|
}
|
|
332
378
|
|
|
333
|
-
debugCache(
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
379
|
+
debugCache(
|
|
380
|
+
'addBedrockCacheControl: Complete - stable prefix caching applied',
|
|
381
|
+
{
|
|
382
|
+
lastAssistantIndex,
|
|
383
|
+
totalMessages: updatedMessages.length,
|
|
384
|
+
}
|
|
385
|
+
);
|
|
337
386
|
|
|
338
387
|
return updatedMessages;
|
|
339
388
|
}
|
package/src/messages/core.ts
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from '@langchain/core/messages';
|
|
9
9
|
import type { ToolCall } from '@langchain/core/messages/tool';
|
|
10
10
|
import type * as t from '@/types';
|
|
11
|
-
import { Providers } from '@/common';
|
|
11
|
+
import { Providers, MessageTypes } from '@/common';
|
|
12
12
|
|
|
13
13
|
export function getConverseOverrideMessage({
|
|
14
14
|
userMessage,
|
|
@@ -346,7 +346,9 @@ export function convertMessagesToContent(
|
|
|
346
346
|
|
|
347
347
|
export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
348
348
|
const lastMessage = messages[messages.length - 1];
|
|
349
|
-
|
|
349
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
350
|
+
if (lastMessage.getType() !== 'tool') return;
|
|
351
|
+
const lastToolMessage = lastMessage as ToolMessage;
|
|
350
352
|
|
|
351
353
|
// Find the latest AIMessage with tool_calls that this tool message belongs to
|
|
352
354
|
const latestAIParentIndex = findLastIndex(
|
|
@@ -354,20 +356,21 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
|
354
356
|
(msg) =>
|
|
355
357
|
(msg instanceof AIMessageChunk &&
|
|
356
358
|
(msg.tool_calls?.length ?? 0) > 0 &&
|
|
357
|
-
msg.tool_calls?.some((tc) => tc.id ===
|
|
359
|
+
msg.tool_calls?.some((tc) => tc.id === lastToolMessage.tool_call_id)) ??
|
|
358
360
|
false
|
|
359
361
|
);
|
|
360
362
|
|
|
361
363
|
if (latestAIParentIndex === -1) return;
|
|
362
364
|
|
|
363
365
|
// Check if any tool message after the AI message has array artifact content
|
|
366
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
364
367
|
const hasArtifactContent = messages.some(
|
|
365
368
|
(msg, i) =>
|
|
366
369
|
i > latestAIParentIndex &&
|
|
367
|
-
msg
|
|
368
|
-
msg.artifact != null &&
|
|
369
|
-
msg.artifact?.content != null &&
|
|
370
|
-
Array.isArray(msg.artifact.content)
|
|
370
|
+
msg.getType() === MessageTypes.TOOL &&
|
|
371
|
+
(msg as ToolMessage).artifact != null &&
|
|
372
|
+
(msg as ToolMessage).artifact?.content != null &&
|
|
373
|
+
Array.isArray((msg as ToolMessage).artifact.content)
|
|
371
374
|
);
|
|
372
375
|
|
|
373
376
|
if (!hasArtifactContent) return;
|
|
@@ -377,21 +380,26 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
|
377
380
|
|
|
378
381
|
for (let j = latestAIParentIndex + 1; j < messages.length; j++) {
|
|
379
382
|
const msg = messages[j];
|
|
383
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
380
384
|
if (
|
|
381
|
-
msg
|
|
382
|
-
toolCallIds.includes(msg.tool_call_id) &&
|
|
383
|
-
msg.artifact != null &&
|
|
384
|
-
Array.isArray(msg.artifact?.content) &&
|
|
385
|
+
msg.getType() === MessageTypes.TOOL &&
|
|
386
|
+
toolCallIds.includes((msg as ToolMessage).tool_call_id) &&
|
|
387
|
+
(msg as ToolMessage).artifact != null &&
|
|
388
|
+
Array.isArray((msg as ToolMessage).artifact?.content) &&
|
|
385
389
|
Array.isArray(msg.content)
|
|
386
390
|
) {
|
|
387
|
-
msg.content = msg.content.concat(
|
|
391
|
+
msg.content = (msg.content as t.MessageContentComplex[]).concat(
|
|
392
|
+
(msg as ToolMessage).artifact.content
|
|
393
|
+
);
|
|
388
394
|
}
|
|
389
395
|
}
|
|
390
396
|
}
|
|
391
397
|
|
|
392
398
|
export function formatArtifactPayload(messages: BaseMessage[]): void {
|
|
393
399
|
const lastMessageY = messages[messages.length - 1];
|
|
394
|
-
|
|
400
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
401
|
+
if (lastMessageY.getType() !== 'tool') return;
|
|
402
|
+
const lastToolMessage = lastMessageY as ToolMessage;
|
|
395
403
|
|
|
396
404
|
// Find the latest AIMessage with tool_calls that this tool message belongs to
|
|
397
405
|
const latestAIParentIndex = findLastIndex(
|
|
@@ -399,28 +407,30 @@ export function formatArtifactPayload(messages: BaseMessage[]): void {
|
|
|
399
407
|
(msg) =>
|
|
400
408
|
(msg instanceof AIMessageChunk &&
|
|
401
409
|
(msg.tool_calls?.length ?? 0) > 0 &&
|
|
402
|
-
msg.tool_calls?.some((tc) => tc.id ===
|
|
410
|
+
msg.tool_calls?.some((tc) => tc.id === lastToolMessage.tool_call_id)) ??
|
|
403
411
|
false
|
|
404
412
|
);
|
|
405
413
|
|
|
406
414
|
if (latestAIParentIndex === -1) return;
|
|
407
415
|
|
|
408
416
|
// Check if any tool message after the AI message has array artifact content
|
|
417
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
409
418
|
const hasArtifactContent = messages.some(
|
|
410
419
|
(msg, i) =>
|
|
411
420
|
i > latestAIParentIndex &&
|
|
412
|
-
msg
|
|
413
|
-
msg.artifact != null &&
|
|
414
|
-
msg.artifact?.content != null &&
|
|
415
|
-
Array.isArray(msg.artifact.content)
|
|
421
|
+
msg.getType() === MessageTypes.TOOL &&
|
|
422
|
+
(msg as ToolMessage).artifact != null &&
|
|
423
|
+
(msg as ToolMessage).artifact?.content != null &&
|
|
424
|
+
Array.isArray((msg as ToolMessage).artifact.content)
|
|
416
425
|
);
|
|
417
426
|
|
|
418
427
|
if (!hasArtifactContent) return;
|
|
419
428
|
|
|
420
429
|
// Collect all relevant tool messages and their artifacts
|
|
430
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
421
431
|
const relevantMessages = messages
|
|
422
432
|
.slice(latestAIParentIndex + 1)
|
|
423
|
-
.filter((msg) => msg
|
|
433
|
+
.filter((msg) => msg.getType() === MessageTypes.TOOL) as ToolMessage[];
|
|
424
434
|
|
|
425
435
|
// Aggregate all content and artifacts
|
|
426
436
|
const aggregatedContent: t.MessageContentComplex[] = [];
|
package/src/messages/format.ts
CHANGED
|
@@ -19,7 +19,8 @@ import type {
|
|
|
19
19
|
TPayload,
|
|
20
20
|
TMessage,
|
|
21
21
|
} from '@/types';
|
|
22
|
-
import { Providers, ContentTypes } from '@/common';
|
|
22
|
+
import { Providers, ContentTypes, MessageTypes } from '@/common';
|
|
23
|
+
import { processToolOutput } from '@/utils/toonFormat';
|
|
23
24
|
|
|
24
25
|
interface MediaMessageParams {
|
|
25
26
|
message: {
|
|
@@ -359,11 +360,16 @@ function formatAssistantMessage(
|
|
|
359
360
|
}
|
|
360
361
|
lastAIMessage.tool_calls.push(tool_call as ToolCall);
|
|
361
362
|
|
|
363
|
+
// Apply TOON compression to historical tool outputs for context efficiency
|
|
364
|
+
// processToolOutput handles: JSON→TOON conversion, already-TOON detection (skip), truncation
|
|
365
|
+
const processedOutput =
|
|
366
|
+
output != null ? processToolOutput(output).content : '';
|
|
367
|
+
|
|
362
368
|
formattedMessages.push(
|
|
363
369
|
new ToolMessage({
|
|
364
370
|
tool_call_id: tool_call.id ?? '',
|
|
365
371
|
name: tool_call.name,
|
|
366
|
-
content:
|
|
372
|
+
content: processedOutput,
|
|
367
373
|
})
|
|
368
374
|
);
|
|
369
375
|
} else if (part.type === ContentTypes.THINK) {
|
|
@@ -898,7 +904,12 @@ export function ensureThinkingBlockInMessages(
|
|
|
898
904
|
let j = i + 1;
|
|
899
905
|
|
|
900
906
|
// Look ahead for tool messages that belong to this AI message
|
|
901
|
-
|
|
907
|
+
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
908
|
+
// where different copies of ToolMessage class might be loaded
|
|
909
|
+
while (
|
|
910
|
+
j < messages.length &&
|
|
911
|
+
messages[j].getType() === MessageTypes.TOOL
|
|
912
|
+
) {
|
|
902
913
|
toolSequence.push(messages[j]);
|
|
903
914
|
j++;
|
|
904
915
|
}
|