illuma-agents 1.0.17 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +3 -1
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +18 -9
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +5 -3
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +10 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +7 -8
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +11 -6
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +2 -2
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +2 -1
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/tools.cjs +2 -2
- package/dist/cjs/messages/tools.cjs.map +1 -1
- package/dist/cjs/stream.cjs +29 -16
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +1 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +3 -1
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/utils/contextAnalytics.cjs +7 -5
- package/dist/cjs/utils/contextAnalytics.cjs.map +1 -1
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/toonFormat.cjs +42 -12
- package/dist/cjs/utils/toonFormat.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +3 -1
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +18 -9
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +5 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +10 -1
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +7 -8
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +1 -1
- package/dist/esm/messages/cache.mjs +11 -6
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +2 -2
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +2 -1
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/tools.mjs +2 -2
- package/dist/esm/messages/tools.mjs.map +1 -1
- package/dist/esm/stream.mjs +29 -16
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +1 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +3 -1
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/utils/contextAnalytics.mjs +7 -5
- package/dist/esm/utils/contextAnalytics.mjs.map +1 -1
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/toonFormat.mjs +42 -12
- package/dist/esm/utils/toonFormat.mjs.map +1 -1
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
- package/dist/types/types/tools.d.ts +3 -1
- package/package.json +2 -2
- package/src/agents/AgentContext.ts +28 -20
- package/src/graphs/Graph.ts +76 -37
- package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
- package/src/llm/bedrock/index.ts +47 -35
- package/src/llm/openrouter/index.ts +11 -1
- package/src/llm/vertexai/index.ts +9 -10
- package/src/messages/cache.ts +104 -55
- package/src/messages/core.ts +5 -3
- package/src/messages/format.ts +6 -2
- package/src/messages/tools.ts +2 -2
- package/src/scripts/simple.ts +1 -1
- package/src/specs/emergency-prune.test.ts +407 -355
- package/src/stream.ts +28 -20
- package/src/tools/ProgrammaticToolCalling.ts +246 -52
- package/src/tools/ToolNode.ts +4 -4
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
- package/src/tools/search/jina-reranker.test.ts +32 -28
- package/src/tools/search/search.ts +3 -1
- package/src/tools/search/tool.ts +16 -7
- package/src/types/tools.ts +3 -1
- package/src/utils/contextAnalytics.ts +103 -95
- package/src/utils/llmConfig.ts +8 -1
- package/src/utils/run.ts +5 -4
- package/src/utils/toonFormat.ts +475 -437
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -29,7 +29,8 @@ import { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
|
29
29
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
30
30
|
|
|
31
31
|
/** Extended input type with promptCache option */
|
|
32
|
-
export interface CustomChatBedrockConverseInput
|
|
32
|
+
export interface CustomChatBedrockConverseInput
|
|
33
|
+
extends ChatBedrockConverseInput {
|
|
33
34
|
promptCache?: boolean;
|
|
34
35
|
}
|
|
35
36
|
|
|
@@ -48,12 +49,12 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
48
49
|
/**
|
|
49
50
|
* Override invocationParams to add cachePoint to tools when promptCache is enabled.
|
|
50
51
|
* This enables Bedrock prompt caching for tool definitions.
|
|
51
|
-
*
|
|
52
|
+
*
|
|
52
53
|
* STRATEGY: Separate cachePoints for core tools and MCP tools
|
|
53
54
|
* - Core tools (web_search, execute_code, etc.) are stable → cache first
|
|
54
55
|
* - MCP tools (have '_mcp_' in name) are dynamic → cache separately after
|
|
55
56
|
* - This allows core tools to stay cached when MCP selection changes
|
|
56
|
-
*
|
|
57
|
+
*
|
|
57
58
|
* NOTE: Only Claude models support cachePoint - Nova and other models will reject it.
|
|
58
59
|
*/
|
|
59
60
|
invocationParams(
|
|
@@ -63,8 +64,9 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
63
64
|
|
|
64
65
|
// Add cachePoint to tools array if promptCache is enabled and tools exist
|
|
65
66
|
// Only Claude models support cachePoint - check model name
|
|
66
|
-
const modelId = this.model
|
|
67
|
-
const isClaudeModel =
|
|
67
|
+
const modelId = this.model.toLowerCase();
|
|
68
|
+
const isClaudeModel =
|
|
69
|
+
modelId.includes('claude') || modelId.includes('anthropic');
|
|
68
70
|
|
|
69
71
|
if (
|
|
70
72
|
this.promptCache &&
|
|
@@ -79,10 +81,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
79
81
|
const mcpTools: typeof params.toolConfig.tools = [];
|
|
80
82
|
const coreToolNames: string[] = [];
|
|
81
83
|
const mcpToolNames: string[] = [];
|
|
82
|
-
|
|
84
|
+
|
|
83
85
|
for (const tool of params.toolConfig.tools) {
|
|
84
86
|
// Check if tool has a name property with '_mcp_' pattern
|
|
85
|
-
const toolName =
|
|
87
|
+
const toolName =
|
|
88
|
+
(tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
|
|
86
89
|
if (toolName.includes('_mcp_')) {
|
|
87
90
|
mcpTools.push(tool);
|
|
88
91
|
mcpToolNames.push(toolName);
|
|
@@ -92,35 +95,27 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
92
95
|
}
|
|
93
96
|
}
|
|
94
97
|
|
|
95
|
-
|
|
96
|
-
|
|
97
98
|
// Build tools array with strategic cachePoints:
|
|
98
99
|
// [CoreTool1, CoreTool2, cachePoint] + [MCPTool1, MCPTool2, cachePoint]
|
|
99
100
|
const toolsWithCache: typeof params.toolConfig.tools = [];
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
|
|
102
102
|
// Add core tools with cachePoint (if any)
|
|
103
103
|
if (coreTools.length > 0) {
|
|
104
104
|
toolsWithCache.push(...coreTools);
|
|
105
105
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
106
|
-
cachePointCount++;
|
|
107
106
|
}
|
|
108
|
-
|
|
107
|
+
|
|
109
108
|
// Add MCP tools with their own cachePoint (if any)
|
|
110
109
|
if (mcpTools.length > 0) {
|
|
111
110
|
toolsWithCache.push(...mcpTools);
|
|
112
111
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
113
|
-
cachePointCount++;
|
|
114
112
|
}
|
|
115
|
-
|
|
113
|
+
|
|
116
114
|
// If no tools at all (shouldn't happen but safety check)
|
|
117
115
|
if (toolsWithCache.length === 0) {
|
|
118
116
|
toolsWithCache.push({ cachePoint: { type: 'default' } });
|
|
119
|
-
cachePointCount++;
|
|
120
117
|
}
|
|
121
|
-
|
|
122
118
|
|
|
123
|
-
|
|
124
119
|
params.toolConfig.tools = toolsWithCache;
|
|
125
120
|
}
|
|
126
121
|
|
|
@@ -150,37 +145,54 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
150
145
|
(chunk.message as Partial<AIMessageChunk>).response_metadata &&
|
|
151
146
|
typeof chunk.message.response_metadata === 'object'
|
|
152
147
|
) {
|
|
153
|
-
const responseMetadata = chunk.message.response_metadata as Record<
|
|
148
|
+
const responseMetadata = chunk.message.response_metadata as Record<
|
|
149
|
+
string,
|
|
150
|
+
unknown
|
|
151
|
+
>;
|
|
154
152
|
let needsModification = false;
|
|
155
153
|
let cleanedMetadata = responseMetadata;
|
|
156
154
|
|
|
157
155
|
// Check if contentBlockIndex exists anywhere in response_metadata
|
|
158
|
-
const hasContentBlockIndex =
|
|
156
|
+
const hasContentBlockIndex =
|
|
157
|
+
this.hasContentBlockIndex(responseMetadata);
|
|
159
158
|
if (hasContentBlockIndex) {
|
|
160
|
-
cleanedMetadata = this.removeContentBlockIndex(
|
|
159
|
+
cleanedMetadata = this.removeContentBlockIndex(
|
|
160
|
+
responseMetadata
|
|
161
|
+
) as Record<string, unknown>;
|
|
161
162
|
needsModification = true;
|
|
162
163
|
}
|
|
163
164
|
|
|
164
165
|
// Extract cache tokens from metadata.usage (Bedrock streaming format)
|
|
165
166
|
// The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
|
|
166
|
-
const metadata = responseMetadata.metadata as
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
167
|
+
const metadata = responseMetadata.metadata as
|
|
168
|
+
| Record<string, unknown>
|
|
169
|
+
| undefined;
|
|
170
|
+
const usage = (metadata?.usage ?? responseMetadata.usage) as
|
|
171
|
+
| Record<string, unknown>
|
|
172
|
+
| undefined;
|
|
173
|
+
|
|
174
|
+
let enhancedUsageMetadata: UsageMetadata | undefined =
|
|
175
|
+
chunk.message.usage_metadata;
|
|
176
|
+
|
|
171
177
|
if (usage) {
|
|
172
|
-
const cacheRead =
|
|
173
|
-
|
|
174
|
-
const
|
|
175
|
-
|
|
176
|
-
|
|
178
|
+
const cacheRead =
|
|
179
|
+
(usage.cacheReadInputTokens as number | undefined) ?? 0;
|
|
180
|
+
const cacheWrite =
|
|
181
|
+
(usage.cacheWriteInputTokens as number | undefined) ?? 0;
|
|
182
|
+
const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
|
|
183
|
+
const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
|
|
184
|
+
|
|
177
185
|
if (cacheRead > 0 || cacheWrite > 0) {
|
|
178
|
-
|
|
179
186
|
needsModification = true;
|
|
180
187
|
enhancedUsageMetadata = {
|
|
181
|
-
input_tokens:
|
|
182
|
-
|
|
183
|
-
|
|
188
|
+
input_tokens:
|
|
189
|
+
chunk.message.usage_metadata?.input_tokens ?? inputTokens,
|
|
190
|
+
output_tokens:
|
|
191
|
+
chunk.message.usage_metadata?.output_tokens ?? outputTokens,
|
|
192
|
+
total_tokens:
|
|
193
|
+
chunk.message.usage_metadata?.total_tokens ??
|
|
194
|
+
(usage.totalTokens as number | undefined) ??
|
|
195
|
+
0,
|
|
184
196
|
input_token_details: {
|
|
185
197
|
cache_read: cacheRead,
|
|
186
198
|
cache_creation: cacheWrite,
|
|
@@ -128,6 +128,8 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
128
128
|
// Accumulate reasoning_details from each delta
|
|
129
129
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
130
130
|
const deltaAny = delta as Record<string, any>;
|
|
131
|
+
// Extract current chunk's reasoning text for streaming (before accumulation)
|
|
132
|
+
let currentChunkReasoningText = '';
|
|
131
133
|
if (
|
|
132
134
|
deltaAny.reasoning_details != null &&
|
|
133
135
|
Array.isArray(deltaAny.reasoning_details)
|
|
@@ -143,7 +145,9 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
143
145
|
index: detail.index,
|
|
144
146
|
});
|
|
145
147
|
} else if (detail.type === 'reasoning.text') {
|
|
146
|
-
//
|
|
148
|
+
// Extract current chunk's text for streaming
|
|
149
|
+
currentChunkReasoningText += detail.text || '';
|
|
150
|
+
// For text reasoning, accumulate text by index for final message
|
|
147
151
|
const idx = detail.index ?? 0;
|
|
148
152
|
const existing = reasoningTextByIndex.get(idx);
|
|
149
153
|
if (existing) {
|
|
@@ -167,6 +171,12 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
167
171
|
defaultRole
|
|
168
172
|
);
|
|
169
173
|
|
|
174
|
+
// For models that send reasoning_details (Gemini style) instead of reasoning (DeepSeek style),
|
|
175
|
+
// set the current chunk's reasoning text to additional_kwargs.reasoning for streaming
|
|
176
|
+
if (currentChunkReasoningText && !chunk.additional_kwargs.reasoning) {
|
|
177
|
+
chunk.additional_kwargs.reasoning = currentChunkReasoningText;
|
|
178
|
+
}
|
|
179
|
+
|
|
170
180
|
// IMPORTANT: Only set reasoning_details on the FINAL chunk to prevent
|
|
171
181
|
// LangChain's chunk concatenation from corrupting the array
|
|
172
182
|
// Check if this is the final chunk (has finish_reason)
|
|
@@ -17,11 +17,14 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
|
|
|
17
17
|
input,
|
|
18
18
|
parameters
|
|
19
19
|
)) as GeminiRequest;
|
|
20
|
-
if (
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
if (formattedData.generationConfig?.thinkingConfig?.thinkingBudget === -1) {
|
|
21
|
+
// -1 means "let the model decide" - delete the property so the API doesn't receive an invalid value
|
|
22
|
+
if (
|
|
23
|
+
formattedData.generationConfig.thinkingConfig.includeThoughts === false
|
|
24
|
+
) {
|
|
25
|
+
formattedData.generationConfig.thinkingConfig.includeThoughts = true;
|
|
26
|
+
}
|
|
27
|
+
delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
|
|
25
28
|
}
|
|
26
29
|
return formattedData;
|
|
27
30
|
}
|
|
@@ -318,11 +321,7 @@ export class ChatVertexAI extends ChatGoogle {
|
|
|
318
321
|
}
|
|
319
322
|
|
|
320
323
|
constructor(fields?: VertexAIClientOptions) {
|
|
321
|
-
|
|
322
|
-
if (fields?.thinkingBudget === -1) {
|
|
323
|
-
dynamicThinkingBudget = true;
|
|
324
|
-
fields.thinkingBudget = 1;
|
|
325
|
-
}
|
|
324
|
+
const dynamicThinkingBudget = fields?.thinkingBudget === -1;
|
|
326
325
|
super({
|
|
327
326
|
...fields,
|
|
328
327
|
platformType: 'gcp',
|
package/src/messages/cache.ts
CHANGED
|
@@ -8,10 +8,13 @@ type MessageWithContent = {
|
|
|
8
8
|
};
|
|
9
9
|
|
|
10
10
|
/** Debug logger for cache operations - set ILLUMA_DEBUG_CACHE=true to enable */
|
|
11
|
-
const debugCache = (message: string, data?: unknown) => {
|
|
11
|
+
const debugCache = (message: string, data?: unknown): void => {
|
|
12
12
|
if (process.env.ILLUMA_DEBUG_CACHE === 'true') {
|
|
13
13
|
// eslint-disable-next-line no-console
|
|
14
|
-
console.log(
|
|
14
|
+
console.log(
|
|
15
|
+
`[Cache] ${message}`,
|
|
16
|
+
data !== undefined ? JSON.stringify(data, null, 2) : ''
|
|
17
|
+
);
|
|
15
18
|
}
|
|
16
19
|
};
|
|
17
20
|
|
|
@@ -148,26 +151,26 @@ export function stripBedrockCacheControl<T extends MessageWithContent>(
|
|
|
148
151
|
|
|
149
152
|
/**
|
|
150
153
|
* Adds Bedrock Converse API cache points using "Stable Prefix Caching" strategy.
|
|
151
|
-
*
|
|
154
|
+
*
|
|
152
155
|
* STRATEGY: Place cache point after the LAST ASSISTANT message only.
|
|
153
156
|
* This ensures the prefix (everything before the cache point) remains STABLE
|
|
154
157
|
* as the conversation grows, maximizing cache hits.
|
|
155
|
-
*
|
|
158
|
+
*
|
|
156
159
|
* Why this works:
|
|
157
160
|
* - System message has its own cachePoint (added in AgentContext)
|
|
158
161
|
* - Tools have their own cachePoint (added in CustomChatBedrockConverse)
|
|
159
162
|
* - Conversation history grows, but the PREFIX stays the same
|
|
160
163
|
* - Only the NEW user message is uncached (it's always different)
|
|
161
|
-
*
|
|
164
|
+
*
|
|
162
165
|
* Example conversation flow:
|
|
163
166
|
* Request 1: [System+cachePoint][Tools+cachePoint][User1] → No conversation cache yet
|
|
164
167
|
* Request 2: [System][Tools][User1][Assistant1+cachePoint][User2] → Cache User1+Assistant1
|
|
165
168
|
* Request 3: [System][Tools][User1][Assistant1][User2][Assistant2+cachePoint][User3]
|
|
166
169
|
* → Cache reads User1+A1+User2+A2, cache writes new portion
|
|
167
|
-
*
|
|
170
|
+
*
|
|
168
171
|
* Claude's "Simplified Cache Management" automatically looks back up to 20 content
|
|
169
172
|
* blocks from the cache checkpoint to find the longest matching prefix.
|
|
170
|
-
*
|
|
173
|
+
*
|
|
171
174
|
* @param messages - The array of message objects (excluding system message).
|
|
172
175
|
* @returns - The updated array with a single cache point after the last assistant message.
|
|
173
176
|
*/
|
|
@@ -175,16 +178,21 @@ export function addBedrockCacheControl<
|
|
|
175
178
|
T extends Partial<BaseMessage> & MessageWithContent,
|
|
176
179
|
>(messages: T[]): T[] {
|
|
177
180
|
if (!Array.isArray(messages) || messages.length < 1) {
|
|
178
|
-
debugCache('addBedrockCacheControl: Skipping - no messages', {
|
|
181
|
+
debugCache('addBedrockCacheControl: Skipping - no messages', {
|
|
182
|
+
count: messages.length,
|
|
183
|
+
});
|
|
179
184
|
return messages;
|
|
180
185
|
}
|
|
181
186
|
|
|
182
|
-
debugCache(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
187
|
+
debugCache(
|
|
188
|
+
'addBedrockCacheControl: Processing messages with stable prefix strategy',
|
|
189
|
+
{
|
|
190
|
+
count: messages.length,
|
|
191
|
+
}
|
|
192
|
+
);
|
|
193
|
+
|
|
186
194
|
const updatedMessages: T[] = messages.slice();
|
|
187
|
-
|
|
195
|
+
|
|
188
196
|
// First pass: Remove ALL existing cache points to ensure clean state
|
|
189
197
|
// This prevents accumulation of stale cache points
|
|
190
198
|
for (const message of updatedMessages) {
|
|
@@ -214,7 +222,7 @@ export function addBedrockCacheControl<
|
|
|
214
222
|
const type = (block as { type?: string }).type;
|
|
215
223
|
// Check for all reasoning/thinking block types:
|
|
216
224
|
// - reasoning_content: Bedrock Anthropic extended thinking
|
|
217
|
-
// - reasoning: Generic reasoning format
|
|
225
|
+
// - reasoning: Generic reasoning format
|
|
218
226
|
// - thinking: Anthropic direct API thinking
|
|
219
227
|
// - redacted_thinking: Anthropic redacted thinking blocks
|
|
220
228
|
if (
|
|
@@ -233,27 +241,32 @@ export function addBedrockCacheControl<
|
|
|
233
241
|
// Messages with reasoning/thinking blocks cannot have cache points after them (Bedrock limitation)
|
|
234
242
|
let lastAssistantIndex = -1;
|
|
235
243
|
let skippedWithReasoning = 0;
|
|
236
|
-
|
|
244
|
+
|
|
237
245
|
// Count message types for logging
|
|
238
246
|
const messageTypes: Record<string, number> = {};
|
|
239
247
|
for (const message of updatedMessages) {
|
|
240
|
-
const msgType =
|
|
241
|
-
|
|
242
|
-
|
|
248
|
+
const msgType =
|
|
249
|
+
'getType' in message && typeof message.getType === 'function'
|
|
250
|
+
? message.getType()
|
|
251
|
+
: 'unknown';
|
|
243
252
|
messageTypes[msgType] = (messageTypes[msgType] || 0) + 1;
|
|
244
253
|
}
|
|
245
|
-
|
|
254
|
+
|
|
246
255
|
for (let i = updatedMessages.length - 1; i >= 0; i--) {
|
|
247
256
|
const message = updatedMessages[i];
|
|
248
|
-
const messageType =
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
257
|
+
const messageType =
|
|
258
|
+
'getType' in message && typeof message.getType === 'function'
|
|
259
|
+
? message.getType()
|
|
260
|
+
: 'unknown';
|
|
261
|
+
|
|
252
262
|
if (messageType === 'ai') {
|
|
253
263
|
// Skip assistant messages with reasoning blocks - cache points not allowed after them
|
|
254
264
|
if (hasReasoningBlock(message)) {
|
|
255
265
|
skippedWithReasoning++;
|
|
256
|
-
debugCache(
|
|
266
|
+
debugCache(
|
|
267
|
+
'addBedrockCacheControl: Skipping assistant message with reasoning block',
|
|
268
|
+
{ index: i }
|
|
269
|
+
);
|
|
257
270
|
continue;
|
|
258
271
|
}
|
|
259
272
|
lastAssistantIndex = i;
|
|
@@ -262,12 +275,20 @@ export function addBedrockCacheControl<
|
|
|
262
275
|
}
|
|
263
276
|
|
|
264
277
|
// Log message summary
|
|
265
|
-
debugCache(
|
|
278
|
+
debugCache(
|
|
279
|
+
`📨 Messages | total=${updatedMessages.length} | ${Object.entries(
|
|
280
|
+
messageTypes
|
|
281
|
+
)
|
|
282
|
+
.map(([k, v]) => `${k}:${v}`)
|
|
283
|
+
.join(' ')} | skippedReasoning=${skippedWithReasoning}`
|
|
284
|
+
);
|
|
266
285
|
|
|
267
286
|
// If no suitable assistant message found, skip conversation caching
|
|
268
287
|
// (System and Tools caching are still handled separately)
|
|
269
288
|
if (lastAssistantIndex === -1) {
|
|
270
|
-
debugCache(
|
|
289
|
+
debugCache(
|
|
290
|
+
'📨 Messages | No suitable assistant message for cachePoint (first turn or all have reasoning)'
|
|
291
|
+
);
|
|
271
292
|
return updatedMessages;
|
|
272
293
|
}
|
|
273
294
|
|
|
@@ -280,22 +301,35 @@ export function addBedrockCacheControl<
|
|
|
280
301
|
{ type: ContentTypes.TEXT, text: content },
|
|
281
302
|
{ cachePoint: { type: 'default' } },
|
|
282
303
|
] as MessageContentComplex[];
|
|
283
|
-
debugCache(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
304
|
+
debugCache(
|
|
305
|
+
`📍 Message cachePoint at index ${lastAssistantIndex} (string, ${content.length} chars)`
|
|
306
|
+
);
|
|
307
|
+
debugCache(
|
|
308
|
+
'addBedrockCacheControl: Added cachePoint to assistant message (string content)',
|
|
309
|
+
{
|
|
310
|
+
index: lastAssistantIndex,
|
|
311
|
+
contentLength: content.length,
|
|
312
|
+
}
|
|
313
|
+
);
|
|
314
|
+
} else if (
|
|
315
|
+
Array.isArray(assistantMessage.content) &&
|
|
316
|
+
assistantMessage.content.length > 0
|
|
317
|
+
) {
|
|
289
318
|
// Double-check: If this message has reasoning blocks, skip adding cache point entirely
|
|
290
319
|
// This handles edge cases where the initial skip check might have missed it
|
|
291
320
|
if (hasReasoningBlock(assistantMessage)) {
|
|
292
|
-
debugCache(
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
321
|
+
debugCache(
|
|
322
|
+
`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (has reasoning blocks)`
|
|
323
|
+
);
|
|
324
|
+
debugCache(
|
|
325
|
+
'addBedrockCacheControl: Skipping - assistant message has reasoning blocks (safety check)',
|
|
326
|
+
{
|
|
327
|
+
index: lastAssistantIndex,
|
|
328
|
+
}
|
|
329
|
+
);
|
|
296
330
|
return updatedMessages;
|
|
297
331
|
}
|
|
298
|
-
|
|
332
|
+
|
|
299
333
|
// Find the last text block and insert cache point after it
|
|
300
334
|
let inserted = false;
|
|
301
335
|
for (let j = assistantMessage.content.length - 1; j >= 0; j--) {
|
|
@@ -303,37 +337,52 @@ export function addBedrockCacheControl<
|
|
|
303
337
|
const type = (block as { type?: string }).type;
|
|
304
338
|
if (type === ContentTypes.TEXT || type === 'text') {
|
|
305
339
|
const text = (block as { text?: string }).text;
|
|
306
|
-
if (text && text !== '') {
|
|
340
|
+
if (text != null && text !== '') {
|
|
307
341
|
assistantMessage.content.splice(j + 1, 0, {
|
|
308
342
|
cachePoint: { type: 'default' },
|
|
309
343
|
} as MessageContentComplex);
|
|
310
344
|
inserted = true;
|
|
311
|
-
debugCache(
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
345
|
+
debugCache(
|
|
346
|
+
`📍 Message cachePoint at index ${lastAssistantIndex} (array, block ${j}, ${text.length} chars)`
|
|
347
|
+
);
|
|
348
|
+
debugCache(
|
|
349
|
+
'addBedrockCacheControl: Added cachePoint after text block in assistant message',
|
|
350
|
+
{
|
|
351
|
+
index: lastAssistantIndex,
|
|
352
|
+
textBlockIndex: j,
|
|
353
|
+
contentLength: text.length,
|
|
354
|
+
}
|
|
355
|
+
);
|
|
317
356
|
break;
|
|
318
357
|
}
|
|
319
358
|
}
|
|
320
359
|
}
|
|
321
|
-
|
|
360
|
+
|
|
322
361
|
// If no text block found, don't append cache point as the message structure is unexpected
|
|
323
362
|
if (!inserted) {
|
|
324
|
-
const contentTypes = assistantMessage.content.map(
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
contentTypes,
|
|
329
|
-
|
|
363
|
+
const contentTypes = assistantMessage.content.map(
|
|
364
|
+
(b) => (b as { type?: string }).type
|
|
365
|
+
);
|
|
366
|
+
debugCache(
|
|
367
|
+
`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (no text block, types: ${contentTypes.join(',')})`
|
|
368
|
+
);
|
|
369
|
+
debugCache(
|
|
370
|
+
'addBedrockCacheControl: No suitable text block found, skipping cache point',
|
|
371
|
+
{
|
|
372
|
+
index: lastAssistantIndex,
|
|
373
|
+
contentTypes,
|
|
374
|
+
}
|
|
375
|
+
);
|
|
330
376
|
}
|
|
331
377
|
}
|
|
332
378
|
|
|
333
|
-
debugCache(
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
379
|
+
debugCache(
|
|
380
|
+
'addBedrockCacheControl: Complete - stable prefix caching applied',
|
|
381
|
+
{
|
|
382
|
+
lastAssistantIndex,
|
|
383
|
+
totalMessages: updatedMessages.length,
|
|
384
|
+
}
|
|
385
|
+
);
|
|
337
386
|
|
|
338
387
|
return updatedMessages;
|
|
339
388
|
}
|
package/src/messages/core.ts
CHANGED
|
@@ -347,7 +347,7 @@ export function convertMessagesToContent(
|
|
|
347
347
|
export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
348
348
|
const lastMessage = messages[messages.length - 1];
|
|
349
349
|
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
350
|
-
if (lastMessage
|
|
350
|
+
if (lastMessage.getType() !== 'tool') return;
|
|
351
351
|
const lastToolMessage = lastMessage as ToolMessage;
|
|
352
352
|
|
|
353
353
|
// Find the latest AIMessage with tool_calls that this tool message belongs to
|
|
@@ -388,7 +388,9 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
|
388
388
|
Array.isArray((msg as ToolMessage).artifact?.content) &&
|
|
389
389
|
Array.isArray(msg.content)
|
|
390
390
|
) {
|
|
391
|
-
msg.content = (msg.content as t.MessageContentComplex[]).concat(
|
|
391
|
+
msg.content = (msg.content as t.MessageContentComplex[]).concat(
|
|
392
|
+
(msg as ToolMessage).artifact.content
|
|
393
|
+
);
|
|
392
394
|
}
|
|
393
395
|
}
|
|
394
396
|
}
|
|
@@ -396,7 +398,7 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
|
|
|
396
398
|
export function formatArtifactPayload(messages: BaseMessage[]): void {
|
|
397
399
|
const lastMessageY = messages[messages.length - 1];
|
|
398
400
|
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
399
|
-
if (lastMessageY
|
|
401
|
+
if (lastMessageY.getType() !== 'tool') return;
|
|
400
402
|
const lastToolMessage = lastMessageY as ToolMessage;
|
|
401
403
|
|
|
402
404
|
// Find the latest AIMessage with tool_calls that this tool message belongs to
|
package/src/messages/format.ts
CHANGED
|
@@ -362,7 +362,8 @@ function formatAssistantMessage(
|
|
|
362
362
|
|
|
363
363
|
// Apply TOON compression to historical tool outputs for context efficiency
|
|
364
364
|
// processToolOutput handles: JSON→TOON conversion, already-TOON detection (skip), truncation
|
|
365
|
-
const processedOutput =
|
|
365
|
+
const processedOutput =
|
|
366
|
+
output != null ? processToolOutput(output).content : '';
|
|
366
367
|
|
|
367
368
|
formattedMessages.push(
|
|
368
369
|
new ToolMessage({
|
|
@@ -905,7 +906,10 @@ export function ensureThinkingBlockInMessages(
|
|
|
905
906
|
// Look ahead for tool messages that belong to this AI message
|
|
906
907
|
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
907
908
|
// where different copies of ToolMessage class might be loaded
|
|
908
|
-
while (
|
|
909
|
+
while (
|
|
910
|
+
j < messages.length &&
|
|
911
|
+
messages[j].getType() === MessageTypes.TOOL
|
|
912
|
+
) {
|
|
909
913
|
toolSequence.push(messages[j]);
|
|
910
914
|
j++;
|
|
911
915
|
}
|
package/src/messages/tools.ts
CHANGED
|
@@ -21,7 +21,7 @@ type ToolSearchArtifact = {
|
|
|
21
21
|
export function extractToolDiscoveries(messages: BaseMessage[]): string[] {
|
|
22
22
|
const lastMessage = messages[messages.length - 1];
|
|
23
23
|
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
24
|
-
if (lastMessage
|
|
24
|
+
if (lastMessage.getType() !== MessageTypes.TOOL) return [];
|
|
25
25
|
const lastToolMessage = lastMessage as ToolMessage;
|
|
26
26
|
|
|
27
27
|
// Find the latest AIMessage with tool_calls that this tool message belongs to
|
|
@@ -71,7 +71,7 @@ export function extractToolDiscoveries(messages: BaseMessage[]): string[] {
|
|
|
71
71
|
export function hasToolSearchInCurrentTurn(messages: BaseMessage[]): boolean {
|
|
72
72
|
const lastMessage = messages[messages.length - 1];
|
|
73
73
|
// Use getType() instead of instanceof to avoid module mismatch issues
|
|
74
|
-
if (lastMessage
|
|
74
|
+
if (lastMessage.getType() !== MessageTypes.TOOL) return false;
|
|
75
75
|
const lastToolMessage = lastMessage as ToolMessage;
|
|
76
76
|
|
|
77
77
|
// Find the latest AIMessage with tool_calls
|
package/src/scripts/simple.ts
CHANGED