illuma-agents 1.0.16 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/dist/cjs/agents/AgentContext.cjs +3 -1
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +18 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +79 -32
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +5 -3
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/openai/index.cjs +1 -0
  10. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  11. package/dist/cjs/llm/openrouter/index.cjs +10 -1
  12. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  13. package/dist/cjs/llm/vertexai/index.cjs +7 -8
  14. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  15. package/dist/cjs/main.cjs +15 -0
  16. package/dist/cjs/main.cjs.map +1 -1
  17. package/dist/cjs/messages/cache.cjs +11 -6
  18. package/dist/cjs/messages/cache.cjs.map +1 -1
  19. package/dist/cjs/messages/core.cjs +16 -8
  20. package/dist/cjs/messages/core.cjs.map +1 -1
  21. package/dist/cjs/messages/format.cjs +9 -2
  22. package/dist/cjs/messages/format.cjs.map +1 -1
  23. package/dist/cjs/messages/tools.cjs +17 -10
  24. package/dist/cjs/messages/tools.cjs.map +1 -1
  25. package/dist/cjs/stream.cjs +30 -16
  26. package/dist/cjs/stream.cjs.map +1 -1
  27. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +209 -47
  28. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  29. package/dist/cjs/tools/ToolNode.cjs +73 -3
  30. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  31. package/dist/cjs/tools/handlers.cjs +1 -0
  32. package/dist/cjs/tools/handlers.cjs.map +1 -1
  33. package/dist/cjs/tools/search/search.cjs.map +1 -1
  34. package/dist/cjs/tools/search/tool.cjs +3 -1
  35. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  36. package/dist/cjs/utils/contextAnalytics.cjs +66 -0
  37. package/dist/cjs/utils/contextAnalytics.cjs.map +1 -0
  38. package/dist/cjs/utils/run.cjs.map +1 -1
  39. package/dist/cjs/utils/toonFormat.cjs +388 -0
  40. package/dist/cjs/utils/toonFormat.cjs.map +1 -0
  41. package/dist/esm/agents/AgentContext.mjs +3 -1
  42. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  43. package/dist/esm/common/enum.mjs +19 -1
  44. package/dist/esm/common/enum.mjs.map +1 -1
  45. package/dist/esm/graphs/Graph.mjs +81 -34
  46. package/dist/esm/graphs/Graph.mjs.map +1 -1
  47. package/dist/esm/llm/bedrock/index.mjs +5 -3
  48. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  49. package/dist/esm/llm/openai/index.mjs +1 -0
  50. package/dist/esm/llm/openai/index.mjs.map +1 -1
  51. package/dist/esm/llm/openrouter/index.mjs +10 -1
  52. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  53. package/dist/esm/llm/vertexai/index.mjs +7 -8
  54. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  55. package/dist/esm/main.mjs +4 -2
  56. package/dist/esm/main.mjs.map +1 -1
  57. package/dist/esm/messages/cache.mjs +11 -6
  58. package/dist/esm/messages/cache.mjs.map +1 -1
  59. package/dist/esm/messages/core.mjs +18 -10
  60. package/dist/esm/messages/core.mjs.map +1 -1
  61. package/dist/esm/messages/format.mjs +10 -3
  62. package/dist/esm/messages/format.mjs.map +1 -1
  63. package/dist/esm/messages/tools.mjs +19 -12
  64. package/dist/esm/messages/tools.mjs.map +1 -1
  65. package/dist/esm/stream.mjs +30 -16
  66. package/dist/esm/stream.mjs.map +1 -1
  67. package/dist/esm/tools/ProgrammaticToolCalling.mjs +208 -48
  68. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  69. package/dist/esm/tools/ToolNode.mjs +73 -3
  70. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  71. package/dist/esm/tools/handlers.mjs +1 -0
  72. package/dist/esm/tools/handlers.mjs.map +1 -1
  73. package/dist/esm/tools/search/search.mjs.map +1 -1
  74. package/dist/esm/tools/search/tool.mjs +3 -1
  75. package/dist/esm/tools/search/tool.mjs.map +1 -1
  76. package/dist/esm/utils/contextAnalytics.mjs +64 -0
  77. package/dist/esm/utils/contextAnalytics.mjs.map +1 -0
  78. package/dist/esm/utils/run.mjs.map +1 -1
  79. package/dist/esm/utils/toonFormat.mjs +381 -0
  80. package/dist/esm/utils/toonFormat.mjs.map +1 -0
  81. package/dist/types/common/enum.d.ts +17 -0
  82. package/dist/types/graphs/Graph.d.ts +8 -0
  83. package/dist/types/tools/ProgrammaticToolCalling.d.ts +19 -0
  84. package/dist/types/types/tools.d.ts +3 -1
  85. package/dist/types/utils/contextAnalytics.d.ts +37 -0
  86. package/dist/types/utils/index.d.ts +2 -0
  87. package/dist/types/utils/toonFormat.d.ts +111 -0
  88. package/package.json +3 -2
  89. package/src/agents/AgentContext.ts +28 -20
  90. package/src/common/enum.ts +18 -0
  91. package/src/graphs/Graph.ts +152 -62
  92. package/src/llm/bedrock/__tests__/bedrock-caching.test.ts +495 -473
  93. package/src/llm/bedrock/index.ts +47 -35
  94. package/src/llm/openrouter/index.ts +11 -1
  95. package/src/llm/vertexai/index.ts +9 -10
  96. package/src/messages/cache.ts +104 -55
  97. package/src/messages/core.ts +29 -19
  98. package/src/messages/format.ts +14 -3
  99. package/src/messages/tools.ts +20 -13
  100. package/src/scripts/simple.ts +1 -1
  101. package/src/specs/emergency-prune.test.ts +407 -355
  102. package/src/stream.ts +28 -20
  103. package/src/tools/ProgrammaticToolCalling.ts +246 -52
  104. package/src/tools/ToolNode.ts +78 -5
  105. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +155 -0
  106. package/src/tools/search/jina-reranker.test.ts +32 -28
  107. package/src/tools/search/search.ts +3 -1
  108. package/src/tools/search/tool.ts +16 -7
  109. package/src/types/tools.ts +3 -1
  110. package/src/utils/contextAnalytics.ts +103 -0
  111. package/src/utils/index.ts +2 -0
  112. package/src/utils/llmConfig.ts +8 -1
  113. package/src/utils/run.ts +5 -4
  114. package/src/utils/toonFormat.ts +475 -0
@@ -29,7 +29,8 @@ import { ChatGenerationChunk } from '@langchain/core/outputs';
 import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
 
 /** Extended input type with promptCache option */
-export interface CustomChatBedrockConverseInput extends ChatBedrockConverseInput {
+export interface CustomChatBedrockConverseInput
+  extends ChatBedrockConverseInput {
   promptCache?: boolean;
 }
 
@@ -48,12 +49,12 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
   /**
    * Override invocationParams to add cachePoint to tools when promptCache is enabled.
    * This enables Bedrock prompt caching for tool definitions.
-   * 
+   *
    * STRATEGY: Separate cachePoints for core tools and MCP tools
    * - Core tools (web_search, execute_code, etc.) are stable → cache first
    * - MCP tools (have '_mcp_' in name) are dynamic → cache separately after
    * - This allows core tools to stay cached when MCP selection changes
-   * 
+   *
    * NOTE: Only Claude models support cachePoint - Nova and other models will reject it.
    */
   invocationParams(
@@ -63,8 +64,9 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
 
     // Add cachePoint to tools array if promptCache is enabled and tools exist
     // Only Claude models support cachePoint - check model name
-    const modelId = this.model?.toLowerCase() ?? '';
-    const isClaudeModel = modelId.includes('claude') || modelId.includes('anthropic');
+    const modelId = this.model.toLowerCase();
+    const isClaudeModel =
+      modelId.includes('claude') || modelId.includes('anthropic');
 
     if (
       this.promptCache &&
@@ -79,10 +81,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
       const mcpTools: typeof params.toolConfig.tools = [];
       const coreToolNames: string[] = [];
       const mcpToolNames: string[] = [];
-      
+
       for (const tool of params.toolConfig.tools) {
         // Check if tool has a name property with '_mcp_' pattern
-        const toolName = (tool as { toolSpec?: { name?: string } })?.toolSpec?.name ?? '';
+        const toolName =
+          (tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
         if (toolName.includes('_mcp_')) {
           mcpTools.push(tool);
           mcpToolNames.push(toolName);
@@ -92,35 +95,27 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
         }
       }
 
-
-
       // Build tools array with strategic cachePoints:
       // [CoreTool1, CoreTool2, cachePoint] + [MCPTool1, MCPTool2, cachePoint]
       const toolsWithCache: typeof params.toolConfig.tools = [];
-      let cachePointCount = 0;
-
+
       // Add core tools with cachePoint (if any)
       if (coreTools.length > 0) {
         toolsWithCache.push(...coreTools);
         toolsWithCache.push({ cachePoint: { type: 'default' } });
-        cachePointCount++;
       }
-
+
       // Add MCP tools with their own cachePoint (if any)
       if (mcpTools.length > 0) {
         toolsWithCache.push(...mcpTools);
         toolsWithCache.push({ cachePoint: { type: 'default' } });
-        cachePointCount++;
       }
-
+
       // If no tools at all (shouldn't happen but safety check)
       if (toolsWithCache.length === 0) {
         toolsWithCache.push({ cachePoint: { type: 'default' } });
-        cachePointCount++;
       }
-
 
-
       params.toolConfig.tools = toolsWithCache;
     }
 
@@ -150,37 +145,54 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
         (chunk.message as Partial<AIMessageChunk>).response_metadata &&
         typeof chunk.message.response_metadata === 'object'
       ) {
-        const responseMetadata = chunk.message.response_metadata as Record<string, unknown>;
+        const responseMetadata = chunk.message.response_metadata as Record<
+          string,
+          unknown
+        >;
         let needsModification = false;
         let cleanedMetadata = responseMetadata;
 
         // Check if contentBlockIndex exists anywhere in response_metadata
-        const hasContentBlockIndex = this.hasContentBlockIndex(responseMetadata);
+        const hasContentBlockIndex =
+          this.hasContentBlockIndex(responseMetadata);
         if (hasContentBlockIndex) {
-          cleanedMetadata = this.removeContentBlockIndex(responseMetadata) as Record<string, unknown>;
+          cleanedMetadata = this.removeContentBlockIndex(
+            responseMetadata
+          ) as Record<string, unknown>;
           needsModification = true;
         }
 
         // Extract cache tokens from metadata.usage (Bedrock streaming format)
         // The metadata chunk contains usage with cacheReadInputTokens/cacheWriteInputTokens
-        const metadata = responseMetadata.metadata as Record<string, unknown> | undefined;
-        const usage = (metadata?.usage ?? responseMetadata.usage) as Record<string, unknown> | undefined;
-
-        let enhancedUsageMetadata: UsageMetadata | undefined = chunk.message.usage_metadata;
-
+        const metadata = responseMetadata.metadata as
+          | Record<string, unknown>
+          | undefined;
+        const usage = (metadata?.usage ?? responseMetadata.usage) as
+          | Record<string, unknown>
+          | undefined;
+
+        let enhancedUsageMetadata: UsageMetadata | undefined =
+          chunk.message.usage_metadata;
+
         if (usage) {
-          const cacheRead = (usage.cacheReadInputTokens as number) ?? 0;
-          const cacheWrite = (usage.cacheWriteInputTokens as number) ?? 0;
-          const inputTokens = (usage.inputTokens as number) ?? 0;
-          const outputTokens = (usage.outputTokens as number) ?? 0;
-
+          const cacheRead =
+            (usage.cacheReadInputTokens as number | undefined) ?? 0;
+          const cacheWrite =
+            (usage.cacheWriteInputTokens as number | undefined) ?? 0;
+          const inputTokens = (usage.inputTokens as number | undefined) ?? 0;
+          const outputTokens = (usage.outputTokens as number | undefined) ?? 0;
+
           if (cacheRead > 0 || cacheWrite > 0) {
-
             needsModification = true;
             enhancedUsageMetadata = {
-              input_tokens: chunk.message.usage_metadata?.input_tokens ?? inputTokens,
-              output_tokens: chunk.message.usage_metadata?.output_tokens ?? outputTokens,
-              total_tokens: chunk.message.usage_metadata?.total_tokens ?? (usage.totalTokens as number) ?? 0,
+              input_tokens:
+                chunk.message.usage_metadata?.input_tokens ?? inputTokens,
+              output_tokens:
+                chunk.message.usage_metadata?.output_tokens ?? outputTokens,
+              total_tokens:
+                chunk.message.usage_metadata?.total_tokens ??
+                (usage.totalTokens as number | undefined) ??
+                0,
               input_token_details: {
                 cache_read: cacheRead,
                 cache_creation: cacheWrite,
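
The hunks above split toolConfig.tools into stable core tools and dynamic MCP tools, placing a Bedrock cachePoint marker after each group so a change in MCP tool selection does not invalidate the cached core-tool prefix. A minimal standalone TypeScript sketch of that grouping follows; the tool and cache-point shapes (and the MCP tool name) are simplified assumptions, not the package's actual Converse types.

// Simplified shapes for illustration; the real Bedrock Converse tool types are richer.
type ConverseTool =
  | { toolSpec?: { name?: string } }
  | { cachePoint: { type: 'default' } };

// Stable core tools are cached first; MCP tools (names containing '_mcp_')
// get their own trailing cache point, mirroring the strategy in the diff.
function withCachePoints(tools: ConverseTool[]): ConverseTool[] {
  const core: ConverseTool[] = [];
  const mcp: ConverseTool[] = [];
  for (const tool of tools) {
    const name = (tool as { toolSpec?: { name?: string } }).toolSpec?.name ?? '';
    (name.includes('_mcp_') ? mcp : core).push(tool);
  }
  const out: ConverseTool[] = [];
  if (core.length > 0) out.push(...core, { cachePoint: { type: 'default' } });
  if (mcp.length > 0) out.push(...mcp, { cachePoint: { type: 'default' } });
  if (out.length === 0) out.push({ cachePoint: { type: 'default' } });
  return out;
}

// Result: [web_search, execute_code, cachePoint, github_mcp_search, cachePoint]
const example = withCachePoints([
  { toolSpec: { name: 'web_search' } },
  { toolSpec: { name: 'execute_code' } },
  { toolSpec: { name: 'github_mcp_search' } },
]);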
@@ -128,6 +128,8 @@ export class ChatOpenRouter extends ChatOpenAI {
       // Accumulate reasoning_details from each delta
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
       const deltaAny = delta as Record<string, any>;
+      // Extract current chunk's reasoning text for streaming (before accumulation)
+      let currentChunkReasoningText = '';
       if (
         deltaAny.reasoning_details != null &&
         Array.isArray(deltaAny.reasoning_details)
@@ -143,7 +145,9 @@ export class ChatOpenRouter extends ChatOpenAI {
               index: detail.index,
             });
           } else if (detail.type === 'reasoning.text') {
-            // For text reasoning, accumulate text by index
+            // Extract current chunk's text for streaming
+            currentChunkReasoningText += detail.text || '';
+            // For text reasoning, accumulate text by index for final message
             const idx = detail.index ?? 0;
             const existing = reasoningTextByIndex.get(idx);
             if (existing) {
@@ -167,6 +171,12 @@ export class ChatOpenRouter extends ChatOpenAI {
         defaultRole
       );
 
+      // For models that send reasoning_details (Gemini style) instead of reasoning (DeepSeek style),
+      // set the current chunk's reasoning text to additional_kwargs.reasoning for streaming
+      if (currentChunkReasoningText && !chunk.additional_kwargs.reasoning) {
+        chunk.additional_kwargs.reasoning = currentChunkReasoningText;
+      }
+
       // IMPORTANT: Only set reasoning_details on the FINAL chunk to prevent
       // LangChain's chunk concatenation from corrupting the array
       // Check if this is the final chunk (has finish_reason)
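
For OpenRouter, each streamed delta's reasoning.text content is now surfaced immediately through additional_kwargs.reasoning, while the per-index accumulation still builds the final message. A small sketch of just the per-chunk extraction step, using a simplified reasoning-detail shape rather than OpenRouter's full payload:

// Simplified shape; OpenRouter's actual reasoning_details entries carry more fields.
interface ReasoningDetail {
  type: string;
  text?: string;
  index?: number;
}

// Collects only the current delta's reasoning.text content so it can be streamed
// right away; accumulation by index for the final message happens separately.
function extractChunkReasoning(details: ReasoningDetail[] | undefined): string {
  let text = '';
  for (const detail of details ?? []) {
    if (detail.type === 'reasoning.text') {
      text += detail.text || '';
    }
  }
  return text;
}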
@@ -17,11 +17,14 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
       input,
       parameters
     )) as GeminiRequest;
-    if (
-      formattedData.generationConfig?.thinkingConfig?.thinkingBudget === -1 &&
-      formattedData.generationConfig.thinkingConfig.includeThoughts === false
-    ) {
-      formattedData.generationConfig.thinkingConfig.includeThoughts = true;
+    if (formattedData.generationConfig?.thinkingConfig?.thinkingBudget === -1) {
+      // -1 means "let the model decide" - delete the property so the API doesn't receive an invalid value
+      if (
+        formattedData.generationConfig.thinkingConfig.includeThoughts === false
+      ) {
+        formattedData.generationConfig.thinkingConfig.includeThoughts = true;
+      }
+      delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
     }
     return formattedData;
   }
@@ -318,11 +321,7 @@ export class ChatVertexAI extends ChatGoogle {
   }
 
   constructor(fields?: VertexAIClientOptions) {
-    let dynamicThinkingBudget = false;
-    if (fields?.thinkingBudget === -1) {
-      dynamicThinkingBudget = true;
-      fields.thinkingBudget = 1;
-    }
+    const dynamicThinkingBudget = fields?.thinkingBudget === -1;
     super({
       ...fields,
       platformType: 'gcp',
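
On Vertex AI, a thinkingBudget of -1 now means "let the model decide": includeThoughts stays enabled and the budget property is dropped before the request is sent, instead of being rewritten to 1. A standalone sketch of that normalization on a simplified config object (the real thinkingConfig lives inside GeminiRequest and has more fields):

// Simplified config shape for illustration only.
interface ThinkingConfig {
  thinkingBudget?: number;
  includeThoughts?: boolean;
}

// -1 is treated as "dynamic": keep thoughts on and remove the budget so the
// API never receives the sentinel value.
function normalizeThinkingConfig(config: ThinkingConfig): ThinkingConfig {
  if (config.thinkingBudget === -1) {
    if (config.includeThoughts === false) {
      config.includeThoughts = true;
    }
    delete config.thinkingBudget;
  }
  return config;
}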
@@ -8,10 +8,13 @@ type MessageWithContent = {
 };
 
 /** Debug logger for cache operations - set ILLUMA_DEBUG_CACHE=true to enable */
-const debugCache = (message: string, data?: unknown) => {
+const debugCache = (message: string, data?: unknown): void => {
   if (process.env.ILLUMA_DEBUG_CACHE === 'true') {
     // eslint-disable-next-line no-console
-    console.log(`[Cache] ${message}`, data !== undefined ? JSON.stringify(data, null, 2) : '');
+    console.log(
+      `[Cache] ${message}`,
+      data !== undefined ? JSON.stringify(data, null, 2) : ''
+    );
   }
 };
 
@@ -148,26 +151,26 @@ export function stripBedrockCacheControl<T extends MessageWithContent>(
 
 /**
  * Adds Bedrock Converse API cache points using "Stable Prefix Caching" strategy.
- * 
+ *
  * STRATEGY: Place cache point after the LAST ASSISTANT message only.
  * This ensures the prefix (everything before the cache point) remains STABLE
  * as the conversation grows, maximizing cache hits.
- * 
+ *
  * Why this works:
  * - System message has its own cachePoint (added in AgentContext)
  * - Tools have their own cachePoint (added in CustomChatBedrockConverse)
  * - Conversation history grows, but the PREFIX stays the same
  * - Only the NEW user message is uncached (it's always different)
- * 
+ *
  * Example conversation flow:
  * Request 1: [System+cachePoint][Tools+cachePoint][User1] → No conversation cache yet
  * Request 2: [System][Tools][User1][Assistant1+cachePoint][User2] → Cache User1+Assistant1
  * Request 3: [System][Tools][User1][Assistant1][User2][Assistant2+cachePoint][User3]
  *    → Cache reads User1+A1+User2+A2, cache writes new portion
- * 
+ *
  * Claude's "Simplified Cache Management" automatically looks back up to 20 content
  * blocks from the cache checkpoint to find the longest matching prefix.
- * 
+ *
  * @param messages - The array of message objects (excluding system message).
 * @returns - The updated array with a single cache point after the last assistant message.
 */
@@ -175,16 +178,21 @@ export function addBedrockCacheControl<
   T extends Partial<BaseMessage> & MessageWithContent,
 >(messages: T[]): T[] {
   if (!Array.isArray(messages) || messages.length < 1) {
-    debugCache('addBedrockCacheControl: Skipping - no messages', { count: messages?.length });
+    debugCache('addBedrockCacheControl: Skipping - no messages', {
+      count: messages.length,
+    });
     return messages;
   }
 
-  debugCache('addBedrockCacheControl: Processing messages with stable prefix strategy', {
-    count: messages.length
-  });
-
+  debugCache(
+    'addBedrockCacheControl: Processing messages with stable prefix strategy',
+    {
+      count: messages.length,
+    }
+  );
+
   const updatedMessages: T[] = messages.slice();
-
+
   // First pass: Remove ALL existing cache points to ensure clean state
   // This prevents accumulation of stale cache points
   for (const message of updatedMessages) {
@@ -214,7 +222,7 @@ export function addBedrockCacheControl<
       const type = (block as { type?: string }).type;
       // Check for all reasoning/thinking block types:
       // - reasoning_content: Bedrock Anthropic extended thinking
-      // - reasoning: Generic reasoning format 
+      // - reasoning: Generic reasoning format
       // - thinking: Anthropic direct API thinking
       // - redacted_thinking: Anthropic redacted thinking blocks
       if (
@@ -233,27 +241,32 @@ export function addBedrockCacheControl<
   // Messages with reasoning/thinking blocks cannot have cache points after them (Bedrock limitation)
   let lastAssistantIndex = -1;
   let skippedWithReasoning = 0;
-
+
   // Count message types for logging
   const messageTypes: Record<string, number> = {};
   for (const message of updatedMessages) {
-    const msgType = 'getType' in message && typeof message.getType === 'function'
-      ? message.getType()
-      : 'unknown';
+    const msgType =
+      'getType' in message && typeof message.getType === 'function'
+        ? message.getType()
+        : 'unknown';
     messageTypes[msgType] = (messageTypes[msgType] || 0) + 1;
   }
-
+
   for (let i = updatedMessages.length - 1; i >= 0; i--) {
     const message = updatedMessages[i];
-    const messageType = 'getType' in message && typeof message.getType === 'function'
-      ? message.getType()
-      : 'unknown';
-
+    const messageType =
+      'getType' in message && typeof message.getType === 'function'
+        ? message.getType()
+        : 'unknown';
+
     if (messageType === 'ai') {
       // Skip assistant messages with reasoning blocks - cache points not allowed after them
       if (hasReasoningBlock(message)) {
         skippedWithReasoning++;
-        debugCache('addBedrockCacheControl: Skipping assistant message with reasoning block', { index: i });
+        debugCache(
+          'addBedrockCacheControl: Skipping assistant message with reasoning block',
+          { index: i }
+        );
         continue;
       }
       lastAssistantIndex = i;
@@ -262,12 +275,20 @@ export function addBedrockCacheControl<
   }
 
   // Log message summary
-  debugCache(`📨 Messages | total=${updatedMessages.length} | ${Object.entries(messageTypes).map(([k,v]) => `${k}:${v}`).join(' ')} | skippedReasoning=${skippedWithReasoning}`);
+  debugCache(
+    `📨 Messages | total=${updatedMessages.length} | ${Object.entries(
+      messageTypes
+    )
+      .map(([k, v]) => `${k}:${v}`)
+      .join(' ')} | skippedReasoning=${skippedWithReasoning}`
+  );
 
   // If no suitable assistant message found, skip conversation caching
   // (System and Tools caching are still handled separately)
   if (lastAssistantIndex === -1) {
-    debugCache('📨 Messages | No suitable assistant message for cachePoint (first turn or all have reasoning)');
+    debugCache(
+      '📨 Messages | No suitable assistant message for cachePoint (first turn or all have reasoning)'
+    );
     return updatedMessages;
   }
 
@@ -280,22 +301,35 @@ export function addBedrockCacheControl<
       { type: ContentTypes.TEXT, text: content },
       { cachePoint: { type: 'default' } },
     ] as MessageContentComplex[];
-    debugCache(`📍 Message cachePoint at index ${lastAssistantIndex} (string, ${content.length} chars)`);
-    debugCache('addBedrockCacheControl: Added cachePoint to assistant message (string content)', {
-      index: lastAssistantIndex,
-      contentLength: content.length,
-    });
-  } else if (Array.isArray(assistantMessage.content) && assistantMessage.content.length > 0) {
+    debugCache(
+      `📍 Message cachePoint at index ${lastAssistantIndex} (string, ${content.length} chars)`
+    );
+    debugCache(
+      'addBedrockCacheControl: Added cachePoint to assistant message (string content)',
+      {
+        index: lastAssistantIndex,
+        contentLength: content.length,
+      }
+    );
+  } else if (
+    Array.isArray(assistantMessage.content) &&
+    assistantMessage.content.length > 0
+  ) {
     // Double-check: If this message has reasoning blocks, skip adding cache point entirely
     // This handles edge cases where the initial skip check might have missed it
     if (hasReasoningBlock(assistantMessage)) {
-      debugCache(`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (has reasoning blocks)`);
-      debugCache('addBedrockCacheControl: Skipping - assistant message has reasoning blocks (safety check)', {
-        index: lastAssistantIndex,
-      });
+      debugCache(
+        `⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (has reasoning blocks)`
+      );
+      debugCache(
+        'addBedrockCacheControl: Skipping - assistant message has reasoning blocks (safety check)',
+        {
+          index: lastAssistantIndex,
+        }
+      );
       return updatedMessages;
     }
-
+
     // Find the last text block and insert cache point after it
     let inserted = false;
     for (let j = assistantMessage.content.length - 1; j >= 0; j--) {
@@ -303,37 +337,52 @@ export function addBedrockCacheControl<
       const type = (block as { type?: string }).type;
       if (type === ContentTypes.TEXT || type === 'text') {
         const text = (block as { text?: string }).text;
-        if (text && text !== '') {
+        if (text != null && text !== '') {
           assistantMessage.content.splice(j + 1, 0, {
             cachePoint: { type: 'default' },
           } as MessageContentComplex);
           inserted = true;
-          debugCache(`📍 Message cachePoint at index ${lastAssistantIndex} (array, block ${j}, ${text.length} chars)`);
-          debugCache('addBedrockCacheControl: Added cachePoint after text block in assistant message', {
-            index: lastAssistantIndex,
-            textBlockIndex: j,
-            contentLength: text.length,
-          });
+          debugCache(
+            `📍 Message cachePoint at index ${lastAssistantIndex} (array, block ${j}, ${text.length} chars)`
+          );
+          debugCache(
+            'addBedrockCacheControl: Added cachePoint after text block in assistant message',
+            {
+              index: lastAssistantIndex,
+              textBlockIndex: j,
+              contentLength: text.length,
+            }
+          );
           break;
         }
       }
     }
-
+
     // If no text block found, don't append cache point as the message structure is unexpected
     if (!inserted) {
-      const contentTypes = assistantMessage.content.map((b) => (b as { type?: string }).type);
-      debugCache(`⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (no text block, types: ${contentTypes.join(',')})`);
-      debugCache('addBedrockCacheControl: No suitable text block found, skipping cache point', {
-        index: lastAssistantIndex,
-        contentTypes,
-      });
+      const contentTypes = assistantMessage.content.map(
+        (b) => (b as { type?: string }).type
+      );
+      debugCache(
+        `⚠️ Message cachePoint SKIPPED at index ${lastAssistantIndex} (no text block, types: ${contentTypes.join(',')})`
+      );
+      debugCache(
+        'addBedrockCacheControl: No suitable text block found, skipping cache point',
+        {
+          index: lastAssistantIndex,
+          contentTypes,
+        }
+      );
     }
   }
 
-  debugCache('addBedrockCacheControl: Complete - stable prefix caching applied', {
-    lastAssistantIndex,
-    totalMessages: updatedMessages.length,
-  });
+  debugCache(
+    'addBedrockCacheControl: Complete - stable prefix caching applied',
+    {
+      lastAssistantIndex,
+      totalMessages: updatedMessages.length,
+    }
+  );
 
   return updatedMessages;
 }
@@ -8,7 +8,7 @@ import {
 } from '@langchain/core/messages';
 import type { ToolCall } from '@langchain/core/messages/tool';
 import type * as t from '@/types';
-import { Providers } from '@/common';
+import { Providers, MessageTypes } from '@/common';
 
 export function getConverseOverrideMessage({
   userMessage,
@@ -346,7 +346,9 @@ export function convertMessagesToContent(
 
 export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
   const lastMessage = messages[messages.length - 1];
-  if (!(lastMessage instanceof ToolMessage)) return;
+  // Use getType() instead of instanceof to avoid module mismatch issues
+  if (lastMessage.getType() !== 'tool') return;
+  const lastToolMessage = lastMessage as ToolMessage;
 
   // Find the latest AIMessage with tool_calls that this tool message belongs to
   const latestAIParentIndex = findLastIndex(
@@ -354,20 +356,21 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
     (msg) =>
       (msg instanceof AIMessageChunk &&
         (msg.tool_calls?.length ?? 0) > 0 &&
-        msg.tool_calls?.some((tc) => tc.id === lastMessage.tool_call_id)) ??
+        msg.tool_calls?.some((tc) => tc.id === lastToolMessage.tool_call_id)) ??
       false
   );
 
   if (latestAIParentIndex === -1) return;
 
   // Check if any tool message after the AI message has array artifact content
+  // Use getType() instead of instanceof to avoid module mismatch issues
   const hasArtifactContent = messages.some(
     (msg, i) =>
       i > latestAIParentIndex &&
-      msg instanceof ToolMessage &&
-      msg.artifact != null &&
-      msg.artifact?.content != null &&
-      Array.isArray(msg.artifact.content)
+      msg.getType() === MessageTypes.TOOL &&
+      (msg as ToolMessage).artifact != null &&
+      (msg as ToolMessage).artifact?.content != null &&
+      Array.isArray((msg as ToolMessage).artifact.content)
   );
 
   if (!hasArtifactContent) return;
@@ -377,21 +380,26 @@ export function formatAnthropicArtifactContent(messages: BaseMessage[]): void {
 
   for (let j = latestAIParentIndex + 1; j < messages.length; j++) {
     const msg = messages[j];
+    // Use getType() instead of instanceof to avoid module mismatch issues
     if (
-      msg instanceof ToolMessage &&
-      toolCallIds.includes(msg.tool_call_id) &&
-      msg.artifact != null &&
-      Array.isArray(msg.artifact?.content) &&
+      msg.getType() === MessageTypes.TOOL &&
+      toolCallIds.includes((msg as ToolMessage).tool_call_id) &&
+      (msg as ToolMessage).artifact != null &&
+      Array.isArray((msg as ToolMessage).artifact?.content) &&
       Array.isArray(msg.content)
     ) {
-      msg.content = msg.content.concat(msg.artifact.content);
+      msg.content = (msg.content as t.MessageContentComplex[]).concat(
+        (msg as ToolMessage).artifact.content
+      );
     }
   }
 }
 
 export function formatArtifactPayload(messages: BaseMessage[]): void {
   const lastMessageY = messages[messages.length - 1];
-  if (!(lastMessageY instanceof ToolMessage)) return;
+  // Use getType() instead of instanceof to avoid module mismatch issues
+  if (lastMessageY.getType() !== 'tool') return;
+  const lastToolMessage = lastMessageY as ToolMessage;
 
   // Find the latest AIMessage with tool_calls that this tool message belongs to
   const latestAIParentIndex = findLastIndex(
@@ -399,28 +407,30 @@ export function formatArtifactPayload(messages: BaseMessage[]): void {
     (msg) =>
       (msg instanceof AIMessageChunk &&
         (msg.tool_calls?.length ?? 0) > 0 &&
-        msg.tool_calls?.some((tc) => tc.id === lastMessageY.tool_call_id)) ??
+        msg.tool_calls?.some((tc) => tc.id === lastToolMessage.tool_call_id)) ??
       false
   );
 
   if (latestAIParentIndex === -1) return;
 
   // Check if any tool message after the AI message has array artifact content
+  // Use getType() instead of instanceof to avoid module mismatch issues
   const hasArtifactContent = messages.some(
     (msg, i) =>
       i > latestAIParentIndex &&
-      msg instanceof ToolMessage &&
-      msg.artifact != null &&
-      msg.artifact?.content != null &&
-      Array.isArray(msg.artifact.content)
+      msg.getType() === MessageTypes.TOOL &&
+      (msg as ToolMessage).artifact != null &&
+      (msg as ToolMessage).artifact?.content != null &&
+      Array.isArray((msg as ToolMessage).artifact.content)
   );
 
   if (!hasArtifactContent) return;
 
   // Collect all relevant tool messages and their artifacts
+  // Use getType() instead of instanceof to avoid module mismatch issues
   const relevantMessages = messages
     .slice(latestAIParentIndex + 1)
-    .filter((msg) => msg instanceof ToolMessage) as ToolMessage[];
+    .filter((msg) => msg.getType() === MessageTypes.TOOL) as ToolMessage[];
 
   // Aggregate all content and artifacts
   const aggregatedContent: t.MessageContentComplex[] = [];
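
These hunks replace instanceof ToolMessage checks with getType() comparisons: instanceof fails when two copies of @langchain/core end up installed, because a message constructed by one copy is not an instance of the other copy's class, while getType() only reads the message itself. A small type-guard sketch of the same idea (the helper name is illustrative):

import type { BaseMessage, ToolMessage } from '@langchain/core/messages';

// Works across duplicate @langchain/core installs, where `msg instanceof ToolMessage`
// can report false for a genuine tool message built by the other copy.
function isToolMessage(msg: BaseMessage): msg is ToolMessage {
  return msg.getType() === 'tool';
}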
@@ -19,7 +19,8 @@ import type {
   TPayload,
   TMessage,
 } from '@/types';
-import { Providers, ContentTypes } from '@/common';
+import { Providers, ContentTypes, MessageTypes } from '@/common';
+import { processToolOutput } from '@/utils/toonFormat';
 
 interface MediaMessageParams {
   message: {
@@ -359,11 +360,16 @@ function formatAssistantMessage(
           }
           lastAIMessage.tool_calls.push(tool_call as ToolCall);
 
+          // Apply TOON compression to historical tool outputs for context efficiency
+          // processToolOutput handles: JSON→TOON conversion, already-TOON detection (skip), truncation
+          const processedOutput =
+            output != null ? processToolOutput(output).content : '';
+
           formattedMessages.push(
             new ToolMessage({
               tool_call_id: tool_call.id ?? '',
               name: tool_call.name,
-              content: output != null ? output : '',
+              content: processedOutput,
             })
           );
         } else if (part.type === ContentTypes.THINK) {
@@ -898,7 +904,12 @@ export function ensureThinkingBlockInMessages(
       let j = i + 1;
 
       // Look ahead for tool messages that belong to this AI message
-      while (j < messages.length && messages[j] instanceof ToolMessage) {
+      // Use getType() instead of instanceof to avoid module mismatch issues
+      // where different copies of ToolMessage class might be loaded
+      while (
+        j < messages.length &&
+        messages[j].getType() === MessageTypes.TOOL
+      ) {
         toolSequence.push(messages[j]);
         j++;
       }
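
The last hunks route historical tool outputs through processToolOutput from the new toonFormat module before they are replayed as ToolMessage content; the diff shows only the call site, which returns an object whose content field holds the compressed text. As a rough illustration of why a tabular, TOON-style encoding saves tokens on uniform JSON arrays, here is a generic sketch that is not the package's actual implementation:

// Illustrative only: serialize uniform rows as one header plus one line per row,
// instead of repeating every key name inside every object.
type Row = Record<string, string | number | boolean | null>;

function toTabular(rows: Row[]): string {
  if (rows.length === 0) return '';
  const keys = Object.keys(rows[0]);
  const header = keys.join('|');
  const lines = rows.map((row) => keys.map((k) => String(row[k] ?? '')).join('|'));
  return [header, ...lines].join('\n');
}

// e.g. fifty search results become a header row plus fifty compact data rows.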