@librechat/agents 3.0.775 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/cjs/graphs/Graph.cjs +19 -5
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/llm/bedrock/index.cjs +98 -25
  4. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  5. package/dist/cjs/messages/cache.cjs +27 -77
  6. package/dist/cjs/messages/cache.cjs.map +1 -1
  7. package/dist/cjs/messages/core.cjs +1 -1
  8. package/dist/cjs/messages/core.cjs.map +1 -1
  9. package/dist/cjs/stream.cjs +4 -2
  10. package/dist/cjs/stream.cjs.map +1 -1
  11. package/dist/cjs/tools/ToolNode.cjs +9 -5
  12. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  13. package/dist/esm/graphs/Graph.mjs +19 -5
  14. package/dist/esm/graphs/Graph.mjs.map +1 -1
  15. package/dist/esm/llm/bedrock/index.mjs +97 -24
  16. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  17. package/dist/esm/messages/cache.mjs +27 -77
  18. package/dist/esm/messages/cache.mjs.map +1 -1
  19. package/dist/esm/messages/core.mjs +1 -1
  20. package/dist/esm/messages/core.mjs.map +1 -1
  21. package/dist/esm/stream.mjs +4 -2
  22. package/dist/esm/stream.mjs.map +1 -1
  23. package/dist/esm/tools/ToolNode.mjs +9 -5
  24. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  25. package/dist/types/llm/bedrock/index.d.ts +86 -7
  26. package/dist/types/llm/bedrock/types.d.ts +27 -0
  27. package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
  28. package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
  29. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
  30. package/dist/types/types/tools.d.ts +2 -0
  31. package/package.json +7 -4
  32. package/src/graphs/Graph.ts +23 -5
  33. package/src/llm/bedrock/index.ts +180 -43
  34. package/src/llm/bedrock/llm.spec.ts +616 -0
  35. package/src/llm/bedrock/types.ts +51 -0
  36. package/src/llm/bedrock/utils/index.ts +18 -0
  37. package/src/llm/bedrock/utils/message_inputs.ts +563 -0
  38. package/src/llm/bedrock/utils/message_outputs.ts +310 -0
  39. package/src/messages/cache.test.ts +6 -12
  40. package/src/messages/cache.ts +48 -107
  41. package/src/messages/core.ts +1 -1
  42. package/src/scripts/code_exec_multi_session.ts +241 -0
  43. package/src/scripts/thinking-bedrock.ts +159 -0
  44. package/src/scripts/thinking.ts +39 -18
  45. package/src/scripts/tools.ts +7 -3
  46. package/src/specs/cache.simple.test.ts +396 -0
  47. package/src/stream.ts +4 -2
  48. package/src/tools/ToolNode.ts +9 -5
  49. package/src/types/tools.ts +2 -0
package/src/specs/cache.simple.test.ts ADDED
@@ -0,0 +1,396 @@
+ /* eslint-disable no-console */
+ /* eslint-disable @typescript-eslint/no-explicit-any */
+ import { config } from 'dotenv';
+ config();
+ import { Calculator } from '@/tools/Calculator';
+ import {
+   AIMessage,
+   BaseMessage,
+   HumanMessage,
+   UsageMetadata,
+ } from '@langchain/core/messages';
+ import type * as t from '@/types';
+ import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
+ import { ModelEndHandler, ToolEndHandler } from '@/events';
+ import { capitalizeFirstLetter } from './spec.utils';
+ import { GraphEvents, Providers } from '@/common';
+ import { getLLMConfig } from '@/utils/llmConfig';
+ import { getArgs } from '@/scripts/args';
+ import { Run } from '@/run';
+
+ /**
+  * These tests verify that prompt caching works correctly across multi-turn
+  * conversations and that messages are not mutated in place.
+  */
+ describe('Prompt Caching Integration Tests', () => {
+   jest.setTimeout(120000);
+
+   const setupTest = (): {
+     collectedUsage: UsageMetadata[];
+     contentParts: Array<t.MessageContentComplex | undefined>;
+     customHandlers: Record<string | GraphEvents, t.EventHandler>;
+   } => {
+     const collectedUsage: UsageMetadata[] = [];
+     const { contentParts, aggregateContent } = createContentAggregator();
+
+     const customHandlers: Record<string | GraphEvents, t.EventHandler> = {
+       [GraphEvents.TOOL_END]: new ToolEndHandler(),
+       [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
+       [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
+       [GraphEvents.ON_RUN_STEP_COMPLETED]: {
+         handle: (
+           event: GraphEvents.ON_RUN_STEP_COMPLETED,
+           data: t.StreamEventData
+         ): void => {
+           aggregateContent({
+             event,
+             data: data as unknown as { result: t.ToolEndEvent },
+           });
+         },
+       },
+       [GraphEvents.ON_RUN_STEP]: {
+         handle: (
+           event: GraphEvents.ON_RUN_STEP,
+           data: t.StreamEventData
+         ): void => {
+           aggregateContent({ event, data: data as t.RunStep });
+         },
+       },
+       [GraphEvents.ON_RUN_STEP_DELTA]: {
+         handle: (
+           event: GraphEvents.ON_RUN_STEP_DELTA,
+           data: t.StreamEventData
+         ): void => {
+           aggregateContent({ event, data: data as t.RunStepDeltaEvent });
+         },
+       },
+       [GraphEvents.ON_MESSAGE_DELTA]: {
+         handle: (
+           event: GraphEvents.ON_MESSAGE_DELTA,
+           data: t.StreamEventData
+         ): void => {
+           aggregateContent({ event, data: data as t.MessageDeltaEvent });
+         },
+       },
+     };
+
+     return { collectedUsage, contentParts, customHandlers };
+   };
+
+   const streamConfig = {
+     configurable: { thread_id: 'cache-test-thread' },
+     streamMode: 'values',
+     version: 'v2' as const,
+   };
+
+   describe('Anthropic Prompt Caching', () => {
+     const provider = Providers.ANTHROPIC;
+
+     test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
+       const { userName, location } = await getArgs();
+       const llmConfig = getLLMConfig(provider);
+       const { collectedUsage, customHandlers } = setupTest();
+
+       const run = await Run.create<t.IState>({
+         runId: 'cache-test-anthropic',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions: 'You are a helpful assistant.',
+           additional_instructions: `User: ${userName}, Location: ${location}`,
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       // Turn 1
+       const turn1Messages: BaseMessage[] = [
+         new HumanMessage('Hello, what is 2+2?'),
+       ];
+       const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
+
+       const turn1Result = await run.processStream(
+         { messages: turn1Messages },
+         streamConfig
+       );
+       expect(turn1Result).toBeDefined();
+
+       // Verify original message was NOT mutated
+       expect(JSON.stringify(turn1Messages[0].content)).toBe(
+         turn1ContentSnapshot
+       );
+       expect((turn1Messages[0] as any).content).not.toContain('cache_control');
+
+       const turn1RunMessages = run.getRunMessages();
+       expect(turn1RunMessages).toBeDefined();
+       expect(turn1RunMessages!.length).toBeGreaterThan(0);
+
+       // Turn 2 - build on conversation
+       const turn2Messages: BaseMessage[] = [
+         ...turn1Messages,
+         ...turn1RunMessages!,
+         new HumanMessage('Now multiply that by 10'),
+       ];
+       const turn2HumanContentSnapshot = JSON.stringify(
+         turn2Messages[turn2Messages.length - 1].content
+       );
+
+       const run2 = await Run.create<t.IState>({
+         runId: 'cache-test-anthropic-2',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions: 'You are a helpful assistant.',
+           additional_instructions: `User: ${userName}, Location: ${location}`,
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       const turn2Result = await run2.processStream(
+         { messages: turn2Messages },
+         streamConfig
+       );
+       expect(turn2Result).toBeDefined();
+
+       // Verify messages were NOT mutated
+       expect(
+         JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
+       ).toBe(turn2HumanContentSnapshot);
+
+       // Check that we got cache read tokens (indicating caching worked)
+       console.log(`${provider} Usage:`, collectedUsage);
+       expect(collectedUsage.length).toBeGreaterThan(0);
+
+       console.log(
+         `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
+       );
+     });
+
+     test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
+       const llmConfig = getLLMConfig(provider);
+       const { customHandlers } = setupTest();
+
+       const run = await Run.create<t.IState>({
+         runId: 'cache-test-anthropic-tools',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions:
+             'You are a math assistant. Use the calculator tool for all calculations.',
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       const messages: BaseMessage[] = [
+         new HumanMessage('Calculate 123 * 456 using the calculator'),
+       ];
+
+       const result = await run.processStream({ messages }, streamConfig);
+       expect(result).toBeDefined();
+
+       const runMessages = run.getRunMessages();
+       expect(runMessages).toBeDefined();
+
+       // Should have used the calculator tool
+       const hasToolUse = runMessages?.some(
+         (msg) =>
+           msg._getType() === 'ai' &&
+           ((msg as AIMessage).tool_calls?.length ?? 0) > 0
+       );
+       expect(hasToolUse).toBe(true);
+
+       console.log(
+         `${capitalizeFirstLetter(provider)} tool call with caching test passed`
+       );
+     });
+   });
+
+   describe('Bedrock Prompt Caching', () => {
+     const provider = Providers.BEDROCK;
+
+     test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
+       const { userName, location } = await getArgs();
+       const llmConfig = getLLMConfig(provider);
+       const { collectedUsage, customHandlers } = setupTest();
+
+       const run = await Run.create<t.IState>({
+         runId: 'cache-test-bedrock',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions: 'You are a helpful assistant.',
+           additional_instructions: `User: ${userName}, Location: ${location}`,
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       // Turn 1
+       const turn1Messages: BaseMessage[] = [
+         new HumanMessage('Hello, what is 5+5?'),
+       ];
+       const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
+
+       const turn1Result = await run.processStream(
+         { messages: turn1Messages },
+         streamConfig
+       );
+       expect(turn1Result).toBeDefined();
+
+       // Verify original message was NOT mutated
+       expect(JSON.stringify(turn1Messages[0].content)).toBe(
+         turn1ContentSnapshot
+       );
+
+       const turn1RunMessages = run.getRunMessages();
+       expect(turn1RunMessages).toBeDefined();
+       expect(turn1RunMessages!.length).toBeGreaterThan(0);
+
+       // Turn 2
+       const turn2Messages: BaseMessage[] = [
+         ...turn1Messages,
+         ...turn1RunMessages!,
+         new HumanMessage('Multiply that by 3'),
+       ];
+       const turn2HumanContentSnapshot = JSON.stringify(
+         turn2Messages[turn2Messages.length - 1].content
+       );
+
+       const run2 = await Run.create<t.IState>({
+         runId: 'cache-test-bedrock-2',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions: 'You are a helpful assistant.',
+           additional_instructions: `User: ${userName}, Location: ${location}`,
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       const turn2Result = await run2.processStream(
+         { messages: turn2Messages },
+         streamConfig
+       );
+       expect(turn2Result).toBeDefined();
+
+       // Verify messages were NOT mutated
+       expect(
+         JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
+       ).toBe(turn2HumanContentSnapshot);
+
+       console.log(`${provider} Usage:`, collectedUsage);
+       expect(collectedUsage.length).toBeGreaterThan(0);
+
+       console.log(
+         `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
+       );
+     });
+
+     test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
+       const llmConfig = getLLMConfig(provider);
+       const { customHandlers } = setupTest();
+
+       const run = await Run.create<t.IState>({
+         runId: 'cache-test-bedrock-tools',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+           tools: [new Calculator()],
+           instructions:
+             'You are a math assistant. Use the calculator tool for all calculations.',
+         },
+         returnContent: true,
+         customHandlers,
+       });
+
+       const messages: BaseMessage[] = [
+         new HumanMessage('Calculate 789 * 123 using the calculator'),
+       ];
+
+       const result = await run.processStream({ messages }, streamConfig);
+       expect(result).toBeDefined();
+
+       const runMessages = run.getRunMessages();
+       expect(runMessages).toBeDefined();
+
+       // Should have used the calculator tool
+       const hasToolUse = runMessages?.some(
+         (msg) =>
+           msg._getType() === 'ai' &&
+           ((msg as AIMessage).tool_calls?.length ?? 0) > 0
+       );
+       expect(hasToolUse).toBe(true);
+
+       console.log(
+         `${capitalizeFirstLetter(provider)} tool call with caching test passed`
+       );
+     });
+   });
+
+   describe('Cross-provider message isolation', () => {
+     test('Messages processed by Anthropic should not affect Bedrock processing', async () => {
+       const anthropicConfig = getLLMConfig(Providers.ANTHROPIC);
+       const bedrockConfig = getLLMConfig(Providers.BEDROCK);
+       const { customHandlers: handlers1 } = setupTest();
+       const { customHandlers: handlers2 } = setupTest();
+
+       // Create a shared message array
+       const sharedMessages: BaseMessage[] = [
+         new HumanMessage('Hello, what is the capital of France?'),
+       ];
+       const originalContent = JSON.stringify(sharedMessages[0].content);
+
+       // Process with Anthropic first
+       const anthropicRun = await Run.create<t.IState>({
+         runId: 'cross-provider-anthropic',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...anthropicConfig, promptCache: true } as t.LLMConfig,
+           instructions: 'You are a helpful assistant.',
+         },
+         returnContent: true,
+         customHandlers: handlers1,
+       });
+
+       const anthropicResult = await anthropicRun.processStream(
+         { messages: sharedMessages },
+         streamConfig
+       );
+       expect(anthropicResult).toBeDefined();
+
+       // Verify message not mutated
+       expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
+
+       // Now process with Bedrock using the SAME messages
+       const bedrockRun = await Run.create<t.IState>({
+         runId: 'cross-provider-bedrock',
+         graphConfig: {
+           type: 'standard',
+           llmConfig: { ...bedrockConfig, promptCache: true } as t.LLMConfig,
+           instructions: 'You are a helpful assistant.',
+         },
+         returnContent: true,
+         customHandlers: handlers2,
+       });
+
+       const bedrockResult = await bedrockRun.processStream(
+         { messages: sharedMessages },
+         streamConfig
+       );
+       expect(bedrockResult).toBeDefined();
+
+       // Verify message STILL not mutated after both providers processed
+       expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
+
+       console.log('Cross-provider message isolation test passed');
+     });
+   });
+ });
package/src/stream.ts CHANGED
@@ -339,7 +339,8 @@ hasToolCallChunks: ${hasToolCallChunks}
  (c) =>
  (c.type?.startsWith(ContentTypes.THINKING) ?? false) ||
  (c.type?.startsWith(ContentTypes.REASONING) ?? false) ||
- (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false)
+ (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false) ||
+ c.type === 'redacted_thinking'
  )
  ) {
  await graph.dispatchReasoningDelta(stepId, {
@@ -365,7 +366,8 @@ hasToolCallChunks: ${hasToolCallChunks}
  Array.isArray(chunk.content) &&
  (chunk.content[0]?.type === ContentTypes.THINKING ||
  chunk.content[0]?.type === ContentTypes.REASONING ||
- chunk.content[0]?.type === ContentTypes.REASONING_CONTENT)
+ chunk.content[0]?.type === ContentTypes.REASONING_CONTENT ||
+ chunk.content[0]?.type === 'redacted_thinking')
  ) {
  reasoning_content = 'valid';
  } else if (
package/src/tools/ToolNode.ts CHANGED
@@ -145,9 +145,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
 
  /**
  * Inject session context for code execution tools when available.
+ * Each file uses its own session_id (supporting multi-session file tracking).
  * Both session_id and _injected_files are injected directly to invokeParams
  * (not inside args) so they bypass Zod schema validation and reach config.toolCall.
- * This avoids /files endpoint race conditions.
  */
  if (
  call.name === Constants.EXECUTE_CODE ||
@@ -156,14 +156,18 @@
  const codeSession = this.sessions?.get(Constants.EXECUTE_CODE) as
  | t.CodeSessionContext
  | undefined;
- if (codeSession?.session_id != null && codeSession.files.length > 0) {
- /** Convert tracked files to CodeEnvFile format for the API */
+ if (codeSession?.files != null && codeSession.files.length > 0) {
+ /**
+ * Convert tracked files to CodeEnvFile format for the API.
+ * Each file uses its own session_id (set when file was created).
+ * This supports files from multiple parallel/sequential executions.
+ */
  const fileRefs: t.CodeEnvFile[] = codeSession.files.map((file) => ({
- session_id: codeSession.session_id,
+ session_id: file.session_id ?? codeSession.session_id,
  id: file.id,
  name: file.name,
  }));
- /** Inject session_id and files directly - bypasses Zod, reaches config.toolCall */
+ /** Inject latest session_id and files - bypasses Zod, reaches config.toolCall */
  invokeParams = {
  ...invokeParams,
  session_id: codeSession.session_id,
package/src/types/tools.ts CHANGED
@@ -74,6 +74,8 @@ export type FileRef = {
  id: string;
  name: string;
  path?: string;
+ /** Session ID this file belongs to (for multi-session file tracking) */
+ session_id?: string;
  };
 
  export type FileRefs = FileRef[];