@librechat/agents 3.1.97 → 3.1.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/cjs/graphs/Graph.cjs +6 -0
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/langfuseToolOutputTracing.cjs +16 -5
  4. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  5. package/dist/cjs/llm/bedrock/index.cjs +10 -0
  6. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
  8. package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
  9. package/dist/cjs/messages/cache.cjs +17 -9
  10. package/dist/cjs/messages/cache.cjs.map +1 -1
  11. package/dist/cjs/messages/prune.cjs +45 -8
  12. package/dist/cjs/messages/prune.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +6 -1
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/esm/graphs/Graph.mjs +6 -0
  16. package/dist/esm/graphs/Graph.mjs.map +1 -1
  17. package/dist/esm/langfuseToolOutputTracing.mjs +16 -5
  18. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  19. package/dist/esm/llm/bedrock/index.mjs +10 -0
  20. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  21. package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
  22. package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
  23. package/dist/esm/messages/cache.mjs +17 -9
  24. package/dist/esm/messages/cache.mjs.map +1 -1
  25. package/dist/esm/messages/prune.mjs +45 -8
  26. package/dist/esm/messages/prune.mjs.map +1 -1
  27. package/dist/esm/tools/ToolNode.mjs +6 -1
  28. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  29. package/dist/types/llm/bedrock/index.d.ts +16 -0
  30. package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
  31. package/dist/types/messages/cache.d.ts +2 -2
  32. package/dist/types/types/llm.d.ts +2 -2
  33. package/package.json +1 -1
  34. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
  35. package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
  36. package/src/graphs/Graph.ts +14 -0
  37. package/src/langfuseToolOutputTracing.ts +26 -7
  38. package/src/llm/bedrock/index.ts +32 -1
  39. package/src/llm/bedrock/llm.spec.ts +154 -1
  40. package/src/llm/bedrock/toolCache.test.ts +131 -0
  41. package/src/llm/bedrock/toolCache.ts +191 -0
  42. package/src/messages/cache.test.ts +97 -38
  43. package/src/messages/cache.ts +18 -10
  44. package/src/messages/prune.ts +55 -17
  45. package/src/specs/langfuse-tool-output-tracing.test.ts +28 -0
  46. package/src/specs/prune.test.ts +193 -0
  47. package/src/tools/ToolNode.ts +7 -1
  48. package/src/tools/__tests__/ToolNode.langfuse.test.ts +6 -0
  49. package/src/types/llm.ts +2 -2
@@ -21,6 +21,7 @@
21
21
  * the accumulated content is already an array.
22
22
  */
23
23
  import { ChatBedrockConverse } from '@langchain/aws';
24
+ import { type GuardrailConfiguration, type GuardrailStreamConfiguration } from '@aws-sdk/client-bedrock-runtime';
24
25
  import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
25
26
  import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
26
27
  import type { ChatBedrockConverseInput } from '@langchain/aws';
@@ -31,12 +32,22 @@ import type { BaseMessage } from '@langchain/core/messages';
31
32
  * @see https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
32
33
  */
33
34
  export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
35
+ export type CustomGuardrailConfiguration = GuardrailConfiguration & Pick<GuardrailStreamConfiguration, 'streamProcessingMode'>;
34
36
  /**
35
37
  * Extended input interface with additional features:
36
38
  * - applicationInferenceProfile: Use an inference profile ARN instead of model ID
37
39
  * - serviceTier: Specify service tier (Priority, Standard, Flex, Reserved)
38
40
  */
39
41
  export interface CustomChatBedrockConverseInput extends ChatBedrockConverseInput {
42
+ /**
43
+ * Enables Bedrock prompt cache checkpoints for message and tool prefixes.
44
+ */
45
+ promptCache?: boolean;
46
+ /**
47
+ * Guardrail configuration for Converse and ConverseStream invocations.
48
+ * `streamProcessingMode` is only used by ConverseStream.
49
+ */
50
+ guardrailConfig?: CustomGuardrailConfiguration;
40
51
  /**
41
52
  * Application Inference Profile ARN to use for the model.
42
53
  * For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
@@ -66,8 +77,13 @@ export interface CustomChatBedrockConverseInput extends ChatBedrockConverseInput
66
77
  */
67
78
  export interface CustomChatBedrockConverseCallOptions {
68
79
  serviceTier?: ServiceTierType;
80
+ guardrailConfig?: CustomGuardrailConfiguration;
69
81
  }
70
82
  export declare class CustomChatBedrockConverse extends ChatBedrockConverse {
83
+ /**
84
+ * Whether to insert Bedrock prompt cache checkpoints when available.
85
+ */
86
+ promptCache?: boolean;
71
87
  /**
72
88
  * Application Inference Profile ARN to use instead of model ID.
73
89
  */
@@ -0,0 +1,4 @@
1
+ import type { ToolConfiguration } from '@aws-sdk/client-bedrock-runtime';
2
+ import type { GraphTools } from '@/types';
3
+ export declare function partitionAndMarkBedrockToolCache(tools: GraphTools | undefined, isDeferred: (toolName: string) => boolean): GraphTools | undefined;
4
+ export declare function insertBedrockToolCachePoint(toolConfig: ToolConfiguration | undefined, fallbackToEnd: boolean): ToolConfiguration | undefined;
@@ -27,11 +27,11 @@ export declare function stripAnthropicCacheControl<T extends MessageWithContent>
27
27
  */
28
28
  export declare function stripBedrockCacheControl<T extends MessageWithContent>(messages: T[]): T[];
29
29
  /**
30
- * Adds Bedrock Converse API cache points to the last two messages.
30
+ * Adds Bedrock Converse API cache points to the latest two user messages.
31
31
  * Inserts `{ cachePoint: { type: 'default' } }` as a separate content block
32
32
  * immediately after the last text block in each targeted message.
33
33
  * Strips ALL existing cache control (both Bedrock and Anthropic formats) from all messages,
34
- * then adds fresh cache points to the last 2 messages in a single backward pass.
34
+ * then adds fresh cache points to the latest two non-tool user messages in a single backward pass.
35
35
  * This ensures we don't accumulate stale cache points across multiple turns.
36
36
  * Returns a new array - only clones messages that require modification.
37
37
  * @param messages - The array of message objects.
@@ -65,7 +65,7 @@ export type BedrockAnthropicInput = ChatBedrockConverseInput & {
65
65
  additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] & AnthropicReasoning;
66
66
  promptCache?: boolean;
67
67
  };
68
- export type BedrockConverseClientOptions = ChatBedrockConverseInput;
68
+ export type BedrockConverseClientOptions = BedrockAnthropicInput;
69
69
  export type BedrockAnthropicClientOptions = BedrockAnthropicInput;
70
70
  export type GoogleClientOptions = GoogleGenerativeAIChatInput & {
71
71
  customHeaders?: RequestOptions['customHeaders'];
@@ -95,7 +95,7 @@ export type ProviderOptionsMap = {
95
95
  [Providers.MISTRALAI]: MistralAIClientOptions;
96
96
  [Providers.MISTRAL]: MistralAIClientOptions;
97
97
  [Providers.OPENROUTER]: ChatOpenRouterCallOptions;
98
- [Providers.BEDROCK]: BedrockConverseClientOptions;
98
+ [Providers.BEDROCK]: BedrockAnthropicClientOptions;
99
99
  [Providers.XAI]: XAIClientOptions;
100
100
  [Providers.MOONSHOT]: OpenAIClientOptions;
101
101
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "3.1.97",
3
+ "version": "3.1.99",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -9,6 +9,14 @@ import { config as dotenvConfig } from 'dotenv';
9
9
  dotenvConfig();
10
10
 
11
11
  import { describe, expect, it } from '@jest/globals';
12
+ import {
13
+ AIMessage,
14
+ BaseMessage,
15
+ HumanMessage,
16
+ SystemMessage,
17
+ ToolMessage,
18
+ type MessageContentComplex,
19
+ } from '@langchain/core/messages';
12
20
  import type * as t from '@/types';
13
21
  import {
14
22
  runLiveTurn,
@@ -18,6 +26,9 @@ import {
18
26
  waitForCachePropagation,
19
27
  } from './promptCacheLiveHelpers';
20
28
  import { Providers } from '@/common';
29
+ import { addCacheControl } from '@/messages/cache';
30
+ import { toLangChainContent } from '@/messages/langchain';
31
+ import { _convertMessagesToAnthropicPayload } from '@/llm/anthropic/utils/message_inputs';
21
32
 
22
33
  const shouldRunLive =
23
34
  process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS === '1' &&
@@ -46,6 +57,268 @@ function createClientOptions(): t.AnthropicClientOptions {
46
57
  };
47
58
  }
48
59
 
/**
 * Normalised usage figures for a single benchmark request.
 * Produced by `extractCacheUsage`, which defaults every missing or null
 * counter to zero.
 */
interface AnthropicCacheUsage {
  /** Value of `usage.input_tokens`. */
  inputTokens: number;
  /** Value of `usage.output_tokens`. */
  outputTokens: number;
  /** Value of `usage.cache_creation_input_tokens`. */
  cacheCreation: number;
  /** Value of `usage.cache_read_input_tokens`. */
  cacheRead: number;
  /** Elapsed wall-clock milliseconds measured around the request. */
  latencyMs: number;
}
67
+
/**
 * The only part of a Messages API response this benchmark reads.
 * Every counter is optional; the two cache counters may also be `null`.
 */
interface AnthropicUsageResponse {
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
    cache_creation_input_tokens?: number | null;
    cache_read_input_tokens?: number | null;
  };
}
76
+
/**
 * Minimal structural view of the Anthropic SDK client used by the benchmark:
 * just `messages.create`, taking a free-form request body plus per-request
 * headers and resolving to the usage-bearing response.
 */
interface AnthropicMessagesClient {
  messages: {
    create: (
      request: Record<string, unknown>,
      options: { headers: Record<string, string> }
    ) => Promise<AnthropicUsageResponse>;
  };
}
85
+
/** JSON schema for the benchmark tool's arguments: one required integer `step`. */
const benchmarkToolInputSchema = {
  type: 'object',
  properties: {
    step: { type: 'integer' },
  },
  required: ['step'],
};

/**
 * Tool definition attached to every benchmark request. Its name matches the
 * `lookup_cache_probe` tool calls embedded in the benchmark conversation.
 */
const benchmarkTool = {
  name: 'lookup_cache_probe',
  description: 'Returns prompt cache benchmark data.',
  input_schema: benchmarkToolInputSchema,
};
97
+
/**
 * Rebuilds `message` as a new instance of the same message class with its
 * content replaced by `content`.
 *
 * `additional_kwargs` and `response_metadata` are shallow-copied; `id` and
 * `name` are carried over. AI messages keep their `tool_calls` and tool
 * messages keep their `tool_call_id`.
 *
 * @param message - The message to copy.
 * @param content - Replacement content blocks, converted via `toLangChainContent`.
 * @returns The rebuilt message, or `message` itself when its type is not one
 *   of `ai`, `human`, `system` or `tool`.
 */
function cloneLiveMessage(
  message: BaseMessage,
  content: MessageContentComplex[]
): BaseMessage {
  const shared = {
    content: toLangChainContent(content),
    additional_kwargs: { ...message.additional_kwargs },
    response_metadata: { ...message.response_metadata },
    id: message.id,
    name: message.name,
  };

  switch (message.getType()) {
    case 'ai':
      return new AIMessage({
        ...shared,
        tool_calls: (message as AIMessage).tool_calls,
      });
    case 'human':
      return new HumanMessage(shared);
    case 'system':
      return new SystemMessage(shared);
    case 'tool':
      return new ToolMessage({
        ...shared,
        tool_call_id: (message as ToolMessage).tool_call_id,
      });
    default:
      // Unrecognised message kinds are passed through untouched.
      return message;
  }
}
132
+
/**
 * Benchmark variant of cache placement: marks only the most recent human
 * message with `cache_control: { type: 'ephemeral' }`.
 *
 * Walks the conversation from newest to oldest. For every message with array
 * content it removes existing cache markers: `cache_control` properties, and
 * blocks that have a `cachePoint` key but no `type` key. The first human
 * message that has non-blank text then receives the marker on its last
 * non-blank text block. A string-content human message is wrapped into a
 * single marked text block.
 *
 * @param messages - Conversation to process; the array is not mutated.
 * @returns A new array in which only the messages that changed are replaced
 *   by clones.
 */
function addLatestUserOnlyAnthropicCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const result = messages.slice();
  let markerPlaced = false;

  for (let index = result.length - 1; index >= 0; index -= 1) {
    const current = result[index];
    const rawContent = current.content;
    const isBlockList = Array.isArray(rawContent);
    const eligible =
      !markerPlaced &&
      current.getType() === 'human' &&
      (typeof rawContent === 'string' || isBlockList);

    if (isBlockList) {
      const rebuilt: MessageContentComplex[] = [];
      let touched = false;
      let lastTextAt = -1;

      for (const original of rawContent as MessageContentComplex[]) {
        if ('cachePoint' in original && !('type' in original)) {
          // Standalone cache-point block: drop it entirely.
          touched = true;
        } else {
          const copy = { ...original };
          if ('cache_control' in copy) {
            delete (copy as Record<string, unknown>).cache_control;
            touched = true;
          }
          if ('type' in copy && copy.type === 'text') {
            const text = (copy as { text?: string }).text;
            if (text != null && text.trim() !== '') {
              lastTextAt = rebuilt.length;
            }
          }
          rebuilt.push(copy as MessageContentComplex);
        }
      }

      if (eligible && lastTextAt >= 0) {
        (
          rebuilt[lastTextAt] as MessageContentComplex & {
            cache_control?: { type: 'ephemeral' };
          }
        ).cache_control = { type: 'ephemeral' };
        markerPlaced = true;
        touched = true;
      }

      // Untouched messages keep their original instance.
      if (touched) {
        result[index] = cloneLiveMessage(current, rebuilt);
      }
      continue;
    }

    if (
      eligible &&
      typeof rawContent === 'string' &&
      rawContent.trim() !== ''
    ) {
      const wrapped = [
        {
          type: 'text',
          text: rawContent,
          cache_control: { type: 'ephemeral' },
        },
      ] as unknown as MessageContentComplex[];
      markerPlaced = true;
      result[index] = cloneLiveMessage(current, wrapped);
    }
  }

  return result;
}
208
+
/**
 * Produces `count` newline-separated filler lines, each tagged with `label`
 * and its zero-based position. Used to pad benchmark prompts.
 *
 * @param label - Prefix placed at the start of every line.
 * @param count - Number of lines to generate.
 * @returns The lines joined with `\n`; an empty string when `count` is 0.
 */
function repeated(label: string, count: number): string {
  const suffix =
    ': stable schema, metric definition, access policy, dashboard note, and query planning guidance.';
  const lines: string[] = [];

  Array.from({ length: count }).forEach((_unused, position) => {
    lines.push(label + ' reference ' + position + suffix);
  });

  return lines.join('\n');
}
216
+
/**
 * Builds the seven-message benchmark conversation: a first user turn that
 * depends only on `nonce`, an AI acknowledgement, a user turn tagged with
 * `marker`, then two rounds of AI tool call plus tool result whose text
 * depends on both `nonce` and `marker`.
 *
 * @param nonce - Tag embedded in every padded section of the conversation.
 * @param marker - Tag for the latest user turn, the tool-call ids and the
 *   tool payloads.
 * @returns The messages in conversation order.
 */
function buildMultiTurnToolMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);

  const firstUserText =
    `Anthropic prompt cache placement benchmark ${nonce}.\n` +
    'This first user turn is intentionally stable across calls in the same benchmark case.\n' +
    repeated(`${nonce} stable-user-context`, 190);

  const latestUserText =
    `Current user request marker: ${marker}.\n` +
    'Use the final tool result to answer with the marker only.\n' +
    repeated(`${nonce} latest-user-${marker}`, 18);

  const finalToolText =
    `Final tool result marker: ${marker}.\n` +
    'Reply with the marker and no extra explanation.\n' +
    volatileToolPayload;

  // Tool-call ids tie each AI request to the tool result that follows it.
  const callId = (step: number): string => `call_${marker}_${step}`;

  const probeRequest = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: callId(step),
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });

  const probeResult = (step: number, content: string): ToolMessage =>
    new ToolMessage({
      content,
      tool_call_id: callId(step),
    });

  return [
    new HumanMessage(firstUserText),
    new AIMessage('I will keep this stable context in mind.'),
    new HumanMessage(latestUserText),
    probeRequest(1),
    probeResult(1, `Tool result 1 for ${marker}.\n${volatileToolPayload}`),
    probeRequest(2),
    probeResult(2, finalToolText),
  ];
}
274
+
/**
 * Converts a raw response's `usage` object into an `AnthropicCacheUsage`,
 * substituting 0 for any counter that is missing or null.
 *
 * @param response - Response whose `usage` field is read.
 * @param latencyMs - Measured request latency, copied through unchanged.
 * @returns The normalised usage record.
 * @throws Error when the response carries no `usage` object.
 */
function extractCacheUsage(
  response: AnthropicUsageResponse,
  latencyMs: number
): AnthropicCacheUsage {
  const { usage } = response;
  if (usage === undefined || usage === null) {
    throw new Error('Missing Anthropic usage metadata for cache benchmark');
  }

  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_creation_input_tokens: cacheCreation,
    cache_read_input_tokens: cacheRead,
  } = usage;

  return {
    inputTokens: inputTokens ?? 0,
    outputTokens: outputTokens ?? 0,
    cacheCreation: cacheCreation ?? 0,
    cacheRead: cacheRead ?? 0,
    latencyMs,
  };
}
291
+
/**
 * Sends one benchmark request through `client.messages.create` and reports
 * its usage counters together with the measured latency.
 *
 * The messages are converted to the Anthropic payload before the clock
 * starts, so the latency covers request construction and the API call only.
 *
 * @param client - Object exposing `messages.create`.
 * @param messages - Conversation to send, with any cache markers already applied.
 * @returns Normalised usage for the request.
 * @throws Error when the response has no `usage` object (raised by
 *   `extractCacheUsage`).
 */
async function runAnthropicCacheBenchmarkTurn({
  client,
  messages,
}: {
  client: AnthropicMessagesClient;
  messages: BaseMessage[];
}): Promise<AnthropicCacheUsage> {
  const anthropicPayload = _convertMessagesToAnthropicPayload(messages);

  const started = Date.now();
  const request = {
    ...anthropicPayload,
    model: modelName,
    max_tokens: 16,
    temperature: 0,
    tools: [benchmarkTool],
  };
  const requestOptions = {
    headers: {
      'anthropic-beta': 'prompt-caching-2024-07-31',
    },
  };
  const response = await client.messages.create(request, requestOptions);
  const elapsedMs = Date.now() - started;

  return extractCacheUsage(response, elapsedMs);
}
321
+
49
322
  describeIfLive('AgentContext Anthropic prompt cache live API', () => {
50
323
  it('caches only the stable system prefix while dynamic tail changes', async () => {
51
324
  const nonce = `agent-cache-live-${Date.now()}`;
@@ -109,4 +382,63 @@ describeIfLive('AgentContext Anthropic prompt cache live API', () => {
109
382
  expect(second.text.toLowerCase()).toContain('bravo');
110
383
  expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
111
384
  }, 120_000);
385
+
it('compares current two-user cache placement against latest-user-only', async () => {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const client = new Anthropic({
    apiKey: process.env.ANTHROPIC_API_KEY,
  }) as unknown as AnthropicMessagesClient;
  const nonce = `anthropic-cache-placement-${Date.now()}`;

  /**
   * Runs the "alpha" conversation, waits for the cache to propagate, then
   * runs the "bravo" conversation under the same nonce. Both turns have the
   * same first user message, so the second turn shows how much of the prompt
   * is read from cache for the given marker placement.
   */
  const runAlphaThenBravo = async (
    caseNonce: string,
    placeMarkers: (messages: BaseMessage[]) => BaseMessage[]
  ): Promise<[AnthropicCacheUsage, AnthropicCacheUsage]> => {
    const first = await runAnthropicCacheBenchmarkTurn({
      client,
      messages: placeMarkers(
        buildMultiTurnToolMessages({ nonce: caseNonce, marker: 'alpha' })
      ),
    });

    await waitForCachePropagation();

    const second = await runAnthropicCacheBenchmarkTurn({
      client,
      messages: placeMarkers(
        buildMultiTurnToolMessages({ nonce: caseNonce, marker: 'bravo' })
      ),
    });

    return [first, second];
  };

  // Each strategy gets its own nonce so the two cases never share cache entries.
  const [currentFirst, currentSecond] = await runAlphaThenBravo(
    `${nonce}-current`,
    (messages) => addCacheControl(messages)
  );
  const [latestOnlyFirst, latestOnlySecond] = await runAlphaThenBravo(
    `${nonce}-latest-only`,
    (messages) => addLatestUserOnlyAnthropicCacheControl(messages)
  );

  const report = {
    currentFirst,
    currentSecond,
    latestOnlyFirst,
    latestOnlySecond,
    cacheWriteDelta:
      currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
  };
  process.stdout.write(
    `Anthropic cache placement benchmark ${JSON.stringify(report)}\n`
  );

  // The current placement must read more from cache and write less on the second turn.
  expect(currentSecond.cacheRead).toBeGreaterThan(0);
  expect(currentSecond.cacheRead).toBeGreaterThan(latestOnlySecond.cacheRead);
  expect(currentSecond.cacheCreation).toBeLessThan(
    latestOnlySecond.cacheCreation
  );
}, 180_000);
112
444
  });