@librechat/agents 3.1.97 → 3.1.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/dist/cjs/graphs/Graph.cjs +6 -0
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/langfuseToolOutputTracing.cjs +16 -5
  4. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  5. package/dist/cjs/llm/bedrock/index.cjs +10 -0
  6. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
  8. package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
  9. package/dist/cjs/messages/cache.cjs +17 -9
  10. package/dist/cjs/messages/cache.cjs.map +1 -1
  11. package/dist/cjs/messages/prune.cjs +45 -8
  12. package/dist/cjs/messages/prune.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +6 -1
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/esm/graphs/Graph.mjs +6 -0
  16. package/dist/esm/graphs/Graph.mjs.map +1 -1
  17. package/dist/esm/langfuseToolOutputTracing.mjs +16 -5
  18. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  19. package/dist/esm/llm/bedrock/index.mjs +10 -0
  20. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  21. package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
  22. package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
  23. package/dist/esm/messages/cache.mjs +17 -9
  24. package/dist/esm/messages/cache.mjs.map +1 -1
  25. package/dist/esm/messages/prune.mjs +45 -8
  26. package/dist/esm/messages/prune.mjs.map +1 -1
  27. package/dist/esm/tools/ToolNode.mjs +6 -1
  28. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  29. package/dist/types/llm/bedrock/index.d.ts +16 -0
  30. package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
  31. package/dist/types/messages/cache.d.ts +2 -2
  32. package/dist/types/types/llm.d.ts +2 -2
  33. package/package.json +1 -1
  34. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
  35. package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
  36. package/src/graphs/Graph.ts +14 -0
  37. package/src/langfuseToolOutputTracing.ts +26 -7
  38. package/src/llm/bedrock/index.ts +32 -1
  39. package/src/llm/bedrock/llm.spec.ts +154 -1
  40. package/src/llm/bedrock/toolCache.test.ts +131 -0
  41. package/src/llm/bedrock/toolCache.ts +191 -0
  42. package/src/messages/cache.test.ts +97 -38
  43. package/src/messages/cache.ts +18 -10
  44. package/src/messages/prune.ts +55 -17
  45. package/src/specs/langfuse-tool-output-tracing.test.ts +28 -0
  46. package/src/specs/prune.test.ts +193 -0
  47. package/src/tools/ToolNode.ts +7 -1
  48. package/src/tools/__tests__/ToolNode.langfuse.test.ts +6 -0
  49. package/src/types/llm.ts +2 -2
@@ -11,6 +11,18 @@ import { config as dotenvConfig } from 'dotenv';
11
11
  dotenvConfig();
12
12
 
13
13
  import { describe, expect, it } from '@jest/globals';
14
+ import {
15
+ AIMessage,
16
+ BaseMessage,
17
+ HumanMessage,
18
+ SystemMessage,
19
+ ToolMessage,
20
+ type MessageContentComplex,
21
+ } from '@langchain/core/messages';
22
+ import {
23
+ BedrockRuntimeClient,
24
+ ConverseCommand,
25
+ } from '@aws-sdk/client-bedrock-runtime';
14
26
  import type * as t from '@/types';
15
27
  import {
16
28
  runLiveTurn,
@@ -20,6 +32,9 @@ import {
20
32
  waitForCachePropagation,
21
33
  } from './promptCacheLiveHelpers';
22
34
  import { Providers } from '@/common';
35
+ import { addBedrockCacheControl } from '@/messages/cache';
36
+ import { toLangChainContent } from '@/messages/langchain';
37
+ import { convertToConverseMessages } from '@/llm/bedrock/utils';
23
38
 
24
39
  const accessKeyId =
25
40
  process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
@@ -77,6 +92,373 @@ function createClientOptions(): t.BedrockAnthropicClientOptions {
77
92
  };
78
93
  }
79
94
 
95
/**
 * Normalized token/latency figures for a single benchmark Converse call,
 * as produced by `extractCacheUsage`.
 */
interface BedrockCacheUsage {
  /** Input tokens reported by Bedrock (0 when the field is absent). */
  inputTokens: number;
  /** Output tokens reported by Bedrock (0 when the field is absent). */
  outputTokens: number;
  /** Reported total, or `inputTokens + outputTokens` when not reported. */
  totalTokens: number;
  /** Tokens written to the prompt cache (`cacheWriteInputTokens`). */
  cacheCreation: number;
  /** Tokens served from the prompt cache (`cacheReadInputTokens`). */
  cacheRead: number;
  /** Wall-clock duration of the call in milliseconds. */
  latencyMs: number;
}
103
+
104
/** Names of the numeric usage counters read from a Converse response. */
type ConverseUsageField =
  | 'inputTokens'
  | 'outputTokens'
  | 'totalTokens'
  | 'cacheReadInputTokens'
  | 'cacheWriteInputTokens';

/**
 * Minimal view of a Converse response: only the optional `usage` object and
 * its optional numeric counters are modelled, since that is all the
 * benchmark reads.
 */
interface ConverseUsageResponse {
  usage?: { [Field in ConverseUsageField]?: number };
}
113
+
114
/** JSON schema for the single `step` argument accepted by the probe tool. */
const lookupCacheProbeInputSchema = {
  type: 'object',
  properties: {
    step: { type: 'integer' },
  },
  required: ['step'],
};

/**
 * Tool configuration sent with every benchmark Converse request. It declares
 * the one tool (`lookup_cache_probe`) that the synthetic conversations call.
 */
const benchmarkToolConfig = {
  tools: [
    {
      toolSpec: {
        name: 'lookup_cache_probe',
        description: 'Returns prompt cache benchmark data.',
        inputSchema: { json: lookupCacheProbeInputSchema },
      },
    },
  ],
};
133
+
134
/**
 * Creates a fresh Bedrock cache-point content block.
 *
 * A new object is returned on every call so callers can splice it into
 * message content without sharing references between messages.
 *
 * @returns A `{ cachePoint: { type: 'default' } }` block typed as message content.
 */
function cachePointBlock(): MessageContentComplex {
  return {
    cachePoint: { type: 'default' },
  } as MessageContentComplex;
}
137
+
138
/**
 * Returns a copy of `content` with prompt-cache markers removed.
 *
 * Two kinds of marker are handled:
 * - blocks that have a `cachePoint` key and no `type` key are dropped;
 * - every remaining block is shallow-cloned and its `cache_control` key,
 *   if any, is deleted from the clone.
 *
 * The input array and its blocks are left untouched.
 *
 * @param content - Content blocks to clean.
 * @returns New array of shallow-cloned blocks without cache markers.
 */
function stripCacheMarkers(
  content: MessageContentComplex[]
): MessageContentComplex[] {
  const cleaned: MessageContentComplex[] = [];

  for (const block of content) {
    // A block carrying `type` is regular content even if it also has `cachePoint`.
    const isBareCachePoint = 'cachePoint' in block && !('type' in block);
    if (isBareCachePoint) {
      continue;
    }

    const copy = { ...block };
    delete (copy as Record<string, unknown>).cache_control;
    cleaned.push(copy as MessageContentComplex);
  }

  return cleaned;
}
149
+
150
/**
 * Rebuilds `message` as a new instance of the same message class with
 * `content` as its body.
 *
 * `additional_kwargs` and `response_metadata` are shallow-copied; `id` and
 * `name` are carried over. AI messages also keep their `tool_calls` and tool
 * messages keep their `tool_call_id`. Any message type other than
 * `ai`, `human`, `system` or `tool` is returned unchanged (same instance).
 *
 * @param message - Message to copy.
 * @param content - Replacement content blocks.
 * @returns The rebuilt message, or `message` itself for unhandled types.
 */
function cloneLiveMessage(
  message: BaseMessage,
  content: MessageContentComplex[]
): BaseMessage {
  const sharedFields = {
    content: toLangChainContent(content),
    additional_kwargs: { ...message.additional_kwargs },
    response_metadata: { ...message.response_metadata },
    id: message.id,
    name: message.name,
  };

  switch (message.getType()) {
    case 'ai':
      return new AIMessage({
        ...sharedFields,
        tool_calls: (message as AIMessage).tool_calls,
      });
    case 'human':
      return new HumanMessage(sharedFields);
    case 'system':
      return new SystemMessage(sharedFields);
    case 'tool':
      return new ToolMessage({
        ...sharedFields,
        tool_call_id: (message as ToolMessage).tool_call_id,
      });
    default:
      return message;
  }
}
184
+
185
/**
 * Reproduces the previous "moving tail" cache placement so the benchmark can
 * compare it against the current `addBedrockCacheControl`.
 *
 * Walking from the newest message to the oldest, it appends a cache point to
 * up to two non-system, non-tool messages:
 * - string content (other than the empty string) becomes a text block
 *   followed by a cache point;
 * - array content is stripped of existing cache markers and, while fewer
 *   than two cache points have been placed, gets a cache point inserted
 *   right after its last `text`/`input_text` block.
 *
 * Array-content messages are always replaced by a clone with stripped
 * markers, even once both cache points have been used.
 *
 * @param messages - Conversation to annotate; the array itself is not mutated.
 * @returns A shallow copy of `messages` with affected entries replaced.
 */
function addLegacyMovingTailBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const maxCachePoints = 2;
  const result = [...messages];
  let placed = 0;

  const isCacheableText = (block: MessageContentComplex): boolean => {
    const type = (block as { type?: string }).type;
    const text = (block as { text?: string }).text;
    return (type === 'text' || type === 'input_text') && text?.trim() !== '';
  };

  for (let index = result.length - 1; index >= 0; index -= 1) {
    const current = result[index];
    const kind = current.getType();
    if (kind === 'system' || kind === 'tool') {
      continue;
    }

    const content = current.content;

    if (typeof content === 'string') {
      if (content !== '' && placed < maxCachePoints) {
        result[index] = cloneLiveMessage(current, [
          { type: 'text', text: content } as MessageContentComplex,
          cachePointBlock(),
        ]);
        placed += 1;
      }
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    const blocks = stripCacheMarkers(content as MessageContentComplex[]);
    const anchorIndex = blocks.findLastIndex((block) => isCacheableText(block));
    const budgetLeft = placed < maxCachePoints;

    if (budgetLeft && anchorIndex >= 0) {
      blocks.splice(anchorIndex + 1, 0, cachePointBlock());
      placed += 1;
    }

    result[index] = cloneLiveMessage(current, blocks);
  }

  return result;
}
234
+
235
/**
 * Baseline placement for the multi-turn benchmark: puts a single Bedrock
 * cache point on the most recent human message and removes any cache
 * markers already present on array-content messages.
 *
 * Messages are visited newest to oldest. System messages are never touched.
 * - Array content: markers are stripped via `stripCacheMarkers`; if this is
 *   the first eligible human message, a cache point is inserted after its
 *   last `text`/`input_text` block. The message is replaced only when
 *   something actually changed.
 * - String content: only the first eligible human message with non-blank
 *   text is rewritten as a text block followed by a cache point.
 *
 * Change detection also covers inline `cache_control` keys. Stripping those
 * does not change the block count, so a length comparison alone would leave
 * such messages, and their stale markers, in place.
 *
 * @param messages - Conversation to annotate; the array itself is not mutated.
 * @returns A shallow copy of `messages` with affected entries replaced by clones.
 */
function addLatestUserOnlyBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const updatedMessages = [...messages];
  let addedCachePoint = false;

  for (let i = updatedMessages.length - 1; i >= 0; i--) {
    const message = updatedMessages[i];
    const messageType = message.getType();
    if (messageType === 'system') {
      continue;
    }

    const content = message.content;
    const hasArrayContent = Array.isArray(content);
    const canAddCache =
      !addedCachePoint &&
      messageType === 'human' &&
      (typeof content === 'string' || hasArrayContent);

    // Nothing to add and nothing to strip (string content carries no markers).
    if (!canAddCache && !hasArrayContent) {
      continue;
    }

    let workingContent: MessageContentComplex[];

    if (hasArrayContent) {
      const originalBlocks = content as MessageContentComplex[];
      workingContent = stripCacheMarkers(originalBlocks);

      // Dropped cachePoint blocks shrink the array; removed `cache_control`
      // keys do not, so check for them explicitly.
      let modified =
        workingContent.length !== originalBlocks.length ||
        originalBlocks.some((block) => 'cache_control' in block);

      // NOTE(review): a text block with no `text` field also passes this
      // check (`undefined !== ''`); kept as-is for parity with the legacy helper.
      const lastTextIndex = workingContent.findLastIndex((block) => {
        const type = (block as { type?: string }).type;
        const text = (block as { text?: string }).text;
        return (
          (type === 'text' || type === 'input_text') && text?.trim() !== ''
        );
      });

      if (canAddCache && lastTextIndex >= 0) {
        workingContent.splice(lastTextIndex + 1, 0, cachePointBlock());
        addedCachePoint = true;
        modified = true;
      }

      if (!modified) {
        continue;
      }
    } else if (
      typeof content === 'string' &&
      content.trim() !== '' &&
      canAddCache
    ) {
      workingContent = [
        { type: 'text', text: content } as MessageContentComplex,
        cachePointBlock(),
      ];
      addedCachePoint = true;
    } else {
      continue;
    }

    updatedMessages[i] = cloneLiveMessage(message, workingContent);
  }

  return updatedMessages;
}
297
+
298
/**
 * Generates `count` numbered filler lines prefixed with `label`, joined by
 * newlines. Used to pad benchmark prompts with deterministic text.
 *
 * @param label - Prefix placed at the start of every line.
 * @param count - Number of lines to generate; 0 yields an empty string.
 * @returns The generated lines joined with `\n`.
 */
function repeated(label: string, count: number): string {
  const filler =
    'stable schema, metric definition, access policy, dashboard note, and query planning guidance.';
  const lines = Array.from({ length: count }).map(
    (_unused, index) => `${label} reference ${index}: ${filler}`
  );
  return lines.join('\n');
}
305
+
306
/**
 * Creates the assistant turn that calls `lookup_cache_probe` for one step.
 *
 * @param marker - Per-call marker embedded in the text and the tool-call id.
 * @param step - Probe step number, used in the text, id and call arguments.
 * @param volatileToolPayload - Marker-specific filler appended to the text.
 */
function buildCacheProbeCall(
  marker: string,
  step: number,
  volatileToolPayload: string
): AIMessage {
  return new AIMessage({
    content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
    tool_calls: [
      {
        id: `call_${marker}_${step}`,
        name: 'lookup_cache_probe',
        args: { step },
      },
    ],
  });
}

/**
 * Builds the two-step tool loop shared by both benchmark conversations:
 * probe call 1, its result, probe call 2, and the final result that tells
 * the model to reply with `marker`.
 *
 * @param marker - Per-call marker; every message in the loop contains it.
 * @param volatileToolPayload - Marker-specific filler appended to each message.
 */
function buildVolatileToolLoop(
  marker: string,
  volatileToolPayload: string
): BaseMessage[] {
  return [
    buildCacheProbeCall(marker, 1, volatileToolPayload),
    new ToolMessage({
      content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
      tool_call_id: `call_${marker}_1`,
    }),
    buildCacheProbeCall(marker, 2, volatileToolPayload),
    new ToolMessage({
      content: [
        `Final tool result marker: ${marker}.`,
        'Reply with the marker and no extra explanation.',
        volatileToolPayload,
      ].join('\n'),
      tool_call_id: `call_${marker}_2`,
    }),
  ];
}

/**
 * Builds a single-user-turn conversation followed by a two-step tool loop.
 *
 * The first human message depends only on `nonce`, so it is identical across
 * calls that share a nonce; every later message embeds `marker` and therefore
 * changes between calls.
 *
 * @param nonce - Benchmark-case identifier baked into the stable user turn.
 * @param marker - Per-call marker the final tool result asks the model to echo.
 * @returns Human message followed by the four tool-loop messages.
 */
function buildToolLoopMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const stableUserContext = [
    `Bedrock prompt cache placement benchmark ${nonce}.`,
    'The first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} user-context`, 190),
    'Use the final tool result to answer with the requested marker.',
  ].join('\n');
  const volatileToolPayload = repeated(`${nonce} volatile-${marker}`, 70);

  return [
    new HumanMessage(stableUserContext),
    ...buildVolatileToolLoop(marker, volatileToolPayload),
  ];
}

/**
 * Builds a two-user-turn conversation followed by a two-step tool loop.
 *
 * The first human message and the assistant acknowledgement do not depend on
 * `marker`; the second human message and the whole tool loop do.
 *
 * @param nonce - Benchmark-case identifier baked into the stable first turn.
 * @param marker - Per-call marker used in the latest user turn and tool loop.
 * @returns Stable human turn, assistant acknowledgement, latest human turn,
 *   then the four tool-loop messages.
 */
function buildMultiTurnToolMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const stableFirstUser = [
    `Bedrock multi-turn prompt cache benchmark ${nonce}.`,
    'This first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} stable-user-context`, 190),
  ].join('\n');
  const latestUser = [
    `Current user request marker: ${marker}.`,
    'Use the final tool result to answer with the marker only.',
    repeated(`${nonce} latest-user-${marker}`, 18),
  ].join('\n');
  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);

  return [
    new HumanMessage(stableFirstUser),
    new AIMessage('I will keep this stable context in mind.'),
    new HumanMessage(latestUser),
    ...buildVolatileToolLoop(marker, volatileToolPayload),
  ];
}
415
+
416
/**
 * Converts the `usage` section of a Converse response into the benchmark's
 * flat usage record.
 *
 * Missing counters default to 0; a missing `totalTokens` falls back to the
 * sum of input and output tokens.
 *
 * @param response - Converse response (only `usage` is read).
 * @param latencyMs - Measured call duration, copied into the result.
 * @returns Normalized usage figures.
 * @throws Error when `response.usage` is null or undefined.
 */
function extractCacheUsage(
  response: ConverseUsageResponse,
  latencyMs: number
): BedrockCacheUsage {
  const usage = response.usage;
  if (usage === null || usage === undefined) {
    throw new Error('Missing Bedrock usage metadata for cache benchmark');
  }

  const promptTokens = usage.inputTokens ?? 0;
  const completionTokens = usage.outputTokens ?? 0;
  const reportedTotal = usage.totalTokens;

  return {
    inputTokens: promptTokens,
    outputTokens: completionTokens,
    totalTokens: reportedTotal ?? promptTokens + completionTokens,
    cacheCreation: usage.cacheWriteInputTokens ?? 0,
    cacheRead: usage.cacheReadInputTokens ?? 0,
    latencyMs,
  };
}
435
+
436
/**
 * Sends one benchmark conversation to Bedrock via `ConverseCommand` and
 * returns the usage figures from the response.
 *
 * The messages are converted with `convertToConverseMessages`; the `system`
 * field is only included when the conversion produced system blocks. The
 * request always carries `benchmarkToolConfig` and asks for at most 16
 * output tokens at temperature 0.
 *
 * @param client - Bedrock runtime client used to send the request.
 * @param messages - Conversation to send, already carrying any cache points.
 * @returns Usage counters plus the elapsed wall-clock time of the call.
 * @throws Error (from `extractCacheUsage`) when the response has no usage data.
 */
async function runConverseCacheBenchmarkTurn({
  client,
  messages,
}: {
  client: BedrockRuntimeClient;
  messages: BaseMessage[];
}): Promise<BedrockCacheUsage> {
  const converted = convertToConverseMessages(messages);
  const systemBlocks = converted.converseSystem;
  const optionalSystem = systemBlocks.length > 0 ? { system: systemBlocks } : {};

  const startedAt = Date.now();
  const command = new ConverseCommand({
    modelId: model,
    ...optionalSystem,
    messages: converted.converseMessages,
    toolConfig: benchmarkToolConfig,
    inferenceConfig: { maxTokens: 16, temperature: 0 },
  });
  const response = await client.send(command);
  const elapsedMs = Date.now() - startedAt;

  return extractCacheUsage(response as ConverseUsageResponse, elapsedMs);
}
461
+
80
462
  describeIfLive('AgentContext Bedrock prompt cache live API', () => {
81
463
  it('caches only the stable system prefix while dynamic tail changes', async () => {
82
464
  const nonce = `agent-bedrock-cache-live-${Date.now()}`;
@@ -146,4 +528,126 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
146
528
  expect(second.text.toLowerCase()).toContain('bravo');
147
529
  expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
148
530
  }, 180_000);
531
+
532
+ it('reduces repeated cache writes versus the previous moving-tail placement', async () => {
533
+ const credentials = getCredentials();
534
+ const client = new BedrockRuntimeClient({
535
+ region,
536
+ ...(credentials != null ? { credentials } : {}),
537
+ });
538
+ const nonce = `bedrock-cache-placement-${Date.now()}`;
539
+ const legacyNonce = `${nonce}-legacy`;
540
+ const currentNonce = `${nonce}-current`;
541
+
542
+ const legacyFirst = await runConverseCacheBenchmarkTurn({
543
+ client,
544
+ messages: addLegacyMovingTailBedrockCacheControl(
545
+ buildToolLoopMessages({ nonce: legacyNonce, marker: 'alpha' })
546
+ ),
547
+ });
548
+
549
+ await waitForCachePropagation();
550
+
551
+ const legacySecond = await runConverseCacheBenchmarkTurn({
552
+ client,
553
+ messages: addLegacyMovingTailBedrockCacheControl(
554
+ buildToolLoopMessages({ nonce: legacyNonce, marker: 'bravo' })
555
+ ),
556
+ });
557
+
558
+ const currentFirst = await runConverseCacheBenchmarkTurn({
559
+ client,
560
+ messages: addBedrockCacheControl(
561
+ buildToolLoopMessages({ nonce: currentNonce, marker: 'alpha' })
562
+ ),
563
+ });
564
+
565
+ await waitForCachePropagation();
566
+
567
+ const currentSecond = await runConverseCacheBenchmarkTurn({
568
+ client,
569
+ messages: addBedrockCacheControl(
570
+ buildToolLoopMessages({ nonce: currentNonce, marker: 'bravo' })
571
+ ),
572
+ });
573
+
574
+ const cacheWriteReduction =
575
+ legacySecond.cacheCreation - currentSecond.cacheCreation;
576
+ process.stdout.write(
577
+ `Bedrock cache placement benchmark ${JSON.stringify({
578
+ legacyFirst,
579
+ legacySecond,
580
+ currentFirst,
581
+ currentSecond,
582
+ cacheWriteReduction,
583
+ })}\n`
584
+ );
585
+
586
+ expect(currentSecond.cacheRead).toBeGreaterThan(0);
587
+ expect(cacheWriteReduction).toBeGreaterThan(0);
588
+ expect(currentSecond.cacheCreation).toBeLessThan(
589
+ Math.ceil(legacySecond.cacheCreation * 0.5)
590
+ );
591
+ }, 240_000);
592
+
593
+ it('reuses prior user cache points when the latest user turn changes', async () => {
594
+ const credentials = getCredentials();
595
+ const client = new BedrockRuntimeClient({
596
+ region,
597
+ ...(credentials != null ? { credentials } : {}),
598
+ });
599
+ const nonce = `bedrock-multiturn-cache-placement-${Date.now()}`;
600
+ const currentNonce = `${nonce}-current`;
601
+ const latestOnlyNonce = `${nonce}-latest-only`;
602
+
603
+ const currentFirst = await runConverseCacheBenchmarkTurn({
604
+ client,
605
+ messages: addBedrockCacheControl(
606
+ buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'alpha' })
607
+ ),
608
+ });
609
+
610
+ await waitForCachePropagation();
611
+
612
+ const currentSecond = await runConverseCacheBenchmarkTurn({
613
+ client,
614
+ messages: addBedrockCacheControl(
615
+ buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'bravo' })
616
+ ),
617
+ });
618
+
619
+ const latestOnlyFirst = await runConverseCacheBenchmarkTurn({
620
+ client,
621
+ messages: addLatestUserOnlyBedrockCacheControl(
622
+ buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'alpha' })
623
+ ),
624
+ });
625
+
626
+ await waitForCachePropagation();
627
+
628
+ const latestOnlySecond = await runConverseCacheBenchmarkTurn({
629
+ client,
630
+ messages: addLatestUserOnlyBedrockCacheControl(
631
+ buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'bravo' })
632
+ ),
633
+ });
634
+
635
+ process.stdout.write(
636
+ `Bedrock multi-turn cache placement benchmark ${JSON.stringify({
637
+ currentFirst,
638
+ currentSecond,
639
+ latestOnlyFirst,
640
+ latestOnlySecond,
641
+ cacheWriteDelta:
642
+ currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
643
+ })}\n`
644
+ );
645
+
646
+ expect(currentSecond.cacheRead).toBeGreaterThan(
647
+ latestOnlySecond.cacheRead
648
+ );
649
+ expect(currentSecond.cacheCreation).toBeLessThan(
650
+ latestOnlySecond.cacheCreation
651
+ );
652
+ }, 240_000);
149
653
  });
@@ -81,6 +81,7 @@ import {
81
81
  import { HandlerRegistry } from '@/events';
82
82
  import { ChatOpenAI } from '@/llm/openai';
83
83
  import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
84
+ import { partitionAndMarkBedrockToolCache } from '@/llm/bedrock/toolCache';
84
85
  import type { HookRegistry } from '@/hooks';
85
86
 
86
87
  const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
@@ -962,6 +963,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
962
963
  rawToolsForBinding,
963
964
  makeIsDeferred(agentContext.toolDefinitions)
964
965
  ) ?? rawToolsForBinding;
966
+ } else if (
967
+ agentContext.provider === Providers.BEDROCK &&
968
+ (
969
+ agentContext.clientOptions as
970
+ | t.BedrockAnthropicClientOptions
971
+ | undefined
972
+ )?.promptCache === true
973
+ ) {
974
+ toolsForBinding =
975
+ partitionAndMarkBedrockToolCache(
976
+ rawToolsForBinding,
977
+ makeIsDeferred(agentContext.toolDefinitions)
978
+ ) ?? rawToolsForBinding;
965
979
  }
966
980
 
967
981
  let model =
@@ -20,6 +20,7 @@ const langfuseConfigKey = createContextKey('librechat.langfuse.config');
20
20
  const toolOutputTracingStorage =
21
21
  new AsyncLocalStorage<ResolvedLangfuseToolOutputTracingConfig>();
22
22
  const langfuseConfigStorage = new AsyncLocalStorage<t.LangfuseConfig>();
23
+ const LANGGRAPH_TOOL_NODE_PREFIX = 'tools=';
23
24
 
24
25
  const CHAT_ROLES = new Set([
25
26
  'assistant',
@@ -446,6 +447,26 @@ function isToolObservation(attributes: Record<string, unknown>): boolean {
446
447
  return typeof type === 'string' && type.toLowerCase() === 'tool';
447
448
  }
448
449
 
450
/**
 * Re-labels a LangGraph tool-node span as a `tool` observation.
 *
 * The attributes object is mutated in place, and only when both hold:
 * - the observation type attribute is the string `span` (case-insensitive);
 * - the `langgraph_node` metadata attribute is a string starting with
 *   `LANGGRAPH_TOOL_NODE_PREFIX`.
 *
 * @param attributes - Span attributes to inspect and possibly update.
 */
function classifyLangGraphToolNodeSpan(
  attributes: Record<string, unknown>
): void {
  const typeKey = LangfuseOtelSpanAttributes.OBSERVATION_TYPE;
  const observationType = attributes[typeKey];
  const isGenericSpan =
    typeof observationType === 'string' &&
    observationType.toLowerCase() === 'span';
  if (!isGenericSpan) {
    return;
  }

  const nodeKey = `${LangfuseOtelSpanAttributes.OBSERVATION_METADATA}.langgraph_node`;
  const nodeName = attributes[nodeKey];
  if (
    typeof nodeName !== 'string' ||
    !nodeName.startsWith(LANGGRAPH_TOOL_NODE_PREFIX)
  ) {
    return;
  }

  attributes[typeKey] = 'tool';
}
469
+
449
470
  function redactToolObservationOutput(
450
471
  span: ReadableSpan,
451
472
  attributes: Record<string, unknown>,
@@ -469,11 +490,13 @@ export function redactLangfuseSpanToolOutputs(
469
490
  span: ReadableSpan,
470
491
  config: ResolvedLangfuseToolOutputTracingConfig
471
492
  ): void {
493
+ const attributes = (span as SpanWithAttributes).attributes;
494
+ classifyLangGraphToolNodeSpan(attributes);
495
+
472
496
  if (!shouldApplyToolOutputRedaction(config)) {
473
497
  return;
474
498
  }
475
499
 
476
- const attributes = (span as SpanWithAttributes).attributes;
477
500
  redactToolObservationOutput(span, attributes, config);
478
501
 
479
502
  for (const key of [
@@ -618,10 +641,7 @@ function hasLangfuseConfigKeys(langfuse?: t.LangfuseConfig): boolean {
618
641
  if (langfuse == null) {
619
642
  return false;
620
643
  }
621
- return (
622
- isPresent(langfuse.secretKey) &&
623
- isPresent(langfuse.publicKey)
624
- );
644
+ return isPresent(langfuse.secretKey) && isPresent(langfuse.publicKey);
625
645
  }
626
646
 
627
647
  export function shouldTraceToolNodeForLangfuse({
@@ -639,8 +659,7 @@ export function shouldTraceToolNodeForLangfuse({
639
659
  const explicit = langfuse?.toolNodeTracing?.enabled;
640
660
  if (explicit != null) {
641
661
  return (
642
- explicit &&
643
- (hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
662
+ explicit && (hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
644
663
  );
645
664
  }
646
665
 
@@ -22,12 +22,17 @@
22
22
  */
23
23
 
24
24
  import { ChatBedrockConverse } from '@langchain/aws';
25
- import { ConverseStreamCommand } from '@aws-sdk/client-bedrock-runtime';
25
+ import {
26
+ ConverseStreamCommand,
27
+ type GuardrailConfiguration,
28
+ type GuardrailStreamConfiguration,
29
+ } from '@aws-sdk/client-bedrock-runtime';
26
30
  import { AIMessageChunk } from '@langchain/core/messages';
27
31
  import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
28
32
  import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
29
33
  import type { ChatBedrockConverseInput } from '@langchain/aws';
30
34
  import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
35
+ import { insertBedrockToolCachePoint } from './toolCache';
31
36
  import {
32
37
  convertToConverseMessages,
33
38
  handleConverseStreamContentBlockStart,
@@ -42,6 +47,9 @@ import {
42
47
  */
43
48
  export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
44
49
 
50
/**
 * Guardrail settings accepted by this wrapper: every field of the SDK's
 * `GuardrailConfiguration` plus the `streamProcessingMode` field taken from
 * `GuardrailStreamConfiguration`.
 */
export type CustomGuardrailConfiguration = Pick<
  GuardrailStreamConfiguration,
  'streamProcessingMode'
> &
  GuardrailConfiguration;
52
+
45
53
  /**
46
54
  * Extended input interface with additional features:
47
55
  * - applicationInferenceProfile: Use an inference profile ARN instead of model ID
@@ -49,6 +57,17 @@ export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
49
57
  */
50
58
  export interface CustomChatBedrockConverseInput
51
59
  extends ChatBedrockConverseInput {
60
+ /**
61
+ * Enables Bedrock prompt cache checkpoints for message and tool prefixes.
62
+ */
63
+ promptCache?: boolean;
64
+
65
+ /**
66
+ * Guardrail configuration for Converse and ConverseStream invocations.
67
+ * `streamProcessingMode` is only used by ConverseStream.
68
+ */
69
+ guardrailConfig?: CustomGuardrailConfiguration;
70
+
52
71
  /**
53
72
  * Application Inference Profile ARN to use for the model.
54
73
  * For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
@@ -80,9 +99,15 @@ export interface CustomChatBedrockConverseInput
80
99
  */
81
100
  export interface CustomChatBedrockConverseCallOptions {
82
101
  serviceTier?: ServiceTierType;
102
+ guardrailConfig?: CustomGuardrailConfiguration;
83
103
  }
84
104
 
85
105
  export class CustomChatBedrockConverse extends ChatBedrockConverse {
106
+ /**
107
+ * Whether to insert Bedrock prompt cache checkpoints when available.
108
+ */
109
+ promptCache?: boolean;
110
+
86
111
  /**
87
112
  * Application Inference Profile ARN to use instead of model ID.
88
113
  */
@@ -95,6 +120,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
95
120
 
96
121
  constructor(fields?: CustomChatBedrockConverseInput) {
97
122
  super(fields);
123
+ this.promptCache = fields?.promptCache;
98
124
  this.applicationInferenceProfile = fields?.applicationInferenceProfile;
99
125
  this.serviceTier = fields?.serviceTier;
100
126
  }
@@ -120,12 +146,17 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
120
146
  serviceTier?: { type: ServiceTierType };
121
147
  } {
122
148
  const baseParams = super.invocationParams(options);
149
+ const toolConfig =
150
+ this.promptCache === true
151
+ ? insertBedrockToolCachePoint(baseParams.toolConfig, true)
152
+ : baseParams.toolConfig;
123
153
 
124
154
  /** Service tier from options or fall back to class-level setting */
125
155
  const serviceTierType = options?.serviceTier ?? this.serviceTier;
126
156
 
127
157
  return {
128
158
  ...baseParams,
159
+ toolConfig,
129
160
  serviceTier: serviceTierType ? { type: serviceTierType } : undefined,
130
161
  };
131
162
  }