@librechat/agents 3.2.36 → 3.2.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +1 -1
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +7 -8
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/langfuse.cjs +16 -5
  6. package/dist/cjs/langfuse.cjs.map +1 -1
  7. package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
  8. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +92 -3
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +24 -4
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/main.cjs +2 -0
  14. package/dist/cjs/messages/cache.cjs +183 -0
  15. package/dist/cjs/messages/cache.cjs.map +1 -1
  16. package/dist/cjs/summarization/node.cjs +1 -1
  17. package/dist/cjs/summarization/node.cjs.map +1 -1
  18. package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
  19. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
  20. package/dist/esm/agents/AgentContext.mjs +2 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/graphs/Graph.mjs +8 -9
  23. package/dist/esm/graphs/Graph.mjs.map +1 -1
  24. package/dist/esm/langfuse.mjs +16 -5
  25. package/dist/esm/langfuse.mjs.map +1 -1
  26. package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
  27. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  28. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +92 -3
  29. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  30. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +24 -4
  31. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  32. package/dist/esm/main.mjs +2 -2
  33. package/dist/esm/messages/cache.mjs +182 -1
  34. package/dist/esm/messages/cache.mjs.map +1 -1
  35. package/dist/esm/summarization/node.mjs +2 -2
  36. package/dist/esm/summarization/node.mjs.map +1 -1
  37. package/dist/esm/tools/toolOutputReferences.mjs +28 -14
  38. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
  39. package/dist/types/messages/cache.d.ts +40 -0
  40. package/dist/types/types/graph.d.ts +2 -0
  41. package/package.json +8 -5
  42. package/src/agents/AgentContext.ts +2 -2
  43. package/src/agents/__tests__/AgentContext.test.ts +3 -9
  44. package/src/graphs/Graph.ts +65 -36
  45. package/src/langfuse.ts +38 -4
  46. package/src/langfuseToolOutputTracing.ts +18 -0
  47. package/src/llm/anthropic/utils/message_inputs.ts +131 -3
  48. package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
  49. package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
  50. package/src/llm/bedrock/utils/message_inputs.ts +46 -4
  51. package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
  52. package/src/messages/cache.tail.test.ts +340 -0
  53. package/src/messages/cache.ts +266 -0
  54. package/src/messages/tailCacheConversion.test.ts +161 -0
  55. package/src/scripts/bench-prompt-cache.ts +479 -0
  56. package/src/specs/langfuse-config.test.ts +69 -2
  57. package/src/specs/langfuse-metadata.test.ts +44 -0
  58. package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
  59. package/src/summarization/node.ts +2 -2
  60. package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
  61. package/src/tools/toolOutputReferences.ts +34 -20
  62. package/src/types/graph.ts +2 -0
@@ -254,6 +254,152 @@ function isCachePoint(block: MessageContentComplex): boolean {
254
254
  return 'cachePoint' in block && !('type' in block);
255
255
  }
256
256
 
257
/**
 * Content-block `type` values that are never allowed to carry the tail cache
 * breakpoint, because a marker placed on them would be gone by the time the
 * request reaches the model:
 *
 * - `thinking`, `redacted_thinking` — native Anthropic reasoning; the API
 *   rejects `cache_control` on these blocks.
 * - `reasoning_content`, `reasoning`, `think` — reasoning produced by other
 *   providers (Bedrock, Google, LibreChat), which
 *   `_convertMessagesToAnthropicPayload` DROPS from assistant turns during a
 *   cross-provider handoff.
 * - `input_json_delta` — persisted partial tool-input deltas, likewise DROPPED
 *   by `_convertMessagesToAnthropicPayload` (the assembled input is restored
 *   onto the tool_use block).
 *
 * Putting the only breakpoint on a block that is about to vanish silently
 * disables tail caching, which is why every one of these is excluded.
 */
const NON_ANCHORABLE_BLOCK_TYPES = new Set<string>([
  // Native Anthropic reasoning.
  'thinking',
  'redacted_thinking',
  // Reasoning blocks originating from other providers.
  'reasoning_content',
  'reasoning',
  'think',
  // Persisted partial tool-input deltas.
  'input_json_delta',
]);
279
+
280
/**
 * Decides whether `block` may carry the tail cache breakpoint.
 *
 * A block qualifies only when all of the following hold:
 * - it is not a Bedrock cache point (per `isCachePoint`);
 * - it has a `type`, and that type is not listed in
 *   {@link NON_ANCHORABLE_BLOCK_TYPES} (reasoning / dropped-delta blocks);
 * - if it is a `text` block, its `text` is present and not whitespace-only,
 *   since empty text blocks are not cacheable.
 *
 * @param block - Content block to inspect.
 * @returns `true` when the block is an acceptable anchor, otherwise `false`.
 */
function isTailCacheableBlock(block: MessageContentComplex): boolean {
  if (isCachePoint(block)) {
    return false;
  }

  const candidate = block as { type?: string; text?: string };
  const blockType = candidate.type;
  const anchorableType =
    blockType != null && !NON_ANCHORABLE_BLOCK_TYPES.has(blockType);
  if (!anchorableType) {
    return false;
  }

  // Every surviving non-text block type is accepted as-is.
  if (blockType !== 'text') {
    return true;
  }

  const body = candidate.text;
  return body != null && body.trim() !== '';
}
301
+
302
/**
 * Anthropic API: single tail cache breakpoint (default strategy).
 *
 * Exactly ONE `cache_control` marker is placed, on the last cacheable block of
 * the final non-synthetic message — the same idea as the Claude Code strategy
 * (`markerIndex = messages.length - 1`). Since the marker always sits on the
 * true tail, the whole conversation prefix is written once and read back on the
 * next turn while the history grows append-only. The rolling "last two user
 * messages" markers, by contrast, leave freshly appended tool/assistant turns
 * outside the cached prefix and re-write large spans on every step.
 *
 * The messages are walked once from the end. During that walk every stale
 * marker (Anthropic `cache_control` as well as Bedrock cache points) is
 * removed, so a single marker remains. Synthetic skill/meta messages are never
 * used as the anchor (their volatile content must not pin the cache), but stale
 * markers are still removed from them.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns A new array in which only the messages that had to change are
 *   cloned; the input itself is returned when it is empty or not an array.
 */
export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
  messages: T[]
): T[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  const result: T[] = Array.from(messages);
  let anchored = false;

  for (let idx = result.length - 1; idx >= 0; idx -= 1) {
    const message = result[idx];
    const rawContent = message.content;
    // Only the first eligible message seen from the end may take the marker.
    const mayAnchor = !anchored && !isSyntheticMetaMessage(message);

    if (Array.isArray(rawContent)) {
      const blocks: MessageContentComplex[] = [];
      let changed = false;
      let anchorAt = -1;

      for (const block of rawContent as MessageContentComplex[]) {
        // Bedrock cache points are dropped outright.
        if (isCachePoint(block)) {
          changed = true;
          continue;
        }

        const copy = { ...block };
        if ('cache_control' in copy) {
          delete (copy as Record<string, unknown>).cache_control;
          changed = true;
        }

        // Keep overwriting so the index ends up on the LAST cacheable block.
        if (mayAnchor && isTailCacheableBlock(copy as MessageContentComplex)) {
          anchorAt = blocks.length;
        }
        blocks.push(copy as MessageContentComplex);
      }

      if (mayAnchor && anchorAt >= 0) {
        (blocks[anchorAt] as Anthropic.TextBlockParam).cache_control = {
          type: 'ephemeral',
        };
        anchored = true;
        changed = true;
      }

      // Untouched messages keep their original reference.
      if (changed) {
        result[idx] = cloneMessage(message as MessageWithContent, blocks) as T;
      }
      continue;
    }

    // String content carries no stale markers; it only matters as an anchor.
    const anchorsOnString =
      mayAnchor && typeof rawContent === 'string' && rawContent.trim() !== '';
    if (!anchorsOnString) {
      continue;
    }

    const promoted = [
      { type: 'text', text: rawContent, cache_control: { type: 'ephemeral' } },
    ] as unknown as MessageContentComplex[];
    anchored = true;
    result[idx] = cloneMessage(message as MessageWithContent, promoted) as T;
  }

  return result;
}
402
+
257
403
  function getMessageRole(message: MessageWithContent): string | undefined {
258
404
  if (message instanceof BaseMessage) {
259
405
  return message.getType();
@@ -625,3 +771,123 @@ export function addBedrockCacheControl<
625
771
 
626
772
  return updatedMessages;
627
773
  }
774
+
775
/**
 * Bedrock Converse API: single tail cache breakpoint (default strategy).
 *
 * The Bedrock counterpart of {@link addTailCacheControl}. Strips ALL existing
 * cache control (Bedrock cache points and Anthropic `cache_control`) from every
 * message, then inserts exactly ONE `{ cachePoint: { type: 'default' } }` block
 * immediately after the last non-empty text block of the most recent
 * non-synthetic, non-system message. Anchoring on the rolling tail keeps the
 * cached prefix append-only as the conversation grows, instead of re-writing
 * large spans every turn with the legacy "last two user messages" cache points.
 *
 * System messages are passed through `sanitizeBedrockSystemMessage` and are
 * never anchored. Synthetic skill/meta messages are skipped as anchors so their
 * volatile content cannot pin the cache. "Non-empty" means "contains
 * non-whitespace text" for both string content and array text blocks, so a
 * blank string message is never chosen as the anchor.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns A new array; only messages that require modification are cloned.
 *   The input itself is returned when it is empty or not an array.
 */
export function addBedrockTailCacheControl<
  T extends MessageWithContent & { getType?: () => string; role?: string },
>(messages: T[]): T[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  const updatedMessages: T[] = [...messages];
  let cachePointPlaced = false;

  // Walk from the tail so the first eligible message becomes the anchor.
  for (let i = updatedMessages.length - 1; i >= 0; i--) {
    const originalMessage = updatedMessages[i];
    const messageType =
      'getType' in originalMessage &&
      typeof originalMessage.getType === 'function'
        ? originalMessage.getType()
        : undefined;
    const messageRole =
      'role' in originalMessage && typeof originalMessage.role === 'string'
        ? originalMessage.role
        : undefined;

    const isSystemMessage =
      messageType === 'system' || messageRole === 'system';
    if (isSystemMessage) {
      updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
      continue;
    }

    const content = originalMessage.content;
    const hasSerializationProps =
      'lc_kwargs' in originalMessage ||
      'lc_serializable' in originalMessage ||
      'lc_namespace' in originalMessage;
    const hasArrayContent = Array.isArray(content);
    // Whitespace-only strings count as empty, matching the `trim()` check the
    // array path applies to text blocks.
    const isBlankString = typeof content === 'string' && content.trim() === '';
    const canPlaceCachePoint =
      !cachePointPlaced &&
      !isBlankString &&
      !isSyntheticMetaMessage(originalMessage) &&
      (typeof content === 'string' || hasArrayContent);

    if (!canPlaceCachePoint && !hasArrayContent && !hasSerializationProps) {
      continue;
    }

    let workingContent: string | MessageContentComplex[];
    // NOTE(review): messages carrying LangChain serialization props are always
    // re-cloned — presumably so `cloneMessage` can rebuild them; confirm.
    let modified = hasSerializationProps;

    if (hasArrayContent) {
      const src = content as MessageContentComplex[];
      const blocks: MessageContentComplex[] = [];
      let lastNonEmptyTextIndex = -1;

      for (let j = 0; j < src.length; j++) {
        const block = src[j];
        // Stale Bedrock cache points are dropped outright.
        if (isCachePoint(block)) {
          modified = true;
          continue;
        }
        const cloned = { ...block };
        if ('cache_control' in cloned) {
          delete (cloned as Record<string, unknown>).cache_control;
          modified = true;
        }
        const type = (cloned as { type?: string }).type;
        if (type === ContentTypes.TEXT || type === 'text') {
          const text = (cloned as { text?: string }).text;
          if (text != null && text.trim() !== '') {
            lastNonEmptyTextIndex = blocks.length;
          }
        }
        blocks.push(cloned as MessageContentComplex);
      }

      if (canPlaceCachePoint && lastNonEmptyTextIndex >= 0) {
        blocks.splice(lastNonEmptyTextIndex + 1, 0, {
          cachePoint: { type: 'default' },
        } as MessageContentComplex);
        cachePointPlaced = true;
        modified = true;
      }

      // Checked AFTER the placement attempt: an anchor candidate that had no
      // non-empty text block and nothing to strip keeps its original reference
      // instead of being cloned for no reason.
      if (!modified) {
        continue;
      }
      workingContent = blocks;
    } else if (typeof content === 'string' && canPlaceCachePoint) {
      workingContent = [
        { type: ContentTypes.TEXT, text: content },
        { cachePoint: { type: 'default' } } as MessageContentComplex,
      ];
      cachePointPlaced = true;
    } else if (typeof content === 'string' && hasSerializationProps) {
      workingContent = content;
    } else {
      continue;
    }

    updatedMessages[i] = cloneMessage(originalMessage, workingContent);
  }

  return updatedMessages;
}
@@ -0,0 +1,161 @@
1
+ import {
2
+ HumanMessage,
3
+ AIMessage,
4
+ ToolMessage,
5
+ type BaseMessage,
6
+ type MessageContentComplex,
7
+ } from '@langchain/core/messages';
8
+ import { _convertMessagesToAnthropicPayload } from '@/llm/anthropic/utils/message_inputs';
9
+ import { ensureThinkingBlockInMessages } from './format';
10
+ import { toLangChainContent } from './langchain';
11
+ import { addTailCacheControl } from './cache';
12
+ import { Providers } from '@/common';
13
+
14
+ /**
15
+ * Regression coverage for the single tail prompt-cache breakpoint surviving all
16
+ * the way into the final Anthropic payload — i.e. the marker must land on a
17
+ * block that actually ships, not one that downstream conversion / folding
18
+ * removes. Two ways the breakpoint was silently lost:
19
+ *
20
+ * - Foreign reasoning tail: addTailCacheControl anchored on a
21
+ * `reasoning_content`/`reasoning`/`think` block, which the Anthropic
22
+ * converter drops on assistant turns (cross-provider handoff).
23
+ * - Thinking-fold ordering: marking before ensureThinkingBlockInMessages let
24
+ * the fold rewrite the anchored AI→Tool tail into a `[Previous agent
25
+ * context]` HumanMessage that copies text but not cache_control.
26
+ */
27
+
28
/** Minimal view of a converted payload message: only `content` is inspected. */
type PayloadMessage = { content: unknown };

/**
 * Reports whether `block` is a non-null object that has a `cache_control` key.
 * Only the presence of the key is checked, not its value.
 */
function hasCacheControl(block: unknown): boolean {
  if (block === null || typeof block !== 'object') {
    return false;
  }
  return 'cache_control' in block;
}
35
+
36
/**
 * Reports whether at least one block in `messages` carries `cache_control`.
 * For each message with array content, both the top-level blocks and the
 * entries of a block's own array `content` (e.g. inside a tool_result) are
 * inspected. Messages whose content is not an array are ignored.
 */
function breakpointSurvives(messages: PayloadMessage[]): boolean {
  for (const { content } of messages) {
    const blocks: unknown[] = Array.isArray(content) ? content : [];
    for (const block of blocks) {
      if (hasCacheControl(block)) {
        return true;
      }
      const nested = (block as { content?: unknown }).content;
      if (!Array.isArray(nested)) {
        continue;
      }
      for (const child of nested) {
        if (hasCacheControl(child)) {
          return true;
        }
      }
    }
  }
  return false;
}
54
+
55
describe('tail breakpoint survives Anthropic conversion', () => {
  /** Runs the Anthropic converter and returns the resulting message list. */
  const shippedMessages = (
    input: Parameters<typeof _convertMessagesToAnthropicPayload>[0]
  ): PayloadMessage[] =>
    _convertMessagesToAnthropicPayload(input).messages as PayloadMessage[];

  /** A historical non-thinking AI→Tool chain at the tail (no trailing human). */
  const toolChainTail = (): BaseMessage[] => [
    new HumanMessage('do the thing'),
    new AIMessage({
      content: '',
      tool_calls: [{ id: 't1', name: 'search', args: { q: 'x' } }],
    }),
    new ToolMessage({ tool_call_id: 't1', content: 'tool output text' }),
  ];

  test('foreign reasoning tail keeps a usable breakpoint (anchored on text)', () => {
    const assistantContent = toLangChainContent([
      { type: 'text', text: 'Here is my answer.' },
      { type: 'reasoning_content', reasoningText: { text: 'r' } },
    ] as MessageContentComplex[]);
    const messages: BaseMessage[] = [
      new HumanMessage('hello'),
      new AIMessage({ content: assistantContent }),
    ];

    const shipped = shippedMessages(addTailCacheControl(messages));

    expect(breakpointSurvives(shipped)).toBe(true);
  });

  test('string tool-result tail keeps a usable breakpoint on the tool_result block', () => {
    const messages: BaseMessage[] = [
      new HumanMessage('run it'),
      new AIMessage({
        content: 'calling',
        tool_calls: [{ id: 't1', name: 'search', args: {} }],
      }),
      new ToolMessage({ tool_call_id: 't1', content: 'result body' }),
    ];

    const shipped = shippedMessages(addTailCacheControl(messages));

    expect(breakpointSurvives(shipped)).toBe(true);

    // The marker belongs on the top-level tool_result block (the documented
    // cacheable position) and must NOT be nested inside tool_result.content.
    const isToolResult = (b: unknown): b is Record<string, unknown> =>
      b != null &&
      typeof b === 'object' &&
      'type' in b &&
      (b as { type?: string }).type === 'tool_result';
    const allBlocks = shipped.flatMap((m) =>
      Array.isArray(m.content) ? m.content : []
    );
    const toolResult = allBlocks.find(isToolResult) as
      | { cache_control?: unknown; content?: unknown }
      | undefined;

    expect(toolResult?.cache_control).toEqual({ type: 'ephemeral' });

    const inner = toolResult?.content;
    if (Array.isArray(inner)) {
      const nestedMarker = inner.some(
        (b) => b != null && typeof b === 'object' && 'cache_control' in b
      );
      expect(nestedMarker).toBe(false);
    }
  });

  test('marking AFTER the thinking fold preserves the breakpoint (Graph order)', () => {
    const messages = toolChainTail();

    // Graph order: fold first, THEN apply the tail marker.
    const folded = ensureThinkingBlockInMessages(
      messages,
      Providers.ANTHROPIC,
      undefined,
      messages.length
    );
    const shipped = shippedMessages(addTailCacheControl(folded));

    expect(breakpointSurvives(shipped)).toBe(true);
  });

  test('marking BEFORE the fold loses the breakpoint (guards the ordering)', () => {
    const messages = toolChainTail();

    // The buggy order: mark first, after which the fold drops the marker.
    const marked = addTailCacheControl(messages);
    const folded = ensureThinkingBlockInMessages(
      marked,
      Providers.ANTHROPIC,
      undefined,
      messages.length
    );
    const shipped = shippedMessages(folded);

    expect(breakpointSurvives(shipped)).toBe(false);
  });
});