@librechat/agents 3.2.36 → 3.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +1 -1
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +7 -8
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +16 -5
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +92 -3
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +24 -4
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/cache.cjs +183 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +2 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +8 -9
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +16 -5
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +92 -3
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +24 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/cache.mjs +182 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +2 -2
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +28 -14
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
- package/dist/types/messages/cache.d.ts +40 -0
- package/dist/types/types/graph.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +2 -2
- package/src/agents/__tests__/AgentContext.test.ts +3 -9
- package/src/graphs/Graph.ts +65 -36
- package/src/langfuse.ts +38 -4
- package/src/langfuseToolOutputTracing.ts +18 -0
- package/src/llm/anthropic/utils/message_inputs.ts +131 -3
- package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
- package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
- package/src/llm/bedrock/utils/message_inputs.ts +46 -4
- package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
- package/src/messages/cache.tail.test.ts +340 -0
- package/src/messages/cache.ts +266 -0
- package/src/messages/tailCacheConversion.test.ts +161 -0
- package/src/scripts/bench-prompt-cache.ts +479 -0
- package/src/specs/langfuse-config.test.ts +69 -2
- package/src/specs/langfuse-metadata.test.ts +44 -0
- package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
- package/src/summarization/node.ts +2 -2
- package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
- package/src/tools/toolOutputReferences.ts +34 -20
- package/src/types/graph.ts +2 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { AIMessage, HumanMessage } from '@langchain/core/messages';
|
|
2
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
3
|
+
import { convertToConverseMessages } from './message_inputs';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Native-Bedrock reasoning serialization. A `reasoning_content` block whose
|
|
7
|
+
* `reasoningText.text` is null/empty (e.g. a signature-only block that never
|
|
8
|
+
* merged with its text) is invalid for Bedrock Converse — it rejects with
|
|
9
|
+
* `...reasoningContent.reasoningText.text ... Member must not be null`. Such a
|
|
10
|
+
* block must be dropped on replay rather than sent; a block carrying real text
|
|
11
|
+
* is still converted.
|
|
12
|
+
*/
|
|
13
|
+
/** Shape returned by the converter under test. */
type ConverseResult = ReturnType<typeof convertToConverseMessages>;

/**
 * The subset of a converted Bedrock Converse content block that the
 * assertions in this file inspect. Every field is optional because a block
 * carries only one of these payloads at a time.
 */
interface ConverseBlock {
  /** Plain text payload. */
  text?: string;
  /** Reasoning payload (text and/or signature). */
  reasoningContent?: {
    reasoningText?: { text?: string; signature?: string };
  };
  /** Tool invocation payload. */
  toolUse?: {
    toolUseId?: string;
    name?: string;
    input?: Record<string, string>;
  };
}

/**
 * Returns the content blocks of the first assistant message in a conversion
 * result, or an empty list when there is no assistant message (or it has no
 * content).
 */
function assistantContent(result: ConverseResult): ConverseBlock[] {
  for (const message of result.converseMessages) {
    if (message.role === 'assistant') {
      return (message.content ?? []) as ConverseBlock[];
    }
  }
  return [];
}
|
|
30
|
+
|
|
31
|
+
describe('convertToConverseMessages — native Bedrock reasoning serialization', () => {
  /** True when any converted block still carries a `reasoningContent` payload. */
  const hasReasoning = (blocks: ConverseBlock[]): boolean =>
    blocks.some((block) => block.reasoningContent != null);

  it('drops a signature-only reasoning block, keeping text and tool calls', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('what data do you have?'),
      new AIMessage({
        content: [
          {
            type: 'reasoning_content',
            reasoningText: { signature: 'sig-abc' },
          },
          { type: 'text', text: 'Let me check your databases.' },
        ],
        tool_calls: [
          {
            id: 'tooluse_list',
            name: 'list_databases',
            args: {},
            type: 'tool_call',
          },
        ],
      }),
    ];

    // The conversion must succeed before its output is inspected.
    expect(() => convertToConverseMessages(conversation)).not.toThrow();
    const blocks = assistantContent(convertToConverseMessages(conversation));

    // Neither the reasoning block nor its signature may leak into the request.
    expect(hasReasoning(blocks)).toBe(false);
    expect(JSON.stringify(blocks)).not.toContain('sig-abc');

    // The sibling text and the tool call must survive.
    expect(
      blocks.some((block) => block.text === 'Let me check your databases.')
    ).toBe(true);
    const toolUseBlock = blocks.find((block) => block.toolUse != null);
    expect(toolUseBlock?.toolUse).toMatchObject({
      toolUseId: 'tooluse_list',
      name: 'list_databases',
    });
  });

  it('drops a reasoning block whose text is empty', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('hi'),
      new AIMessage({
        content: [
          {
            type: 'reasoning_content',
            reasoningText: { text: '', signature: 'sig' },
          },
          { type: 'text', text: 'answer' },
        ],
      }),
    ];

    const blocks = assistantContent(convertToConverseMessages(conversation));

    expect(hasReasoning(blocks)).toBe(false);
    expect(blocks.some((block) => block.text === 'answer')).toBe(true);
  });

  it('emits a placeholder (not empty content) when the only block is a signature-only reasoning block', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('hi'),
      new AIMessage({
        content: [
          { type: 'reasoning_content', reasoningText: { signature: 'sig' } },
        ],
      }),
    ];

    expect(() => convertToConverseMessages(conversation)).not.toThrow();
    const blocks = assistantContent(convertToConverseMessages(conversation));

    // The turn must not be left empty, and whatever fills it must be text.
    expect(blocks.length).toBeGreaterThan(0);
    expect(hasReasoning(blocks)).toBe(false);
    expect(blocks.every((block) => typeof block.text === 'string')).toBe(true);
  });

  it('still converts a reasoning block that carries text (not dropped)', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('hi'),
      new AIMessage({
        content: [
          {
            type: 'reasoning_content',
            reasoningText: {
              text: 'native bedrock reasoning',
              signature: 'sig',
            },
          },
          { type: 'text', text: 'answer' },
        ],
      }),
    ];

    const blocks = assistantContent(convertToConverseMessages(conversation));
    const reasoningBlock = blocks.find(
      (block) => block.reasoningContent != null
    );

    expect(reasoningBlock).toBeDefined();
    expect(reasoningBlock?.reasoningContent?.reasoningText?.text).toBe(
      'native bedrock reasoning'
    );
  });
});
|
|
@@ -72,6 +72,22 @@ export function langchainReasoningBlockToBedrockReasoningBlock(
|
|
|
72
72
|
throw new Error('Invalid reasoning content');
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
+
/**
 * Decides whether a reasoning block may be emitted as a Bedrock
 * `reasoningContent` entry.
 *
 * Bedrock Converse rejects a `reasoningText` whose `text` is null or empty
 * (for example a signature-only block that never merged with its text), so
 * callers drop such blocks instead of sending them.
 *
 * @param content - The reasoning block to inspect.
 * @returns `true` when the block has a `reasoningText` with non-empty `text`,
 * or — when it has no `reasoningText` at all — a non-empty `redactedContent`.
 */
function isSerializableBedrockReasoningBlock(
  content: MessageContentReasoningBlock
): boolean {
  const isNonEmpty = (value: string | null | undefined): boolean =>
    value != null && value !== '';

  // A present `reasoningText` decides on its own; `redactedContent` is only
  // consulted when there is no `reasoningText`.
  return content.reasoningText != null
    ? isNonEmpty(content.reasoningText.text)
    : isNonEmpty(content.redactedContent);
}
|
|
90
|
+
|
|
75
91
|
/**
|
|
76
92
|
* Concatenate consecutive reasoning blocks in content array.
|
|
77
93
|
*/
|
|
@@ -653,10 +669,17 @@ function convertAIMessageToConverseMessage(msg: BaseMessage): BedrockMessage {
|
|
|
653
669
|
contentBlocks.push({ text });
|
|
654
670
|
}
|
|
655
671
|
} else if (block.type === 'reasoning_content') {
|
|
672
|
+
const reasoningBlock = block as MessageContentReasoningBlock;
|
|
673
|
+
// Bedrock Converse rejects reasoningContent whose reasoningText.text is
|
|
674
|
+
// null/empty (a signature-only block that never merged with its text).
|
|
675
|
+
// Drop it rather than emit an invalid request; the empty-turn
|
|
676
|
+
// placeholder below covers a turn left with no content.
|
|
677
|
+
if (!isSerializableBedrockReasoningBlock(reasoningBlock)) {
|
|
678
|
+
return;
|
|
679
|
+
}
|
|
656
680
|
contentBlocks.push({
|
|
657
|
-
reasoningContent:
|
|
658
|
-
|
|
659
|
-
),
|
|
681
|
+
reasoningContent:
|
|
682
|
+
langchainReasoningBlockToBedrockReasoningBlock(reasoningBlock),
|
|
660
683
|
} as BedrockContentBlock);
|
|
661
684
|
} else if (isDefaultCachePoint(block)) {
|
|
662
685
|
contentBlocks.push({
|
|
@@ -832,15 +855,34 @@ function convertToolMessageToConverseMessage(msg: BaseMessage): BedrockMessage {
|
|
|
832
855
|
content = [{ text: String(msg.content) }];
|
|
833
856
|
}
|
|
834
857
|
|
|
858
|
+
// A `cachePoint` is a message-level ContentBlock — it is NOT a valid
|
|
859
|
+
// ToolResultContentBlock. A tail prompt-cache breakpoint that anchors on a
|
|
860
|
+
// tool result therefore ends up nested inside `toolResult.content`, which
|
|
861
|
+
// Bedrock silently ignores (no cache write, no cache read). Hoist any
|
|
862
|
+
// cachePoint(s) out of the tool result body so they sit as siblings after
|
|
863
|
+
// it, which is the only position Bedrock honors.
|
|
864
|
+
const toolResultContent: BedrockContentBlock[] = [];
|
|
865
|
+
const trailingCachePoints: BedrockContentBlock[] = [];
|
|
866
|
+
for (const block of content) {
|
|
867
|
+
if (isDefaultCachePoint(block)) {
|
|
868
|
+
trailingCachePoints.push({
|
|
869
|
+
cachePoint: { type: 'default' },
|
|
870
|
+
} as BedrockContentBlock);
|
|
871
|
+
} else {
|
|
872
|
+
toolResultContent.push(block);
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
|
|
835
876
|
return {
|
|
836
877
|
role: 'user',
|
|
837
878
|
content: [
|
|
838
879
|
{
|
|
839
880
|
toolResult: {
|
|
840
881
|
toolUseId: toolCallId,
|
|
841
|
-
content:
|
|
882
|
+
content: toolResultContent as { text: string }[],
|
|
842
883
|
},
|
|
843
884
|
},
|
|
885
|
+
...trailingCachePoints,
|
|
844
886
|
],
|
|
845
887
|
};
|
|
846
888
|
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { HumanMessage, AIMessage, ToolMessage } from '@langchain/core/messages';
|
|
2
|
+
import type {
|
|
3
|
+
BaseMessage,
|
|
4
|
+
MessageContentComplex,
|
|
5
|
+
} from '@langchain/core/messages';
|
|
6
|
+
import { addBedrockTailCacheControl } from '@/messages/cache';
|
|
7
|
+
import { convertToConverseMessages } from './message_inputs';
|
|
8
|
+
import { toLangChainContent } from '@/messages/langchain';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* A Bedrock `cachePoint` is a message-level ContentBlock and is NOT a valid
|
|
12
|
+
* `ToolResultContentBlock`. When the single tail prompt-cache breakpoint
|
|
13
|
+
* anchors on a tool result (the common agent-loop shape), the cachePoint must
|
|
14
|
+
* be hoisted out of `toolResult.content` to a message-level sibling — otherwise
|
|
15
|
+
* Bedrock silently drops the breakpoint (no cache write, no cache read),
|
|
16
|
+
* verified live against Bedrock Converse.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * The subset of a converted Bedrock Converse content block that the
 * assertions in this file inspect.
 */
interface ConverseBlock {
  /** Plain text payload. */
  text?: string;
  /** Message-level cache breakpoint. */
  cachePoint?: { type?: string };
  /** Tool result payload, including its nested body blocks. */
  toolResult?: {
    toolUseId?: string;
    content?: { text?: string; cachePoint?: { type?: string } }[];
  };
}

/**
 * Returns the content blocks of the first `user` message that contains a
 * `toolResult` block, or an empty list when no such message exists.
 */
function toolUserMessage(
  result: ReturnType<typeof convertToConverseMessages>
): ConverseBlock[] {
  for (const message of result.converseMessages) {
    if (message.role !== 'user') {
      continue;
    }
    const blocks = message.content;
    if (blocks?.some((block) => 'toolResult' in block) === true) {
      return (blocks ?? []) as ConverseBlock[];
    }
  }
  return [];
}
|
|
37
|
+
|
|
38
|
+
describe('convertToConverseMessages — tool-result cachePoint hoisting', () => {
  /** The message-level breakpoint expected after the tool result. */
  const CACHE_POINT = { cachePoint: { type: 'default' } };

  /** Converts the conversation and returns the blocks of its tool-result turn. */
  const renderToolTurn = (conversation: BaseMessage[]): ConverseBlock[] => {
    const { converseMessages } = convertToConverseMessages(conversation);
    return toolUserMessage({ converseMessages, converseSystem: [] });
  };

  /** Picks the `toolResult` payload out of a rendered tool turn. */
  const toolResultOf = (blocks: ConverseBlock[]): ConverseBlock['toolResult'] =>
    blocks.find((block) => 'toolResult' in block)?.toolResult;

  it('hoists a cachePoint out of toolResult.content to a message-level sibling', () => {
    const toolTurn = new ToolMessage({
      tool_call_id: 't1',
      content: toLangChainContent([
        { type: 'text', text: 'result body' },
        { cachePoint: { type: 'default' } },
      ] as MessageContentComplex[]),
    });

    const blocks = renderToolTurn([new HumanMessage('go'), toolTurn]);
    const toolResult = toolResultOf(blocks);

    // The tool result body keeps its text and loses the cachePoint.
    expect(toolResult?.content?.some((inner) => 'cachePoint' in inner)).toBe(
      false
    );
    expect(toolResult?.content).toEqual([{ text: 'result body' }]);

    // The cachePoint reappears as the last message-level block.
    const lastBlock = blocks[blocks.length - 1];
    expect(lastBlock).toEqual(CACHE_POINT);
  });

  it('leaves tool results without a cachePoint untouched', () => {
    const blocks = renderToolTurn([
      new HumanMessage('go'),
      new ToolMessage({ tool_call_id: 't1', content: 'plain result' }),
    ]);

    expect(blocks).toEqual([
      { toolResult: { toolUseId: 't1', content: [{ text: 'plain result' }] } },
    ]);
  });

  it('end-to-end: tail breakpoint on a string tool result renders as a valid sibling cachePoint', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('What is 15 * 23? Use the calculator.'),
      new AIMessage({
        content: 'Calculating.',
        tool_calls: [
          { id: 't1', name: 'calculator', args: { expression: '15 * 23' } },
        ],
      }),
      new ToolMessage({ tool_call_id: 't1', content: '345' }),
    ];

    const blocks = renderToolTurn(addBedrockTailCacheControl(conversation));
    const toolResult = toolResultOf(blocks);

    // Exactly one cachePoint, at the message level, never nested in the body.
    expect(toolResult?.content?.some((inner) => 'cachePoint' in inner)).toBe(
      false
    );
    expect(blocks.filter((block) => 'cachePoint' in block)).toHaveLength(1);
    const lastBlock = blocks[blocks.length - 1];
    expect(lastBlock).toEqual(CACHE_POINT);
  });
});
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AIMessage,
|
|
3
|
+
HumanMessage,
|
|
4
|
+
SystemMessage,
|
|
5
|
+
ToolMessage,
|
|
6
|
+
} from '@langchain/core/messages';
|
|
7
|
+
import type {
|
|
8
|
+
BaseMessage,
|
|
9
|
+
MessageContentComplex,
|
|
10
|
+
} from '@langchain/core/messages';
|
|
11
|
+
import type Anthropic from '@anthropic-ai/sdk';
|
|
12
|
+
import type { AnthropicMessages } from '@/types/messages';
|
|
13
|
+
import { addTailCacheControl, addBedrockTailCacheControl } from './cache';
|
|
14
|
+
import { toLangChainContent } from './langchain';
|
|
15
|
+
|
|
16
|
+
/**
 * A LangChain content block that may additionally carry an Anthropic
 * `cache_control` marker.
 */
type CacheControlBlock = MessageContentComplex & {
  cache_control?: {
    type: 'ephemeral';
    ttl?: '1h';
  };
};
|
|
19
|
+
|
|
20
|
+
/**
 * Counts the content blocks, across all messages, that have a
 * `cache_control` property. Messages whose content is not an array (for
 * example plain strings) contribute nothing.
 */
function countCacheMarkers(
  messages: ReadonlyArray<{ content: unknown }>
): number {
  const carriesMarker = (candidate: unknown): boolean =>
    typeof candidate === 'object' &&
    candidate !== null &&
    'cache_control' in candidate;

  return messages.reduce(
    (total, { content }) =>
      Array.isArray(content)
        ? total + content.filter(carriesMarker).length
        : total,
    0
  );
}
|
|
37
|
+
|
|
38
|
+
/**
 * Views a message's content as a list of cache-control-capable blocks.
 * This is a pure type cast: the caller is responsible for passing a message
 * whose content really is an array.
 */
function blocksOf(message: { content: unknown }): CacheControlBlock[] {
  const { content } = message;
  return content as CacheControlBlock[];
}
|
|
41
|
+
|
|
42
|
+
describe('addTailCacheControl (single tail breakpoint)', () => {
  /** The marker expected on whichever block the breakpoint anchors to. */
  const EPHEMERAL = { type: 'ephemeral' };

  test('places exactly one marker on the last message', () => {
    const conversation: AnthropicMessages = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'Hi there' }] },
      { role: 'user', content: [{ type: 'text', text: 'How are you?' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'Doing well' }] },
      { role: 'user', content: [{ type: 'text', text: 'Great!' }] },
    ];

    const marked = addTailCacheControl(conversation);

    expect(countCacheMarkers(marked)).toBe(1);
    const tailBlock = marked[4].content[0] as Anthropic.TextBlockParam;
    expect(tailBlock.cache_control).toEqual(EPHEMERAL);
    expect(marked[2].content[0]).not.toHaveProperty('cache_control');
  });

  test('anchors on a trailing tool_result block (tail is a tool turn)', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('Run the tool'),
      new AIMessage({
        content: toLangChainContent([
          { type: 'text', text: 'Calling it' },
          { type: 'tool_use', id: 't1', name: 'search', input: {} },
        ] as MessageContentComplex[]),
        tool_calls: [{ id: 't1', name: 'search', args: {} }],
      }),
      new ToolMessage({
        tool_call_id: 't1',
        content: toLangChainContent([
          {
            type: 'tool_result',
            tool_use_id: 't1',
            content: 'result body',
          },
        ] as MessageContentComplex[]),
      }),
    ];

    const marked = addTailCacheControl(conversation);

    expect(countCacheMarkers(marked)).toBe(1);
    expect(blocksOf(marked[2])[0].cache_control).toEqual(EPHEMERAL);
  });

  test('strips ALL stale markers and re-anchors a single one at the tail', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage({
        content: toLangChainContent([
          {
            type: 'text',
            text: 'old marker',
            cache_control: { type: 'ephemeral' },
          },
        ] as MessageContentComplex[]),
      }),
      new HumanMessage({
        content: toLangChainContent([
          {
            type: 'text',
            text: 'another old marker',
            cache_control: { type: 'ephemeral' },
          },
        ] as MessageContentComplex[]),
      }),
      new AIMessage({ content: 'reply' }),
    ];

    const marked = addTailCacheControl(conversation);

    expect(countCacheMarkers(marked)).toBe(1);
    expect(blocksOf(marked[2])[0].cache_control).toEqual(EPHEMERAL);
    expect(blocksOf(marked[0])[0]).not.toHaveProperty('cache_control');
    expect(blocksOf(marked[1])[0]).not.toHaveProperty('cache_control');
  });

  test('does not anchor on thinking blocks', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('Hi'),
      new AIMessage({
        content: toLangChainContent([
          { type: 'text', text: 'thought through it' },
          { type: 'thinking', thinking: 'secret reasoning' },
        ] as MessageContentComplex[]),
      }),
    ];

    const marked = addTailCacheControl(conversation);
    const [textBlock, thinkingBlock] = blocksOf(marked[1]);

    expect(countCacheMarkers(marked)).toBe(1);
    expect(textBlock.cache_control).toEqual(EPHEMERAL);
    expect(thinkingBlock).not.toHaveProperty('cache_control');
  });

  test.each(['reasoning_content', 'reasoning', 'think'])(
    'does not anchor on a trailing foreign reasoning block (%s)',
    (foreignType) => {
      // Foreign reasoning (Bedrock/Google/LibreChat) is dropped by the
      // Anthropic converter on assistant turns; anchoring the only breakpoint
      // there would silently lose tail caching on a cross-provider handoff.
      const conversation: BaseMessage[] = [
        new HumanMessage('Hi'),
        new AIMessage({
          content: toLangChainContent([
            { type: 'text', text: 'Here is my answer.' },
            { type: foreignType, text: 'foreign reasoning' },
          ] as MessageContentComplex[]),
        }),
      ];

      const marked = addTailCacheControl(conversation);
      const [textBlock, reasoningBlock] = blocksOf(marked[1]);

      expect(countCacheMarkers(marked)).toBe(1);
      // Marker must land on the surviving text block, not the reasoning block.
      expect(textBlock.cache_control).toEqual(EPHEMERAL);
      expect(reasoningBlock).not.toHaveProperty('cache_control');
    }
  );

  test('skips synthetic meta tail and anchors on the previous real message', () => {
    const realAnswer = new AIMessage({ content: 'real answer' });
    const syntheticTail = new HumanMessage({ content: 'reinjected skill body' });
    (
      syntheticTail as unknown as { additional_kwargs: Record<string, unknown> }
    ).additional_kwargs = { isMeta: true };

    const marked = addTailCacheControl([
      new HumanMessage({ content: 'question' }),
      realAnswer,
      syntheticTail,
    ]);

    expect(countCacheMarkers(marked)).toBe(1);
    expect(blocksOf(marked[1])[0].cache_control).toEqual(EPHEMERAL);
  });

  test('handles string content on the tail', () => {
    const conversation: AnthropicMessages = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Final' },
    ];

    const marked = addTailCacheControl(conversation);

    // Only the tail is expanded into a block array; earlier strings stay as-is.
    expect(marked[0].content).toBe('Hello');
    expect(marked[1].content[0]).toEqual({
      type: 'text',
      text: 'Final',
      cache_control: { type: 'ephemeral' },
    });
  });

  test('does not mutate the original messages', () => {
    const untouched: AnthropicMessages = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'World' }] },
    ];

    addTailCacheControl(untouched);

    expect(untouched[1].content[0]).not.toHaveProperty('cache_control');
  });

  test('returns input unchanged for empty array', () => {
    const nothing: AnthropicMessages = [];
    expect(addTailCacheControl(nothing)).toEqual([]);
  });
});
|
|
214
|
+
|
|
215
|
+
/**
 * Counts the content blocks, across all messages, that have a Bedrock
 * `cachePoint` property. Messages whose content is not an array (for example
 * plain strings) contribute nothing.
 */
function countCachePoints(
  messages: ReadonlyArray<{ content: unknown }>
): number {
  const isCachePoint = (candidate: unknown): boolean =>
    typeof candidate === 'object' &&
    candidate !== null &&
    'cachePoint' in candidate;

  return messages.reduce(
    (total, { content }) =>
      Array.isArray(content)
        ? total + content.filter(isCachePoint).length
        : total,
    0
  );
}
|
|
232
|
+
|
|
233
|
+
describe('addBedrockTailCacheControl (single tail cachePoint)', () => {
  /** The breakpoint block expected at the anchored position. */
  const CACHE_POINT = { cachePoint: { type: 'default' } };

  /** Last element of a list, or `undefined` when the list is empty. */
  const lastOf = <T>(items: T[]): T | undefined => items[items.length - 1];

  test('inserts exactly one cachePoint after the last text block of the tail', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage('First question'),
      new AIMessage('First answer'),
      new HumanMessage('Second question'),
    ];

    const marked = addBedrockTailCacheControl(conversation);

    expect(countCachePoints(marked)).toBe(1);
    expect(lastOf(blocksOf(marked[2]))).toEqual(CACHE_POINT);
  });

  test('strips stale cachePoints and re-anchors a single one at the tail', () => {
    const conversation: BaseMessage[] = [
      new HumanMessage({
        content: toLangChainContent([
          { type: 'text', text: 'old' },
          { cachePoint: { type: 'default' } },
        ] as MessageContentComplex[]),
      }),
      new AIMessage('reply'),
      new HumanMessage('newest'),
    ];

    const marked = addBedrockTailCacheControl(conversation);

    expect(countCachePoints(marked)).toBe(1);
    expect(lastOf(blocksOf(marked[2]))).toEqual(CACHE_POINT);
    expect(blocksOf(marked[0]).some((block) => 'cachePoint' in block)).toBe(
      false
    );
  });

  test('strips Anthropic cache_control from a system message but never anchors it', () => {
    const conversation: BaseMessage[] = [
      new SystemMessage({
        content: toLangChainContent([
          {
            type: 'text',
            text: 'system rules',
            cache_control: { type: 'ephemeral' },
          },
        ] as MessageContentComplex[]),
      }),
      new HumanMessage('hi'),
    ];

    const marked = addBedrockTailCacheControl(conversation);

    // The system block loses its Anthropic marker; the only cachePoint lands
    // on the human turn that follows it.
    expect(blocksOf(marked[0])[0]).not.toHaveProperty('cache_control');
    expect(countCachePoints(marked)).toBe(1);
    expect(blocksOf(marked[1])[1]).toEqual(CACHE_POINT);
  });

  test('skips synthetic meta tail and anchors on the previous real message', () => {
    const syntheticTail = new HumanMessage({ content: 'reinjected skill body' });
    (
      syntheticTail as unknown as { additional_kwargs: Record<string, unknown> }
    ).additional_kwargs = { source: 'skill' };

    const marked = addBedrockTailCacheControl([
      new HumanMessage('question'),
      new AIMessage('real answer'),
      syntheticTail,
    ]);

    expect(countCachePoints(marked)).toBe(1);
    expect(lastOf(blocksOf(marked[1]))).toEqual(CACHE_POINT);
  });

  test('handles string content on the tail', () => {
    const marked = addBedrockTailCacheControl([
      new HumanMessage('Hello'),
      new AIMessage('Final'),
    ]);

    expect(countCachePoints(marked)).toBe(1);
    expect(blocksOf(marked[1])).toEqual([
      { type: 'text', text: 'Final' },
      CACHE_POINT,
    ]);
  });

  test('anchors on a trailing string tool result (agent-loop tail)', () => {
    const marked = addBedrockTailCacheControl([
      new HumanMessage('Run the tool'),
      new AIMessage({
        content: 'Calling it',
        tool_calls: [{ id: 't1', name: 'search', args: {} }],
      }),
      new ToolMessage({ tool_call_id: 't1', content: 'result body' }),
    ]);

    // The single cachePoint must land on the trailing tool result so the
    // tool output is part of the cached prefix; the converter later hoists it
    // out of toolResult.content (see toolResultCachePoint.test.ts).
    expect(countCachePoints(marked)).toBe(1);
    expect(blocksOf(marked[2])).toEqual([
      { type: 'text', text: 'result body' },
      CACHE_POINT,
    ]);
  });
});
|