@librechat/agents 3.2.35 → 3.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +75 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/agents/projection.cjs +25 -0
- package/dist/cjs/agents/projection.cjs.map +1 -0
- package/dist/cjs/graphs/Graph.cjs +10 -26
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +16 -5
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +118 -7
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +44 -4
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +7 -0
- package/dist/cjs/messages/budget.cjs +23 -0
- package/dist/cjs/messages/budget.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +184 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/index.cjs +1 -0
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +91 -2
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +4 -3
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +76 -3
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/agents/projection.mjs +25 -0
- package/dist/esm/agents/projection.mjs.map +1 -0
- package/dist/esm/graphs/Graph.mjs +9 -25
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +16 -5
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +118 -7
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +44 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -2
- package/dist/esm/messages/budget.mjs +23 -0
- package/dist/esm/messages/budget.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +182 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/index.mjs +1 -0
- package/dist/esm/summarization/node.mjs +2 -2
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +91 -2
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +4 -3
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +28 -14
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +30 -1
- package/dist/types/agents/projection.d.ts +26 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/budget.d.ts +11 -0
- package/dist/types/messages/cache.d.ts +47 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/types.d.ts +7 -0
- package/dist/types/types/graph.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +105 -4
- package/src/agents/__tests__/AgentContext.test.ts +232 -9
- package/src/agents/__tests__/projection.test.ts +73 -0
- package/src/agents/projection.ts +46 -0
- package/src/graphs/Graph.ts +66 -65
- package/src/index.ts +3 -0
- package/src/langfuse.ts +38 -4
- package/src/langfuseToolOutputTracing.ts +18 -0
- package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
- package/src/llm/anthropic/utils/message_inputs.ts +209 -19
- package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
- package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
- package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
- package/src/llm/bedrock/utils/message_inputs.ts +81 -4
- package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
- package/src/messages/budget.ts +32 -0
- package/src/messages/cache.tail.test.ts +340 -0
- package/src/messages/cache.ts +267 -1
- package/src/messages/index.ts +1 -0
- package/src/messages/tailCacheConversion.test.ts +161 -0
- package/src/scripts/bench-prompt-cache.ts +479 -0
- package/src/specs/langfuse-config.test.ts +69 -2
- package/src/specs/langfuse-metadata.test.ts +44 -0
- package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
- package/src/summarization/node.ts +2 -2
- package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
- package/src/tools/search/format.test.ts +242 -0
- package/src/tools/search/format.ts +122 -5
- package/src/tools/search/tool.ts +5 -1
- package/src/tools/search/types.ts +7 -0
- package/src/tools/toolOutputReferences.ts +34 -20
- package/src/types/graph.ts +2 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AIMessage,
|
|
3
|
+
HumanMessage,
|
|
4
|
+
SystemMessage,
|
|
5
|
+
ToolMessage,
|
|
6
|
+
} from '@langchain/core/messages';
|
|
7
|
+
import type {
|
|
8
|
+
BaseMessage,
|
|
9
|
+
MessageContentComplex,
|
|
10
|
+
} from '@langchain/core/messages';
|
|
11
|
+
import type Anthropic from '@anthropic-ai/sdk';
|
|
12
|
+
import type { AnthropicMessages } from '@/types/messages';
|
|
13
|
+
import { addTailCacheControl, addBedrockTailCacheControl } from './cache';
|
|
14
|
+
import { toLangChainContent } from './langchain';
|
|
15
|
+
|
|
16
|
+
/**
 * A LangChain complex content block that may additionally carry an
 * Anthropic-style `cache_control` marker. Used by the tests below to read the
 * marker off blocks returned by the cache helpers without repeating casts.
 */
type CacheControlBlock = MessageContentComplex & {
  cache_control?: { type: 'ephemeral'; ttl?: '1h' };
};
|
|
19
|
+
|
|
20
|
+
/**
 * Tally the content blocks, across every message, that have a `cache_control`
 * key. Messages whose content is not an array (e.g. plain strings) contribute
 * nothing; non-object entries inside an array are ignored.
 */
function countCacheMarkers(
  messages: ReadonlyArray<{ content: unknown }>
): number {
  const carriesMarker = (block: unknown): boolean =>
    typeof block === 'object' && block !== null && 'cache_control' in block;

  return messages.reduce(
    (total, { content }) =>
      Array.isArray(content)
        ? total + content.filter(carriesMarker).length
        : total,
    0
  );
}
|
|
37
|
+
|
|
38
|
+
/**
 * View a message's `content` as an array of cache-control-capable blocks.
 * This is a pure type cast: no runtime validation is performed and the very
 * same value is returned.
 */
function blocksOf(message: { content: unknown }): CacheControlBlock[] {
  const { content } = message;
  return content as CacheControlBlock[];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Behavioural spec for `addTailCacheControl`: exactly one `cache_control`
 * marker must end up on the last cacheable block of the last non-synthetic
 * message, every stale marker must be stripped, and the input must not be
 * mutated.
 */
describe('addTailCacheControl (single tail breakpoint)', () => {
  test('places exactly one marker on the last message', () => {
    const messages: AnthropicMessages = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'Hi there' }] },
      { role: 'user', content: [{ type: 'text', text: 'How are you?' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'Doing well' }] },
      { role: 'user', content: [{ type: 'text', text: 'Great!' }] },
    ];

    const result = addTailCacheControl(messages);

    expect(countCacheMarkers(result)).toBe(1);
    expect(
      (result[4].content[0] as Anthropic.TextBlockParam).cache_control
    ).toEqual({ type: 'ephemeral' });
    expect(result[2].content[0]).not.toHaveProperty('cache_control');
  });

  test('anchors on a trailing tool_result block (tail is a tool turn)', () => {
    const messages: BaseMessage[] = [
      new HumanMessage('Run the tool'),
      new AIMessage({
        content: toLangChainContent([
          { type: 'text', text: 'Calling it' },
          { type: 'tool_use', id: 't1', name: 'search', input: {} },
        ] as MessageContentComplex[]),
        tool_calls: [{ id: 't1', name: 'search', args: {} }],
      }),
      new ToolMessage({
        tool_call_id: 't1',
        content: toLangChainContent([
          {
            type: 'tool_result',
            tool_use_id: 't1',
            content: 'result body',
          },
        ] as MessageContentComplex[]),
      }),
    ];

    const result = addTailCacheControl(messages);

    expect(countCacheMarkers(result)).toBe(1);
    expect(blocksOf(result[2])[0].cache_control).toEqual({ type: 'ephemeral' });
  });

  test('strips ALL stale markers and re-anchors a single one at the tail', () => {
    const messages: BaseMessage[] = [
      new HumanMessage({
        content: toLangChainContent([
          {
            type: 'text',
            text: 'old marker',
            cache_control: { type: 'ephemeral' },
          },
        ] as MessageContentComplex[]),
      }),
      new HumanMessage({
        content: toLangChainContent([
          {
            type: 'text',
            text: 'another old marker',
            cache_control: { type: 'ephemeral' },
          },
        ] as MessageContentComplex[]),
      }),
      new AIMessage({ content: 'reply' }),
    ];

    const result = addTailCacheControl(messages);

    expect(countCacheMarkers(result)).toBe(1);
    expect(blocksOf(result[2])[0].cache_control).toEqual({ type: 'ephemeral' });
    expect(blocksOf(result[0])[0]).not.toHaveProperty('cache_control');
    expect(blocksOf(result[1])[0]).not.toHaveProperty('cache_control');
  });

  test('does not anchor on thinking blocks', () => {
    const messages: BaseMessage[] = [
      new HumanMessage('Hi'),
      new AIMessage({
        content: toLangChainContent([
          { type: 'text', text: 'thought through it' },
          { type: 'thinking', thinking: 'secret reasoning' },
        ] as MessageContentComplex[]),
      }),
    ];

    const result = addTailCacheControl(messages);

    expect(countCacheMarkers(result)).toBe(1);
    expect(blocksOf(result[1])[0].cache_control).toEqual({ type: 'ephemeral' });
    expect(blocksOf(result[1])[1]).not.toHaveProperty('cache_control');
  });

  test.each(['reasoning_content', 'reasoning', 'think'])(
    'does not anchor on a trailing foreign reasoning block (%s)',
    (reasoningType) => {
      // Foreign reasoning (Bedrock/Google/LibreChat) is dropped by the
      // Anthropic converter on assistant turns; anchoring the only breakpoint
      // there would silently lose tail caching on a cross-provider handoff.
      const messages: BaseMessage[] = [
        new HumanMessage('Hi'),
        new AIMessage({
          content: toLangChainContent([
            { type: 'text', text: 'Here is my answer.' },
            { type: reasoningType, text: 'foreign reasoning' },
          ] as MessageContentComplex[]),
        }),
      ];

      const result = addTailCacheControl(messages);

      expect(countCacheMarkers(result)).toBe(1);
      // Marker must land on the surviving text block, not the reasoning block.
      expect(blocksOf(result[1])[0].cache_control).toEqual({
        type: 'ephemeral',
      });
      expect(blocksOf(result[1])[1]).not.toHaveProperty('cache_control');
    }
  );

  test('skips synthetic meta tail and anchors on the previous real message', () => {
    const realTail = new AIMessage({ content: 'real answer' });
    const metaTail = new HumanMessage({ content: 'reinjected skill body' });
    (
      metaTail as unknown as { additional_kwargs: Record<string, unknown> }
    ).additional_kwargs = { isMeta: true };

    const result = addTailCacheControl([
      new HumanMessage({ content: 'question' }),
      realTail,
      metaTail,
    ]);

    expect(countCacheMarkers(result)).toBe(1);
    expect(blocksOf(result[1])[0].cache_control).toEqual({ type: 'ephemeral' });
  });

  test('handles string content on the tail', () => {
    const messages: AnthropicMessages = [
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Final' },
    ];

    const result = addTailCacheControl(messages);

    expect(result[0].content).toBe('Hello');
    expect(result[1].content[0]).toEqual({
      type: 'text',
      text: 'Final',
      cache_control: { type: 'ephemeral' },
    });
  });

  test('does not mutate the original messages', () => {
    const original: AnthropicMessages = [
      { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
      { role: 'assistant', content: [{ type: 'text', text: 'World' }] },
    ];

    const result = addTailCacheControl(original);

    // Guard against a vacuous pass: the marker must exist on the result,
    // otherwise a no-op implementation would also satisfy this test.
    expect(result).not.toBe(original);
    expect(result[1].content[0]).toHaveProperty('cache_control');
    expect(original[0].content[0]).not.toHaveProperty('cache_control');
    expect(original[1].content[0]).not.toHaveProperty('cache_control');
  });

  test('returns input unchanged for empty array', () => {
    const messages: AnthropicMessages = [];

    const result = addTailCacheControl(messages);

    // `toBe` pins the "unchanged" claim: the very same array comes back.
    expect(result).toBe(messages);
    expect(result).toEqual([]);
  });
});
|
|
214
|
+
|
|
215
|
+
/**
 * Tally the content blocks, across every message, that have a Bedrock
 * `cachePoint` key. Non-array content and non-object array entries are ignored.
 */
function countCachePoints(
  messages: ReadonlyArray<{ content: unknown }>
): number {
  const isCachePointBlock = (block: unknown): boolean =>
    typeof block === 'object' && block !== null && 'cachePoint' in block;

  let total = 0;
  messages.forEach(({ content }) => {
    if (Array.isArray(content)) {
      total += content.filter(isCachePointBlock).length;
    }
  });
  return total;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Behavioural spec for `addBedrockTailCacheControl`: exactly one Bedrock
 * `cachePoint` block must follow the last text block of the most recent
 * anchorable message, and all stale cache markers must be removed.
 */
describe('addBedrockTailCacheControl (single tail cachePoint)', () => {
  /** Shape of the single cachePoint block the helper is expected to insert. */
  const CACHE_POINT = { cachePoint: { type: 'default' } };

  /** Final content block of a message, for "cachePoint is last" checks. */
  const lastBlockOf = (message: { content: unknown }): CacheControlBlock => {
    const blocks = blocksOf(message);
    return blocks[blocks.length - 1];
  };

  test('inserts exactly one cachePoint after the last text block of the tail', () => {
    const result = addBedrockTailCacheControl([
      new HumanMessage('First question'),
      new AIMessage('First answer'),
      new HumanMessage('Second question'),
    ] as BaseMessage[]);

    expect(countCachePoints(result)).toBe(1);
    expect(lastBlockOf(result[2])).toEqual(CACHE_POINT);
  });

  test('strips stale cachePoints and re-anchors a single one at the tail', () => {
    const staleHead = new HumanMessage({
      content: toLangChainContent([
        { type: 'text', text: 'old' },
        { cachePoint: { type: 'default' } },
      ] as MessageContentComplex[]),
    });
    const messages: BaseMessage[] = [
      staleHead,
      new AIMessage('reply'),
      new HumanMessage('newest'),
    ];

    const result = addBedrockTailCacheControl(messages);

    expect(countCachePoints(result)).toBe(1);
    expect(lastBlockOf(result[2])).toEqual(CACHE_POINT);
    expect(blocksOf(result[0]).some((b) => 'cachePoint' in b)).toBe(false);
  });

  test('strips Anthropic cache_control from a system message but never anchors it', () => {
    const systemMessage = new SystemMessage({
      content: toLangChainContent([
        {
          type: 'text',
          text: 'system rules',
          cache_control: { type: 'ephemeral' },
        },
      ] as MessageContentComplex[]),
    });
    const messages: BaseMessage[] = [systemMessage, new HumanMessage('hi')];

    const result = addBedrockTailCacheControl(messages);

    expect(blocksOf(result[0])[0]).not.toHaveProperty('cache_control');
    expect(countCachePoints(result)).toBe(1);
    expect(blocksOf(result[1])[1]).toEqual(CACHE_POINT);
  });

  test('skips synthetic meta tail and anchors on the previous real message', () => {
    const metaTail = new HumanMessage({ content: 'reinjected skill body' });
    Object.assign(metaTail, { additional_kwargs: { source: 'skill' } });

    const result = addBedrockTailCacheControl([
      new HumanMessage('question'),
      new AIMessage('real answer'),
      metaTail,
    ]);

    expect(countCachePoints(result)).toBe(1);
    expect(lastBlockOf(result[1])).toEqual(CACHE_POINT);
  });

  test('handles string content on the tail', () => {
    const result = addBedrockTailCacheControl([
      new HumanMessage('Hello'),
      new AIMessage('Final'),
    ]);

    expect(countCachePoints(result)).toBe(1);
    expect(blocksOf(result[1])).toEqual([
      { type: 'text', text: 'Final' },
      CACHE_POINT,
    ]);
  });

  test('anchors on a trailing string tool result (agent-loop tail)', () => {
    const assistantTurn = new AIMessage({
      content: 'Calling it',
      tool_calls: [{ id: 't1', name: 'search', args: {} }],
    });
    const toolTurn = new ToolMessage({
      tool_call_id: 't1',
      content: 'result body',
    });

    const result = addBedrockTailCacheControl([
      new HumanMessage('Run the tool'),
      assistantTurn,
      toolTurn,
    ]);

    // The single cachePoint must land on the trailing tool result so the
    // tool output is part of the cached prefix; the converter later hoists it
    // out of toolResult.content (see toolResultCachePoint.test.ts).
    expect(countCachePoints(result)).toBe(1);
    expect(blocksOf(result[2])).toEqual([
      { type: 'text', text: 'result body' },
      CACHE_POINT,
    ]);
  });
});
|
package/src/messages/cache.ts
CHANGED
|
@@ -41,7 +41,7 @@ function deepCloneContent<T extends string | MessageContentComplex[]>(
|
|
|
41
41
|
* in downstream code (e.g., ensureThinkingBlockInMessages).
|
|
42
42
|
* For plain objects (AnthropicMessage), uses object spread.
|
|
43
43
|
*/
|
|
44
|
-
function cloneMessage<T extends MessageWithContent>(
|
|
44
|
+
export function cloneMessage<T extends MessageWithContent>(
|
|
45
45
|
message: T,
|
|
46
46
|
content: string | MessageContentComplex[]
|
|
47
47
|
): T {
|
|
@@ -254,6 +254,152 @@ function isCachePoint(block: MessageContentComplex): boolean {
|
|
|
254
254
|
return 'cachePoint' in block && !('type' in block);
|
|
255
255
|
}
|
|
256
256
|
|
|
257
|
+
/**
 * Content-block `type` values that are never allowed to carry the single tail
 * cache breakpoint, because a marker placed there would not reach the model:
 *
 * - Native Anthropic reasoning (`thinking`, `redacted_thinking`): the API
 *   rejects `cache_control` on these blocks.
 * - Foreign reasoning (`reasoning_content`, `reasoning`, `think`) from
 *   Bedrock / Google / LibreChat: `_convertMessagesToAnthropicPayload` DROPS
 *   these on assistant turns during a cross-provider handoff.
 * - `input_json_delta`: persisted partial tool-input deltas, likewise DROPPED
 *   by `_convertMessagesToAnthropicPayload` (the assembled input is restored
 *   onto the tool_use block).
 *
 * Pinning the only breakpoint to a block that is about to vanish would
 * silently disable tail caching, hence the exclusion.
 */
const NON_ANCHORABLE_BLOCK_TYPES: ReadonlySet<string> = new Set<string>([
  // Native Anthropic reasoning.
  'thinking',
  'redacted_thinking',
  // Foreign (cross-provider) reasoning.
  'reasoning_content',
  'reasoning',
  'think',
  // Partial tool-input deltas.
  'input_json_delta',
]);
|
|
279
|
+
|
|
280
|
+
/**
 * Decide whether a content block may anchor the tail cache breakpoint.
 *
 * A block qualifies only when it is not a Bedrock cache point, has a `type`
 * that is not listed in {@link NON_ANCHORABLE_BLOCK_TYPES}, and — for `text`
 * blocks — contains something other than whitespace. Every other typed block
 * (tool_use, tool_result, image, …) is accepted.
 *
 * @param block - The content block to inspect.
 * @returns `true` when the block can carry the `cache_control` marker.
 */
function isTailCacheableBlock(block: MessageContentComplex): boolean {
  if (isCachePoint(block)) {
    return false;
  }

  const { type } = block as { type?: string };
  const isExcluded = type == null || NON_ANCHORABLE_BLOCK_TYPES.has(type);
  if (isExcluded) {
    return false;
  }

  if (type !== 'text') {
    return true;
  }

  // Empty / whitespace-only text is not cacheable.
  const { text } = block as { text?: string };
  return text != null && text.trim() !== '';
}
|
|
301
|
+
|
|
302
|
+
/**
 * Anthropic API: single tail cache breakpoint (default strategy).
 *
 * Walks the conversation from newest to oldest and leaves exactly ONE
 * `cache_control: { type: 'ephemeral' }` marker, on the last cacheable block
 * of the most recent non-synthetic message (the Claude Code
 * `markerIndex = messages.length - 1` strategy). With the marker riding the
 * true tail, the whole prefix is written once and read back next turn as the
 * history grows append-only, unlike rolling "last two user messages" markers
 * that leave fresh tool/assistant turns outside the cached prefix.
 *
 * During the same backward pass every stale marker — Anthropic
 * `cache_control` keys and Bedrock cache-point blocks — is removed from all
 * array-content messages. Synthetic skill/meta messages are never used as the
 * anchor but are still cleaned of stale markers.
 *
 * @param messages - Conversation in chronological order; not mutated.
 * @returns A new array in which only the messages that needed changes are
 *   cloned. An empty or non-array input is returned as-is.
 */
export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
  messages: T[]
): T[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  const result: T[] = Array.from(messages);
  let anchored = false;

  for (let index = result.length - 1; index >= 0; index -= 1) {
    const message = result[index];
    const { content } = message;
    // Only the first eligible (newest, non-synthetic) message may anchor.
    const mayAnchor = !anchored && !isSyntheticMetaMessage(message);

    let nextContent: MessageContentComplex[];

    if (Array.isArray(content)) {
      let changed = false;
      let anchorIndex = -1;
      nextContent = [];

      for (const block of content as MessageContentComplex[]) {
        // Bedrock cache points are dropped outright.
        if (isCachePoint(block)) {
          changed = true;
          continue;
        }

        const copy = { ...block };
        if ('cache_control' in copy) {
          delete (copy as Record<string, unknown>).cache_control;
          changed = true;
        }

        // Keep overwriting so the LAST cacheable block wins.
        if (mayAnchor && isTailCacheableBlock(copy as MessageContentComplex)) {
          anchorIndex = nextContent.length;
        }
        nextContent.push(copy as MessageContentComplex);
      }

      if (mayAnchor && anchorIndex >= 0) {
        (nextContent[anchorIndex] as Anthropic.TextBlockParam).cache_control = {
          type: 'ephemeral',
        };
        anchored = true;
        changed = true;
      }

      // Nothing stripped and nothing anchored: keep the original message.
      if (!changed) {
        continue;
      }
    } else if (
      mayAnchor &&
      typeof content === 'string' &&
      content.trim() !== ''
    ) {
      // Promote non-blank string content to a single marked text block.
      nextContent = [
        { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
      ] as unknown as MessageContentComplex[];
      anchored = true;
    } else {
      // String (or other non-array) content carries no markers to strip.
      continue;
    }

    result[index] = cloneMessage(message as MessageWithContent, nextContent) as T;
  }

  return result;
}
|
|
402
|
+
|
|
257
403
|
function getMessageRole(message: MessageWithContent): string | undefined {
|
|
258
404
|
if (message instanceof BaseMessage) {
|
|
259
405
|
return message.getType();
|
|
@@ -625,3 +771,123 @@ export function addBedrockCacheControl<
|
|
|
625
771
|
|
|
626
772
|
return updatedMessages;
|
|
627
773
|
}
|
|
774
|
+
|
|
775
|
+
/**
 * Bedrock Converse API: single tail cache breakpoint (default strategy).
 *
 * The Bedrock counterpart of {@link addTailCacheControl}. Strips ALL existing
 * cache control (Bedrock cache points and Anthropic `cache_control`) from every
 * message, then inserts exactly ONE `{ cachePoint: { type: 'default' } }` block
 * immediately after the last non-empty text block of the most recent
 * non-synthetic, non-system message. Anchoring on the rolling tail keeps the
 * cached prefix append-only as the conversation grows, instead of re-writing
 * large spans every turn with the legacy "last two user messages" cache points.
 *
 * System messages are sanitized (Anthropic `cache_control` stripped) but never
 * anchored. Synthetic skill/meta messages are skipped as anchors so their
 * volatile content cannot pin the cache. Empty and whitespace-only string
 * content is never anchored either, matching the "non-empty text" rule applied
 * to array content.
 *
 * @param messages - Conversation in chronological order; not mutated.
 * @returns A new array - only clones messages that require modification. An
 *   empty or non-array input is returned as-is.
 */
export function addBedrockTailCacheControl<
  T extends MessageWithContent & { getType?: () => string; role?: string },
>(messages: T[]): T[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  const updatedMessages: T[] = [...messages];
  let cachePointPlaced = false;

  for (let i = updatedMessages.length - 1; i >= 0; i--) {
    const originalMessage = updatedMessages[i];
    const messageType =
      'getType' in originalMessage &&
      typeof originalMessage.getType === 'function'
        ? originalMessage.getType()
        : undefined;
    const messageRole =
      'role' in originalMessage && typeof originalMessage.role === 'string'
        ? originalMessage.role
        : undefined;

    const isSystemMessage =
      messageType === 'system' || messageRole === 'system';
    if (isSystemMessage) {
      updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
      continue;
    }

    const content = originalMessage.content;
    const hasSerializationProps =
      'lc_kwargs' in originalMessage ||
      'lc_serializable' in originalMessage ||
      'lc_namespace' in originalMessage;
    const hasArrayContent = Array.isArray(content);
    // Blank (empty OR whitespace-only) strings must not anchor the cachePoint:
    // the array branch below and addTailCacheControl both require non-blank
    // text, and a cachePoint after a blank text block caches nothing useful.
    const isBlankString = typeof content === 'string' && content.trim() === '';
    const canPlaceCachePoint =
      !cachePointPlaced &&
      !isBlankString &&
      !isSyntheticMetaMessage(originalMessage) &&
      (typeof content === 'string' || hasArrayContent);

    if (!canPlaceCachePoint && !hasArrayContent && !hasSerializationProps) {
      continue;
    }

    let workingContent: string | MessageContentComplex[];
    // Messages carrying LangChain serialization props are always re-cloned.
    let modified = hasSerializationProps;

    if (hasArrayContent) {
      const src = content as MessageContentComplex[];
      workingContent = [];
      let lastNonEmptyTextIndex = -1;
      for (let j = 0; j < src.length; j++) {
        const block = src[j];
        // Drop stale Bedrock cache points.
        if (isCachePoint(block)) {
          modified = true;
          continue;
        }
        const cloned = { ...block };
        // Drop stale Anthropic markers.
        if ('cache_control' in cloned) {
          delete (cloned as Record<string, unknown>).cache_control;
          modified = true;
        }
        const type = (cloned as { type?: string }).type;
        if (type === ContentTypes.TEXT || type === 'text') {
          const text = (cloned as { text?: string }).text;
          if (text != null && text.trim() !== '') {
            lastNonEmptyTextIndex = workingContent.length;
          }
        }
        workingContent.push(cloned as MessageContentComplex);
      }

      if (canPlaceCachePoint && lastNonEmptyTextIndex >= 0) {
        workingContent.splice(lastNonEmptyTextIndex + 1, 0, {
          cachePoint: { type: 'default' },
        } as MessageContentComplex);
        cachePointPlaced = true;
        modified = true;
      }

      // Nothing stripped and no cachePoint inserted (e.g. an anchor candidate
      // with no non-empty text): keep the original message instead of cloning
      // an unchanged copy.
      if (!modified) {
        continue;
      }
    } else if (typeof content === 'string' && canPlaceCachePoint) {
      workingContent = [
        { type: ContentTypes.TEXT, text: content },
        { cachePoint: { type: 'default' } } as MessageContentComplex,
      ];
      cachePointPlaced = true;
    } else if (typeof content === 'string' && hasSerializationProps) {
      workingContent = content;
    } else {
      continue;
    }

    updatedMessages[i] = cloneMessage(originalMessage, workingContent);
  }

  return updatedMessages;
}
|