@librechat/agents 3.1.97 → 3.1.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +6 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +16 -5
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +10 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +17 -9
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +45 -8
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +6 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +6 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +16 -5
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +10 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +17 -9
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +45 -8
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +6 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/types/llm/bedrock/index.d.ts +16 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
- package/dist/types/messages/cache.d.ts +2 -2
- package/dist/types/types/llm.d.ts +2 -2
- package/package.json +1 -1
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
- package/src/graphs/Graph.ts +14 -0
- package/src/langfuseToolOutputTracing.ts +26 -7
- package/src/llm/bedrock/index.ts +32 -1
- package/src/llm/bedrock/llm.spec.ts +154 -1
- package/src/llm/bedrock/toolCache.test.ts +131 -0
- package/src/llm/bedrock/toolCache.ts +191 -0
- package/src/messages/cache.test.ts +97 -38
- package/src/messages/cache.ts +18 -10
- package/src/messages/prune.ts +55 -17
- package/src/specs/langfuse-tool-output-tracing.test.ts +28 -0
- package/src/specs/prune.test.ts +193 -0
- package/src/tools/ToolNode.ts +7 -1
- package/src/tools/__tests__/ToolNode.langfuse.test.ts +6 -0
- package/src/types/llm.ts +2 -2
|
@@ -11,6 +11,18 @@ import { config as dotenvConfig } from 'dotenv';
|
|
|
11
11
|
dotenvConfig();
|
|
12
12
|
|
|
13
13
|
import { describe, expect, it } from '@jest/globals';
|
|
14
|
+
import {
|
|
15
|
+
AIMessage,
|
|
16
|
+
BaseMessage,
|
|
17
|
+
HumanMessage,
|
|
18
|
+
SystemMessage,
|
|
19
|
+
ToolMessage,
|
|
20
|
+
type MessageContentComplex,
|
|
21
|
+
} from '@langchain/core/messages';
|
|
22
|
+
import {
|
|
23
|
+
BedrockRuntimeClient,
|
|
24
|
+
ConverseCommand,
|
|
25
|
+
} from '@aws-sdk/client-bedrock-runtime';
|
|
14
26
|
import type * as t from '@/types';
|
|
15
27
|
import {
|
|
16
28
|
runLiveTurn,
|
|
@@ -20,6 +32,9 @@ import {
|
|
|
20
32
|
waitForCachePropagation,
|
|
21
33
|
} from './promptCacheLiveHelpers';
|
|
22
34
|
import { Providers } from '@/common';
|
|
35
|
+
import { addBedrockCacheControl } from '@/messages/cache';
|
|
36
|
+
import { toLangChainContent } from '@/messages/langchain';
|
|
37
|
+
import { convertToConverseMessages } from '@/llm/bedrock/utils';
|
|
23
38
|
|
|
24
39
|
const accessKeyId =
|
|
25
40
|
process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
|
|
@@ -77,6 +92,373 @@ function createClientOptions(): t.BedrockAnthropicClientOptions {
|
|
|
77
92
|
};
|
|
78
93
|
}
|
|
79
94
|
|
|
95
|
+
/**
 * Normalized per-request usage record produced by `extractCacheUsage`
 * for the Bedrock prompt cache benchmarks.
 */
type BedrockCacheUsage = {
  /** Input token count; 0 when the response omits it. */
  inputTokens: number;
  /** Output token count; 0 when the response omits it. */
  outputTokens: number;
  /** Reported total, or input + output when the response omits it. */
  totalTokens: number;
  /** Populated from `cacheWriteInputTokens`; 0 when omitted. */
  cacheCreation: number;
  /** Populated from `cacheReadInputTokens`; 0 when omitted. */
  cacheRead: number;
  /** Wall-clock duration of the request in milliseconds. */
  latencyMs: number;
};

/** Token counters read from a Converse response; every field is optional. */
type ConverseTokenUsage = {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  cacheReadInputTokens?: number;
  cacheWriteInputTokens?: number;
};

/** The only part of a Converse response the benchmarks inspect. */
type ConverseUsageResponse = {
  usage?: ConverseTokenUsage;
};
|
|
113
|
+
|
|
114
|
+
/** JSON schema for the benchmark tool's input: one required integer `step`. */
const lookupCacheProbeInputSchema = {
  type: 'object',
  properties: {
    step: { type: 'integer' },
  },
  required: ['step'],
};

/**
 * Tool configuration sent with every benchmark Converse request. It declares
 * the `lookup_cache_probe` tool that the synthetic conversations call.
 */
const benchmarkToolConfig = {
  tools: [
    {
      toolSpec: {
        name: 'lookup_cache_probe',
        description: 'Returns prompt cache benchmark data.',
        inputSchema: { json: lookupCacheProbeInputSchema },
      },
    },
  ],
};
|
|
133
|
+
|
|
134
|
+
/**
 * Creates a fresh `{ cachePoint: { type: 'default' } }` content block.
 *
 * @returns A new object on every call, so callers can splice it into
 * message content without sharing references.
 */
function cachePointBlock(): MessageContentComplex {
  const marker = { cachePoint: { type: 'default' } };
  return marker as MessageContentComplex;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Returns a copy of `content` with prompt-cache markers removed.
 *
 * - Blocks that carry a `cachePoint` key and no `type` key are dropped.
 * - Every remaining block is shallow-copied and its `cache_control`
 *   property, if any, is deleted from the copy.
 *
 * The input array and its blocks are not mutated.
 *
 * @param content Content blocks to clean.
 * @returns A new array of shallow-copied blocks without cache markers.
 */
function stripCacheMarkers(
  content: MessageContentComplex[]
): MessageContentComplex[] {
  const cleaned: MessageContentComplex[] = [];

  for (const block of content) {
    const isBareCachePoint = 'cachePoint' in block && !('type' in block);
    if (isBareCachePoint) {
      continue;
    }

    const copy = { ...block };
    delete (copy as Record<string, unknown>).cache_control;
    cleaned.push(copy as MessageContentComplex);
  }

  return cleaned;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Rebuilds `message` as a new message of the same kind with `content`
 * substituted for its original content.
 *
 * `additional_kwargs` and `response_metadata` are shallow-copied; `id` and
 * `name` are carried over. AI messages also keep `tool_calls`, and tool
 * messages keep `tool_call_id`.
 *
 * @param message Message to copy from.
 * @param content Replacement content blocks, passed through `toLangChainContent`.
 * @returns A new AI/human/system/tool message, or `message` itself
 * (unchanged, not copied) for any other message type.
 */
function cloneLiveMessage(
  message: BaseMessage,
  content: MessageContentComplex[]
): BaseMessage {
  const shared = {
    content: toLangChainContent(content),
    additional_kwargs: { ...message.additional_kwargs },
    response_metadata: { ...message.response_metadata },
    id: message.id,
    name: message.name,
  };

  switch (message.getType()) {
    case 'ai':
      return new AIMessage({
        ...shared,
        tool_calls: (message as AIMessage).tool_calls,
      });
    case 'human':
      return new HumanMessage(shared);
    case 'system':
      return new SystemMessage(shared);
    case 'tool':
      return new ToolMessage({
        ...shared,
        tool_call_id: (message as ToolMessage).tool_call_id,
      });
    default:
      // Unknown message kinds are handed back as-is rather than rebuilt.
      return message;
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Benchmark baseline: places cache points on the moving tail of the
 * conversation. Walking from the last message backwards, it appends a cache
 * point to at most two non-system, non-tool messages.
 *
 * - String content with non-blank text becomes `[text block, cache point]`.
 * - Array content is first stripped of existing cache markers, then gets a
 *   cache point right after its last non-blank text block (while the budget
 *   of two remains). Array messages are always rebuilt, so stale markers are
 *   removed even after the budget is used up.
 *
 * The input array is not mutated; changed messages are replaced by clones.
 *
 * @param messages Conversation to annotate.
 * @returns A new array with up to two messages carrying cache points.
 */
function addLegacyMovingTailBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const maxCachePoints = 2;

  // A block anchors a cache point only if it is a text block whose text is an
  // actual non-blank string. The explicit `typeof` check matters: a text
  // block with a missing `text` must not be treated as cacheable.
  const hasCacheableText = (block: MessageContentComplex): boolean => {
    const { type, text } = block as { type?: string; text?: unknown };
    return (
      (type === 'text' || type === 'input_text') &&
      typeof text === 'string' &&
      text.trim() !== ''
    );
  };

  const updatedMessages = [...messages];
  let messagesModified = 0;

  for (let i = updatedMessages.length - 1; i >= 0; i--) {
    const message = updatedMessages[i];
    const messageType = message.getType();
    if (messageType === 'system' || messageType === 'tool') {
      continue;
    }

    const content = message.content;
    if (typeof content === 'string') {
      // Blank strings are skipped, matching the trim-based rule that the
      // array branch applies to text blocks.
      if (content.trim() === '' || messagesModified >= maxCachePoints) {
        continue;
      }
      updatedMessages[i] = cloneLiveMessage(message, [
        { type: 'text', text: content } as MessageContentComplex,
        cachePointBlock(),
      ]);
      messagesModified++;
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    const workingContent = stripCacheMarkers(
      content as MessageContentComplex[]
    );
    const lastTextIndex = workingContent.findLastIndex(hasCacheableText);

    if (messagesModified < maxCachePoints && lastTextIndex >= 0) {
      workingContent.splice(lastTextIndex + 1, 0, cachePointBlock());
      messagesModified++;
    }

    updatedMessages[i] = cloneLiveMessage(message, workingContent);
  }

  return updatedMessages;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Benchmark baseline: places a single cache point on the most recent human
 * message that has cacheable text, and strips existing cache markers from
 * array-content messages.
 *
 * Walking from the last message backwards (system messages are skipped):
 * - Array content is stripped of cache markers. If the message is human and
 *   no cache point has been placed yet, one is inserted after its last
 *   non-blank text block. The message is rebuilt only if something changed.
 * - Non-blank string content on a human message becomes
 *   `[text block, cache point]` if no cache point has been placed yet.
 *
 * The input array is not mutated; changed messages are replaced by clones.
 *
 * @param messages Conversation to annotate.
 * @returns A new array with at most one cache point.
 */
function addLatestUserOnlyBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  // A block anchors the cache point only if it is a text block whose text is
  // an actual non-blank string; a missing `text` must not qualify.
  const hasCacheableText = (block: MessageContentComplex): boolean => {
    const { type, text } = block as { type?: string; text?: unknown };
    return (
      (type === 'text' || type === 'input_text') &&
      typeof text === 'string' &&
      text.trim() !== ''
    );
  };

  const updatedMessages = [...messages];
  let addedCachePoint = false;

  for (let i = updatedMessages.length - 1; i >= 0; i--) {
    const message = updatedMessages[i];
    const messageType = message.getType();
    if (messageType === 'system') {
      continue;
    }

    const content = message.content;
    const canAddCache = !addedCachePoint && messageType === 'human';

    if (Array.isArray(content)) {
      const originalBlocks = content as MessageContentComplex[];
      const workingContent = stripCacheMarkers(originalBlocks);

      // Stripping can change a message in two ways: dropping bare cachePoint
      // blocks (length changes) or deleting `cache_control` from a block
      // (length unchanged). Both must trigger a rebuild, otherwise a stale
      // `cache_control` marker would survive.
      let modified =
        workingContent.length !== originalBlocks.length ||
        originalBlocks.some((block) => 'cache_control' in block);

      const lastTextIndex = workingContent.findLastIndex(hasCacheableText);
      if (canAddCache && lastTextIndex >= 0) {
        workingContent.splice(lastTextIndex + 1, 0, cachePointBlock());
        addedCachePoint = true;
        modified = true;
      }

      if (modified) {
        updatedMessages[i] = cloneLiveMessage(message, workingContent);
      }
      continue;
    }

    if (canAddCache && typeof content === 'string' && content.trim() !== '') {
      updatedMessages[i] = cloneLiveMessage(message, [
        { type: 'text', text: content } as MessageContentComplex,
        cachePointBlock(),
      ]);
      addedCachePoint = true;
    }
  }

  return updatedMessages;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Builds `count` newline-separated filler lines used to pad benchmark
 * prompts. Each line embeds `label` and its zero-based position.
 *
 * @param label Text placed at the start of every line.
 * @param count Number of lines to generate.
 * @returns The lines joined with `\n`; an empty string when `count` is 0.
 */
function repeated(label: string, count: number): string {
  const lines = Array.from({ length: count }).map(
    (_unused, position) =>
      `${label} reference ${position}: stable schema, metric definition, access policy, dashboard note, and query planning guidance.`
  );
  return lines.join('\n');
}
|
|
305
|
+
|
|
306
|
+
/**
 * Builds a synthetic single-user-turn conversation with two tool round-trips.
 *
 * The opening human message depends only on `nonce`. Every later message
 * embeds `marker`, so calls that change only `marker` share the first
 * message and differ after it.
 *
 * @param nonce Identifier embedded in all generated text.
 * @param marker Value embedded in the AI/tool messages and tool call ids.
 * @returns `[human, ai(tool call 1), tool 1, ai(tool call 2), tool 2]`.
 */
function buildToolLoopMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const volatileToolPayload = repeated(`${nonce} volatile-${marker}`, 70);
  const stableUserContext = [
    `Bedrock prompt cache placement benchmark ${nonce}.`,
    'The first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} user-context`, 190),
    'Use the final tool result to answer with the requested marker.',
  ].join('\n');

  /** Tool call id shared by an AI request and its matching tool result. */
  const callId = (step: number): string => `call_${marker}_${step}`;

  /** AI message that requests `lookup_cache_probe` for the given step. */
  const probeRequest = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: callId(step),
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });

  const firstToolResult = new ToolMessage({
    content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
    tool_call_id: callId(1),
  });

  const finalToolResult = new ToolMessage({
    content: [
      `Final tool result marker: ${marker}.`,
      'Reply with the marker and no extra explanation.',
      volatileToolPayload,
    ].join('\n'),
    tool_call_id: callId(2),
  });

  return [
    new HumanMessage(stableUserContext),
    probeRequest(1),
    firstToolResult,
    probeRequest(2),
    finalToolResult,
  ];
}
|
|
357
|
+
|
|
358
|
+
/**
 * Builds a synthetic two-user-turn conversation followed by two tool
 * round-trips.
 *
 * The first human message depends only on `nonce`, and the AI acknowledgement
 * after it is fixed text. The second human message and everything after it
 * embed `marker`.
 *
 * @param nonce Identifier embedded in all generated text.
 * @param marker Value embedded in the latest user turn, AI/tool messages and
 * tool call ids.
 * @returns `[human, ai, human, ai(tool call 1), tool 1, ai(tool call 2), tool 2]`.
 */
function buildMultiTurnToolMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);

  const stableFirstUser = [
    `Bedrock multi-turn prompt cache benchmark ${nonce}.`,
    'This first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} stable-user-context`, 190),
  ].join('\n');

  const latestUser = [
    `Current user request marker: ${marker}.`,
    'Use the final tool result to answer with the marker only.',
    repeated(`${nonce} latest-user-${marker}`, 18),
  ].join('\n');

  /** Tool call id shared by an AI request and its matching tool result. */
  const callId = (step: number): string => `call_${marker}_${step}`;

  /** AI message that requests `lookup_cache_probe` for the given step. */
  const probeRequest = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: callId(step),
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });

  const firstToolResult = new ToolMessage({
    content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
    tool_call_id: callId(1),
  });

  const finalToolResult = new ToolMessage({
    content: [
      `Final tool result marker: ${marker}.`,
      'Reply with the marker and no extra explanation.',
      volatileToolPayload,
    ].join('\n'),
    tool_call_id: callId(2),
  });

  return [
    new HumanMessage(stableFirstUser),
    new AIMessage('I will keep this stable context in mind.'),
    new HumanMessage(latestUser),
    probeRequest(1),
    firstToolResult,
    probeRequest(2),
    finalToolResult,
  ];
}
|
|
415
|
+
|
|
416
|
+
/**
 * Converts the `usage` section of a Converse response into a
 * `BedrockCacheUsage` record.
 *
 * Missing counters default to 0. A missing `totalTokens` falls back to
 * input + output.
 *
 * @param response Response object whose `usage` field is read.
 * @param latencyMs Request duration to attach to the record.
 * @returns The normalized usage record.
 * @throws Error when `response.usage` is null or undefined.
 */
function extractCacheUsage(
  response: ConverseUsageResponse,
  latencyMs: number
): BedrockCacheUsage {
  const { usage } = response;
  if (usage == null) {
    throw new Error('Missing Bedrock usage metadata for cache benchmark');
  }

  const promptTokens = usage.inputTokens ?? 0;
  const completionTokens = usage.outputTokens ?? 0;

  const summary: BedrockCacheUsage = {
    inputTokens: promptTokens,
    outputTokens: completionTokens,
    totalTokens: usage.totalTokens ?? promptTokens + completionTokens,
    cacheCreation: usage.cacheWriteInputTokens ?? 0,
    cacheRead: usage.cacheReadInputTokens ?? 0,
    latencyMs,
  };
  return summary;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Sends one Converse request for the given messages and reports its token
 * and cache usage.
 *
 * The messages are converted with `convertToConverseMessages`. The `system`
 * field is included only when the conversion produced system content. The
 * request always carries `benchmarkToolConfig` and a fixed inference config
 * (`maxTokens: 16`, `temperature: 0`).
 *
 * @param client Bedrock runtime client used to send the command.
 * @param messages Conversation to send.
 * @returns Usage extracted from the response, with the elapsed time measured
 * from just before the command is built until the response arrives.
 * @throws Error (from `extractCacheUsage`) when the response has no usage.
 */
async function runConverseCacheBenchmarkTurn({
  client,
  messages,
}: {
  client: BedrockRuntimeClient;
  messages: BaseMessage[];
}): Promise<BedrockCacheUsage> {
  const { converseMessages, converseSystem } =
    convertToConverseMessages(messages);

  const startedAt = Date.now();
  const command = new ConverseCommand({
    modelId: model,
    ...(converseSystem.length > 0 ? { system: converseSystem } : {}),
    messages: converseMessages,
    toolConfig: benchmarkToolConfig,
    inferenceConfig: { maxTokens: 16, temperature: 0 },
  });
  const response = await client.send(command);
  const elapsedMs = Date.now() - startedAt;

  return extractCacheUsage(response as ConverseUsageResponse, elapsedMs);
}
|
|
461
|
+
|
|
80
462
|
describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
81
463
|
it('caches only the stable system prefix while dynamic tail changes', async () => {
|
|
82
464
|
const nonce = `agent-bedrock-cache-live-${Date.now()}`;
|
|
@@ -146,4 +528,126 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
|
146
528
|
expect(second.text.toLowerCase()).toContain('bravo');
|
|
147
529
|
expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
|
|
148
530
|
}, 180_000);
|
|
531
|
+
|
|
532
|
+
it('reduces repeated cache writes versus the previous moving-tail placement', async () => {
|
|
533
|
+
const credentials = getCredentials();
|
|
534
|
+
const client = new BedrockRuntimeClient({
|
|
535
|
+
region,
|
|
536
|
+
...(credentials != null ? { credentials } : {}),
|
|
537
|
+
});
|
|
538
|
+
const nonce = `bedrock-cache-placement-${Date.now()}`;
|
|
539
|
+
const legacyNonce = `${nonce}-legacy`;
|
|
540
|
+
const currentNonce = `${nonce}-current`;
|
|
541
|
+
|
|
542
|
+
const legacyFirst = await runConverseCacheBenchmarkTurn({
|
|
543
|
+
client,
|
|
544
|
+
messages: addLegacyMovingTailBedrockCacheControl(
|
|
545
|
+
buildToolLoopMessages({ nonce: legacyNonce, marker: 'alpha' })
|
|
546
|
+
),
|
|
547
|
+
});
|
|
548
|
+
|
|
549
|
+
await waitForCachePropagation();
|
|
550
|
+
|
|
551
|
+
const legacySecond = await runConverseCacheBenchmarkTurn({
|
|
552
|
+
client,
|
|
553
|
+
messages: addLegacyMovingTailBedrockCacheControl(
|
|
554
|
+
buildToolLoopMessages({ nonce: legacyNonce, marker: 'bravo' })
|
|
555
|
+
),
|
|
556
|
+
});
|
|
557
|
+
|
|
558
|
+
const currentFirst = await runConverseCacheBenchmarkTurn({
|
|
559
|
+
client,
|
|
560
|
+
messages: addBedrockCacheControl(
|
|
561
|
+
buildToolLoopMessages({ nonce: currentNonce, marker: 'alpha' })
|
|
562
|
+
),
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
await waitForCachePropagation();
|
|
566
|
+
|
|
567
|
+
const currentSecond = await runConverseCacheBenchmarkTurn({
|
|
568
|
+
client,
|
|
569
|
+
messages: addBedrockCacheControl(
|
|
570
|
+
buildToolLoopMessages({ nonce: currentNonce, marker: 'bravo' })
|
|
571
|
+
),
|
|
572
|
+
});
|
|
573
|
+
|
|
574
|
+
const cacheWriteReduction =
|
|
575
|
+
legacySecond.cacheCreation - currentSecond.cacheCreation;
|
|
576
|
+
process.stdout.write(
|
|
577
|
+
`Bedrock cache placement benchmark ${JSON.stringify({
|
|
578
|
+
legacyFirst,
|
|
579
|
+
legacySecond,
|
|
580
|
+
currentFirst,
|
|
581
|
+
currentSecond,
|
|
582
|
+
cacheWriteReduction,
|
|
583
|
+
})}\n`
|
|
584
|
+
);
|
|
585
|
+
|
|
586
|
+
expect(currentSecond.cacheRead).toBeGreaterThan(0);
|
|
587
|
+
expect(cacheWriteReduction).toBeGreaterThan(0);
|
|
588
|
+
expect(currentSecond.cacheCreation).toBeLessThan(
|
|
589
|
+
Math.ceil(legacySecond.cacheCreation * 0.5)
|
|
590
|
+
);
|
|
591
|
+
}, 240_000);
|
|
592
|
+
|
|
593
|
+
it('reuses prior user cache points when the latest user turn changes', async () => {
|
|
594
|
+
const credentials = getCredentials();
|
|
595
|
+
const client = new BedrockRuntimeClient({
|
|
596
|
+
region,
|
|
597
|
+
...(credentials != null ? { credentials } : {}),
|
|
598
|
+
});
|
|
599
|
+
const nonce = `bedrock-multiturn-cache-placement-${Date.now()}`;
|
|
600
|
+
const currentNonce = `${nonce}-current`;
|
|
601
|
+
const latestOnlyNonce = `${nonce}-latest-only`;
|
|
602
|
+
|
|
603
|
+
const currentFirst = await runConverseCacheBenchmarkTurn({
|
|
604
|
+
client,
|
|
605
|
+
messages: addBedrockCacheControl(
|
|
606
|
+
buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'alpha' })
|
|
607
|
+
),
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
await waitForCachePropagation();
|
|
611
|
+
|
|
612
|
+
const currentSecond = await runConverseCacheBenchmarkTurn({
|
|
613
|
+
client,
|
|
614
|
+
messages: addBedrockCacheControl(
|
|
615
|
+
buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'bravo' })
|
|
616
|
+
),
|
|
617
|
+
});
|
|
618
|
+
|
|
619
|
+
const latestOnlyFirst = await runConverseCacheBenchmarkTurn({
|
|
620
|
+
client,
|
|
621
|
+
messages: addLatestUserOnlyBedrockCacheControl(
|
|
622
|
+
buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'alpha' })
|
|
623
|
+
),
|
|
624
|
+
});
|
|
625
|
+
|
|
626
|
+
await waitForCachePropagation();
|
|
627
|
+
|
|
628
|
+
const latestOnlySecond = await runConverseCacheBenchmarkTurn({
|
|
629
|
+
client,
|
|
630
|
+
messages: addLatestUserOnlyBedrockCacheControl(
|
|
631
|
+
buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'bravo' })
|
|
632
|
+
),
|
|
633
|
+
});
|
|
634
|
+
|
|
635
|
+
process.stdout.write(
|
|
636
|
+
`Bedrock multi-turn cache placement benchmark ${JSON.stringify({
|
|
637
|
+
currentFirst,
|
|
638
|
+
currentSecond,
|
|
639
|
+
latestOnlyFirst,
|
|
640
|
+
latestOnlySecond,
|
|
641
|
+
cacheWriteDelta:
|
|
642
|
+
currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
|
|
643
|
+
})}\n`
|
|
644
|
+
);
|
|
645
|
+
|
|
646
|
+
expect(currentSecond.cacheRead).toBeGreaterThan(
|
|
647
|
+
latestOnlySecond.cacheRead
|
|
648
|
+
);
|
|
649
|
+
expect(currentSecond.cacheCreation).toBeLessThan(
|
|
650
|
+
latestOnlySecond.cacheCreation
|
|
651
|
+
);
|
|
652
|
+
}, 240_000);
|
|
149
653
|
});
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -81,6 +81,7 @@ import {
|
|
|
81
81
|
import { HandlerRegistry } from '@/events';
|
|
82
82
|
import { ChatOpenAI } from '@/llm/openai';
|
|
83
83
|
import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
|
|
84
|
+
import { partitionAndMarkBedrockToolCache } from '@/llm/bedrock/toolCache';
|
|
84
85
|
import type { HookRegistry } from '@/hooks';
|
|
85
86
|
|
|
86
87
|
const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
|
|
@@ -962,6 +963,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
962
963
|
rawToolsForBinding,
|
|
963
964
|
makeIsDeferred(agentContext.toolDefinitions)
|
|
964
965
|
) ?? rawToolsForBinding;
|
|
966
|
+
} else if (
|
|
967
|
+
agentContext.provider === Providers.BEDROCK &&
|
|
968
|
+
(
|
|
969
|
+
agentContext.clientOptions as
|
|
970
|
+
| t.BedrockAnthropicClientOptions
|
|
971
|
+
| undefined
|
|
972
|
+
)?.promptCache === true
|
|
973
|
+
) {
|
|
974
|
+
toolsForBinding =
|
|
975
|
+
partitionAndMarkBedrockToolCache(
|
|
976
|
+
rawToolsForBinding,
|
|
977
|
+
makeIsDeferred(agentContext.toolDefinitions)
|
|
978
|
+
) ?? rawToolsForBinding;
|
|
965
979
|
}
|
|
966
980
|
|
|
967
981
|
let model =
|
|
@@ -20,6 +20,7 @@ const langfuseConfigKey = createContextKey('librechat.langfuse.config');
|
|
|
20
20
|
const toolOutputTracingStorage =
|
|
21
21
|
new AsyncLocalStorage<ResolvedLangfuseToolOutputTracingConfig>();
|
|
22
22
|
const langfuseConfigStorage = new AsyncLocalStorage<t.LangfuseConfig>();
|
|
23
|
+
const LANGGRAPH_TOOL_NODE_PREFIX = 'tools=';
|
|
23
24
|
|
|
24
25
|
const CHAT_ROLES = new Set([
|
|
25
26
|
'assistant',
|
|
@@ -446,6 +447,26 @@ function isToolObservation(attributes: Record<string, unknown>): boolean {
|
|
|
446
447
|
return typeof type === 'string' && type.toLowerCase() === 'tool';
|
|
447
448
|
}
|
|
448
449
|
|
|
450
|
+
/**
 * Reclassifies a span as a `tool` observation when it belongs to a LangGraph
 * tool node.
 *
 * The observation type is rewritten in place only when both hold:
 * - the current observation type is the string `span` (case-insensitive);
 * - the `langgraph_node` metadata attribute is a string that starts with
 *   `LANGGRAPH_TOOL_NODE_PREFIX`.
 *
 * @param attributes Span attributes; mutated when the span is reclassified.
 */
function classifyLangGraphToolNodeSpan(
  attributes: Record<string, unknown>
): void {
  const typeKey = LangfuseOtelSpanAttributes.OBSERVATION_TYPE;
  const observationType = attributes[typeKey];
  const isGenericSpan =
    typeof observationType === 'string' &&
    observationType.toLowerCase() === 'span';
  if (!isGenericSpan) {
    return;
  }

  const nodeKey = `${LangfuseOtelSpanAttributes.OBSERVATION_METADATA}.langgraph_node`;
  const nodeName = attributes[nodeKey];
  if (typeof nodeName !== 'string') {
    return;
  }
  if (!nodeName.startsWith(LANGGRAPH_TOOL_NODE_PREFIX)) {
    return;
  }

  attributes[typeKey] = 'tool';
}
|
|
469
|
+
|
|
449
470
|
function redactToolObservationOutput(
|
|
450
471
|
span: ReadableSpan,
|
|
451
472
|
attributes: Record<string, unknown>,
|
|
@@ -469,11 +490,13 @@ export function redactLangfuseSpanToolOutputs(
|
|
|
469
490
|
span: ReadableSpan,
|
|
470
491
|
config: ResolvedLangfuseToolOutputTracingConfig
|
|
471
492
|
): void {
|
|
493
|
+
const attributes = (span as SpanWithAttributes).attributes;
|
|
494
|
+
classifyLangGraphToolNodeSpan(attributes);
|
|
495
|
+
|
|
472
496
|
if (!shouldApplyToolOutputRedaction(config)) {
|
|
473
497
|
return;
|
|
474
498
|
}
|
|
475
499
|
|
|
476
|
-
const attributes = (span as SpanWithAttributes).attributes;
|
|
477
500
|
redactToolObservationOutput(span, attributes, config);
|
|
478
501
|
|
|
479
502
|
for (const key of [
|
|
@@ -618,10 +641,7 @@ function hasLangfuseConfigKeys(langfuse?: t.LangfuseConfig): boolean {
|
|
|
618
641
|
if (langfuse == null) {
|
|
619
642
|
return false;
|
|
620
643
|
}
|
|
621
|
-
return (
|
|
622
|
-
isPresent(langfuse.secretKey) &&
|
|
623
|
-
isPresent(langfuse.publicKey)
|
|
624
|
-
);
|
|
644
|
+
return isPresent(langfuse.secretKey) && isPresent(langfuse.publicKey);
|
|
625
645
|
}
|
|
626
646
|
|
|
627
647
|
export function shouldTraceToolNodeForLangfuse({
|
|
@@ -639,8 +659,7 @@ export function shouldTraceToolNodeForLangfuse({
|
|
|
639
659
|
const explicit = langfuse?.toolNodeTracing?.enabled;
|
|
640
660
|
if (explicit != null) {
|
|
641
661
|
return (
|
|
642
|
-
explicit &&
|
|
643
|
-
(hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
|
|
662
|
+
explicit && (hasLangfuseConfigKeys(langfuse) || hasLangfuseEnvKeys())
|
|
644
663
|
);
|
|
645
664
|
}
|
|
646
665
|
|
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -22,12 +22,17 @@
|
|
|
22
22
|
*/
|
|
23
23
|
|
|
24
24
|
import { ChatBedrockConverse } from '@langchain/aws';
|
|
25
|
-
import {
|
|
25
|
+
import {
|
|
26
|
+
ConverseStreamCommand,
|
|
27
|
+
type GuardrailConfiguration,
|
|
28
|
+
type GuardrailStreamConfiguration,
|
|
29
|
+
} from '@aws-sdk/client-bedrock-runtime';
|
|
26
30
|
import { AIMessageChunk } from '@langchain/core/messages';
|
|
27
31
|
import { ChatGenerationChunk, ChatResult } from '@langchain/core/outputs';
|
|
28
32
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
29
33
|
import type { ChatBedrockConverseInput } from '@langchain/aws';
|
|
30
34
|
import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
|
|
35
|
+
import { insertBedrockToolCachePoint } from './toolCache';
|
|
31
36
|
import {
|
|
32
37
|
convertToConverseMessages,
|
|
33
38
|
handleConverseStreamContentBlockStart,
|
|
@@ -42,6 +47,9 @@ import {
|
|
|
42
47
|
*/
|
|
43
48
|
export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
|
|
44
49
|
|
|
50
|
+
export type CustomGuardrailConfiguration = GuardrailConfiguration &
|
|
51
|
+
Pick<GuardrailStreamConfiguration, 'streamProcessingMode'>;
|
|
52
|
+
|
|
45
53
|
/**
|
|
46
54
|
* Extended input interface with additional features:
|
|
47
55
|
* - applicationInferenceProfile: Use an inference profile ARN instead of model ID
|
|
@@ -49,6 +57,17 @@ export type ServiceTierType = 'priority' | 'default' | 'flex' | 'reserved';
|
|
|
49
57
|
*/
|
|
50
58
|
export interface CustomChatBedrockConverseInput
|
|
51
59
|
extends ChatBedrockConverseInput {
|
|
60
|
+
/**
|
|
61
|
+
* Enables Bedrock prompt cache checkpoints for message and tool prefixes.
|
|
62
|
+
*/
|
|
63
|
+
promptCache?: boolean;
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Guardrail configuration for Converse and ConverseStream invocations.
|
|
67
|
+
* `streamProcessingMode` is only used by ConverseStream.
|
|
68
|
+
*/
|
|
69
|
+
guardrailConfig?: CustomGuardrailConfiguration;
|
|
70
|
+
|
|
52
71
|
/**
|
|
53
72
|
* Application Inference Profile ARN to use for the model.
|
|
54
73
|
* For example, "arn:aws:bedrock:eu-west-1:123456789102:application-inference-profile/fm16bt65tzgx"
|
|
@@ -80,9 +99,15 @@ export interface CustomChatBedrockConverseInput
|
|
|
80
99
|
*/
|
|
81
100
|
export interface CustomChatBedrockConverseCallOptions {
|
|
82
101
|
serviceTier?: ServiceTierType;
|
|
102
|
+
guardrailConfig?: CustomGuardrailConfiguration;
|
|
83
103
|
}
|
|
84
104
|
|
|
85
105
|
export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
106
|
+
/**
|
|
107
|
+
* Whether to insert Bedrock prompt cache checkpoints when available.
|
|
108
|
+
*/
|
|
109
|
+
promptCache?: boolean;
|
|
110
|
+
|
|
86
111
|
/**
|
|
87
112
|
* Application Inference Profile ARN to use instead of model ID.
|
|
88
113
|
*/
|
|
@@ -95,6 +120,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
95
120
|
|
|
96
121
|
constructor(fields?: CustomChatBedrockConverseInput) {
|
|
97
122
|
super(fields);
|
|
123
|
+
this.promptCache = fields?.promptCache;
|
|
98
124
|
this.applicationInferenceProfile = fields?.applicationInferenceProfile;
|
|
99
125
|
this.serviceTier = fields?.serviceTier;
|
|
100
126
|
}
|
|
@@ -120,12 +146,17 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
120
146
|
serviceTier?: { type: ServiceTierType };
|
|
121
147
|
} {
|
|
122
148
|
const baseParams = super.invocationParams(options);
|
|
149
|
+
const toolConfig =
|
|
150
|
+
this.promptCache === true
|
|
151
|
+
? insertBedrockToolCachePoint(baseParams.toolConfig, true)
|
|
152
|
+
: baseParams.toolConfig;
|
|
123
153
|
|
|
124
154
|
/** Service tier from options or fall back to class-level setting */
|
|
125
155
|
const serviceTierType = options?.serviceTier ?? this.serviceTier;
|
|
126
156
|
|
|
127
157
|
return {
|
|
128
158
|
...baseParams,
|
|
159
|
+
toolConfig,
|
|
129
160
|
serviceTier: serviceTierType ? { type: serviceTierType } : undefined,
|
|
130
161
|
};
|
|
131
162
|
}
|