@librechat/agents 3.1.96 → 3.1.98
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +60 -21
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs +120 -9
- package/dist/cjs/instrumentation.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +30 -226
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +476 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -0
- package/dist/cjs/llm/bedrock/index.cjs +10 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +125 -0
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +17 -9
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/run.cjs +142 -69
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +26 -9
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +10 -6
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +62 -23
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs +118 -9
- package/dist/esm/instrumentation.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +28 -224
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +468 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -0
- package/dist/esm/llm/bedrock/index.mjs +10 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +122 -0
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +17 -9
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/run.mjs +144 -71
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +26 -9
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +10 -6
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +5 -1
- package/dist/types/instrumentation.d.ts +5 -1
- package/dist/types/langfuse.d.ts +6 -28
- package/dist/types/langfuseToolOutputTracing.d.ts +20 -0
- package/dist/types/llm/bedrock/index.d.ts +16 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +4 -0
- package/dist/types/messages/cache.d.ts +2 -2
- package/dist/types/run.d.ts +5 -1
- package/dist/types/tools/ToolNode.d.ts +4 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +2 -0
- package/dist/types/types/graph.d.ts +30 -0
- package/dist/types/types/llm.d.ts +2 -2
- package/dist/types/types/run.d.ts +6 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/package.json +2 -1
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +332 -0
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +504 -0
- package/src/graphs/Graph.ts +104 -34
- package/src/instrumentation.ts +172 -11
- package/src/langfuse.ts +59 -324
- package/src/langfuseToolOutputTracing.ts +702 -0
- package/src/llm/bedrock/index.ts +32 -1
- package/src/llm/bedrock/llm.spec.ts +154 -1
- package/src/llm/bedrock/toolCache.test.ts +131 -0
- package/src/llm/bedrock/toolCache.ts +191 -0
- package/src/messages/cache.test.ts +97 -38
- package/src/messages/cache.ts +18 -10
- package/src/run.ts +190 -87
- package/src/specs/langfuse-callbacks.test.ts +178 -1
- package/src/specs/langfuse-config.test.ts +112 -76
- package/src/specs/langfuse-instrumentation.test.ts +283 -0
- package/src/specs/langfuse-metadata.test.ts +54 -1
- package/src/specs/langfuse-tool-output-tracing.test.ts +616 -0
- package/src/tools/ToolNode.ts +35 -8
- package/src/tools/__tests__/SubagentExecutor.test.ts +32 -0
- package/src/tools/__tests__/ToolNode.langfuse.test.ts +47 -0
- package/src/tools/subagent/SubagentExecutor.ts +11 -6
- package/src/types/graph.ts +32 -0
- package/src/types/llm.ts +2 -2
- package/src/types/run.ts +6 -0
- package/src/types/tools.ts +7 -0
|
@@ -11,6 +11,18 @@ import { config as dotenvConfig } from 'dotenv';
|
|
|
11
11
|
dotenvConfig();
|
|
12
12
|
|
|
13
13
|
import { describe, expect, it } from '@jest/globals';
|
|
14
|
+
import {
|
|
15
|
+
AIMessage,
|
|
16
|
+
BaseMessage,
|
|
17
|
+
HumanMessage,
|
|
18
|
+
SystemMessage,
|
|
19
|
+
ToolMessage,
|
|
20
|
+
type MessageContentComplex,
|
|
21
|
+
} from '@langchain/core/messages';
|
|
22
|
+
import {
|
|
23
|
+
BedrockRuntimeClient,
|
|
24
|
+
ConverseCommand,
|
|
25
|
+
} from '@aws-sdk/client-bedrock-runtime';
|
|
14
26
|
import type * as t from '@/types';
|
|
15
27
|
import {
|
|
16
28
|
runLiveTurn,
|
|
@@ -20,6 +32,9 @@ import {
|
|
|
20
32
|
waitForCachePropagation,
|
|
21
33
|
} from './promptCacheLiveHelpers';
|
|
22
34
|
import { Providers } from '@/common';
|
|
35
|
+
import { addBedrockCacheControl } from '@/messages/cache';
|
|
36
|
+
import { toLangChainContent } from '@/messages/langchain';
|
|
37
|
+
import { convertToConverseMessages } from '@/llm/bedrock/utils';
|
|
23
38
|
|
|
24
39
|
const accessKeyId =
|
|
25
40
|
process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
|
|
@@ -77,6 +92,373 @@ function createClientOptions(): t.BedrockAnthropicClientOptions {
|
|
|
77
92
|
};
|
|
78
93
|
}
|
|
79
94
|
|
|
95
|
+
/**
 * Token accounting and latency recorded for a single Converse call made by
 * the cache placement benchmarks in this file.
 */
type BedrockCacheUsage = {
  /** Input tokens reported in the response usage block. */
  inputTokens: number;
  /** Output tokens reported in the response usage block. */
  outputTokens: number;
  /** Total tokens for the call. */
  totalTokens: number;
  /** Tokens written to the prompt cache (`cacheWriteInputTokens`). */
  cacheCreation: number;
  /** Tokens served from the prompt cache (`cacheReadInputTokens`). */
  cacheRead: number;
  /** Elapsed wall-clock time for the call, in milliseconds. */
  latencyMs: number;
};
|
|
103
|
+
|
|
104
|
+
/**
 * Minimal structural view of a Converse response: only the optional `usage`
 * counters that the benchmarks read. Every counter is optional, so readers
 * must supply their own defaults.
 */
type ConverseUsageResponse = {
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
    totalTokens?: number;
    cacheReadInputTokens?: number;
    cacheWriteInputTokens?: number;
  };
};
|
|
113
|
+
|
|
114
|
+
/**
 * Tool configuration sent with every benchmark Converse call. It declares the
 * single `lookup_cache_probe` tool that the synthetic tool-call messages in
 * this file refer to; its input is an object with a required integer `step`.
 */
const benchmarkToolConfig = {
  tools: [
    {
      toolSpec: {
        name: 'lookup_cache_probe',
        description: 'Returns prompt cache benchmark data.',
        inputSchema: {
          json: {
            type: 'object',
            properties: {
              step: { type: 'integer' },
            },
            required: ['step'],
          },
        },
      },
    },
  ],
};
|
|
133
|
+
|
|
134
|
+
/**
 * Creates a new cache point content block of the form
 * `{ cachePoint: { type: 'default' } }`.
 *
 * A fresh object is returned on every call so callers can splice it into
 * content arrays without sharing references.
 */
function cachePointBlock(): MessageContentComplex {
  // The cast is needed because the block has no `type` discriminator.
  return { cachePoint: { type: 'default' } } as MessageContentComplex;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Returns a copy of `content` with cache markers removed.
 *
 * - Blocks that have a `cachePoint` key and no `type` key are dropped.
 * - Every remaining block is shallow-copied and its `cache_control` key, if
 *   any, is deleted from the copy.
 *
 * Neither the input array nor its blocks are mutated.
 */
function stripCacheMarkers(
  content: MessageContentComplex[]
): MessageContentComplex[] {
  const stripped: MessageContentComplex[] = [];

  for (const block of content) {
    // A bare cache point carries `cachePoint` but no `type` discriminator.
    const isBareCachePoint = 'cachePoint' in block && !('type' in block);
    if (isBareCachePoint) {
      continue;
    }

    const copy = { ...block };
    delete (copy as Record<string, unknown>).cache_control;
    stripped.push(copy as MessageContentComplex);
  }

  return stripped;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Rebuilds `message` as a new message of the same type carrying `content`.
 *
 * Shallow copies of `additional_kwargs` and `response_metadata`, plus `id`
 * and `name`, are carried over. AI messages also keep `tool_calls`, and tool
 * messages keep `tool_call_id`.
 *
 * @param message - The message to copy fields from.
 * @param content - Replacement content blocks, converted with
 *   `toLangChainContent` before being stored.
 * @returns A new message for `ai`, `human`, `system` and `tool` types. For any
 *   other type the original `message` instance is returned unchanged, and
 *   `content` is ignored.
 */
function cloneLiveMessage(
  message: BaseMessage,
  content: MessageContentComplex[]
): BaseMessage {
  const shared = {
    content: toLangChainContent(content),
    additional_kwargs: { ...message.additional_kwargs },
    response_metadata: { ...message.response_metadata },
    id: message.id,
    name: message.name,
  };

  switch (message.getType()) {
    case 'ai':
      return new AIMessage({
        ...shared,
        tool_calls: (message as AIMessage).tool_calls,
      });
    case 'human':
      return new HumanMessage(shared);
    case 'system':
      return new SystemMessage(shared);
    case 'tool':
      return new ToolMessage({
        ...shared,
        tool_call_id: (message as ToolMessage).tool_call_id,
      });
    default:
      return message;
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Benchmark baseline: places Bedrock cache points on the tail of the
 * conversation. Judging by its name and the test that uses it, this mirrors
 * the previous placement strategy, so its behaviour is deliberately left
 * untouched. NOTE(review): confirm it still matches the code it replaced.
 *
 * Messages are walked from last to first. System and tool messages are
 * skipped entirely. For the rest:
 *
 * - String content: while fewer than two cache points have been placed and
 *   the string is not `''`, the message is replaced by a clone whose content
 *   is the text block followed by a cache point.
 * - Array content: existing cache markers are always stripped and the
 *   message is always replaced by a clone. While fewer than two cache points
 *   have been placed, one is inserted directly after the last matching text
 *   block.
 *
 * @param messages - Conversation to annotate. The array is copied, not mutated.
 * @returns A new array; untouched messages are the original instances.
 */
function addLegacyMovingTailBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const maxCachePoints = 2;
  const result = [...messages];
  let cachePointsPlaced = 0;

  for (let index = result.length - 1; index >= 0; index--) {
    const current = result[index];
    const kind = current.getType();
    if (kind === 'system' || kind === 'tool') {
      continue;
    }

    const { content } = current;

    if (typeof content === 'string') {
      if (content !== '' && cachePointsPlaced < maxCachePoints) {
        result[index] = cloneLiveMessage(current, [
          { type: 'text', text: content } as MessageContentComplex,
          cachePointBlock(),
        ]);
        cachePointsPlaced++;
      }
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    const blocks = stripCacheMarkers(content as MessageContentComplex[]);
    // Kept as in the baseline: a text block with no `text` property also
    // matches, because `undefined !== ''` is true.
    const lastTextIndex = blocks.findLastIndex((block) => {
      const type = (block as { type?: string }).type;
      const text = (block as { text?: string }).text;
      return (type === 'text' || type === 'input_text') && text?.trim() !== '';
    });

    if (cachePointsPlaced < maxCachePoints && lastTextIndex >= 0) {
      blocks.splice(lastTextIndex + 1, 0, cachePointBlock());
      cachePointsPlaced++;
    }

    // Array-content messages are rewritten even when no cache point is added,
    // so stale markers never survive.
    result[index] = cloneLiveMessage(current, blocks);
  }

  return result;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Benchmark counterfactual: places a single Bedrock cache point on the most
 * recent human message that can take one, and strips existing cache markers
 * from array-content messages.
 *
 * Messages are walked from last to first and system messages are skipped.
 *
 * - Array content (any non-system type): cache markers are stripped. If this
 *   is a human message and no cache point has been placed yet, one is
 *   inserted after the last non-blank text block. The message is replaced by
 *   a clone only if something changed.
 * - String content: only a human message with non-blank text can receive the
 *   cache point; it becomes a text block followed by a cache point.
 *
 * @param messages - Conversation to annotate. The array is copied, not mutated.
 * @returns A new array; untouched messages are the original instances.
 */
function addLatestUserOnlyBedrockCacheControl(
  messages: BaseMessage[]
): BaseMessage[] {
  const result = [...messages];
  let cachePointPlaced = false;

  for (let index = result.length - 1; index >= 0; index--) {
    const current = result[index];
    const kind = current.getType();
    if (kind === 'system') {
      continue;
    }

    const { content } = current;
    const mayPlaceCachePoint = !cachePointPlaced && kind === 'human';

    if (Array.isArray(content)) {
      const blocks = stripCacheMarkers(content as MessageContentComplex[]);
      // A change is either a dropped cache point block (length differs) or a
      // removed `cache_control` key (length unchanged, so checked separately).
      let changed =
        blocks.length !== content.length ||
        content.some(
          (block) =>
            typeof block === 'object' &&
            block !== null &&
            'cache_control' in block
        );

      const lastTextIndex = blocks.findLastIndex((block) => {
        const type = (block as { type?: string }).type;
        const text = (block as { text?: unknown }).text;
        // Require a real string so a text block with no `text` is not treated
        // as non-blank.
        return (
          (type === 'text' || type === 'input_text') &&
          typeof text === 'string' &&
          text.trim() !== ''
        );
      });

      if (mayPlaceCachePoint && lastTextIndex >= 0) {
        blocks.splice(lastTextIndex + 1, 0, cachePointBlock());
        cachePointPlaced = true;
        changed = true;
      }

      if (changed) {
        result[index] = cloneLiveMessage(current, blocks);
      }
      continue;
    }

    if (
      mayPlaceCachePoint &&
      typeof content === 'string' &&
      content.trim() !== ''
    ) {
      result[index] = cloneLiveMessage(current, [
        { type: 'text', text: content } as MessageContentComplex,
        cachePointBlock(),
      ]);
      cachePointPlaced = true;
    }
  }

  return result;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Builds `count` newline-separated filler lines, each prefixed with `label`
 * and its zero-based index. The benchmarks use it to pad prompts with
 * deterministic text.
 *
 * @param label - Prefix placed at the start of every line.
 * @param count - Number of lines to produce; `0` yields an empty string.
 * @returns The lines joined with `\n` and no trailing newline.
 */
function repeated(label: string, count: number): string {
  const lineFor = (index: number): string =>
    `${label} reference ${index}: stable schema, metric definition, access policy, dashboard note, and query planning guidance.`;

  return Array.from({ length: count }, (_, index) => lineFor(index)).join('\n');
}
|
|
305
|
+
|
|
306
|
+
/**
 * Builds a single-user-turn conversation with two tool round-trips for the
 * cache placement benchmark.
 *
 * The first human message depends only on `nonce`, so it is identical across
 * calls that share a nonce. Every later message embeds `marker`, so the tail
 * changes whenever the marker changes.
 *
 * @param nonce - Identifies the benchmark case; embedded in all filler text.
 * @param marker - Varies per call; embedded in assistant and tool messages
 *   and in the tool call ids.
 * @returns Five messages: human, AI tool call 1, tool result 1, AI tool
 *   call 2, final tool result.
 */
function buildToolLoopMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const stableUserContext = [
    `Bedrock prompt cache placement benchmark ${nonce}.`,
    'The first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} user-context`, 190),
    'Use the final tool result to answer with the requested marker.',
  ].join('\n');
  const volatileToolPayload = repeated(`${nonce} volatile-${marker}`, 70);

  // Assistant turn that calls `lookup_cache_probe` for the given step.
  const probeCall = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: `call_${marker}_${step}`,
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });

  const finalToolResult = [
    `Final tool result marker: ${marker}.`,
    'Reply with the marker and no extra explanation.',
    volatileToolPayload,
  ].join('\n');

  return [
    new HumanMessage(stableUserContext),
    probeCall(1),
    new ToolMessage({
      content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
      tool_call_id: `call_${marker}_1`,
    }),
    probeCall(2),
    new ToolMessage({
      content: finalToolResult,
      tool_call_id: `call_${marker}_2`,
    }),
  ];
}
|
|
357
|
+
|
|
358
|
+
/**
 * Builds a two-user-turn conversation followed by two tool round-trips for
 * the multi-turn cache placement benchmark.
 *
 * The first human message and the assistant acknowledgement do not depend on
 * `marker`, so they are identical across calls that share a nonce. The
 * second human message and everything after it embed `marker`.
 *
 * @param nonce - Identifies the benchmark case; embedded in all filler text.
 * @param marker - Varies per call; embedded in the latest user turn, the
 *   assistant and tool messages, and the tool call ids.
 * @returns Seven messages: human, AI acknowledgement, latest human, AI tool
 *   call 1, tool result 1, AI tool call 2, final tool result.
 */
function buildMultiTurnToolMessages({
  nonce,
  marker,
}: {
  nonce: string;
  marker: string;
}): BaseMessage[] {
  const stableFirstUser = [
    `Bedrock multi-turn prompt cache benchmark ${nonce}.`,
    'This first user turn is intentionally stable across calls in the same benchmark case.',
    repeated(`${nonce} stable-user-context`, 190),
  ].join('\n');
  const latestUser = [
    `Current user request marker: ${marker}.`,
    'Use the final tool result to answer with the marker only.',
    repeated(`${nonce} latest-user-${marker}`, 18),
  ].join('\n');
  const volatileToolPayload = repeated(`${nonce} volatile-tool-${marker}`, 70);

  // Assistant turn that calls `lookup_cache_probe` for the given step.
  const probeCall = (step: number): AIMessage =>
    new AIMessage({
      content: `I will inspect cache probe step ${step} for ${marker}.\n${volatileToolPayload}`,
      tool_calls: [
        {
          id: `call_${marker}_${step}`,
          name: 'lookup_cache_probe',
          args: { step },
        },
      ],
    });

  const finalToolResult = [
    `Final tool result marker: ${marker}.`,
    'Reply with the marker and no extra explanation.',
    volatileToolPayload,
  ].join('\n');

  return [
    new HumanMessage(stableFirstUser),
    new AIMessage('I will keep this stable context in mind.'),
    new HumanMessage(latestUser),
    probeCall(1),
    new ToolMessage({
      content: `Tool result 1 for ${marker}.\n${volatileToolPayload}`,
      tool_call_id: `call_${marker}_1`,
    }),
    probeCall(2),
    new ToolMessage({
      content: finalToolResult,
      tool_call_id: `call_${marker}_2`,
    }),
  ];
}
|
|
415
|
+
|
|
416
|
+
/**
 * Normalizes the usage counters of a Converse response into a
 * `BedrockCacheUsage` record.
 *
 * @param response - Response whose `usage` block is read.
 * @param latencyMs - Elapsed time for the call, copied into the result.
 * @returns Usage with every missing counter defaulted to `0`. `totalTokens`
 *   falls back to `inputTokens + outputTokens` when the response omits it.
 * @throws Error when `response.usage` is `null` or `undefined`.
 */
function extractCacheUsage(
  response: ConverseUsageResponse,
  latencyMs: number
): BedrockCacheUsage {
  const { usage } = response;
  if (usage == null) {
    throw new Error('Missing Bedrock usage metadata for cache benchmark');
  }

  const inputTokens = usage.inputTokens ?? 0;
  const outputTokens = usage.outputTokens ?? 0;

  return {
    inputTokens,
    outputTokens,
    totalTokens: usage.totalTokens ?? inputTokens + outputTokens,
    cacheCreation: usage.cacheWriteInputTokens ?? 0,
    cacheRead: usage.cacheReadInputTokens ?? 0,
    latencyMs,
  };
}
|
|
435
|
+
|
|
436
|
+
/**
 * Sends one Converse request for `messages` and returns its usage counters
 * and latency.
 *
 * The messages are converted with `convertToConverseMessages`. The request
 * uses the file-level `model` and `benchmarkToolConfig`, a 16-token output
 * cap and temperature 0. The `system` field is sent only when the converted
 * system content is non-empty.
 *
 * @param client - Bedrock runtime client used to send the request.
 * @param messages - Conversation to send, including any cache point blocks.
 * @returns Usage from the response, with latency measured around the send
 *   call only (message conversion is excluded).
 * @throws Error (from `extractCacheUsage`) when the response has no usage.
 */
async function runConverseCacheBenchmarkTurn({
  client,
  messages,
}: {
  client: BedrockRuntimeClient;
  messages: BaseMessage[];
}): Promise<BedrockCacheUsage> {
  const { converseMessages, converseSystem } =
    convertToConverseMessages(messages);
  const systemField =
    converseSystem.length > 0 ? { system: converseSystem } : {};

  const startedAt = Date.now();
  const response = await client.send(
    new ConverseCommand({
      modelId: model,
      ...systemField,
      messages: converseMessages,
      toolConfig: benchmarkToolConfig,
      inferenceConfig: { maxTokens: 16, temperature: 0 },
    })
  );
  const latencyMs = Date.now() - startedAt;

  return extractCacheUsage(response as ConverseUsageResponse, latencyMs);
}
|
|
461
|
+
|
|
80
462
|
describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
81
463
|
it('caches only the stable system prefix while dynamic tail changes', async () => {
|
|
82
464
|
const nonce = `agent-bedrock-cache-live-${Date.now()}`;
|
|
@@ -146,4 +528,126 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
|
146
528
|
expect(second.text.toLowerCase()).toContain('bravo');
|
|
147
529
|
expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
|
|
148
530
|
}, 180_000);
|
|
531
|
+
|
|
532
|
+
// Live benchmark: runs the same tool-loop conversation twice ("alpha" then
// "bravo" marker) under the legacy moving-tail placement and under
// `addBedrockCacheControl`, then compares cache writes on the second call.
// Each strategy uses its own nonce so the two do not share cached prefixes.
it('reduces repeated cache writes versus the previous moving-tail placement', async () => {
  const credentials = getCredentials();
  const client = new BedrockRuntimeClient({
    region,
    ...(credentials != null ? { credentials } : {}),
  });
  const nonce = `bedrock-cache-placement-${Date.now()}`;
  const legacyNonce = `${nonce}-legacy`;
  const currentNonce = `${nonce}-current`;

  // The calls must stay sequential: the second call of each pair relies on
  // the cache state left behind by the first.
  const legacyFirst = await runConverseCacheBenchmarkTurn({
    client,
    messages: addLegacyMovingTailBedrockCacheControl(
      buildToolLoopMessages({ nonce: legacyNonce, marker: 'alpha' })
    ),
  });

  await waitForCachePropagation();

  const legacySecond = await runConverseCacheBenchmarkTurn({
    client,
    messages: addLegacyMovingTailBedrockCacheControl(
      buildToolLoopMessages({ nonce: legacyNonce, marker: 'bravo' })
    ),
  });

  const currentFirst = await runConverseCacheBenchmarkTurn({
    client,
    messages: addBedrockCacheControl(
      buildToolLoopMessages({ nonce: currentNonce, marker: 'alpha' })
    ),
  });

  await waitForCachePropagation();

  const currentSecond = await runConverseCacheBenchmarkTurn({
    client,
    messages: addBedrockCacheControl(
      buildToolLoopMessages({ nonce: currentNonce, marker: 'bravo' })
    ),
  });

  const cacheWriteReduction =
    legacySecond.cacheCreation - currentSecond.cacheCreation;
  // Print the raw numbers so a run can be inspected even when it passes.
  process.stdout.write(
    `Bedrock cache placement benchmark ${JSON.stringify({
      legacyFirst,
      legacySecond,
      currentFirst,
      currentSecond,
      cacheWriteReduction,
    })}\n`
  );

  expect(currentSecond.cacheRead).toBeGreaterThan(0);
  expect(cacheWriteReduction).toBeGreaterThan(0);
  // The current placement must write less than half of what legacy wrote.
  expect(currentSecond.cacheCreation).toBeLessThan(
    Math.ceil(legacySecond.cacheCreation * 0.5)
  );
}, 240_000);
|
|
592
|
+
|
|
593
|
+
// Live benchmark: runs the multi-turn conversation twice ("alpha" then
// "bravo" marker) under `addBedrockCacheControl` and under the
// latest-user-only counterfactual, then compares the second calls. Each
// strategy uses its own nonce so the two do not share cached prefixes.
it('reuses prior user cache points when the latest user turn changes', async () => {
  const credentials = getCredentials();
  const client = new BedrockRuntimeClient({
    region,
    ...(credentials != null ? { credentials } : {}),
  });
  const nonce = `bedrock-multiturn-cache-placement-${Date.now()}`;
  const currentNonce = `${nonce}-current`;
  const latestOnlyNonce = `${nonce}-latest-only`;

  // The calls must stay sequential: the second call of each pair relies on
  // the cache state left behind by the first.
  const currentFirst = await runConverseCacheBenchmarkTurn({
    client,
    messages: addBedrockCacheControl(
      buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'alpha' })
    ),
  });

  await waitForCachePropagation();

  const currentSecond = await runConverseCacheBenchmarkTurn({
    client,
    messages: addBedrockCacheControl(
      buildMultiTurnToolMessages({ nonce: currentNonce, marker: 'bravo' })
    ),
  });

  const latestOnlyFirst = await runConverseCacheBenchmarkTurn({
    client,
    messages: addLatestUserOnlyBedrockCacheControl(
      buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'alpha' })
    ),
  });

  await waitForCachePropagation();

  const latestOnlySecond = await runConverseCacheBenchmarkTurn({
    client,
    messages: addLatestUserOnlyBedrockCacheControl(
      buildMultiTurnToolMessages({ nonce: latestOnlyNonce, marker: 'bravo' })
    ),
  });

  // Print the raw numbers so a run can be inspected even when it passes.
  process.stdout.write(
    `Bedrock multi-turn cache placement benchmark ${JSON.stringify({
      currentFirst,
      currentSecond,
      latestOnlyFirst,
      latestOnlySecond,
      cacheWriteDelta:
        currentSecond.cacheCreation - latestOnlySecond.cacheCreation,
    })}\n`
  );

  // The current placement should read more from cache and write less to it
  // than the latest-user-only placement on the second call.
  expect(currentSecond.cacheRead).toBeGreaterThan(
    latestOnlySecond.cacheRead
  );
  expect(currentSecond.cacheCreation).toBeLessThan(
    latestOnlySecond.cacheCreation
  );
}, 240_000);
|
|
149
653
|
});
|