@librechat/agents 3.2.37 → 3.2.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +25 -8
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +7 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +20 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +7 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +5 -4
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +34 -17
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +1 -0
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/toolCache.cjs +18 -5
- package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
- package/dist/cjs/main.cjs +4 -0
- package/dist/cjs/messages/anthropicToolCache.cjs +75 -13
- package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +91 -35
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +3 -2
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +26 -9
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +8 -5
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +20 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +7 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +5 -4
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +34 -17
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +1 -0
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/toolCache.mjs +18 -5
- package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/anthropicToolCache.mjs +75 -13
- package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +88 -36
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +4 -3
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +11 -0
- package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +2 -0
- package/dist/types/llm/bedrock/index.d.ts +13 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +2 -1
- package/dist/types/llm/openrouter/index.d.ts +8 -0
- package/dist/types/llm/openrouter/toolCache.d.ts +2 -1
- package/dist/types/messages/anthropicToolCache.d.ts +2 -1
- package/dist/types/messages/cache.d.ts +49 -5
- package/dist/types/types/llm.d.ts +14 -0
- package/package.json +7 -5
- package/src/agents/AgentContext.ts +64 -17
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +6 -2
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +7 -5
- package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +1 -1
- package/src/agents/__tests__/AgentContext.test.ts +31 -19
- package/src/agents/__tests__/promptCacheLiveHelpers.ts +6 -2
- package/src/graphs/Graph.ts +40 -4
- package/src/llm/anthropic/utils/message_inputs.ts +33 -6
- package/src/llm/bedrock/index.ts +21 -1
- package/src/llm/bedrock/llm.spec.ts +61 -0
- package/src/llm/bedrock/toolCache.test.ts +24 -0
- package/src/llm/bedrock/toolCache.ts +12 -7
- package/src/llm/bedrock/utils/message_inputs.ts +57 -40
- package/src/llm/openrouter/index.ts +9 -0
- package/src/llm/openrouter/toolCache.test.ts +52 -1
- package/src/llm/openrouter/toolCache.ts +40 -6
- package/src/messages/__tests__/anthropicToolCache.test.ts +168 -0
- package/src/messages/anthropicToolCache.ts +118 -15
- package/src/messages/cache.test.ts +175 -0
- package/src/messages/cache.ts +133 -48
- package/src/summarization/node.ts +21 -2
- package/src/types/llm.ts +14 -0
package/src/messages/cache.ts
CHANGED
|
@@ -20,6 +20,65 @@ type MessageContentWithCacheControl = MessageContentComplex & {
|
|
|
20
20
|
cache_control?: unknown;
|
|
21
21
|
};
|
|
22
22
|
|
|
23
|
+
/**
|
|
24
|
+
* Prompt-cache breakpoint TTL.
|
|
25
|
+
*
|
|
26
|
+
* Both Anthropic (`cache_control.ttl`) and Bedrock (`cachePoint.ttl`) accept
|
|
27
|
+
* `'5m'` (the legacy provider default) and `'1h'` (the extended cache). When
|
|
28
|
+
* prompt caching is enabled the SDK now defaults to the 1-hour extended cache
|
|
29
|
+
* (see {@link DEFAULT_PROMPT_CACHE_TTL}); pass `'5m'` to opt back into the
|
|
30
|
+
* legacy 5-minute behavior.
|
|
31
|
+
*/
|
|
32
|
+
export type PromptCacheTtl = '5m' | '1h';
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Default TTL applied wherever a prompt-cache breakpoint is added. The 1-hour
|
|
36
|
+
* extended cache keeps prefixes warm across longer gaps between turns, at the
|
|
37
|
+
* cost of a higher one-time cache-write multiplier (2x vs 1.25x for 5m).
|
|
38
|
+
*/
|
|
39
|
+
export const DEFAULT_PROMPT_CACHE_TTL: PromptCacheTtl = '1h';
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Resolve an optionally-configured TTL to a concrete value, defaulting to the
|
|
43
|
+
* 1-hour extended cache. Used at the Anthropic/Bedrock prompt-cache call sites.
|
|
44
|
+
*/
|
|
45
|
+
export function resolvePromptCacheTtl(
|
|
46
|
+
ttl: PromptCacheTtl | undefined
|
|
47
|
+
): PromptCacheTtl {
|
|
48
|
+
return ttl ?? DEFAULT_PROMPT_CACHE_TTL;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/** Anthropic `cache_control` shape (the SDK accepts an optional `ttl`). */
|
|
52
|
+
type AnthropicCacheControl = { type: 'ephemeral'; ttl?: '1h' };
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Build an Anthropic `cache_control` breakpoint for the given TTL. `'5m'` (or
|
|
56
|
+
* `undefined`) omits the `ttl` field — that is the provider default, so the
|
|
57
|
+
* payload stays byte-identical to the legacy 5-minute marker. `'1h'` adds the
|
|
58
|
+
* explicit extended-cache `ttl`.
|
|
59
|
+
*/
|
|
60
|
+
export function buildAnthropicCacheControl(
|
|
61
|
+
ttl?: PromptCacheTtl
|
|
62
|
+
): AnthropicCacheControl {
|
|
63
|
+
return ttl === '1h'
|
|
64
|
+
? { type: 'ephemeral', ttl: '1h' }
|
|
65
|
+
: { type: 'ephemeral' };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/** Bedrock `cachePoint` shape (the SDK accepts an optional `ttl`). */
|
|
69
|
+
type BedrockCachePoint = { type: 'default'; ttl?: '1h' };
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Build a Bedrock `cachePoint` for the given TTL. Mirrors
|
|
73
|
+
* {@link buildAnthropicCacheControl}: `'5m'`/`undefined` omits `ttl` (the
|
|
74
|
+
* legacy default), `'1h'` adds the extended-cache `ttl`.
|
|
75
|
+
*/
|
|
76
|
+
export function buildBedrockCachePoint(
|
|
77
|
+
ttl?: PromptCacheTtl
|
|
78
|
+
): BedrockCachePoint {
|
|
79
|
+
return ttl === '1h' ? { type: 'default', ttl: '1h' } : { type: 'default' };
|
|
80
|
+
}
|
|
81
|
+
|
|
23
82
|
/**
|
|
24
83
|
* Deep clones a message's content to prevent mutation of the original.
|
|
25
84
|
*/
|
|
@@ -119,38 +178,53 @@ export function cloneMessage<T extends MessageWithContent>(
|
|
|
119
178
|
return cloned;
|
|
120
179
|
}
|
|
121
180
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const cloned: MessageContentWithCacheControl = { ...block };
|
|
132
|
-
delete cloned.cache_control;
|
|
133
|
-
modified = true;
|
|
134
|
-
return cloned;
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
return { content: strippedContent, modified };
|
|
138
|
-
}
|
|
139
|
-
|
|
181
|
+
/**
|
|
182
|
+
* Sanitize a Bedrock system message: strip Anthropic `cache_control` (Bedrock
|
|
183
|
+
* conversion can't use it) and normalize any existing `cachePoint` to the
|
|
184
|
+
* resolved TTL. The normalization matters because Bedrock requires longer-TTL
|
|
185
|
+
* checkpoints to appear before shorter ones — a stale 5-minute system cachePoint
|
|
186
|
+
* (host-supplied or carried over from a 5m config) left ahead of a 1-hour
|
|
187
|
+
* message tail would make the request invalid. System messages are never
|
|
188
|
+
* anchored as the tail breakpoint; this only fixes markers already present.
|
|
189
|
+
*/
|
|
140
190
|
function sanitizeBedrockSystemMessage<T extends MessageWithContent>(
|
|
141
|
-
message: T
|
|
191
|
+
message: T,
|
|
192
|
+
ttl?: PromptCacheTtl
|
|
142
193
|
): T {
|
|
143
194
|
const content = message.content;
|
|
144
195
|
if (!Array.isArray(content)) {
|
|
145
196
|
return message;
|
|
146
197
|
}
|
|
147
198
|
|
|
148
|
-
const
|
|
149
|
-
|
|
199
|
+
const sanitized: MessageContentComplex[] = [];
|
|
200
|
+
let modified = false;
|
|
201
|
+
for (const block of content) {
|
|
202
|
+
if (isCachePoint(block)) {
|
|
203
|
+
const existing = (block as { cachePoint?: { ttl?: unknown } }).cachePoint;
|
|
204
|
+
const desired = buildBedrockCachePoint(ttl);
|
|
205
|
+
if (existing?.ttl !== desired.ttl) {
|
|
206
|
+
modified = true;
|
|
207
|
+
sanitized.push({ cachePoint: desired } as MessageContentComplex);
|
|
208
|
+
} else {
|
|
209
|
+
sanitized.push(block);
|
|
210
|
+
}
|
|
211
|
+
continue;
|
|
212
|
+
}
|
|
213
|
+
if ('cache_control' in block) {
|
|
214
|
+
const cloned: MessageContentWithCacheControl = { ...block };
|
|
215
|
+
delete cloned.cache_control;
|
|
216
|
+
modified = true;
|
|
217
|
+
sanitized.push(cloned);
|
|
218
|
+
continue;
|
|
219
|
+
}
|
|
220
|
+
sanitized.push(block);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
if (!modified) {
|
|
150
224
|
return message;
|
|
151
225
|
}
|
|
152
226
|
|
|
153
|
-
return cloneMessage(message,
|
|
227
|
+
return cloneMessage(message, sanitized);
|
|
154
228
|
}
|
|
155
229
|
|
|
156
230
|
/**
|
|
@@ -163,7 +237,8 @@ function sanitizeBedrockSystemMessage<T extends MessageWithContent>(
|
|
|
163
237
|
* @returns - A new array of message objects with cache control added.
|
|
164
238
|
*/
|
|
165
239
|
export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
|
|
166
|
-
messages: T[]
|
|
240
|
+
messages: T[],
|
|
241
|
+
ttl?: PromptCacheTtl
|
|
167
242
|
): T[] {
|
|
168
243
|
if (!Array.isArray(messages) || messages.length < 2) {
|
|
169
244
|
return messages;
|
|
@@ -224,14 +299,16 @@ export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
|
|
|
224
299
|
if (needsCacheAdd && lastTextIndex >= 0) {
|
|
225
300
|
(
|
|
226
301
|
workingContent[lastTextIndex] as Anthropic.TextBlockParam
|
|
227
|
-
).cache_control =
|
|
228
|
-
type: 'ephemeral',
|
|
229
|
-
};
|
|
302
|
+
).cache_control = buildAnthropicCacheControl(ttl);
|
|
230
303
|
userMessagesModified++;
|
|
231
304
|
}
|
|
232
305
|
} else if (typeof content === 'string' && needsCacheAdd) {
|
|
233
306
|
workingContent = [
|
|
234
|
-
{
|
|
307
|
+
{
|
|
308
|
+
type: 'text',
|
|
309
|
+
text: content,
|
|
310
|
+
cache_control: buildAnthropicCacheControl(ttl),
|
|
311
|
+
},
|
|
235
312
|
] as unknown as MessageContentComplex[];
|
|
236
313
|
userMessagesModified++;
|
|
237
314
|
} else {
|
|
@@ -318,7 +395,8 @@ function isTailCacheableBlock(block: MessageContentComplex): boolean {
|
|
|
318
395
|
* Returns a new array; only messages that require modification are cloned.
|
|
319
396
|
*/
|
|
320
397
|
export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
|
|
321
|
-
messages: T[]
|
|
398
|
+
messages: T[],
|
|
399
|
+
ttl?: PromptCacheTtl
|
|
322
400
|
): T[] {
|
|
323
401
|
if (!Array.isArray(messages) || messages.length === 0) {
|
|
324
402
|
return messages;
|
|
@@ -368,9 +446,7 @@ export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
|
|
|
368
446
|
|
|
369
447
|
if (canPlaceMarker && tailIndex >= 0) {
|
|
370
448
|
(workingContent[tailIndex] as Anthropic.TextBlockParam).cache_control =
|
|
371
|
-
|
|
372
|
-
type: 'ephemeral',
|
|
373
|
-
};
|
|
449
|
+
buildAnthropicCacheControl(ttl);
|
|
374
450
|
markerPlaced = true;
|
|
375
451
|
modified = true;
|
|
376
452
|
}
|
|
@@ -384,7 +460,11 @@ export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
|
|
|
384
460
|
content.trim() !== ''
|
|
385
461
|
) {
|
|
386
462
|
workingContent = [
|
|
387
|
-
{
|
|
463
|
+
{
|
|
464
|
+
type: 'text',
|
|
465
|
+
text: content,
|
|
466
|
+
cache_control: buildAnthropicCacheControl(ttl),
|
|
467
|
+
},
|
|
388
468
|
] as unknown as MessageContentComplex[];
|
|
389
469
|
markerPlaced = true;
|
|
390
470
|
} else {
|
|
@@ -454,7 +534,8 @@ function addCacheControlToRecentMessages<
|
|
|
454
534
|
>(
|
|
455
535
|
messages: T[],
|
|
456
536
|
maxCachePoints: number,
|
|
457
|
-
canUseMessage: (message: MessageWithContent) => boolean
|
|
537
|
+
canUseMessage: (message: MessageWithContent) => boolean,
|
|
538
|
+
ttl?: PromptCacheTtl
|
|
458
539
|
): T[] {
|
|
459
540
|
if (
|
|
460
541
|
!Array.isArray(messages) ||
|
|
@@ -513,9 +594,7 @@ function addCacheControlToRecentMessages<
|
|
|
513
594
|
if (canAddCache && lastNonEmptyTextIndex >= 0) {
|
|
514
595
|
(
|
|
515
596
|
workingContent[lastNonEmptyTextIndex] as Anthropic.TextBlockParam
|
|
516
|
-
).cache_control =
|
|
517
|
-
type: 'ephemeral',
|
|
518
|
-
};
|
|
597
|
+
).cache_control = buildAnthropicCacheControl(ttl);
|
|
519
598
|
cachePointsAdded++;
|
|
520
599
|
modified = true;
|
|
521
600
|
}
|
|
@@ -529,7 +608,11 @@ function addCacheControlToRecentMessages<
|
|
|
529
608
|
canAddCache
|
|
530
609
|
) {
|
|
531
610
|
workingContent = [
|
|
532
|
-
{
|
|
611
|
+
{
|
|
612
|
+
type: 'text',
|
|
613
|
+
text: content,
|
|
614
|
+
cache_control: buildAnthropicCacheControl(ttl),
|
|
615
|
+
},
|
|
533
616
|
] as unknown as MessageContentComplex[];
|
|
534
617
|
cachePointsAdded++;
|
|
535
618
|
} else {
|
|
@@ -547,11 +630,12 @@ function addCacheControlToRecentMessages<
|
|
|
547
630
|
|
|
548
631
|
export function addCacheControlToStablePrefixMessages<
|
|
549
632
|
T extends AnthropicMessage | BaseMessage,
|
|
550
|
-
>(messages: T[], maxCachePoints: number): T[] {
|
|
633
|
+
>(messages: T[], maxCachePoints: number, ttl?: PromptCacheTtl): T[] {
|
|
551
634
|
const assistantMarked = addCacheControlToRecentMessages(
|
|
552
635
|
messages,
|
|
553
636
|
maxCachePoints,
|
|
554
|
-
isAssistantConversationMessage
|
|
637
|
+
isAssistantConversationMessage,
|
|
638
|
+
ttl
|
|
555
639
|
);
|
|
556
640
|
|
|
557
641
|
if (assistantMarked.some(hasCacheMarker)) {
|
|
@@ -561,7 +645,8 @@ export function addCacheControlToStablePrefixMessages<
|
|
|
561
645
|
return addCacheControlToRecentMessages(
|
|
562
646
|
messages,
|
|
563
647
|
maxCachePoints,
|
|
564
|
-
isCacheableConversationMessage
|
|
648
|
+
isCacheableConversationMessage,
|
|
649
|
+
ttl
|
|
565
650
|
);
|
|
566
651
|
}
|
|
567
652
|
|
|
@@ -664,7 +749,7 @@ export function stripBedrockCacheControl<T extends MessageWithContent>(
|
|
|
664
749
|
*/
|
|
665
750
|
export function addBedrockCacheControl<
|
|
666
751
|
T extends MessageWithContent & { getType?: () => string; role?: string },
|
|
667
|
-
>(messages: T[]): T[] {
|
|
752
|
+
>(messages: T[], ttl?: PromptCacheTtl): T[] {
|
|
668
753
|
if (!Array.isArray(messages) || messages.length === 0) {
|
|
669
754
|
return messages;
|
|
670
755
|
}
|
|
@@ -687,7 +772,7 @@ export function addBedrockCacheControl<
|
|
|
687
772
|
const isSystemMessage =
|
|
688
773
|
messageType === 'system' || messageRole === 'system';
|
|
689
774
|
if (isSystemMessage) {
|
|
690
|
-
updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
|
|
775
|
+
updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage, ttl);
|
|
691
776
|
continue;
|
|
692
777
|
}
|
|
693
778
|
|
|
@@ -750,14 +835,14 @@ export function addBedrockCacheControl<
|
|
|
750
835
|
// Skip if no cacheable text content exists (whitespace-only messages).
|
|
751
836
|
if (needsCacheAdd && lastNonEmptyTextIndex >= 0) {
|
|
752
837
|
workingContent.splice(lastNonEmptyTextIndex + 1, 0, {
|
|
753
|
-
cachePoint:
|
|
838
|
+
cachePoint: buildBedrockCachePoint(ttl),
|
|
754
839
|
} as MessageContentComplex);
|
|
755
840
|
cachePointsAdded++;
|
|
756
841
|
}
|
|
757
842
|
} else if (typeof content === 'string' && needsCacheAdd) {
|
|
758
843
|
workingContent = [
|
|
759
844
|
{ type: ContentTypes.TEXT, text: content },
|
|
760
|
-
{ cachePoint:
|
|
845
|
+
{ cachePoint: buildBedrockCachePoint(ttl) } as MessageContentComplex,
|
|
761
846
|
];
|
|
762
847
|
cachePointsAdded++;
|
|
763
848
|
} else if (typeof content === 'string' && hasSerializationProps) {
|
|
@@ -791,7 +876,7 @@ export function addBedrockCacheControl<
|
|
|
791
876
|
*/
|
|
792
877
|
export function addBedrockTailCacheControl<
|
|
793
878
|
T extends MessageWithContent & { getType?: () => string; role?: string },
|
|
794
|
-
>(messages: T[]): T[] {
|
|
879
|
+
>(messages: T[], ttl?: PromptCacheTtl): T[] {
|
|
795
880
|
if (!Array.isArray(messages) || messages.length === 0) {
|
|
796
881
|
return messages;
|
|
797
882
|
}
|
|
@@ -814,7 +899,7 @@ export function addBedrockTailCacheControl<
|
|
|
814
899
|
const isSystemMessage =
|
|
815
900
|
messageType === 'system' || messageRole === 'system';
|
|
816
901
|
if (isSystemMessage) {
|
|
817
|
-
updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
|
|
902
|
+
updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage, ttl);
|
|
818
903
|
continue;
|
|
819
904
|
}
|
|
820
905
|
|
|
@@ -869,7 +954,7 @@ export function addBedrockTailCacheControl<
|
|
|
869
954
|
|
|
870
955
|
if (canPlaceCachePoint && lastNonEmptyTextIndex >= 0) {
|
|
871
956
|
workingContent.splice(lastNonEmptyTextIndex + 1, 0, {
|
|
872
|
-
cachePoint:
|
|
957
|
+
cachePoint: buildBedrockCachePoint(ttl),
|
|
873
958
|
} as MessageContentComplex);
|
|
874
959
|
cachePointPlaced = true;
|
|
875
960
|
modified = true;
|
|
@@ -877,7 +962,7 @@ export function addBedrockTailCacheControl<
|
|
|
877
962
|
} else if (typeof content === 'string' && canPlaceCachePoint) {
|
|
878
963
|
workingContent = [
|
|
879
964
|
{ type: ContentTypes.TEXT, text: content },
|
|
880
|
-
{ cachePoint:
|
|
965
|
+
{ cachePoint: buildBedrockCachePoint(ttl) } as MessageContentComplex,
|
|
881
966
|
];
|
|
882
967
|
cachePointPlaced = true;
|
|
883
968
|
} else if (typeof content === 'string' && hasSerializationProps) {
|
package/src/summarization/node.ts
CHANGED
|
@@ -10,6 +10,11 @@ import type { AgentContext } from '@/agents/AgentContext';
|
|
|
10
10
|
import type { HookRegistry } from '@/hooks';
|
|
11
11
|
import type { OnChunk } from '@/llm/invoke';
|
|
12
12
|
import type * as t from '@/types';
|
|
13
|
+
import {
|
|
14
|
+
addTailCacheControl,
|
|
15
|
+
resolvePromptCacheTtl,
|
|
16
|
+
type PromptCacheTtl,
|
|
17
|
+
} from '@/messages/cache';
|
|
13
18
|
import {
|
|
14
19
|
Constants,
|
|
15
20
|
ContentTypes,
|
|
@@ -22,7 +27,6 @@ import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
|
|
|
22
27
|
import { createRemoveAllMessage } from '@/messages/reducer';
|
|
23
28
|
import { splitAtRecencyBoundary } from '@/messages/recency';
|
|
24
29
|
import { getMaxOutputTokensKey } from '@/llm/request';
|
|
25
|
-
import { addTailCacheControl } from '@/messages/cache';
|
|
26
30
|
import { initializeModel } from '@/llm/init';
|
|
27
31
|
import { getChunkContent } from '@/stream';
|
|
28
32
|
import { executeHooks } from '@/hooks';
|
|
@@ -552,6 +556,17 @@ async function executeSummarizationWithFallback(params: {
|
|
|
552
556
|
provider: clientConfig.provider as Providers,
|
|
553
557
|
reasoningKey: agentContext.reasoningKey,
|
|
554
558
|
usePromptCache,
|
|
559
|
+
promptCacheTtl:
|
|
560
|
+
(clientConfig.provider as Providers) === Providers.ANTHROPIC ||
|
|
561
|
+
(clientConfig.provider as Providers) === Providers.OPENROUTER
|
|
562
|
+
? resolvePromptCacheTtl(
|
|
563
|
+
(
|
|
564
|
+
clientConfig.clientOptions as {
|
|
565
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
566
|
+
}
|
|
567
|
+
).promptCacheTtl
|
|
568
|
+
)
|
|
569
|
+
: undefined,
|
|
555
570
|
log,
|
|
556
571
|
});
|
|
557
572
|
summaryText = result.text;
|
|
@@ -1205,6 +1220,7 @@ async function summarizeWithCacheHit({
|
|
|
1205
1220
|
provider,
|
|
1206
1221
|
reasoningKey,
|
|
1207
1222
|
usePromptCache,
|
|
1223
|
+
promptCacheTtl,
|
|
1208
1224
|
log,
|
|
1209
1225
|
}: {
|
|
1210
1226
|
model: t.ChatModel;
|
|
@@ -1217,6 +1233,7 @@ async function summarizeWithCacheHit({
|
|
|
1217
1233
|
provider: Providers;
|
|
1218
1234
|
reasoningKey?: 'reasoning_content' | 'reasoning';
|
|
1219
1235
|
usePromptCache?: boolean;
|
|
1236
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
1220
1237
|
log?: LogFn;
|
|
1221
1238
|
}): Promise<{ text: string; usage?: Partial<UsageMetadata> }> {
|
|
1222
1239
|
const instruction = buildSummarizationInstruction(
|
|
@@ -1227,7 +1244,9 @@ async function summarizeWithCacheHit({
|
|
|
1227
1244
|
|
|
1228
1245
|
const fullMessages = [...messages, new HumanMessage(instruction)];
|
|
1229
1246
|
const invokeMessages =
|
|
1230
|
-
usePromptCache === true
|
|
1247
|
+
usePromptCache === true
|
|
1248
|
+
? addTailCacheControl(fullMessages, promptCacheTtl)
|
|
1249
|
+
: fullMessages;
|
|
1231
1250
|
|
|
1232
1251
|
const result = await attemptInvoke(
|
|
1233
1252
|
{
|
package/src/types/llm.ts
CHANGED
|
@@ -22,6 +22,7 @@ import type { Runnable } from '@langchain/core/runnables';
|
|
|
22
22
|
import type { OpenAI as OpenAIClient } from 'openai';
|
|
23
23
|
import type { ChatXAIInput } from '@langchain/xai';
|
|
24
24
|
import type { ChatOpenRouterCallOptions } from '@/llm/openrouter';
|
|
25
|
+
import type { PromptCacheTtl } from '@/messages/cache';
|
|
25
26
|
import {
|
|
26
27
|
AzureChatOpenAI,
|
|
27
28
|
ChatDeepSeek,
|
|
@@ -76,6 +77,12 @@ export type OpenAIClientOptions = ChatOpenAIFields;
|
|
|
76
77
|
export type AnthropicClientOptions = Omit<AnthropicInput, 'thinking'> & {
|
|
77
78
|
thinking?: ThinkingConfig;
|
|
78
79
|
promptCache?: boolean;
|
|
80
|
+
/**
|
|
81
|
+
* Prompt-cache breakpoint TTL. Defaults to `'1h'` (extended cache) when
|
|
82
|
+
* `promptCache` is enabled; set `'5m'` to opt back into the legacy
|
|
83
|
+
* 5-minute behavior.
|
|
84
|
+
*/
|
|
85
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
79
86
|
};
|
|
80
87
|
export type MistralAIClientOptions = ChatMistralAIInput;
|
|
81
88
|
export type VertexAIClientOptions = ChatVertexAIInput & {
|
|
@@ -86,6 +93,13 @@ export type BedrockAnthropicInput = ChatBedrockConverseInput & {
|
|
|
86
93
|
additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] &
|
|
87
94
|
AnthropicReasoning;
|
|
88
95
|
promptCache?: boolean;
|
|
96
|
+
/**
|
|
97
|
+
* Prompt-cache checkpoint TTL. Defaults to `'1h'` (extended cache) when
|
|
98
|
+
* `promptCache` is enabled; set `'5m'` to opt into the legacy 5-minute
|
|
99
|
+
* behavior. Bedrock models that don't support the 1-hour TTL downgrade to 5m
|
|
100
|
+
* server-side, so the default is safe to leave on.
|
|
101
|
+
*/
|
|
102
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
89
103
|
};
|
|
90
104
|
export type BedrockConverseClientOptions = BedrockAnthropicInput;
|
|
91
105
|
export type BedrockAnthropicClientOptions = BedrockAnthropicInput;
|