@librechat/agents 3.2.37 → 3.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/cjs/agents/AgentContext.cjs +25 -8
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +7 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +20 -4
  6. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +7 -1
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/toolCache.cjs +5 -4
  10. package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +34 -17
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/openrouter/index.cjs +1 -0
  14. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  15. package/dist/cjs/llm/openrouter/toolCache.cjs +18 -5
  16. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
  17. package/dist/cjs/main.cjs +4 -0
  18. package/dist/cjs/messages/anthropicToolCache.cjs +75 -13
  19. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
  20. package/dist/cjs/messages/cache.cjs +91 -35
  21. package/dist/cjs/messages/cache.cjs.map +1 -1
  22. package/dist/cjs/summarization/node.cjs +3 -2
  23. package/dist/cjs/summarization/node.cjs.map +1 -1
  24. package/dist/esm/agents/AgentContext.mjs +26 -9
  25. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +8 -5
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +20 -4
  29. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  30. package/dist/esm/llm/bedrock/index.mjs +7 -1
  31. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  32. package/dist/esm/llm/bedrock/toolCache.mjs +5 -4
  33. package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
  34. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +34 -17
  35. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  36. package/dist/esm/llm/openrouter/index.mjs +1 -0
  37. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  38. package/dist/esm/llm/openrouter/toolCache.mjs +18 -5
  39. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
  40. package/dist/esm/main.mjs +2 -2
  41. package/dist/esm/messages/anthropicToolCache.mjs +75 -13
  42. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
  43. package/dist/esm/messages/cache.mjs +88 -36
  44. package/dist/esm/messages/cache.mjs.map +1 -1
  45. package/dist/esm/summarization/node.mjs +4 -3
  46. package/dist/esm/summarization/node.mjs.map +1 -1
  47. package/dist/types/agents/AgentContext.d.ts +11 -0
  48. package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +2 -0
  49. package/dist/types/llm/bedrock/index.d.ts +13 -0
  50. package/dist/types/llm/bedrock/toolCache.d.ts +2 -1
  51. package/dist/types/llm/openrouter/index.d.ts +8 -0
  52. package/dist/types/llm/openrouter/toolCache.d.ts +2 -1
  53. package/dist/types/messages/anthropicToolCache.d.ts +2 -1
  54. package/dist/types/messages/cache.d.ts +49 -5
  55. package/dist/types/types/llm.d.ts +14 -0
  56. package/package.json +7 -5
  57. package/src/agents/AgentContext.ts +64 -17
  58. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +6 -2
  59. package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +7 -5
  60. package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +1 -1
  61. package/src/agents/__tests__/AgentContext.test.ts +31 -19
  62. package/src/agents/__tests__/promptCacheLiveHelpers.ts +6 -2
  63. package/src/graphs/Graph.ts +40 -4
  64. package/src/llm/anthropic/utils/message_inputs.ts +33 -6
  65. package/src/llm/bedrock/index.ts +21 -1
  66. package/src/llm/bedrock/llm.spec.ts +61 -0
  67. package/src/llm/bedrock/toolCache.test.ts +24 -0
  68. package/src/llm/bedrock/toolCache.ts +12 -7
  69. package/src/llm/bedrock/utils/message_inputs.ts +57 -40
  70. package/src/llm/openrouter/index.ts +9 -0
  71. package/src/llm/openrouter/toolCache.test.ts +52 -1
  72. package/src/llm/openrouter/toolCache.ts +40 -6
  73. package/src/messages/__tests__/anthropicToolCache.test.ts +168 -0
  74. package/src/messages/anthropicToolCache.ts +118 -15
  75. package/src/messages/cache.test.ts +175 -0
  76. package/src/messages/cache.ts +133 -48
  77. package/src/summarization/node.ts +21 -2
  78. package/src/types/llm.ts +14 -0
package/src/messages/cache.ts CHANGED
@@ -20,6 +20,65 @@ type MessageContentWithCacheControl = MessageContentComplex & {
20
20
  cache_control?: unknown;
21
21
  };
22
22
 
23
+ /**
24
+ * Prompt-cache breakpoint TTL.
25
+ *
26
+ * Both Anthropic (`cache_control.ttl`) and Bedrock (`cachePoint.ttl`) accept
27
+ * `'5m'` (the legacy provider default) and `'1h'` (the extended cache). When
28
+ * prompt caching is enabled the SDK now defaults to the 1-hour extended cache
29
+ * (see {@link DEFAULT_PROMPT_CACHE_TTL}); pass `'5m'` to opt back into the
30
+ * legacy 5-minute behavior.
31
+ */
32
+ export type PromptCacheTtl = '5m' | '1h';
33
+
34
+ /**
35
+ * Default TTL applied wherever a prompt-cache breakpoint is added. The 1-hour
36
+ * extended cache keeps prefixes warm across longer gaps between turns, at the
37
+ * cost of a higher one-time cache-write multiplier (2x vs 1.25x for 5m).
38
+ */
39
+ export const DEFAULT_PROMPT_CACHE_TTL: PromptCacheTtl = '1h';
40
+
41
+ /**
42
+ * Resolve an optionally-configured TTL to a concrete value, defaulting to the
43
+ * 1-hour extended cache. Used at the Anthropic/Bedrock prompt-cache call sites.
44
+ */
45
+ export function resolvePromptCacheTtl(
46
+ ttl: PromptCacheTtl | undefined
47
+ ): PromptCacheTtl {
48
+ return ttl ?? DEFAULT_PROMPT_CACHE_TTL;
49
+ }
50
+
51
+ /** Anthropic `cache_control` shape (the SDK accepts an optional `ttl`). */
52
+ type AnthropicCacheControl = { type: 'ephemeral'; ttl?: '1h' };
53
+
54
+ /**
55
+ * Build an Anthropic `cache_control` breakpoint for the given TTL. `'5m'` (or
56
+ * `undefined`) omits the `ttl` field — that is the provider default, so the
57
+ * payload stays byte-identical to the legacy 5-minute marker. `'1h'` adds the
58
+ * explicit extended-cache `ttl`.
59
+ */
60
+ export function buildAnthropicCacheControl(
61
+ ttl?: PromptCacheTtl
62
+ ): AnthropicCacheControl {
63
+ return ttl === '1h'
64
+ ? { type: 'ephemeral', ttl: '1h' }
65
+ : { type: 'ephemeral' };
66
+ }
67
+
68
+ /** Bedrock `cachePoint` shape (the SDK accepts an optional `ttl`). */
69
+ type BedrockCachePoint = { type: 'default'; ttl?: '1h' };
70
+
71
+ /**
72
+ * Build a Bedrock `cachePoint` for the given TTL. Mirrors
73
+ * {@link buildAnthropicCacheControl}: `'5m'`/`undefined` omits `ttl` (the
74
+ * legacy default), `'1h'` adds the extended-cache `ttl`.
75
+ */
76
+ export function buildBedrockCachePoint(
77
+ ttl?: PromptCacheTtl
78
+ ): BedrockCachePoint {
79
+ return ttl === '1h' ? { type: 'default', ttl: '1h' } : { type: 'default' };
80
+ }
81
+
23
82
  /**
24
83
  * Deep clones a message's content to prevent mutation of the original.
25
84
  */
@@ -119,38 +178,53 @@ export function cloneMessage<T extends MessageWithContent>(
119
178
  return cloned;
120
179
  }
121
180
 
122
- function stripAnthropicCacheControlFromBlocks(
123
- content: MessageContentComplex[]
124
- ): { content: MessageContentComplex[]; modified: boolean } {
125
- let modified = false;
126
- const strippedContent = content.map((block) => {
127
- if (!('cache_control' in block)) {
128
- return block;
129
- }
130
-
131
- const cloned: MessageContentWithCacheControl = { ...block };
132
- delete cloned.cache_control;
133
- modified = true;
134
- return cloned;
135
- });
136
-
137
- return { content: strippedContent, modified };
138
- }
139
-
181
+ /**
182
+ * Sanitize a Bedrock system message: strip Anthropic `cache_control` (Bedrock
183
+ * conversion can't use it) and normalize any existing `cachePoint` to the
184
+ * resolved TTL. The normalization matters because Bedrock requires longer-TTL
185
+ * checkpoints to appear before shorter ones — a stale 5-minute system cachePoint
186
+ * (host-supplied or carried over from a 5m config) left ahead of a 1-hour
187
+ * message tail would make the request invalid. System messages are never
188
+ * anchored as the tail breakpoint; this only fixes markers already present.
189
+ */
140
190
  function sanitizeBedrockSystemMessage<T extends MessageWithContent>(
141
- message: T
191
+ message: T,
192
+ ttl?: PromptCacheTtl
142
193
  ): T {
143
194
  const content = message.content;
144
195
  if (!Array.isArray(content)) {
145
196
  return message;
146
197
  }
147
198
 
148
- const stripped = stripAnthropicCacheControlFromBlocks(content);
149
- if (!stripped.modified) {
199
+ const sanitized: MessageContentComplex[] = [];
200
+ let modified = false;
201
+ for (const block of content) {
202
+ if (isCachePoint(block)) {
203
+ const existing = (block as { cachePoint?: { ttl?: unknown } }).cachePoint;
204
+ const desired = buildBedrockCachePoint(ttl);
205
+ if (existing?.ttl !== desired.ttl) {
206
+ modified = true;
207
+ sanitized.push({ cachePoint: desired } as MessageContentComplex);
208
+ } else {
209
+ sanitized.push(block);
210
+ }
211
+ continue;
212
+ }
213
+ if ('cache_control' in block) {
214
+ const cloned: MessageContentWithCacheControl = { ...block };
215
+ delete cloned.cache_control;
216
+ modified = true;
217
+ sanitized.push(cloned);
218
+ continue;
219
+ }
220
+ sanitized.push(block);
221
+ }
222
+
223
+ if (!modified) {
150
224
  return message;
151
225
  }
152
226
 
153
- return cloneMessage(message, stripped.content);
227
+ return cloneMessage(message, sanitized);
154
228
  }
155
229
 
156
230
  /**
@@ -163,7 +237,8 @@ function sanitizeBedrockSystemMessage<T extends MessageWithContent>(
163
237
  * @returns - A new array of message objects with cache control added.
164
238
  */
165
239
  export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
166
- messages: T[]
240
+ messages: T[],
241
+ ttl?: PromptCacheTtl
167
242
  ): T[] {
168
243
  if (!Array.isArray(messages) || messages.length < 2) {
169
244
  return messages;
@@ -224,14 +299,16 @@ export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
224
299
  if (needsCacheAdd && lastTextIndex >= 0) {
225
300
  (
226
301
  workingContent[lastTextIndex] as Anthropic.TextBlockParam
227
- ).cache_control = {
228
- type: 'ephemeral',
229
- };
302
+ ).cache_control = buildAnthropicCacheControl(ttl);
230
303
  userMessagesModified++;
231
304
  }
232
305
  } else if (typeof content === 'string' && needsCacheAdd) {
233
306
  workingContent = [
234
- { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
307
+ {
308
+ type: 'text',
309
+ text: content,
310
+ cache_control: buildAnthropicCacheControl(ttl),
311
+ },
235
312
  ] as unknown as MessageContentComplex[];
236
313
  userMessagesModified++;
237
314
  } else {
@@ -318,7 +395,8 @@ function isTailCacheableBlock(block: MessageContentComplex): boolean {
318
395
  * Returns a new array; only messages that require modification are cloned.
319
396
  */
320
397
  export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
321
- messages: T[]
398
+ messages: T[],
399
+ ttl?: PromptCacheTtl
322
400
  ): T[] {
323
401
  if (!Array.isArray(messages) || messages.length === 0) {
324
402
  return messages;
@@ -368,9 +446,7 @@ export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
368
446
 
369
447
  if (canPlaceMarker && tailIndex >= 0) {
370
448
  (workingContent[tailIndex] as Anthropic.TextBlockParam).cache_control =
371
- {
372
- type: 'ephemeral',
373
- };
449
+ buildAnthropicCacheControl(ttl);
374
450
  markerPlaced = true;
375
451
  modified = true;
376
452
  }
@@ -384,7 +460,11 @@ export function addTailCacheControl<T extends AnthropicMessage | BaseMessage>(
384
460
  content.trim() !== ''
385
461
  ) {
386
462
  workingContent = [
387
- { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
463
+ {
464
+ type: 'text',
465
+ text: content,
466
+ cache_control: buildAnthropicCacheControl(ttl),
467
+ },
388
468
  ] as unknown as MessageContentComplex[];
389
469
  markerPlaced = true;
390
470
  } else {
@@ -454,7 +534,8 @@ function addCacheControlToRecentMessages<
454
534
  >(
455
535
  messages: T[],
456
536
  maxCachePoints: number,
457
- canUseMessage: (message: MessageWithContent) => boolean
537
+ canUseMessage: (message: MessageWithContent) => boolean,
538
+ ttl?: PromptCacheTtl
458
539
  ): T[] {
459
540
  if (
460
541
  !Array.isArray(messages) ||
@@ -513,9 +594,7 @@ function addCacheControlToRecentMessages<
513
594
  if (canAddCache && lastNonEmptyTextIndex >= 0) {
514
595
  (
515
596
  workingContent[lastNonEmptyTextIndex] as Anthropic.TextBlockParam
516
- ).cache_control = {
517
- type: 'ephemeral',
518
- };
597
+ ).cache_control = buildAnthropicCacheControl(ttl);
519
598
  cachePointsAdded++;
520
599
  modified = true;
521
600
  }
@@ -529,7 +608,11 @@ function addCacheControlToRecentMessages<
529
608
  canAddCache
530
609
  ) {
531
610
  workingContent = [
532
- { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
611
+ {
612
+ type: 'text',
613
+ text: content,
614
+ cache_control: buildAnthropicCacheControl(ttl),
615
+ },
533
616
  ] as unknown as MessageContentComplex[];
534
617
  cachePointsAdded++;
535
618
  } else {
@@ -547,11 +630,12 @@ function addCacheControlToRecentMessages<
547
630
 
548
631
  export function addCacheControlToStablePrefixMessages<
549
632
  T extends AnthropicMessage | BaseMessage,
550
- >(messages: T[], maxCachePoints: number): T[] {
633
+ >(messages: T[], maxCachePoints: number, ttl?: PromptCacheTtl): T[] {
551
634
  const assistantMarked = addCacheControlToRecentMessages(
552
635
  messages,
553
636
  maxCachePoints,
554
- isAssistantConversationMessage
637
+ isAssistantConversationMessage,
638
+ ttl
555
639
  );
556
640
 
557
641
  if (assistantMarked.some(hasCacheMarker)) {
@@ -561,7 +645,8 @@ export function addCacheControlToStablePrefixMessages<
561
645
  return addCacheControlToRecentMessages(
562
646
  messages,
563
647
  maxCachePoints,
564
- isCacheableConversationMessage
648
+ isCacheableConversationMessage,
649
+ ttl
565
650
  );
566
651
  }
567
652
 
@@ -664,7 +749,7 @@ export function stripBedrockCacheControl<T extends MessageWithContent>(
664
749
  */
665
750
  export function addBedrockCacheControl<
666
751
  T extends MessageWithContent & { getType?: () => string; role?: string },
667
- >(messages: T[]): T[] {
752
+ >(messages: T[], ttl?: PromptCacheTtl): T[] {
668
753
  if (!Array.isArray(messages) || messages.length === 0) {
669
754
  return messages;
670
755
  }
@@ -687,7 +772,7 @@ export function addBedrockCacheControl<
687
772
  const isSystemMessage =
688
773
  messageType === 'system' || messageRole === 'system';
689
774
  if (isSystemMessage) {
690
- updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
775
+ updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage, ttl);
691
776
  continue;
692
777
  }
693
778
 
@@ -750,14 +835,14 @@ export function addBedrockCacheControl<
750
835
  // Skip if no cacheable text content exists (whitespace-only messages).
751
836
  if (needsCacheAdd && lastNonEmptyTextIndex >= 0) {
752
837
  workingContent.splice(lastNonEmptyTextIndex + 1, 0, {
753
- cachePoint: { type: 'default' },
838
+ cachePoint: buildBedrockCachePoint(ttl),
754
839
  } as MessageContentComplex);
755
840
  cachePointsAdded++;
756
841
  }
757
842
  } else if (typeof content === 'string' && needsCacheAdd) {
758
843
  workingContent = [
759
844
  { type: ContentTypes.TEXT, text: content },
760
- { cachePoint: { type: 'default' } } as MessageContentComplex,
845
+ { cachePoint: buildBedrockCachePoint(ttl) } as MessageContentComplex,
761
846
  ];
762
847
  cachePointsAdded++;
763
848
  } else if (typeof content === 'string' && hasSerializationProps) {
@@ -791,7 +876,7 @@ export function addBedrockCacheControl<
791
876
  */
792
877
  export function addBedrockTailCacheControl<
793
878
  T extends MessageWithContent & { getType?: () => string; role?: string },
794
- >(messages: T[]): T[] {
879
+ >(messages: T[], ttl?: PromptCacheTtl): T[] {
795
880
  if (!Array.isArray(messages) || messages.length === 0) {
796
881
  return messages;
797
882
  }
@@ -814,7 +899,7 @@ export function addBedrockTailCacheControl<
814
899
  const isSystemMessage =
815
900
  messageType === 'system' || messageRole === 'system';
816
901
  if (isSystemMessage) {
817
- updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage);
902
+ updatedMessages[i] = sanitizeBedrockSystemMessage(originalMessage, ttl);
818
903
  continue;
819
904
  }
820
905
 
@@ -869,7 +954,7 @@ export function addBedrockTailCacheControl<
869
954
 
870
955
  if (canPlaceCachePoint && lastNonEmptyTextIndex >= 0) {
871
956
  workingContent.splice(lastNonEmptyTextIndex + 1, 0, {
872
- cachePoint: { type: 'default' },
957
+ cachePoint: buildBedrockCachePoint(ttl),
873
958
  } as MessageContentComplex);
874
959
  cachePointPlaced = true;
875
960
  modified = true;
@@ -877,7 +962,7 @@ export function addBedrockTailCacheControl<
877
962
  } else if (typeof content === 'string' && canPlaceCachePoint) {
878
963
  workingContent = [
879
964
  { type: ContentTypes.TEXT, text: content },
880
- { cachePoint: { type: 'default' } } as MessageContentComplex,
965
+ { cachePoint: buildBedrockCachePoint(ttl) } as MessageContentComplex,
881
966
  ];
882
967
  cachePointPlaced = true;
883
968
  } else if (typeof content === 'string' && hasSerializationProps) {
package/src/summarization/node.ts CHANGED
@@ -10,6 +10,11 @@ import type { AgentContext } from '@/agents/AgentContext';
10
10
  import type { HookRegistry } from '@/hooks';
11
11
  import type { OnChunk } from '@/llm/invoke';
12
12
  import type * as t from '@/types';
13
+ import {
14
+ addTailCacheControl,
15
+ resolvePromptCacheTtl,
16
+ type PromptCacheTtl,
17
+ } from '@/messages/cache';
13
18
  import {
14
19
  Constants,
15
20
  ContentTypes,
@@ -22,7 +27,6 @@ import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
22
27
  import { createRemoveAllMessage } from '@/messages/reducer';
23
28
  import { splitAtRecencyBoundary } from '@/messages/recency';
24
29
  import { getMaxOutputTokensKey } from '@/llm/request';
25
- import { addTailCacheControl } from '@/messages/cache';
26
30
  import { initializeModel } from '@/llm/init';
27
31
  import { getChunkContent } from '@/stream';
28
32
  import { executeHooks } from '@/hooks';
@@ -552,6 +556,17 @@ async function executeSummarizationWithFallback(params: {
552
556
  provider: clientConfig.provider as Providers,
553
557
  reasoningKey: agentContext.reasoningKey,
554
558
  usePromptCache,
559
+ promptCacheTtl:
560
+ (clientConfig.provider as Providers) === Providers.ANTHROPIC ||
561
+ (clientConfig.provider as Providers) === Providers.OPENROUTER
562
+ ? resolvePromptCacheTtl(
563
+ (
564
+ clientConfig.clientOptions as {
565
+ promptCacheTtl?: PromptCacheTtl;
566
+ }
567
+ ).promptCacheTtl
568
+ )
569
+ : undefined,
555
570
  log,
556
571
  });
557
572
  summaryText = result.text;
@@ -1205,6 +1220,7 @@ async function summarizeWithCacheHit({
1205
1220
  provider,
1206
1221
  reasoningKey,
1207
1222
  usePromptCache,
1223
+ promptCacheTtl,
1208
1224
  log,
1209
1225
  }: {
1210
1226
  model: t.ChatModel;
@@ -1217,6 +1233,7 @@ async function summarizeWithCacheHit({
1217
1233
  provider: Providers;
1218
1234
  reasoningKey?: 'reasoning_content' | 'reasoning';
1219
1235
  usePromptCache?: boolean;
1236
+ promptCacheTtl?: PromptCacheTtl;
1220
1237
  log?: LogFn;
1221
1238
  }): Promise<{ text: string; usage?: Partial<UsageMetadata> }> {
1222
1239
  const instruction = buildSummarizationInstruction(
@@ -1227,7 +1244,9 @@ async function summarizeWithCacheHit({
1227
1244
 
1228
1245
  const fullMessages = [...messages, new HumanMessage(instruction)];
1229
1246
  const invokeMessages =
1230
- usePromptCache === true ? addTailCacheControl(fullMessages) : fullMessages;
1247
+ usePromptCache === true
1248
+ ? addTailCacheControl(fullMessages, promptCacheTtl)
1249
+ : fullMessages;
1231
1250
 
1232
1251
  const result = await attemptInvoke(
1233
1252
  {
package/src/types/llm.ts CHANGED
@@ -22,6 +22,7 @@ import type { Runnable } from '@langchain/core/runnables';
22
22
  import type { OpenAI as OpenAIClient } from 'openai';
23
23
  import type { ChatXAIInput } from '@langchain/xai';
24
24
  import type { ChatOpenRouterCallOptions } from '@/llm/openrouter';
25
+ import type { PromptCacheTtl } from '@/messages/cache';
25
26
  import {
26
27
  AzureChatOpenAI,
27
28
  ChatDeepSeek,
@@ -76,6 +77,12 @@ export type OpenAIClientOptions = ChatOpenAIFields;
76
77
  export type AnthropicClientOptions = Omit<AnthropicInput, 'thinking'> & {
77
78
  thinking?: ThinkingConfig;
78
79
  promptCache?: boolean;
80
+ /**
81
+ * Prompt-cache breakpoint TTL. Defaults to `'1h'` (extended cache) when
82
+ * `promptCache` is enabled; set `'5m'` to opt back into the legacy
83
+ * 5-minute behavior.
84
+ */
85
+ promptCacheTtl?: PromptCacheTtl;
79
86
  };
80
87
  export type MistralAIClientOptions = ChatMistralAIInput;
81
88
  export type VertexAIClientOptions = ChatVertexAIInput & {
@@ -86,6 +93,13 @@ export type BedrockAnthropicInput = ChatBedrockConverseInput & {
86
93
  additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] &
87
94
  AnthropicReasoning;
88
95
  promptCache?: boolean;
96
+ /**
97
+ * Prompt-cache checkpoint TTL. Defaults to `'1h'` (extended cache) when
98
+ * `promptCache` is enabled; set `'5m'` to opt into the legacy 5-minute
99
+ * behavior. Bedrock models that don't support the 1-hour TTL downgrade to 5m
100
+ * server-side, so the default is safe to leave on.
101
+ */
102
+ promptCacheTtl?: PromptCacheTtl;
89
103
  };
90
104
  export type BedrockConverseClientOptions = BedrockAnthropicInput;
91
105
  export type BedrockAnthropicClientOptions = BedrockAnthropicInput;