@librechat/agents 3.2.37 → 3.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +25 -8
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +7 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +20 -4
  6. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +7 -1
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/toolCache.cjs +5 -4
  10. package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +34 -17
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/openrouter/index.cjs +1 -0
  14. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  15. package/dist/cjs/llm/openrouter/toolCache.cjs +18 -5
  16. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
  17. package/dist/cjs/main.cjs +4 -0
  18. package/dist/cjs/messages/anthropicToolCache.cjs +75 -13
  19. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
  20. package/dist/cjs/messages/cache.cjs +91 -35
  21. package/dist/cjs/messages/cache.cjs.map +1 -1
  22. package/dist/cjs/summarization/node.cjs +3 -2
  23. package/dist/cjs/summarization/node.cjs.map +1 -1
  24. package/dist/esm/agents/AgentContext.mjs +26 -9
  25. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +8 -5
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +20 -4
  29. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  30. package/dist/esm/llm/bedrock/index.mjs +7 -1
  31. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  32. package/dist/esm/llm/bedrock/toolCache.mjs +5 -4
  33. package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
  34. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +34 -17
  35. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  36. package/dist/esm/llm/openrouter/index.mjs +1 -0
  37. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  38. package/dist/esm/llm/openrouter/toolCache.mjs +18 -5
  39. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
  40. package/dist/esm/main.mjs +2 -2
  41. package/dist/esm/messages/anthropicToolCache.mjs +75 -13
  42. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
  43. package/dist/esm/messages/cache.mjs +88 -36
  44. package/dist/esm/messages/cache.mjs.map +1 -1
  45. package/dist/esm/summarization/node.mjs +4 -3
  46. package/dist/esm/summarization/node.mjs.map +1 -1
  47. package/dist/types/agents/AgentContext.d.ts +11 -0
  48. package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +2 -0
  49. package/dist/types/llm/bedrock/index.d.ts +13 -0
  50. package/dist/types/llm/bedrock/toolCache.d.ts +2 -1
  51. package/dist/types/llm/openrouter/index.d.ts +8 -0
  52. package/dist/types/llm/openrouter/toolCache.d.ts +2 -1
  53. package/dist/types/messages/anthropicToolCache.d.ts +2 -1
  54. package/dist/types/messages/cache.d.ts +49 -5
  55. package/dist/types/types/llm.d.ts +14 -0
  56. package/package.json +7 -5
  57. package/src/agents/AgentContext.ts +64 -17
  58. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +6 -2
  59. package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +7 -5
  60. package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +1 -1
  61. package/src/agents/__tests__/AgentContext.test.ts +31 -19
  62. package/src/agents/__tests__/promptCacheLiveHelpers.ts +6 -2
  63. package/src/graphs/Graph.ts +40 -4
  64. package/src/llm/anthropic/utils/message_inputs.ts +33 -6
  65. package/src/llm/bedrock/index.ts +21 -1
  66. package/src/llm/bedrock/llm.spec.ts +61 -0
  67. package/src/llm/bedrock/toolCache.test.ts +24 -0
  68. package/src/llm/bedrock/toolCache.ts +12 -7
  69. package/src/llm/bedrock/utils/message_inputs.ts +57 -40
  70. package/src/llm/openrouter/index.ts +9 -0
  71. package/src/llm/openrouter/toolCache.test.ts +52 -1
  72. package/src/llm/openrouter/toolCache.ts +40 -6
  73. package/src/messages/__tests__/anthropicToolCache.test.ts +168 -0
  74. package/src/messages/anthropicToolCache.ts +118 -15
  75. package/src/messages/cache.test.ts +175 -0
  76. package/src/messages/cache.ts +133 -48
  77. package/src/summarization/node.ts +21 -2
  78. package/src/types/llm.ts +14 -0
@@ -8,6 +8,15 @@ import type {
8
8
  } from '@langchain/core/messages';
9
9
  import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
10
10
  import type * as t from '@/types';
11
+ import {
12
+ addTailCacheControl,
13
+ addCacheControlToStablePrefixMessages,
14
+ buildAnthropicCacheControl,
15
+ buildBedrockCachePoint,
16
+ resolvePromptCacheTtl,
17
+ cloneMessage,
18
+ type PromptCacheTtl,
19
+ } from '@/messages/cache';
11
20
  import {
12
21
  ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
13
22
  DEFAULT_TOOL_TOKEN_MULTIPLIER,
@@ -15,30 +24,25 @@ import {
15
24
  Constants,
16
25
  Providers,
17
26
  } from '@/common';
18
- import {
19
- addTailCacheControl,
20
- addCacheControlToStablePrefixMessages,
21
- cloneMessage,
22
- } from '@/messages/cache';
23
- import { createSchemaOnlyTools } from '@/tools/schema';
24
- import { apportionTokenCounts } from '@/utils/tokens';
25
27
  import {
26
28
  DEFAULT_RESERVE_RATIO,
27
29
  createPruneMessages,
28
30
  syncBudgetDerivedFields,
29
31
  } from '@/messages';
32
+ import { createSchemaOnlyTools } from '@/tools/schema';
33
+ import { apportionTokenCounts } from '@/utils/tokens';
30
34
  import { isThinkingEnabled } from '@/llm/request';
31
35
  import { toJsonSchema } from '@/utils/schema';
32
36
 
33
37
  type AgentSystemTextBlock = {
34
38
  type: 'text';
35
39
  text: string;
36
- cache_control?: { type: 'ephemeral' };
40
+ cache_control?: { type: 'ephemeral'; ttl?: '1h' };
37
41
  };
38
42
 
39
43
  type AgentSystemContentBlock =
40
44
  | AgentSystemTextBlock
41
- | { cachePoint: { type: 'default' } };
45
+ | { cachePoint: { type: 'default'; ttl?: '1h' } };
42
46
 
43
47
  type PromptCacheProvider = Providers.ANTHROPIC | Providers.OPENROUTER;
44
48
 
@@ -689,7 +693,10 @@ export class AgentContext {
689
693
  dynamicTail.length === 0 &&
690
694
  body.length >= 2
691
695
  ) {
692
- body = addTailCacheControl(body);
696
+ body = addTailCacheControl(
697
+ body,
698
+ this.getPromptCacheTtl(promptCacheProvider)
699
+ );
693
700
  }
694
701
  return [...prefix, ...body];
695
702
  }).withConfig({ runName: 'prompt' });
@@ -713,7 +720,9 @@ export class AgentContext {
713
720
  {
714
721
  type: 'text',
715
722
  text: wrappedSummary,
716
- cache_control: { type: 'ephemeral' },
723
+ cache_control: buildAnthropicCacheControl(
724
+ this.getPromptCacheTtl(Providers.ANTHROPIC)
725
+ ),
717
726
  },
718
727
  ],
719
728
  });
@@ -760,7 +769,10 @@ export class AgentContext {
760
769
  );
761
770
  const stablePrefix = messages.slice(0, tailIndex);
762
771
  const trailingMessages = messages.slice(tailIndex);
763
- const cacheablePrefix = this.addStablePromptCacheMarkers(stablePrefix);
772
+ const cacheablePrefix = this.addStablePromptCacheMarkers(
773
+ stablePrefix,
774
+ this.getPromptCacheTtl(promptCacheProvider)
775
+ );
764
776
 
765
777
  return [...cacheablePrefix, ...tail, ...trailingMessages];
766
778
  }
@@ -791,14 +803,17 @@ export class AgentContext {
791
803
  return messages.length;
792
804
  }
793
805
 
794
- private addStablePromptCacheMarkers(messages: BaseMessage[]): BaseMessage[] {
806
+ private addStablePromptCacheMarkers(
807
+ messages: BaseMessage[],
808
+ ttl?: PromptCacheTtl
809
+ ): BaseMessage[] {
795
810
  if (messages.length <= 1) {
796
811
  return messages;
797
812
  }
798
813
 
799
814
  return [
800
815
  messages[0],
801
- ...addCacheControlToStablePrefixMessages(messages.slice(1), 2),
816
+ ...addCacheControlToStablePrefixMessages(messages.slice(1), 2, ttl),
802
817
  ];
803
818
  }
804
819
 
@@ -834,6 +849,34 @@ export class AgentContext {
834
849
  return bedrockOptions?.promptCache === true;
835
850
  }
836
851
 
852
+ /**
853
+ * Resolved TTL for the active prompt-cache provider (Anthropic or OpenRouter).
854
+ * Both expose `promptCacheTtl` and use the Anthropic `cache_control` format, so
855
+ * the configured value resolves the same way (default `'1h'` extended cache).
856
+ */
857
+ private getPromptCacheTtl(
858
+ provider: PromptCacheProvider | undefined
859
+ ): PromptCacheTtl | undefined {
860
+ if (provider == null) {
861
+ return undefined;
862
+ }
863
+ return resolvePromptCacheTtl(
864
+ (this.clientOptions as { promptCacheTtl?: PromptCacheTtl } | undefined)
865
+ ?.promptCacheTtl
866
+ );
867
+ }
868
+
869
+ /**
870
+ * Resolved TTL for Bedrock prompt-cache checkpoints (default `'1h'`).
871
+ * Models that don't support the 1-hour TTL downgrade to 5m server-side.
872
+ */
873
+ private getBedrockPromptCacheTtl(): PromptCacheTtl {
874
+ const bedrockOptions = this.clientOptions as
875
+ | t.BedrockAnthropicClientOptions
876
+ | undefined;
877
+ return resolvePromptCacheTtl(bedrockOptions?.promptCacheTtl);
878
+ }
879
+
837
880
  private buildSystemMessage({
838
881
  stableInstructions,
839
882
  dynamicInstructions,
@@ -855,7 +898,9 @@ export class AgentContext {
855
898
  content.push({
856
899
  type: 'text',
857
900
  text: stableInstructions,
858
- cache_control: { type: 'ephemeral' },
901
+ cache_control: buildAnthropicCacheControl(
902
+ this.getPromptCacheTtl(promptCacheProvider)
903
+ ),
859
904
  });
860
905
  }
861
906
  if (dynamicInstructions && !shouldMoveDynamicInstructions) {
@@ -874,7 +919,9 @@ export class AgentContext {
874
919
  {
875
920
  type: 'text',
876
921
  text: stableInstructions,
877
- cache_control: { type: 'ephemeral' },
922
+ cache_control: buildAnthropicCacheControl(
923
+ this.getPromptCacheTtl(promptCacheProvider)
924
+ ),
878
925
  },
879
926
  ],
880
927
  } as BaseMessageFields);
@@ -883,7 +930,7 @@ export class AgentContext {
883
930
  if (this.hasBedrockPromptCache() && stableInstructions) {
884
931
  const content: AgentSystemContentBlock[] = [
885
932
  { type: 'text', text: stableInstructions },
886
- { cachePoint: { type: 'default' } },
933
+ { cachePoint: buildBedrockCachePoint(this.getBedrockPromptCacheTtl()) },
887
934
  ];
888
935
  if (dynamicInstructions) {
889
936
  content.push({ type: 'text', text: dynamicInstructions });
@@ -187,7 +187,11 @@ function addLatestUserOnlyAnthropicCacheControl(
187
187
  if (!modified) {
188
188
  continue;
189
189
  }
190
- } else if (typeof content === 'string' && content.trim() !== '' && canAddCache) {
190
+ } else if (
191
+ typeof content === 'string' &&
192
+ content.trim() !== '' &&
193
+ canAddCache
194
+ ) {
191
195
  workingContent = [
192
196
  {
193
197
  type: 'text',
@@ -348,7 +352,7 @@ describeIfLive('AgentContext Anthropic prompt cache live API', () => {
348
352
  {
349
353
  type: 'text',
350
354
  text: stableInstructions,
351
- cache_control: { type: 'ephemeral' },
355
+ cache_control: { type: 'ephemeral', ttl: '1h' },
352
356
  },
353
357
  ],
354
358
  });
@@ -279,7 +279,11 @@ function addLatestUserOnlyBedrockCacheControl(
279
279
  if (!modified) {
280
280
  continue;
281
281
  }
282
- } else if (typeof content === 'string' && content.trim() !== '' && canAddCache) {
282
+ } else if (
283
+ typeof content === 'string' &&
284
+ content.trim() !== '' &&
285
+ canAddCache
286
+ ) {
283
287
  workingContent = [
284
288
  { type: 'text', text: content } as MessageContentComplex,
285
289
  cachePointBlock(),
@@ -490,7 +494,7 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
490
494
  text: stableInstructions,
491
495
  },
492
496
  {
493
- cachePoint: { type: 'default' },
497
+ cachePoint: { type: 'default', ttl: '1h' },
494
498
  },
495
499
  {
496
500
  type: 'text',
@@ -643,9 +647,7 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
643
647
  })}\n`
644
648
  );
645
649
 
646
- expect(currentSecond.cacheRead).toBeGreaterThan(
647
- latestOnlySecond.cacheRead
648
- );
650
+ expect(currentSecond.cacheRead).toBeGreaterThan(latestOnlySecond.cacheRead);
649
651
  expect(currentSecond.cacheCreation).toBeLessThan(
650
652
  latestOnlySecond.cacheCreation
651
653
  );
@@ -93,7 +93,7 @@ describeIfLive('AgentContext OpenRouter prompt cache live API', () => {
93
93
  {
94
94
  type: 'text',
95
95
  text: stableInstructions,
96
- cache_control: { type: 'ephemeral' },
96
+ cache_control: { type: 'ephemeral', ttl: '1h' },
97
97
  },
98
98
  ],
99
99
  });
@@ -7,8 +7,12 @@ import { AgentContext } from '../AgentContext';
7
7
 
8
8
  describe('AgentContext', () => {
9
9
  type TestSystemContentBlock =
10
- | { type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }
11
- | { cachePoint: { type: 'default' } };
10
+ | {
11
+ type: 'text';
12
+ text: string;
13
+ cache_control?: { type: 'ephemeral'; ttl?: '1h' };
14
+ }
15
+ | { cachePoint: { type: 'default'; ttl?: '1h' } };
12
16
 
13
17
  type ContextOptions = {
14
18
  agentConfig?: Partial<t.AgentInputs>;
@@ -98,7 +102,7 @@ describe('AgentContext', () => {
98
102
  {
99
103
  type: 'text',
100
104
  text: 'Stable instructions',
101
- cache_control: { type: 'ephemeral' },
105
+ cache_control: { type: 'ephemeral', ttl: '1h' },
102
106
  },
103
107
  ]);
104
108
  expect(result[1].content).toBe('Hello');
@@ -164,7 +168,7 @@ describe('AgentContext', () => {
164
168
  agentConfig: {
165
169
  provider: Providers.BEDROCK,
166
170
  clientOptions: {
167
- model: 'anthropic.claude-3-5-sonnet',
171
+ model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
168
172
  promptCache: true,
169
173
  },
170
174
  instructions: 'Stable instructions',
@@ -176,7 +180,7 @@ describe('AgentContext', () => {
176
180
  const content = result[0].content as TestSystemContentBlock[];
177
181
  expect(content).toEqual([
178
182
  { type: 'text', text: 'Stable instructions' },
179
- { cachePoint: { type: 'default' } },
183
+ { cachePoint: { type: 'default', ttl: '1h' } },
180
184
  { type: 'text', text: 'Dynamic instructions' },
181
185
  ]);
182
186
  });
@@ -186,7 +190,7 @@ describe('AgentContext', () => {
186
190
  agentConfig: {
187
191
  provider: Providers.BEDROCK,
188
192
  clientOptions: {
189
- model: 'anthropic.claude-3-5-sonnet',
193
+ model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
190
194
  promptCache: true,
191
195
  },
192
196
  instructions: undefined,
@@ -240,7 +244,7 @@ describe('AgentContext', () => {
240
244
  {
241
245
  type: 'text',
242
246
  text: 'Stable instructions',
243
- cache_control: { type: 'ephemeral' },
247
+ cache_control: { type: 'ephemeral', ttl: '1h' },
244
248
  },
245
249
  ]);
246
250
  expect(result[1].content).toBe('Hello');
@@ -730,7 +734,7 @@ describe('AgentContext', () => {
730
734
  agentConfig: {
731
735
  provider: Providers.BEDROCK,
732
736
  clientOptions: {
733
- model: 'anthropic.claude-3-5-sonnet',
737
+ model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
734
738
  promptCache: true,
735
739
  },
736
740
  instructions: 'Stable instructions',
@@ -741,10 +745,12 @@ describe('AgentContext', () => {
741
745
  const result = await ctx.systemRunnable!.invoke([
742
746
  new HumanMessage('Hello'),
743
747
  ]);
744
- const finalMessages = addBedrockCacheControl(result);
748
+ // The graph applies the same resolved TTL it stamped on the system
749
+ // checkpoint, so the 1h system cachePoint is preserved (normalized to 1h).
750
+ const finalMessages = addBedrockCacheControl(result, '1h');
745
751
  expect(finalMessages[0].content).toEqual([
746
752
  { type: 'text', text: 'Stable instructions' },
747
- { cachePoint: { type: 'default' } },
753
+ { cachePoint: { type: 'default', ttl: '1h' } },
748
754
  { type: 'text', text: 'Dynamic instructions' },
749
755
  ]);
750
756
  });
@@ -2154,7 +2160,7 @@ describe('AgentContext', () => {
2154
2160
  const buildBranch = (
2155
2161
  maxContextTokens: number,
2156
2162
  perMessageTokens: number,
2157
- count: number,
2163
+ count: number
2158
2164
  ): { ctx: AgentContext; messages: AIMessage[] } => {
2159
2165
  const ctx = createBasicContext({ tokenCounter: countByChars });
2160
2166
  ctx.maxContextTokens = maxContextTokens;
@@ -2166,7 +2172,7 @@ describe('AgentContext', () => {
2166
2172
  messages.push(
2167
2173
  i % 2 === 0
2168
2174
  ? (new HumanMessage(content) as unknown as AIMessage)
2169
- : new AIMessage(content),
2175
+ : new AIMessage(content)
2170
2176
  );
2171
2177
  }
2172
2178
  return { ctx, messages };
@@ -2175,7 +2181,9 @@ describe('AgentContext', () => {
2175
2181
  it('returns null without a tokenizer or a window', () => {
2176
2182
  const noCounter = createBasicContext({});
2177
2183
  noCounter.maxContextTokens = 1000;
2178
- expect(noCounter.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2184
+ expect(
2185
+ noCounter.projectContextUsage([new HumanMessage('hi')])
2186
+ ).toBeNull();
2179
2187
 
2180
2188
  const noWindow = createBasicContext({ tokenCounter: countByChars });
2181
2189
  noWindow.maxContextTokens = undefined;
@@ -2207,7 +2215,9 @@ describe('AgentContext', () => {
2207
2215
  expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
2208
2216
 
2209
2217
  const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2210
- expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(max);
2218
+ expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(
2219
+ max
2220
+ );
2211
2221
  });
2212
2222
 
2213
2223
  it('does not mutate the context (local pruner, no field writes)', () => {
@@ -2245,7 +2255,7 @@ describe('AgentContext', () => {
2245
2255
 
2246
2256
  expect(messages[2]).toBe(originalRef);
2247
2257
  expect((messages[2] as unknown as ToolMessage).content).toBe(
2248
- originalContent,
2258
+ originalContent
2249
2259
  );
2250
2260
  });
2251
2261
 
@@ -2257,7 +2267,9 @@ describe('AgentContext', () => {
2257
2267
  ctx.indexTokenCountMap = {};
2258
2268
  const messages: AIMessage[] = [];
2259
2269
  for (let i = 0; i < 6; i++) {
2260
- messages.push(new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage);
2270
+ messages.push(
2271
+ new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage
2272
+ );
2261
2273
  }
2262
2274
 
2263
2275
  const usage = ctx.projectContextUsage(messages);
@@ -2285,7 +2297,7 @@ describe('AgentContext', () => {
2285
2297
  const ctx = createBasicContext({ tokenCounter: countByChars });
2286
2298
  ctx.maxContextTokens = 5_000;
2287
2299
  const messages: AIMessage[] = [0, 1, 2].map(
2288
- () => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage,
2300
+ () => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage
2289
2301
  );
2290
2302
  return { ctx, messages };
2291
2303
  };
@@ -2303,7 +2315,7 @@ describe('AgentContext', () => {
2303
2315
  const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
2304
2316
 
2305
2317
  expect(dirtyUsage!.remainingContextTokens).toBe(
2306
- cleanUsage!.remainingContextTokens,
2318
+ cleanUsage!.remainingContextTokens
2307
2319
  );
2308
2320
  expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
2309
2321
  });
@@ -2350,7 +2362,7 @@ describe('AgentContext', () => {
2350
2362
 
2351
2363
  expect(scaledUsage!.calibrationRatio).toBe(3);
2352
2364
  expect(scaledUsage!.remainingContextTokens).not.toBe(
2353
- baseUsage!.remainingContextTokens,
2365
+ baseUsage!.remainingContextTokens
2354
2366
  );
2355
2367
  });
2356
2368
 
@@ -15,8 +15,12 @@ type LivePromptCacheProvider =
15
15
  | Providers.OPENROUTER;
16
16
 
17
17
  type PromptCacheExpectedSystemBlock =
18
- | { type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }
19
- | { cachePoint: { type: 'default' } };
18
+ | {
19
+ type: 'text';
20
+ text: string;
21
+ cache_control?: { type: 'ephemeral'; ttl?: '1h' };
22
+ }
23
+ | { cachePoint: { type: 'default'; ttl?: '1h' } };
20
24
 
21
25
  type LivePromptCacheClientOptions =
22
26
  | t.ClientOptions
@@ -27,6 +27,7 @@ import {
27
27
  createPruneMessages,
28
28
  syncBudgetDerivedFields,
29
29
  addTailCacheControl,
30
+ resolvePromptCacheTtl,
30
31
  getMessageId,
31
32
  makeIsDeferred,
32
33
  partitionAndMarkAnthropicToolCache,
@@ -1404,7 +1405,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1404
1405
  toolsForBinding =
1405
1406
  partitionAndMarkAnthropicToolCache(
1406
1407
  rawToolsForBinding,
1407
- makeIsDeferred(agentContext.toolDefinitions)
1408
+ makeIsDeferred(agentContext.toolDefinitions),
1409
+ resolvePromptCacheTtl(
1410
+ (
1411
+ agentContext.clientOptions as
1412
+ | t.AnthropicClientOptions
1413
+ | undefined
1414
+ )?.promptCacheTtl
1415
+ )
1408
1416
  ) ?? rawToolsForBinding;
1409
1417
  } else if (
1410
1418
  agentContext.provider === Providers.OPENROUTER &&
@@ -1417,7 +1425,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1417
1425
  toolsForBinding =
1418
1426
  partitionAndMarkOpenRouterToolCache(
1419
1427
  rawToolsForBinding,
1420
- makeIsDeferred(agentContext.toolDefinitions)
1428
+ makeIsDeferred(agentContext.toolDefinitions),
1429
+ resolvePromptCacheTtl(
1430
+ (
1431
+ agentContext.clientOptions as
1432
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1433
+ | undefined
1434
+ )?.promptCacheTtl
1435
+ )
1421
1436
  ) ?? rawToolsForBinding;
1422
1437
  } else if (
1423
1438
  agentContext.provider === Providers.BEDROCK &&
@@ -1833,9 +1848,30 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1833
1848
  (anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
1834
1849
  !agentContext.systemRunnable
1835
1850
  ) {
1836
- finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
1851
+ finalMessages = addTailCacheControl<BaseMessage>(
1852
+ finalMessages,
1853
+ resolvePromptCacheTtl(
1854
+ anthropicPromptCacheEnabled
1855
+ ? (
1856
+ agentContext.clientOptions as
1857
+ | t.AnthropicClientOptions
1858
+ | undefined
1859
+ )?.promptCacheTtl
1860
+ : (
1861
+ agentContext.clientOptions as
1862
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1863
+ | undefined
1864
+ )?.promptCacheTtl
1865
+ )
1866
+ );
1837
1867
  } else if (bedrockPromptCacheEnabled) {
1838
- finalMessages = addBedrockTailCacheControl<BaseMessage>(finalMessages);
1868
+ const bedrockOptions = agentContext.clientOptions as
1869
+ | t.BedrockAnthropicClientOptions
1870
+ | undefined;
1871
+ finalMessages = addBedrockTailCacheControl<BaseMessage>(
1872
+ finalMessages,
1873
+ resolvePromptCacheTtl(bedrockOptions?.promptCacheTtl)
1874
+ );
1839
1875
  }
1840
1876
 
1841
1877
  if (
@@ -978,11 +978,16 @@ const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
978
978
  * skipped. Returns a new array only when it actually places a marker.
979
979
  */
980
980
  function reanchorTailCacheControl(
981
- messages: AnthropicMessageCreateParams['messages']
981
+ messages: AnthropicMessageCreateParams['messages'],
982
+ ttl?: '1h'
982
983
  ): AnthropicMessageCreateParams['messages'] {
983
984
  if (messages.length === 0) {
984
985
  return messages;
985
986
  }
987
+ const cacheControl =
988
+ ttl === '1h'
989
+ ? ({ type: 'ephemeral', ttl: '1h' } as const)
990
+ : ({ type: 'ephemeral' } as const);
986
991
  const lastIndex = messages.length - 1;
987
992
  const tail = messages[lastIndex];
988
993
  const content = tail.content;
@@ -994,9 +999,7 @@ function reanchorTailCacheControl(
994
999
  const next = [...messages];
995
1000
  next[lastIndex] = {
996
1001
  ...tail,
997
- content: [
998
- { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
999
- ],
1002
+ content: [{ type: 'text', text: content, cache_control: cacheControl }],
1000
1003
  } as (typeof messages)[number];
1001
1004
  return next;
1002
1005
  }
@@ -1027,12 +1030,36 @@ function reanchorTailCacheControl(
1027
1030
  next[lastIndex] = {
1028
1031
  ...tail,
1029
1032
  content: content.map((block, i) =>
1030
- i === anchor ? { ...block, cache_control: { type: 'ephemeral' } } : block
1033
+ i === anchor ? { ...block, cache_control: cacheControl } : block
1031
1034
  ),
1032
1035
  } as (typeof messages)[number];
1033
1036
  return next;
1034
1037
  }
1035
1038
 
1039
+ /**
1040
+ * Find the extended-cache TTL (`'1h'`) carried by an existing `cache_control`
1041
+ * breakpoint, so {@link reanchorTailCacheControl} can re-apply the same TTL the
1042
+ * stripped prefill had. Returns `undefined` for the legacy 5-minute default
1043
+ * (no `ttl`), keeping that path byte-identical to before.
1044
+ */
1045
+ function findCacheControlTtl(
1046
+ messages: AnthropicMessageCreateParams['messages']
1047
+ ): '1h' | undefined {
1048
+ for (const message of messages) {
1049
+ if (!Array.isArray(message.content)) {
1050
+ continue;
1051
+ }
1052
+ for (const block of message.content) {
1053
+ const cacheControl = (block as { cache_control?: { ttl?: unknown } })
1054
+ .cache_control;
1055
+ if (cacheControl?.ttl === '1h') {
1056
+ return '1h';
1057
+ }
1058
+ }
1059
+ }
1060
+ return undefined;
1061
+ }
1062
+
1036
1063
  export function stripUnsupportedAssistantPrefill<
1037
1064
  T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
1038
1065
  >(request: T): T {
@@ -1065,7 +1092,7 @@ export function stripUnsupportedAssistantPrefill<
1065
1092
  const reanchored =
1066
1093
  messagesHaveCacheControl(messages) &&
1067
1094
  !messagesHaveCacheControl(nextMessages)
1068
- ? reanchorTailCacheControl(nextMessages)
1095
+ ? reanchorTailCacheControl(nextMessages, findCacheControlTtl(messages))
1069
1096
  : nextMessages;
1070
1097
 
1071
1098
  return {
@@ -39,6 +39,7 @@ import {
39
39
  handleConverseStreamContentBlockDelta,
40
40
  handleConverseStreamMetadata,
41
41
  } from './utils';
42
+ import { resolvePromptCacheTtl, type PromptCacheTtl } from '@/messages/cache';
42
43
  import { insertBedrockToolCachePoint } from './toolCache';
43
44
 
44
45
  /**
@@ -63,6 +64,15 @@ export interface CustomChatBedrockConverseInput
63
64
  */
64
65
  promptCache?: boolean;
65
66
 
67
+ /**
68
+ * Prompt-cache checkpoint TTL. Defaults to `'1h'` (extended cache) when
69
+ * `promptCache` is enabled; set `'5m'` for the legacy 5-minute behavior.
70
+ * Bedrock models that don't support the 1-hour TTL downgrade to 5m
71
+ * server-side (verified on Sonnet/Opus 4.6), so the default is safe to leave
72
+ * on; use `'5m'` for any model that rejects it.
73
+ */
74
+ promptCacheTtl?: PromptCacheTtl;
75
+
66
76
  /**
67
77
  * Guardrail configuration for Converse and ConverseStream invocations.
68
78
  * `streamProcessingMode` is only used by ConverseStream.
@@ -109,6 +119,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
109
119
  */
110
120
  promptCache?: boolean;
111
121
 
122
+ /**
123
+ * Prompt-cache checkpoint TTL (`'5m'` legacy or `'1h'` extended cache).
124
+ */
125
+ promptCacheTtl?: PromptCacheTtl;
126
+
112
127
  /**
113
128
  * Application Inference Profile ARN to use instead of model ID.
114
129
  */
@@ -122,6 +137,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
122
137
  constructor(fields?: CustomChatBedrockConverseInput) {
123
138
  super(fields);
124
139
  this.promptCache = fields?.promptCache;
140
+ this.promptCacheTtl = fields?.promptCacheTtl;
125
141
  this.applicationInferenceProfile = fields?.applicationInferenceProfile;
126
142
  this.serviceTier = fields?.serviceTier;
127
143
  }
@@ -149,7 +165,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
149
165
  const baseParams = super.invocationParams(options);
150
166
  const toolConfig =
151
167
  this.promptCache === true
152
- ? insertBedrockToolCachePoint(baseParams.toolConfig, true)
168
+ ? insertBedrockToolCachePoint(
169
+ baseParams.toolConfig,
170
+ true,
171
+ resolvePromptCacheTtl(this.promptCacheTtl)
172
+ )
153
173
  : baseParams.toolConfig;
154
174
 
155
175
  /** Service tier from options or fall back to class-level setting */
@@ -397,6 +397,67 @@ describe('CustomChatBedrockConverse', () => {
397
397
  ]);
398
398
  });
399
399
 
400
+ test('defaults the tool cache point to the 1h extended TTL', () => {
401
+ const model = new CustomChatBedrockConverse({
402
+ ...baseConstructorArgs,
403
+ promptCache: true,
404
+ });
405
+
406
+ const params = model.invocationParams({
407
+ tools: [
408
+ {
409
+ type: 'function',
410
+ function: {
411
+ name: 'direct_tool',
412
+ description: 'Direct tool',
413
+ parameters: { type: 'object', properties: {} },
414
+ },
415
+ },
416
+ ],
417
+ });
418
+
419
+ const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
420
+ Record<string, unknown>
421
+ >;
422
+ const cachePoints = toolList.filter((t) => 'cachePoint' in t);
423
+ expect(cachePoints).toHaveLength(1);
424
+ expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
425
+ type: 'default',
426
+ ttl: '1h',
427
+ });
428
+ });
429
+
430
+ test('honors an explicit 5m promptCacheTtl on the tool cache point', () => {
431
+ const model = new CustomChatBedrockConverse({
432
+ ...baseConstructorArgs,
433
+ promptCache: true,
434
+ promptCacheTtl: '5m',
435
+ });
436
+
437
+ const params = model.invocationParams({
438
+ tools: [
439
+ {
440
+ type: 'function',
441
+ function: {
442
+ name: 'direct_tool',
443
+ description: 'Direct tool',
444
+ parameters: { type: 'object', properties: {} },
445
+ },
446
+ },
447
+ ],
448
+ });
449
+
450
+ const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
451
+ Record<string, unknown>
452
+ >;
453
+ const cachePoints = toolList.filter((t) => 'cachePoint' in t);
454
+ expect(cachePoints).toHaveLength(1);
455
+ // 5m omits the ttl field (provider default).
456
+ expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
457
+ type: 'default',
458
+ });
459
+ });
460
+
400
461
  test('adds the Bedrock cache point before deferred tools', () => {
401
462
  const model = new CustomChatBedrockConverse({
402
463
  ...baseConstructorArgs,