@librechat/agents 3.2.36 → 3.2.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/cjs/agents/AgentContext.cjs +1 -1
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +7 -8
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/langfuse.cjs +16 -5
  6. package/dist/cjs/langfuse.cjs.map +1 -1
  7. package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
  8. package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +92 -3
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +24 -4
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/main.cjs +2 -0
  14. package/dist/cjs/messages/cache.cjs +183 -0
  15. package/dist/cjs/messages/cache.cjs.map +1 -1
  16. package/dist/cjs/summarization/node.cjs +1 -1
  17. package/dist/cjs/summarization/node.cjs.map +1 -1
  18. package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
  19. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
  20. package/dist/esm/agents/AgentContext.mjs +2 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/graphs/Graph.mjs +8 -9
  23. package/dist/esm/graphs/Graph.mjs.map +1 -1
  24. package/dist/esm/langfuse.mjs +16 -5
  25. package/dist/esm/langfuse.mjs.map +1 -1
  26. package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
  27. package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
  28. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +92 -3
  29. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  30. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +24 -4
  31. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  32. package/dist/esm/main.mjs +2 -2
  33. package/dist/esm/messages/cache.mjs +182 -1
  34. package/dist/esm/messages/cache.mjs.map +1 -1
  35. package/dist/esm/summarization/node.mjs +2 -2
  36. package/dist/esm/summarization/node.mjs.map +1 -1
  37. package/dist/esm/tools/toolOutputReferences.mjs +28 -14
  38. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
  39. package/dist/types/messages/cache.d.ts +40 -0
  40. package/dist/types/types/graph.d.ts +2 -0
  41. package/package.json +8 -5
  42. package/src/agents/AgentContext.ts +2 -2
  43. package/src/agents/__tests__/AgentContext.test.ts +3 -9
  44. package/src/graphs/Graph.ts +65 -36
  45. package/src/langfuse.ts +38 -4
  46. package/src/langfuseToolOutputTracing.ts +18 -0
  47. package/src/llm/anthropic/utils/message_inputs.ts +131 -3
  48. package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
  49. package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
  50. package/src/llm/bedrock/utils/message_inputs.ts +46 -4
  51. package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
  52. package/src/messages/cache.tail.test.ts +340 -0
  53. package/src/messages/cache.ts +266 -0
  54. package/src/messages/tailCacheConversion.test.ts +161 -0
  55. package/src/scripts/bench-prompt-cache.ts +479 -0
  56. package/src/specs/langfuse-config.test.ts +69 -2
  57. package/src/specs/langfuse-metadata.test.ts +44 -0
  58. package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
  59. package/src/summarization/node.ts +2 -2
  60. package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
  61. package/src/tools/toolOutputReferences.ts +34 -20
  62. package/src/types/graph.ts +2 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "3.2.36",
3
+ "version": "3.2.38",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -139,6 +139,7 @@
139
139
  "tool": "node --trace-warnings -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/tools.ts --provider 'bedrock' --name 'Jo' --location 'New York, NY'",
140
140
  "search": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/search.ts --provider 'bedrock' --name 'Jo' --location 'New York, NY'",
141
141
  "tool_search": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/tool_search.ts",
142
+ "bench:cache": "node --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/bench-prompt-cache.ts",
142
143
  "subagent": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/multi-agent-subagent.ts",
143
144
  "subagent:events": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/subagent-event-driven-debug.ts",
144
145
  "subagent:tools": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/subagent-tools-debug.ts",
@@ -211,7 +212,9 @@
211
212
  "uuid": "$uuid",
212
213
  "fast-xml-parser": "5.7.2",
213
214
  "ajv": "6.14.0",
214
- "minimatch": "3.1.4"
215
+ "minimatch": "3.1.4",
216
+ "@opentelemetry/core": "^2.8.0",
217
+ "js-yaml": "^4.2.0"
215
218
  },
216
219
  "dependencies": {
217
220
  "@anthropic-ai/sdk": "^0.92.0",
@@ -229,9 +232,9 @@
229
232
  "@langchain/openai": "1.4.5",
230
233
  "@langchain/textsplitters": "^1.0.1",
231
234
  "@langchain/xai": "^1.3.17",
232
- "@langfuse/langchain": "^5.3.0",
233
- "@langfuse/otel": "^5.3.0",
234
- "@langfuse/tracing": "^5.3.0",
235
+ "@langfuse/langchain": "^5.4.1",
236
+ "@langfuse/otel": "^5.4.1",
237
+ "@langfuse/tracing": "^5.4.1",
235
238
  "@opentelemetry/context-async-hooks": "2.7.1",
236
239
  "@opentelemetry/sdk-node": "^0.218.0",
237
240
  "@scarf/scarf": "^1.4.0",
package/src/agents/AgentContext.ts CHANGED
@@ -16,7 +16,7 @@ import {
16
16
  Providers,
17
17
  } from '@/common';
18
18
  import {
19
- addCacheControl,
19
+ addTailCacheControl,
20
20
  addCacheControlToStablePrefixMessages,
21
21
  cloneMessage,
22
22
  } from '@/messages/cache';
@@ -689,7 +689,7 @@ export class AgentContext {
689
689
  dynamicTail.length === 0 &&
690
690
  body.length >= 2
691
691
  ) {
692
- body = addCacheControl(body);
692
+ body = addTailCacheControl(body);
693
693
  }
694
694
  return [...prefix, ...body];
695
695
  }).withConfig({ runName: 'prompt' });
package/src/agents/__tests__/AgentContext.test.ts CHANGED
@@ -274,16 +274,11 @@ describe('AgentContext', () => {
274
274
  new HumanMessage('First'),
275
275
  new HumanMessage('Second'),
276
276
  ]);
277
- const firstContent = result[1].content as TestSystemContentBlock[];
278
277
  const secondContent = result[2].content as TestSystemContentBlock[];
279
278
 
280
279
  expect(result).toHaveLength(3);
281
280
  expect(result[0].content).toBe('Dynamic only');
282
- expect(firstContent[0]).toMatchObject({
283
- type: 'text',
284
- text: 'First',
285
- cache_control: { type: 'ephemeral' },
286
- });
281
+ expect(result[1].content).toBe('First');
287
282
  expect(secondContent[0]).toMatchObject({
288
283
  type: 'text',
289
284
  text: 'Second',
@@ -686,7 +681,7 @@ describe('AgentContext', () => {
686
681
  expect(result[8].content).toBe('Now answer without tools');
687
682
  });
688
683
 
689
- it('adds OpenRouter body cache points when there is no dynamic tail', async () => {
684
+ it('adds a single OpenRouter body cache point on the tail when there is no dynamic tail', async () => {
690
685
  const ctx = createBasicContext({
691
686
  agentConfig: {
692
687
  provider: Providers.OPENROUTER,
@@ -702,9 +697,8 @@ describe('AgentContext', () => {
702
697
  new HumanMessage('First'),
703
698
  new HumanMessage('Second'),
704
699
  ]);
705
- const firstContent = result[1].content as TestSystemContentBlock[];
706
700
  const secondContent = result[2].content as TestSystemContentBlock[];
707
- expect(firstContent[0]).toHaveProperty('cache_control');
701
+ expect(result[1].content).toBe('First');
708
702
  expect(secondContent[0]).toHaveProperty('cache_control');
709
703
  });
710
704
 
package/src/graphs/Graph.ts CHANGED
@@ -19,14 +19,14 @@ import {
19
19
  convertMessagesToContent,
20
20
  sanitizeOrphanToolBlocks,
21
21
  extractToolDiscoveries,
22
- addBedrockCacheControl,
22
+ addBedrockTailCacheControl,
23
23
  formatArtifactPayload,
24
24
  enforceOriginalContentCap,
25
25
  formatContentStrings,
26
26
  isLegacyConvertible,
27
27
  createPruneMessages,
28
28
  syncBudgetDerivedFields,
29
- addCacheControl,
29
+ addTailCacheControl,
30
30
  getMessageId,
31
31
  makeIsDeferred,
32
32
  partitionAndMarkAnthropicToolCache,
@@ -1733,35 +1733,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1733
1733
  }
1734
1734
  }
1735
1735
 
1736
- if (agentContext.provider === Providers.ANTHROPIC) {
1737
- const anthropicOptions = agentContext.clientOptions as
1738
- | t.AnthropicClientOptions
1739
- | undefined;
1740
- if (
1741
- anthropicOptions?.promptCache === true &&
1742
- !agentContext.systemRunnable
1743
- ) {
1744
- finalMessages = addCacheControl<BaseMessage>(finalMessages);
1745
- }
1746
- } else if (agentContext.provider === Providers.BEDROCK) {
1747
- const bedrockOptions = agentContext.clientOptions as
1748
- | t.BedrockAnthropicClientOptions
1749
- | undefined;
1750
- if (bedrockOptions?.promptCache === true) {
1751
- finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
1752
- }
1753
- } else if (agentContext.provider === Providers.OPENROUTER) {
1754
- const openRouterOptions = agentContext.clientOptions as
1755
- | t.ProviderOptionsMap[Providers.OPENROUTER]
1756
- | undefined;
1757
- if (
1758
- openRouterOptions?.promptCache === true &&
1759
- !agentContext.systemRunnable
1760
- ) {
1761
- finalMessages = addCacheControl<BaseMessage>(finalMessages);
1762
- }
1763
- }
1764
-
1765
1736
  if (
1766
1737
  isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
1767
1738
  ) {
@@ -1783,13 +1754,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1783
1754
  );
1784
1755
  }
1785
1756
 
1786
- // Intentionally broad: runs when the pruner wasn't used OR any post-pruning
1787
- // transform (addCacheControl, ensureThinkingBlock, etc.) reassigned finalMessages.
1788
- // sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans exist,
1789
- // so the cost is negligible and this acts as a safety net for Anthropic/Bedrock.
1757
+ // Determine the prompt-cache strategy up front. Two distinct facts:
1758
+ //
1759
+ // `providerPromptCacheEnabled` — prompt caching is on for this provider
1760
+ // at all. This drives orphan cleanup, because EVERY cached send must be
1761
+ // sanitized — including the system-runnable path, where AgentContext (not
1762
+ // this node) adds the body marker.
1763
+ //
1764
+ // Tail-marker ownership — whether THIS node adds the marker itself. Anthropic /
1765
+ // OpenRouter defer to the system runnable when one owns the system-prompt
1766
+ // breakpoint, so they exclude that case; Bedrock always marks here.
1767
+ const anthropicPromptCacheEnabled =
1768
+ agentContext.provider === Providers.ANTHROPIC &&
1769
+ (agentContext.clientOptions as t.AnthropicClientOptions | undefined)
1770
+ ?.promptCache === true;
1771
+ const openRouterPromptCacheEnabled =
1772
+ agentContext.provider === Providers.OPENROUTER &&
1773
+ (
1774
+ agentContext.clientOptions as
1775
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1776
+ | undefined
1777
+ )?.promptCache === true;
1778
+ const bedrockPromptCacheEnabled =
1779
+ agentContext.provider === Providers.BEDROCK &&
1780
+ (
1781
+ agentContext.clientOptions as
1782
+ | t.BedrockAnthropicClientOptions
1783
+ | undefined
1784
+ )?.promptCache === true;
1785
+ const providerPromptCacheEnabled =
1786
+ anthropicPromptCacheEnabled ||
1787
+ openRouterPromptCacheEnabled ||
1788
+ bedrockPromptCacheEnabled;
1789
+
1790
+ // Intentionally broad: runs when the pruner wasn't used, when any
1791
+ // post-pruning transform (ensureThinkingBlock, etc.) reassigned
1792
+ // finalMessages, OR when this is a prompt-cached send. The last clause
1793
+ // matters because the marker is now applied AFTER this gate (and, for the
1794
+ // system-runnable path, in AgentContext entirely): without it, a cached
1795
+ // send whose pruner returned the context unchanged would skip cleanup and
1796
+ // could ship orphaned AI/tool pairs from persisted history.
1797
+ // sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans
1798
+ // exist, so the cost is negligible.
1790
1799
  const needsOrphanSanitize =
1791
1800
  anthropicLike &&
1792
- (!agentContext.pruneMessages || finalMessages !== messagesToUse);
1801
+ (!agentContext.pruneMessages ||
1802
+ finalMessages !== messagesToUse ||
1803
+ providerPromptCacheEnabled);
1793
1804
  if (needsOrphanSanitize) {
1794
1805
  const beforeSanitize = finalMessages.length;
1795
1806
  finalMessages = sanitizeOrphanToolBlocks(finalMessages);
@@ -1809,6 +1820,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1809
1820
  }
1810
1821
  }
1811
1822
 
1823
+ // Place the single tail prompt-cache breakpoint LAST, after thinking
1824
+ // normalization and orphan sanitization. ensureThinkingBlockInMessages can
1825
+ // fold a trailing non-thinking AI→Tool chain into a `[Previous agent
1826
+ // context]` HumanMessage whose builder copies text but not cache_control /
1827
+ // cachePoint, and sanitizeOrphanToolBlocks can drop the anchored block — so
1828
+ // marking earlier would let the only breakpoint vanish before the model
1829
+ // call (zero message caching). Anchoring on the final message list keeps
1830
+ // the marker on a block that actually ships. The system-runnable path
1831
+ // adds its body marker in AgentContext, so this node skips it there.
1832
+ if (
1833
+ (anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
1834
+ !agentContext.systemRunnable
1835
+ ) {
1836
+ finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
1837
+ } else if (bedrockPromptCacheEnabled) {
1838
+ finalMessages = addBedrockTailCacheControl<BaseMessage>(finalMessages);
1839
+ }
1840
+
1812
1841
  if (
1813
1842
  agentContext.lastStreamCall != null &&
1814
1843
  agentContext.streamBuffer != null
package/src/langfuse.ts CHANGED
@@ -11,6 +11,7 @@ const TRACE_METADATA_MAX_LENGTH = 200;
11
11
  const LANGFUSE_FORCE_FLUSH_ON_DISPOSE = 'LANGFUSE_FORCE_FLUSH_ON_DISPOSE';
12
12
 
13
13
  export type LangfuseTraceMetadata = Record<string, string>;
14
+ type LangfuseMetadata = NonNullable<t.LangfuseConfig['metadata']>;
14
15
 
15
16
  type LangfuseHandlerParams = {
16
17
  userId?: string;
@@ -44,6 +45,13 @@ function hasLangfuseTracingConfig(langfuse?: t.LangfuseConfig): boolean {
44
45
  );
45
46
  }
46
47
 
48
+ function hasLangfuseTraceAttributes(langfuse?: t.LangfuseConfig): boolean {
49
+ return (
50
+ Object.keys(createTraceMetadata(langfuse?.metadata ?? {})).length > 0 ||
51
+ (mergeLangfuseTags(undefined, langfuse?.tags)?.length ?? 0) > 0
52
+ );
53
+ }
54
+
47
55
  export function hasLangfuseConfigCredentials(
48
56
  langfuse?: t.LangfuseConfig
49
57
  ): langfuse is t.LangfuseConfig & {
@@ -67,6 +75,7 @@ export function isExplicitLangfuseConfig(langfuse?: t.LangfuseConfig): boolean {
67
75
  isPresent(langfuse?.publicKey) ||
68
76
  isPresent(langfuse?.secretKey) ||
69
77
  isPresent(langfuse?.baseUrl) ||
78
+ hasLangfuseTraceAttributes(langfuse) ||
70
79
  hasLangfuseTracingConfig(langfuse)
71
80
  );
72
81
  }
@@ -110,6 +119,27 @@ export function createLangfuseTraceMetadata({
110
119
  });
111
120
  }
112
121
 
122
+ function mergeLangfuseTraceMetadata(
123
+ traceMetadata?: LangfuseTraceMetadata,
124
+ metadata?: LangfuseMetadata
125
+ ): LangfuseTraceMetadata | undefined {
126
+ const merged = createTraceMetadata({
127
+ ...(metadata ?? {}),
128
+ ...(traceMetadata ?? {}),
129
+ });
130
+ return Object.keys(merged).length > 0 ? merged : undefined;
131
+ }
132
+
133
+ function mergeLangfuseTags(
134
+ tags?: string[],
135
+ configTags?: string[]
136
+ ): string[] | undefined {
137
+ const merged = [...(tags ?? []), ...(configTags ?? [])].filter(
138
+ (tag) => tag.trim() !== ''
139
+ );
140
+ return merged.length > 0 ? [...new Set(merged)] : undefined;
141
+ }
142
+
113
143
  export function getLangfuseTraceName(
114
144
  traceMetadata?: LangfuseTraceMetadata,
115
145
  fallback: string = 'LibreChat Agent'
@@ -161,12 +191,16 @@ export function createLangfuseHandler({
161
191
  return new CallbackHandler({
162
192
  userId,
163
193
  sessionId,
164
- traceMetadata,
165
- tags,
194
+ traceMetadata: mergeLangfuseTraceMetadata(
195
+ traceMetadata,
196
+ langfuse?.metadata
197
+ ),
198
+ tags: mergeLangfuseTags(tags, langfuse?.tags),
166
199
  });
167
200
  }
168
201
 
169
202
  function createPropagateAttributeParams({
203
+ langfuse,
170
204
  userId,
171
205
  sessionId,
172
206
  traceMetadata,
@@ -177,8 +211,8 @@ function createPropagateAttributeParams({
177
211
  userId,
178
212
  sessionId,
179
213
  traceName,
180
- tags,
181
- metadata: traceMetadata,
214
+ tags: mergeLangfuseTags(tags, langfuse?.tags),
215
+ metadata: mergeLangfuseTraceMetadata(traceMetadata, langfuse?.metadata),
182
216
  };
183
217
  }
184
218
 
package/src/langfuseToolOutputTracing.ts CHANGED
@@ -692,10 +692,28 @@ export function resolveLangfuseConfig(
692
692
  ...agentLangfuse.toolOutputTracing,
693
693
  }
694
694
  : undefined;
695
+ const metadata =
696
+ runLangfuse.metadata != null || agentLangfuse.metadata != null
697
+ ? {
698
+ ...runLangfuse.metadata,
699
+ ...agentLangfuse.metadata,
700
+ }
701
+ : undefined;
702
+ const tags =
703
+ runLangfuse.tags != null || agentLangfuse.tags != null
704
+ ? [
705
+ ...new Set([
706
+ ...(runLangfuse.tags ?? []),
707
+ ...(agentLangfuse.tags ?? []),
708
+ ]),
709
+ ]
710
+ : undefined;
695
711
 
696
712
  return {
697
713
  ...runLangfuse,
698
714
  ...agentLangfuse,
715
+ ...(metadata != null ? { metadata } : {}),
716
+ ...(tags != null ? { tags } : {}),
699
717
  ...(toolNodeTracing != null ? { toolNodeTracing } : {}),
700
718
  ...(toolOutputTracing != null ? { toolOutputTracing } : {}),
701
719
  };
package/src/llm/anthropic/utils/message_inputs.ts CHANGED
@@ -140,6 +140,35 @@ export function normalizeAnthropicToolCallId(
140
140
  return `${sanitized.slice(0, prefixMaxLength)}_${hash}`;
141
141
  }
142
142
 
143
+ /**
144
+ * Lift any `cache_control` off the inner blocks of a tool result onto the
145
+ * `tool_result` block itself. Anthropic documents the top-level
146
+ * `messages.content` block as the cacheable position and does not document
147
+ * caching of sub-content blocks; the API currently honors a nested marker, but
148
+ * anchoring on the documented position keeps the single tail breakpoint robust
149
+ * (and mirrors the Bedrock cachePoint hoist). The first marker found wins; it is
150
+ * stripped from every inner block so exactly one survives, on the outer block.
151
+ */
152
+ function hoistToolResultCacheControl(
153
+ content: string | MessageContentComplex[]
154
+ ): { content: string | MessageContentComplex[]; cacheControl: unknown } {
155
+ if (!Array.isArray(content)) {
156
+ return { content, cacheControl: undefined };
157
+ }
158
+ let cacheControl: unknown;
159
+ const stripped = content.map((block) => {
160
+ if ('cache_control' in block) {
161
+ cacheControl ??= (block as Record<string, unknown>).cache_control;
162
+ const clone = { ...(block as Record<string, unknown>) };
163
+ delete clone.cache_control;
164
+ return clone as MessageContentComplex;
165
+ }
166
+ return block;
167
+ });
168
+ // `stripped` is element-equal to `content` when no marker was present.
169
+ return { content: stripped, cacheControl };
170
+ }
171
+
143
172
  function _ensureMessageContents(
144
173
  messages: BaseMessage[]
145
174
  ): (SystemMessage | HumanMessage | AIMessage)[] {
@@ -183,13 +212,20 @@ function _ensureMessageContents(
183
212
  const toolMessageContent = (
184
213
  message as { content?: BaseMessage['content'] | null }
185
214
  ).content;
215
+ // Hoist a tail cache_control off the inner content onto the
216
+ // tool_result block itself (the documented cacheable position).
217
+ const { content: hoistedContent, cacheControl } =
218
+ toolMessageContent != null
219
+ ? hoistToolResultCacheControl(_formatContent(message))
220
+ : { content: undefined, cacheControl: undefined };
186
221
  updatedMsgs.push(
187
222
  new HumanMessage({
188
223
  content: [
189
224
  {
190
225
  type: 'tool_result',
191
- ...(toolMessageContent != null
192
- ? { content: _formatContent(message) }
226
+ ...(hoistedContent != null ? { content: hoistedContent } : {}),
227
+ ...(cacheControl != null
228
+ ? { cache_control: cacheControl as { type: 'ephemeral' } }
193
229
  : {}),
194
230
  tool_use_id: normalizeAnthropicToolCallId(
195
231
  (message as ToolMessage).tool_call_id
@@ -917,6 +953,86 @@ export function modelDisallowsAssistantPrefill(model?: string): boolean {
917
953
  return Number(match[1]) >= 6;
918
954
  }
919
955
 
956
+ function messagesHaveCacheControl(
957
+ messages: AnthropicMessageCreateParams['messages']
958
+ ): boolean {
959
+ return messages.some(
960
+ (message) =>
961
+ Array.isArray(message.content) &&
962
+ message.content.some((block) => 'cache_control' in block)
963
+ );
964
+ }
965
+
966
+ /** Anthropic rejects cache_control on these reasoning blocks. */
967
+ const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
968
+ 'thinking',
969
+ 'redacted_thinking',
970
+ ]);
971
+
972
+ /**
973
+ * Place one ephemeral `cache_control` on the last cacheable block of the final
974
+ * message of an already-converted Anthropic payload. Used to re-anchor the tail
975
+ * breakpoint after a trailing assistant prefill is stripped. Operates on the
976
+ * post-conversion payload, where blocks the converter drops (foreign reasoning,
977
+ * input_json_delta) are already gone — only native thinking blocks must be
978
+ * skipped. Returns a new array only when it actually places a marker.
979
+ */
980
+ function reanchorTailCacheControl(
981
+ messages: AnthropicMessageCreateParams['messages']
982
+ ): AnthropicMessageCreateParams['messages'] {
983
+ if (messages.length === 0) {
984
+ return messages;
985
+ }
986
+ const lastIndex = messages.length - 1;
987
+ const tail = messages[lastIndex];
988
+ const content = tail.content;
989
+
990
+ if (typeof content === 'string') {
991
+ if (content.trim() === '') {
992
+ return messages;
993
+ }
994
+ const next = [...messages];
995
+ next[lastIndex] = {
996
+ ...tail,
997
+ content: [
998
+ { type: 'text', text: content, cache_control: { type: 'ephemeral' } },
999
+ ],
1000
+ } as (typeof messages)[number];
1001
+ return next;
1002
+ }
1003
+
1004
+ if (!Array.isArray(content)) {
1005
+ return messages;
1006
+ }
1007
+
1008
+ let anchor = -1;
1009
+ for (let i = 0; i < content.length; i++) {
1010
+ const type = (content[i] as { type?: string }).type;
1011
+ if (type == null || NON_CACHEABLE_PAYLOAD_BLOCK_TYPES.has(type)) {
1012
+ continue;
1013
+ }
1014
+ if (
1015
+ type === 'text' &&
1016
+ ((content[i] as { text?: string }).text ?? '').trim() === ''
1017
+ ) {
1018
+ continue;
1019
+ }
1020
+ anchor = i;
1021
+ }
1022
+ if (anchor < 0) {
1023
+ return messages;
1024
+ }
1025
+
1026
+ const next = [...messages];
1027
+ next[lastIndex] = {
1028
+ ...tail,
1029
+ content: content.map((block, i) =>
1030
+ i === anchor ? { ...block, cache_control: { type: 'ephemeral' } } : block
1031
+ ),
1032
+ } as (typeof messages)[number];
1033
+ return next;
1034
+ }
1035
+
920
1036
  export function stripUnsupportedAssistantPrefill<
921
1037
  T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
922
1038
  >(request: T): T {
@@ -940,9 +1056,21 @@ export function stripUnsupportedAssistantPrefill<
940
1056
  nextMessages.pop();
941
1057
  }
942
1058
 
1059
+ /**
1060
+ * If a single tail prompt-cache breakpoint rode the stripped assistant
1061
+ * prefill, the survivors may now carry no `cache_control` at all, dropping
1062
+ * message caching for this request. Re-anchor the breakpoint on the new tail
1063
+ * (only when one was actually lost, so caching-off requests stay untouched).
1064
+ */
1065
+ const reanchored =
1066
+ messagesHaveCacheControl(messages) &&
1067
+ !messagesHaveCacheControl(nextMessages)
1068
+ ? reanchorTailCacheControl(nextMessages)
1069
+ : nextMessages;
1070
+
943
1071
  return {
944
1072
  ...request,
945
- messages: nextMessages,
1073
+ messages: reanchored,
946
1074
  };
947
1075
  }
948
1076
 
package/src/llm/anthropic/utils/stripPrefillCache.test.ts ADDED
@@ -0,0 +1,111 @@
1
+ import type { AnthropicMessageCreateParams } from '../types';
2
+ import { stripUnsupportedAssistantPrefill } from './message_inputs';
3
+
4
+ /**
5
+ * When a model disallows assistant prefill (Claude 4.6+), the trailing
6
+ * assistant message is stripped right before the API call. If the single tail
7
+ * prompt-cache breakpoint rode that assistant prefill, the survivors would lose
8
+ * their only message-level `cache_control` — so the strip must re-anchor the
9
+ * breakpoint onto the new tail.
10
+ */
11
+
12
+ type Msgs = AnthropicMessageCreateParams['messages'];
13
+
14
+ function cacheControlBlocks(messages: Msgs): number {
15
+ let n = 0;
16
+ for (const m of messages) {
17
+ if (!Array.isArray(m.content)) continue;
18
+ for (const b of m.content) {
19
+ if ('cache_control' in b) n++;
20
+ }
21
+ }
22
+ return n;
23
+ }
24
+
25
+ describe('stripUnsupportedAssistantPrefill — cache re-anchoring', () => {
26
+ test('re-anchors the breakpoint onto the new tail when the prefill carried it', () => {
27
+ const request = {
28
+ model: 'claude-opus-4-6',
29
+ max_tokens: 100,
30
+ messages: [
31
+ {
32
+ role: 'user' as const,
33
+ content: [{ type: 'text' as const, text: 'q' }],
34
+ },
35
+ {
36
+ role: 'assistant' as const,
37
+ content: [
38
+ {
39
+ type: 'text' as const,
40
+ text: 'prefill',
41
+ cache_control: { type: 'ephemeral' as const },
42
+ },
43
+ ],
44
+ },
45
+ ],
46
+ };
47
+
48
+ const out = stripUnsupportedAssistantPrefill(request);
49
+
50
+ // Prefill removed, and exactly one breakpoint survives — on the new tail.
51
+ expect(out.messages).toHaveLength(1);
52
+ expect(out.messages[0].role).toBe('user');
53
+ expect(cacheControlBlocks(out.messages)).toBe(1);
54
+ const tail = out.messages[0].content as Array<{ cache_control?: unknown }>;
55
+ expect(tail[tail.length - 1].cache_control).toEqual({ type: 'ephemeral' });
56
+ });
57
+
58
+ test('does not add a breakpoint when caching was off (no marker present)', () => {
59
+ const request = {
60
+ model: 'claude-opus-4-6',
61
+ max_tokens: 100,
62
+ messages: [
63
+ { role: 'user' as const, content: 'q' },
64
+ { role: 'assistant' as const, content: 'prefill' },
65
+ ],
66
+ };
67
+
68
+ const out = stripUnsupportedAssistantPrefill(request);
69
+
70
+ expect(out.messages).toHaveLength(1);
71
+ expect(cacheControlBlocks(out.messages)).toBe(0);
72
+ });
73
+
74
+ test('leaves a surviving breakpoint untouched (no double-anchor)', () => {
75
+ const request = {
76
+ model: 'claude-opus-4-6',
77
+ max_tokens: 100,
78
+ messages: [
79
+ {
80
+ role: 'user' as const,
81
+ content: [
82
+ {
83
+ type: 'text' as const,
84
+ text: 'q',
85
+ cache_control: { type: 'ephemeral' as const },
86
+ },
87
+ ],
88
+ },
89
+ { role: 'assistant' as const, content: 'prefill' },
90
+ ],
91
+ };
92
+
93
+ const out = stripUnsupportedAssistantPrefill(request);
94
+
95
+ expect(out.messages).toHaveLength(1);
96
+ expect(cacheControlBlocks(out.messages)).toBe(1);
97
+ });
98
+
99
+ test('older models keep the assistant prefill (no strip, no re-anchor)', () => {
100
+ const request = {
101
+ model: 'claude-sonnet-4-5-20250929',
102
+ max_tokens: 100,
103
+ messages: [
104
+ { role: 'user' as const, content: 'q' },
105
+ { role: 'assistant' as const, content: '{' },
106
+ ],
107
+ };
108
+
109
+ expect(stripUnsupportedAssistantPrefill(request)).toBe(request);
110
+ });
111
+ });