@librechat/agents 3.2.36 → 3.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +1 -1
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +7 -8
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +16 -5
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +92 -3
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +24 -4
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/cache.cjs +183 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +2 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +8 -9
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +16 -5
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +92 -3
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +24 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/cache.mjs +182 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +2 -2
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +28 -14
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
- package/dist/types/messages/cache.d.ts +40 -0
- package/dist/types/types/graph.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +2 -2
- package/src/agents/__tests__/AgentContext.test.ts +3 -9
- package/src/graphs/Graph.ts +65 -36
- package/src/langfuse.ts +38 -4
- package/src/langfuseToolOutputTracing.ts +18 -0
- package/src/llm/anthropic/utils/message_inputs.ts +131 -3
- package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
- package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
- package/src/llm/bedrock/utils/message_inputs.ts +46 -4
- package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
- package/src/messages/cache.tail.test.ts +340 -0
- package/src/messages/cache.ts +266 -0
- package/src/messages/tailCacheConversion.test.ts +161 -0
- package/src/scripts/bench-prompt-cache.ts +479 -0
- package/src/specs/langfuse-config.test.ts +69 -2
- package/src/specs/langfuse-metadata.test.ts +44 -0
- package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
- package/src/summarization/node.ts +2 -2
- package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
- package/src/tools/toolOutputReferences.ts +34 -20
- package/src/types/graph.ts +2 -0
|
@@ -274,16 +274,11 @@ describe('AgentContext', () => {
|
|
|
274
274
|
new HumanMessage('First'),
|
|
275
275
|
new HumanMessage('Second'),
|
|
276
276
|
]);
|
|
277
|
-
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
278
277
|
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
279
278
|
|
|
280
279
|
expect(result).toHaveLength(3);
|
|
281
280
|
expect(result[0].content).toBe('Dynamic only');
|
|
282
|
-
expect(
|
|
283
|
-
type: 'text',
|
|
284
|
-
text: 'First',
|
|
285
|
-
cache_control: { type: 'ephemeral' },
|
|
286
|
-
});
|
|
281
|
+
expect(result[1].content).toBe('First');
|
|
287
282
|
expect(secondContent[0]).toMatchObject({
|
|
288
283
|
type: 'text',
|
|
289
284
|
text: 'Second',
|
|
@@ -686,7 +681,7 @@ describe('AgentContext', () => {
|
|
|
686
681
|
expect(result[8].content).toBe('Now answer without tools');
|
|
687
682
|
});
|
|
688
683
|
|
|
689
|
-
it('adds OpenRouter body cache
|
|
684
|
+
it('adds a single OpenRouter body cache point on the tail when there is no dynamic tail', async () => {
|
|
690
685
|
const ctx = createBasicContext({
|
|
691
686
|
agentConfig: {
|
|
692
687
|
provider: Providers.OPENROUTER,
|
|
@@ -702,9 +697,8 @@ describe('AgentContext', () => {
|
|
|
702
697
|
new HumanMessage('First'),
|
|
703
698
|
new HumanMessage('Second'),
|
|
704
699
|
]);
|
|
705
|
-
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
706
700
|
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
707
|
-
expect(
|
|
701
|
+
expect(result[1].content).toBe('First');
|
|
708
702
|
expect(secondContent[0]).toHaveProperty('cache_control');
|
|
709
703
|
});
|
|
710
704
|
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -19,14 +19,14 @@ import {
|
|
|
19
19
|
convertMessagesToContent,
|
|
20
20
|
sanitizeOrphanToolBlocks,
|
|
21
21
|
extractToolDiscoveries,
|
|
22
|
-
|
|
22
|
+
addBedrockTailCacheControl,
|
|
23
23
|
formatArtifactPayload,
|
|
24
24
|
enforceOriginalContentCap,
|
|
25
25
|
formatContentStrings,
|
|
26
26
|
isLegacyConvertible,
|
|
27
27
|
createPruneMessages,
|
|
28
28
|
syncBudgetDerivedFields,
|
|
29
|
-
|
|
29
|
+
addTailCacheControl,
|
|
30
30
|
getMessageId,
|
|
31
31
|
makeIsDeferred,
|
|
32
32
|
partitionAndMarkAnthropicToolCache,
|
|
@@ -1733,35 +1733,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1733
1733
|
}
|
|
1734
1734
|
}
|
|
1735
1735
|
|
|
1736
|
-
if (agentContext.provider === Providers.ANTHROPIC) {
|
|
1737
|
-
const anthropicOptions = agentContext.clientOptions as
|
|
1738
|
-
| t.AnthropicClientOptions
|
|
1739
|
-
| undefined;
|
|
1740
|
-
if (
|
|
1741
|
-
anthropicOptions?.promptCache === true &&
|
|
1742
|
-
!agentContext.systemRunnable
|
|
1743
|
-
) {
|
|
1744
|
-
finalMessages = addCacheControl<BaseMessage>(finalMessages);
|
|
1745
|
-
}
|
|
1746
|
-
} else if (agentContext.provider === Providers.BEDROCK) {
|
|
1747
|
-
const bedrockOptions = agentContext.clientOptions as
|
|
1748
|
-
| t.BedrockAnthropicClientOptions
|
|
1749
|
-
| undefined;
|
|
1750
|
-
if (bedrockOptions?.promptCache === true) {
|
|
1751
|
-
finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
|
|
1752
|
-
}
|
|
1753
|
-
} else if (agentContext.provider === Providers.OPENROUTER) {
|
|
1754
|
-
const openRouterOptions = agentContext.clientOptions as
|
|
1755
|
-
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1756
|
-
| undefined;
|
|
1757
|
-
if (
|
|
1758
|
-
openRouterOptions?.promptCache === true &&
|
|
1759
|
-
!agentContext.systemRunnable
|
|
1760
|
-
) {
|
|
1761
|
-
finalMessages = addCacheControl<BaseMessage>(finalMessages);
|
|
1762
|
-
}
|
|
1763
|
-
}
|
|
1764
|
-
|
|
1765
1736
|
if (
|
|
1766
1737
|
isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
|
|
1767
1738
|
) {
|
|
@@ -1783,13 +1754,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1783
1754
|
);
|
|
1784
1755
|
}
|
|
1785
1756
|
|
|
1786
|
-
//
|
|
1787
|
-
//
|
|
1788
|
-
//
|
|
1789
|
-
//
|
|
1757
|
+
// Determine the prompt-cache strategy up front. Two distinct facts:
|
|
1758
|
+
//
|
|
1759
|
+
// `providerPromptCacheEnabled` — prompt caching is on for this provider
|
|
1760
|
+
// at all. This drives orphan cleanup, because EVERY cached send must be
|
|
1761
|
+
// sanitized — including the system-runnable path, where AgentContext (not
|
|
1762
|
+
// this node) adds the body marker.
|
|
1763
|
+
//
|
|
1764
|
+
// `willAddTailCache` — THIS node will add the marker itself. Anthropic /
|
|
1765
|
+
// OpenRouter defer to the system runnable when one owns the system-prompt
|
|
1766
|
+
// breakpoint, so they exclude that case; Bedrock always marks here.
|
|
1767
|
+
const anthropicPromptCacheEnabled =
|
|
1768
|
+
agentContext.provider === Providers.ANTHROPIC &&
|
|
1769
|
+
(agentContext.clientOptions as t.AnthropicClientOptions | undefined)
|
|
1770
|
+
?.promptCache === true;
|
|
1771
|
+
const openRouterPromptCacheEnabled =
|
|
1772
|
+
agentContext.provider === Providers.OPENROUTER &&
|
|
1773
|
+
(
|
|
1774
|
+
agentContext.clientOptions as
|
|
1775
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1776
|
+
| undefined
|
|
1777
|
+
)?.promptCache === true;
|
|
1778
|
+
const bedrockPromptCacheEnabled =
|
|
1779
|
+
agentContext.provider === Providers.BEDROCK &&
|
|
1780
|
+
(
|
|
1781
|
+
agentContext.clientOptions as
|
|
1782
|
+
| t.BedrockAnthropicClientOptions
|
|
1783
|
+
| undefined
|
|
1784
|
+
)?.promptCache === true;
|
|
1785
|
+
const providerPromptCacheEnabled =
|
|
1786
|
+
anthropicPromptCacheEnabled ||
|
|
1787
|
+
openRouterPromptCacheEnabled ||
|
|
1788
|
+
bedrockPromptCacheEnabled;
|
|
1789
|
+
|
|
1790
|
+
// Intentionally broad: runs when the pruner wasn't used, when any
|
|
1791
|
+
// post-pruning transform (ensureThinkingBlock, etc.) reassigned
|
|
1792
|
+
// finalMessages, OR when this is a prompt-cached send. The last clause
|
|
1793
|
+
// matters because the marker is now applied AFTER this gate (and, for the
|
|
1794
|
+
// system-runnable path, in AgentContext entirely): without it, a cached
|
|
1795
|
+
// send whose pruner returned the context unchanged would skip cleanup and
|
|
1796
|
+
// could ship orphaned AI/tool pairs from persisted history.
|
|
1797
|
+
// sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans
|
|
1798
|
+
// exist, so the cost is negligible.
|
|
1790
1799
|
const needsOrphanSanitize =
|
|
1791
1800
|
anthropicLike &&
|
|
1792
|
-
(!agentContext.pruneMessages ||
|
|
1801
|
+
(!agentContext.pruneMessages ||
|
|
1802
|
+
finalMessages !== messagesToUse ||
|
|
1803
|
+
providerPromptCacheEnabled);
|
|
1793
1804
|
if (needsOrphanSanitize) {
|
|
1794
1805
|
const beforeSanitize = finalMessages.length;
|
|
1795
1806
|
finalMessages = sanitizeOrphanToolBlocks(finalMessages);
|
|
@@ -1809,6 +1820,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1809
1820
|
}
|
|
1810
1821
|
}
|
|
1811
1822
|
|
|
1823
|
+
// Place the single tail prompt-cache breakpoint LAST, after thinking
|
|
1824
|
+
// normalization and orphan sanitization. ensureThinkingBlockInMessages can
|
|
1825
|
+
// fold a trailing non-thinking AI→Tool chain into a `[Previous agent
|
|
1826
|
+
// context]` HumanMessage whose builder copies text but not cache_control /
|
|
1827
|
+
// cachePoint, and sanitizeOrphanToolBlocks can drop the anchored block — so
|
|
1828
|
+
// marking earlier would let the only breakpoint vanish before the model
|
|
1829
|
+
// call (zero message caching). Anchoring on the final message list keeps
|
|
1830
|
+
// the marker on a block that actually ships. The system-runnable path
|
|
1831
|
+
// adds its body marker in AgentContext, so this node skips it there.
|
|
1832
|
+
if (
|
|
1833
|
+
(anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
|
|
1834
|
+
!agentContext.systemRunnable
|
|
1835
|
+
) {
|
|
1836
|
+
finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
|
|
1837
|
+
} else if (bedrockPromptCacheEnabled) {
|
|
1838
|
+
finalMessages = addBedrockTailCacheControl<BaseMessage>(finalMessages);
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1812
1841
|
if (
|
|
1813
1842
|
agentContext.lastStreamCall != null &&
|
|
1814
1843
|
agentContext.streamBuffer != null
|
package/src/langfuse.ts
CHANGED
|
@@ -11,6 +11,7 @@ const TRACE_METADATA_MAX_LENGTH = 200;
|
|
|
11
11
|
const LANGFUSE_FORCE_FLUSH_ON_DISPOSE = 'LANGFUSE_FORCE_FLUSH_ON_DISPOSE';
|
|
12
12
|
|
|
13
13
|
export type LangfuseTraceMetadata = Record<string, string>;
|
|
14
|
+
type LangfuseMetadata = NonNullable<t.LangfuseConfig['metadata']>;
|
|
14
15
|
|
|
15
16
|
type LangfuseHandlerParams = {
|
|
16
17
|
userId?: string;
|
|
@@ -44,6 +45,13 @@ function hasLangfuseTracingConfig(langfuse?: t.LangfuseConfig): boolean {
|
|
|
44
45
|
);
|
|
45
46
|
}
|
|
46
47
|
|
|
48
|
+
function hasLangfuseTraceAttributes(langfuse?: t.LangfuseConfig): boolean {
|
|
49
|
+
return (
|
|
50
|
+
Object.keys(createTraceMetadata(langfuse?.metadata ?? {})).length > 0 ||
|
|
51
|
+
(mergeLangfuseTags(undefined, langfuse?.tags)?.length ?? 0) > 0
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
|
|
47
55
|
export function hasLangfuseConfigCredentials(
|
|
48
56
|
langfuse?: t.LangfuseConfig
|
|
49
57
|
): langfuse is t.LangfuseConfig & {
|
|
@@ -67,6 +75,7 @@ export function isExplicitLangfuseConfig(langfuse?: t.LangfuseConfig): boolean {
|
|
|
67
75
|
isPresent(langfuse?.publicKey) ||
|
|
68
76
|
isPresent(langfuse?.secretKey) ||
|
|
69
77
|
isPresent(langfuse?.baseUrl) ||
|
|
78
|
+
hasLangfuseTraceAttributes(langfuse) ||
|
|
70
79
|
hasLangfuseTracingConfig(langfuse)
|
|
71
80
|
);
|
|
72
81
|
}
|
|
@@ -110,6 +119,27 @@ export function createLangfuseTraceMetadata({
|
|
|
110
119
|
});
|
|
111
120
|
}
|
|
112
121
|
|
|
122
|
+
function mergeLangfuseTraceMetadata(
|
|
123
|
+
traceMetadata?: LangfuseTraceMetadata,
|
|
124
|
+
metadata?: LangfuseMetadata
|
|
125
|
+
): LangfuseTraceMetadata | undefined {
|
|
126
|
+
const merged = createTraceMetadata({
|
|
127
|
+
...(metadata ?? {}),
|
|
128
|
+
...(traceMetadata ?? {}),
|
|
129
|
+
});
|
|
130
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
function mergeLangfuseTags(
|
|
134
|
+
tags?: string[],
|
|
135
|
+
configTags?: string[]
|
|
136
|
+
): string[] | undefined {
|
|
137
|
+
const merged = [...(tags ?? []), ...(configTags ?? [])].filter(
|
|
138
|
+
(tag) => tag.trim() !== ''
|
|
139
|
+
);
|
|
140
|
+
return merged.length > 0 ? [...new Set(merged)] : undefined;
|
|
141
|
+
}
|
|
142
|
+
|
|
113
143
|
export function getLangfuseTraceName(
|
|
114
144
|
traceMetadata?: LangfuseTraceMetadata,
|
|
115
145
|
fallback: string = 'LibreChat Agent'
|
|
@@ -161,12 +191,16 @@ export function createLangfuseHandler({
|
|
|
161
191
|
return new CallbackHandler({
|
|
162
192
|
userId,
|
|
163
193
|
sessionId,
|
|
164
|
-
traceMetadata
|
|
165
|
-
|
|
194
|
+
traceMetadata: mergeLangfuseTraceMetadata(
|
|
195
|
+
traceMetadata,
|
|
196
|
+
langfuse?.metadata
|
|
197
|
+
),
|
|
198
|
+
tags: mergeLangfuseTags(tags, langfuse?.tags),
|
|
166
199
|
});
|
|
167
200
|
}
|
|
168
201
|
|
|
169
202
|
function createPropagateAttributeParams({
|
|
203
|
+
langfuse,
|
|
170
204
|
userId,
|
|
171
205
|
sessionId,
|
|
172
206
|
traceMetadata,
|
|
@@ -177,8 +211,8 @@ function createPropagateAttributeParams({
|
|
|
177
211
|
userId,
|
|
178
212
|
sessionId,
|
|
179
213
|
traceName,
|
|
180
|
-
tags,
|
|
181
|
-
metadata: traceMetadata,
|
|
214
|
+
tags: mergeLangfuseTags(tags, langfuse?.tags),
|
|
215
|
+
metadata: mergeLangfuseTraceMetadata(traceMetadata, langfuse?.metadata),
|
|
182
216
|
};
|
|
183
217
|
}
|
|
184
218
|
|
|
@@ -692,10 +692,28 @@ export function resolveLangfuseConfig(
|
|
|
692
692
|
...agentLangfuse.toolOutputTracing,
|
|
693
693
|
}
|
|
694
694
|
: undefined;
|
|
695
|
+
const metadata =
|
|
696
|
+
runLangfuse.metadata != null || agentLangfuse.metadata != null
|
|
697
|
+
? {
|
|
698
|
+
...runLangfuse.metadata,
|
|
699
|
+
...agentLangfuse.metadata,
|
|
700
|
+
}
|
|
701
|
+
: undefined;
|
|
702
|
+
const tags =
|
|
703
|
+
runLangfuse.tags != null || agentLangfuse.tags != null
|
|
704
|
+
? [
|
|
705
|
+
...new Set([
|
|
706
|
+
...(runLangfuse.tags ?? []),
|
|
707
|
+
...(agentLangfuse.tags ?? []),
|
|
708
|
+
]),
|
|
709
|
+
]
|
|
710
|
+
: undefined;
|
|
695
711
|
|
|
696
712
|
return {
|
|
697
713
|
...runLangfuse,
|
|
698
714
|
...agentLangfuse,
|
|
715
|
+
...(metadata != null ? { metadata } : {}),
|
|
716
|
+
...(tags != null ? { tags } : {}),
|
|
699
717
|
...(toolNodeTracing != null ? { toolNodeTracing } : {}),
|
|
700
718
|
...(toolOutputTracing != null ? { toolOutputTracing } : {}),
|
|
701
719
|
};
|
|
@@ -140,6 +140,35 @@ export function normalizeAnthropicToolCallId(
|
|
|
140
140
|
return `${sanitized.slice(0, prefixMaxLength)}_${hash}`;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
/**
|
|
144
|
+
* Lift any `cache_control` off the inner blocks of a tool result onto the
|
|
145
|
+
* `tool_result` block itself. Anthropic documents the top-level
|
|
146
|
+
* `messages.content` block as the cacheable position and does not document
|
|
147
|
+
* caching of sub-content blocks; the API currently honors a nested marker, but
|
|
148
|
+
* anchoring on the documented position keeps the single tail breakpoint robust
|
|
149
|
+
* (and mirrors the Bedrock cachePoint hoist). The first marker found wins; it is
|
|
150
|
+
* stripped from every inner block so exactly one survives, on the outer block.
|
|
151
|
+
*/
|
|
152
|
+
function hoistToolResultCacheControl(
|
|
153
|
+
content: string | MessageContentComplex[]
|
|
154
|
+
): { content: string | MessageContentComplex[]; cacheControl: unknown } {
|
|
155
|
+
if (!Array.isArray(content)) {
|
|
156
|
+
return { content, cacheControl: undefined };
|
|
157
|
+
}
|
|
158
|
+
let cacheControl: unknown;
|
|
159
|
+
const stripped = content.map((block) => {
|
|
160
|
+
if ('cache_control' in block) {
|
|
161
|
+
cacheControl ??= (block as Record<string, unknown>).cache_control;
|
|
162
|
+
const clone = { ...(block as Record<string, unknown>) };
|
|
163
|
+
delete clone.cache_control;
|
|
164
|
+
return clone as MessageContentComplex;
|
|
165
|
+
}
|
|
166
|
+
return block;
|
|
167
|
+
});
|
|
168
|
+
// `stripped` is element-equal to `content` when no marker was present.
|
|
169
|
+
return { content: stripped, cacheControl };
|
|
170
|
+
}
|
|
171
|
+
|
|
143
172
|
function _ensureMessageContents(
|
|
144
173
|
messages: BaseMessage[]
|
|
145
174
|
): (SystemMessage | HumanMessage | AIMessage)[] {
|
|
@@ -183,13 +212,20 @@ function _ensureMessageContents(
|
|
|
183
212
|
const toolMessageContent = (
|
|
184
213
|
message as { content?: BaseMessage['content'] | null }
|
|
185
214
|
).content;
|
|
215
|
+
// Hoist a tail cache_control off the inner content onto the
|
|
216
|
+
// tool_result block itself (the documented cacheable position).
|
|
217
|
+
const { content: hoistedContent, cacheControl } =
|
|
218
|
+
toolMessageContent != null
|
|
219
|
+
? hoistToolResultCacheControl(_formatContent(message))
|
|
220
|
+
: { content: undefined, cacheControl: undefined };
|
|
186
221
|
updatedMsgs.push(
|
|
187
222
|
new HumanMessage({
|
|
188
223
|
content: [
|
|
189
224
|
{
|
|
190
225
|
type: 'tool_result',
|
|
191
|
-
...(
|
|
192
|
-
|
|
226
|
+
...(hoistedContent != null ? { content: hoistedContent } : {}),
|
|
227
|
+
...(cacheControl != null
|
|
228
|
+
? { cache_control: cacheControl as { type: 'ephemeral' } }
|
|
193
229
|
: {}),
|
|
194
230
|
tool_use_id: normalizeAnthropicToolCallId(
|
|
195
231
|
(message as ToolMessage).tool_call_id
|
|
@@ -917,6 +953,86 @@ export function modelDisallowsAssistantPrefill(model?: string): boolean {
|
|
|
917
953
|
return Number(match[1]) >= 6;
|
|
918
954
|
}
|
|
919
955
|
|
|
956
|
+
function messagesHaveCacheControl(
|
|
957
|
+
messages: AnthropicMessageCreateParams['messages']
|
|
958
|
+
): boolean {
|
|
959
|
+
return messages.some(
|
|
960
|
+
(message) =>
|
|
961
|
+
Array.isArray(message.content) &&
|
|
962
|
+
message.content.some((block) => 'cache_control' in block)
|
|
963
|
+
);
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
/** Anthropic rejects cache_control on these reasoning blocks. */
|
|
967
|
+
const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
|
|
968
|
+
'thinking',
|
|
969
|
+
'redacted_thinking',
|
|
970
|
+
]);
|
|
971
|
+
|
|
972
|
+
/**
|
|
973
|
+
* Place one ephemeral `cache_control` on the last cacheable block of the final
|
|
974
|
+
* message of an already-converted Anthropic payload. Used to re-anchor the tail
|
|
975
|
+
* breakpoint after a trailing assistant prefill is stripped. Operates on the
|
|
976
|
+
* post-conversion payload, where blocks the converter drops (foreign reasoning,
|
|
977
|
+
* input_json_delta) are already gone — only native thinking blocks must be
|
|
978
|
+
* skipped. Returns a new array only when it actually places a marker.
|
|
979
|
+
*/
|
|
980
|
+
function reanchorTailCacheControl(
|
|
981
|
+
messages: AnthropicMessageCreateParams['messages']
|
|
982
|
+
): AnthropicMessageCreateParams['messages'] {
|
|
983
|
+
if (messages.length === 0) {
|
|
984
|
+
return messages;
|
|
985
|
+
}
|
|
986
|
+
const lastIndex = messages.length - 1;
|
|
987
|
+
const tail = messages[lastIndex];
|
|
988
|
+
const content = tail.content;
|
|
989
|
+
|
|
990
|
+
if (typeof content === 'string') {
|
|
991
|
+
if (content.trim() === '') {
|
|
992
|
+
return messages;
|
|
993
|
+
}
|
|
994
|
+
const next = [...messages];
|
|
995
|
+
next[lastIndex] = {
|
|
996
|
+
...tail,
|
|
997
|
+
content: [
|
|
998
|
+
{ type: 'text', text: content, cache_control: { type: 'ephemeral' } },
|
|
999
|
+
],
|
|
1000
|
+
} as (typeof messages)[number];
|
|
1001
|
+
return next;
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
if (!Array.isArray(content)) {
|
|
1005
|
+
return messages;
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
let anchor = -1;
|
|
1009
|
+
for (let i = 0; i < content.length; i++) {
|
|
1010
|
+
const type = (content[i] as { type?: string }).type;
|
|
1011
|
+
if (type == null || NON_CACHEABLE_PAYLOAD_BLOCK_TYPES.has(type)) {
|
|
1012
|
+
continue;
|
|
1013
|
+
}
|
|
1014
|
+
if (
|
|
1015
|
+
type === 'text' &&
|
|
1016
|
+
((content[i] as { text?: string }).text ?? '').trim() === ''
|
|
1017
|
+
) {
|
|
1018
|
+
continue;
|
|
1019
|
+
}
|
|
1020
|
+
anchor = i;
|
|
1021
|
+
}
|
|
1022
|
+
if (anchor < 0) {
|
|
1023
|
+
return messages;
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
const next = [...messages];
|
|
1027
|
+
next[lastIndex] = {
|
|
1028
|
+
...tail,
|
|
1029
|
+
content: content.map((block, i) =>
|
|
1030
|
+
i === anchor ? { ...block, cache_control: { type: 'ephemeral' } } : block
|
|
1031
|
+
),
|
|
1032
|
+
} as (typeof messages)[number];
|
|
1033
|
+
return next;
|
|
1034
|
+
}
|
|
1035
|
+
|
|
920
1036
|
export function stripUnsupportedAssistantPrefill<
|
|
921
1037
|
T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
|
|
922
1038
|
>(request: T): T {
|
|
@@ -940,9 +1056,21 @@ export function stripUnsupportedAssistantPrefill<
|
|
|
940
1056
|
nextMessages.pop();
|
|
941
1057
|
}
|
|
942
1058
|
|
|
1059
|
+
/**
|
|
1060
|
+
* If a single tail prompt-cache breakpoint rode the stripped assistant
|
|
1061
|
+
* prefill, the survivors may now carry no `cache_control` at all, dropping
|
|
1062
|
+
* message caching for this request. Re-anchor the breakpoint on the new tail
|
|
1063
|
+
* (only when one was actually lost, so caching-off requests stay untouched).
|
|
1064
|
+
*/
|
|
1065
|
+
const reanchored =
|
|
1066
|
+
messagesHaveCacheControl(messages) &&
|
|
1067
|
+
!messagesHaveCacheControl(nextMessages)
|
|
1068
|
+
? reanchorTailCacheControl(nextMessages)
|
|
1069
|
+
: nextMessages;
|
|
1070
|
+
|
|
943
1071
|
return {
|
|
944
1072
|
...request,
|
|
945
|
-
messages:
|
|
1073
|
+
messages: reanchored,
|
|
946
1074
|
};
|
|
947
1075
|
}
|
|
948
1076
|
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import type { AnthropicMessageCreateParams } from '../types';
|
|
2
|
+
import { stripUnsupportedAssistantPrefill } from './message_inputs';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* When a model disallows assistant prefill (Claude 4.6+), the trailing
|
|
6
|
+
* assistant message is stripped right before the API call. If the single tail
|
|
7
|
+
* prompt-cache breakpoint rode that assistant prefill, the survivors would lose
|
|
8
|
+
* their only message-level `cache_control` — so the strip must re-anchor the
|
|
9
|
+
* breakpoint onto the new tail.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
type Msgs = AnthropicMessageCreateParams['messages'];
|
|
13
|
+
|
|
14
|
+
function cacheControlBlocks(messages: Msgs): number {
|
|
15
|
+
let n = 0;
|
|
16
|
+
for (const m of messages) {
|
|
17
|
+
if (!Array.isArray(m.content)) continue;
|
|
18
|
+
for (const b of m.content) {
|
|
19
|
+
if ('cache_control' in b) n++;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
return n;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
describe('stripUnsupportedAssistantPrefill — cache re-anchoring', () => {
|
|
26
|
+
test('re-anchors the breakpoint onto the new tail when the prefill carried it', () => {
|
|
27
|
+
const request = {
|
|
28
|
+
model: 'claude-opus-4-6',
|
|
29
|
+
max_tokens: 100,
|
|
30
|
+
messages: [
|
|
31
|
+
{
|
|
32
|
+
role: 'user' as const,
|
|
33
|
+
content: [{ type: 'text' as const, text: 'q' }],
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
role: 'assistant' as const,
|
|
37
|
+
content: [
|
|
38
|
+
{
|
|
39
|
+
type: 'text' as const,
|
|
40
|
+
text: 'prefill',
|
|
41
|
+
cache_control: { type: 'ephemeral' as const },
|
|
42
|
+
},
|
|
43
|
+
],
|
|
44
|
+
},
|
|
45
|
+
],
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
const out = stripUnsupportedAssistantPrefill(request);
|
|
49
|
+
|
|
50
|
+
// Prefill removed, and exactly one breakpoint survives — on the new tail.
|
|
51
|
+
expect(out.messages).toHaveLength(1);
|
|
52
|
+
expect(out.messages[0].role).toBe('user');
|
|
53
|
+
expect(cacheControlBlocks(out.messages)).toBe(1);
|
|
54
|
+
const tail = out.messages[0].content as Array<{ cache_control?: unknown }>;
|
|
55
|
+
expect(tail[tail.length - 1].cache_control).toEqual({ type: 'ephemeral' });
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test('does not add a breakpoint when caching was off (no marker present)', () => {
|
|
59
|
+
const request = {
|
|
60
|
+
model: 'claude-opus-4-6',
|
|
61
|
+
max_tokens: 100,
|
|
62
|
+
messages: [
|
|
63
|
+
{ role: 'user' as const, content: 'q' },
|
|
64
|
+
{ role: 'assistant' as const, content: 'prefill' },
|
|
65
|
+
],
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
const out = stripUnsupportedAssistantPrefill(request);
|
|
69
|
+
|
|
70
|
+
expect(out.messages).toHaveLength(1);
|
|
71
|
+
expect(cacheControlBlocks(out.messages)).toBe(0);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test('leaves a surviving breakpoint untouched (no double-anchor)', () => {
|
|
75
|
+
const request = {
|
|
76
|
+
model: 'claude-opus-4-6',
|
|
77
|
+
max_tokens: 100,
|
|
78
|
+
messages: [
|
|
79
|
+
{
|
|
80
|
+
role: 'user' as const,
|
|
81
|
+
content: [
|
|
82
|
+
{
|
|
83
|
+
type: 'text' as const,
|
|
84
|
+
text: 'q',
|
|
85
|
+
cache_control: { type: 'ephemeral' as const },
|
|
86
|
+
},
|
|
87
|
+
],
|
|
88
|
+
},
|
|
89
|
+
{ role: 'assistant' as const, content: 'prefill' },
|
|
90
|
+
],
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
const out = stripUnsupportedAssistantPrefill(request);
|
|
94
|
+
|
|
95
|
+
expect(out.messages).toHaveLength(1);
|
|
96
|
+
expect(cacheControlBlocks(out.messages)).toBe(1);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test('older models keep the assistant prefill (no strip, no re-anchor)', () => {
|
|
100
|
+
const request = {
|
|
101
|
+
model: 'claude-sonnet-4-5-20250929',
|
|
102
|
+
max_tokens: 100,
|
|
103
|
+
messages: [
|
|
104
|
+
{ role: 'user' as const, content: 'q' },
|
|
105
|
+
{ role: 'assistant' as const, content: '{' },
|
|
106
|
+
],
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
expect(stripUnsupportedAssistantPrefill(request)).toBe(request);
|
|
110
|
+
});
|
|
111
|
+
});
|