@librechat/agents 3.2.38 → 3.2.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +25 -8
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +7 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +20 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +7 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +5 -4
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +34 -17
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +1 -0
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/toolCache.cjs +18 -5
- package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
- package/dist/cjs/main.cjs +4 -0
- package/dist/cjs/messages/anthropicToolCache.cjs +75 -13
- package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +91 -35
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +3 -2
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +26 -9
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +8 -5
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +20 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +7 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +5 -4
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +34 -17
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +1 -0
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/toolCache.mjs +18 -5
- package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/anthropicToolCache.mjs +75 -13
- package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +88 -36
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +4 -3
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +11 -0
- package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +2 -0
- package/dist/types/llm/bedrock/index.d.ts +13 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +2 -1
- package/dist/types/llm/openrouter/index.d.ts +8 -0
- package/dist/types/llm/openrouter/toolCache.d.ts +2 -1
- package/dist/types/messages/anthropicToolCache.d.ts +2 -1
- package/dist/types/messages/cache.d.ts +49 -5
- package/dist/types/types/llm.d.ts +14 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +64 -17
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +6 -2
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +7 -5
- package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +1 -1
- package/src/agents/__tests__/AgentContext.test.ts +31 -19
- package/src/agents/__tests__/promptCacheLiveHelpers.ts +6 -2
- package/src/graphs/Graph.ts +40 -4
- package/src/llm/anthropic/utils/message_inputs.ts +33 -6
- package/src/llm/bedrock/index.ts +21 -1
- package/src/llm/bedrock/llm.spec.ts +61 -0
- package/src/llm/bedrock/toolCache.test.ts +24 -0
- package/src/llm/bedrock/toolCache.ts +12 -7
- package/src/llm/bedrock/utils/message_inputs.ts +57 -40
- package/src/llm/openrouter/index.ts +9 -0
- package/src/llm/openrouter/toolCache.test.ts +52 -1
- package/src/llm/openrouter/toolCache.ts +40 -6
- package/src/messages/__tests__/anthropicToolCache.test.ts +168 -0
- package/src/messages/anthropicToolCache.ts +118 -15
- package/src/messages/cache.test.ts +175 -0
- package/src/messages/cache.ts +133 -48
- package/src/summarization/node.ts +21 -2
- package/src/types/llm.ts +14 -0
|
@@ -8,6 +8,15 @@ import type {
|
|
|
8
8
|
} from '@langchain/core/messages';
|
|
9
9
|
import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
|
|
10
10
|
import type * as t from '@/types';
|
|
11
|
+
import {
|
|
12
|
+
addTailCacheControl,
|
|
13
|
+
addCacheControlToStablePrefixMessages,
|
|
14
|
+
buildAnthropicCacheControl,
|
|
15
|
+
buildBedrockCachePoint,
|
|
16
|
+
resolvePromptCacheTtl,
|
|
17
|
+
cloneMessage,
|
|
18
|
+
type PromptCacheTtl,
|
|
19
|
+
} from '@/messages/cache';
|
|
11
20
|
import {
|
|
12
21
|
ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
|
|
13
22
|
DEFAULT_TOOL_TOKEN_MULTIPLIER,
|
|
@@ -15,30 +24,25 @@ import {
|
|
|
15
24
|
Constants,
|
|
16
25
|
Providers,
|
|
17
26
|
} from '@/common';
|
|
18
|
-
import {
|
|
19
|
-
addTailCacheControl,
|
|
20
|
-
addCacheControlToStablePrefixMessages,
|
|
21
|
-
cloneMessage,
|
|
22
|
-
} from '@/messages/cache';
|
|
23
|
-
import { createSchemaOnlyTools } from '@/tools/schema';
|
|
24
|
-
import { apportionTokenCounts } from '@/utils/tokens';
|
|
25
27
|
import {
|
|
26
28
|
DEFAULT_RESERVE_RATIO,
|
|
27
29
|
createPruneMessages,
|
|
28
30
|
syncBudgetDerivedFields,
|
|
29
31
|
} from '@/messages';
|
|
32
|
+
import { createSchemaOnlyTools } from '@/tools/schema';
|
|
33
|
+
import { apportionTokenCounts } from '@/utils/tokens';
|
|
30
34
|
import { isThinkingEnabled } from '@/llm/request';
|
|
31
35
|
import { toJsonSchema } from '@/utils/schema';
|
|
32
36
|
|
|
33
37
|
type AgentSystemTextBlock = {
|
|
34
38
|
type: 'text';
|
|
35
39
|
text: string;
|
|
36
|
-
cache_control?: { type: 'ephemeral' };
|
|
40
|
+
cache_control?: { type: 'ephemeral'; ttl?: '1h' };
|
|
37
41
|
};
|
|
38
42
|
|
|
39
43
|
type AgentSystemContentBlock =
|
|
40
44
|
| AgentSystemTextBlock
|
|
41
|
-
| { cachePoint: { type: 'default' } };
|
|
45
|
+
| { cachePoint: { type: 'default'; ttl?: '1h' } };
|
|
42
46
|
|
|
43
47
|
type PromptCacheProvider = Providers.ANTHROPIC | Providers.OPENROUTER;
|
|
44
48
|
|
|
@@ -689,7 +693,10 @@ export class AgentContext {
|
|
|
689
693
|
dynamicTail.length === 0 &&
|
|
690
694
|
body.length >= 2
|
|
691
695
|
) {
|
|
692
|
-
body = addTailCacheControl(body);
|
|
696
|
+
body = addTailCacheControl(
|
|
697
|
+
body,
|
|
698
|
+
this.getPromptCacheTtl(promptCacheProvider)
|
|
699
|
+
);
|
|
693
700
|
}
|
|
694
701
|
return [...prefix, ...body];
|
|
695
702
|
}).withConfig({ runName: 'prompt' });
|
|
@@ -713,7 +720,9 @@ export class AgentContext {
|
|
|
713
720
|
{
|
|
714
721
|
type: 'text',
|
|
715
722
|
text: wrappedSummary,
|
|
716
|
-
cache_control: { type: 'ephemeral' },
|
|
723
|
+
cache_control: buildAnthropicCacheControl(
|
|
724
|
+
this.getPromptCacheTtl(Providers.ANTHROPIC)
|
|
725
|
+
),
|
|
717
726
|
},
|
|
718
727
|
],
|
|
719
728
|
});
|
|
@@ -760,7 +769,10 @@ export class AgentContext {
|
|
|
760
769
|
);
|
|
761
770
|
const stablePrefix = messages.slice(0, tailIndex);
|
|
762
771
|
const trailingMessages = messages.slice(tailIndex);
|
|
763
|
-
const cacheablePrefix = this.addStablePromptCacheMarkers(stablePrefix);
|
|
772
|
+
const cacheablePrefix = this.addStablePromptCacheMarkers(
|
|
773
|
+
stablePrefix,
|
|
774
|
+
this.getPromptCacheTtl(promptCacheProvider)
|
|
775
|
+
);
|
|
764
776
|
|
|
765
777
|
return [...cacheablePrefix, ...tail, ...trailingMessages];
|
|
766
778
|
}
|
|
@@ -791,14 +803,17 @@ export class AgentContext {
|
|
|
791
803
|
return messages.length;
|
|
792
804
|
}
|
|
793
805
|
|
|
794
|
-
private addStablePromptCacheMarkers(messages: BaseMessage[]): BaseMessage[] {
|
|
806
|
+
private addStablePromptCacheMarkers(
|
|
807
|
+
messages: BaseMessage[],
|
|
808
|
+
ttl?: PromptCacheTtl
|
|
809
|
+
): BaseMessage[] {
|
|
795
810
|
if (messages.length <= 1) {
|
|
796
811
|
return messages;
|
|
797
812
|
}
|
|
798
813
|
|
|
799
814
|
return [
|
|
800
815
|
messages[0],
|
|
801
|
-
...addCacheControlToStablePrefixMessages(messages.slice(1), 2),
|
|
816
|
+
...addCacheControlToStablePrefixMessages(messages.slice(1), 2, ttl),
|
|
802
817
|
];
|
|
803
818
|
}
|
|
804
819
|
|
|
@@ -834,6 +849,34 @@ export class AgentContext {
|
|
|
834
849
|
return bedrockOptions?.promptCache === true;
|
|
835
850
|
}
|
|
836
851
|
|
|
852
|
+
/**
|
|
853
|
+
* Resolved TTL for the active prompt-cache provider (Anthropic or OpenRouter).
|
|
854
|
+
* Both expose `promptCacheTtl` and use the Anthropic `cache_control` format, so
|
|
855
|
+
* the configured value resolves the same way (default `'1h'` extended cache).
|
|
856
|
+
*/
|
|
857
|
+
private getPromptCacheTtl(
|
|
858
|
+
provider: PromptCacheProvider | undefined
|
|
859
|
+
): PromptCacheTtl | undefined {
|
|
860
|
+
if (provider == null) {
|
|
861
|
+
return undefined;
|
|
862
|
+
}
|
|
863
|
+
return resolvePromptCacheTtl(
|
|
864
|
+
(this.clientOptions as { promptCacheTtl?: PromptCacheTtl } | undefined)
|
|
865
|
+
?.promptCacheTtl
|
|
866
|
+
);
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
/**
|
|
870
|
+
* Resolved TTL for Bedrock prompt-cache checkpoints (default `'1h'`).
|
|
871
|
+
* Models that don't support the 1-hour TTL downgrade to 5m server-side.
|
|
872
|
+
*/
|
|
873
|
+
private getBedrockPromptCacheTtl(): PromptCacheTtl {
|
|
874
|
+
const bedrockOptions = this.clientOptions as
|
|
875
|
+
| t.BedrockAnthropicClientOptions
|
|
876
|
+
| undefined;
|
|
877
|
+
return resolvePromptCacheTtl(bedrockOptions?.promptCacheTtl);
|
|
878
|
+
}
|
|
879
|
+
|
|
837
880
|
private buildSystemMessage({
|
|
838
881
|
stableInstructions,
|
|
839
882
|
dynamicInstructions,
|
|
@@ -855,7 +898,9 @@ export class AgentContext {
|
|
|
855
898
|
content.push({
|
|
856
899
|
type: 'text',
|
|
857
900
|
text: stableInstructions,
|
|
858
|
-
cache_control: { type: 'ephemeral' },
|
|
901
|
+
cache_control: buildAnthropicCacheControl(
|
|
902
|
+
this.getPromptCacheTtl(promptCacheProvider)
|
|
903
|
+
),
|
|
859
904
|
});
|
|
860
905
|
}
|
|
861
906
|
if (dynamicInstructions && !shouldMoveDynamicInstructions) {
|
|
@@ -874,7 +919,9 @@ export class AgentContext {
|
|
|
874
919
|
{
|
|
875
920
|
type: 'text',
|
|
876
921
|
text: stableInstructions,
|
|
877
|
-
cache_control: { type: 'ephemeral' },
|
|
922
|
+
cache_control: buildAnthropicCacheControl(
|
|
923
|
+
this.getPromptCacheTtl(promptCacheProvider)
|
|
924
|
+
),
|
|
878
925
|
},
|
|
879
926
|
],
|
|
880
927
|
} as BaseMessageFields);
|
|
@@ -883,7 +930,7 @@ export class AgentContext {
|
|
|
883
930
|
if (this.hasBedrockPromptCache() && stableInstructions) {
|
|
884
931
|
const content: AgentSystemContentBlock[] = [
|
|
885
932
|
{ type: 'text', text: stableInstructions },
|
|
886
|
-
{ cachePoint: { type: 'default' } },
|
|
933
|
+
{ cachePoint: buildBedrockCachePoint(this.getBedrockPromptCacheTtl()) },
|
|
887
934
|
];
|
|
888
935
|
if (dynamicInstructions) {
|
|
889
936
|
content.push({ type: 'text', text: dynamicInstructions });
|
|
@@ -187,7 +187,11 @@ function addLatestUserOnlyAnthropicCacheControl(
|
|
|
187
187
|
if (!modified) {
|
|
188
188
|
continue;
|
|
189
189
|
}
|
|
190
|
-
} else if (
|
|
190
|
+
} else if (
|
|
191
|
+
typeof content === 'string' &&
|
|
192
|
+
content.trim() !== '' &&
|
|
193
|
+
canAddCache
|
|
194
|
+
) {
|
|
191
195
|
workingContent = [
|
|
192
196
|
{
|
|
193
197
|
type: 'text',
|
|
@@ -348,7 +352,7 @@ describeIfLive('AgentContext Anthropic prompt cache live API', () => {
|
|
|
348
352
|
{
|
|
349
353
|
type: 'text',
|
|
350
354
|
text: stableInstructions,
|
|
351
|
-
cache_control: { type: 'ephemeral' },
|
|
355
|
+
cache_control: { type: 'ephemeral', ttl: '1h' },
|
|
352
356
|
},
|
|
353
357
|
],
|
|
354
358
|
});
|
|
@@ -279,7 +279,11 @@ function addLatestUserOnlyBedrockCacheControl(
|
|
|
279
279
|
if (!modified) {
|
|
280
280
|
continue;
|
|
281
281
|
}
|
|
282
|
-
} else if (
|
|
282
|
+
} else if (
|
|
283
|
+
typeof content === 'string' &&
|
|
284
|
+
content.trim() !== '' &&
|
|
285
|
+
canAddCache
|
|
286
|
+
) {
|
|
283
287
|
workingContent = [
|
|
284
288
|
{ type: 'text', text: content } as MessageContentComplex,
|
|
285
289
|
cachePointBlock(),
|
|
@@ -490,7 +494,7 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
|
490
494
|
text: stableInstructions,
|
|
491
495
|
},
|
|
492
496
|
{
|
|
493
|
-
cachePoint: { type: 'default' },
|
|
497
|
+
cachePoint: { type: 'default', ttl: '1h' },
|
|
494
498
|
},
|
|
495
499
|
{
|
|
496
500
|
type: 'text',
|
|
@@ -643,9 +647,7 @@ describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
|
643
647
|
})}\n`
|
|
644
648
|
);
|
|
645
649
|
|
|
646
|
-
expect(currentSecond.cacheRead).toBeGreaterThan(
|
|
647
|
-
latestOnlySecond.cacheRead
|
|
648
|
-
);
|
|
650
|
+
expect(currentSecond.cacheRead).toBeGreaterThan(latestOnlySecond.cacheRead);
|
|
649
651
|
expect(currentSecond.cacheCreation).toBeLessThan(
|
|
650
652
|
latestOnlySecond.cacheCreation
|
|
651
653
|
);
|
|
@@ -7,8 +7,12 @@ import { AgentContext } from '../AgentContext';
|
|
|
7
7
|
|
|
8
8
|
describe('AgentContext', () => {
|
|
9
9
|
type TestSystemContentBlock =
|
|
10
|
-
| {
|
|
11
|
-
|
|
10
|
+
| {
|
|
11
|
+
type: 'text';
|
|
12
|
+
text: string;
|
|
13
|
+
cache_control?: { type: 'ephemeral'; ttl?: '1h' };
|
|
14
|
+
}
|
|
15
|
+
| { cachePoint: { type: 'default'; ttl?: '1h' } };
|
|
12
16
|
|
|
13
17
|
type ContextOptions = {
|
|
14
18
|
agentConfig?: Partial<t.AgentInputs>;
|
|
@@ -98,7 +102,7 @@ describe('AgentContext', () => {
|
|
|
98
102
|
{
|
|
99
103
|
type: 'text',
|
|
100
104
|
text: 'Stable instructions',
|
|
101
|
-
cache_control: { type: 'ephemeral' },
|
|
105
|
+
cache_control: { type: 'ephemeral', ttl: '1h' },
|
|
102
106
|
},
|
|
103
107
|
]);
|
|
104
108
|
expect(result[1].content).toBe('Hello');
|
|
@@ -164,7 +168,7 @@ describe('AgentContext', () => {
|
|
|
164
168
|
agentConfig: {
|
|
165
169
|
provider: Providers.BEDROCK,
|
|
166
170
|
clientOptions: {
|
|
167
|
-
model: 'anthropic.claude-
|
|
171
|
+
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
|
|
168
172
|
promptCache: true,
|
|
169
173
|
},
|
|
170
174
|
instructions: 'Stable instructions',
|
|
@@ -176,7 +180,7 @@ describe('AgentContext', () => {
|
|
|
176
180
|
const content = result[0].content as TestSystemContentBlock[];
|
|
177
181
|
expect(content).toEqual([
|
|
178
182
|
{ type: 'text', text: 'Stable instructions' },
|
|
179
|
-
{ cachePoint: { type: 'default' } },
|
|
183
|
+
{ cachePoint: { type: 'default', ttl: '1h' } },
|
|
180
184
|
{ type: 'text', text: 'Dynamic instructions' },
|
|
181
185
|
]);
|
|
182
186
|
});
|
|
@@ -186,7 +190,7 @@ describe('AgentContext', () => {
|
|
|
186
190
|
agentConfig: {
|
|
187
191
|
provider: Providers.BEDROCK,
|
|
188
192
|
clientOptions: {
|
|
189
|
-
model: 'anthropic.claude-
|
|
193
|
+
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
|
|
190
194
|
promptCache: true,
|
|
191
195
|
},
|
|
192
196
|
instructions: undefined,
|
|
@@ -240,7 +244,7 @@ describe('AgentContext', () => {
|
|
|
240
244
|
{
|
|
241
245
|
type: 'text',
|
|
242
246
|
text: 'Stable instructions',
|
|
243
|
-
cache_control: { type: 'ephemeral' },
|
|
247
|
+
cache_control: { type: 'ephemeral', ttl: '1h' },
|
|
244
248
|
},
|
|
245
249
|
]);
|
|
246
250
|
expect(result[1].content).toBe('Hello');
|
|
@@ -730,7 +734,7 @@ describe('AgentContext', () => {
|
|
|
730
734
|
agentConfig: {
|
|
731
735
|
provider: Providers.BEDROCK,
|
|
732
736
|
clientOptions: {
|
|
733
|
-
model: 'anthropic.claude-
|
|
737
|
+
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
|
|
734
738
|
promptCache: true,
|
|
735
739
|
},
|
|
736
740
|
instructions: 'Stable instructions',
|
|
@@ -741,10 +745,12 @@ describe('AgentContext', () => {
|
|
|
741
745
|
const result = await ctx.systemRunnable!.invoke([
|
|
742
746
|
new HumanMessage('Hello'),
|
|
743
747
|
]);
|
|
744
|
-
|
|
748
|
+
// The graph applies the same resolved TTL it stamped on the system
|
|
749
|
+
// checkpoint, so the 1h system cachePoint is preserved (normalized to 1h).
|
|
750
|
+
const finalMessages = addBedrockCacheControl(result, '1h');
|
|
745
751
|
expect(finalMessages[0].content).toEqual([
|
|
746
752
|
{ type: 'text', text: 'Stable instructions' },
|
|
747
|
-
{ cachePoint: { type: 'default' } },
|
|
753
|
+
{ cachePoint: { type: 'default', ttl: '1h' } },
|
|
748
754
|
{ type: 'text', text: 'Dynamic instructions' },
|
|
749
755
|
]);
|
|
750
756
|
});
|
|
@@ -2154,7 +2160,7 @@ describe('AgentContext', () => {
|
|
|
2154
2160
|
const buildBranch = (
|
|
2155
2161
|
maxContextTokens: number,
|
|
2156
2162
|
perMessageTokens: number,
|
|
2157
|
-
count: number
|
|
2163
|
+
count: number
|
|
2158
2164
|
): { ctx: AgentContext; messages: AIMessage[] } => {
|
|
2159
2165
|
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2160
2166
|
ctx.maxContextTokens = maxContextTokens;
|
|
@@ -2166,7 +2172,7 @@ describe('AgentContext', () => {
|
|
|
2166
2172
|
messages.push(
|
|
2167
2173
|
i % 2 === 0
|
|
2168
2174
|
? (new HumanMessage(content) as unknown as AIMessage)
|
|
2169
|
-
: new AIMessage(content)
|
|
2175
|
+
: new AIMessage(content)
|
|
2170
2176
|
);
|
|
2171
2177
|
}
|
|
2172
2178
|
return { ctx, messages };
|
|
@@ -2175,7 +2181,9 @@ describe('AgentContext', () => {
|
|
|
2175
2181
|
it('returns null without a tokenizer or a window', () => {
|
|
2176
2182
|
const noCounter = createBasicContext({});
|
|
2177
2183
|
noCounter.maxContextTokens = 1000;
|
|
2178
|
-
expect(
|
|
2184
|
+
expect(
|
|
2185
|
+
noCounter.projectContextUsage([new HumanMessage('hi')])
|
|
2186
|
+
).toBeNull();
|
|
2179
2187
|
|
|
2180
2188
|
const noWindow = createBasicContext({ tokenCounter: countByChars });
|
|
2181
2189
|
noWindow.maxContextTokens = undefined;
|
|
@@ -2207,7 +2215,9 @@ describe('AgentContext', () => {
|
|
|
2207
2215
|
expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
|
|
2208
2216
|
|
|
2209
2217
|
const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
|
|
2210
|
-
expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(
|
|
2218
|
+
expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(
|
|
2219
|
+
max
|
|
2220
|
+
);
|
|
2211
2221
|
});
|
|
2212
2222
|
|
|
2213
2223
|
it('does not mutate the context (local pruner, no field writes)', () => {
|
|
@@ -2245,7 +2255,7 @@ describe('AgentContext', () => {
|
|
|
2245
2255
|
|
|
2246
2256
|
expect(messages[2]).toBe(originalRef);
|
|
2247
2257
|
expect((messages[2] as unknown as ToolMessage).content).toBe(
|
|
2248
|
-
originalContent
|
|
2258
|
+
originalContent
|
|
2249
2259
|
);
|
|
2250
2260
|
});
|
|
2251
2261
|
|
|
@@ -2257,7 +2267,9 @@ describe('AgentContext', () => {
|
|
|
2257
2267
|
ctx.indexTokenCountMap = {};
|
|
2258
2268
|
const messages: AIMessage[] = [];
|
|
2259
2269
|
for (let i = 0; i < 6; i++) {
|
|
2260
|
-
messages.push(
|
|
2270
|
+
messages.push(
|
|
2271
|
+
new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage
|
|
2272
|
+
);
|
|
2261
2273
|
}
|
|
2262
2274
|
|
|
2263
2275
|
const usage = ctx.projectContextUsage(messages);
|
|
@@ -2285,7 +2297,7 @@ describe('AgentContext', () => {
|
|
|
2285
2297
|
const ctx = createBasicContext({ tokenCounter: countByChars });
|
|
2286
2298
|
ctx.maxContextTokens = 5_000;
|
|
2287
2299
|
const messages: AIMessage[] = [0, 1, 2].map(
|
|
2288
|
-
() => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage
|
|
2300
|
+
() => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage
|
|
2289
2301
|
);
|
|
2290
2302
|
return { ctx, messages };
|
|
2291
2303
|
};
|
|
@@ -2303,7 +2315,7 @@ describe('AgentContext', () => {
|
|
|
2303
2315
|
const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
|
|
2304
2316
|
|
|
2305
2317
|
expect(dirtyUsage!.remainingContextTokens).toBe(
|
|
2306
|
-
cleanUsage!.remainingContextTokens
|
|
2318
|
+
cleanUsage!.remainingContextTokens
|
|
2307
2319
|
);
|
|
2308
2320
|
expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
|
|
2309
2321
|
});
|
|
@@ -2350,7 +2362,7 @@ describe('AgentContext', () => {
|
|
|
2350
2362
|
|
|
2351
2363
|
expect(scaledUsage!.calibrationRatio).toBe(3);
|
|
2352
2364
|
expect(scaledUsage!.remainingContextTokens).not.toBe(
|
|
2353
|
-
baseUsage!.remainingContextTokens
|
|
2365
|
+
baseUsage!.remainingContextTokens
|
|
2354
2366
|
);
|
|
2355
2367
|
});
|
|
2356
2368
|
|
|
@@ -15,8 +15,12 @@ type LivePromptCacheProvider =
|
|
|
15
15
|
| Providers.OPENROUTER;
|
|
16
16
|
|
|
17
17
|
type PromptCacheExpectedSystemBlock =
|
|
18
|
-
| {
|
|
19
|
-
|
|
18
|
+
| {
|
|
19
|
+
type: 'text';
|
|
20
|
+
text: string;
|
|
21
|
+
cache_control?: { type: 'ephemeral'; ttl?: '1h' };
|
|
22
|
+
}
|
|
23
|
+
| { cachePoint: { type: 'default'; ttl?: '1h' } };
|
|
20
24
|
|
|
21
25
|
type LivePromptCacheClientOptions =
|
|
22
26
|
| t.ClientOptions
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
createPruneMessages,
|
|
28
28
|
syncBudgetDerivedFields,
|
|
29
29
|
addTailCacheControl,
|
|
30
|
+
resolvePromptCacheTtl,
|
|
30
31
|
getMessageId,
|
|
31
32
|
makeIsDeferred,
|
|
32
33
|
partitionAndMarkAnthropicToolCache,
|
|
@@ -1404,7 +1405,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1404
1405
|
toolsForBinding =
|
|
1405
1406
|
partitionAndMarkAnthropicToolCache(
|
|
1406
1407
|
rawToolsForBinding,
|
|
1407
|
-
makeIsDeferred(agentContext.toolDefinitions)
|
|
1408
|
+
makeIsDeferred(agentContext.toolDefinitions),
|
|
1409
|
+
resolvePromptCacheTtl(
|
|
1410
|
+
(
|
|
1411
|
+
agentContext.clientOptions as
|
|
1412
|
+
| t.AnthropicClientOptions
|
|
1413
|
+
| undefined
|
|
1414
|
+
)?.promptCacheTtl
|
|
1415
|
+
)
|
|
1408
1416
|
) ?? rawToolsForBinding;
|
|
1409
1417
|
} else if (
|
|
1410
1418
|
agentContext.provider === Providers.OPENROUTER &&
|
|
@@ -1417,7 +1425,14 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1417
1425
|
toolsForBinding =
|
|
1418
1426
|
partitionAndMarkOpenRouterToolCache(
|
|
1419
1427
|
rawToolsForBinding,
|
|
1420
|
-
makeIsDeferred(agentContext.toolDefinitions)
|
|
1428
|
+
makeIsDeferred(agentContext.toolDefinitions),
|
|
1429
|
+
resolvePromptCacheTtl(
|
|
1430
|
+
(
|
|
1431
|
+
agentContext.clientOptions as
|
|
1432
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1433
|
+
| undefined
|
|
1434
|
+
)?.promptCacheTtl
|
|
1435
|
+
)
|
|
1421
1436
|
) ?? rawToolsForBinding;
|
|
1422
1437
|
} else if (
|
|
1423
1438
|
agentContext.provider === Providers.BEDROCK &&
|
|
@@ -1833,9 +1848,30 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1833
1848
|
(anthropicPromptCacheEnabled || openRouterPromptCacheEnabled) &&
|
|
1834
1849
|
!agentContext.systemRunnable
|
|
1835
1850
|
) {
|
|
1836
|
-
finalMessages = addTailCacheControl<BaseMessage>(finalMessages);
|
|
1851
|
+
finalMessages = addTailCacheControl<BaseMessage>(
|
|
1852
|
+
finalMessages,
|
|
1853
|
+
resolvePromptCacheTtl(
|
|
1854
|
+
anthropicPromptCacheEnabled
|
|
1855
|
+
? (
|
|
1856
|
+
agentContext.clientOptions as
|
|
1857
|
+
| t.AnthropicClientOptions
|
|
1858
|
+
| undefined
|
|
1859
|
+
)?.promptCacheTtl
|
|
1860
|
+
: (
|
|
1861
|
+
agentContext.clientOptions as
|
|
1862
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1863
|
+
| undefined
|
|
1864
|
+
)?.promptCacheTtl
|
|
1865
|
+
)
|
|
1866
|
+
);
|
|
1837
1867
|
} else if (bedrockPromptCacheEnabled) {
|
|
1838
|
-
|
|
1868
|
+
const bedrockOptions = agentContext.clientOptions as
|
|
1869
|
+
| t.BedrockAnthropicClientOptions
|
|
1870
|
+
| undefined;
|
|
1871
|
+
finalMessages = addBedrockTailCacheControl<BaseMessage>(
|
|
1872
|
+
finalMessages,
|
|
1873
|
+
resolvePromptCacheTtl(bedrockOptions?.promptCacheTtl)
|
|
1874
|
+
);
|
|
1839
1875
|
}
|
|
1840
1876
|
|
|
1841
1877
|
if (
|
|
@@ -978,11 +978,16 @@ const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
|
|
|
978
978
|
* skipped. Returns a new array only when it actually places a marker.
|
|
979
979
|
*/
|
|
980
980
|
function reanchorTailCacheControl(
|
|
981
|
-
messages: AnthropicMessageCreateParams['messages']
|
|
981
|
+
messages: AnthropicMessageCreateParams['messages'],
|
|
982
|
+
ttl?: '1h'
|
|
982
983
|
): AnthropicMessageCreateParams['messages'] {
|
|
983
984
|
if (messages.length === 0) {
|
|
984
985
|
return messages;
|
|
985
986
|
}
|
|
987
|
+
const cacheControl =
|
|
988
|
+
ttl === '1h'
|
|
989
|
+
? ({ type: 'ephemeral', ttl: '1h' } as const)
|
|
990
|
+
: ({ type: 'ephemeral' } as const);
|
|
986
991
|
const lastIndex = messages.length - 1;
|
|
987
992
|
const tail = messages[lastIndex];
|
|
988
993
|
const content = tail.content;
|
|
@@ -994,9 +999,7 @@ function reanchorTailCacheControl(
|
|
|
994
999
|
const next = [...messages];
|
|
995
1000
|
next[lastIndex] = {
|
|
996
1001
|
...tail,
|
|
997
|
-
content: [
|
|
998
|
-
{ type: 'text', text: content, cache_control: { type: 'ephemeral' } },
|
|
999
|
-
],
|
|
1002
|
+
content: [{ type: 'text', text: content, cache_control: cacheControl }],
|
|
1000
1003
|
} as (typeof messages)[number];
|
|
1001
1004
|
return next;
|
|
1002
1005
|
}
|
|
@@ -1027,12 +1030,36 @@ function reanchorTailCacheControl(
|
|
|
1027
1030
|
next[lastIndex] = {
|
|
1028
1031
|
...tail,
|
|
1029
1032
|
content: content.map((block, i) =>
|
|
1030
|
-
i === anchor ? { ...block, cache_control: { type: 'ephemeral' } } : block
|
|
1033
|
+
i === anchor ? { ...block, cache_control: cacheControl } : block
|
|
1031
1034
|
),
|
|
1032
1035
|
} as (typeof messages)[number];
|
|
1033
1036
|
return next;
|
|
1034
1037
|
}
|
|
1035
1038
|
|
|
1039
|
+
/**
|
|
1040
|
+
* Find the extended-cache TTL (`'1h'`) carried by an existing `cache_control`
|
|
1041
|
+
* breakpoint, so {@link reanchorTailCacheControl} can re-apply the same TTL the
|
|
1042
|
+
* stripped prefill had. Returns `undefined` for the legacy 5-minute default
|
|
1043
|
+
* (no `ttl`), keeping that path byte-identical to before.
|
|
1044
|
+
*/
|
|
1045
|
+
function findCacheControlTtl(
|
|
1046
|
+
messages: AnthropicMessageCreateParams['messages']
|
|
1047
|
+
): '1h' | undefined {
|
|
1048
|
+
for (const message of messages) {
|
|
1049
|
+
if (!Array.isArray(message.content)) {
|
|
1050
|
+
continue;
|
|
1051
|
+
}
|
|
1052
|
+
for (const block of message.content) {
|
|
1053
|
+
const cacheControl = (block as { cache_control?: { ttl?: unknown } })
|
|
1054
|
+
.cache_control;
|
|
1055
|
+
if (cacheControl?.ttl === '1h') {
|
|
1056
|
+
return '1h';
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
return undefined;
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1036
1063
|
export function stripUnsupportedAssistantPrefill<
|
|
1037
1064
|
T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
|
|
1038
1065
|
>(request: T): T {
|
|
@@ -1065,7 +1092,7 @@ export function stripUnsupportedAssistantPrefill<
|
|
|
1065
1092
|
const reanchored =
|
|
1066
1093
|
messagesHaveCacheControl(messages) &&
|
|
1067
1094
|
!messagesHaveCacheControl(nextMessages)
|
|
1068
|
-
? reanchorTailCacheControl(nextMessages)
|
|
1095
|
+
? reanchorTailCacheControl(nextMessages, findCacheControlTtl(messages))
|
|
1069
1096
|
: nextMessages;
|
|
1070
1097
|
|
|
1071
1098
|
return {
|
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
handleConverseStreamContentBlockDelta,
|
|
40
40
|
handleConverseStreamMetadata,
|
|
41
41
|
} from './utils';
|
|
42
|
+
import { resolvePromptCacheTtl, type PromptCacheTtl } from '@/messages/cache';
|
|
42
43
|
import { insertBedrockToolCachePoint } from './toolCache';
|
|
43
44
|
|
|
44
45
|
/**
|
|
@@ -63,6 +64,15 @@ export interface CustomChatBedrockConverseInput
|
|
|
63
64
|
*/
|
|
64
65
|
promptCache?: boolean;
|
|
65
66
|
|
|
67
|
+
/**
|
|
68
|
+
* Prompt-cache checkpoint TTL. Defaults to `'1h'` (extended cache) when
|
|
69
|
+
* `promptCache` is enabled; set `'5m'` for the legacy 5-minute behavior.
|
|
70
|
+
* Bedrock models that don't support the 1-hour TTL downgrade to 5m
|
|
71
|
+
* server-side (verified on Sonnet/Opus 4.6), so the default is safe to leave
|
|
72
|
+
* on; use `'5m'` for any model that rejects it.
|
|
73
|
+
*/
|
|
74
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
75
|
+
|
|
66
76
|
/**
|
|
67
77
|
* Guardrail configuration for Converse and ConverseStream invocations.
|
|
68
78
|
* `streamProcessingMode` is only used by ConverseStream.
|
|
@@ -109,6 +119,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
109
119
|
*/
|
|
110
120
|
promptCache?: boolean;
|
|
111
121
|
|
|
122
|
+
/**
|
|
123
|
+
* Prompt-cache checkpoint TTL (`'5m'` legacy or `'1h'` extended cache).
|
|
124
|
+
*/
|
|
125
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
126
|
+
|
|
112
127
|
/**
|
|
113
128
|
* Application Inference Profile ARN to use instead of model ID.
|
|
114
129
|
*/
|
|
@@ -122,6 +137,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
122
137
|
constructor(fields?: CustomChatBedrockConverseInput) {
|
|
123
138
|
super(fields);
|
|
124
139
|
this.promptCache = fields?.promptCache;
|
|
140
|
+
this.promptCacheTtl = fields?.promptCacheTtl;
|
|
125
141
|
this.applicationInferenceProfile = fields?.applicationInferenceProfile;
|
|
126
142
|
this.serviceTier = fields?.serviceTier;
|
|
127
143
|
}
|
|
@@ -149,7 +165,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
149
165
|
const baseParams = super.invocationParams(options);
|
|
150
166
|
const toolConfig =
|
|
151
167
|
this.promptCache === true
|
|
152
|
-
? insertBedrockToolCachePoint(
|
|
168
|
+
? insertBedrockToolCachePoint(
|
|
169
|
+
baseParams.toolConfig,
|
|
170
|
+
true,
|
|
171
|
+
resolvePromptCacheTtl(this.promptCacheTtl)
|
|
172
|
+
)
|
|
153
173
|
: baseParams.toolConfig;
|
|
154
174
|
|
|
155
175
|
/** Service tier from options or fall back to class-level setting */
|
|
@@ -397,6 +397,67 @@ describe('CustomChatBedrockConverse', () => {
|
|
|
397
397
|
]);
|
|
398
398
|
});
|
|
399
399
|
|
|
400
|
+
test('defaults the tool cache point to the 1h extended TTL', () => {
|
|
401
|
+
const model = new CustomChatBedrockConverse({
|
|
402
|
+
...baseConstructorArgs,
|
|
403
|
+
promptCache: true,
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
const params = model.invocationParams({
|
|
407
|
+
tools: [
|
|
408
|
+
{
|
|
409
|
+
type: 'function',
|
|
410
|
+
function: {
|
|
411
|
+
name: 'direct_tool',
|
|
412
|
+
description: 'Direct tool',
|
|
413
|
+
parameters: { type: 'object', properties: {} },
|
|
414
|
+
},
|
|
415
|
+
},
|
|
416
|
+
],
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
|
|
420
|
+
Record<string, unknown>
|
|
421
|
+
>;
|
|
422
|
+
const cachePoints = toolList.filter((t) => 'cachePoint' in t);
|
|
423
|
+
expect(cachePoints).toHaveLength(1);
|
|
424
|
+
expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
|
|
425
|
+
type: 'default',
|
|
426
|
+
ttl: '1h',
|
|
427
|
+
});
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
test('honors an explicit 5m promptCacheTtl on the tool cache point', () => {
|
|
431
|
+
const model = new CustomChatBedrockConverse({
|
|
432
|
+
...baseConstructorArgs,
|
|
433
|
+
promptCache: true,
|
|
434
|
+
promptCacheTtl: '5m',
|
|
435
|
+
});
|
|
436
|
+
|
|
437
|
+
const params = model.invocationParams({
|
|
438
|
+
tools: [
|
|
439
|
+
{
|
|
440
|
+
type: 'function',
|
|
441
|
+
function: {
|
|
442
|
+
name: 'direct_tool',
|
|
443
|
+
description: 'Direct tool',
|
|
444
|
+
parameters: { type: 'object', properties: {} },
|
|
445
|
+
},
|
|
446
|
+
},
|
|
447
|
+
],
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
|
|
451
|
+
Record<string, unknown>
|
|
452
|
+
>;
|
|
453
|
+
const cachePoints = toolList.filter((t) => 'cachePoint' in t);
|
|
454
|
+
expect(cachePoints).toHaveLength(1);
|
|
455
|
+
// 5m omits the ttl field (provider default).
|
|
456
|
+
expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
|
|
457
|
+
type: 'default',
|
|
458
|
+
});
|
|
459
|
+
});
|
|
460
|
+
|
|
400
461
|
test('adds the Bedrock cache point before deferred tools', () => {
|
|
401
462
|
const model = new CustomChatBedrockConverse({
|
|
402
463
|
...baseConstructorArgs,
|