@librechat/agents 3.2.38 → 3.2.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +25 -8
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +7 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +4 -3
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +20 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +7 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/toolCache.cjs +5 -4
- package/dist/cjs/llm/bedrock/toolCache.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +34 -17
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +1 -0
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/toolCache.cjs +18 -5
- package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -1
- package/dist/cjs/main.cjs +4 -0
- package/dist/cjs/messages/anthropicToolCache.cjs +75 -13
- package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +91 -35
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +3 -2
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ReadFile.cjs +2 -2
- package/dist/cjs/tools/ReadFile.cjs.map +1 -1
- package/dist/cjs/tools/cloudflare/CloudflareProgrammaticToolCalling.cjs +11 -11
- package/dist/cjs/tools/cloudflare/CloudflareProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/local/LocalCodingTools.cjs +11 -11
- package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +26 -9
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +8 -5
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs +4 -3
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +20 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +7 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/toolCache.mjs +5 -4
- package/dist/esm/llm/bedrock/toolCache.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +34 -17
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +1 -0
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/toolCache.mjs +18 -5
- package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/anthropicToolCache.mjs +75 -13
- package/dist/esm/messages/anthropicToolCache.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +88 -36
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +4 -3
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ReadFile.mjs +2 -2
- package/dist/esm/tools/ReadFile.mjs.map +1 -1
- package/dist/esm/tools/cloudflare/CloudflareProgrammaticToolCalling.mjs +11 -11
- package/dist/esm/tools/cloudflare/CloudflareProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/local/LocalCodingTools.mjs +11 -11
- package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +11 -0
- package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +2 -0
- package/dist/types/llm/bedrock/index.d.ts +13 -0
- package/dist/types/llm/bedrock/toolCache.d.ts +2 -1
- package/dist/types/llm/openrouter/index.d.ts +8 -0
- package/dist/types/llm/openrouter/toolCache.d.ts +2 -1
- package/dist/types/messages/anthropicToolCache.d.ts +2 -1
- package/dist/types/messages/cache.d.ts +49 -5
- package/dist/types/tools/ReadFile.d.ts +4 -4
- package/dist/types/types/llm.d.ts +14 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +64 -17
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +6 -2
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +7 -5
- package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +1 -1
- package/src/agents/__tests__/AgentContext.test.ts +31 -19
- package/src/agents/__tests__/promptCacheLiveHelpers.ts +6 -2
- package/src/graphs/Graph.ts +40 -4
- package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +12 -12
- package/src/hooks/createWorkspacePolicyHook.ts +7 -6
- package/src/llm/anthropic/utils/message_inputs.ts +33 -6
- package/src/llm/bedrock/index.ts +21 -1
- package/src/llm/bedrock/llm.spec.ts +61 -0
- package/src/llm/bedrock/toolCache.test.ts +24 -0
- package/src/llm/bedrock/toolCache.ts +12 -7
- package/src/llm/bedrock/utils/message_inputs.ts +57 -40
- package/src/llm/openrouter/index.ts +9 -0
- package/src/llm/openrouter/toolCache.test.ts +52 -1
- package/src/llm/openrouter/toolCache.ts +40 -6
- package/src/messages/__tests__/anthropicToolCache.test.ts +168 -0
- package/src/messages/anthropicToolCache.ts +118 -15
- package/src/messages/cache.test.ts +175 -0
- package/src/messages/cache.ts +133 -48
- package/src/summarization/node.ts +21 -2
- package/src/tools/ReadFile.ts +2 -2
- package/src/tools/__tests__/LocalExecutionTools.test.ts +25 -25
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +5 -5
- package/src/tools/__tests__/ReadFile.test.ts +3 -3
- package/src/tools/__tests__/ToolNode.session.test.ts +2 -2
- package/src/tools/__tests__/workspaceSeam.test.ts +2 -2
- package/src/tools/cloudflare/CloudflareProgrammaticToolCalling.ts +11 -11
- package/src/tools/local/LocalCodingTools.ts +14 -14
- package/src/types/llm.ts +14 -0
|
@@ -978,11 +978,16 @@ const NON_CACHEABLE_PAYLOAD_BLOCK_TYPES = new Set([
|
|
|
978
978
|
* skipped. Returns a new array only when it actually places a marker.
|
|
979
979
|
*/
|
|
980
980
|
function reanchorTailCacheControl(
|
|
981
|
-
messages: AnthropicMessageCreateParams['messages']
|
|
981
|
+
messages: AnthropicMessageCreateParams['messages'],
|
|
982
|
+
ttl?: '1h'
|
|
982
983
|
): AnthropicMessageCreateParams['messages'] {
|
|
983
984
|
if (messages.length === 0) {
|
|
984
985
|
return messages;
|
|
985
986
|
}
|
|
987
|
+
const cacheControl =
|
|
988
|
+
ttl === '1h'
|
|
989
|
+
? ({ type: 'ephemeral', ttl: '1h' } as const)
|
|
990
|
+
: ({ type: 'ephemeral' } as const);
|
|
986
991
|
const lastIndex = messages.length - 1;
|
|
987
992
|
const tail = messages[lastIndex];
|
|
988
993
|
const content = tail.content;
|
|
@@ -994,9 +999,7 @@ function reanchorTailCacheControl(
|
|
|
994
999
|
const next = [...messages];
|
|
995
1000
|
next[lastIndex] = {
|
|
996
1001
|
...tail,
|
|
997
|
-
content: [
|
|
998
|
-
{ type: 'text', text: content, cache_control: { type: 'ephemeral' } },
|
|
999
|
-
],
|
|
1002
|
+
content: [{ type: 'text', text: content, cache_control: cacheControl }],
|
|
1000
1003
|
} as (typeof messages)[number];
|
|
1001
1004
|
return next;
|
|
1002
1005
|
}
|
|
@@ -1027,12 +1030,36 @@ function reanchorTailCacheControl(
|
|
|
1027
1030
|
next[lastIndex] = {
|
|
1028
1031
|
...tail,
|
|
1029
1032
|
content: content.map((block, i) =>
|
|
1030
|
-
i === anchor ? { ...block, cache_control:
|
|
1033
|
+
i === anchor ? { ...block, cache_control: cacheControl } : block
|
|
1031
1034
|
),
|
|
1032
1035
|
} as (typeof messages)[number];
|
|
1033
1036
|
return next;
|
|
1034
1037
|
}
|
|
1035
1038
|
|
|
1039
|
+
/**
|
|
1040
|
+
* Find the extended-cache TTL (`'1h'`) carried by an existing `cache_control`
|
|
1041
|
+
* breakpoint, so {@link reanchorTailCacheControl} can re-apply the same TTL the
|
|
1042
|
+
* stripped prefill had. Returns `undefined` for the legacy 5-minute default
|
|
1043
|
+
* (no `ttl`), keeping that path byte-identical to before.
|
|
1044
|
+
*/
|
|
1045
|
+
function findCacheControlTtl(
|
|
1046
|
+
messages: AnthropicMessageCreateParams['messages']
|
|
1047
|
+
): '1h' | undefined {
|
|
1048
|
+
for (const message of messages) {
|
|
1049
|
+
if (!Array.isArray(message.content)) {
|
|
1050
|
+
continue;
|
|
1051
|
+
}
|
|
1052
|
+
for (const block of message.content) {
|
|
1053
|
+
const cacheControl = (block as { cache_control?: { ttl?: unknown } })
|
|
1054
|
+
.cache_control;
|
|
1055
|
+
if (cacheControl?.ttl === '1h') {
|
|
1056
|
+
return '1h';
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
return undefined;
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1036
1063
|
export function stripUnsupportedAssistantPrefill<
|
|
1037
1064
|
T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
|
|
1038
1065
|
>(request: T): T {
|
|
@@ -1065,7 +1092,7 @@ export function stripUnsupportedAssistantPrefill<
|
|
|
1065
1092
|
const reanchored =
|
|
1066
1093
|
messagesHaveCacheControl(messages) &&
|
|
1067
1094
|
!messagesHaveCacheControl(nextMessages)
|
|
1068
|
-
? reanchorTailCacheControl(nextMessages)
|
|
1095
|
+
? reanchorTailCacheControl(nextMessages, findCacheControlTtl(messages))
|
|
1069
1096
|
: nextMessages;
|
|
1070
1097
|
|
|
1071
1098
|
return {
|
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
handleConverseStreamContentBlockDelta,
|
|
40
40
|
handleConverseStreamMetadata,
|
|
41
41
|
} from './utils';
|
|
42
|
+
import { resolvePromptCacheTtl, type PromptCacheTtl } from '@/messages/cache';
|
|
42
43
|
import { insertBedrockToolCachePoint } from './toolCache';
|
|
43
44
|
|
|
44
45
|
/**
|
|
@@ -63,6 +64,15 @@ export interface CustomChatBedrockConverseInput
|
|
|
63
64
|
*/
|
|
64
65
|
promptCache?: boolean;
|
|
65
66
|
|
|
67
|
+
/**
|
|
68
|
+
* Prompt-cache checkpoint TTL. Defaults to `'1h'` (extended cache) when
|
|
69
|
+
* `promptCache` is enabled; set `'5m'` for the legacy 5-minute behavior.
|
|
70
|
+
* Bedrock models that don't support the 1-hour TTL downgrade to 5m
|
|
71
|
+
* server-side (verified on Sonnet/Opus 4.6), so the default is safe to leave
|
|
72
|
+
* on; use `'5m'` for any model that rejects it.
|
|
73
|
+
*/
|
|
74
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
75
|
+
|
|
66
76
|
/**
|
|
67
77
|
* Guardrail configuration for Converse and ConverseStream invocations.
|
|
68
78
|
* `streamProcessingMode` is only used by ConverseStream.
|
|
@@ -109,6 +119,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
109
119
|
*/
|
|
110
120
|
promptCache?: boolean;
|
|
111
121
|
|
|
122
|
+
/**
|
|
123
|
+
* Prompt-cache checkpoint TTL (`'5m'` legacy or `'1h'` extended cache).
|
|
124
|
+
*/
|
|
125
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
126
|
+
|
|
112
127
|
/**
|
|
113
128
|
* Application Inference Profile ARN to use instead of model ID.
|
|
114
129
|
*/
|
|
@@ -122,6 +137,7 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
122
137
|
constructor(fields?: CustomChatBedrockConverseInput) {
|
|
123
138
|
super(fields);
|
|
124
139
|
this.promptCache = fields?.promptCache;
|
|
140
|
+
this.promptCacheTtl = fields?.promptCacheTtl;
|
|
125
141
|
this.applicationInferenceProfile = fields?.applicationInferenceProfile;
|
|
126
142
|
this.serviceTier = fields?.serviceTier;
|
|
127
143
|
}
|
|
@@ -149,7 +165,11 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
149
165
|
const baseParams = super.invocationParams(options);
|
|
150
166
|
const toolConfig =
|
|
151
167
|
this.promptCache === true
|
|
152
|
-
? insertBedrockToolCachePoint(
|
|
168
|
+
? insertBedrockToolCachePoint(
|
|
169
|
+
baseParams.toolConfig,
|
|
170
|
+
true,
|
|
171
|
+
resolvePromptCacheTtl(this.promptCacheTtl)
|
|
172
|
+
)
|
|
153
173
|
: baseParams.toolConfig;
|
|
154
174
|
|
|
155
175
|
/** Service tier from options or fall back to class-level setting */
|
|
@@ -397,6 +397,67 @@ describe('CustomChatBedrockConverse', () => {
|
|
|
397
397
|
]);
|
|
398
398
|
});
|
|
399
399
|
|
|
400
|
+
test('defaults the tool cache point to the 1h extended TTL', () => {
|
|
401
|
+
const model = new CustomChatBedrockConverse({
|
|
402
|
+
...baseConstructorArgs,
|
|
403
|
+
promptCache: true,
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
const params = model.invocationParams({
|
|
407
|
+
tools: [
|
|
408
|
+
{
|
|
409
|
+
type: 'function',
|
|
410
|
+
function: {
|
|
411
|
+
name: 'direct_tool',
|
|
412
|
+
description: 'Direct tool',
|
|
413
|
+
parameters: { type: 'object', properties: {} },
|
|
414
|
+
},
|
|
415
|
+
},
|
|
416
|
+
],
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
|
|
420
|
+
Record<string, unknown>
|
|
421
|
+
>;
|
|
422
|
+
const cachePoints = toolList.filter((t) => 'cachePoint' in t);
|
|
423
|
+
expect(cachePoints).toHaveLength(1);
|
|
424
|
+
expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
|
|
425
|
+
type: 'default',
|
|
426
|
+
ttl: '1h',
|
|
427
|
+
});
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
test('honors an explicit 5m promptCacheTtl on the tool cache point', () => {
|
|
431
|
+
const model = new CustomChatBedrockConverse({
|
|
432
|
+
...baseConstructorArgs,
|
|
433
|
+
promptCache: true,
|
|
434
|
+
promptCacheTtl: '5m',
|
|
435
|
+
});
|
|
436
|
+
|
|
437
|
+
const params = model.invocationParams({
|
|
438
|
+
tools: [
|
|
439
|
+
{
|
|
440
|
+
type: 'function',
|
|
441
|
+
function: {
|
|
442
|
+
name: 'direct_tool',
|
|
443
|
+
description: 'Direct tool',
|
|
444
|
+
parameters: { type: 'object', properties: {} },
|
|
445
|
+
},
|
|
446
|
+
},
|
|
447
|
+
],
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
const toolList = (params.toolConfig?.tools ?? []) as unknown as Array<
|
|
451
|
+
Record<string, unknown>
|
|
452
|
+
>;
|
|
453
|
+
const cachePoints = toolList.filter((t) => 'cachePoint' in t);
|
|
454
|
+
expect(cachePoints).toHaveLength(1);
|
|
455
|
+
// 5m omits the ttl field (provider default).
|
|
456
|
+
expect((cachePoints[0] as { cachePoint: unknown }).cachePoint).toEqual({
|
|
457
|
+
type: 'default',
|
|
458
|
+
});
|
|
459
|
+
});
|
|
460
|
+
|
|
400
461
|
test('adds the Bedrock cache point before deferred tools', () => {
|
|
401
462
|
const model = new CustomChatBedrockConverse({
|
|
402
463
|
...baseConstructorArgs,
|
|
@@ -167,4 +167,28 @@ describe('partitionAndMarkBedrockToolCache', () => {
|
|
|
167
167
|
'described_tool description'
|
|
168
168
|
);
|
|
169
169
|
});
|
|
170
|
+
|
|
171
|
+
it('normalizes an existing tool cache point to the resolved 1h ttl', () => {
|
|
172
|
+
const result = insertBedrockToolCachePoint(
|
|
173
|
+
{
|
|
174
|
+
tools: [
|
|
175
|
+
{
|
|
176
|
+
toolSpec: {
|
|
177
|
+
name: 'direct_tool',
|
|
178
|
+
description: 'Direct tool',
|
|
179
|
+
inputSchema: { json: { type: 'object', properties: {} } },
|
|
180
|
+
},
|
|
181
|
+
},
|
|
182
|
+
{ cachePoint: { type: 'default' } },
|
|
183
|
+
] as Tool[],
|
|
184
|
+
},
|
|
185
|
+
false,
|
|
186
|
+
'1h'
|
|
187
|
+
);
|
|
188
|
+
const cachePoints = (result?.tools ?? []).filter(
|
|
189
|
+
(t): t is Tool.CachePointMember => 'cachePoint' in t
|
|
190
|
+
);
|
|
191
|
+
expect(cachePoints).toHaveLength(1);
|
|
192
|
+
expect(cachePoints[0].cachePoint).toEqual({ type: 'default', ttl: '1h' });
|
|
193
|
+
});
|
|
170
194
|
});
|
|
@@ -3,12 +3,9 @@ import type { Tool, ToolConfiguration } from '@aws-sdk/client-bedrock-runtime';
|
|
|
3
3
|
import type { OpenAIClient } from '@langchain/openai';
|
|
4
4
|
import type { DocumentType } from '@smithy/types';
|
|
5
5
|
import type { GraphTools } from '@/types';
|
|
6
|
+
import { buildBedrockCachePoint, type PromptCacheTtl } from '@/messages/cache';
|
|
6
7
|
import { _convertToOpenAITool } from '@/llm/openai';
|
|
7
8
|
|
|
8
|
-
const CACHE_POINT: Tool.CachePointMember = {
|
|
9
|
-
cachePoint: { type: 'default' },
|
|
10
|
-
};
|
|
11
|
-
|
|
12
9
|
const BEDROCK_TOOL_CACHE_MARKER = '__lc_bedrock_cache_point_after';
|
|
13
10
|
const BEDROCK_TOOL_CACHE_DISABLED_MARKER = '__lc_bedrock_skip_tool_cache';
|
|
14
11
|
|
|
@@ -148,13 +145,18 @@ export function partitionAndMarkBedrockToolCache(
|
|
|
148
145
|
|
|
149
146
|
export function insertBedrockToolCachePoint(
|
|
150
147
|
toolConfig: ToolConfiguration | undefined,
|
|
151
|
-
fallbackToEnd: boolean
|
|
148
|
+
fallbackToEnd: boolean,
|
|
149
|
+
ttl?: PromptCacheTtl
|
|
152
150
|
): ToolConfiguration | undefined {
|
|
153
151
|
const tools = toolConfig?.tools as BedrockToolWithCacheMarker[] | undefined;
|
|
154
152
|
if (tools == null || tools.length === 0) {
|
|
155
153
|
return toolConfig;
|
|
156
154
|
}
|
|
157
155
|
|
|
156
|
+
const cachePoint: Tool.CachePointMember = {
|
|
157
|
+
cachePoint: buildBedrockCachePoint(ttl),
|
|
158
|
+
};
|
|
159
|
+
|
|
158
160
|
let markerIndex = -1;
|
|
159
161
|
let hasCachePoint = false;
|
|
160
162
|
let hasDisabledMarker = false;
|
|
@@ -163,8 +165,11 @@ export function insertBedrockToolCachePoint(
|
|
|
163
165
|
for (let i = 0; i < tools.length; i++) {
|
|
164
166
|
const tool = tools[i];
|
|
165
167
|
if (isBedrockCachePoint(tool)) {
|
|
168
|
+
// Normalize an existing cache point to the resolved TTL so a stale
|
|
169
|
+
// 5-minute tool breakpoint never precedes the new 1-hour system/message
|
|
170
|
+
// breakpoints (Bedrock requires longer-TTL entries to appear first).
|
|
166
171
|
hasCachePoint = true;
|
|
167
|
-
cleanedTools.push(
|
|
172
|
+
cleanedTools.push(cachePoint);
|
|
168
173
|
continue;
|
|
169
174
|
}
|
|
170
175
|
if (tool[BEDROCK_TOOL_CACHE_MARKER] === true) {
|
|
@@ -189,7 +194,7 @@ export function insertBedrockToolCachePoint(
|
|
|
189
194
|
...toolConfig,
|
|
190
195
|
tools: [
|
|
191
196
|
...cleanedTools.slice(0, insertionIndex + 1),
|
|
192
|
-
|
|
197
|
+
cachePoint,
|
|
193
198
|
...cleanedTools.slice(insertionIndex + 1),
|
|
194
199
|
],
|
|
195
200
|
};
|
|
@@ -427,24 +427,40 @@ const standardContentBlockConverter: StandardContentBlockConverter<{
|
|
|
427
427
|
},
|
|
428
428
|
};
|
|
429
429
|
|
|
430
|
+
type BedrockPromptCacheTtl = '5m' | '1h';
|
|
431
|
+
type NormalizedBedrockCachePoint = {
|
|
432
|
+
type: 'default';
|
|
433
|
+
ttl?: BedrockPromptCacheTtl;
|
|
434
|
+
};
|
|
435
|
+
|
|
430
436
|
/**
|
|
431
|
-
* Check if a block has a cache point
|
|
437
|
+
* Check if a block has a default cache point and return its normalized form,
|
|
438
|
+
* preserving an optional extended-TTL `ttl` (`'5m'` | `'1h'`). Returns
|
|
439
|
+
* `undefined` when the block is not a default cache point.
|
|
432
440
|
*/
|
|
433
|
-
function
|
|
441
|
+
function getDefaultCachePoint(
|
|
442
|
+
block: unknown
|
|
443
|
+
): NormalizedBedrockCachePoint | undefined {
|
|
434
444
|
if (typeof block !== 'object' || block === null) {
|
|
435
|
-
return
|
|
445
|
+
return undefined;
|
|
436
446
|
}
|
|
437
447
|
if (!('cachePoint' in block)) {
|
|
438
|
-
return
|
|
448
|
+
return undefined;
|
|
439
449
|
}
|
|
440
450
|
const cachePoint = (block as { cachePoint?: unknown }).cachePoint;
|
|
441
451
|
if (typeof cachePoint !== 'object' || cachePoint === null) {
|
|
442
|
-
return
|
|
452
|
+
return undefined;
|
|
443
453
|
}
|
|
444
454
|
if (!('type' in cachePoint)) {
|
|
445
|
-
return
|
|
455
|
+
return undefined;
|
|
446
456
|
}
|
|
447
|
-
|
|
457
|
+
if ((cachePoint as { type?: string }).type !== 'default') {
|
|
458
|
+
return undefined;
|
|
459
|
+
}
|
|
460
|
+
const ttl = (cachePoint as { ttl?: unknown }).ttl;
|
|
461
|
+
return ttl === '5m' || ttl === '1h'
|
|
462
|
+
? { type: 'default', ttl }
|
|
463
|
+
: { type: 'default' };
|
|
448
464
|
}
|
|
449
465
|
|
|
450
466
|
/**
|
|
@@ -570,11 +586,10 @@ function convertLangChainContentBlockToConverseContentBlock({
|
|
|
570
586
|
} as BedrockContentBlock;
|
|
571
587
|
}
|
|
572
588
|
|
|
573
|
-
|
|
589
|
+
const cachePoint = getDefaultCachePoint(block);
|
|
590
|
+
if (cachePoint != null) {
|
|
574
591
|
return {
|
|
575
|
-
cachePoint
|
|
576
|
-
type: 'default',
|
|
577
|
-
},
|
|
592
|
+
cachePoint,
|
|
578
593
|
} as BedrockContentBlock;
|
|
579
594
|
}
|
|
580
595
|
|
|
@@ -604,14 +619,14 @@ function convertSystemMessageToConverseMessage(
|
|
|
604
619
|
contentBlocks.push({
|
|
605
620
|
text: (block as { text: string }).text,
|
|
606
621
|
});
|
|
607
|
-
} else
|
|
622
|
+
} else {
|
|
623
|
+
const cachePoint = getDefaultCachePoint(block);
|
|
624
|
+
if (cachePoint == null) {
|
|
625
|
+
break;
|
|
626
|
+
}
|
|
608
627
|
contentBlocks.push({
|
|
609
|
-
cachePoint
|
|
610
|
-
type: 'default',
|
|
611
|
-
},
|
|
628
|
+
cachePoint,
|
|
612
629
|
} as BedrockSystemContentBlock);
|
|
613
|
-
} else {
|
|
614
|
-
break;
|
|
615
630
|
}
|
|
616
631
|
}
|
|
617
632
|
if (msg.content.length === contentBlocks.length) {
|
|
@@ -681,28 +696,29 @@ function convertAIMessageToConverseMessage(msg: BaseMessage): BedrockMessage {
|
|
|
681
696
|
reasoningContent:
|
|
682
697
|
langchainReasoningBlockToBedrockReasoningBlock(reasoningBlock),
|
|
683
698
|
} as BedrockContentBlock);
|
|
684
|
-
} else if (isDefaultCachePoint(block)) {
|
|
685
|
-
contentBlocks.push({
|
|
686
|
-
cachePoint: {
|
|
687
|
-
type: 'default',
|
|
688
|
-
},
|
|
689
|
-
} as BedrockContentBlock);
|
|
690
|
-
} else if (FOREIGN_REASONING_TYPES.some((t) => t === block.type)) {
|
|
691
|
-
// Reasoning from another provider (Anthropic `thinking`/
|
|
692
|
-
// `redacted_thinking`, Google `reasoning`, LibreChat `think`). Bedrock's
|
|
693
|
-
// native reasoning is `reasoning_content` (handled above); a foreign
|
|
694
|
-
// block carries a signature Bedrock cannot validate, so drop it on a
|
|
695
|
-
// cross-provider handoff (e.g. Anthropic → Bedrock) rather than crash.
|
|
696
|
-
// The Bedrock model produces its own reasoning. Anything else unknown
|
|
697
|
-
// still throws below — real content must be surfaced, not dropped.
|
|
698
|
-
return;
|
|
699
699
|
} else {
|
|
700
|
-
const
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
)
|
|
700
|
+
const cachePoint = getDefaultCachePoint(block);
|
|
701
|
+
if (cachePoint != null) {
|
|
702
|
+
contentBlocks.push({
|
|
703
|
+
cachePoint,
|
|
704
|
+
} as BedrockContentBlock);
|
|
705
|
+
} else if (FOREIGN_REASONING_TYPES.some((t) => t === block.type)) {
|
|
706
|
+
// Reasoning from another provider (Anthropic `thinking`/
|
|
707
|
+
// `redacted_thinking`, Google `reasoning`, LibreChat `think`).
|
|
708
|
+
// Bedrock's native reasoning is `reasoning_content` (handled above); a
|
|
709
|
+
// foreign block carries a signature Bedrock cannot validate, so drop
|
|
710
|
+
// it on a cross-provider handoff (e.g. Anthropic → Bedrock) rather
|
|
711
|
+
// than crash. The Bedrock model produces its own reasoning. Anything
|
|
712
|
+
// else unknown still throws below — real content must be surfaced.
|
|
713
|
+
return;
|
|
714
|
+
} else {
|
|
715
|
+
const blockValues = Object.fromEntries(
|
|
716
|
+
Object.entries(block).filter(([key]) => key !== 'type')
|
|
717
|
+
);
|
|
718
|
+
throw new Error(
|
|
719
|
+
`Unsupported content block type: ${block.type} with content of ${JSON.stringify(blockValues, null, 2)}`
|
|
720
|
+
);
|
|
721
|
+
}
|
|
706
722
|
}
|
|
707
723
|
});
|
|
708
724
|
|
|
@@ -864,9 +880,10 @@ function convertToolMessageToConverseMessage(msg: BaseMessage): BedrockMessage {
|
|
|
864
880
|
const toolResultContent: BedrockContentBlock[] = [];
|
|
865
881
|
const trailingCachePoints: BedrockContentBlock[] = [];
|
|
866
882
|
for (const block of content) {
|
|
867
|
-
|
|
883
|
+
const cachePoint = getDefaultCachePoint(block);
|
|
884
|
+
if (cachePoint != null) {
|
|
868
885
|
trailingCachePoints.push({
|
|
869
|
-
cachePoint
|
|
886
|
+
cachePoint,
|
|
870
887
|
} as BedrockContentBlock);
|
|
871
888
|
} else {
|
|
872
889
|
toolResultContent.push(block);
|
|
@@ -6,6 +6,7 @@ import type {
|
|
|
6
6
|
import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
7
7
|
import type { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
8
8
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
9
|
+
import type { PromptCacheTtl } from '@/messages/cache';
|
|
9
10
|
import { ChatOpenAI, emitStreamChunkCallback } from '@/llm/openai';
|
|
10
11
|
|
|
11
12
|
export type OpenRouterReasoningEffort =
|
|
@@ -30,6 +31,13 @@ export interface ChatOpenRouterCallOptions
|
|
|
30
31
|
reasoning?: OpenRouterReasoning;
|
|
31
32
|
modelKwargs?: OpenAIChatInput['modelKwargs'];
|
|
32
33
|
promptCache?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* Prompt-cache breakpoint TTL. Defaults to `'1h'` (extended cache) when
|
|
36
|
+
* `promptCache` is enabled; set `'5m'` for the legacy 5-minute behavior.
|
|
37
|
+
* OpenRouter forwards this to Claude upstreams (Anthropic / Bedrock / Vertex),
|
|
38
|
+
* which downgrade to 5m where the extended TTL isn't supported.
|
|
39
|
+
*/
|
|
40
|
+
promptCacheTtl?: PromptCacheTtl;
|
|
33
41
|
}
|
|
34
42
|
|
|
35
43
|
export type ChatOpenRouterInput = Partial<
|
|
@@ -107,6 +115,7 @@ export class ChatOpenRouter extends ChatOpenAI {
|
|
|
107
115
|
constructor(_fields: ChatOpenRouterInput) {
|
|
108
116
|
const fieldsWithoutPromptCache: ChatOpenRouterInput = { ..._fields };
|
|
109
117
|
delete fieldsWithoutPromptCache.promptCache;
|
|
118
|
+
delete fieldsWithoutPromptCache.promptCacheTtl;
|
|
110
119
|
|
|
111
120
|
const {
|
|
112
121
|
include_reasoning,
|
|
@@ -9,7 +9,7 @@ type OpenRouterTool = {
|
|
|
9
9
|
description?: string;
|
|
10
10
|
parameters?: object;
|
|
11
11
|
};
|
|
12
|
-
cache_control?: { type: 'ephemeral' };
|
|
12
|
+
cache_control?: { type: 'ephemeral'; ttl?: '1h' };
|
|
13
13
|
defer_loading?: boolean;
|
|
14
14
|
};
|
|
15
15
|
|
|
@@ -80,4 +80,55 @@ describe('partitionAndMarkOpenRouterToolCache', () => {
|
|
|
80
80
|
expect(result[0].cache_control).toEqual({ type: 'ephemeral' });
|
|
81
81
|
expect(result[1]).not.toHaveProperty('cache_control');
|
|
82
82
|
});
|
|
83
|
+
|
|
84
|
+
it('stamps the resolved 1h ttl on the last static tool', () => {
|
|
85
|
+
const result = partitionAndMarkOpenRouterToolCache(
|
|
86
|
+
[
|
|
87
|
+
createOpenAITool('static_one'),
|
|
88
|
+
createOpenAITool('static_two'),
|
|
89
|
+
] as GraphTools,
|
|
90
|
+
() => false,
|
|
91
|
+
'1h'
|
|
92
|
+
) as OpenRouterTool[];
|
|
93
|
+
|
|
94
|
+
expect(result[1].cache_control).toEqual({ type: 'ephemeral', ttl: '1h' });
|
|
95
|
+
expect(result[0]).not.toHaveProperty('cache_control');
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it('omits ttl for the 5m legacy default', () => {
|
|
99
|
+
const result = partitionAndMarkOpenRouterToolCache(
|
|
100
|
+
[createOpenAITool('only_static')] as GraphTools,
|
|
101
|
+
() => false,
|
|
102
|
+
'5m'
|
|
103
|
+
) as OpenRouterTool[];
|
|
104
|
+
|
|
105
|
+
expect(result[0].cache_control).toEqual({ type: 'ephemeral' });
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it('strips a stale marker off an earlier static tool', () => {
|
|
109
|
+
const earlier = createOpenAITool('static_one');
|
|
110
|
+
earlier.cache_control = { type: 'ephemeral' };
|
|
111
|
+
const result = partitionAndMarkOpenRouterToolCache(
|
|
112
|
+
[earlier, createOpenAITool('static_two')] as GraphTools,
|
|
113
|
+
() => false,
|
|
114
|
+
'1h'
|
|
115
|
+
) as OpenRouterTool[];
|
|
116
|
+
|
|
117
|
+
// No stale 5m marker survives ahead of the resolved 1h breakpoint.
|
|
118
|
+
expect(result[0]).not.toHaveProperty('cache_control');
|
|
119
|
+
expect(result[1].cache_control).toEqual({ type: 'ephemeral', ttl: '1h' });
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
it('strips a stale marker off a deferred tool', () => {
|
|
123
|
+
const deferred = createOpenAITool('deferred_one');
|
|
124
|
+
deferred.cache_control = { type: 'ephemeral' };
|
|
125
|
+
const result = partitionAndMarkOpenRouterToolCache(
|
|
126
|
+
[createOpenAITool('static_one'), deferred] as GraphTools,
|
|
127
|
+
(name) => name === 'deferred_one',
|
|
128
|
+
'1h'
|
|
129
|
+
) as OpenRouterTool[];
|
|
130
|
+
|
|
131
|
+
expect(result[0].cache_control).toEqual({ type: 'ephemeral', ttl: '1h' });
|
|
132
|
+
expect(result[1]).not.toHaveProperty('cache_control');
|
|
133
|
+
});
|
|
83
134
|
});
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
|
|
2
2
|
import type { OpenAIClient } from '@langchain/openai';
|
|
3
3
|
import type { GraphTools } from '@/types';
|
|
4
|
+
import {
|
|
5
|
+
buildAnthropicCacheControl,
|
|
6
|
+
type PromptCacheTtl,
|
|
7
|
+
} from '@/messages/cache';
|
|
4
8
|
import { _convertToOpenAITool } from '@/llm/openai';
|
|
5
9
|
|
|
6
|
-
|
|
10
|
+
type OpenRouterCacheControl = { type: 'ephemeral'; ttl?: '1h' };
|
|
7
11
|
|
|
8
12
|
type OpenRouterToolWithCacheControl = OpenAIClient.ChatCompletionTool & {
|
|
9
|
-
cache_control?:
|
|
13
|
+
cache_control?: OpenRouterCacheControl;
|
|
10
14
|
defer_loading?: boolean;
|
|
11
15
|
};
|
|
12
16
|
|
|
@@ -46,17 +50,35 @@ function toOpenRouterTool(tool: unknown): OpenRouterToolWithCacheControl {
|
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
function markCacheControl(
|
|
49
|
-
tool: OpenRouterToolWithCacheControl
|
|
53
|
+
tool: OpenRouterToolWithCacheControl,
|
|
54
|
+
ttl?: PromptCacheTtl
|
|
50
55
|
): OpenRouterToolWithCacheControl {
|
|
51
56
|
return {
|
|
52
57
|
...tool,
|
|
53
|
-
cache_control:
|
|
58
|
+
cache_control: buildAnthropicCacheControl(ttl),
|
|
54
59
|
};
|
|
55
60
|
}
|
|
56
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Drop any existing `cache_control` from a tool. Reused/caller-supplied tools
|
|
64
|
+
* can carry a stale marker (e.g. from a prior `promptCacheTtl: '5m'` run); since
|
|
65
|
+
* all tools serialize before system/messages, a leftover 5-minute marker ahead
|
|
66
|
+
* of the resolved breakpoint would violate the longer-TTL-first ordering.
|
|
67
|
+
*/
|
|
68
|
+
function stripCacheControl(
|
|
69
|
+
tool: OpenRouterToolWithCacheControl
|
|
70
|
+
): OpenRouterToolWithCacheControl {
|
|
71
|
+
if (tool.cache_control == null) {
|
|
72
|
+
return tool;
|
|
73
|
+
}
|
|
74
|
+
const { cache_control: _omit, ...rest } = tool;
|
|
75
|
+
return rest;
|
|
76
|
+
}
|
|
77
|
+
|
|
57
78
|
export function partitionAndMarkOpenRouterToolCache(
|
|
58
79
|
tools: GraphTools | undefined,
|
|
59
|
-
isDeferred: (toolName: string) => boolean
|
|
80
|
+
isDeferred: (toolName: string) => boolean,
|
|
81
|
+
ttl?: PromptCacheTtl
|
|
60
82
|
): GraphTools | undefined {
|
|
61
83
|
if (tools == null || tools.length === 0) {
|
|
62
84
|
return tools;
|
|
@@ -77,12 +99,24 @@ export function partitionAndMarkOpenRouterToolCache(
|
|
|
77
99
|
staticTools.push(converted);
|
|
78
100
|
}
|
|
79
101
|
|
|
102
|
+
// Deferred tools sit after the breakpoint but still before system/messages,
|
|
103
|
+
// so strip any stale marker off them.
|
|
104
|
+
for (let i = 0; i < deferredTools.length; i++) {
|
|
105
|
+
deferredTools[i] = stripCacheControl(deferredTools[i]);
|
|
106
|
+
}
|
|
107
|
+
|
|
80
108
|
if (staticTools.length === 0) {
|
|
81
109
|
return [...deferredTools] as GraphTools;
|
|
82
110
|
}
|
|
83
111
|
|
|
112
|
+
// Strip stale markers off the earlier static tools, then stamp only the last
|
|
113
|
+
// static tool with the resolved TTL (markCacheControl overwrites any marker).
|
|
114
|
+
for (let i = 0; i < staticTools.length - 1; i++) {
|
|
115
|
+
staticTools[i] = stripCacheControl(staticTools[i]);
|
|
116
|
+
}
|
|
84
117
|
staticTools[staticTools.length - 1] = markCacheControl(
|
|
85
|
-
staticTools[staticTools.length - 1]
|
|
118
|
+
staticTools[staticTools.length - 1],
|
|
119
|
+
ttl
|
|
86
120
|
);
|
|
87
121
|
|
|
88
122
|
return [...staticTools, ...deferredTools] as GraphTools;
|