langchain 1.4.6-dev-1781485641139 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/chat_models/universal.cjs +1 -0
- package/chat_models/universal.d.cts +1 -0
- package/chat_models/universal.d.ts +1 -0
- package/chat_models/universal.js +1 -0
- package/dist/agents/ReactAgent.cjs +2 -5
- package/dist/agents/ReactAgent.cjs.map +1 -1
- package/dist/agents/ReactAgent.d.cts +1 -1
- package/dist/agents/ReactAgent.d.cts.map +1 -1
- package/dist/agents/ReactAgent.d.ts +1 -1
- package/dist/agents/ReactAgent.d.ts.map +1 -1
- package/dist/agents/ReactAgent.js +1 -4
- package/dist/agents/ReactAgent.js.map +1 -1
- package/dist/agents/index.cjs +1 -3
- package/dist/agents/index.cjs.map +1 -1
- package/dist/agents/index.d.cts +1 -3
- package/dist/agents/index.d.cts.map +1 -1
- package/dist/agents/index.d.ts +1 -3
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +1 -3
- package/dist/agents/index.js.map +1 -1
- package/dist/agents/middleware/hitl.cjs +50 -3
- package/dist/agents/middleware/hitl.cjs.map +1 -1
- package/dist/agents/middleware/hitl.d.cts +105 -2
- package/dist/agents/middleware/hitl.d.cts.map +1 -1
- package/dist/agents/middleware/hitl.d.ts +105 -2
- package/dist/agents/middleware/hitl.d.ts.map +1 -1
- package/dist/agents/middleware/hitl.js +50 -3
- package/dist/agents/middleware/hitl.js.map +1 -1
- package/dist/agents/middleware/index.cjs +1 -0
- package/dist/agents/middleware/index.d.cts +2 -1
- package/dist/agents/middleware/index.d.ts +2 -1
- package/dist/agents/middleware/index.js +1 -0
- package/dist/agents/middleware/provider/aws/promptCaching.cjs +209 -0
- package/dist/agents/middleware/provider/aws/promptCaching.cjs.map +1 -0
- package/dist/agents/middleware/provider/aws/promptCaching.d.cts +207 -0
- package/dist/agents/middleware/provider/aws/promptCaching.d.cts.map +1 -0
- package/dist/agents/middleware/provider/aws/promptCaching.d.ts +207 -0
- package/dist/agents/middleware/provider/aws/promptCaching.d.ts.map +1 -0
- package/dist/agents/middleware/provider/aws/promptCaching.js +208 -0
- package/dist/agents/middleware/provider/aws/promptCaching.js.map +1 -0
- package/dist/agents/middleware/toolEmulator.cjs +1 -1
- package/dist/agents/middleware/toolEmulator.cjs.map +1 -1
- package/dist/agents/middleware/toolEmulator.js +1 -1
- package/dist/agents/middleware/toolEmulator.js.map +1 -1
- package/dist/agents/{transformers/tool-call.cjs → stream.cjs} +15 -3
- package/dist/agents/stream.cjs.map +1 -0
- package/dist/agents/{transformers/types.d.ts → stream.d.cts} +20 -50
- package/dist/agents/stream.d.cts.map +1 -0
- package/dist/agents/{transformers/types.d.cts → stream.d.ts} +20 -50
- package/dist/agents/stream.d.ts.map +1 -0
- package/dist/agents/{transformers/tool-call.js → stream.js} +14 -2
- package/dist/agents/stream.js.map +1 -0
- package/dist/browser.cjs +6 -6
- package/dist/browser.d.cts +4 -5
- package/dist/browser.d.ts +4 -5
- package/dist/browser.js +4 -4
- package/dist/chat_models/universal.cjs +23 -4
- package/dist/chat_models/universal.cjs.map +1 -1
- package/dist/chat_models/universal.d.cts +27 -2
- package/dist/chat_models/universal.d.cts.map +1 -1
- package/dist/chat_models/universal.d.ts +27 -2
- package/dist/chat_models/universal.d.ts.map +1 -1
- package/dist/chat_models/universal.js +23 -4
- package/dist/chat_models/universal.js.map +1 -1
- package/dist/index.cjs +6 -6
- package/dist/index.d.cts +4 -5
- package/dist/index.d.ts +4 -5
- package/dist/index.js +4 -4
- package/hub/node.cjs +1 -0
- package/hub/node.d.cts +1 -0
- package/hub/node.d.ts +1 -0
- package/hub/node.js +1 -0
- package/hub.cjs +1 -0
- package/hub.d.cts +1 -0
- package/hub.d.ts +1 -0
- package/hub.js +1 -0
- package/load/serializable.cjs +1 -0
- package/load/serializable.d.cts +1 -0
- package/load/serializable.d.ts +1 -0
- package/load/serializable.js +1 -0
- package/load.cjs +1 -0
- package/load.d.cts +1 -0
- package/load.d.ts +1 -0
- package/load.js +1 -0
- package/package.json +7 -6
- package/storage/encoder_backed.cjs +1 -0
- package/storage/encoder_backed.d.cts +1 -0
- package/storage/encoder_backed.d.ts +1 -0
- package/storage/encoder_backed.js +1 -0
- package/storage/file_system.cjs +1 -0
- package/storage/file_system.d.cts +1 -0
- package/storage/file_system.d.ts +1 -0
- package/storage/file_system.js +1 -0
- package/storage/in_memory.cjs +1 -0
- package/storage/in_memory.d.cts +1 -0
- package/storage/in_memory.d.ts +1 -0
- package/storage/in_memory.js +1 -0
- package/dist/agents/transformers/index.cjs +0 -2
- package/dist/agents/transformers/index.d.cts +0 -3
- package/dist/agents/transformers/index.d.ts +0 -3
- package/dist/agents/transformers/index.js +0 -3
- package/dist/agents/transformers/subagent.cjs +0 -205
- package/dist/agents/transformers/subagent.cjs.map +0 -1
- package/dist/agents/transformers/subagent.d.cts +0 -34
- package/dist/agents/transformers/subagent.d.cts.map +0 -1
- package/dist/agents/transformers/subagent.d.ts +0 -34
- package/dist/agents/transformers/subagent.d.ts.map +0 -1
- package/dist/agents/transformers/subagent.js +0 -204
- package/dist/agents/transformers/subagent.js.map +0 -1
- package/dist/agents/transformers/tool-call.cjs.map +0 -1
- package/dist/agents/transformers/tool-call.d.cts +0 -17
- package/dist/agents/transformers/tool-call.d.cts.map +0 -1
- package/dist/agents/transformers/tool-call.d.ts +0 -17
- package/dist/agents/transformers/tool-call.d.ts.map +0 -1
- package/dist/agents/transformers/tool-call.js.map +0 -1
- package/dist/agents/transformers/types.d.cts.map +0 -1
- package/dist/agents/transformers/types.d.ts.map +0 -1
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
require("../../../../_virtual/_rolldown/runtime.cjs");
|
|
2
|
+
const require_middleware = require("../../../middleware.cjs");
|
|
3
|
+
let zod_v3 = require("zod/v3");
|
|
4
|
+
//#region src/agents/middleware/provider/aws/promptCaching.ts
|
|
5
|
+
// Fallback values used by the middleware when neither the per-invocation
// runtime context nor the factory options supply a setting. The schema below
// only marks the fields optional; it does not apply these defaults itself.
const DEFAULT_ENABLE_CACHING = true;
const DEFAULT_TTL = "5m";
const DEFAULT_MIN_MESSAGES_TO_CACHE = 1;
const DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR = "warn";
/**
 * Schema for the settings that can be supplied per invocation through the
 * runtime context. Every field is optional.
 */
const contextSchema = zod_v3.z.object({
  /**
   * Whether to enable prompt caching.
   * @default true
   */
  enableCaching: zod_v3.z.boolean().optional(),
  /**
   * The time-to-live for the cached prompt.
   * @default "5m"
   */
  ttl: zod_v3.z.enum(["5m", "1h"]).optional(),
  /**
   * The minimum number of messages required before caching is applied.
   * A system prompt, when present, counts as one message.
   * @default 1
   */
  minMessagesToCache: zod_v3.z.number().optional(),
  /**
   * The behavior to take when an unsupported model is used.
   * - "ignore" will ignore the unsupported model and continue without caching.
   * - "warn" will warn the user and continue without caching.
   * - "raise" will raise an error and stop the agent.
   * @default "warn"
   */
  unsupportedModelBehavior: zod_v3.z.enum([
    "ignore",
    "warn",
    "raise"
  ]).optional()
});
|
|
38
|
+
/**
 * Error thrown by the Bedrock prompt caching middleware when the configured
 * model cannot use prompt caching and `unsupportedModelBehavior` is `"raise"`.
 *
 * `name` is set explicitly so the error can be identified by name.
 */
var BedrockPromptCachingMiddlewareError = class extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
    this.name = "BedrockPromptCachingMiddlewareError";
  }
};
|
|
44
|
+
/**
 * Creates a prompt caching middleware for AWS Bedrock Converse models to optimize API usage.
 *
 * This middleware automatically enables Bedrock's prompt caching when using AWS Bedrock Converse
 * models. This can significantly reduce costs for applications with repetitive prompts, long
 * system messages, or extensive conversation histories.
 *
 * ## How It Works
 *
 * The middleware intercepts model requests and sets a cache control signal that
 * `ChatBedrockConverse` translates into Bedrock `cachePoint` breakpoints. Cache points are
 * inserted after the system prompt, after the tool definitions, and after the final message, so
 * the stable prefix of each request is cached. On subsequent requests with a matching prefix, the
 * cached representations are reused, skipping redundant token processing. Exact placement varies
 * by model (e.g. Amazon Nova models cache fewer breakpoints and ignore the `"1h"` TTL).
 *
 * ## Option Resolution
 *
 * Each setting is resolved per model call in this order: the runtime context of the
 * invocation, then `middlewareOptions`, then the built-in default.
 *
 * ## Benefits
 *
 * - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly
 * - **Lower Latency**: Cached prompts are processed faster as embeddings are pre-computed
 * - **Better Scalability**: Reduced computational load enables handling more requests
 * - **Consistent Performance**: Stable response times for repetitive queries
 *
 * @param middlewareOptions - Configuration options for the caching behavior
 * @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)
 * @param middlewareOptions.ttl - Cache time-to-live: `"5m"` for 5 minutes or `"1h"` for 1 hour (default: `"5m"`)
 * @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied; a system prompt counts as one message (default: `1`)
 * @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `"warn"`)
 *
 * @returns A middleware instance that can be passed to `createAgent`
 *
 * @throws {Error} An error named `BedrockPromptCachingMiddlewareError` when
 * `unsupportedModelBehavior` is `"raise"` and the model is not a cache-capable Bedrock
 * Converse model — either a non-Bedrock provider, or a Bedrock Converse model outside the
 * Anthropic Claude / Amazon Nova families.
 *
 * @example
 * Basic usage with default settings
 * ```typescript
 * import { createAgent, bedrockPromptCachingMiddleware } from "langchain";
 *
 * const agent = createAgent({
 *   model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
 *   middleware: [
 *     bedrockPromptCachingMiddleware()
 *   ]
 * });
 * ```
 *
 * @example
 * Custom configuration for longer conversations
 * ```typescript
 * const cachingMiddleware = bedrockPromptCachingMiddleware({
 *   ttl: "1h",             // Cache for 1 hour instead of default 5 minutes
 *   minMessagesToCache: 5  // Only cache after 5 messages
 * });
 *
 * const agent = createAgent({
 *   model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
 *   systemPrompt: "You are a helpful assistant with deep knowledge of...", // Long system prompt
 *   middleware: [cachingMiddleware]
 * });
 * ```
 *
 * @example
 * Conditional caching based on runtime context
 * ```typescript
 * const agent = createAgent({
 *   model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
 *   middleware: [
 *     bedrockPromptCachingMiddleware({
 *       enableCaching: true,
 *       ttl: "5m"
 *     })
 *   ]
 * });
 *
 * // Disable caching for specific requests
 * await agent.invoke(
 *   { messages: [new HumanMessage("Process this without caching")] },
 *   {
 *     configurable: {
 *       middleware_context: { enableCaching: false }
 *     }
 *   }
 * );
 * ```
 *
 * @example
 * Optimal setup for customer support chatbot
 * ```typescript
 * const supportAgent = createAgent({
 *   model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
 *   systemPrompt: `You are a customer support agent for ACME Corp.
 *
 *   Company policies:
 *   - Always be polite and professional
 *   - Refer to knowledge base for product information
 *   - Escalate billing issues to human agents
 *   ... (extensive policies and guidelines)
 *   `,
 *   tools: [searchKnowledgeBase, createTicket, checkOrderStatus],
 *   middleware: [
 *     bedrockPromptCachingMiddleware({
 *       ttl: "1h",             // Long TTL for stable system prompt
 *       minMessagesToCache: 1  // Cache immediately due to large system prompt
 *     })
 *   ]
 * });
 * ```
 *
 * @remarks
 * - **Bedrock Converse Only**: This middleware only applies caching to AWS Bedrock Converse models. Other providers are handled per `unsupportedModelBehavior`
 * - **Supported Families**: Bedrock prompt caching is only available on the **Anthropic Claude** and **Amazon Nova** model families. Other Bedrock Converse models (e.g. Mistral, Cohere, Meta) reject cache points at request time, so they are treated as unsupported and routed through `unsupportedModelBehavior`
 * - **Automatic Application**: Caching is applied automatically when the message count reaches `minMessagesToCache`
 * - **TTL Options**: Only supports "5m" (5 minutes) and "1h" (1 hour) as TTL values; actual support varies by model
 * - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications
 *
 * @see {@link createAgent} for agent creation
 * @see {@link https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html} AWS Bedrock prompt caching documentation
 * @public
 */
function bedrockPromptCachingMiddleware(middlewareOptions) {
  return require_middleware.createMiddleware({
    name: "BedrockPromptCachingMiddleware",
    contextSchema,
    wrapModelCall: (request, handler) => {
      // Resolve each setting: runtime context first, then factory options, then the default.
      const enableCaching = request.runtime.context.enableCaching ?? middlewareOptions?.enableCaching ?? DEFAULT_ENABLE_CACHING;
      const ttl = request.runtime.context.ttl ?? middlewareOptions?.ttl ?? DEFAULT_TTL;
      const minMessagesToCache = request.runtime.context.minMessagesToCache ?? middlewareOptions?.minMessagesToCache ?? DEFAULT_MIN_MESSAGES_TO_CACHE;
      const unsupportedModelBehavior = request.runtime.context.unsupportedModelBehavior ?? middlewareOptions?.unsupportedModelBehavior ?? DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR;

      // Skip if caching is disabled or the request carries no model to inspect.
      if (!enableCaching || !request.model) {
        return handler(request);
      }

      // A model counts as Bedrock Converse when it is a ChatBedrockConverse instance, or a
      // ConfigurableModel whose default provider is "bedrock" or "aws".
      const modelName = request.model.getName();
      const isBedrockConverseModel = modelName === "ChatBedrockConverse" || modelName === "ConfigurableModel" && (request.model._defaultConfig?.modelProvider === "bedrock" || request.model._defaultConfig?.modelProvider === "aws");

      // Resolve the underlying Bedrock model id for cache-capability detection.
      const modelId = modelName === "ConfigurableModel" ? request.model._defaultConfig?.model : request.model.model;

      // Bedrock prompt caching is only supported on the Anthropic Claude and
      // Amazon Nova model families; other Converse models are treated as unsupported.
      const isCacheCapableModel = isBedrockConverseModel && typeof modelId === "string" && (modelId.toLowerCase().includes("anthropic.claude") || modelId.toLowerCase().includes("amazon.nova"));

      if (!isCacheCapableModel) {
        const modelInfo = modelName === "ConfigurableModel" ? `${modelName} (${request.model._defaultConfig?.modelProvider})` : modelName;
        const baseMessage = isBedrockConverseModel ? `Unsupported model '${modelInfo}'. Bedrock prompt caching is only supported on Anthropic Claude and Amazon Nova models` : `Unsupported model '${modelInfo}'. Prompt caching requires an AWS Bedrock Converse model`;
        if (unsupportedModelBehavior === "raise") {
          throw new BedrockPromptCachingMiddlewareError(`${baseMessage} (e.g., 'bedrock:anthropic.claude-haiku-4-5-20251001-v1:0').`);
        } else if (unsupportedModelBehavior === "warn") {
          console.warn(`BedrockPromptCachingMiddleware: Skipping caching for ${modelName}. Consider switching to an Anthropic Claude or Amazon Nova model for caching benefits.`);
        }
        // "ignore" and "warn" both fall through to an uncached call.
        return handler(request);
      }

      // The system prompt, when present, counts as one message toward the threshold.
      const messagesCount = request.state.messages.length + (request.systemPrompt ? 1 : 0);
      if (messagesCount < minMessagesToCache) {
        return handler(request);
      }

      /**
       * The cache_control is applied at the final message formatting layer in
       * ChatBedrockConverse (translated into Converse `cachePoint` blocks).
       *
       * @see https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
       */
      return handler({
        ...request,
        modelSettings: {
          ...request.modelSettings,
          cache_control: {
            type: "ephemeral",
            ttl
          }
        }
      });
    }
  });
}
|
|
206
|
+
//#endregion
|
|
207
|
+
exports.bedrockPromptCachingMiddleware = bedrockPromptCachingMiddleware;
|
|
208
|
+
|
|
209
|
+
//# sourceMappingURL=promptCaching.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"promptCaching.cjs","names":["z","createMiddleware"],"sources":["../../../../../src/agents/middleware/provider/aws/promptCaching.ts"],"sourcesContent":["import { z } from \"zod/v3\";\nimport { InferInteropZodInput } from \"@langchain/core/utils/types\";\n\nimport type { ConfigurableModel } from \"../../../../chat_models/universal.js\";\nimport { createMiddleware } from \"../../../middleware.js\";\n\nconst DEFAULT_ENABLE_CACHING = true;\nconst DEFAULT_TTL = \"5m\";\nconst DEFAULT_MIN_MESSAGES_TO_CACHE = 1;\nconst DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR = \"warn\";\n\nconst contextSchema = z.object({\n /**\n * Whether to enable prompt caching.\n * @default true\n */\n enableCaching: z.boolean().optional(),\n /**\n * The time-to-live for the cached prompt.\n * @default \"5m\"\n */\n ttl: z.enum([\"5m\", \"1h\"]).optional(),\n /**\n * The minimum number of messages required before caching is applied.\n * @default 1\n */\n minMessagesToCache: z.number().optional(),\n /**\n * The behavior to take when an unsupported model is used.\n * - \"ignore\" will ignore the unsupported model and continue without caching.\n * - \"warn\" will warn the user and continue without caching.\n * - \"raise\" will raise an error and stop the agent.\n * @default \"warn\"\n */\n unsupportedModelBehavior: z.enum([\"ignore\", \"warn\", \"raise\"]).optional(),\n});\nexport type BedrockConversePromptCachingMiddlewareConfig = Partial<\n InferInteropZodInput<typeof contextSchema>\n>;\n\nclass BedrockPromptCachingMiddlewareError extends Error {\n constructor(message: string) {\n super(message);\n this.name = \"BedrockPromptCachingMiddlewareError\";\n }\n}\n\n/**\n * Creates a prompt caching middleware for AWS Bedrock Converse models to optimize API usage.\n *\n * This middleware automatically enables Bedrock's prompt caching when using AWS Bedrock Converse\n * models. 
This can significantly reduce costs for applications with repetitive prompts, long\n * system messages, or extensive conversation histories.\n *\n * ## How It Works\n *\n * The middleware intercepts model requests and sets a cache control signal that\n * `ChatBedrockConverse` translates into Bedrock `cachePoint` breakpoints. Cache points are\n * inserted after the system prompt, after the tool definitions, and after the final message, so\n * the stable prefix of each request is cached. On subsequent requests with a matching prefix, the\n * cached representations are reused, skipping redundant token processing. Exact placement varies\n * by model (e.g. Amazon Nova models cache fewer breakpoints and ignore the `\"1h\"` TTL).\n *\n * ## Benefits\n *\n * - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly\n * - **Lower Latency**: Cached prompts are processed faster as embeddings are pre-computed\n * - **Better Scalability**: Reduced computational load enables handling more requests\n * - **Consistent Performance**: Stable response times for repetitive queries\n *\n * @param middlewareOptions - Configuration options for the caching behavior\n * @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)\n * @param middlewareOptions.ttl - Cache time-to-live: `\"5m\"` for 5 minutes or `\"1h\"` for 1 hour (default: `\"5m\"`)\n * @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied (default: `1`)\n * @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `\"warn\"`)\n *\n * @returns A middleware instance that can be passed to `createAgent`\n *\n * @throws {Error} When `unsupportedModelBehavior` is `\"raise\"` and the model is not a\n * cache-capable Bedrock Converse model — either a non-Bedrock provider, or a Bedrock\n * Converse model outside the Anthropic Claude / Amazon Nova families.\n *\n * @example\n * 
Basic usage with default settings\n * ```typescript\n * import { createAgent } from \"langchain\";\n * import { bedrockPromptCachingMiddleware } from \"langchain\";\n *\n * const agent = createAgent({\n * model: \"bedrock:anthropic.claude-haiku-4-5-20251001-v1:0\",\n * middleware: [\n * bedrockPromptCachingMiddleware()\n * ]\n * });\n * ```\n *\n * @example\n * Custom configuration for longer conversations\n * ```typescript\n * const cachingMiddleware = bedrockPromptCachingMiddleware({\n * ttl: \"1h\", // Cache for 1 hour instead of default 5 minutes\n * minMessagesToCache: 5 // Only cache after 5 messages\n * });\n *\n * const agent = createAgent({\n * model: \"bedrock:anthropic.claude-haiku-4-5-20251001-v1:0\",\n * systemPrompt: \"You are a helpful assistant with deep knowledge of...\", // Long system prompt\n * middleware: [cachingMiddleware]\n * });\n * ```\n *\n * @example\n * Conditional caching based on runtime context\n * ```typescript\n * const agent = createAgent({\n * model: \"bedrock:anthropic.claude-haiku-4-5-20251001-v1:0\",\n * middleware: [\n * bedrockPromptCachingMiddleware({\n * enableCaching: true,\n * ttl: \"5m\"\n * })\n * ]\n * });\n *\n * // Disable caching for specific requests\n * await agent.invoke(\n * { messages: [new HumanMessage(\"Process this without caching\")] },\n * {\n * configurable: {\n * middleware_context: { enableCaching: false }\n * }\n * }\n * );\n * ```\n *\n * @example\n * Optimal setup for customer support chatbot\n * ```typescript\n * const supportAgent = createAgent({\n * model: \"bedrock:anthropic.claude-haiku-4-5-20251001-v1:0\",\n * systemPrompt: `You are a customer support agent for ACME Corp.\n *\n * Company policies:\n * - Always be polite and professional\n * - Refer to knowledge base for product information\n * - Escalate billing issues to human agents\n * ... 
(extensive policies and guidelines)\n * `,\n * tools: [searchKnowledgeBase, createTicket, checkOrderStatus],\n * middleware: [\n * bedrockPromptCachingMiddleware({\n * ttl: \"1h\", // Long TTL for stable system prompt\n * minMessagesToCache: 1 // Cache immediately due to large system prompt\n * })\n * ]\n * });\n * ```\n *\n * @remarks\n * - **Bedrock Converse Only**: This middleware only applies caching to AWS Bedrock Converse models. Other providers are handled per `unsupportedModelBehavior`\n * - **Supported Families**: Bedrock prompt caching is only available on the **Anthropic Claude** and **Amazon Nova** model families. Other Bedrock Converse models (e.g. Mistral, Cohere, Meta) reject cache points at request time, so they are treated as unsupported and routed through `unsupportedModelBehavior`\n * - **Automatic Application**: Caching is applied automatically when the message count reaches `minMessagesToCache`\n * - **TTL Options**: Only supports \"5m\" (5 minutes) and \"1h\" (1 hour) as TTL values; actual support varies by model\n * - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications\n *\n * @see {@link createAgent} for agent creation\n * @see {@link https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html} AWS Bedrock prompt caching documentation\n * @public\n */\nexport function bedrockPromptCachingMiddleware(\n middlewareOptions?: BedrockConversePromptCachingMiddlewareConfig\n) {\n return createMiddleware({\n name: \"BedrockPromptCachingMiddleware\",\n contextSchema,\n wrapModelCall: (request, handler) => {\n const enableCaching =\n request.runtime.context.enableCaching ??\n middlewareOptions?.enableCaching ??\n DEFAULT_ENABLE_CACHING;\n const ttl =\n request.runtime.context.ttl ?? middlewareOptions?.ttl ?? 
DEFAULT_TTL;\n const minMessagesToCache =\n request.runtime.context.minMessagesToCache ??\n middlewareOptions?.minMessagesToCache ??\n DEFAULT_MIN_MESSAGES_TO_CACHE;\n const unsupportedModelBehavior =\n request.runtime.context.unsupportedModelBehavior ??\n middlewareOptions?.unsupportedModelBehavior ??\n DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR;\n\n // Skip if caching is disabled\n if (!enableCaching || !request.model) {\n return handler(request);\n }\n\n const modelName = request.model.getName();\n const isBedrockConverseModel =\n modelName === \"ChatBedrockConverse\" ||\n (modelName === \"ConfigurableModel\" &&\n ((request.model as ConfigurableModel)._defaultConfig\n ?.modelProvider === \"bedrock\" ||\n (request.model as ConfigurableModel)._defaultConfig\n ?.modelProvider === \"aws\"));\n\n // Resolve the underlying Bedrock model id for cache-capability detection.\n const modelId =\n modelName === \"ConfigurableModel\"\n ? ((request.model as ConfigurableModel)._defaultConfig?.model as\n | string\n | undefined)\n : (request.model as { model?: string }).model;\n\n // Bedrock prompt caching is only supported on the Anthropic Claude and\n // Amazon Nova model families. Other Converse models (Mistral, Cohere,\n // Meta, etc.) reject `cachePoint` blocks with an AccessDeniedException, so\n // they are treated as unsupported.\n const isCacheCapableModel =\n isBedrockConverseModel &&\n typeof modelId === \"string\" &&\n (modelId.toLowerCase().includes(\"anthropic.claude\") ||\n modelId.toLowerCase().includes(\"amazon.nova\"));\n\n if (!isCacheCapableModel) {\n const modelInfo =\n modelName === \"ConfigurableModel\"\n ? `${modelName} (${\n (request.model as ConfigurableModel)._defaultConfig\n ?.modelProvider\n })`\n : modelName;\n\n const baseMessage = isBedrockConverseModel\n ? `Unsupported model '${modelInfo}'. Bedrock prompt caching is only supported on Anthropic Claude and Amazon Nova models`\n : `Unsupported model '${modelInfo}'. 
Prompt caching requires an AWS Bedrock Converse model`;\n\n if (unsupportedModelBehavior === \"raise\") {\n throw new BedrockPromptCachingMiddlewareError(\n `${baseMessage} (e.g., 'bedrock:anthropic.claude-haiku-4-5-20251001-v1:0').`\n );\n } else if (unsupportedModelBehavior === \"warn\") {\n console.warn(\n `BedrockPromptCachingMiddleware: Skipping caching for ${modelName}. Consider switching to an Anthropic Claude or Amazon Nova model for caching benefits.`\n );\n }\n return handler(request);\n }\n\n const messagesCount =\n request.state.messages.length + (request.systemPrompt ? 1 : 0);\n\n if (messagesCount < minMessagesToCache) {\n return handler(request);\n }\n\n /**\n * The cache_control is applied at the final message formatting layer in\n * ChatBedrockConverse (translated into Converse `cachePoint` blocks).\n *\n * @see https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html\n */\n return handler({\n ...request,\n modelSettings: {\n ...request.modelSettings,\n cache_control: {\n type: \"ephemeral\" as const,\n ttl,\n },\n },\n });\n },\n 
});\n}\n"],"mappings":";;;;AAMA,MAAM,yBAAyB;AAC/B,MAAM,cAAc;AACpB,MAAM,gCAAgC;AACtC,MAAM,qCAAqC;AAE3C,MAAM,gBAAgBA,OAAAA,EAAE,OAAO;;;;;CAK7B,eAAeA,OAAAA,EAAE,SAAS,CAAC,UAAU;;;;;CAKrC,KAAKA,OAAAA,EAAE,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,UAAU;;;;;CAKpC,oBAAoBA,OAAAA,EAAE,QAAQ,CAAC,UAAU;;;;;;;;CAQzC,0BAA0BA,OAAAA,EAAE,KAAK;EAAC;EAAU;EAAQ;EAAQ,CAAC,CAAC,UAAU;CACzE,CAAC;AAKF,IAAM,sCAAN,cAAkD,MAAM;CACtD,YAAY,SAAiB;AAC3B,QAAM,QAAQ;AACd,OAAK,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8HhB,SAAgB,+BACd,mBACA;AACA,QAAOC,mBAAAA,iBAAiB;EACtB,MAAM;EACN;EACA,gBAAgB,SAAS,YAAY;GACnC,MAAM,gBACJ,QAAQ,QAAQ,QAAQ,iBACxB,mBAAmB,iBACnB;GACF,MAAM,MACJ,QAAQ,QAAQ,QAAQ,OAAO,mBAAmB,OAAO;GAC3D,MAAM,qBACJ,QAAQ,QAAQ,QAAQ,sBACxB,mBAAmB,sBACnB;GACF,MAAM,2BACJ,QAAQ,QAAQ,QAAQ,4BACxB,mBAAmB,4BACnB;AAGF,OAAI,CAAC,iBAAiB,CAAC,QAAQ,MAC7B,QAAO,QAAQ,QAAQ;GAGzB,MAAM,YAAY,QAAQ,MAAM,SAAS;GACzC,MAAM,yBACJ,cAAc,yBACb,cAAc,wBACX,QAAQ,MAA4B,gBAClC,kBAAkB,aACnB,QAAQ,MAA4B,gBACjC,kBAAkB;GAG5B,MAAM,UACJ,cAAc,sBACR,QAAQ,MAA4B,gBAAgB,QAGrD,QAAQ,MAA6B;AAY5C,OAAI,EALF,0BACA,OAAO,YAAY,aAClB,QAAQ,aAAa,CAAC,SAAS,mBAAmB,IACjD,QAAQ,aAAa,CAAC,SAAS,cAAc,IAEvB;IACxB,MAAM,YACJ,cAAc,sBACV,GAAG,UAAU,IACV,QAAQ,MAA4B,gBACjC,cACL,KACD;IAEN,MAAM,cAAc,yBAChB,sBAAsB,UAAU,0FAChC,sBAAsB,UAAU;AAEpC,QAAI,6BAA6B,QAC/B,OAAM,IAAI,oCACR,GAAG,YAAY,8DAChB;aACQ,6BAA6B,OACtC,SAAQ,KACN,wDAAwD,UAAU,wFACnE;AAEH,WAAO,QAAQ,QAAQ;;AAMzB,OAFE,QAAQ,MAAM,SAAS,UAAU,QAAQ,eAAe,IAAI,KAE1C,mBAClB,QAAO,QAAQ,QAAQ;;;;;;;AASzB,UAAO,QAAQ;IACb,GAAG;IACH,eAAe;KACb,GAAG,QAAQ;KACX,eAAe;MACb,MAAM;MACN;MACD;KACF;IACF,CAAC;;EAEL,CAAC"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { AgentMiddleware } from "../../types.cjs";
|
|
2
|
+
import * as _$_langchain_core_tools0 from "@langchain/core/tools";
|
|
3
|
+
import { InferInteropZodInput } from "@langchain/core/utils/types";
|
|
4
|
+
import { z } from "zod/v3";
|
|
5
|
+
|
|
6
|
+
//#region src/agents/middleware/provider/aws/promptCaching.d.ts
|
|
7
|
+
/**
 * Schema for the settings that can be supplied per invocation through the
 * runtime context. Every field is optional.
 */
declare const contextSchema: z.ZodObject<{
  /**
   * Whether to enable prompt caching.
   * @default true
   */
  enableCaching: z.ZodOptional<z.ZodBoolean>;
  /**
   * The time-to-live for the cached prompt.
   * @default "5m"
   */
  ttl: z.ZodOptional<z.ZodEnum<["5m", "1h"]>>;
  /**
   * The minimum number of messages required before caching is applied.
   * @default 1
   */
  minMessagesToCache: z.ZodOptional<z.ZodNumber>;
  /**
   * The behavior to take when an unsupported model is used.
   * - "ignore" will ignore the unsupported model and continue without caching.
   * - "warn" will warn the user and continue without caching.
   * - "raise" will raise an error and stop the agent.
   * @default "warn"
   */
  unsupportedModelBehavior: z.ZodOptional<z.ZodEnum<["ignore", "warn", "raise"]>>;
}, "strip", z.ZodTypeAny, {
  enableCaching?: boolean | undefined;
  ttl?: "1h" | "5m" | undefined;
  minMessagesToCache?: number | undefined;
  unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
}, {
  enableCaching?: boolean | undefined;
  ttl?: "1h" | "5m" | undefined;
  minMessagesToCache?: number | undefined;
  unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
}>;
/**
 * Options accepted by the Bedrock prompt caching middleware: the input shape
 * of `contextSchema`, with every field optional.
 */
type BedrockConversePromptCachingMiddlewareConfig = Partial<InferInteropZodInput<typeof contextSchema>>;
|
|
43
|
+
/**
|
|
44
|
+
* Creates a prompt caching middleware for AWS Bedrock Converse models to optimize API usage.
|
|
45
|
+
*
|
|
46
|
+
* This middleware automatically enables Bedrock's prompt caching when using AWS Bedrock Converse
|
|
47
|
+
* models. This can significantly reduce costs for applications with repetitive prompts, long
|
|
48
|
+
* system messages, or extensive conversation histories.
|
|
49
|
+
*
|
|
50
|
+
* ## How It Works
|
|
51
|
+
*
|
|
52
|
+
* The middleware intercepts model requests and sets a cache control signal that
|
|
53
|
+
* `ChatBedrockConverse` translates into Bedrock `cachePoint` breakpoints. Cache points are
|
|
54
|
+
* inserted after the system prompt, after the tool definitions, and after the final message, so
|
|
55
|
+
* the stable prefix of each request is cached. On subsequent requests with a matching prefix, the
|
|
56
|
+
* cached representations are reused, skipping redundant token processing. Exact placement varies
|
|
57
|
+
* by model (e.g. Amazon Nova models cache fewer breakpoints and ignore the `"1h"` TTL).
|
|
58
|
+
*
|
|
59
|
+
* ## Benefits
|
|
60
|
+
*
|
|
61
|
+
* - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly
|
|
62
|
+
 * - **Lower Latency**: Cached prompt prefixes are processed faster because their cached representations are reused instead of recomputed
|
|
63
|
+
* - **Better Scalability**: Reduced computational load enables handling more requests
|
|
64
|
+
* - **Consistent Performance**: Stable response times for repetitive queries
|
|
65
|
+
*
|
|
66
|
+
* @param middlewareOptions - Configuration options for the caching behavior
|
|
67
|
+
* @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)
|
|
68
|
+
* @param middlewareOptions.ttl - Cache time-to-live: `"5m"` for 5 minutes or `"1h"` for 1 hour (default: `"5m"`)
|
|
69
|
+
* @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied (default: `1`)
|
|
70
|
+
* @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `"warn"`)
|
|
71
|
+
*
|
|
72
|
+
* @returns A middleware instance that can be passed to `createAgent`
|
|
73
|
+
*
|
|
74
|
+
* @throws {Error} When `unsupportedModelBehavior` is `"raise"` and the model is not a
|
|
75
|
+
* cache-capable Bedrock Converse model — either a non-Bedrock provider, or a Bedrock
|
|
76
|
+
* Converse model outside the Anthropic Claude / Amazon Nova families.
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* Basic usage with default settings
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { createAgent } from "langchain";
|
|
82
|
+
* import { bedrockPromptCachingMiddleware } from "langchain";
|
|
83
|
+
*
|
|
84
|
+
* const agent = createAgent({
|
|
85
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
86
|
+
* middleware: [
|
|
87
|
+
* bedrockPromptCachingMiddleware()
|
|
88
|
+
* ]
|
|
89
|
+
* });
|
|
90
|
+
* ```
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* Custom configuration for longer conversations
|
|
94
|
+
* ```typescript
|
|
95
|
+
* const cachingMiddleware = bedrockPromptCachingMiddleware({
|
|
96
|
+
* ttl: "1h", // Cache for 1 hour instead of default 5 minutes
|
|
97
|
+
 * minMessagesToCache: 5 // Only cache once there are at least 5 messages
|
|
98
|
+
* });
|
|
99
|
+
*
|
|
100
|
+
* const agent = createAgent({
|
|
101
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
102
|
+
* systemPrompt: "You are a helpful assistant with deep knowledge of...", // Long system prompt
|
|
103
|
+
* middleware: [cachingMiddleware]
|
|
104
|
+
* });
|
|
105
|
+
* ```
|
|
106
|
+
*
|
|
107
|
+
* @example
|
|
108
|
+
* Conditional caching based on runtime context
|
|
109
|
+
* ```typescript
|
|
110
|
+
* const agent = createAgent({
|
|
111
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
112
|
+
* middleware: [
|
|
113
|
+
* bedrockPromptCachingMiddleware({
|
|
114
|
+
* enableCaching: true,
|
|
115
|
+
* ttl: "5m"
|
|
116
|
+
* })
|
|
117
|
+
* ]
|
|
118
|
+
* });
|
|
119
|
+
*
|
|
120
|
+
* // Disable caching for specific requests
|
|
121
|
+
* await agent.invoke(
|
|
122
|
+
* { messages: [new HumanMessage("Process this without caching")] },
|
|
123
|
+
* {
|
|
124
|
+
* configurable: {
|
|
125
|
+
* middleware_context: { enableCaching: false }
|
|
126
|
+
* }
|
|
127
|
+
* }
|
|
128
|
+
* );
|
|
129
|
+
* ```
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* Optimal setup for customer support chatbot
|
|
133
|
+
* ```typescript
|
|
134
|
+
* const supportAgent = createAgent({
|
|
135
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
136
|
+
* systemPrompt: `You are a customer support agent for ACME Corp.
|
|
137
|
+
*
|
|
138
|
+
* Company policies:
|
|
139
|
+
* - Always be polite and professional
|
|
140
|
+
* - Refer to knowledge base for product information
|
|
141
|
+
* - Escalate billing issues to human agents
|
|
142
|
+
* ... (extensive policies and guidelines)
|
|
143
|
+
* `,
|
|
144
|
+
* tools: [searchKnowledgeBase, createTicket, checkOrderStatus],
|
|
145
|
+
* middleware: [
|
|
146
|
+
* bedrockPromptCachingMiddleware({
|
|
147
|
+
* ttl: "1h", // Long TTL for stable system prompt
|
|
148
|
+
* minMessagesToCache: 1 // Cache immediately due to large system prompt
|
|
149
|
+
* })
|
|
150
|
+
* ]
|
|
151
|
+
* });
|
|
152
|
+
* ```
|
|
153
|
+
*
|
|
154
|
+
* @remarks
|
|
155
|
+
* - **Bedrock Converse Only**: This middleware only applies caching to AWS Bedrock Converse models. Other providers are handled per `unsupportedModelBehavior`
|
|
156
|
+
* - **Supported Families**: Bedrock prompt caching is only available on the **Anthropic Claude** and **Amazon Nova** model families. Other Bedrock Converse models (e.g. Mistral, Cohere, Meta) reject cache points at request time, so they are treated as unsupported and routed through `unsupportedModelBehavior`
|
|
157
|
+
* - **Automatic Application**: Caching is applied automatically when the message count reaches `minMessagesToCache`
|
|
158
|
+
* - **TTL Options**: Only supports "5m" (5 minutes) and "1h" (1 hour) as TTL values; actual support varies by model
|
|
159
|
+
* - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications
|
|
160
|
+
*
|
|
161
|
+
* @see {@link createAgent} for agent creation
|
|
162
|
+
* @see {@link https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html} AWS Bedrock prompt caching documentation
|
|
163
|
+
* @public
|
|
164
|
+
*/
|
|
165
|
+
declare function bedrockPromptCachingMiddleware(middlewareOptions?: BedrockConversePromptCachingMiddlewareConfig): AgentMiddleware<undefined, z.ZodObject<{
|
|
166
|
+
/**
|
|
167
|
+
* Whether to enable prompt caching.
|
|
168
|
+
* @default true
|
|
169
|
+
*/
|
|
170
|
+
enableCaching: z.ZodOptional<z.ZodBoolean>;
|
|
171
|
+
/**
|
|
172
|
+
* The time-to-live for the cached prompt.
|
|
173
|
+
* @default "5m"
|
|
174
|
+
*/
|
|
175
|
+
ttl: z.ZodOptional<z.ZodEnum<["5m", "1h"]>>;
|
|
176
|
+
/**
|
|
177
|
+
* The minimum number of messages required before caching is applied.
|
|
178
|
+
* @default 1
|
|
179
|
+
*/
|
|
180
|
+
minMessagesToCache: z.ZodOptional<z.ZodNumber>;
|
|
181
|
+
/**
|
|
182
|
+
* The behavior to take when an unsupported model is used.
|
|
183
|
+
* - "ignore" will ignore the unsupported model and continue without caching.
|
|
184
|
+
* - "warn" will warn the user and continue without caching.
|
|
185
|
+
* - "raise" will raise an error and stop the agent.
|
|
186
|
+
* @default "warn"
|
|
187
|
+
*/
|
|
188
|
+
unsupportedModelBehavior: z.ZodOptional<z.ZodEnum<["ignore", "warn", "raise"]>>;
|
|
189
|
+
}, "strip", z.ZodTypeAny, {
|
|
190
|
+
enableCaching?: boolean | undefined;
|
|
191
|
+
ttl?: "1h" | "5m" | undefined;
|
|
192
|
+
minMessagesToCache?: number | undefined;
|
|
193
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
194
|
+
}, {
|
|
195
|
+
enableCaching?: boolean | undefined;
|
|
196
|
+
ttl?: "1h" | "5m" | undefined;
|
|
197
|
+
minMessagesToCache?: number | undefined;
|
|
198
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
199
|
+
}>, {
|
|
200
|
+
enableCaching?: boolean | undefined;
|
|
201
|
+
ttl?: "1h" | "5m" | undefined;
|
|
202
|
+
minMessagesToCache?: number | undefined;
|
|
203
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
204
|
+
}, readonly (_$_langchain_core_tools0.ServerTool | _$_langchain_core_tools0.ClientTool)[], readonly []>;
|
|
205
|
+
//#endregion
|
|
206
|
+
export { BedrockConversePromptCachingMiddlewareConfig, bedrockPromptCachingMiddleware };
|
|
207
|
+
//# sourceMappingURL=promptCaching.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"promptCaching.d.cts","names":[],"sources":["../../../../../src/agents/middleware/provider/aws/promptCaching.ts"],"mappings":";;;;;;cAWM,aAAA,EAAa,CAAA,CAAA,SAAA;;;;AAVgD;;;;;;;;;;;;EAUhD;;;;;;;;;;;;;;;;;;;KAyBP,4CAAA,GAA+C,OAAA,CACzD,oBAAA,QAA4B,aAAA;;;;;;;;;;;;;;;;;;AAD9B;;;;;;;;;;;;;AAqIA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAAgB,8BAAA,CACd,iBAAA,GAAoB,4CAAA,8BAA4C,CAAA,CAAA,SAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;aAAA,wBAAA,CAAA,UAAA"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { AgentMiddleware } from "../../types.js";
|
|
2
|
+
import * as _$_langchain_core_tools0 from "@langchain/core/tools";
|
|
3
|
+
import { InferInteropZodInput } from "@langchain/core/utils/types";
|
|
4
|
+
import { z } from "zod/v3";
|
|
5
|
+
|
|
6
|
+
//#region src/agents/middleware/provider/aws/promptCaching.d.ts
|
|
7
|
+
declare const contextSchema: z.ZodObject<{
|
|
8
|
+
/**
|
|
9
|
+
* Whether to enable prompt caching.
|
|
10
|
+
* @default true
|
|
11
|
+
*/
|
|
12
|
+
enableCaching: z.ZodOptional<z.ZodBoolean>;
|
|
13
|
+
/**
|
|
14
|
+
* The time-to-live for the cached prompt.
|
|
15
|
+
* @default "5m"
|
|
16
|
+
*/
|
|
17
|
+
ttl: z.ZodOptional<z.ZodEnum<["5m", "1h"]>>;
|
|
18
|
+
/**
|
|
19
|
+
* The minimum number of messages required before caching is applied.
|
|
20
|
+
* @default 1
|
|
21
|
+
*/
|
|
22
|
+
minMessagesToCache: z.ZodOptional<z.ZodNumber>;
|
|
23
|
+
/**
|
|
24
|
+
* The behavior to take when an unsupported model is used.
|
|
25
|
+
* - "ignore" will ignore the unsupported model and continue without caching.
|
|
26
|
+
* - "warn" will warn the user and continue without caching.
|
|
27
|
+
* - "raise" will raise an error and stop the agent.
|
|
28
|
+
* @default "warn"
|
|
29
|
+
*/
|
|
30
|
+
unsupportedModelBehavior: z.ZodOptional<z.ZodEnum<["ignore", "warn", "raise"]>>;
|
|
31
|
+
}, "strip", z.ZodTypeAny, {
|
|
32
|
+
enableCaching?: boolean | undefined;
|
|
33
|
+
ttl?: "1h" | "5m" | undefined;
|
|
34
|
+
minMessagesToCache?: number | undefined;
|
|
35
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
36
|
+
}, {
|
|
37
|
+
enableCaching?: boolean | undefined;
|
|
38
|
+
ttl?: "1h" | "5m" | undefined;
|
|
39
|
+
minMessagesToCache?: number | undefined;
|
|
40
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
41
|
+
}>;
|
|
42
|
+
type BedrockConversePromptCachingMiddlewareConfig = Partial<InferInteropZodInput<typeof contextSchema>>;
|
|
43
|
+
/**
|
|
44
|
+
* Creates a prompt caching middleware for AWS Bedrock Converse models to optimize API usage.
|
|
45
|
+
*
|
|
46
|
+
* This middleware automatically enables Bedrock's prompt caching when using AWS Bedrock Converse
|
|
47
|
+
* models. This can significantly reduce costs for applications with repetitive prompts, long
|
|
48
|
+
* system messages, or extensive conversation histories.
|
|
49
|
+
*
|
|
50
|
+
* ## How It Works
|
|
51
|
+
*
|
|
52
|
+
* The middleware intercepts model requests and sets a cache control signal that
|
|
53
|
+
* `ChatBedrockConverse` translates into Bedrock `cachePoint` breakpoints. Cache points are
|
|
54
|
+
* inserted after the system prompt, after the tool definitions, and after the final message, so
|
|
55
|
+
* the stable prefix of each request is cached. On subsequent requests with a matching prefix, the
|
|
56
|
+
* cached representations are reused, skipping redundant token processing. Exact placement varies
|
|
57
|
+
* by model (e.g. Amazon Nova models cache fewer breakpoints and ignore the `"1h"` TTL).
|
|
58
|
+
*
|
|
59
|
+
* ## Benefits
|
|
60
|
+
*
|
|
61
|
+
* - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly
|
|
62
|
+
 * - **Lower Latency**: Cached prompt prefixes are processed faster because their cached representations are reused instead of recomputed
|
|
63
|
+
* - **Better Scalability**: Reduced computational load enables handling more requests
|
|
64
|
+
* - **Consistent Performance**: Stable response times for repetitive queries
|
|
65
|
+
*
|
|
66
|
+
* @param middlewareOptions - Configuration options for the caching behavior
|
|
67
|
+
* @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)
|
|
68
|
+
* @param middlewareOptions.ttl - Cache time-to-live: `"5m"` for 5 minutes or `"1h"` for 1 hour (default: `"5m"`)
|
|
69
|
+
* @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied (default: `1`)
|
|
70
|
+
* @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `"warn"`)
|
|
71
|
+
*
|
|
72
|
+
* @returns A middleware instance that can be passed to `createAgent`
|
|
73
|
+
*
|
|
74
|
+
* @throws {Error} When `unsupportedModelBehavior` is `"raise"` and the model is not a
|
|
75
|
+
* cache-capable Bedrock Converse model — either a non-Bedrock provider, or a Bedrock
|
|
76
|
+
* Converse model outside the Anthropic Claude / Amazon Nova families.
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* Basic usage with default settings
|
|
80
|
+
* ```typescript
|
|
81
|
+
* import { createAgent } from "langchain";
|
|
82
|
+
* import { bedrockPromptCachingMiddleware } from "langchain";
|
|
83
|
+
*
|
|
84
|
+
* const agent = createAgent({
|
|
85
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
86
|
+
* middleware: [
|
|
87
|
+
* bedrockPromptCachingMiddleware()
|
|
88
|
+
* ]
|
|
89
|
+
* });
|
|
90
|
+
* ```
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* Custom configuration for longer conversations
|
|
94
|
+
* ```typescript
|
|
95
|
+
* const cachingMiddleware = bedrockPromptCachingMiddleware({
|
|
96
|
+
* ttl: "1h", // Cache for 1 hour instead of default 5 minutes
|
|
97
|
+
 * minMessagesToCache: 5 // Only cache once there are at least 5 messages
|
|
98
|
+
* });
|
|
99
|
+
*
|
|
100
|
+
* const agent = createAgent({
|
|
101
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
102
|
+
* systemPrompt: "You are a helpful assistant with deep knowledge of...", // Long system prompt
|
|
103
|
+
* middleware: [cachingMiddleware]
|
|
104
|
+
* });
|
|
105
|
+
* ```
|
|
106
|
+
*
|
|
107
|
+
* @example
|
|
108
|
+
* Conditional caching based on runtime context
|
|
109
|
+
* ```typescript
|
|
110
|
+
* const agent = createAgent({
|
|
111
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
112
|
+
* middleware: [
|
|
113
|
+
* bedrockPromptCachingMiddleware({
|
|
114
|
+
* enableCaching: true,
|
|
115
|
+
* ttl: "5m"
|
|
116
|
+
* })
|
|
117
|
+
* ]
|
|
118
|
+
* });
|
|
119
|
+
*
|
|
120
|
+
* // Disable caching for specific requests
|
|
121
|
+
* await agent.invoke(
|
|
122
|
+
* { messages: [new HumanMessage("Process this without caching")] },
|
|
123
|
+
* {
|
|
124
|
+
* configurable: {
|
|
125
|
+
* middleware_context: { enableCaching: false }
|
|
126
|
+
* }
|
|
127
|
+
* }
|
|
128
|
+
* );
|
|
129
|
+
* ```
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* Optimal setup for customer support chatbot
|
|
133
|
+
* ```typescript
|
|
134
|
+
* const supportAgent = createAgent({
|
|
135
|
+
* model: "bedrock:anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
136
|
+
* systemPrompt: `You are a customer support agent for ACME Corp.
|
|
137
|
+
*
|
|
138
|
+
* Company policies:
|
|
139
|
+
* - Always be polite and professional
|
|
140
|
+
* - Refer to knowledge base for product information
|
|
141
|
+
* - Escalate billing issues to human agents
|
|
142
|
+
* ... (extensive policies and guidelines)
|
|
143
|
+
* `,
|
|
144
|
+
* tools: [searchKnowledgeBase, createTicket, checkOrderStatus],
|
|
145
|
+
* middleware: [
|
|
146
|
+
* bedrockPromptCachingMiddleware({
|
|
147
|
+
* ttl: "1h", // Long TTL for stable system prompt
|
|
148
|
+
* minMessagesToCache: 1 // Cache immediately due to large system prompt
|
|
149
|
+
* })
|
|
150
|
+
* ]
|
|
151
|
+
* });
|
|
152
|
+
* ```
|
|
153
|
+
*
|
|
154
|
+
* @remarks
|
|
155
|
+
* - **Bedrock Converse Only**: This middleware only applies caching to AWS Bedrock Converse models. Other providers are handled per `unsupportedModelBehavior`
|
|
156
|
+
* - **Supported Families**: Bedrock prompt caching is only available on the **Anthropic Claude** and **Amazon Nova** model families. Other Bedrock Converse models (e.g. Mistral, Cohere, Meta) reject cache points at request time, so they are treated as unsupported and routed through `unsupportedModelBehavior`
|
|
157
|
+
* - **Automatic Application**: Caching is applied automatically when the message count reaches `minMessagesToCache`
|
|
158
|
+
* - **TTL Options**: Only supports "5m" (5 minutes) and "1h" (1 hour) as TTL values; actual support varies by model
|
|
159
|
+
* - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications
|
|
160
|
+
*
|
|
161
|
+
* @see {@link createAgent} for agent creation
|
|
162
|
+
* @see {@link https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html} AWS Bedrock prompt caching documentation
|
|
163
|
+
* @public
|
|
164
|
+
*/
|
|
165
|
+
declare function bedrockPromptCachingMiddleware(middlewareOptions?: BedrockConversePromptCachingMiddlewareConfig): AgentMiddleware<undefined, z.ZodObject<{
|
|
166
|
+
/**
|
|
167
|
+
* Whether to enable prompt caching.
|
|
168
|
+
* @default true
|
|
169
|
+
*/
|
|
170
|
+
enableCaching: z.ZodOptional<z.ZodBoolean>;
|
|
171
|
+
/**
|
|
172
|
+
* The time-to-live for the cached prompt.
|
|
173
|
+
* @default "5m"
|
|
174
|
+
*/
|
|
175
|
+
ttl: z.ZodOptional<z.ZodEnum<["5m", "1h"]>>;
|
|
176
|
+
/**
|
|
177
|
+
* The minimum number of messages required before caching is applied.
|
|
178
|
+
* @default 1
|
|
179
|
+
*/
|
|
180
|
+
minMessagesToCache: z.ZodOptional<z.ZodNumber>;
|
|
181
|
+
/**
|
|
182
|
+
* The behavior to take when an unsupported model is used.
|
|
183
|
+
* - "ignore" will ignore the unsupported model and continue without caching.
|
|
184
|
+
* - "warn" will warn the user and continue without caching.
|
|
185
|
+
* - "raise" will raise an error and stop the agent.
|
|
186
|
+
* @default "warn"
|
|
187
|
+
*/
|
|
188
|
+
unsupportedModelBehavior: z.ZodOptional<z.ZodEnum<["ignore", "warn", "raise"]>>;
|
|
189
|
+
}, "strip", z.ZodTypeAny, {
|
|
190
|
+
enableCaching?: boolean | undefined;
|
|
191
|
+
ttl?: "1h" | "5m" | undefined;
|
|
192
|
+
minMessagesToCache?: number | undefined;
|
|
193
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
194
|
+
}, {
|
|
195
|
+
enableCaching?: boolean | undefined;
|
|
196
|
+
ttl?: "1h" | "5m" | undefined;
|
|
197
|
+
minMessagesToCache?: number | undefined;
|
|
198
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
199
|
+
}>, {
|
|
200
|
+
enableCaching?: boolean | undefined;
|
|
201
|
+
ttl?: "1h" | "5m" | undefined;
|
|
202
|
+
minMessagesToCache?: number | undefined;
|
|
203
|
+
unsupportedModelBehavior?: "ignore" | "raise" | "warn" | undefined;
|
|
204
|
+
}, readonly (_$_langchain_core_tools0.ServerTool | _$_langchain_core_tools0.ClientTool)[], readonly []>;
|
|
205
|
+
//#endregion
|
|
206
|
+
export { BedrockConversePromptCachingMiddlewareConfig, bedrockPromptCachingMiddleware };
|
|
207
|
+
//# sourceMappingURL=promptCaching.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"promptCaching.d.ts","names":[],"sources":["../../../../../src/agents/middleware/provider/aws/promptCaching.ts"],"mappings":";;;;;;cAWM,aAAA,EAAa,CAAA,CAAA,SAAA;;;;AAVgD;;;;;;;;;;;;EAUhD;;;;;;;;;;;;;;;;;;;KAyBP,4CAAA,GAA+C,OAAA,CACzD,oBAAA,QAA4B,aAAA;;;;;;;;;;;;;;;;;;AAD9B;;;;;;;;;;;;;AAqIA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAAgB,8BAAA,CACd,iBAAA,GAAoB,4CAAA,8BAA4C,CAAA,CAAA,SAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;aAAA,wBAAA,CAAA,UAAA"}
|