langchain 1.0.6 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +1 -1
- package/chat_models/universal.cjs +1 -0
- package/chat_models/universal.d.cts +1 -0
- package/chat_models/universal.d.ts +1 -0
- package/chat_models/universal.js +1 -0
- package/dist/agents/ReactAgent.cjs +1 -1
- package/dist/agents/ReactAgent.cjs.map +1 -1
- package/dist/agents/ReactAgent.js +2 -2
- package/dist/agents/ReactAgent.js.map +1 -1
- package/dist/agents/index.d.cts +0 -2
- package/dist/agents/index.d.ts +0 -2
- package/dist/agents/middleware/constants.cjs +16 -0
- package/dist/agents/middleware/constants.cjs.map +1 -0
- package/dist/agents/middleware/constants.js +15 -0
- package/dist/agents/middleware/constants.js.map +1 -0
- package/dist/agents/middleware/contextEditing.cjs.map +1 -1
- package/dist/agents/middleware/contextEditing.d.cts +23 -7
- package/dist/agents/middleware/contextEditing.d.ts +23 -7
- package/dist/agents/middleware/contextEditing.js.map +1 -1
- package/dist/agents/middleware/dynamicSystemPrompt.cjs +5 -2
- package/dist/agents/middleware/dynamicSystemPrompt.cjs.map +1 -1
- package/dist/agents/middleware/dynamicSystemPrompt.d.cts +2 -1
- package/dist/agents/middleware/dynamicSystemPrompt.d.ts +2 -1
- package/dist/agents/middleware/dynamicSystemPrompt.js +4 -2
- package/dist/agents/middleware/dynamicSystemPrompt.js.map +1 -1
- package/dist/agents/middleware/error.cjs +20 -0
- package/dist/agents/middleware/error.cjs.map +1 -0
- package/dist/agents/middleware/error.js +19 -0
- package/dist/agents/middleware/error.js.map +1 -0
- package/dist/agents/middleware/index.cjs +4 -2
- package/dist/agents/middleware/index.d.ts +18 -0
- package/dist/agents/middleware/index.js +4 -2
- package/dist/agents/middleware/modelRetry.cjs +162 -0
- package/dist/agents/middleware/modelRetry.cjs.map +1 -0
- package/dist/agents/middleware/modelRetry.d.cts +134 -0
- package/dist/agents/middleware/modelRetry.d.ts +134 -0
- package/dist/agents/middleware/modelRetry.js +161 -0
- package/dist/agents/middleware/modelRetry.js.map +1 -0
- package/dist/agents/middleware/{promptCaching.cjs → provider/anthropic/promptCaching.cjs} +3 -3
- package/dist/agents/middleware/provider/anthropic/promptCaching.cjs.map +1 -0
- package/dist/agents/middleware/{promptCaching.d.cts → provider/anthropic/promptCaching.d.cts} +2 -2
- package/dist/agents/middleware/{promptCaching.d.ts → provider/anthropic/promptCaching.d.ts} +2 -2
- package/dist/agents/middleware/{promptCaching.js → provider/anthropic/promptCaching.js} +2 -2
- package/dist/agents/middleware/provider/anthropic/promptCaching.js.map +1 -0
- package/dist/agents/middleware/provider/openai/moderation.cjs +299 -0
- package/dist/agents/middleware/provider/openai/moderation.cjs.map +1 -0
- package/dist/agents/middleware/provider/openai/moderation.d.cts +133 -0
- package/dist/agents/middleware/provider/openai/moderation.d.ts +133 -0
- package/dist/agents/middleware/provider/openai/moderation.js +298 -0
- package/dist/agents/middleware/provider/openai/moderation.js.map +1 -0
- package/dist/agents/middleware/summarization.d.cts +0 -4
- package/dist/agents/middleware/summarization.d.ts +0 -4
- package/dist/agents/middleware/todoListMiddleware.cjs +1 -1
- package/dist/agents/middleware/todoListMiddleware.cjs.map +1 -1
- package/dist/agents/middleware/todoListMiddleware.js +1 -1
- package/dist/agents/middleware/todoListMiddleware.js.map +1 -1
- package/dist/agents/middleware/toolRetry.cjs +32 -44
- package/dist/agents/middleware/toolRetry.cjs.map +1 -1
- package/dist/agents/middleware/toolRetry.d.cts +16 -36
- package/dist/agents/middleware/toolRetry.d.ts +16 -36
- package/dist/agents/middleware/toolRetry.js +32 -44
- package/dist/agents/middleware/toolRetry.js.map +1 -1
- package/dist/agents/middleware/types.d.cts +9 -10
- package/dist/agents/middleware/types.d.ts +9 -10
- package/dist/agents/middleware/utils.cjs +23 -0
- package/dist/agents/middleware/utils.cjs.map +1 -1
- package/dist/agents/middleware/utils.d.ts +2 -0
- package/dist/agents/middleware/utils.js +23 -1
- package/dist/agents/middleware/utils.js.map +1 -1
- package/dist/agents/nodes/AgentNode.cjs +50 -22
- package/dist/agents/nodes/AgentNode.cjs.map +1 -1
- package/dist/agents/nodes/AgentNode.js +52 -24
- package/dist/agents/nodes/AgentNode.js.map +1 -1
- package/dist/agents/nodes/types.d.cts +39 -3
- package/dist/agents/nodes/types.d.ts +39 -3
- package/dist/agents/responses.d.cts +0 -19
- package/dist/agents/responses.d.ts +0 -19
- package/dist/agents/runtime.d.ts +1 -0
- package/dist/agents/tests/utils.cjs +10 -1
- package/dist/agents/tests/utils.cjs.map +1 -1
- package/dist/agents/tests/utils.js +10 -1
- package/dist/agents/tests/utils.js.map +1 -1
- package/dist/agents/types.d.cts +68 -2
- package/dist/agents/types.d.ts +68 -2
- package/dist/agents/utils.cjs +15 -12
- package/dist/agents/utils.cjs.map +1 -1
- package/dist/agents/utils.js +16 -13
- package/dist/agents/utils.js.map +1 -1
- package/dist/chat_models/universal.cjs +50 -16
- package/dist/chat_models/universal.cjs.map +1 -1
- package/dist/chat_models/universal.d.cts +19 -1
- package/dist/chat_models/universal.d.ts +19 -1
- package/dist/chat_models/universal.js +50 -16
- package/dist/chat_models/universal.js.map +1 -1
- package/dist/index.cjs +8 -2
- package/dist/index.d.cts +5 -3
- package/dist/index.d.ts +6 -3
- package/dist/index.js +7 -3
- package/dist/load/import_constants.cjs +2 -1
- package/dist/load/import_constants.cjs.map +1 -1
- package/dist/load/import_constants.js +2 -1
- package/dist/load/import_constants.js.map +1 -1
- package/dist/load/import_map.cjs +2 -19
- package/dist/load/import_map.cjs.map +1 -1
- package/dist/load/import_map.js +2 -19
- package/dist/load/import_map.js.map +1 -1
- package/hub/node.cjs +1 -0
- package/hub/node.d.cts +1 -0
- package/hub/node.d.ts +1 -0
- package/hub/node.js +1 -0
- package/hub.cjs +1 -0
- package/hub.d.cts +1 -0
- package/hub.d.ts +1 -0
- package/hub.js +1 -0
- package/load/serializable.cjs +1 -0
- package/load/serializable.d.cts +1 -0
- package/load/serializable.d.ts +1 -0
- package/load/serializable.js +1 -0
- package/load.cjs +1 -0
- package/load.d.cts +1 -0
- package/load.d.ts +1 -0
- package/load.js +1 -0
- package/package.json +66 -53
- package/storage/encoder_backed.cjs +1 -0
- package/storage/encoder_backed.d.cts +1 -0
- package/storage/encoder_backed.d.ts +1 -0
- package/storage/encoder_backed.js +1 -0
- package/storage/file_system.cjs +1 -0
- package/storage/file_system.d.cts +1 -0
- package/storage/file_system.d.ts +1 -0
- package/storage/file_system.js +1 -0
- package/storage/in_memory.cjs +1 -0
- package/storage/in_memory.d.cts +1 -0
- package/storage/in_memory.d.ts +1 -0
- package/storage/in_memory.js +1 -0
- package/dist/agents/ReactAgent.d.cts.map +0 -1
- package/dist/agents/ReactAgent.d.ts.map +0 -1
- package/dist/agents/constants.cjs +0 -7
- package/dist/agents/constants.cjs.map +0 -1
- package/dist/agents/constants.d.cts.map +0 -1
- package/dist/agents/constants.d.ts.map +0 -1
- package/dist/agents/constants.js +0 -6
- package/dist/agents/constants.js.map +0 -1
- package/dist/agents/errors.d.cts.map +0 -1
- package/dist/agents/errors.d.ts.map +0 -1
- package/dist/agents/index.d.cts.map +0 -1
- package/dist/agents/index.d.ts.map +0 -1
- package/dist/agents/middleware/contextEditing.d.cts.map +0 -1
- package/dist/agents/middleware/contextEditing.d.ts.map +0 -1
- package/dist/agents/middleware/dynamicSystemPrompt.d.cts.map +0 -1
- package/dist/agents/middleware/dynamicSystemPrompt.d.ts.map +0 -1
- package/dist/agents/middleware/hitl.d.cts.map +0 -1
- package/dist/agents/middleware/hitl.d.ts.map +0 -1
- package/dist/agents/middleware/llmToolSelector.d.cts.map +0 -1
- package/dist/agents/middleware/llmToolSelector.d.ts.map +0 -1
- package/dist/agents/middleware/modelCallLimit.d.cts.map +0 -1
- package/dist/agents/middleware/modelCallLimit.d.ts.map +0 -1
- package/dist/agents/middleware/modelFallback.d.cts.map +0 -1
- package/dist/agents/middleware/modelFallback.d.ts.map +0 -1
- package/dist/agents/middleware/pii.d.cts.map +0 -1
- package/dist/agents/middleware/pii.d.ts.map +0 -1
- package/dist/agents/middleware/piiRedaction.d.cts.map +0 -1
- package/dist/agents/middleware/piiRedaction.d.ts.map +0 -1
- package/dist/agents/middleware/promptCaching.cjs.map +0 -1
- package/dist/agents/middleware/promptCaching.d.cts.map +0 -1
- package/dist/agents/middleware/promptCaching.d.ts.map +0 -1
- package/dist/agents/middleware/promptCaching.js.map +0 -1
- package/dist/agents/middleware/summarization.d.cts.map +0 -1
- package/dist/agents/middleware/summarization.d.ts.map +0 -1
- package/dist/agents/middleware/todoListMiddleware.d.cts.map +0 -1
- package/dist/agents/middleware/todoListMiddleware.d.ts.map +0 -1
- package/dist/agents/middleware/toolCallLimit.d.cts.map +0 -1
- package/dist/agents/middleware/toolCallLimit.d.ts.map +0 -1
- package/dist/agents/middleware/toolEmulator.d.cts.map +0 -1
- package/dist/agents/middleware/toolEmulator.d.ts.map +0 -1
- package/dist/agents/middleware/toolRetry.d.cts.map +0 -1
- package/dist/agents/middleware/toolRetry.d.ts.map +0 -1
- package/dist/agents/middleware/types.d.cts.map +0 -1
- package/dist/agents/middleware/types.d.ts.map +0 -1
- package/dist/agents/middleware/utils.d.cts.map +0 -1
- package/dist/agents/middleware/utils.d.ts.map +0 -1
- package/dist/agents/middleware.d.cts.map +0 -1
- package/dist/agents/middleware.d.ts.map +0 -1
- package/dist/agents/nodes/types.d.cts.map +0 -1
- package/dist/agents/nodes/types.d.ts.map +0 -1
- package/dist/agents/responses.d.cts.map +0 -1
- package/dist/agents/responses.d.ts.map +0 -1
- package/dist/agents/runtime.d.cts.map +0 -1
- package/dist/agents/runtime.d.ts.map +0 -1
- package/dist/agents/tests/utils.d.cts.map +0 -1
- package/dist/agents/tests/utils.d.ts.map +0 -1
- package/dist/agents/types.d.cts.map +0 -1
- package/dist/agents/types.d.ts.map +0 -1
- package/dist/chat_models/universal.d.cts.map +0 -1
- package/dist/chat_models/universal.d.ts.map +0 -1
- package/dist/hub/base.d.cts.map +0 -1
- package/dist/hub/base.d.ts.map +0 -1
- package/dist/hub/index.d.cts.map +0 -1
- package/dist/hub/index.d.ts.map +0 -1
- package/dist/hub/node.d.cts.map +0 -1
- package/dist/hub/node.d.ts.map +0 -1
- package/dist/load/import_type.d.cts.map +0 -1
- package/dist/load/import_type.d.ts.map +0 -1
- package/dist/load/index.d.cts.map +0 -1
- package/dist/load/index.d.ts.map +0 -1
- package/dist/storage/encoder_backed.d.cts.map +0 -1
- package/dist/storage/encoder_backed.d.ts.map +0 -1
- package/dist/storage/file_system.d.cts.map +0 -1
- package/dist/storage/file_system.d.ts.map +0 -1
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { AgentMiddleware } from "./types.cjs";
|
|
2
|
+
import { z } from "zod/v3";
|
|
3
|
+
|
|
4
|
+
//#region src/agents/middleware/modelRetry.d.ts
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Configuration options for the Model Retry Middleware.
|
|
8
|
+
*/
|
|
9
|
+
declare const ModelRetryMiddlewareOptionsSchema: z.ZodObject<{
|
|
10
|
+
/**
|
|
11
|
+
* Behavior when all retries are exhausted or the error does not qualify for a retry. Options:
|
|
12
|
+
* - `"continue"` (default): Return an AIMessage with error details, allowing
|
|
13
|
+
* the agent to potentially handle the failure gracefully.
|
|
14
|
+
* - `"error"`: Re-raise the exception, stopping agent execution.
|
|
15
|
+
* - Custom function: Function that takes the exception and returns a string
|
|
16
|
+
* for the AIMessage content, allowing custom error formatting.
|
|
17
|
+
*/
|
|
18
|
+
onFailure: z.ZodDefault<z.ZodUnion<[z.ZodLiteral<"error">, z.ZodLiteral<"continue">, z.ZodFunction<z.ZodTuple<[z.ZodType<Error, z.ZodTypeDef, Error>], z.ZodUnknown>, z.ZodString>]>>;
|
|
19
|
+
} & {
|
|
20
|
+
maxRetries: z.ZodDefault<z.ZodNumber>;
|
|
21
|
+
retryOn: z.ZodDefault<z.ZodUnion<[z.ZodFunction<z.ZodTuple<[z.ZodType<Error, z.ZodTypeDef, Error>], z.ZodUnknown>, z.ZodBoolean>, z.ZodArray<z.ZodType<new (...args: any[]) => Error, z.ZodTypeDef, new (...args: any[]) => Error>, "many">]>>;
|
|
22
|
+
backoffFactor: z.ZodDefault<z.ZodNumber>;
|
|
23
|
+
initialDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
24
|
+
maxDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
25
|
+
jitter: z.ZodDefault<z.ZodBoolean>;
|
|
26
|
+
}, "strip", z.ZodTypeAny, {
|
|
27
|
+
maxRetries: number;
|
|
28
|
+
retryOn: (new (...args: any[]) => Error)[] | ((args_0: Error, ...args: unknown[]) => boolean);
|
|
29
|
+
backoffFactor: number;
|
|
30
|
+
initialDelayMs: number;
|
|
31
|
+
maxDelayMs: number;
|
|
32
|
+
jitter: boolean;
|
|
33
|
+
onFailure: "continue" | "error" | ((args_0: Error, ...args: unknown[]) => string);
|
|
34
|
+
}, {
|
|
35
|
+
maxRetries?: number | undefined;
|
|
36
|
+
retryOn?: (new (...args: any[]) => Error)[] | ((args_0: Error, ...args: unknown[]) => boolean) | undefined;
|
|
37
|
+
backoffFactor?: number | undefined;
|
|
38
|
+
initialDelayMs?: number | undefined;
|
|
39
|
+
maxDelayMs?: number | undefined;
|
|
40
|
+
jitter?: boolean | undefined;
|
|
41
|
+
onFailure?: "continue" | "error" | ((args_0: Error, ...args: unknown[]) => string) | undefined;
|
|
42
|
+
}>;
|
|
43
|
+
type ModelRetryMiddlewareConfig = z.input<typeof ModelRetryMiddlewareOptionsSchema>;
|
|
44
|
+
/**
|
|
45
|
+
* Middleware that automatically retries failed model calls with configurable backoff.
|
|
46
|
+
*
|
|
47
|
+
* Supports retrying on specific exceptions and exponential backoff.
|
|
48
|
+
*
|
|
49
|
+
* @example Basic usage with default settings (2 retries, exponential backoff)
|
|
50
|
+
* ```ts
|
|
51
|
+
* import { createAgent, modelRetryMiddleware } from "langchain";
|
|
52
|
+
*
|
|
53
|
+
* const agent = createAgent({
|
|
54
|
+
* model: "openai:gpt-4o",
|
|
55
|
+
* tools: [searchTool],
|
|
56
|
+
* middleware: [modelRetryMiddleware()],
|
|
57
|
+
* });
|
|
58
|
+
* ```
|
|
59
|
+
*
|
|
60
|
+
* @example Retry specific exceptions only
|
|
61
|
+
* ```ts
|
|
62
|
+
* import { modelRetryMiddleware } from "langchain";
|
|
63
|
+
*
|
|
64
|
+
* const retry = modelRetryMiddleware({
|
|
65
|
+
* maxRetries: 4,
|
|
66
|
+
* retryOn: [TimeoutError, NetworkError],
|
|
67
|
+
* backoffFactor: 1.5,
|
|
68
|
+
* });
|
|
69
|
+
* ```
|
|
70
|
+
*
|
|
71
|
+
* @example Custom exception filtering
|
|
72
|
+
* ```ts
|
|
73
|
+
* function shouldRetry(error: Error): boolean {
|
|
74
|
+
* // Only retry on rate limit errors
|
|
75
|
+
* if (error.name === "RateLimitError") {
|
|
76
|
+
* return true;
|
|
77
|
+
* }
|
|
78
|
+
* // Or check for specific HTTP status codes
|
|
79
|
+
* if (error.name === "HTTPError" && "statusCode" in error) {
|
|
80
|
+
* const statusCode = (error as any).statusCode;
|
|
81
|
+
* return statusCode === 429 || statusCode === 503;
|
|
82
|
+
* }
|
|
83
|
+
* return false;
|
|
84
|
+
* }
|
|
85
|
+
*
|
|
86
|
+
* const retry = modelRetryMiddleware({
|
|
87
|
+
* maxRetries: 3,
|
|
88
|
+
* retryOn: shouldRetry,
|
|
89
|
+
* });
|
|
90
|
+
* ```
|
|
91
|
+
*
|
|
92
|
+
* @example Return error message instead of raising
|
|
93
|
+
* ```ts
|
|
94
|
+
* const retry = modelRetryMiddleware({
|
|
95
|
+
* maxRetries: 4,
|
|
96
|
+
* onFailure: "continue", // Return AIMessage with error instead of throwing
|
|
97
|
+
* });
|
|
98
|
+
* ```
|
|
99
|
+
*
|
|
100
|
+
* @example Custom error message formatting
|
|
101
|
+
* ```ts
|
|
102
|
+
* const formatError = (error: Error) =>
|
|
103
|
+
* `Model call failed: ${error.message}. Please try again later.`;
|
|
104
|
+
*
|
|
105
|
+
* const retry = modelRetryMiddleware({
|
|
106
|
+
* maxRetries: 4,
|
|
107
|
+
* onFailure: formatError,
|
|
108
|
+
* });
|
|
109
|
+
* ```
|
|
110
|
+
*
|
|
111
|
+
* @example Constant backoff (no exponential growth)
|
|
112
|
+
* ```ts
|
|
113
|
+
* const retry = modelRetryMiddleware({
|
|
114
|
+
* maxRetries: 5,
|
|
115
|
+
* backoffFactor: 0.0, // No exponential growth
|
|
116
|
+
* initialDelayMs: 2000, // Always wait 2 seconds
|
|
117
|
+
* });
|
|
118
|
+
* ```
|
|
119
|
+
*
|
|
120
|
+
* @example Raise exception on failure
|
|
121
|
+
* ```ts
|
|
122
|
+
* const retry = modelRetryMiddleware({
|
|
123
|
+
* maxRetries: 2,
|
|
124
|
+
* onFailure: "error", // Re-raise exception instead of returning message
|
|
125
|
+
* });
|
|
126
|
+
* ```
|
|
127
|
+
*
|
|
128
|
+
* @param config - Configuration options for the retry middleware
|
|
129
|
+
* @returns A middleware instance that handles model failures with retries
|
|
130
|
+
*/
|
|
131
|
+
declare function modelRetryMiddleware(config?: ModelRetryMiddlewareConfig): AgentMiddleware;
|
|
132
|
+
//#endregion
|
|
133
|
+
export { ModelRetryMiddlewareConfig, modelRetryMiddleware };
|
|
134
|
+
//# sourceMappingURL=modelRetry.d.cts.map
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { AgentMiddleware } from "./types.js";
|
|
2
|
+
import { z } from "zod/v3";
|
|
3
|
+
|
|
4
|
+
//#region src/agents/middleware/modelRetry.d.ts
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Configuration options for the Model Retry Middleware.
|
|
8
|
+
*/
|
|
9
|
+
declare const ModelRetryMiddlewareOptionsSchema: z.ZodObject<{
|
|
10
|
+
/**
|
|
11
|
+
* Behavior when all retries are exhausted or the error does not qualify for a retry. Options:
|
|
12
|
+
* - `"continue"` (default): Return an AIMessage with error details, allowing
|
|
13
|
+
* the agent to potentially handle the failure gracefully.
|
|
14
|
+
* - `"error"`: Re-raise the exception, stopping agent execution.
|
|
15
|
+
* - Custom function: Function that takes the exception and returns a string
|
|
16
|
+
* for the AIMessage content, allowing custom error formatting.
|
|
17
|
+
*/
|
|
18
|
+
onFailure: z.ZodDefault<z.ZodUnion<[z.ZodLiteral<"error">, z.ZodLiteral<"continue">, z.ZodFunction<z.ZodTuple<[z.ZodType<Error, z.ZodTypeDef, Error>], z.ZodUnknown>, z.ZodString>]>>;
|
|
19
|
+
} & {
|
|
20
|
+
maxRetries: z.ZodDefault<z.ZodNumber>;
|
|
21
|
+
retryOn: z.ZodDefault<z.ZodUnion<[z.ZodFunction<z.ZodTuple<[z.ZodType<Error, z.ZodTypeDef, Error>], z.ZodUnknown>, z.ZodBoolean>, z.ZodArray<z.ZodType<new (...args: any[]) => Error, z.ZodTypeDef, new (...args: any[]) => Error>, "many">]>>;
|
|
22
|
+
backoffFactor: z.ZodDefault<z.ZodNumber>;
|
|
23
|
+
initialDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
24
|
+
maxDelayMs: z.ZodDefault<z.ZodNumber>;
|
|
25
|
+
jitter: z.ZodDefault<z.ZodBoolean>;
|
|
26
|
+
}, "strip", z.ZodTypeAny, {
|
|
27
|
+
maxRetries: number;
|
|
28
|
+
retryOn: (new (...args: any[]) => Error)[] | ((args_0: Error, ...args: unknown[]) => boolean);
|
|
29
|
+
backoffFactor: number;
|
|
30
|
+
initialDelayMs: number;
|
|
31
|
+
maxDelayMs: number;
|
|
32
|
+
jitter: boolean;
|
|
33
|
+
onFailure: "continue" | "error" | ((args_0: Error, ...args: unknown[]) => string);
|
|
34
|
+
}, {
|
|
35
|
+
maxRetries?: number | undefined;
|
|
36
|
+
retryOn?: (new (...args: any[]) => Error)[] | ((args_0: Error, ...args: unknown[]) => boolean) | undefined;
|
|
37
|
+
backoffFactor?: number | undefined;
|
|
38
|
+
initialDelayMs?: number | undefined;
|
|
39
|
+
maxDelayMs?: number | undefined;
|
|
40
|
+
jitter?: boolean | undefined;
|
|
41
|
+
onFailure?: "continue" | "error" | ((args_0: Error, ...args: unknown[]) => string) | undefined;
|
|
42
|
+
}>;
|
|
43
|
+
type ModelRetryMiddlewareConfig = z.input<typeof ModelRetryMiddlewareOptionsSchema>;
|
|
44
|
+
/**
|
|
45
|
+
* Middleware that automatically retries failed model calls with configurable backoff.
|
|
46
|
+
*
|
|
47
|
+
* Supports retrying on specific exceptions and exponential backoff.
|
|
48
|
+
*
|
|
49
|
+
* @example Basic usage with default settings (2 retries, exponential backoff)
|
|
50
|
+
* ```ts
|
|
51
|
+
* import { createAgent, modelRetryMiddleware } from "langchain";
|
|
52
|
+
*
|
|
53
|
+
* const agent = createAgent({
|
|
54
|
+
* model: "openai:gpt-4o",
|
|
55
|
+
* tools: [searchTool],
|
|
56
|
+
* middleware: [modelRetryMiddleware()],
|
|
57
|
+
* });
|
|
58
|
+
* ```
|
|
59
|
+
*
|
|
60
|
+
* @example Retry specific exceptions only
|
|
61
|
+
* ```ts
|
|
62
|
+
* import { modelRetryMiddleware } from "langchain";
|
|
63
|
+
*
|
|
64
|
+
* const retry = modelRetryMiddleware({
|
|
65
|
+
* maxRetries: 4,
|
|
66
|
+
* retryOn: [TimeoutError, NetworkError],
|
|
67
|
+
* backoffFactor: 1.5,
|
|
68
|
+
* });
|
|
69
|
+
* ```
|
|
70
|
+
*
|
|
71
|
+
* @example Custom exception filtering
|
|
72
|
+
* ```ts
|
|
73
|
+
* function shouldRetry(error: Error): boolean {
|
|
74
|
+
* // Only retry on rate limit errors
|
|
75
|
+
* if (error.name === "RateLimitError") {
|
|
76
|
+
* return true;
|
|
77
|
+
* }
|
|
78
|
+
* // Or check for specific HTTP status codes
|
|
79
|
+
* if (error.name === "HTTPError" && "statusCode" in error) {
|
|
80
|
+
* const statusCode = (error as any).statusCode;
|
|
81
|
+
* return statusCode === 429 || statusCode === 503;
|
|
82
|
+
* }
|
|
83
|
+
* return false;
|
|
84
|
+
* }
|
|
85
|
+
*
|
|
86
|
+
* const retry = modelRetryMiddleware({
|
|
87
|
+
* maxRetries: 3,
|
|
88
|
+
* retryOn: shouldRetry,
|
|
89
|
+
* });
|
|
90
|
+
* ```
|
|
91
|
+
*
|
|
92
|
+
* @example Return error message instead of raising
|
|
93
|
+
* ```ts
|
|
94
|
+
* const retry = modelRetryMiddleware({
|
|
95
|
+
* maxRetries: 4,
|
|
96
|
+
* onFailure: "continue", // Return AIMessage with error instead of throwing
|
|
97
|
+
* });
|
|
98
|
+
* ```
|
|
99
|
+
*
|
|
100
|
+
* @example Custom error message formatting
|
|
101
|
+
* ```ts
|
|
102
|
+
* const formatError = (error: Error) =>
|
|
103
|
+
* `Model call failed: ${error.message}. Please try again later.`;
|
|
104
|
+
*
|
|
105
|
+
* const retry = modelRetryMiddleware({
|
|
106
|
+
* maxRetries: 4,
|
|
107
|
+
* onFailure: formatError,
|
|
108
|
+
* });
|
|
109
|
+
* ```
|
|
110
|
+
*
|
|
111
|
+
* @example Constant backoff (no exponential growth)
|
|
112
|
+
* ```ts
|
|
113
|
+
* const retry = modelRetryMiddleware({
|
|
114
|
+
* maxRetries: 5,
|
|
115
|
+
* backoffFactor: 0.0, // No exponential growth
|
|
116
|
+
* initialDelayMs: 2000, // Always wait 2 seconds
|
|
117
|
+
* });
|
|
118
|
+
* ```
|
|
119
|
+
*
|
|
120
|
+
* @example Raise exception on failure
|
|
121
|
+
* ```ts
|
|
122
|
+
* const retry = modelRetryMiddleware({
|
|
123
|
+
* maxRetries: 2,
|
|
124
|
+
* onFailure: "error", // Re-raise exception instead of returning message
|
|
125
|
+
* });
|
|
126
|
+
* ```
|
|
127
|
+
*
|
|
128
|
+
* @param config - Configuration options for the retry middleware
|
|
129
|
+
* @returns A middleware instance that handles model failures with retries
|
|
130
|
+
*/
|
|
131
|
+
declare function modelRetryMiddleware(config?: ModelRetryMiddlewareConfig): AgentMiddleware;
|
|
132
|
+
//#endregion
|
|
133
|
+
export { ModelRetryMiddlewareConfig, modelRetryMiddleware };
|
|
134
|
+
//# sourceMappingURL=modelRetry.d.ts.map
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { calculateRetryDelay, sleep } from "./utils.js";
|
|
2
|
+
import { createMiddleware } from "../middleware.js";
|
|
3
|
+
import { RetrySchema } from "./constants.js";
|
|
4
|
+
import { InvalidRetryConfigError } from "./error.js";
|
|
5
|
+
import { AIMessage } from "@langchain/core/messages";
|
|
6
|
+
import { z } from "zod/v3";
|
|
7
|
+
|
|
8
|
+
//#region src/agents/middleware/modelRetry.ts
|
|
9
|
+
/**
 * Accepted values for `onFailure`: the literal `"error"`, the literal
 * `"continue"` (used when the option is omitted), or a function that takes
 * an `Error` and returns a string.
 */
const onFailureOption = z.union([
	z.literal("error"),
	z.literal("continue"),
	z.function().args(z.instanceof(Error)).returns(z.string())
]).default("continue");
/**
 * Zod schema for the Model Retry Middleware options: the `onFailure` option
 * merged with the shared retry fields from `RetrySchema`.
 */
const ModelRetryMiddlewareOptionsSchema = z.object({ onFailure: onFailureOption }).merge(RetrySchema);
|
|
17
|
+
/**
 * Middleware that automatically retries failed model calls with configurable backoff.
 *
 * Supports retrying on specific exceptions and exponential backoff.
 *
 * Each model call is attempted at most `maxRetries + 1` times. When `retryOn` is an
 * array, an error qualifies for a retry if it is an instance of one of the listed
 * classes (subclasses included). Errors that do not qualify, and errors that are still
 * failing after the last retry, are both handed to `onFailure`.
 *
 * @example Basic usage with default settings (2 retries, exponential backoff)
 * ```ts
 * import { createAgent, modelRetryMiddleware } from "langchain";
 *
 * const agent = createAgent({
 *   model: "openai:gpt-4o",
 *   tools: [searchTool],
 *   middleware: [modelRetryMiddleware()],
 * });
 * ```
 *
 * @example Retry specific exceptions only
 * ```ts
 * import { modelRetryMiddleware } from "langchain";
 *
 * const retry = modelRetryMiddleware({
 *   maxRetries: 4,
 *   retryOn: [TimeoutError, NetworkError],
 *   backoffFactor: 1.5,
 * });
 * ```
 *
 * @example Custom exception filtering
 * ```ts
 * function shouldRetry(error: Error): boolean {
 *   // Only retry on rate limit errors
 *   if (error.name === "RateLimitError") {
 *     return true;
 *   }
 *   // Or check for specific HTTP status codes
 *   if (error.name === "HTTPError" && "statusCode" in error) {
 *     const statusCode = (error as any).statusCode;
 *     return statusCode === 429 || statusCode === 503;
 *   }
 *   return false;
 * }
 *
 * const retry = modelRetryMiddleware({
 *   maxRetries: 3,
 *   retryOn: shouldRetry,
 * });
 * ```
 *
 * @example Return error message instead of raising
 * ```ts
 * const retry = modelRetryMiddleware({
 *   maxRetries: 4,
 *   onFailure: "continue", // Return AIMessage with error instead of throwing
 * });
 * ```
 *
 * @example Custom error message formatting
 * ```ts
 * const formatError = (error: Error) =>
 *   `Model call failed: ${error.message}. Please try again later.`;
 *
 * const retry = modelRetryMiddleware({
 *   maxRetries: 4,
 *   onFailure: formatError,
 * });
 * ```
 *
 * @example Constant backoff (no exponential growth)
 * ```ts
 * const retry = modelRetryMiddleware({
 *   maxRetries: 5,
 *   backoffFactor: 0.0, // No exponential growth
 *   initialDelayMs: 2000, // Always wait 2 seconds
 * });
 * ```
 *
 * @example Raise exception on failure
 * ```ts
 * const retry = modelRetryMiddleware({
 *   maxRetries: 2,
 *   onFailure: "error", // Re-raise exception instead of returning message
 * });
 * ```
 *
 * @param config - Configuration options for the retry middleware
 * @returns A middleware instance that handles model failures with retries
 * @throws {InvalidRetryConfigError} When `config` does not pass schema validation
 */
function modelRetryMiddleware(config = {}) {
	const { success, error, data } = ModelRetryMiddlewareOptionsSchema.safeParse(config);
	if (!success) throw new InvalidRetryConfigError(error);
	const { maxRetries, retryOn, onFailure, backoffFactor, initialDelayMs, maxDelayMs, jitter } = data;
	const delayConfig = {
		backoffFactor,
		initialDelayMs,
		maxDelayMs,
		jitter
	};
	/**
	 * Check if the exception should trigger a retry.
	 */
	const shouldRetryException = (err) => {
		if (typeof retryOn === "function") return retryOn(err);
		return retryOn.some((ErrorConstructor) => {
			// `instanceof` lets a listed base class cover its subclasses; the typeof guard
			// keeps a non-callable entry from making `instanceof` itself throw.
			if (typeof ErrorConstructor === "function" && err instanceof ErrorConstructor) return true;
			// Preserve the earlier exact-constructor match for anything `instanceof` misses.
			return err.constructor === ErrorConstructor;
		});
	};
	/**
	 * Format the failure message when retries are exhausted.
	 */
	const formatFailureMessage = (err, attemptsMade) => {
		// A thrown object without a constructor (e.g. null prototype) must not crash the handler.
		const ctor = err.constructor;
		const errorType = typeof ctor === "function" ? ctor.name : "Error";
		const attemptWord = attemptsMade === 1 ? "attempt" : "attempts";
		return `Model call failed after ${attemptsMade} ${attemptWord} with ${errorType}: ${err.message}`;
	};
	/**
	 * Resolve a final failure: rethrow when `onFailure` is "error", otherwise return an
	 * AIMessage whose content comes from the custom formatter or the default message.
	 */
	const handleFailure = (err, attemptsMade) => {
		if (onFailure === "error") throw err;
		const content = typeof onFailure === "function" ? onFailure(err) : formatFailureMessage(err, attemptsMade);
		return new AIMessage({ content });
	};
	return createMiddleware({
		name: "modelRetryMiddleware",
		contextSchema: ModelRetryMiddlewareOptionsSchema,
		wrapModelCall: async (request, handler) => {
			// Initial attempt + retries
			for (let attempt = 0; attempt <= maxRetries; attempt++) {
				try {
					return await handler(request);
				} catch (caught) {
					const attemptsMade = attempt + 1; // attempt is 0-indexed
					// Anything thrown that does not carry a `message` is wrapped in an Error.
					const err = caught && typeof caught === "object" && "message" in caught ? caught : new Error(String(caught));
					if (!shouldRetryException(err)) return handleFailure(err, attemptsMade);
					if (attempt >= maxRetries) return handleFailure(err, attemptsMade);
					const delay = calculateRetryDelay(delayConfig, attempt);
					if (delay > 0) await sleep(delay);
				}
			}
			throw new Error("Unexpected: retry loop completed without returning");
		}
	});
}
|
|
158
|
+
|
|
159
|
+
//#endregion
|
|
160
|
+
export { modelRetryMiddleware };
|
|
161
|
+
//# sourceMappingURL=modelRetry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modelRetry.js","names":["config: ModelRetryMiddlewareConfig","error: Error","error","attemptsMade: number","content: string"],"sources":["../../../src/agents/middleware/modelRetry.ts"],"sourcesContent":["/**\n * Model retry middleware for agents.\n */\nimport { z } from \"zod/v3\";\nimport { AIMessage } from \"@langchain/core/messages\";\n\nimport { createMiddleware } from \"../middleware.js\";\nimport type { AgentMiddleware } from \"./types.js\";\nimport { sleep, calculateRetryDelay } from \"./utils.js\";\nimport { RetrySchema } from \"./constants.js\";\nimport { InvalidRetryConfigError } from \"./error.js\";\n\n/**\n * Configuration options for the Model Retry Middleware.\n */\nexport const ModelRetryMiddlewareOptionsSchema = z\n .object({\n /**\n * Behavior when all retries are exhausted. Options:\n * - `\"continue\"` (default): Return an AIMessage with error details, allowing\n * the agent to potentially handle the failure gracefully.\n * - `\"error\"`: Re-raise the exception, stopping agent execution.\n * - Custom function: Function that takes the exception and returns a string\n * for the AIMessage content, allowing custom error formatting.\n */\n onFailure: z\n .union([\n z.literal(\"error\"),\n z.literal(\"continue\"),\n z.function().args(z.instanceof(Error)).returns(z.string()),\n ])\n .default(\"continue\"),\n })\n .merge(RetrySchema);\n\nexport type ModelRetryMiddlewareConfig = z.input<\n typeof ModelRetryMiddlewareOptionsSchema\n>;\n\n/**\n * Middleware that automatically retries failed model calls with configurable backoff.\n *\n * Supports retrying on specific exceptions and exponential backoff.\n *\n * @example Basic usage with default settings (2 retries, exponential backoff)\n * ```ts\n * import { createAgent, modelRetryMiddleware } from \"langchain\";\n *\n * const agent = createAgent({\n * model: \"openai:gpt-4o\",\n * tools: [searchTool],\n * middleware: [modelRetryMiddleware()],\n * });\n * ```\n *\n * @example Retry 
specific exceptions only\n * ```ts\n * import { modelRetryMiddleware } from \"langchain\";\n *\n * const retry = modelRetryMiddleware({\n * maxRetries: 4,\n * retryOn: [TimeoutError, NetworkError],\n * backoffFactor: 1.5,\n * });\n * ```\n *\n * @example Custom exception filtering\n * ```ts\n * function shouldRetry(error: Error): boolean {\n * // Only retry on rate limit errors\n * if (error.name === \"RateLimitError\") {\n * return true;\n * }\n * // Or check for specific HTTP status codes\n * if (error.name === \"HTTPError\" && \"statusCode\" in error) {\n * const statusCode = (error as any).statusCode;\n * return statusCode === 429 || statusCode === 503;\n * }\n * return false;\n * }\n *\n * const retry = modelRetryMiddleware({\n * maxRetries: 3,\n * retryOn: shouldRetry,\n * });\n * ```\n *\n * @example Return error message instead of raising\n * ```ts\n * const retry = modelRetryMiddleware({\n * maxRetries: 4,\n * onFailure: \"continue\", // Return AIMessage with error instead of throwing\n * });\n * ```\n *\n * @example Custom error message formatting\n * ```ts\n * const formatError = (error: Error) =>\n * `Model call failed: ${error.message}. 
Please try again later.`;\n *\n * const retry = modelRetryMiddleware({\n * maxRetries: 4,\n * onFailure: formatError,\n * });\n * ```\n *\n * @example Constant backoff (no exponential growth)\n * ```ts\n * const retry = modelRetryMiddleware({\n * maxRetries: 5,\n * backoffFactor: 0.0, // No exponential growth\n * initialDelayMs: 2000, // Always wait 2 seconds\n * });\n * ```\n *\n * @example Raise exception on failure\n * ```ts\n * const retry = modelRetryMiddleware({\n * maxRetries: 2,\n * onFailure: \"error\", // Re-raise exception instead of returning message\n * });\n * ```\n *\n * @param config - Configuration options for the retry middleware\n * @returns A middleware instance that handles model failures with retries\n */\nexport function modelRetryMiddleware(\n config: ModelRetryMiddlewareConfig = {}\n): AgentMiddleware {\n const { success, error, data } =\n ModelRetryMiddlewareOptionsSchema.safeParse(config);\n if (!success) {\n throw new InvalidRetryConfigError(error);\n }\n const {\n maxRetries,\n retryOn,\n onFailure,\n backoffFactor,\n initialDelayMs,\n maxDelayMs,\n jitter,\n } = data;\n\n /**\n * Check if the exception should trigger a retry.\n */\n const shouldRetryException = (error: Error): boolean => {\n if (typeof retryOn === \"function\") {\n return retryOn(error);\n }\n // retryOn is an array of error constructors\n return retryOn.some(\n (ErrorConstructor) => error.constructor === ErrorConstructor\n );\n };\n\n // Use the exported calculateRetryDelay function with our config\n const delayConfig = { backoffFactor, initialDelayMs, maxDelayMs, jitter };\n\n /**\n * Format the failure message when retries are exhausted.\n */\n const formatFailureMessage = (error: Error, attemptsMade: number): string => {\n const errorType = error.constructor.name;\n const attemptWord = attemptsMade === 1 ? 
\"attempt\" : \"attempts\";\n return `Model call failed after ${attemptsMade} ${attemptWord} with ${errorType}: ${error.message}`;\n };\n\n /**\n * Handle failure when all retries are exhausted.\n */\n const handleFailure = (error: Error, attemptsMade: number): AIMessage => {\n if (onFailure === \"error\") {\n throw error;\n }\n\n let content: string;\n if (typeof onFailure === \"function\") {\n content = onFailure(error);\n } else {\n content = formatFailureMessage(error, attemptsMade);\n }\n\n return new AIMessage({\n content,\n });\n };\n\n return createMiddleware({\n name: \"modelRetryMiddleware\",\n contextSchema: ModelRetryMiddlewareOptionsSchema,\n wrapModelCall: async (request, handler) => {\n // Initial attempt + retries\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n try {\n return await handler(request);\n } catch (error) {\n const attemptsMade = attempt + 1; // attempt is 0-indexed\n\n // Ensure error is an Error instance\n const err =\n error && typeof error === \"object\" && \"message\" in error\n ? 
(error as Error)\n : new Error(String(error));\n\n // Check if we should retry this exception\n if (!shouldRetryException(err)) {\n // Exception is not retryable, handle failure immediately\n return handleFailure(err, attemptsMade);\n }\n\n // Check if we have more retries left\n if (attempt < maxRetries) {\n // Calculate and apply backoff delay\n const delay = calculateRetryDelay(delayConfig, attempt);\n if (delay > 0) {\n await sleep(delay);\n }\n // Continue to next retry\n } else {\n // No more retries, handle failure\n return handleFailure(err, attemptsMade);\n }\n }\n }\n\n // Unreachable: loop always returns via handler success or handleFailure\n throw new Error(\"Unexpected: retry loop completed without returning\");\n },\n });\n}\n"],"mappings":";;;;;;;;;;;AAeA,MAAa,oCAAoC,EAC9C,OAAO,EASN,WAAW,EACR,MAAM;CACL,EAAE,QAAQ,QAAQ;CAClB,EAAE,QAAQ,WAAW;CACrB,EAAE,UAAU,CAAC,KAAK,EAAE,WAAW,MAAM,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC;AAC3D,EAAC,CACD,QAAQ,WAAW,CACvB,EAAC,CACD,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA6FrB,SAAgB,qBACdA,SAAqC,CAAE,GACtB;CACjB,MAAM,EAAE,SAAS,OAAO,MAAM,GAC5B,kCAAkC,UAAU,OAAO;AACrD,KAAI,CAAC,QACH,OAAM,IAAI,wBAAwB;CAEpC,MAAM,EACJ,YACA,SACA,WACA,eACA,gBACA,YACA,QACD,GAAG;;;;CAKJ,MAAM,uBAAuB,CAACC,YAA0B;AACtD,MAAI,OAAO,YAAY,WACrB,QAAO,QAAQC,QAAM;AAGvB,SAAO,QAAQ,KACb,CAAC,qBAAqBA,QAAM,gBAAgB,iBAC7C;CACF;CAGD,MAAM,cAAc;EAAE;EAAe;EAAgB;EAAY;CAAQ;;;;CAKzE,MAAM,uBAAuB,CAACD,SAAcE,iBAAiC;EAC3E,MAAM,YAAYD,QAAM,YAAY;EACpC,MAAM,cAAc,iBAAiB,IAAI,YAAY;AACrD,SAAO,CAAC,wBAAwB,EAAE,aAAa,CAAC,EAAE,YAAY,MAAM,EAAE,UAAU,EAAE,EAAEA,QAAM,SAAS;CACpG;;;;CAKD,MAAM,gBAAgB,CAACD,SAAcE,iBAAoC;AACvE,MAAI,cAAc,QAChB,OAAMD;EAGR,IAAIE;AACJ,MAAI,OAAO,cAAc,YACvB,UAAU,UAAUF,QAAM;OAE1B,UAAU,qBAAqBA,SAAO,aAAa;AAGrD,SAAO,IAAI,UAAU,EACnB,QACD;CACF;AAED,QAAO,iBAAiB;EACtB,MAAM;EACN,eAAe;EACf,eAAe,OAAO,SAAS,YAAY;AAEzC,QAAK,IAAI,UAAU,GAAG,WAAW,YAAY,UAC3C,KAAI;AACF,WAAO,MAAM,QAAQ,QAAQ;GAC9B,SAAQA,SAAO;IACd,MAAM,eAAe,UAAU;IAG/B,MAAM,MACJA,
WAAS,OAAOA,YAAU,YAAY,aAAaA,UAC9CA,UACD,IAAI,MAAM,OAAOA,QAAM;AAG7B,QAAI,CAAC,qBAAqB,IAAI,CAE5B,QAAO,cAAc,KAAK,aAAa;AAIzC,QAAI,UAAU,YAAY;KAExB,MAAM,QAAQ,oBAAoB,aAAa,QAAQ;AACvD,SAAI,QAAQ,GACV,MAAM,MAAM,MAAM;IAGrB,MAEC,QAAO,cAAc,KAAK,aAAa;GAE1C;AAIH,SAAM,IAAI,MAAM;EACjB;CACF,EAAC;AACH"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
const require_rolldown_runtime = require('
|
|
2
|
-
const require_middleware = require('
|
|
1
|
+
const require_rolldown_runtime = require('../../../../_virtual/rolldown_runtime.cjs');
|
|
2
|
+
const require_middleware = require('../../../middleware.cjs');
|
|
3
3
|
const zod_v3 = require_rolldown_runtime.__toESM(require("zod/v3"));
|
|
4
4
|
|
|
5
|
-
//#region src/agents/middleware/promptCaching.ts
|
|
5
|
+
//#region src/agents/middleware/provider/anthropic/promptCaching.ts
|
|
6
6
|
const DEFAULT_ENABLE_CACHING = true;
|
|
7
7
|
const DEFAULT_TTL = "5m";
|
|
8
8
|
const DEFAULT_MIN_MESSAGES_TO_CACHE = 3;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"promptCaching.cjs","names":["z","message: string","middlewareOptions?: PromptCachingMiddlewareConfig","createMiddleware"],"sources":["../../../../../src/agents/middleware/provider/anthropic/promptCaching.ts"],"sourcesContent":["import { z } from \"zod/v3\";\nimport { ContentBlock } from \"@langchain/core/messages\";\nimport { InferInteropZodInput } from \"@langchain/core/utils/types\";\n\nimport { ConfigurableModel } from \"../../../../chat_models/universal.js\";\nimport { createMiddleware } from \"../../../middleware.js\";\n\nconst DEFAULT_ENABLE_CACHING = true;\nconst DEFAULT_TTL = \"5m\";\nconst DEFAULT_MIN_MESSAGES_TO_CACHE = 3;\nconst DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR = \"warn\";\n\nconst contextSchema = z.object({\n /**\n * Whether to enable prompt caching.\n * @default true\n */\n enableCaching: z.boolean().optional(),\n /**\n * The time-to-live for the cached prompt.\n * @default \"5m\"\n */\n ttl: z.enum([\"5m\", \"1h\"]).optional(),\n /**\n * The minimum number of messages required before caching is applied.\n * @default 3\n */\n minMessagesToCache: z.number().optional(),\n /**\n * The behavior to take when an unsupported model is used.\n * - \"ignore\" will ignore the unsupported model and continue without caching.\n * - \"warn\" will warn the user and continue without caching.\n * - \"raise\" will raise an error and stop the agent.\n * @default \"warn\"\n */\n unsupportedModelBehavior: z.enum([\"ignore\", \"warn\", \"raise\"]).optional(),\n});\nexport type PromptCachingMiddlewareConfig = Partial<\n InferInteropZodInput<typeof contextSchema>\n>;\n\nclass PromptCachingMiddlewareError extends Error {\n constructor(message: string) {\n super(message);\n this.name = \"PromptCachingMiddlewareError\";\n }\n}\n\n/**\n * Creates a prompt caching middleware for Anthropic models to optimize API usage.\n *\n * This middleware automatically adds cache control headers to the last messages when using Anthropic models,\n * enabling their prompt 
caching feature. This can significantly reduce costs for applications with repetitive\n * prompts, long system messages, or extensive conversation histories.\n *\n * ## How It Works\n *\n * The middleware intercepts model requests and adds cache control metadata that tells Anthropic's\n * API to cache processed prompt prefixes. On subsequent requests with matching prefixes, the\n * cached representations are reused, skipping redundant token processing.\n *\n * ## Benefits\n *\n * - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly (up to 90% savings on cached portions)\n * - **Lower Latency**: Cached prompts are processed faster as embeddings are pre-computed\n * - **Better Scalability**: Reduced computational load enables handling more requests\n * - **Consistent Performance**: Stable response times for repetitive queries\n *\n * @param middlewareOptions - Configuration options for the caching behavior\n * @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)\n * @param middlewareOptions.ttl - Cache time-to-live: `\"5m\"` for 5 minutes or `\"1h\"` for 1 hour (default: `\"5m\"`)\n * @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied (default: `3`)\n * @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `\"warn\"`)\n *\n * @returns A middleware instance that can be passed to `createAgent`\n *\n * @throws {Error} If used with non-Anthropic models\n *\n * @example\n * Basic usage with default settings\n * ```typescript\n * import { createAgent } from \"langchain\";\n * import { anthropicPromptCachingMiddleware } from \"langchain\";\n *\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * middleware: [\n * anthropicPromptCachingMiddleware()\n * ]\n * });\n * ```\n *\n * @example\n * Custom configuration for longer conversations\n * ```typescript\n * const 
cachingMiddleware = anthropicPromptCachingMiddleware({\n * ttl: \"1h\", // Cache for 1 hour instead of default 5 minutes\n * minMessagesToCache: 5 // Only cache after 5 messages\n * });\n *\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * systemPrompt: \"You are a helpful assistant with deep knowledge of...\", // Long system prompt\n * middleware: [cachingMiddleware]\n * });\n * ```\n *\n * @example\n * Conditional caching based on runtime context\n * ```typescript\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * middleware: [\n * anthropicPromptCachingMiddleware({\n * enableCaching: true,\n * ttl: \"5m\"\n * })\n * ]\n * });\n *\n * // Disable caching for specific requests\n * await agent.invoke(\n * { messages: [new HumanMessage(\"Process this without caching\")] },\n * {\n * configurable: {\n * middleware_context: { enableCaching: false }\n * }\n * }\n * );\n * ```\n *\n * @example\n * Optimal setup for customer support chatbot\n * ```typescript\n * const supportAgent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * systemPrompt: `You are a customer support agent for ACME Corp.\n *\n * Company policies:\n * - Always be polite and professional\n * - Refer to knowledge base for product information\n * - Escalate billing issues to human agents\n * ... 
(extensive policies and guidelines)\n * `,\n * tools: [searchKnowledgeBase, createTicket, checkOrderStatus],\n * middleware: [\n * anthropicPromptCachingMiddleware({\n * ttl: \"1h\", // Long TTL for stable system prompt\n * minMessagesToCache: 1 // Cache immediately due to large system prompt\n * })\n * ]\n * });\n * ```\n *\n * @remarks\n * - **Anthropic Only**: This middleware only works with Anthropic models and will throw an error if used with other providers\n * - **Automatic Application**: Caching is applied automatically when message count exceeds `minMessagesToCache`\n * - **Cache Scope**: Caches are isolated per API key and cannot be shared across different keys\n * - **TTL Options**: Only supports \"5m\" (5 minutes) and \"1h\" (1 hour) as TTL values per Anthropic's API\n * - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications\n * - **Cost Impact**: Cached tokens are billed at 10% of the base input token price, cache writes are billed at 25% of the base\n *\n * @see {@link createAgent} for agent creation\n * @see {@link https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching} Anthropic's prompt caching documentation\n * @public\n */\nexport function anthropicPromptCachingMiddleware(\n middlewareOptions?: PromptCachingMiddlewareConfig\n) {\n return createMiddleware({\n name: \"PromptCachingMiddleware\",\n contextSchema,\n wrapModelCall: (request, handler) => {\n /**\n * Prefer runtime context values over middleware options values over defaults\n */\n const enableCaching =\n request.runtime.context.enableCaching ??\n middlewareOptions?.enableCaching ??\n DEFAULT_ENABLE_CACHING;\n const ttl =\n request.runtime.context.ttl ?? middlewareOptions?.ttl ?? 
DEFAULT_TTL;\n const minMessagesToCache =\n request.runtime.context.minMessagesToCache ??\n middlewareOptions?.minMessagesToCache ??\n DEFAULT_MIN_MESSAGES_TO_CACHE;\n const unsupportedModelBehavior =\n request.runtime.context.unsupportedModelBehavior ??\n middlewareOptions?.unsupportedModelBehavior ??\n DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR;\n\n // Skip if caching is disabled\n if (!enableCaching || !request.model) {\n return handler(request);\n }\n\n const isAnthropicModel =\n request.model.getName() === \"ChatAnthropic\" ||\n (request.model.getName() === \"ConfigurableModel\" &&\n (request.model as ConfigurableModel)._defaultConfig?.modelProvider ===\n \"anthropic\");\n if (!isAnthropicModel) {\n // Get model name for better error context\n const modelName = request.model.getName();\n const modelInfo =\n request.model.getName() === \"ConfigurableModel\"\n ? `${modelName} (${\n (request.model as ConfigurableModel)._defaultConfig\n ?.modelProvider\n })`\n : modelName;\n\n const baseMessage = `Unsupported model '${modelInfo}'. Prompt caching requires an Anthropic model`;\n\n if (unsupportedModelBehavior === \"raise\") {\n throw new PromptCachingMiddlewareError(\n `${baseMessage} (e.g., 'anthropic:claude-4-0-sonnet').`\n );\n } else if (unsupportedModelBehavior === \"warn\") {\n console.warn(\n `PromptCachingMiddleware: Skipping caching for ${modelName}. Consider switching to an Anthropic model for caching benefits.`\n );\n }\n return handler(request);\n }\n\n const messagesCount =\n request.state.messages.length + (request.systemPrompt ? 
1 : 0);\n\n if (messagesCount < minMessagesToCache) {\n return handler(request);\n }\n\n /**\n * Add cache_control to the last message\n */\n const lastMessage = request.messages.at(-1);\n if (!lastMessage) {\n return handler(request);\n }\n\n const NewMessageConstructor =\n Object.getPrototypeOf(lastMessage).constructor;\n if (Array.isArray(lastMessage.content)) {\n const newMessage = new NewMessageConstructor({\n ...lastMessage,\n content: [\n ...lastMessage.content.slice(0, -1),\n {\n ...lastMessage.content.at(-1),\n cache_control: {\n type: \"ephemeral\",\n ttl,\n },\n } as ContentBlock,\n ],\n });\n return handler({\n ...request,\n messages: [...request.messages.slice(0, -1), newMessage],\n });\n } else if (typeof lastMessage.content === \"string\") {\n const newMessage = new NewMessageConstructor({\n ...lastMessage,\n content: [\n {\n type: \"text\",\n text: lastMessage.content,\n cache_control: {\n type: \"ephemeral\",\n ttl,\n },\n },\n ],\n });\n return handler({\n ...request,\n messages: [...request.messages.slice(0, -1), newMessage],\n });\n }\n\n throw new PromptCachingMiddlewareError(\n \"Last message content is not a string or array\"\n );\n },\n 
});\n}\n"],"mappings":";;;;;AAOA,MAAM,yBAAyB;AAC/B,MAAM,cAAc;AACpB,MAAM,gCAAgC;AACtC,MAAM,qCAAqC;AAE3C,MAAM,gBAAgBA,SAAE,OAAO;CAK7B,eAAeA,SAAE,SAAS,CAAC,UAAU;CAKrC,KAAKA,SAAE,KAAK,CAAC,MAAM,IAAK,EAAC,CAAC,UAAU;CAKpC,oBAAoBA,SAAE,QAAQ,CAAC,UAAU;CAQzC,0BAA0BA,SAAE,KAAK;EAAC;EAAU;EAAQ;CAAQ,EAAC,CAAC,UAAU;AACzE,EAAC;AAKF,IAAM,+BAAN,cAA2C,MAAM;CAC/C,YAAYC,SAAiB;EAC3B,MAAM,QAAQ;EACd,KAAK,OAAO;CACb;AACF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwHD,SAAgB,iCACdC,mBACA;AACA,QAAOC,oCAAiB;EACtB,MAAM;EACN;EACA,eAAe,CAAC,SAAS,YAAY;;;;GAInC,MAAM,gBACJ,QAAQ,QAAQ,QAAQ,iBACxB,mBAAmB,iBACnB;GACF,MAAM,MACJ,QAAQ,QAAQ,QAAQ,OAAO,mBAAmB,OAAO;GAC3D,MAAM,qBACJ,QAAQ,QAAQ,QAAQ,sBACxB,mBAAmB,sBACnB;GACF,MAAM,2BACJ,QAAQ,QAAQ,QAAQ,4BACxB,mBAAmB,4BACnB;AAGF,OAAI,CAAC,iBAAiB,CAAC,QAAQ,MAC7B,QAAO,QAAQ,QAAQ;GAGzB,MAAM,mBACJ,QAAQ,MAAM,SAAS,KAAK,mBAC3B,QAAQ,MAAM,SAAS,KAAK,uBAC1B,QAAQ,MAA4B,gBAAgB,kBACnD;AACN,OAAI,CAAC,kBAAkB;IAErB,MAAM,YAAY,QAAQ,MAAM,SAAS;IACzC,MAAM,YACJ,QAAQ,MAAM,SAAS,KAAK,sBACxB,GAAG,UAAU,EAAE,EACZ,QAAQ,MAA4B,gBACjC,cACL,CAAC,CAAC,GACH;IAEN,MAAM,cAAc,CAAC,mBAAmB,EAAE,UAAU,6CAA6C,CAAC;AAElG,QAAI,6BAA6B,QAC/B,OAAM,IAAI,6BACR,GAAG,YAAY,uCAAuC,CAAC;aAEhD,6BAA6B,QACtC,QAAQ,KACN,CAAC,8CAA8C,EAAE,UAAU,gEAAgE,CAAC,CAC7H;AAEH,WAAO,QAAQ,QAAQ;GACxB;GAED,MAAM,gBACJ,QAAQ,MAAM,SAAS,UAAU,QAAQ,eAAe,IAAI;AAE9D,OAAI,gBAAgB,mBAClB,QAAO,QAAQ,QAAQ;;;;GAMzB,MAAM,cAAc,QAAQ,SAAS,GAAG,GAAG;AAC3C,OAAI,CAAC,YACH,QAAO,QAAQ,QAAQ;GAGzB,MAAM,wBACJ,OAAO,eAAe,YAAY,CAAC;AACrC,OAAI,MAAM,QAAQ,YAAY,QAAQ,EAAE;IACtC,MAAM,aAAa,IAAI,sBAAsB;KAC3C,GAAG;KACH,SAAS,CACP,GAAG,YAAY,QAAQ,MAAM,GAAG,GAAG,EACnC;MACE,GAAG,YAAY,QAAQ,GAAG,GAAG;MAC7B,eAAe;OACb,MAAM;OACN;MACD;KACF,CACF;IACF;AACD,WAAO,QAAQ;KACb,GAAG;KACH,UAAU,CAAC,GAAG,QAAQ,SAAS,MAAM,GAAG,GAAG,EAAE,UAAW;IACzD,EAAC;GACH,WAAU,OAAO,YAAY,YAAY,UAAU;IAClD,MAAM,aAAa,IAAI,sBAAsB;KAC3C,GAAG;KACH,SAAS,CACP;MACE,MAAM;MACN,MAAM,YAAY;MAClB,eAAe;OACb,MAAM;OACN;MACD;KACF,CACF;IACF;AACD,WAAO,QAAQ;KACb,GAAG;KACH,
UAAU,CAAC,GAAG,QAAQ,SAAS,MAAM,GAAG,GAAG,EAAE,UAAW;IACzD,EAAC;GACH;AAED,SAAM,IAAI,6BACR;EAEH;CACF,EAAC;AACH"}
|
package/dist/agents/middleware/{promptCaching.d.cts → provider/anthropic/promptCaching.d.cts}
RENAMED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { AgentMiddleware } from "
|
|
1
|
+
import { AgentMiddleware } from "../../types.cjs";
|
|
2
2
|
import { InferInteropZodInput } from "@langchain/core/utils/types";
|
|
3
3
|
import { z } from "zod/v3";
|
|
4
4
|
|
|
5
|
-
//#region src/agents/middleware/promptCaching.d.ts
|
|
5
|
+
//#region src/agents/middleware/provider/anthropic/promptCaching.d.ts
|
|
6
6
|
declare const contextSchema: z.ZodObject<{
|
|
7
7
|
/**
|
|
8
8
|
* Whether to enable prompt caching.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { AgentMiddleware } from "
|
|
1
|
+
import { AgentMiddleware } from "../../types.js";
|
|
2
2
|
import { z } from "zod/v3";
|
|
3
3
|
import { InferInteropZodInput } from "@langchain/core/utils/types";
|
|
4
4
|
|
|
5
|
-
//#region src/agents/middleware/promptCaching.d.ts
|
|
5
|
+
//#region src/agents/middleware/provider/anthropic/promptCaching.d.ts
|
|
6
6
|
declare const contextSchema: z.ZodObject<{
|
|
7
7
|
/**
|
|
8
8
|
* Whether to enable prompt caching.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { createMiddleware } from "
|
|
1
|
+
import { createMiddleware } from "../../../middleware.js";
|
|
2
2
|
import { z } from "zod/v3";
|
|
3
3
|
|
|
4
|
-
//#region src/agents/middleware/promptCaching.ts
|
|
4
|
+
//#region src/agents/middleware/provider/anthropic/promptCaching.ts
|
|
5
5
|
const DEFAULT_ENABLE_CACHING = true;
|
|
6
6
|
const DEFAULT_TTL = "5m";
|
|
7
7
|
const DEFAULT_MIN_MESSAGES_TO_CACHE = 3;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"promptCaching.js","names":["message: string","middlewareOptions?: PromptCachingMiddlewareConfig"],"sources":["../../../../../src/agents/middleware/provider/anthropic/promptCaching.ts"],"sourcesContent":["import { z } from \"zod/v3\";\nimport { ContentBlock } from \"@langchain/core/messages\";\nimport { InferInteropZodInput } from \"@langchain/core/utils/types\";\n\nimport { ConfigurableModel } from \"../../../../chat_models/universal.js\";\nimport { createMiddleware } from \"../../../middleware.js\";\n\nconst DEFAULT_ENABLE_CACHING = true;\nconst DEFAULT_TTL = \"5m\";\nconst DEFAULT_MIN_MESSAGES_TO_CACHE = 3;\nconst DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR = \"warn\";\n\nconst contextSchema = z.object({\n /**\n * Whether to enable prompt caching.\n * @default true\n */\n enableCaching: z.boolean().optional(),\n /**\n * The time-to-live for the cached prompt.\n * @default \"5m\"\n */\n ttl: z.enum([\"5m\", \"1h\"]).optional(),\n /**\n * The minimum number of messages required before caching is applied.\n * @default 3\n */\n minMessagesToCache: z.number().optional(),\n /**\n * The behavior to take when an unsupported model is used.\n * - \"ignore\" will ignore the unsupported model and continue without caching.\n * - \"warn\" will warn the user and continue without caching.\n * - \"raise\" will raise an error and stop the agent.\n * @default \"warn\"\n */\n unsupportedModelBehavior: z.enum([\"ignore\", \"warn\", \"raise\"]).optional(),\n});\nexport type PromptCachingMiddlewareConfig = Partial<\n InferInteropZodInput<typeof contextSchema>\n>;\n\nclass PromptCachingMiddlewareError extends Error {\n constructor(message: string) {\n super(message);\n this.name = \"PromptCachingMiddlewareError\";\n }\n}\n\n/**\n * Creates a prompt caching middleware for Anthropic models to optimize API usage.\n *\n * This middleware automatically adds cache control headers to the last messages when using Anthropic models,\n * enabling their prompt caching feature. 
This can significantly reduce costs for applications with repetitive\n * prompts, long system messages, or extensive conversation histories.\n *\n * ## How It Works\n *\n * The middleware intercepts model requests and adds cache control metadata that tells Anthropic's\n * API to cache processed prompt prefixes. On subsequent requests with matching prefixes, the\n * cached representations are reused, skipping redundant token processing.\n *\n * ## Benefits\n *\n * - **Cost Reduction**: Avoid reprocessing the same tokens repeatedly (up to 90% savings on cached portions)\n * - **Lower Latency**: Cached prompts are processed faster as embeddings are pre-computed\n * - **Better Scalability**: Reduced computational load enables handling more requests\n * - **Consistent Performance**: Stable response times for repetitive queries\n *\n * @param middlewareOptions - Configuration options for the caching behavior\n * @param middlewareOptions.enableCaching - Whether to enable prompt caching (default: `true`)\n * @param middlewareOptions.ttl - Cache time-to-live: `\"5m\"` for 5 minutes or `\"1h\"` for 1 hour (default: `\"5m\"`)\n * @param middlewareOptions.minMessagesToCache - Minimum number of messages required before caching is applied (default: `3`)\n * @param middlewareOptions.unsupportedModelBehavior - The behavior to take when an unsupported model is used (default: `\"warn\"`)\n *\n * @returns A middleware instance that can be passed to `createAgent`\n *\n * @throws {Error} If used with non-Anthropic models\n *\n * @example\n * Basic usage with default settings\n * ```typescript\n * import { createAgent } from \"langchain\";\n * import { anthropicPromptCachingMiddleware } from \"langchain\";\n *\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * middleware: [\n * anthropicPromptCachingMiddleware()\n * ]\n * });\n * ```\n *\n * @example\n * Custom configuration for longer conversations\n * ```typescript\n * const cachingMiddleware = 
anthropicPromptCachingMiddleware({\n * ttl: \"1h\", // Cache for 1 hour instead of default 5 minutes\n * minMessagesToCache: 5 // Only cache after 5 messages\n * });\n *\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * systemPrompt: \"You are a helpful assistant with deep knowledge of...\", // Long system prompt\n * middleware: [cachingMiddleware]\n * });\n * ```\n *\n * @example\n * Conditional caching based on runtime context\n * ```typescript\n * const agent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * middleware: [\n * anthropicPromptCachingMiddleware({\n * enableCaching: true,\n * ttl: \"5m\"\n * })\n * ]\n * });\n *\n * // Disable caching for specific requests\n * await agent.invoke(\n * { messages: [new HumanMessage(\"Process this without caching\")] },\n * {\n * configurable: {\n * middleware_context: { enableCaching: false }\n * }\n * }\n * );\n * ```\n *\n * @example\n * Optimal setup for customer support chatbot\n * ```typescript\n * const supportAgent = createAgent({\n * model: \"anthropic:claude-3-5-sonnet\",\n * systemPrompt: `You are a customer support agent for ACME Corp.\n *\n * Company policies:\n * - Always be polite and professional\n * - Refer to knowledge base for product information\n * - Escalate billing issues to human agents\n * ... 
(extensive policies and guidelines)\n * `,\n * tools: [searchKnowledgeBase, createTicket, checkOrderStatus],\n * middleware: [\n * anthropicPromptCachingMiddleware({\n * ttl: \"1h\", // Long TTL for stable system prompt\n * minMessagesToCache: 1 // Cache immediately due to large system prompt\n * })\n * ]\n * });\n * ```\n *\n * @remarks\n * - **Anthropic Only**: This middleware only works with Anthropic models and will throw an error if used with other providers\n * - **Automatic Application**: Caching is applied automatically when message count exceeds `minMessagesToCache`\n * - **Cache Scope**: Caches are isolated per API key and cannot be shared across different keys\n * - **TTL Options**: Only supports \"5m\" (5 minutes) and \"1h\" (1 hour) as TTL values per Anthropic's API\n * - **Best Use Cases**: Long system prompts, multi-turn conversations, repetitive queries, RAG applications\n * - **Cost Impact**: Cached tokens are billed at 10% of the base input token price, cache writes are billed at 25% of the base\n *\n * @see {@link createAgent} for agent creation\n * @see {@link https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching} Anthropic's prompt caching documentation\n * @public\n */\nexport function anthropicPromptCachingMiddleware(\n middlewareOptions?: PromptCachingMiddlewareConfig\n) {\n return createMiddleware({\n name: \"PromptCachingMiddleware\",\n contextSchema,\n wrapModelCall: (request, handler) => {\n /**\n * Prefer runtime context values over middleware options values over defaults\n */\n const enableCaching =\n request.runtime.context.enableCaching ??\n middlewareOptions?.enableCaching ??\n DEFAULT_ENABLE_CACHING;\n const ttl =\n request.runtime.context.ttl ?? middlewareOptions?.ttl ?? 
DEFAULT_TTL;\n const minMessagesToCache =\n request.runtime.context.minMessagesToCache ??\n middlewareOptions?.minMessagesToCache ??\n DEFAULT_MIN_MESSAGES_TO_CACHE;\n const unsupportedModelBehavior =\n request.runtime.context.unsupportedModelBehavior ??\n middlewareOptions?.unsupportedModelBehavior ??\n DEFAULT_UNSUPPORTED_MODEL_BEHAVIOR;\n\n // Skip if caching is disabled\n if (!enableCaching || !request.model) {\n return handler(request);\n }\n\n const isAnthropicModel =\n request.model.getName() === \"ChatAnthropic\" ||\n (request.model.getName() === \"ConfigurableModel\" &&\n (request.model as ConfigurableModel)._defaultConfig?.modelProvider ===\n \"anthropic\");\n if (!isAnthropicModel) {\n // Get model name for better error context\n const modelName = request.model.getName();\n const modelInfo =\n request.model.getName() === \"ConfigurableModel\"\n ? `${modelName} (${\n (request.model as ConfigurableModel)._defaultConfig\n ?.modelProvider\n })`\n : modelName;\n\n const baseMessage = `Unsupported model '${modelInfo}'. Prompt caching requires an Anthropic model`;\n\n if (unsupportedModelBehavior === \"raise\") {\n throw new PromptCachingMiddlewareError(\n `${baseMessage} (e.g., 'anthropic:claude-4-0-sonnet').`\n );\n } else if (unsupportedModelBehavior === \"warn\") {\n console.warn(\n `PromptCachingMiddleware: Skipping caching for ${modelName}. Consider switching to an Anthropic model for caching benefits.`\n );\n }\n return handler(request);\n }\n\n const messagesCount =\n request.state.messages.length + (request.systemPrompt ? 
1 : 0);\n\n if (messagesCount < minMessagesToCache) {\n return handler(request);\n }\n\n /**\n * Add cache_control to the last message\n */\n const lastMessage = request.messages.at(-1);\n if (!lastMessage) {\n return handler(request);\n }\n\n const NewMessageConstructor =\n Object.getPrototypeOf(lastMessage).constructor;\n if (Array.isArray(lastMessage.content)) {\n const newMessage = new NewMessageConstructor({\n ...lastMessage,\n content: [\n ...lastMessage.content.slice(0, -1),\n {\n ...lastMessage.content.at(-1),\n cache_control: {\n type: \"ephemeral\",\n ttl,\n },\n } as ContentBlock,\n ],\n });\n return handler({\n ...request,\n messages: [...request.messages.slice(0, -1), newMessage],\n });\n } else if (typeof lastMessage.content === \"string\") {\n const newMessage = new NewMessageConstructor({\n ...lastMessage,\n content: [\n {\n type: \"text\",\n text: lastMessage.content,\n cache_control: {\n type: \"ephemeral\",\n ttl,\n },\n },\n ],\n });\n return handler({\n ...request,\n messages: [...request.messages.slice(0, -1), newMessage],\n });\n }\n\n throw new PromptCachingMiddlewareError(\n \"Last message content is not a string or array\"\n );\n },\n 
});\n}\n"],"mappings":";;;;AAOA,MAAM,yBAAyB;AAC/B,MAAM,cAAc;AACpB,MAAM,gCAAgC;AACtC,MAAM,qCAAqC;AAE3C,MAAM,gBAAgB,EAAE,OAAO;CAK7B,eAAe,EAAE,SAAS,CAAC,UAAU;CAKrC,KAAK,EAAE,KAAK,CAAC,MAAM,IAAK,EAAC,CAAC,UAAU;CAKpC,oBAAoB,EAAE,QAAQ,CAAC,UAAU;CAQzC,0BAA0B,EAAE,KAAK;EAAC;EAAU;EAAQ;CAAQ,EAAC,CAAC,UAAU;AACzE,EAAC;AAKF,IAAM,+BAAN,cAA2C,MAAM;CAC/C,YAAYA,SAAiB;EAC3B,MAAM,QAAQ;EACd,KAAK,OAAO;CACb;AACF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwHD,SAAgB,iCACdC,mBACA;AACA,QAAO,iBAAiB;EACtB,MAAM;EACN;EACA,eAAe,CAAC,SAAS,YAAY;;;;GAInC,MAAM,gBACJ,QAAQ,QAAQ,QAAQ,iBACxB,mBAAmB,iBACnB;GACF,MAAM,MACJ,QAAQ,QAAQ,QAAQ,OAAO,mBAAmB,OAAO;GAC3D,MAAM,qBACJ,QAAQ,QAAQ,QAAQ,sBACxB,mBAAmB,sBACnB;GACF,MAAM,2BACJ,QAAQ,QAAQ,QAAQ,4BACxB,mBAAmB,4BACnB;AAGF,OAAI,CAAC,iBAAiB,CAAC,QAAQ,MAC7B,QAAO,QAAQ,QAAQ;GAGzB,MAAM,mBACJ,QAAQ,MAAM,SAAS,KAAK,mBAC3B,QAAQ,MAAM,SAAS,KAAK,uBAC1B,QAAQ,MAA4B,gBAAgB,kBACnD;AACN,OAAI,CAAC,kBAAkB;IAErB,MAAM,YAAY,QAAQ,MAAM,SAAS;IACzC,MAAM,YACJ,QAAQ,MAAM,SAAS,KAAK,sBACxB,GAAG,UAAU,EAAE,EACZ,QAAQ,MAA4B,gBACjC,cACL,CAAC,CAAC,GACH;IAEN,MAAM,cAAc,CAAC,mBAAmB,EAAE,UAAU,6CAA6C,CAAC;AAElG,QAAI,6BAA6B,QAC/B,OAAM,IAAI,6BACR,GAAG,YAAY,uCAAuC,CAAC;aAEhD,6BAA6B,QACtC,QAAQ,KACN,CAAC,8CAA8C,EAAE,UAAU,gEAAgE,CAAC,CAC7H;AAEH,WAAO,QAAQ,QAAQ;GACxB;GAED,MAAM,gBACJ,QAAQ,MAAM,SAAS,UAAU,QAAQ,eAAe,IAAI;AAE9D,OAAI,gBAAgB,mBAClB,QAAO,QAAQ,QAAQ;;;;GAMzB,MAAM,cAAc,QAAQ,SAAS,GAAG,GAAG;AAC3C,OAAI,CAAC,YACH,QAAO,QAAQ,QAAQ;GAGzB,MAAM,wBACJ,OAAO,eAAe,YAAY,CAAC;AACrC,OAAI,MAAM,QAAQ,YAAY,QAAQ,EAAE;IACtC,MAAM,aAAa,IAAI,sBAAsB;KAC3C,GAAG;KACH,SAAS,CACP,GAAG,YAAY,QAAQ,MAAM,GAAG,GAAG,EACnC;MACE,GAAG,YAAY,QAAQ,GAAG,GAAG;MAC7B,eAAe;OACb,MAAM;OACN;MACD;KACF,CACF;IACF;AACD,WAAO,QAAQ;KACb,GAAG;KACH,UAAU,CAAC,GAAG,QAAQ,SAAS,MAAM,GAAG,GAAG,EAAE,UAAW;IACzD,EAAC;GACH,WAAU,OAAO,YAAY,YAAY,UAAU;IAClD,MAAM,aAAa,IAAI,sBAAsB;KAC3C,GAAG;KACH,SAAS,CACP;MACE,MAAM;MACN,MAAM,YAAY;MAClB,eAAe;OACb,MAAM;OACN;MACD;KACF,CACF;IACF;AACD,WAAO,QAAQ;KACb,GAAG;KACH,UAAU,CA
AC,GAAG,QAAQ,SAAS,MAAM,GAAG,GAAG,EAAE,UAAW;IACzD,EAAC;GACH;AAED,SAAM,IAAI,6BACR;EAEH;CACF,EAAC;AACH"}
|