langchain 1.0.0a13__py3-none-any.whl → 1.0.0a15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain might be problematic. Click here for more details.
- langchain/__init__.py +1 -1
- langchain/agents/factory.py +115 -29
- langchain/agents/middleware/__init__.py +6 -5
- langchain/agents/middleware/context_editing.py +29 -1
- langchain/agents/middleware/human_in_the_loop.py +13 -13
- langchain/agents/middleware/model_call_limit.py +38 -4
- langchain/agents/middleware/model_fallback.py +36 -1
- langchain/agents/middleware/pii.py +6 -8
- langchain/agents/middleware/{planning.py → todo.py} +18 -5
- langchain/agents/middleware/tool_call_limit.py +88 -15
- langchain/agents/middleware/types.py +196 -18
- langchain/embeddings/__init__.py +0 -2
- langchain/messages/__init__.py +32 -0
- langchain/tools/__init__.py +1 -6
- langchain/tools/tool_node.py +62 -11
- langchain-1.0.0a15.dist-info/METADATA +85 -0
- langchain-1.0.0a15.dist-info/RECORD +29 -0
- langchain/agents/middleware/prompt_caching.py +0 -89
- langchain/documents/__init__.py +0 -7
- langchain/embeddings/cache.py +0 -361
- langchain/storage/__init__.py +0 -22
- langchain/storage/encoder_backed.py +0 -122
- langchain/storage/exceptions.py +0 -5
- langchain/storage/in_memory.py +0 -13
- langchain-1.0.0a13.dist-info/METADATA +0 -125
- langchain-1.0.0a13.dist-info/RECORD +0 -36
- {langchain-1.0.0a13.dist-info → langchain-1.0.0a15.dist-info}/WHEEL +0 -0
- {langchain-1.0.0a13.dist-info → langchain-1.0.0a15.dist-info}/licenses/LICENSE +0 -0
langchain/tools/tool_node.py
CHANGED
|
@@ -38,7 +38,7 @@ from __future__ import annotations
|
|
|
38
38
|
import asyncio
|
|
39
39
|
import inspect
|
|
40
40
|
import json
|
|
41
|
-
from collections.abc import Callable
|
|
41
|
+
from collections.abc import Awaitable, Callable
|
|
42
42
|
from copy import copy, deepcopy
|
|
43
43
|
from dataclasses import dataclass, replace
|
|
44
44
|
from types import UnionType
|
|
@@ -72,6 +72,7 @@ from langchain_core.tools import BaseTool, InjectedToolArg
|
|
|
72
72
|
from langchain_core.tools import tool as create_tool
|
|
73
73
|
from langchain_core.tools.base import (
|
|
74
74
|
TOOL_MESSAGE_BLOCK_TYPES,
|
|
75
|
+
ToolException,
|
|
75
76
|
get_all_basemodel_annotations,
|
|
76
77
|
)
|
|
77
78
|
from langgraph._internal._runnable import RunnableCallable
|
|
@@ -80,6 +81,7 @@ from langgraph.graph.message import REMOVE_ALL_MESSAGES
|
|
|
80
81
|
from langgraph.runtime import get_runtime
|
|
81
82
|
from langgraph.types import Command, Send
|
|
82
83
|
from pydantic import BaseModel, ValidationError
|
|
84
|
+
from typing_extensions import Unpack
|
|
83
85
|
|
|
84
86
|
if TYPE_CHECKING:
|
|
85
87
|
from collections.abc import Sequence
|
|
@@ -103,6 +105,12 @@ TOOL_INVOCATION_ERROR_TEMPLATE = (
|
|
|
103
105
|
)
|
|
104
106
|
|
|
105
107
|
|
|
108
|
+
class _ToolCallRequestOverrides(TypedDict, total=False):
|
|
109
|
+
"""Possible overrides for ToolCallRequest.override() method."""
|
|
110
|
+
|
|
111
|
+
tool_call: ToolCall
|
|
112
|
+
|
|
113
|
+
|
|
106
114
|
@dataclass()
|
|
107
115
|
class ToolCallRequest:
|
|
108
116
|
"""Tool execution request passed to tool call interceptors.
|
|
@@ -119,6 +127,31 @@ class ToolCallRequest:
|
|
|
119
127
|
state: Any
|
|
120
128
|
runtime: Any
|
|
121
129
|
|
|
130
|
+
def override(self, **overrides: Unpack[_ToolCallRequestOverrides]) -> ToolCallRequest:
|
|
131
|
+
"""Replace the request with a new request with the given overrides.
|
|
132
|
+
|
|
133
|
+
Returns a new `ToolCallRequest` instance with the specified attributes replaced.
|
|
134
|
+
This follows an immutable pattern, leaving the original request unchanged.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
**overrides: Keyword arguments for attributes to override. Supported keys:
|
|
138
|
+
- tool_call: Tool call dict with name, args, and id
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
New ToolCallRequest instance with specified overrides applied.
|
|
142
|
+
|
|
143
|
+
Examples:
|
|
144
|
+
```python
|
|
145
|
+
# Modify tool call arguments without mutating original
|
|
146
|
+
modified_call = {**request.tool_call, "args": {"value": 10}}
|
|
147
|
+
new_request = request.override(tool_call=modified_call)
|
|
148
|
+
|
|
149
|
+
# Override multiple attributes
|
|
150
|
+
new_request = request.override(tool_call=modified_call, state=new_state)
|
|
151
|
+
```
|
|
152
|
+
"""
|
|
153
|
+
return replace(self, **overrides)
|
|
154
|
+
|
|
122
155
|
|
|
123
156
|
ToolCallWrapper = Callable[
|
|
124
157
|
[ToolCallRequest, Callable[[ToolCallRequest], ToolMessage | Command]],
|
|
@@ -188,6 +221,12 @@ Examples:
|
|
|
188
221
|
return result
|
|
189
222
|
"""
|
|
190
223
|
|
|
224
|
+
AsyncToolCallWrapper = Callable[
|
|
225
|
+
[ToolCallRequest, Callable[[ToolCallRequest], Awaitable[ToolMessage | Command]]],
|
|
226
|
+
Awaitable[ToolMessage | Command],
|
|
227
|
+
]
|
|
228
|
+
"""Async wrapper for tool call execution with multi-call support."""
|
|
229
|
+
|
|
191
230
|
|
|
192
231
|
class ToolCallWithContext(TypedDict):
|
|
193
232
|
"""ToolCall with additional context for graph state.
|
|
@@ -239,8 +278,11 @@ def msg_content_output(output: Any) -> str | list[dict]:
|
|
|
239
278
|
return str(output)
|
|
240
279
|
|
|
241
280
|
|
|
242
|
-
class ToolInvocationError(
|
|
243
|
-
"""
|
|
281
|
+
class ToolInvocationError(ToolException):
|
|
282
|
+
"""An error occurred while invoking a tool due to invalid arguments.
|
|
283
|
+
|
|
284
|
+
This exception is only raised when invoking a tool using the ToolNode!
|
|
285
|
+
"""
|
|
244
286
|
|
|
245
287
|
def __init__(
|
|
246
288
|
self, tool_name: str, source: ValidationError, tool_kwargs: dict[str, Any]
|
|
@@ -382,7 +424,7 @@ def _infer_handled_types(handler: Callable[..., str]) -> tuple[type[Exception],
|
|
|
382
424
|
return (Exception,)
|
|
383
425
|
|
|
384
426
|
|
|
385
|
-
class ToolNode(RunnableCallable):
|
|
427
|
+
class _ToolNode(RunnableCallable):
|
|
386
428
|
"""A node for executing tools in LangGraph workflows.
|
|
387
429
|
|
|
388
430
|
Handles tool execution patterns including function calls, state injection,
|
|
@@ -500,6 +542,7 @@ class ToolNode(RunnableCallable):
|
|
|
500
542
|
| tuple[type[Exception], ...] = _default_handle_tool_errors,
|
|
501
543
|
messages_key: str = "messages",
|
|
502
544
|
wrap_tool_call: ToolCallWrapper | None = None,
|
|
545
|
+
awrap_tool_call: AsyncToolCallWrapper | None = None,
|
|
503
546
|
) -> None:
|
|
504
547
|
"""Initialize ToolNode with tools and configuration.
|
|
505
548
|
|
|
@@ -509,9 +552,11 @@ class ToolNode(RunnableCallable):
|
|
|
509
552
|
tags: Optional metadata tags.
|
|
510
553
|
handle_tool_errors: Error handling configuration.
|
|
511
554
|
messages_key: State key containing messages.
|
|
512
|
-
wrap_tool_call:
|
|
555
|
+
wrap_tool_call: Sync wrapper function to intercept tool execution. Receives
|
|
513
556
|
ToolCallRequest and execute callable, returns ToolMessage or Command.
|
|
514
557
|
Enables retries, caching, request modification, and control flow.
|
|
558
|
+
awrap_tool_call: Async wrapper function to intercept tool execution.
|
|
559
|
+
If not provided, falls back to wrap_tool_call for async execution.
|
|
515
560
|
"""
|
|
516
561
|
super().__init__(self._func, self._afunc, name=name, tags=tags, trace=False)
|
|
517
562
|
self._tools_by_name: dict[str, BaseTool] = {}
|
|
@@ -520,6 +565,7 @@ class ToolNode(RunnableCallable):
|
|
|
520
565
|
self._handle_tool_errors = handle_tool_errors
|
|
521
566
|
self._messages_key = messages_key
|
|
522
567
|
self._wrap_tool_call = wrap_tool_call
|
|
568
|
+
self._awrap_tool_call = awrap_tool_call
|
|
523
569
|
for tool in tools:
|
|
524
570
|
if not isinstance(tool, BaseTool):
|
|
525
571
|
tool_ = create_tool(cast("type[BaseTool]", tool))
|
|
@@ -855,7 +901,7 @@ class ToolNode(RunnableCallable):
|
|
|
855
901
|
input: list[AnyMessage] | dict[str, Any] | BaseModel,
|
|
856
902
|
runtime: Any,
|
|
857
903
|
) -> ToolMessage | Command:
|
|
858
|
-
"""Execute single tool call asynchronously with
|
|
904
|
+
"""Execute single tool call asynchronously with awrap_tool_call wrapper if configured.
|
|
859
905
|
|
|
860
906
|
Args:
|
|
861
907
|
call: Tool call dict.
|
|
@@ -883,7 +929,7 @@ class ToolNode(RunnableCallable):
|
|
|
883
929
|
runtime=runtime,
|
|
884
930
|
)
|
|
885
931
|
|
|
886
|
-
if self._wrap_tool_call is None:
|
|
932
|
+
if self._awrap_tool_call is None and self._wrap_tool_call is None:
|
|
887
933
|
# No wrapper - execute directly
|
|
888
934
|
return await self._execute_tool_async(tool_request, input_type, config)
|
|
889
935
|
|
|
@@ -892,12 +938,17 @@ class ToolNode(RunnableCallable):
|
|
|
892
938
|
"""Execute tool with given request. Can be called multiple times."""
|
|
893
939
|
return await self._execute_tool_async(req, input_type, config)
|
|
894
940
|
|
|
941
|
+
def _sync_execute(req: ToolCallRequest) -> ToolMessage | Command:
|
|
942
|
+
"""Sync execute fallback for sync wrapper."""
|
|
943
|
+
return self._execute_tool_sync(req, input_type, config)
|
|
944
|
+
|
|
895
945
|
# Call wrapper with request and execute callable
|
|
896
|
-
# Note: wrapper is sync, but execute callable is async
|
|
897
946
|
try:
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
947
|
+
if self._awrap_tool_call is not None:
|
|
948
|
+
return await self._awrap_tool_call(tool_request, execute)
|
|
949
|
+
# None check was performed above already
|
|
950
|
+
self._wrap_tool_call = cast("ToolCallWrapper", self._wrap_tool_call)
|
|
951
|
+
return self._wrap_tool_call(tool_request, _sync_execute)
|
|
901
952
|
except Exception as e:
|
|
902
953
|
# Wrapper threw an exception
|
|
903
954
|
if not self._handle_tool_errors:
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain
|
|
3
|
+
Version: 1.0.0a15
|
|
4
|
+
Summary: Building applications with LLMs through composability
|
|
5
|
+
Project-URL: homepage, https://docs.langchain.com/
|
|
6
|
+
Project-URL: repository, https://github.com/langchain-ai/langchain/tree/master/libs/langchain
|
|
7
|
+
Project-URL: changelog, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain%3D%3D1%22
|
|
8
|
+
Project-URL: twitter, https://x.com/LangChainAI
|
|
9
|
+
Project-URL: slack, https://www.langchain.com/join-community
|
|
10
|
+
Project-URL: reddit, https://www.reddit.com/r/LangChain/
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Python: <4.0.0,>=3.10.0
|
|
14
|
+
Requires-Dist: langchain-core<2.0.0,>=1.0.0a7
|
|
15
|
+
Requires-Dist: langgraph<2.0.0,>=1.0.0a4
|
|
16
|
+
Requires-Dist: pydantic<3.0.0,>=2.7.4
|
|
17
|
+
Provides-Extra: anthropic
|
|
18
|
+
Requires-Dist: langchain-anthropic; extra == 'anthropic'
|
|
19
|
+
Provides-Extra: aws
|
|
20
|
+
Requires-Dist: langchain-aws; extra == 'aws'
|
|
21
|
+
Provides-Extra: community
|
|
22
|
+
Requires-Dist: langchain-community; extra == 'community'
|
|
23
|
+
Provides-Extra: deepseek
|
|
24
|
+
Requires-Dist: langchain-deepseek; extra == 'deepseek'
|
|
25
|
+
Provides-Extra: fireworks
|
|
26
|
+
Requires-Dist: langchain-fireworks; extra == 'fireworks'
|
|
27
|
+
Provides-Extra: google-genai
|
|
28
|
+
Requires-Dist: langchain-google-genai; extra == 'google-genai'
|
|
29
|
+
Provides-Extra: google-vertexai
|
|
30
|
+
Requires-Dist: langchain-google-vertexai; extra == 'google-vertexai'
|
|
31
|
+
Provides-Extra: groq
|
|
32
|
+
Requires-Dist: langchain-groq; extra == 'groq'
|
|
33
|
+
Provides-Extra: mistralai
|
|
34
|
+
Requires-Dist: langchain-mistralai; extra == 'mistralai'
|
|
35
|
+
Provides-Extra: ollama
|
|
36
|
+
Requires-Dist: langchain-ollama; extra == 'ollama'
|
|
37
|
+
Provides-Extra: openai
|
|
38
|
+
Requires-Dist: langchain-openai; extra == 'openai'
|
|
39
|
+
Provides-Extra: perplexity
|
|
40
|
+
Requires-Dist: langchain-perplexity; extra == 'perplexity'
|
|
41
|
+
Provides-Extra: together
|
|
42
|
+
Requires-Dist: langchain-together; extra == 'together'
|
|
43
|
+
Provides-Extra: xai
|
|
44
|
+
Requires-Dist: langchain-xai; extra == 'xai'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# 🦜️🔗 LangChain
|
|
48
|
+
|
|
49
|
+
[PyPI - Version](https://pypi.org/project/langchain/#history)
|
|
50
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
51
|
+
[PyPI - Downloads](https://pypistats.org/packages/langchain)
|
|
52
|
+
[Follow LangChain on Twitter/X](https://twitter.com/langchainai)
|
|
53
|
+
|
|
54
|
+
Looking for the JS/TS version? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
|
55
|
+
|
|
56
|
+
To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
|
|
57
|
+
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
|
|
58
|
+
|
|
59
|
+
## Quick Install
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install langchain
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## 🤔 What is this?
|
|
66
|
+
|
|
67
|
+
LangChain is the easiest way to start building agents and applications powered by LLMs. With under 10 lines of code, you can connect to OpenAI, Anthropic, Google, and [more](https://docs.langchain.com/oss/python/integrations/providers/overview). LangChain provides a pre-built agent architecture and model integrations to help you get started quickly and seamlessly incorporate LLMs into your agents and applications.
|
|
68
|
+
|
|
69
|
+
We recommend you use LangChain if you want to quickly build agents and autonomous applications. Use [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our low-level agent orchestration framework and runtime, when you have more advanced needs that require a combination of deterministic and agentic workflows, heavy customization, and carefully controlled latency.
|
|
70
|
+
|
|
71
|
+
LangChain [agents](https://docs.langchain.com/oss/python/langchain/agents) are built on top of LangGraph in order to provide durable execution, streaming, human-in-the-loop, persistence, and more. (You do not need to know LangGraph for basic LangChain agent usage.)
|
|
72
|
+
|
|
73
|
+
## 📖 Documentation
|
|
74
|
+
|
|
75
|
+
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_classic).
|
|
76
|
+
|
|
77
|
+
## 📕 Releases & Versioning
|
|
78
|
+
|
|
79
|
+
See our [Releases](https://docs.langchain.com/oss/python/release-policy) and [Versioning](https://docs.langchain.com/oss/python/versioning) policies.
|
|
80
|
+
|
|
81
|
+
## 💁 Contributing
|
|
82
|
+
|
|
83
|
+
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.
|
|
84
|
+
|
|
85
|
+
For detailed information on how to contribute, see the [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview).
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
langchain/__init__.py,sha256=FOFSABkFKCLhvZ83wF1wmMdw-l8UKpDcjmi-BqKuSRQ,64
|
|
2
|
+
langchain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
langchain/agents/__init__.py,sha256=x85V7MqddVSrraoirGHplPMzEz9Lha-vL9fKjXCS7lA,258
|
|
4
|
+
langchain/agents/factory.py,sha256=9iBzgKnb_9D2DLt_4ZnE62GrAYmDDykeAya9ZwNHJdQ,60839
|
|
5
|
+
langchain/agents/structured_output.py,sha256=msf-ClqDnMfJ-oGHqjwEyth860tMnx58GLTvqJijqg8,13686
|
|
6
|
+
langchain/agents/middleware/__init__.py,sha256=CGBHDIok3roWJDytMYfladxmdkaBa1vpjIy_aD5-euM,1480
|
|
7
|
+
langchain/agents/middleware/context_editing.py,sha256=brg9IQHC8NZKOQmA7afSlj2IScfe2rozdZB9TGthRTQ,8748
|
|
8
|
+
langchain/agents/middleware/human_in_the_loop.py,sha256=yKhubIrh4TrgUC8ctz7artixYq5paDF5gys2w5XzJzg,12601
|
|
9
|
+
langchain/agents/middleware/model_call_limit.py,sha256=tZx5MSMJvb4EE6Zr-hvC1nEHNgn4a4uFfVQxyzUzBe4,7804
|
|
10
|
+
langchain/agents/middleware/model_fallback.py,sha256=io6jHXnbTpDTA_RZg9d-eArpktOetomFMTX--B9y_x0,4177
|
|
11
|
+
langchain/agents/middleware/pii.py,sha256=7hTBxnpcG_hSZd29TCg-4tbiLFO9IJb-wwnujCRMrv4,24780
|
|
12
|
+
langchain/agents/middleware/summarization.py,sha256=H1VxRkkbauw4p4sMMKyc_uZGbJhtqoVvOF7y_5JBXTc,10329
|
|
13
|
+
langchain/agents/middleware/todo.py,sha256=0PyHV4u5JaBBuMmPWmDr3orZ5T5F6lk2jiVoBzVVMM4,9808
|
|
14
|
+
langchain/agents/middleware/tool_call_limit.py,sha256=0ilGNJRVBtjVN7MyMDgtYXOr1WLrCfNblXNCmvND-84,12317
|
|
15
|
+
langchain/agents/middleware/tool_emulator.py,sha256=5qJFPfTSiVukNclDeUo7_c7-PjGEVWyefbPC-zpYSlI,7115
|
|
16
|
+
langchain/agents/middleware/tool_selection.py,sha256=6RYdgkg6aSNx1w-YxRyL2Hct7UPnMRgGg6YVZVtW5TU,11638
|
|
17
|
+
langchain/agents/middleware/types.py,sha256=JGR6KMqfsPrt8Uxfcl9aN4bpMRVkGOFQKGrSdwUWgnA,55292
|
|
18
|
+
langchain/chat_models/__init__.py,sha256=PTq9qskQEbqXYAcUUxUXDsugOcwISgFhv4w40JgkbgU,181
|
|
19
|
+
langchain/chat_models/base.py,sha256=HPlD0QaLOGXRJAY1Qq6ojr1WcteBlgVO--_GoSqpxXE,34560
|
|
20
|
+
langchain/embeddings/__init__.py,sha256=kfLfu342i9bTrA0WC8yA6IJE2bgY4ZynWBi-_cMUg8E,179
|
|
21
|
+
langchain/embeddings/base.py,sha256=o77Z1TrXoUZN1SdYY9nZCNehm7cZzC-TNqc5NIzWtww,7327
|
|
22
|
+
langchain/messages/__init__.py,sha256=X5-dRewJP-jtehdC6oDbs21j9bxGDUbI5WlcNrO_bHk,1309
|
|
23
|
+
langchain/rate_limiters/__init__.py,sha256=5490xUNhet37N2nX6kbJlDgf8u1DX-C1Cs_r7etXn8A,351
|
|
24
|
+
langchain/tools/__init__.py,sha256=fYEuNXytW77uztDt1kQyQWWeZRIL3pA0h1m8F7bLerA,362
|
|
25
|
+
langchain/tools/tool_node.py,sha256=C0DPV53kY_eqGf2bZbddHj3l2_74sSnHkmZJZ06uhpw,59158
|
|
26
|
+
langchain-1.0.0a15.dist-info/METADATA,sha256=CMqeRpnFLjhFavvb-6zFZIL9XrLmXzDoX20OVNZZASk,4543
|
|
27
|
+
langchain-1.0.0a15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
+
langchain-1.0.0a15.dist-info/licenses/LICENSE,sha256=TsZ-TKbmch26hJssqCJhWXyGph7iFLvyFBYAa3stBHg,1067
|
|
29
|
+
langchain-1.0.0a15.dist-info/RECORD,,
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
"""Anthropic prompt caching middleware."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Callable
|
|
4
|
-
from typing import Literal
|
|
5
|
-
from warnings import warn
|
|
6
|
-
|
|
7
|
-
from langchain.agents.middleware.types import (
|
|
8
|
-
AgentMiddleware,
|
|
9
|
-
ModelCallResult,
|
|
10
|
-
ModelRequest,
|
|
11
|
-
ModelResponse,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class AnthropicPromptCachingMiddleware(AgentMiddleware):
|
|
16
|
-
"""Prompt Caching Middleware.
|
|
17
|
-
|
|
18
|
-
Optimizes API usage by caching conversation prefixes for Anthropic models.
|
|
19
|
-
|
|
20
|
-
Learn more about Anthropic prompt caching
|
|
21
|
-
[here](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
def __init__(
|
|
25
|
-
self,
|
|
26
|
-
type: Literal["ephemeral"] = "ephemeral",
|
|
27
|
-
ttl: Literal["5m", "1h"] = "5m",
|
|
28
|
-
min_messages_to_cache: int = 0,
|
|
29
|
-
unsupported_model_behavior: Literal["ignore", "warn", "raise"] = "warn",
|
|
30
|
-
) -> None:
|
|
31
|
-
"""Initialize the middleware with cache control settings.
|
|
32
|
-
|
|
33
|
-
Args:
|
|
34
|
-
type: The type of cache to use, only "ephemeral" is supported.
|
|
35
|
-
ttl: The time to live for the cache, only "5m" and "1h" are supported.
|
|
36
|
-
min_messages_to_cache: The minimum number of messages until the cache is used,
|
|
37
|
-
default is 0.
|
|
38
|
-
unsupported_model_behavior: The behavior to take when an unsupported model is used.
|
|
39
|
-
"ignore" will ignore the unsupported model and continue without caching.
|
|
40
|
-
"warn" will warn the user and continue without caching.
|
|
41
|
-
"raise" will raise an error and stop the agent.
|
|
42
|
-
"""
|
|
43
|
-
self.type = type
|
|
44
|
-
self.ttl = ttl
|
|
45
|
-
self.min_messages_to_cache = min_messages_to_cache
|
|
46
|
-
self.unsupported_model_behavior = unsupported_model_behavior
|
|
47
|
-
|
|
48
|
-
def wrap_model_call(
|
|
49
|
-
self,
|
|
50
|
-
request: ModelRequest,
|
|
51
|
-
handler: Callable[[ModelRequest], ModelResponse],
|
|
52
|
-
) -> ModelCallResult:
|
|
53
|
-
"""Modify the model request to add cache control blocks."""
|
|
54
|
-
try:
|
|
55
|
-
from langchain_anthropic import ChatAnthropic
|
|
56
|
-
except ImportError:
|
|
57
|
-
ChatAnthropic = None # noqa: N806
|
|
58
|
-
|
|
59
|
-
msg: str | None = None
|
|
60
|
-
|
|
61
|
-
if ChatAnthropic is None:
|
|
62
|
-
msg = (
|
|
63
|
-
"AnthropicPromptCachingMiddleware caching middleware only supports "
|
|
64
|
-
"Anthropic models. "
|
|
65
|
-
"Please install langchain-anthropic."
|
|
66
|
-
)
|
|
67
|
-
elif not isinstance(request.model, ChatAnthropic):
|
|
68
|
-
msg = (
|
|
69
|
-
"AnthropicPromptCachingMiddleware caching middleware only supports "
|
|
70
|
-
f"Anthropic models, not instances of {type(request.model)}"
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
if msg is not None:
|
|
74
|
-
if self.unsupported_model_behavior == "raise":
|
|
75
|
-
raise ValueError(msg)
|
|
76
|
-
if self.unsupported_model_behavior == "warn":
|
|
77
|
-
warn(msg, stacklevel=3)
|
|
78
|
-
else:
|
|
79
|
-
return handler(request)
|
|
80
|
-
|
|
81
|
-
messages_count = (
|
|
82
|
-
len(request.messages) + 1 if request.system_prompt else len(request.messages)
|
|
83
|
-
)
|
|
84
|
-
if messages_count < self.min_messages_to_cache:
|
|
85
|
-
return handler(request)
|
|
86
|
-
|
|
87
|
-
request.model_settings["cache_control"] = {"type": self.type, "ttl": self.ttl}
|
|
88
|
-
|
|
89
|
-
return handler(request)
|