langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +45 -70
- langchain_core/_api/deprecation.py +80 -80
- langchain_core/_api/path.py +22 -8
- langchain_core/_import_utils.py +10 -4
- langchain_core/agents.py +25 -21
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +341 -348
- langchain_core/callbacks/file.py +55 -44
- langchain_core/callbacks/manager.py +546 -683
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +35 -36
- langchain_core/callbacks/usage.py +65 -70
- langchain_core/chat_history.py +48 -55
- langchain_core/document_loaders/base.py +46 -21
- langchain_core/document_loaders/langsmith.py +39 -36
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +96 -74
- langchain_core/documents/compressor.py +12 -9
- langchain_core/documents/transformers.py +29 -28
- langchain_core/embeddings/fake.py +56 -57
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +15 -9
- langchain_core/globals.py +4 -163
- langchain_core/indexing/api.py +132 -125
- langchain_core/indexing/base.py +64 -67
- langchain_core/indexing/in_memory.py +26 -6
- langchain_core/language_models/__init__.py +15 -27
- langchain_core/language_models/_utils.py +267 -117
- langchain_core/language_models/base.py +92 -177
- langchain_core/language_models/chat_models.py +547 -407
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +72 -118
- langchain_core/language_models/llms.py +168 -242
- langchain_core/load/dump.py +8 -11
- langchain_core/load/load.py +32 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +50 -56
- langchain_core/messages/__init__.py +36 -51
- langchain_core/messages/ai.py +377 -150
- langchain_core/messages/base.py +239 -47
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -3
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +7 -7
- langchain_core/messages/human.py +44 -38
- langchain_core/messages/modifier.py +3 -2
- langchain_core/messages/system.py +40 -27
- langchain_core/messages/tool.py +160 -58
- langchain_core/messages/utils.py +527 -638
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +68 -104
- langchain_core/output_parsers/json.py +13 -17
- langchain_core/output_parsers/list.py +11 -33
- langchain_core/output_parsers/openai_functions.py +56 -74
- langchain_core/output_parsers/openai_tools.py +68 -109
- langchain_core/output_parsers/pydantic.py +15 -13
- langchain_core/output_parsers/string.py +6 -2
- langchain_core/output_parsers/transform.py +17 -60
- langchain_core/output_parsers/xml.py +34 -44
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +26 -11
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +17 -6
- langchain_core/outputs/llm_result.py +15 -8
- langchain_core/prompt_values.py +29 -123
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +48 -63
- langchain_core/prompts/chat.py +259 -288
- langchain_core/prompts/dict.py +19 -11
- langchain_core/prompts/few_shot.py +84 -90
- langchain_core/prompts/few_shot_with_templates.py +14 -12
- langchain_core/prompts/image.py +19 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +7 -8
- langchain_core/prompts/prompt.py +42 -43
- langchain_core/prompts/string.py +37 -16
- langchain_core/prompts/structured.py +43 -46
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +52 -192
- langchain_core/runnables/base.py +1727 -1683
- langchain_core/runnables/branch.py +52 -73
- langchain_core/runnables/config.py +89 -103
- langchain_core/runnables/configurable.py +128 -130
- langchain_core/runnables/fallbacks.py +93 -82
- langchain_core/runnables/graph.py +127 -127
- langchain_core/runnables/graph_ascii.py +63 -41
- langchain_core/runnables/graph_mermaid.py +87 -70
- langchain_core/runnables/graph_png.py +31 -36
- langchain_core/runnables/history.py +145 -161
- langchain_core/runnables/passthrough.py +141 -144
- langchain_core/runnables/retry.py +84 -68
- langchain_core/runnables/router.py +33 -37
- langchain_core/runnables/schema.py +79 -72
- langchain_core/runnables/utils.py +95 -139
- langchain_core/stores.py +85 -131
- langchain_core/structured_query.py +11 -15
- langchain_core/sys_info.py +31 -32
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +221 -247
- langchain_core/tools/convert.py +144 -161
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -19
- langchain_core/tools/simple.py +52 -29
- langchain_core/tools/structured.py +56 -60
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +103 -112
- langchain_core/tracers/context.py +29 -48
- langchain_core/tracers/core.py +142 -105
- langchain_core/tracers/evaluation.py +30 -34
- langchain_core/tracers/event_stream.py +162 -117
- langchain_core/tracers/langchain.py +34 -36
- langchain_core/tracers/log_stream.py +87 -49
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +18 -34
- langchain_core/tracers/run_collector.py +8 -20
- langchain_core/tracers/schemas.py +0 -125
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +47 -9
- langchain_core/utils/aiter.py +70 -66
- langchain_core/utils/env.py +12 -9
- langchain_core/utils/function_calling.py +139 -206
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +48 -45
- langchain_core/utils/json.py +14 -4
- langchain_core/utils/json_schema.py +159 -43
- langchain_core/utils/mustache.py +32 -25
- langchain_core/utils/pydantic.py +67 -40
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +104 -62
- langchain_core/vectorstores/base.py +131 -179
- langchain_core/vectorstores/in_memory.py +113 -182
- langchain_core/vectorstores/utils.py +23 -17
- langchain_core/version.py +1 -1
- langchain_core-1.0.0.dist-info/METADATA +68 -0
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -448
- langchain_core/memory.py +0 -116
- langchain_core/messages/content_blocks.py +0 -1435
- langchain_core/prompts/pipeline.py +0 -133
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -23
- langchain_core/utils/loading.py +0 -31
- langchain_core/v1/__init__.py +0 -1
- langchain_core/v1/chat_models.py +0 -1047
- langchain_core/v1/messages.py +0 -755
- langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
- langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
- langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/rate_limiters.py
CHANGED
|
@@ -6,7 +6,6 @@ import abc
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import threading
|
|
8
8
|
import time
|
|
9
|
-
from typing import Optional
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
class BaseRateLimiter(abc.ABC):
|
|
@@ -22,11 +21,8 @@ class BaseRateLimiter(abc.ABC):
|
|
|
22
21
|
Current limitations:
|
|
23
22
|
|
|
24
23
|
- Rate limiting information is not surfaced in tracing or callbacks. This means
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
.. versionadded:: 0.2.24
|
|
24
|
+
that the total time it takes to invoke a chat model will encompass both
|
|
25
|
+
the time spent waiting for tokens and the time spent making the request.
|
|
30
26
|
"""
|
|
31
27
|
|
|
32
28
|
@abc.abstractmethod
|
|
@@ -34,18 +30,18 @@ class BaseRateLimiter(abc.ABC):
|
|
|
34
30
|
"""Attempt to acquire the necessary tokens for the rate limiter.
|
|
35
31
|
|
|
36
32
|
This method blocks until the required tokens are available if `blocking`
|
|
37
|
-
is set to True
|
|
33
|
+
is set to `True`.
|
|
38
34
|
|
|
39
|
-
If `blocking` is set to False
|
|
35
|
+
If `blocking` is set to `False`, the method will immediately return the result
|
|
40
36
|
of the attempt to acquire the tokens.
|
|
41
37
|
|
|
42
38
|
Args:
|
|
43
|
-
blocking: If True
|
|
44
|
-
If False
|
|
45
|
-
the attempt.
|
|
39
|
+
blocking: If `True`, the method will block until the tokens are available.
|
|
40
|
+
If `False`, the method will return immediately with the result of
|
|
41
|
+
the attempt.
|
|
46
42
|
|
|
47
43
|
Returns:
|
|
48
|
-
|
|
44
|
+
`True` if the tokens were successfully acquired, `False` otherwise.
|
|
49
45
|
"""
|
|
50
46
|
|
|
51
47
|
@abc.abstractmethod
|
|
@@ -53,18 +49,18 @@ class BaseRateLimiter(abc.ABC):
|
|
|
53
49
|
"""Attempt to acquire the necessary tokens for the rate limiter.
|
|
54
50
|
|
|
55
51
|
This method blocks until the required tokens are available if `blocking`
|
|
56
|
-
is set to True
|
|
52
|
+
is set to `True`.
|
|
57
53
|
|
|
58
|
-
If `blocking` is set to False
|
|
54
|
+
If `blocking` is set to `False`, the method will immediately return the result
|
|
59
55
|
of the attempt to acquire the tokens.
|
|
60
56
|
|
|
61
57
|
Args:
|
|
62
|
-
blocking: If True
|
|
63
|
-
If False
|
|
64
|
-
the attempt.
|
|
58
|
+
blocking: If `True`, the method will block until the tokens are available.
|
|
59
|
+
If `False`, the method will return immediately with the result of
|
|
60
|
+
the attempt.
|
|
65
61
|
|
|
66
62
|
Returns:
|
|
67
|
-
|
|
63
|
+
`True` if the tokens were successfully acquired, `False` otherwise.
|
|
68
64
|
"""
|
|
69
65
|
|
|
70
66
|
|
|
@@ -85,45 +81,40 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
85
81
|
not enough tokens in the bucket, the request is blocked until there are
|
|
86
82
|
enough tokens.
|
|
87
83
|
|
|
88
|
-
These
|
|
84
|
+
These tokens have nothing to do with LLM tokens. They are just
|
|
89
85
|
a way to keep track of how many requests can be made at a given time.
|
|
90
86
|
|
|
91
87
|
Current limitations:
|
|
92
88
|
|
|
93
89
|
- The rate limiter is not designed to work across different processes. It is
|
|
94
|
-
|
|
90
|
+
an in-memory rate limiter, but it is thread safe.
|
|
95
91
|
- The rate limiter only supports time-based rate limiting. It does not take
|
|
96
|
-
|
|
92
|
+
into account the size of the request or any other factors.
|
|
97
93
|
|
|
98
94
|
Example:
|
|
95
|
+
```python
|
|
96
|
+
import time
|
|
99
97
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
import time
|
|
103
|
-
|
|
104
|
-
from langchain_core.rate_limiters import InMemoryRateLimiter
|
|
105
|
-
|
|
106
|
-
rate_limiter = InMemoryRateLimiter(
|
|
107
|
-
requests_per_second=0.1, # <-- Can only make a request once every 10 seconds!!
|
|
108
|
-
check_every_n_seconds=0.1, # Wake up every 100 ms to check whether allowed to make a request,
|
|
109
|
-
max_bucket_size=10, # Controls the maximum burst size.
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
from langchain_anthropic import ChatAnthropic
|
|
113
|
-
model = ChatAnthropic(
|
|
114
|
-
model_name="claude-3-opus-20240229",
|
|
115
|
-
rate_limiter=rate_limiter
|
|
116
|
-
)
|
|
98
|
+
from langchain_core.rate_limiters import InMemoryRateLimiter
|
|
117
99
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
100
|
+
rate_limiter = InMemoryRateLimiter(
|
|
101
|
+
requests_per_second=0.1, # <-- Can only make a request once every 10 seconds!!
|
|
102
|
+
check_every_n_seconds=0.1, # Wake up every 100 ms to check whether allowed to make a request,
|
|
103
|
+
max_bucket_size=10, # Controls the maximum burst size.
|
|
104
|
+
)
|
|
123
105
|
|
|
106
|
+
from langchain_anthropic import ChatAnthropic
|
|
124
107
|
|
|
125
|
-
|
|
108
|
+
model = ChatAnthropic(
|
|
109
|
+
model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
|
|
110
|
+
)
|
|
126
111
|
|
|
112
|
+
for _ in range(5):
|
|
113
|
+
tic = time.time()
|
|
114
|
+
model.invoke("hello")
|
|
115
|
+
toc = time.time()
|
|
116
|
+
print(toc - tic)
|
|
117
|
+
```
|
|
127
118
|
""" # noqa: E501
|
|
128
119
|
|
|
129
120
|
def __init__(
|
|
@@ -135,7 +126,7 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
135
126
|
) -> None:
|
|
136
127
|
"""A rate limiter based on a token bucket.
|
|
137
128
|
|
|
138
|
-
These
|
|
129
|
+
These tokens have nothing to do with LLM tokens. They are just
|
|
139
130
|
a way to keep track of how many requests can be made at a given time.
|
|
140
131
|
|
|
141
132
|
This rate limiter is designed to work in a threaded environment.
|
|
@@ -148,11 +139,11 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
148
139
|
Args:
|
|
149
140
|
requests_per_second: The number of tokens to add per second to the bucket.
|
|
150
141
|
The tokens represent "credit" that can be used to make requests.
|
|
151
|
-
check_every_n_seconds:
|
|
142
|
+
check_every_n_seconds: Check whether the tokens are available
|
|
152
143
|
every this many seconds. Can be a float to represent
|
|
153
144
|
fractions of a second.
|
|
154
145
|
max_bucket_size: The maximum number of tokens that can be in the bucket.
|
|
155
|
-
Must be at least 1
|
|
146
|
+
Must be at least `1`. Used to prevent bursts of requests.
|
|
156
147
|
"""
|
|
157
148
|
# Number of requests that we can make per second.
|
|
158
149
|
self.requests_per_second = requests_per_second
|
|
@@ -163,7 +154,7 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
163
154
|
# at a given time.
|
|
164
155
|
self._consume_lock = threading.Lock()
|
|
165
156
|
# The last time we tried to consume tokens.
|
|
166
|
-
self.last:
|
|
157
|
+
self.last: float | None = None
|
|
167
158
|
self.check_every_n_seconds = check_every_n_seconds
|
|
168
159
|
|
|
169
160
|
def _consume(self) -> bool:
|
|
@@ -202,18 +193,18 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
202
193
|
"""Attempt to acquire a token from the rate limiter.
|
|
203
194
|
|
|
204
195
|
This method blocks until the required tokens are available if `blocking`
|
|
205
|
-
is set to True
|
|
196
|
+
is set to `True`.
|
|
206
197
|
|
|
207
|
-
If `blocking` is set to False
|
|
198
|
+
If `blocking` is set to `False`, the method will immediately return the result
|
|
208
199
|
of the attempt to acquire the tokens.
|
|
209
200
|
|
|
210
201
|
Args:
|
|
211
|
-
blocking: If True
|
|
212
|
-
If False
|
|
213
|
-
the attempt.
|
|
202
|
+
blocking: If `True`, the method will block until the tokens are available.
|
|
203
|
+
If `False`, the method will return immediately with the result of
|
|
204
|
+
the attempt.
|
|
214
205
|
|
|
215
206
|
Returns:
|
|
216
|
-
|
|
207
|
+
`True` if the tokens were successfully acquired, `False` otherwise.
|
|
217
208
|
"""
|
|
218
209
|
if not blocking:
|
|
219
210
|
return self._consume()
|
|
@@ -226,18 +217,18 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
|
|
226
217
|
"""Attempt to acquire a token from the rate limiter. Async version.
|
|
227
218
|
|
|
228
219
|
This method blocks until the required tokens are available if `blocking`
|
|
229
|
-
is set to True
|
|
220
|
+
is set to `True`.
|
|
230
221
|
|
|
231
|
-
If `blocking` is set to False
|
|
222
|
+
If `blocking` is set to `False`, the method will immediately return the result
|
|
232
223
|
of the attempt to acquire the tokens.
|
|
233
224
|
|
|
234
225
|
Args:
|
|
235
|
-
blocking: If True
|
|
236
|
-
If False
|
|
237
|
-
the attempt.
|
|
226
|
+
blocking: If `True`, the method will block until the tokens are available.
|
|
227
|
+
If `False`, the method will return immediately with the result of
|
|
228
|
+
the attempt.
|
|
238
229
|
|
|
239
230
|
Returns:
|
|
240
|
-
|
|
231
|
+
`True` if the tokens were successfully acquired, `False` otherwise.
|
|
241
232
|
"""
|
|
242
233
|
if not blocking:
|
|
243
234
|
return self._consume()
|
langchain_core/retrievers.py
CHANGED
|
@@ -3,34 +3,18 @@
|
|
|
3
3
|
It is more general than a vector store. A retriever does not need to be able to
|
|
4
4
|
store documents, only to return (or retrieve) them. Vector stores can be used as
|
|
5
5
|
the backbone of a retriever, but there are other types of retrievers as well.
|
|
6
|
-
|
|
7
|
-
**Class hierarchy:**
|
|
8
|
-
|
|
9
|
-
.. code-block::
|
|
10
|
-
|
|
11
|
-
BaseRetriever --> <name>Retriever # Examples: ArxivRetriever, MergerRetriever
|
|
12
|
-
|
|
13
|
-
**Main helpers:**
|
|
14
|
-
|
|
15
|
-
.. code-block::
|
|
16
|
-
|
|
17
|
-
RetrieverInput, RetrieverOutput, RetrieverLike, RetrieverOutputLike,
|
|
18
|
-
Document, Serializable, Callbacks,
|
|
19
|
-
CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun
|
|
20
6
|
"""
|
|
21
7
|
|
|
22
8
|
from __future__ import annotations
|
|
23
9
|
|
|
24
|
-
import warnings
|
|
25
10
|
from abc import ABC, abstractmethod
|
|
26
11
|
from inspect import signature
|
|
27
|
-
from typing import TYPE_CHECKING, Any
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
28
13
|
|
|
29
14
|
from pydantic import ConfigDict
|
|
30
15
|
from typing_extensions import Self, TypedDict, override
|
|
31
16
|
|
|
32
|
-
from langchain_core.
|
|
33
|
-
from langchain_core.callbacks import Callbacks
|
|
17
|
+
from langchain_core.callbacks.manager import AsyncCallbackManager, CallbackManager
|
|
34
18
|
from langchain_core.documents import Document
|
|
35
19
|
from langchain_core.runnables import (
|
|
36
20
|
Runnable,
|
|
@@ -57,11 +41,11 @@ class LangSmithRetrieverParams(TypedDict, total=False):
|
|
|
57
41
|
|
|
58
42
|
ls_retriever_name: str
|
|
59
43
|
"""Retriever name."""
|
|
60
|
-
ls_vector_store_provider:
|
|
44
|
+
ls_vector_store_provider: str | None
|
|
61
45
|
"""Vector store provider."""
|
|
62
|
-
ls_embedding_provider:
|
|
46
|
+
ls_embedding_provider: str | None
|
|
63
47
|
"""Embedding provider."""
|
|
64
|
-
ls_embedding_model:
|
|
48
|
+
ls_embedding_model: str | None
|
|
65
49
|
"""Embedding model."""
|
|
66
50
|
|
|
67
51
|
|
|
@@ -86,46 +70,46 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
86
70
|
|
|
87
71
|
Example: A retriever that returns the first 5 documents from a list of documents
|
|
88
72
|
|
|
89
|
-
|
|
73
|
+
```python
|
|
74
|
+
from langchain_core.documents import Document
|
|
75
|
+
from langchain_core.retrievers import BaseRetriever
|
|
90
76
|
|
|
91
|
-
|
|
92
|
-
|
|
77
|
+
class SimpleRetriever(BaseRetriever):
|
|
78
|
+
docs: list[Document]
|
|
79
|
+
k: int = 5
|
|
93
80
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
81
|
+
def _get_relevant_documents(self, query: str) -> list[Document]:
|
|
82
|
+
\"\"\"Return the first k documents from the list of documents\"\"\"
|
|
83
|
+
return self.docs[:self.k]
|
|
97
84
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
async def _aget_relevant_documents(self, query: str) -> list[Document]:
|
|
103
|
-
\"\"\"(Optional) async native implementation.\"\"\"
|
|
104
|
-
return self.docs[:self.k]
|
|
85
|
+
async def _aget_relevant_documents(self, query: str) -> list[Document]:
|
|
86
|
+
\"\"\"(Optional) async native implementation.\"\"\"
|
|
87
|
+
return self.docs[:self.k]
|
|
88
|
+
```
|
|
105
89
|
|
|
106
90
|
Example: A simple retriever based on a scikit-learn vectorizer
|
|
107
91
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
from sklearn.metrics.pairwise import cosine_similarity
|
|
92
|
+
```python
|
|
93
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
111
94
|
|
|
112
|
-
class TFIDFRetriever(BaseRetriever, BaseModel):
|
|
113
|
-
vectorizer: Any
|
|
114
|
-
docs: list[Document]
|
|
115
|
-
tfidf_array: Any
|
|
116
|
-
k: int = 4
|
|
117
95
|
|
|
118
|
-
|
|
119
|
-
|
|
96
|
+
class TFIDFRetriever(BaseRetriever, BaseModel):
|
|
97
|
+
vectorizer: Any
|
|
98
|
+
docs: list[Document]
|
|
99
|
+
tfidf_array: Any
|
|
100
|
+
k: int = 4
|
|
120
101
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
query_vec = self.vectorizer.transform([query])
|
|
124
|
-
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
|
125
|
-
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
|
|
126
|
-
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
|
|
102
|
+
class Config:
|
|
103
|
+
arbitrary_types_allowed = True
|
|
127
104
|
|
|
128
|
-
|
|
105
|
+
def _get_relevant_documents(self, query: str) -> list[Document]:
|
|
106
|
+
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
|
107
|
+
query_vec = self.vectorizer.transform([query])
|
|
108
|
+
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
|
109
|
+
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
|
|
110
|
+
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
|
|
111
|
+
```
|
|
112
|
+
"""
|
|
129
113
|
|
|
130
114
|
model_config = ConfigDict(
|
|
131
115
|
arbitrary_types_allowed=True,
|
|
@@ -133,15 +117,15 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
133
117
|
|
|
134
118
|
_new_arg_supported: bool = False
|
|
135
119
|
_expects_other_args: bool = False
|
|
136
|
-
tags:
|
|
137
|
-
"""Optional list of tags associated with the retriever.
|
|
120
|
+
tags: list[str] | None = None
|
|
121
|
+
"""Optional list of tags associated with the retriever.
|
|
138
122
|
These tags will be associated with each call to this retriever,
|
|
139
123
|
and passed as arguments to the handlers defined in `callbacks`.
|
|
140
124
|
You can use these to, e.g., identify a specific instance of a retriever with its
|
|
141
125
|
use case.
|
|
142
126
|
"""
|
|
143
|
-
metadata:
|
|
144
|
-
"""Optional metadata associated with the retriever.
|
|
127
|
+
metadata: dict[str, Any] | None = None
|
|
128
|
+
"""Optional metadata associated with the retriever.
|
|
145
129
|
This metadata will be associated with each call to this retriever,
|
|
146
130
|
and passed as arguments to the handlers defined in `callbacks`.
|
|
147
131
|
You can use these to, e.g., identify a specific instance of a retriever with its
|
|
@@ -151,35 +135,6 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
151
135
|
@override
|
|
152
136
|
def __init_subclass__(cls, **kwargs: Any) -> None:
|
|
153
137
|
super().__init_subclass__(**kwargs)
|
|
154
|
-
# Version upgrade for old retrievers that implemented the public
|
|
155
|
-
# methods directly.
|
|
156
|
-
if cls.get_relevant_documents != BaseRetriever.get_relevant_documents:
|
|
157
|
-
warnings.warn(
|
|
158
|
-
"Retrievers must implement abstract `_get_relevant_documents` method"
|
|
159
|
-
" instead of `get_relevant_documents`",
|
|
160
|
-
DeprecationWarning,
|
|
161
|
-
stacklevel=4,
|
|
162
|
-
)
|
|
163
|
-
swap = cls.get_relevant_documents
|
|
164
|
-
cls.get_relevant_documents = ( # type: ignore[method-assign]
|
|
165
|
-
BaseRetriever.get_relevant_documents
|
|
166
|
-
)
|
|
167
|
-
cls._get_relevant_documents = swap # type: ignore[method-assign]
|
|
168
|
-
if (
|
|
169
|
-
hasattr(cls, "aget_relevant_documents")
|
|
170
|
-
and cls.aget_relevant_documents != BaseRetriever.aget_relevant_documents
|
|
171
|
-
):
|
|
172
|
-
warnings.warn(
|
|
173
|
-
"Retrievers must implement abstract `_aget_relevant_documents` method"
|
|
174
|
-
" instead of `aget_relevant_documents`",
|
|
175
|
-
DeprecationWarning,
|
|
176
|
-
stacklevel=4,
|
|
177
|
-
)
|
|
178
|
-
aswap = cls.aget_relevant_documents
|
|
179
|
-
cls.aget_relevant_documents = ( # type: ignore[method-assign]
|
|
180
|
-
BaseRetriever.aget_relevant_documents
|
|
181
|
-
)
|
|
182
|
-
cls._aget_relevant_documents = aswap # type: ignore[method-assign]
|
|
183
138
|
parameters = signature(cls._get_relevant_documents).parameters
|
|
184
139
|
cls._new_arg_supported = parameters.get("run_manager") is not None
|
|
185
140
|
if (
|
|
@@ -212,7 +167,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
212
167
|
|
|
213
168
|
@override
|
|
214
169
|
def invoke(
|
|
215
|
-
self, input: str, config:
|
|
170
|
+
self, input: str, config: RunnableConfig | None = None, **kwargs: Any
|
|
216
171
|
) -> list[Document]:
|
|
217
172
|
"""Invoke the retriever to get relevant documents.
|
|
218
173
|
|
|
@@ -220,21 +175,17 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
220
175
|
|
|
221
176
|
Args:
|
|
222
177
|
input: The query string.
|
|
223
|
-
config: Configuration for the retriever.
|
|
224
|
-
kwargs: Additional arguments to pass to the retriever.
|
|
178
|
+
config: Configuration for the retriever.
|
|
179
|
+
**kwargs: Additional arguments to pass to the retriever.
|
|
225
180
|
|
|
226
181
|
Returns:
|
|
227
182
|
List of relevant documents.
|
|
228
183
|
|
|
229
184
|
Examples:
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
retriever.invoke("query")
|
|
234
|
-
|
|
185
|
+
```python
|
|
186
|
+
retriever.invoke("query")
|
|
187
|
+
```
|
|
235
188
|
"""
|
|
236
|
-
from langchain_core.callbacks.manager import CallbackManager
|
|
237
|
-
|
|
238
189
|
config = ensure_config(config)
|
|
239
190
|
inheritable_metadata = {
|
|
240
191
|
**(config.get("metadata") or {}),
|
|
@@ -276,7 +227,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
276
227
|
async def ainvoke(
|
|
277
228
|
self,
|
|
278
229
|
input: str,
|
|
279
|
-
config:
|
|
230
|
+
config: RunnableConfig | None = None,
|
|
280
231
|
**kwargs: Any,
|
|
281
232
|
) -> list[Document]:
|
|
282
233
|
"""Asynchronously invoke the retriever to get relevant documents.
|
|
@@ -285,21 +236,17 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
285
236
|
|
|
286
237
|
Args:
|
|
287
238
|
input: The query string.
|
|
288
|
-
config: Configuration for the retriever.
|
|
289
|
-
kwargs: Additional arguments to pass to the retriever.
|
|
239
|
+
config: Configuration for the retriever.
|
|
240
|
+
**kwargs: Additional arguments to pass to the retriever.
|
|
290
241
|
|
|
291
242
|
Returns:
|
|
292
243
|
List of relevant documents.
|
|
293
244
|
|
|
294
245
|
Examples:
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
await retriever.ainvoke("query")
|
|
299
|
-
|
|
246
|
+
```python
|
|
247
|
+
await retriever.ainvoke("query")
|
|
248
|
+
```
|
|
300
249
|
"""
|
|
301
|
-
from langchain_core.callbacks.manager import AsyncCallbackManager
|
|
302
|
-
|
|
303
250
|
config = ensure_config(config)
|
|
304
251
|
inheritable_metadata = {
|
|
305
252
|
**(config.get("metadata") or {}),
|
|
@@ -359,6 +306,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
359
306
|
Args:
|
|
360
307
|
query: String to find relevant documents for
|
|
361
308
|
run_manager: The callback handler to use
|
|
309
|
+
|
|
362
310
|
Returns:
|
|
363
311
|
List of relevant documents
|
|
364
312
|
"""
|
|
@@ -368,91 +316,3 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|
|
368
316
|
query,
|
|
369
317
|
run_manager=run_manager.get_sync(),
|
|
370
318
|
)
|
|
371
|
-
|
|
372
|
-
@deprecated(since="0.1.46", alternative="invoke", removal="1.0")
|
|
373
|
-
def get_relevant_documents(
|
|
374
|
-
self,
|
|
375
|
-
query: str,
|
|
376
|
-
*,
|
|
377
|
-
callbacks: Callbacks = None,
|
|
378
|
-
tags: Optional[list[str]] = None,
|
|
379
|
-
metadata: Optional[dict[str, Any]] = None,
|
|
380
|
-
run_name: Optional[str] = None,
|
|
381
|
-
**kwargs: Any,
|
|
382
|
-
) -> list[Document]:
|
|
383
|
-
"""Retrieve documents relevant to a query.
|
|
384
|
-
|
|
385
|
-
Users should favor using `.invoke` or `.batch` rather than
|
|
386
|
-
`get_relevant_documents directly`.
|
|
387
|
-
|
|
388
|
-
Args:
|
|
389
|
-
query: string to find relevant documents for.
|
|
390
|
-
callbacks: Callback manager or list of callbacks. Defaults to None.
|
|
391
|
-
tags: Optional list of tags associated with the retriever.
|
|
392
|
-
These tags will be associated with each call to this retriever,
|
|
393
|
-
and passed as arguments to the handlers defined in `callbacks`.
|
|
394
|
-
Defaults to None.
|
|
395
|
-
metadata: Optional metadata associated with the retriever.
|
|
396
|
-
This metadata will be associated with each call to this retriever,
|
|
397
|
-
and passed as arguments to the handlers defined in `callbacks`.
|
|
398
|
-
Defaults to None.
|
|
399
|
-
run_name: Optional name for the run. Defaults to None.
|
|
400
|
-
kwargs: Additional arguments to pass to the retriever.
|
|
401
|
-
|
|
402
|
-
Returns:
|
|
403
|
-
List of relevant documents.
|
|
404
|
-
"""
|
|
405
|
-
config: RunnableConfig = {}
|
|
406
|
-
if callbacks:
|
|
407
|
-
config["callbacks"] = callbacks
|
|
408
|
-
if tags:
|
|
409
|
-
config["tags"] = tags
|
|
410
|
-
if metadata:
|
|
411
|
-
config["metadata"] = metadata
|
|
412
|
-
if run_name:
|
|
413
|
-
config["run_name"] = run_name
|
|
414
|
-
return self.invoke(query, config, **kwargs)
|
|
415
|
-
|
|
416
|
-
@deprecated(since="0.1.46", alternative="ainvoke", removal="1.0")
|
|
417
|
-
async def aget_relevant_documents(
|
|
418
|
-
self,
|
|
419
|
-
query: str,
|
|
420
|
-
*,
|
|
421
|
-
callbacks: Callbacks = None,
|
|
422
|
-
tags: Optional[list[str]] = None,
|
|
423
|
-
metadata: Optional[dict[str, Any]] = None,
|
|
424
|
-
run_name: Optional[str] = None,
|
|
425
|
-
**kwargs: Any,
|
|
426
|
-
) -> list[Document]:
|
|
427
|
-
"""Asynchronously get documents relevant to a query.
|
|
428
|
-
|
|
429
|
-
Users should favor using `.ainvoke` or `.abatch` rather than
|
|
430
|
-
`aget_relevant_documents directly`.
|
|
431
|
-
|
|
432
|
-
Args:
|
|
433
|
-
query: string to find relevant documents for.
|
|
434
|
-
callbacks: Callback manager or list of callbacks.
|
|
435
|
-
tags: Optional list of tags associated with the retriever.
|
|
436
|
-
These tags will be associated with each call to this retriever,
|
|
437
|
-
and passed as arguments to the handlers defined in `callbacks`.
|
|
438
|
-
Defaults to None.
|
|
439
|
-
metadata: Optional metadata associated with the retriever.
|
|
440
|
-
This metadata will be associated with each call to this retriever,
|
|
441
|
-
and passed as arguments to the handlers defined in `callbacks`.
|
|
442
|
-
Defaults to None.
|
|
443
|
-
run_name: Optional name for the run. Defaults to None.
|
|
444
|
-
kwargs: Additional arguments to pass to the retriever.
|
|
445
|
-
|
|
446
|
-
Returns:
|
|
447
|
-
List of relevant documents.
|
|
448
|
-
"""
|
|
449
|
-
config: RunnableConfig = {}
|
|
450
|
-
if callbacks:
|
|
451
|
-
config["callbacks"] = callbacks
|
|
452
|
-
if tags:
|
|
453
|
-
config["tags"] = tags
|
|
454
|
-
if metadata:
|
|
455
|
-
config["metadata"] = metadata
|
|
456
|
-
if run_name:
|
|
457
|
-
config["run_name"] = run_name
|
|
458
|
-
return await self.ainvoke(query, config, **kwargs)
|