langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +23 -26
- langchain_core/_api/deprecation.py +52 -65
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +3 -4
- langchain_core/agents.py +19 -19
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +323 -334
- langchain_core/callbacks/file.py +44 -44
- langchain_core/callbacks/manager.py +441 -507
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +60 -57
- langchain_core/chat_history.py +48 -63
- langchain_core/document_loaders/base.py +23 -23
- langchain_core/document_loaders/langsmith.py +37 -37
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +62 -65
- langchain_core/documents/compressor.py +4 -4
- langchain_core/documents/transformers.py +28 -29
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +10 -11
- langchain_core/globals.py +3 -151
- langchain_core/indexing/api.py +61 -66
- langchain_core/indexing/base.py +58 -58
- langchain_core/indexing/in_memory.py +3 -3
- langchain_core/language_models/__init__.py +14 -27
- langchain_core/language_models/_utils.py +270 -84
- langchain_core/language_models/base.py +55 -162
- langchain_core/language_models/chat_models.py +442 -402
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +61 -39
- langchain_core/language_models/llms.py +123 -231
- langchain_core/load/dump.py +4 -5
- langchain_core/load/load.py +18 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +39 -40
- langchain_core/messages/__init__.py +61 -22
- langchain_core/messages/ai.py +368 -163
- langchain_core/messages/base.py +214 -43
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +41 -38
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +38 -28
- langchain_core/messages/tool.py +96 -103
- langchain_core/messages/utils.py +478 -504
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +58 -61
- langchain_core/output_parsers/json.py +7 -8
- langchain_core/output_parsers/list.py +5 -7
- langchain_core/output_parsers/openai_functions.py +49 -47
- langchain_core/output_parsers/openai_tools.py +14 -19
- langchain_core/output_parsers/pydantic.py +12 -13
- langchain_core/output_parsers/string.py +2 -2
- langchain_core/output_parsers/transform.py +15 -17
- langchain_core/output_parsers/xml.py +8 -10
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +18 -18
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +8 -8
- langchain_core/outputs/llm_result.py +10 -10
- langchain_core/prompt_values.py +12 -12
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +45 -55
- langchain_core/prompts/chat.py +254 -313
- langchain_core/prompts/dict.py +5 -5
- langchain_core/prompts/few_shot.py +81 -88
- langchain_core/prompts/few_shot_with_templates.py +11 -13
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +3 -3
- langchain_core/prompts/prompt.py +24 -39
- langchain_core/prompts/string.py +4 -4
- langchain_core/prompts/structured.py +42 -50
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +49 -190
- langchain_core/runnables/base.py +1484 -1709
- langchain_core/runnables/branch.py +45 -61
- langchain_core/runnables/config.py +80 -88
- langchain_core/runnables/configurable.py +117 -134
- langchain_core/runnables/fallbacks.py +83 -79
- langchain_core/runnables/graph.py +85 -95
- langchain_core/runnables/graph_ascii.py +27 -28
- langchain_core/runnables/graph_mermaid.py +38 -50
- langchain_core/runnables/graph_png.py +15 -16
- langchain_core/runnables/history.py +135 -148
- langchain_core/runnables/passthrough.py +124 -150
- langchain_core/runnables/retry.py +46 -51
- langchain_core/runnables/router.py +25 -30
- langchain_core/runnables/schema.py +79 -74
- langchain_core/runnables/utils.py +62 -68
- langchain_core/stores.py +81 -115
- langchain_core/structured_query.py +8 -8
- langchain_core/sys_info.py +27 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +179 -187
- langchain_core/tools/convert.py +131 -139
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +11 -11
- langchain_core/tools/simple.py +19 -24
- langchain_core/tools/structured.py +30 -39
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +97 -99
- langchain_core/tracers/context.py +29 -52
- langchain_core/tracers/core.py +50 -60
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +115 -70
- langchain_core/tracers/langchain.py +21 -21
- langchain_core/tracers/log_stream.py +43 -43
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +16 -16
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +46 -8
- langchain_core/utils/aiter.py +57 -61
- langchain_core/utils/env.py +9 -9
- langchain_core/utils/function_calling.py +89 -191
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +37 -42
- langchain_core/utils/json.py +4 -3
- langchain_core/utils/json_schema.py +8 -8
- langchain_core/utils/mustache.py +9 -11
- langchain_core/utils/pydantic.py +33 -35
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +80 -54
- langchain_core/vectorstores/base.py +129 -164
- langchain_core/vectorstores/in_memory.py +99 -174
- langchain_core/vectorstores/utils.py +5 -5
- langchain_core/version.py +1 -1
- {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -447
- langchain_core/memory.py +0 -120
- langchain_core/messages/content_blocks.py +0 -176
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-0.3.79.dist-info/RECORD +0 -174
- langchain_core-0.3.79.dist-info/entry_points.txt +0 -4
|
@@ -3,21 +3,7 @@
|
|
|
3
3
|
One of the most common ways to store and search over unstructured data is to
|
|
4
4
|
embed it and store the resulting embedding vectors, and then query the store
|
|
5
5
|
and retrieve the data that are 'most similar' to the embedded query.
|
|
6
|
-
|
|
7
|
-
**Class hierarchy:**
|
|
8
|
-
|
|
9
|
-
.. code-block::
|
|
10
|
-
|
|
11
|
-
VectorStore --> <name> # Examples: Annoy, FAISS, Milvus
|
|
12
|
-
|
|
13
|
-
BaseRetriever --> VectorStoreRetriever --> <name>Retriever # Example: VespaRetriever
|
|
14
|
-
|
|
15
|
-
**Main helpers:**
|
|
16
|
-
|
|
17
|
-
.. code-block::
|
|
18
|
-
|
|
19
|
-
Embeddings, Document
|
|
20
|
-
""" # noqa: E501
|
|
6
|
+
"""
|
|
21
7
|
|
|
22
8
|
from __future__ import annotations
|
|
23
9
|
|
|
@@ -25,13 +11,12 @@ import logging
|
|
|
25
11
|
import math
|
|
26
12
|
import warnings
|
|
27
13
|
from abc import ABC, abstractmethod
|
|
14
|
+
from collections.abc import Callable
|
|
28
15
|
from itertools import cycle
|
|
29
16
|
from typing import (
|
|
30
17
|
TYPE_CHECKING,
|
|
31
18
|
Any,
|
|
32
|
-
Callable,
|
|
33
19
|
ClassVar,
|
|
34
|
-
Optional,
|
|
35
20
|
TypeVar,
|
|
36
21
|
)
|
|
37
22
|
|
|
@@ -62,15 +47,15 @@ class VectorStore(ABC):
|
|
|
62
47
|
def add_texts(
|
|
63
48
|
self,
|
|
64
49
|
texts: Iterable[str],
|
|
65
|
-
metadatas: Optional[list[dict]] = None,
|
|
50
|
+
metadatas: list[dict] | None = None,
|
|
66
51
|
*,
|
|
67
|
-
ids: Optional[list[str]] = None,
|
|
52
|
+
ids: list[str] | None = None,
|
|
68
53
|
**kwargs: Any,
|
|
69
54
|
) -> list[str]:
|
|
70
|
-
"""Run more texts through the embeddings and add to the
|
|
55
|
+
"""Run more texts through the embeddings and add to the `VectorStore`.
|
|
71
56
|
|
|
72
57
|
Args:
|
|
73
|
-
texts: Iterable of strings to add to the
|
|
58
|
+
texts: Iterable of strings to add to the `VectorStore`.
|
|
74
59
|
metadatas: Optional list of metadatas associated with the texts.
|
|
75
60
|
ids: Optional list of IDs associated with the texts.
|
|
76
61
|
**kwargs: vectorstore specific parameters.
|
|
@@ -78,7 +63,7 @@ class VectorStore(ABC):
|
|
|
78
63
|
associated with the texts.
|
|
79
64
|
|
|
80
65
|
Returns:
|
|
81
|
-
List of ids from adding the texts into the
|
|
66
|
+
List of ids from adding the texts into the `VectorStore`.
|
|
82
67
|
|
|
83
68
|
Raises:
|
|
84
69
|
ValueError: If the number of metadatas does not match the number of texts.
|
|
@@ -98,10 +83,10 @@ class VectorStore(ABC):
|
|
|
98
83
|
)
|
|
99
84
|
raise ValueError(msg)
|
|
100
85
|
metadatas_ = iter(metadatas) if metadatas else cycle([{}])
|
|
101
|
-
ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
|
|
86
|
+
ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
|
|
102
87
|
docs = [
|
|
103
88
|
Document(id=id_, page_content=text, metadata=metadata_)
|
|
104
|
-
for text, metadata_, id_ in zip(texts, metadatas_, ids_)
|
|
89
|
+
for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
|
|
105
90
|
]
|
|
106
91
|
if ids is not None:
|
|
107
92
|
# For backward compatibility
|
|
@@ -112,7 +97,7 @@ class VectorStore(ABC):
|
|
|
112
97
|
raise NotImplementedError(msg)
|
|
113
98
|
|
|
114
99
|
@property
|
|
115
|
-
def embeddings(self) -> Optional[Embeddings]:
|
|
100
|
+
def embeddings(self) -> Embeddings | None:
|
|
116
101
|
"""Access the query embedding object if available."""
|
|
117
102
|
logger.debug(
|
|
118
103
|
"The embeddings property has not been implemented for %s",
|
|
@@ -120,16 +105,15 @@ class VectorStore(ABC):
|
|
|
120
105
|
)
|
|
121
106
|
return None
|
|
122
107
|
|
|
123
|
-
def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> Optional[bool]:
|
|
108
|
+
def delete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
|
|
124
109
|
"""Delete by vector ID or other criteria.
|
|
125
110
|
|
|
126
111
|
Args:
|
|
127
|
-
ids: List of ids to delete. If None
|
|
112
|
+
ids: List of ids to delete. If `None`, delete all.
|
|
128
113
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
129
114
|
|
|
130
115
|
Returns:
|
|
131
|
-
|
|
132
|
-
False otherwise, None if not implemented.
|
|
116
|
+
True if deletion is successful, False otherwise, None if not implemented.
|
|
133
117
|
"""
|
|
134
118
|
msg = "delete method must be implemented by subclass."
|
|
135
119
|
raise NotImplementedError(msg)
|
|
@@ -156,7 +140,7 @@ class VectorStore(ABC):
|
|
|
156
140
|
Returns:
|
|
157
141
|
List of Documents.
|
|
158
142
|
|
|
159
|
-
|
|
143
|
+
!!! version-added "Added in version 0.2.11"
|
|
160
144
|
"""
|
|
161
145
|
msg = f"{self.__class__.__name__} does not yet support get_by_ids."
|
|
162
146
|
raise NotImplementedError(msg)
|
|
@@ -184,44 +168,40 @@ class VectorStore(ABC):
|
|
|
184
168
|
Returns:
|
|
185
169
|
List of Documents.
|
|
186
170
|
|
|
187
|
-
|
|
171
|
+
!!! version-added "Added in version 0.2.11"
|
|
188
172
|
"""
|
|
189
173
|
return await run_in_executor(None, self.get_by_ids, ids)
|
|
190
174
|
|
|
191
|
-
async def adelete(
|
|
192
|
-
self, ids: Optional[list[str]] = None, **kwargs: Any
|
|
193
|
-
) -> Optional[bool]:
|
|
175
|
+
async def adelete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
|
|
194
176
|
"""Async delete by vector ID or other criteria.
|
|
195
177
|
|
|
196
178
|
Args:
|
|
197
|
-
ids: List of ids to delete. If None
|
|
179
|
+
ids: List of ids to delete. If `None`, delete all.
|
|
198
180
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
199
181
|
|
|
200
182
|
Returns:
|
|
201
|
-
|
|
202
|
-
False otherwise, None if not implemented.
|
|
183
|
+
True if deletion is successful, False otherwise, None if not implemented.
|
|
203
184
|
"""
|
|
204
185
|
return await run_in_executor(None, self.delete, ids, **kwargs)
|
|
205
186
|
|
|
206
187
|
async def aadd_texts(
|
|
207
188
|
self,
|
|
208
189
|
texts: Iterable[str],
|
|
209
|
-
metadatas: Optional[list[dict]] = None,
|
|
190
|
+
metadatas: list[dict] | None = None,
|
|
210
191
|
*,
|
|
211
|
-
ids: Optional[list[str]] = None,
|
|
192
|
+
ids: list[str] | None = None,
|
|
212
193
|
**kwargs: Any,
|
|
213
194
|
) -> list[str]:
|
|
214
|
-
"""Async run more texts through the embeddings and add to the
|
|
195
|
+
"""Async run more texts through the embeddings and add to the `VectorStore`.
|
|
215
196
|
|
|
216
197
|
Args:
|
|
217
|
-
texts: Iterable of strings to add to the
|
|
198
|
+
texts: Iterable of strings to add to the `VectorStore`.
|
|
218
199
|
metadatas: Optional list of metadatas associated with the texts.
|
|
219
|
-
Default is None.
|
|
220
200
|
ids: Optional list
|
|
221
201
|
**kwargs: vectorstore specific parameters.
|
|
222
202
|
|
|
223
203
|
Returns:
|
|
224
|
-
List of ids from adding the texts into the
|
|
204
|
+
List of ids from adding the texts into the `VectorStore`.
|
|
225
205
|
|
|
226
206
|
Raises:
|
|
227
207
|
ValueError: If the number of metadatas does not match the number of texts.
|
|
@@ -244,11 +224,11 @@ class VectorStore(ABC):
|
|
|
244
224
|
)
|
|
245
225
|
raise ValueError(msg)
|
|
246
226
|
metadatas_ = iter(metadatas) if metadatas else cycle([{}])
|
|
247
|
-
ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
|
|
227
|
+
ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
|
|
248
228
|
|
|
249
229
|
docs = [
|
|
250
230
|
Document(id=id_, page_content=text, metadata=metadata_)
|
|
251
|
-
for text, metadata_, id_ in zip(texts, metadatas_, ids_)
|
|
231
|
+
for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
|
|
252
232
|
]
|
|
253
233
|
return await self.aadd_documents(docs, **kwargs)
|
|
254
234
|
return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
|
|
@@ -257,8 +237,8 @@ class VectorStore(ABC):
|
|
|
257
237
|
"""Add or update documents in the vectorstore.
|
|
258
238
|
|
|
259
239
|
Args:
|
|
260
|
-
documents: Documents to add to the
|
|
261
|
-
kwargs: Additional keyword arguments.
|
|
240
|
+
documents: Documents to add to the `VectorStore`.
|
|
241
|
+
**kwargs: Additional keyword arguments.
|
|
262
242
|
if kwargs contains ids and documents contain ids,
|
|
263
243
|
the ids in the kwargs will receive precedence.
|
|
264
244
|
|
|
@@ -286,11 +266,11 @@ class VectorStore(ABC):
|
|
|
286
266
|
async def aadd_documents(
|
|
287
267
|
self, documents: list[Document], **kwargs: Any
|
|
288
268
|
) -> list[str]:
|
|
289
|
-
"""Async run more documents through the embeddings and add to the
|
|
269
|
+
"""Async run more documents through the embeddings and add to the `VectorStore`.
|
|
290
270
|
|
|
291
271
|
Args:
|
|
292
|
-
documents: Documents to add to the
|
|
293
|
-
kwargs: Additional keyword arguments.
|
|
272
|
+
documents: Documents to add to the `VectorStore`.
|
|
273
|
+
**kwargs: Additional keyword arguments.
|
|
294
274
|
|
|
295
275
|
Returns:
|
|
296
276
|
List of IDs of the added texts.
|
|
@@ -321,7 +301,7 @@ class VectorStore(ABC):
|
|
|
321
301
|
**kwargs: Arguments to pass to the search method.
|
|
322
302
|
|
|
323
303
|
Returns:
|
|
324
|
-
List of
|
|
304
|
+
List of `Document` objects most similar to the query.
|
|
325
305
|
|
|
326
306
|
Raises:
|
|
327
307
|
ValueError: If search_type is not one of "similarity",
|
|
@@ -355,7 +335,7 @@ class VectorStore(ABC):
|
|
|
355
335
|
**kwargs: Arguments to pass to the search method.
|
|
356
336
|
|
|
357
337
|
Returns:
|
|
358
|
-
List of
|
|
338
|
+
List of `Document` objects most similar to the query.
|
|
359
339
|
|
|
360
340
|
Raises:
|
|
361
341
|
ValueError: If search_type is not one of "similarity",
|
|
@@ -384,11 +364,11 @@ class VectorStore(ABC):
|
|
|
384
364
|
|
|
385
365
|
Args:
|
|
386
366
|
query: Input text.
|
|
387
|
-
k: Number of Documents to return.
|
|
367
|
+
k: Number of Documents to return.
|
|
388
368
|
**kwargs: Arguments to pass to the search method.
|
|
389
369
|
|
|
390
370
|
Returns:
|
|
391
|
-
List of
|
|
371
|
+
List of `Document` objects most similar to the query.
|
|
392
372
|
"""
|
|
393
373
|
|
|
394
374
|
@staticmethod
|
|
@@ -443,7 +423,7 @@ class VectorStore(ABC):
|
|
|
443
423
|
**kwargs: Arguments to pass to the search method.
|
|
444
424
|
|
|
445
425
|
Returns:
|
|
446
|
-
List of Tuples of (doc, similarity_score)
|
|
426
|
+
List of Tuples of `(doc, similarity_score)`.
|
|
447
427
|
"""
|
|
448
428
|
raise NotImplementedError
|
|
449
429
|
|
|
@@ -457,7 +437,7 @@ class VectorStore(ABC):
|
|
|
457
437
|
**kwargs: Arguments to pass to the search method.
|
|
458
438
|
|
|
459
439
|
Returns:
|
|
460
|
-
List of Tuples of (doc, similarity_score)
|
|
440
|
+
List of Tuples of `(doc, similarity_score)`.
|
|
461
441
|
"""
|
|
462
442
|
# This is a temporary workaround to make the similarity search
|
|
463
443
|
# asynchronous. The proper solution is to make the similarity search
|
|
@@ -475,19 +455,19 @@ class VectorStore(ABC):
|
|
|
475
455
|
"""Default similarity search with relevance scores.
|
|
476
456
|
|
|
477
457
|
Modify if necessary in subclass.
|
|
478
|
-
Return docs and relevance scores in the range [0, 1]
|
|
458
|
+
Return docs and relevance scores in the range `[0, 1]`.
|
|
479
459
|
|
|
480
|
-
0 is dissimilar, 1 is most similar.
|
|
460
|
+
`0` is dissimilar, `1` is most similar.
|
|
481
461
|
|
|
482
462
|
Args:
|
|
483
463
|
query: Input text.
|
|
484
|
-
k: Number of Documents to return.
|
|
485
|
-
**kwargs: kwargs to be passed to similarity search. Should include
|
|
486
|
-
score_threshold
|
|
487
|
-
|
|
464
|
+
k: Number of Documents to return.
|
|
465
|
+
**kwargs: kwargs to be passed to similarity search. Should include
|
|
466
|
+
`score_threshold`, An optional floating point value between `0` to `1`
|
|
467
|
+
to filter the resulting set of retrieved docs
|
|
488
468
|
|
|
489
469
|
Returns:
|
|
490
|
-
List of Tuples of (doc, similarity_score)
|
|
470
|
+
List of Tuples of `(doc, similarity_score)`
|
|
491
471
|
"""
|
|
492
472
|
relevance_score_fn = self._select_relevance_score_fn()
|
|
493
473
|
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
|
|
@@ -502,19 +482,19 @@ class VectorStore(ABC):
|
|
|
502
482
|
"""Default similarity search with relevance scores.
|
|
503
483
|
|
|
504
484
|
Modify if necessary in subclass.
|
|
505
|
-
Return docs and relevance scores in the range [0, 1]
|
|
485
|
+
Return docs and relevance scores in the range `[0, 1]`.
|
|
506
486
|
|
|
507
|
-
0 is dissimilar, 1 is most similar.
|
|
487
|
+
`0` is dissimilar, `1` is most similar.
|
|
508
488
|
|
|
509
489
|
Args:
|
|
510
490
|
query: Input text.
|
|
511
|
-
k: Number of Documents to return.
|
|
512
|
-
**kwargs: kwargs to be passed to similarity search. Should include
|
|
513
|
-
score_threshold
|
|
514
|
-
|
|
491
|
+
k: Number of Documents to return.
|
|
492
|
+
**kwargs: kwargs to be passed to similarity search. Should include
|
|
493
|
+
`score_threshold`, An optional floating point value between `0` to `1`
|
|
494
|
+
to filter the resulting set of retrieved docs
|
|
515
495
|
|
|
516
496
|
Returns:
|
|
517
|
-
List of Tuples of (doc, similarity_score)
|
|
497
|
+
List of Tuples of `(doc, similarity_score)`
|
|
518
498
|
"""
|
|
519
499
|
relevance_score_fn = self._select_relevance_score_fn()
|
|
520
500
|
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
|
|
@@ -526,19 +506,19 @@ class VectorStore(ABC):
|
|
|
526
506
|
k: int = 4,
|
|
527
507
|
**kwargs: Any,
|
|
528
508
|
) -> list[tuple[Document, float]]:
|
|
529
|
-
"""Return docs and relevance scores in the range [0, 1]
|
|
509
|
+
"""Return docs and relevance scores in the range `[0, 1]`.
|
|
530
510
|
|
|
531
|
-
0 is dissimilar, 1 is most similar.
|
|
511
|
+
`0` is dissimilar, `1` is most similar.
|
|
532
512
|
|
|
533
513
|
Args:
|
|
534
514
|
query: Input text.
|
|
535
|
-
k: Number of Documents to return.
|
|
536
|
-
**kwargs: kwargs to be passed to similarity search. Should include
|
|
537
|
-
score_threshold
|
|
538
|
-
|
|
515
|
+
k: Number of Documents to return.
|
|
516
|
+
**kwargs: kwargs to be passed to similarity search. Should include
|
|
517
|
+
`score_threshold`, An optional floating point value between `0` to `1`
|
|
518
|
+
to filter the resulting set of retrieved docs
|
|
539
519
|
|
|
540
520
|
Returns:
|
|
541
|
-
List of Tuples of (doc, similarity_score)
|
|
521
|
+
List of Tuples of `(doc, similarity_score)`.
|
|
542
522
|
"""
|
|
543
523
|
score_threshold = kwargs.pop("score_threshold", None)
|
|
544
524
|
|
|
@@ -575,19 +555,19 @@ class VectorStore(ABC):
|
|
|
575
555
|
k: int = 4,
|
|
576
556
|
**kwargs: Any,
|
|
577
557
|
) -> list[tuple[Document, float]]:
|
|
578
|
-
"""Async return docs and relevance scores in the range [0, 1]
|
|
558
|
+
"""Async return docs and relevance scores in the range `[0, 1]`.
|
|
579
559
|
|
|
580
|
-
0 is dissimilar, 1 is most similar.
|
|
560
|
+
`0` is dissimilar, `1` is most similar.
|
|
581
561
|
|
|
582
562
|
Args:
|
|
583
563
|
query: Input text.
|
|
584
|
-
k: Number of Documents to return.
|
|
585
|
-
**kwargs: kwargs to be passed to similarity search. Should include
|
|
586
|
-
score_threshold
|
|
587
|
-
|
|
564
|
+
k: Number of Documents to return.
|
|
565
|
+
**kwargs: kwargs to be passed to similarity search. Should include
|
|
566
|
+
`score_threshold`, An optional floating point value between `0` to `1`
|
|
567
|
+
to filter the resulting set of retrieved docs
|
|
588
568
|
|
|
589
569
|
Returns:
|
|
590
|
-
List of Tuples of (doc, similarity_score)
|
|
570
|
+
List of Tuples of `(doc, similarity_score)`
|
|
591
571
|
"""
|
|
592
572
|
score_threshold = kwargs.pop("score_threshold", None)
|
|
593
573
|
|
|
@@ -625,11 +605,11 @@ class VectorStore(ABC):
|
|
|
625
605
|
|
|
626
606
|
Args:
|
|
627
607
|
query: Input text.
|
|
628
|
-
k: Number of Documents to return.
|
|
608
|
+
k: Number of Documents to return.
|
|
629
609
|
**kwargs: Arguments to pass to the search method.
|
|
630
610
|
|
|
631
611
|
Returns:
|
|
632
|
-
List of
|
|
612
|
+
List of `Document` objects most similar to the query.
|
|
633
613
|
"""
|
|
634
614
|
# This is a temporary workaround to make the similarity search
|
|
635
615
|
# asynchronous. The proper solution is to make the similarity search
|
|
@@ -643,11 +623,11 @@ class VectorStore(ABC):
|
|
|
643
623
|
|
|
644
624
|
Args:
|
|
645
625
|
embedding: Embedding to look up documents similar to.
|
|
646
|
-
k: Number of Documents to return.
|
|
626
|
+
k: Number of Documents to return.
|
|
647
627
|
**kwargs: Arguments to pass to the search method.
|
|
648
628
|
|
|
649
629
|
Returns:
|
|
650
|
-
List of
|
|
630
|
+
List of `Document` objects most similar to the query vector.
|
|
651
631
|
"""
|
|
652
632
|
raise NotImplementedError
|
|
653
633
|
|
|
@@ -658,11 +638,11 @@ class VectorStore(ABC):
|
|
|
658
638
|
|
|
659
639
|
Args:
|
|
660
640
|
embedding: Embedding to look up documents similar to.
|
|
661
|
-
k: Number of Documents to return.
|
|
641
|
+
k: Number of Documents to return.
|
|
662
642
|
**kwargs: Arguments to pass to the search method.
|
|
663
643
|
|
|
664
644
|
Returns:
|
|
665
|
-
List of
|
|
645
|
+
List of `Document` objects most similar to the query vector.
|
|
666
646
|
"""
|
|
667
647
|
# This is a temporary workaround to make the similarity search
|
|
668
648
|
# asynchronous. The proper solution is to make the similarity search
|
|
@@ -686,17 +666,15 @@ class VectorStore(ABC):
|
|
|
686
666
|
|
|
687
667
|
Args:
|
|
688
668
|
query: Text to look up documents similar to.
|
|
689
|
-
k: Number of Documents to return.
|
|
669
|
+
k: Number of Documents to return.
|
|
690
670
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
691
|
-
Default is 20.
|
|
692
671
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
693
672
|
of diversity among the results with 0 corresponding
|
|
694
673
|
to maximum diversity and 1 to minimum diversity.
|
|
695
|
-
Defaults to 0.5.
|
|
696
674
|
**kwargs: Arguments to pass to the search method.
|
|
697
675
|
|
|
698
676
|
Returns:
|
|
699
|
-
List of
|
|
677
|
+
List of `Document` objects selected by maximal marginal relevance.
|
|
700
678
|
"""
|
|
701
679
|
raise NotImplementedError
|
|
702
680
|
|
|
@@ -715,17 +693,15 @@ class VectorStore(ABC):
|
|
|
715
693
|
|
|
716
694
|
Args:
|
|
717
695
|
query: Text to look up documents similar to.
|
|
718
|
-
k: Number of Documents to return.
|
|
696
|
+
k: Number of Documents to return.
|
|
719
697
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
720
|
-
Default is 20.
|
|
721
698
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
722
699
|
of diversity among the results with 0 corresponding
|
|
723
700
|
to maximum diversity and 1 to minimum diversity.
|
|
724
|
-
Defaults to 0.5.
|
|
725
701
|
**kwargs: Arguments to pass to the search method.
|
|
726
702
|
|
|
727
703
|
Returns:
|
|
728
|
-
List of
|
|
704
|
+
List of `Document` objects selected by maximal marginal relevance.
|
|
729
705
|
"""
|
|
730
706
|
# This is a temporary workaround to make the similarity search
|
|
731
707
|
# asynchronous. The proper solution is to make the similarity search
|
|
@@ -755,17 +731,15 @@ class VectorStore(ABC):
|
|
|
755
731
|
|
|
756
732
|
Args:
|
|
757
733
|
embedding: Embedding to look up documents similar to.
|
|
758
|
-
k: Number of Documents to return.
|
|
734
|
+
k: Number of Documents to return.
|
|
759
735
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
760
|
-
Default is 20.
|
|
761
736
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
762
737
|
of diversity among the results with 0 corresponding
|
|
763
738
|
to maximum diversity and 1 to minimum diversity.
|
|
764
|
-
Defaults to 0.5.
|
|
765
739
|
**kwargs: Arguments to pass to the search method.
|
|
766
740
|
|
|
767
741
|
Returns:
|
|
768
|
-
List of
|
|
742
|
+
List of `Document` objects selected by maximal marginal relevance.
|
|
769
743
|
"""
|
|
770
744
|
raise NotImplementedError
|
|
771
745
|
|
|
@@ -784,17 +758,15 @@ class VectorStore(ABC):
|
|
|
784
758
|
|
|
785
759
|
Args:
|
|
786
760
|
embedding: Embedding to look up documents similar to.
|
|
787
|
-
k: Number of Documents to return.
|
|
761
|
+
k: Number of Documents to return.
|
|
788
762
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
789
|
-
Default is 20.
|
|
790
763
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
791
764
|
of diversity among the results with 0 corresponding
|
|
792
765
|
to maximum diversity and 1 to minimum diversity.
|
|
793
|
-
Defaults to 0.5.
|
|
794
766
|
**kwargs: Arguments to pass to the search method.
|
|
795
767
|
|
|
796
768
|
Returns:
|
|
797
|
-
List of
|
|
769
|
+
List of `Document` objects selected by maximal marginal relevance.
|
|
798
770
|
"""
|
|
799
771
|
return await run_in_executor(
|
|
800
772
|
None,
|
|
@@ -813,15 +785,15 @@ class VectorStore(ABC):
|
|
|
813
785
|
embedding: Embeddings,
|
|
814
786
|
**kwargs: Any,
|
|
815
787
|
) -> Self:
|
|
816
|
-
"""Return VectorStore initialized from documents and embeddings.
|
|
788
|
+
"""Return `VectorStore` initialized from documents and embeddings.
|
|
817
789
|
|
|
818
790
|
Args:
|
|
819
|
-
documents: List of
|
|
791
|
+
documents: List of `Document` objects to add to the `VectorStore`.
|
|
820
792
|
embedding: Embedding function to use.
|
|
821
|
-
kwargs: Additional keyword arguments.
|
|
793
|
+
**kwargs: Additional keyword arguments.
|
|
822
794
|
|
|
823
795
|
Returns:
|
|
824
|
-
VectorStore
|
|
796
|
+
`VectorStore` initialized from documents and embeddings.
|
|
825
797
|
"""
|
|
826
798
|
texts = [d.page_content for d in documents]
|
|
827
799
|
metadatas = [d.metadata for d in documents]
|
|
@@ -843,15 +815,15 @@ class VectorStore(ABC):
|
|
|
843
815
|
embedding: Embeddings,
|
|
844
816
|
**kwargs: Any,
|
|
845
817
|
) -> Self:
|
|
846
|
-
"""Async return VectorStore initialized from documents and embeddings.
|
|
818
|
+
"""Async return `VectorStore` initialized from documents and embeddings.
|
|
847
819
|
|
|
848
820
|
Args:
|
|
849
|
-
documents: List of
|
|
821
|
+
documents: List of `Document` objects to add to the `VectorStore`.
|
|
850
822
|
embedding: Embedding function to use.
|
|
851
|
-
kwargs: Additional keyword arguments.
|
|
823
|
+
**kwargs: Additional keyword arguments.
|
|
852
824
|
|
|
853
825
|
Returns:
|
|
854
|
-
VectorStore
|
|
826
|
+
`VectorStore` initialized from documents and embeddings.
|
|
855
827
|
"""
|
|
856
828
|
texts = [d.page_content for d in documents]
|
|
857
829
|
metadatas = [d.metadata for d in documents]
|
|
@@ -872,23 +844,22 @@ class VectorStore(ABC):
|
|
|
872
844
|
cls: type[VST],
|
|
873
845
|
texts: list[str],
|
|
874
846
|
embedding: Embeddings,
|
|
875
|
-
metadatas: Optional[list[dict]] = None,
|
|
847
|
+
metadatas: list[dict] | None = None,
|
|
876
848
|
*,
|
|
877
|
-
ids: Optional[list[str]] = None,
|
|
849
|
+
ids: list[str] | None = None,
|
|
878
850
|
**kwargs: Any,
|
|
879
851
|
) -> VST:
|
|
880
852
|
"""Return VectorStore initialized from texts and embeddings.
|
|
881
853
|
|
|
882
854
|
Args:
|
|
883
|
-
texts: Texts to add to the
|
|
855
|
+
texts: Texts to add to the `VectorStore`.
|
|
884
856
|
embedding: Embedding function to use.
|
|
885
857
|
metadatas: Optional list of metadatas associated with the texts.
|
|
886
|
-
Default is None.
|
|
887
858
|
ids: Optional list of IDs associated with the texts.
|
|
888
|
-
kwargs: Additional keyword arguments.
|
|
859
|
+
**kwargs: Additional keyword arguments.
|
|
889
860
|
|
|
890
861
|
Returns:
|
|
891
|
-
VectorStore
|
|
862
|
+
VectorStore initialized from texts and embeddings.
|
|
892
863
|
"""
|
|
893
864
|
|
|
894
865
|
@classmethod
|
|
@@ -896,23 +867,22 @@ class VectorStore(ABC):
|
|
|
896
867
|
cls,
|
|
897
868
|
texts: list[str],
|
|
898
869
|
embedding: Embeddings,
|
|
899
|
-
metadatas:
|
|
870
|
+
metadatas: list[dict] | None = None,
|
|
900
871
|
*,
|
|
901
|
-
ids:
|
|
872
|
+
ids: list[str] | None = None,
|
|
902
873
|
**kwargs: Any,
|
|
903
874
|
) -> Self:
|
|
904
875
|
"""Async return VectorStore initialized from texts and embeddings.
|
|
905
876
|
|
|
906
877
|
Args:
|
|
907
|
-
texts: Texts to add to the
|
|
878
|
+
texts: Texts to add to the `VectorStore`.
|
|
908
879
|
embedding: Embedding function to use.
|
|
909
880
|
metadatas: Optional list of metadatas associated with the texts.
|
|
910
|
-
Default is None.
|
|
911
881
|
ids: Optional list of IDs associated with the texts.
|
|
912
|
-
kwargs: Additional keyword arguments.
|
|
882
|
+
**kwargs: Additional keyword arguments.
|
|
913
883
|
|
|
914
884
|
Returns:
|
|
915
|
-
VectorStore
|
|
885
|
+
VectorStore initialized from texts and embeddings.
|
|
916
886
|
"""
|
|
917
887
|
if ids is not None:
|
|
918
888
|
kwargs["ids"] = ids
|
|
@@ -928,17 +898,16 @@ class VectorStore(ABC):
|
|
|
928
898
|
return tags
|
|
929
899
|
|
|
930
900
|
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
|
|
931
|
-
"""Return VectorStoreRetriever initialized from this VectorStore
|
|
901
|
+
"""Return `VectorStoreRetriever` initialized from this `VectorStore`.
|
|
932
902
|
|
|
933
903
|
Args:
|
|
934
904
|
**kwargs: Keyword arguments to pass to the search function.
|
|
935
905
|
Can include:
|
|
936
|
-
search_type
|
|
937
|
-
|
|
938
|
-
Can be "similarity" (default), "mmr", or
|
|
906
|
+
search_type: Defines the type of search that the Retriever should
|
|
907
|
+
perform. Can be "similarity" (default), "mmr", or
|
|
939
908
|
"similarity_score_threshold".
|
|
940
|
-
search_kwargs
|
|
941
|
-
|
|
909
|
+
search_kwargs: Keyword arguments to pass to the search function. Can
|
|
910
|
+
include things like:
|
|
942
911
|
k: Amount of documents to return (Default: 4)
|
|
943
912
|
score_threshold: Minimum relevance threshold
|
|
944
913
|
for similarity_score_threshold
|
|
@@ -949,39 +918,35 @@ class VectorStore(ABC):
|
|
|
949
918
|
filter: Filter by document metadata
|
|
950
919
|
|
|
951
920
|
Returns:
|
|
952
|
-
|
|
921
|
+
Retriever class for `VectorStore`.
|
|
953
922
|
|
|
954
923
|
Examples:
|
|
924
|
+
```python
|
|
925
|
+
# Retrieve more documents with higher diversity
|
|
926
|
+
# Useful if your dataset has many similar documents
|
|
927
|
+
docsearch.as_retriever(
|
|
928
|
+
search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
|
|
929
|
+
)
|
|
955
930
|
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
# Useful if your dataset has many similar documents
|
|
960
|
-
docsearch.as_retriever(
|
|
961
|
-
search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
|
|
962
|
-
)
|
|
963
|
-
|
|
964
|
-
# Fetch more documents for the MMR algorithm to consider
|
|
965
|
-
# But only return the top 5
|
|
966
|
-
docsearch.as_retriever(
|
|
967
|
-
search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50}
|
|
968
|
-
)
|
|
969
|
-
|
|
970
|
-
# Only retrieve documents that have a relevance score
|
|
971
|
-
# Above a certain threshold
|
|
972
|
-
docsearch.as_retriever(
|
|
973
|
-
search_type="similarity_score_threshold",
|
|
974
|
-
search_kwargs={"score_threshold": 0.8},
|
|
975
|
-
)
|
|
931
|
+
# Fetch more documents for the MMR algorithm to consider
|
|
932
|
+
# But only return the top 5
|
|
933
|
+
docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})
|
|
976
934
|
|
|
977
|
-
|
|
978
|
-
|
|
935
|
+
# Only retrieve documents that have a relevance score
|
|
936
|
+
# Above a certain threshold
|
|
937
|
+
docsearch.as_retriever(
|
|
938
|
+
search_type="similarity_score_threshold",
|
|
939
|
+
search_kwargs={"score_threshold": 0.8},
|
|
940
|
+
)
|
|
979
941
|
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
|
|
983
|
-
)
|
|
942
|
+
# Only get the single most similar document from the dataset
|
|
943
|
+
docsearch.as_retriever(search_kwargs={"k": 1})
|
|
984
944
|
|
|
945
|
+
# Use a filter to only retrieve documents from a specific paper
|
|
946
|
+
docsearch.as_retriever(
|
|
947
|
+
search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
|
|
948
|
+
)
|
|
949
|
+
```
|
|
985
950
|
"""
|
|
986
951
|
tags = kwargs.pop("tags", None) or [*self._get_retriever_tags()]
|
|
987
952
|
return VectorStoreRetriever(vectorstore=self, tags=tags, **kwargs)
|
|
@@ -1015,7 +980,7 @@ class VectorStoreRetriever(BaseRetriever):
|
|
|
1015
980
|
values: Values to validate.
|
|
1016
981
|
|
|
1017
982
|
Returns:
|
|
1018
|
-
|
|
983
|
+
Validated values.
|
|
1019
984
|
|
|
1020
985
|
Raises:
|
|
1021
986
|
ValueError: If search_type is not one of the allowed search types.
|
|
@@ -1107,10 +1072,10 @@ class VectorStoreRetriever(BaseRetriever):
|
|
|
1107
1072
|
return docs
|
|
1108
1073
|
|
|
1109
1074
|
def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
|
|
1110
|
-
"""Add documents to the
|
|
1075
|
+
"""Add documents to the `VectorStore`.
|
|
1111
1076
|
|
|
1112
1077
|
Args:
|
|
1113
|
-
documents: Documents to add to the
|
|
1078
|
+
documents: Documents to add to the `VectorStore`.
|
|
1114
1079
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
1115
1080
|
|
|
1116
1081
|
Returns:
|
|
@@ -1121,10 +1086,10 @@ class VectorStoreRetriever(BaseRetriever):
|
|
|
1121
1086
|
async def aadd_documents(
|
|
1122
1087
|
self, documents: list[Document], **kwargs: Any
|
|
1123
1088
|
) -> list[str]:
|
|
1124
|
-
"""Async add documents to the
|
|
1089
|
+
"""Async add documents to the `VectorStore`.
|
|
1125
1090
|
|
|
1126
1091
|
Args:
|
|
1127
|
-
documents: Documents to add to the
|
|
1092
|
+
documents: Documents to add to the `VectorStore`.
|
|
1128
1093
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
1129
1094
|
|
|
1130
1095
|
Returns:
|