langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (172)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/vectorstores/base.py

@@ -3,21 +3,7 @@
  One of the most common ways to store and search over unstructured data is to
  embed it and store the resulting embedding vectors, and then query the store
  and retrieve the data that are 'most similar' to the embedded query.
-
- **Class hierarchy:**
-
- .. code-block::
-
- VectorStore --> <name> # Examples: Annoy, FAISS, Milvus
-
- BaseRetriever --> VectorStoreRetriever --> <name>Retriever # Example: VespaRetriever
-
- **Main helpers:**
-
- .. code-block::
-
- Embeddings, Document
- """ # noqa: E501
+ """

  from __future__ import annotations

@@ -25,19 +11,19 @@ import logging
  import math
  import warnings
  from abc import ABC, abstractmethod
+ from collections.abc import Callable
  from itertools import cycle
  from typing import (
  TYPE_CHECKING,
  Any,
- Callable,
  ClassVar,
- Optional,
  TypeVar,
  )

  from pydantic import ConfigDict, Field, model_validator
  from typing_extensions import Self, override

+ from langchain_core.documents import Document
  from langchain_core.embeddings import Embeddings
  from langchain_core.retrievers import BaseRetriever, LangSmithRetrieverParams
  from langchain_core.runnables.config import run_in_executor
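The import hunk above captures the package-wide migration to modern typing: `Optional[X]` becomes `X | None` (PEP 604) and `Callable` now comes from `collections.abc` rather than `typing`. A minimal sketch of the style this release moves to; the function below is illustrative only and not part of langchain-core:

```python
from collections.abc import Callable


def apply_if_set(
    value: str | None,
    fn: Callable[[str], str] | None = None,
) -> str | None:
    """Illustrative only: return fn(value) when both are given, else pass value through."""
    if value is None or fn is None:
        return value
    return fn(value)


print(apply_if_set("hello", str.upper))  # HELLO
print(apply_if_set(None))                # None
```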
@@ -49,7 +35,6 @@ if TYPE_CHECKING:
  AsyncCallbackManagerForRetrieverRun,
  CallbackManagerForRetrieverRun,
  )
- from langchain_core.documents import Document

  logger = logging.getLogger(__name__)

@@ -62,15 +47,15 @@ class VectorStore(ABC):
  def add_texts(
  self,
  texts: Iterable[str],
- metadatas: Optional[list[dict]] = None,
+ metadatas: list[dict] | None = None,
  *,
- ids: Optional[list[str]] = None,
+ ids: list[str] | None = None,
  **kwargs: Any,
  ) -> list[str]:
- """Run more texts through the embeddings and add to the vectorstore.
+ """Run more texts through the embeddings and add to the `VectorStore`.

  Args:
- texts: Iterable of strings to add to the vectorstore.
+ texts: Iterable of strings to add to the `VectorStore`.
  metadatas: Optional list of metadatas associated with the texts.
  ids: Optional list of IDs associated with the texts.
  **kwargs: vectorstore specific parameters.
@@ -78,16 +63,13 @@ class VectorStore(ABC):
  associated with the texts.

  Returns:
- List of ids from adding the texts into the vectorstore.
+ List of ids from adding the texts into the `VectorStore`.

  Raises:
  ValueError: If the number of metadatas does not match the number of texts.
  ValueError: If the number of ids does not match the number of texts.
  """
  if type(self).add_documents != VectorStore.add_documents:
- # Import document in local scope to avoid circular imports
- from langchain_core.documents import Document
-
  # This condition is triggered if the subclass has provided
  # an implementation of the upsert method.
  # The existing add_texts
@@ -101,10 +83,10 @@ class VectorStore(ABC):
  )
  raise ValueError(msg)
  metadatas_ = iter(metadatas) if metadatas else cycle([{}])
- ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
+ ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
  docs = [
  Document(id=id_, page_content=text, metadata=metadata_)
- for text, metadata_, id_ in zip(texts, metadatas_, ids_)
+ for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
  ]
  if ids is not None:
  # For backward compatibility
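The `add_texts` hunks above show how texts are paired with metadata and ids before being wrapped in `Document` objects: missing metadatas cycle an empty dict, missing ids cycle `None`, and the new `strict=False` keeps `zip` permissive about length. A standalone sketch of that pairing logic (the helper name is mine, not the library's):

```python
from itertools import cycle


def pair_texts(
    texts: list[str],
    metadatas: list[dict] | None = None,
    ids: list[str] | None = None,
) -> list[tuple[str | None, str, dict]]:
    """Mirror the pairing in add_texts: absent metadatas/ids fall back to defaults."""
    metadatas_ = iter(metadatas) if metadatas else cycle([{}])
    ids_ = iter(ids) if ids else cycle([None])
    # texts drives the length; the cycled defaults never run out, so strict=False is safe.
    return [
        (id_, text, metadata_)
        for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
    ]


print(pair_texts(["cats purr", "dogs bark"], ids=["doc-1", "doc-2"]))
```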
@@ -115,7 +97,7 @@ class VectorStore(ABC):
  raise NotImplementedError(msg)

  @property
- def embeddings(self) -> Optional[Embeddings]:
+ def embeddings(self) -> Embeddings | None:
  """Access the query embedding object if available."""
  logger.debug(
  "The embeddings property has not been implemented for %s",
@@ -123,16 +105,15 @@ class VectorStore(ABC):
  )
  return None

- def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> Optional[bool]:
+ def delete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
  """Delete by vector ID or other criteria.

  Args:
- ids: List of ids to delete. If None, delete all. Default is None.
+ ids: List of ids to delete. If `None`, delete all.
  **kwargs: Other keyword arguments that subclasses might use.

  Returns:
- Optional[bool]: True if deletion is successful,
- False otherwise, None if not implemented.
+ True if deletion is successful, False otherwise, None if not implemented.
  """
  msg = "delete method must be implemented by subclass."
  raise NotImplementedError(msg)
@@ -159,7 +140,7 @@ class VectorStore(ABC):
  Returns:
  List of Documents.

- .. versionadded:: 0.2.11
+ !!! version-added "Added in version 0.2.11"
  """
  msg = f"{self.__class__.__name__} does not yet support get_by_ids."
  raise NotImplementedError(msg)
@@ -187,44 +168,40 @@ class VectorStore(ABC):
  Returns:
  List of Documents.

- .. versionadded:: 0.2.11
+ !!! version-added "Added in version 0.2.11"
  """
  return await run_in_executor(None, self.get_by_ids, ids)

- async def adelete(
- self, ids: Optional[list[str]] = None, **kwargs: Any
- ) -> Optional[bool]:
+ async def adelete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
  """Async delete by vector ID or other criteria.

  Args:
- ids: List of ids to delete. If None, delete all. Default is None.
+ ids: List of ids to delete. If `None`, delete all.
  **kwargs: Other keyword arguments that subclasses might use.

  Returns:
- Optional[bool]: True if deletion is successful,
- False otherwise, None if not implemented.
+ True if deletion is successful, False otherwise, None if not implemented.
  """
  return await run_in_executor(None, self.delete, ids, **kwargs)

  async def aadd_texts(
  self,
  texts: Iterable[str],
- metadatas: Optional[list[dict]] = None,
+ metadatas: list[dict] | None = None,
  *,
- ids: Optional[list[str]] = None,
+ ids: list[str] | None = None,
  **kwargs: Any,
  ) -> list[str]:
- """Async run more texts through the embeddings and add to the vectorstore.
+ """Async run more texts through the embeddings and add to the `VectorStore`.

  Args:
- texts: Iterable of strings to add to the vectorstore.
+ texts: Iterable of strings to add to the `VectorStore`.
  metadatas: Optional list of metadatas associated with the texts.
- Default is None.
  ids: Optional list
  **kwargs: vectorstore specific parameters.

  Returns:
- List of ids from adding the texts into the vectorstore.
+ List of ids from adding the texts into the `VectorStore`.

  Raises:
  ValueError: If the number of metadatas does not match the number of texts.
@@ -234,9 +211,6 @@ class VectorStore(ABC):
  # For backward compatibility
  kwargs["ids"] = ids
  if type(self).aadd_documents != VectorStore.aadd_documents:
- # Import document in local scope to avoid circular imports
- from langchain_core.documents import Document
-
  # This condition is triggered if the subclass has provided
  # an implementation of the upsert method.
  # The existing add_texts
@@ -250,11 +224,11 @@ class VectorStore(ABC):
  )
  raise ValueError(msg)
  metadatas_ = iter(metadatas) if metadatas else cycle([{}])
- ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
+ ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])

  docs = [
  Document(id=id_, page_content=text, metadata=metadata_)
- for text, metadata_, id_ in zip(texts, metadatas_, ids_)
+ for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
  ]
  return await self.aadd_documents(docs, **kwargs)
  return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
@@ -263,16 +237,13 @@ class VectorStore(ABC):
  """Add or update documents in the vectorstore.

  Args:
- documents: Documents to add to the vectorstore.
- kwargs: Additional keyword arguments.
+ documents: Documents to add to the `VectorStore`.
+ **kwargs: Additional keyword arguments.
  if kwargs contains ids and documents contain ids,
  the ids in the kwargs will receive precedence.

  Returns:
  List of IDs of the added texts.
-
- Raises:
- ValueError: If the number of ids does not match the number of documents.
  """
  if type(self).add_texts != VectorStore.add_texts:
  if "ids" not in kwargs:
@@ -295,17 +266,14 @@ class VectorStore(ABC):
  async def aadd_documents(
  self, documents: list[Document], **kwargs: Any
  ) -> list[str]:
- """Async run more documents through the embeddings and add to the vectorstore.
+ """Async run more documents through the embeddings and add to the `VectorStore`.

  Args:
- documents: Documents to add to the vectorstore.
- kwargs: Additional keyword arguments.
+ documents: Documents to add to the `VectorStore`.
+ **kwargs: Additional keyword arguments.

  Returns:
  List of IDs of the added texts.
-
- Raises:
- ValueError: If the number of IDs does not match the number of documents.
  """
  # If the async method has been overridden, we'll use that.
  if type(self).aadd_texts != VectorStore.aadd_texts:
@@ -333,7 +301,7 @@ class VectorStore(ABC):
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query.
+ List of `Document` objects most similar to the query.

  Raises:
  ValueError: If search_type is not one of "similarity",
@@ -367,7 +335,7 @@ class VectorStore(ABC):
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query.
+ List of `Document` objects most similar to the query.

  Raises:
  ValueError: If search_type is not one of "similarity",
@@ -396,11 +364,11 @@ class VectorStore(ABC):

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query.
+ List of `Document` objects most similar to the query.
  """

  @staticmethod
@@ -435,6 +403,7 @@ class VectorStore(ABC):
  """The 'correct' relevance function.

  may differ depending on a few things, including:
+
  - the distance / similarity metric used by the VectorStore
  - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
  - embedding dimensionality
@@ -454,7 +423,7 @@ class VectorStore(ABC):
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Tuples of (doc, similarity_score).
+ List of Tuples of `(doc, similarity_score)`.
  """
  raise NotImplementedError

@@ -468,7 +437,7 @@ class VectorStore(ABC):
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Tuples of (doc, similarity_score).
+ List of Tuples of `(doc, similarity_score)`.
  """
  # This is a temporary workaround to make the similarity search
  # asynchronous. The proper solution is to make the similarity search
@@ -486,19 +455,19 @@ class VectorStore(ABC):
  """Default similarity search with relevance scores.

  Modify if necessary in subclass.
- Return docs and relevance scores in the range [0, 1].
+ Return docs and relevance scores in the range `[0, 1]`.

- 0 is dissimilar, 1 is most similar.
+ `0` is dissimilar, `1` is most similar.

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
- **kwargs: kwargs to be passed to similarity search. Should include:
- score_threshold: Optional, a floating point value between 0 to 1 to
- filter the resulting set of retrieved docs
+ k: Number of Documents to return.
+ **kwargs: kwargs to be passed to similarity search. Should include
+ `score_threshold`, An optional floating point value between `0` to `1`
+ to filter the resulting set of retrieved docs

  Returns:
- List of Tuples of (doc, similarity_score)
+ List of Tuples of `(doc, similarity_score)`
  """
  relevance_score_fn = self._select_relevance_score_fn()
  docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
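The reworded docstring above keeps the same contract: `score_threshold` passed through `**kwargs` prunes the `(doc, relevance_score)` pairs after the relevance function maps raw scores into `[0, 1]`. A small sketch of just that filtering step, written against plain tuples so it runs without a concrete vector store (the helper name is mine):

```python
def filter_by_score_threshold(
    docs_and_scores: list[tuple[str, float]],
    score_threshold: float | None = None,
) -> list[tuple[str, float]]:
    """Keep only pairs whose relevance score meets the optional threshold."""
    if score_threshold is None:
        return docs_and_scores
    return [(doc, score) for doc, score in docs_and_scores if score >= score_threshold]


scored = [("cats purr", 0.92), ("dogs bark", 0.41)]
print(filter_by_score_threshold(scored, score_threshold=0.8))  # [('cats purr', 0.92)]
```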
@@ -513,19 +482,19 @@ class VectorStore(ABC):
  """Default similarity search with relevance scores.

  Modify if necessary in subclass.
- Return docs and relevance scores in the range [0, 1].
+ Return docs and relevance scores in the range `[0, 1]`.

- 0 is dissimilar, 1 is most similar.
+ `0` is dissimilar, `1` is most similar.

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
- **kwargs: kwargs to be passed to similarity search. Should include:
- score_threshold: Optional, a floating point value between 0 to 1 to
- filter the resulting set of retrieved docs
+ k: Number of Documents to return.
+ **kwargs: kwargs to be passed to similarity search. Should include
+ `score_threshold`, An optional floating point value between `0` to `1`
+ to filter the resulting set of retrieved docs

  Returns:
- List of Tuples of (doc, similarity_score)
+ List of Tuples of `(doc, similarity_score)`
  """
  relevance_score_fn = self._select_relevance_score_fn()
  docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
@@ -537,19 +506,19 @@ class VectorStore(ABC):
  k: int = 4,
  **kwargs: Any,
  ) -> list[tuple[Document, float]]:
- """Return docs and relevance scores in the range [0, 1].
+ """Return docs and relevance scores in the range `[0, 1]`.

- 0 is dissimilar, 1 is most similar.
+ `0` is dissimilar, `1` is most similar.

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
- **kwargs: kwargs to be passed to similarity search. Should include:
- score_threshold: Optional, a floating point value between 0 to 1 to
- filter the resulting set of retrieved docs.
+ k: Number of Documents to return.
+ **kwargs: kwargs to be passed to similarity search. Should include
+ `score_threshold`, An optional floating point value between `0` to `1`
+ to filter the resulting set of retrieved docs

  Returns:
- List of Tuples of (doc, similarity_score).
+ List of Tuples of `(doc, similarity_score)`.
  """
  score_threshold = kwargs.pop("score_threshold", None)

@@ -586,19 +555,19 @@ class VectorStore(ABC):
  k: int = 4,
  **kwargs: Any,
  ) -> list[tuple[Document, float]]:
- """Async return docs and relevance scores in the range [0, 1].
+ """Async return docs and relevance scores in the range `[0, 1]`.

- 0 is dissimilar, 1 is most similar.
+ `0` is dissimilar, `1` is most similar.

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
- **kwargs: kwargs to be passed to similarity search. Should include:
- score_threshold: Optional, a floating point value between 0 to 1 to
- filter the resulting set of retrieved docs
+ k: Number of Documents to return.
+ **kwargs: kwargs to be passed to similarity search. Should include
+ `score_threshold`, An optional floating point value between `0` to `1`
+ to filter the resulting set of retrieved docs

  Returns:
- List of Tuples of (doc, similarity_score)
+ List of Tuples of `(doc, similarity_score)`
  """
  score_threshold = kwargs.pop("score_threshold", None)

@@ -636,11 +605,11 @@ class VectorStore(ABC):

  Args:
  query: Input text.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query.
+ List of `Document` objects most similar to the query.
  """
  # This is a temporary workaround to make the similarity search
  # asynchronous. The proper solution is to make the similarity search
@@ -654,11 +623,11 @@ class VectorStore(ABC):

  Args:
  embedding: Embedding to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query vector.
+ List of `Document` objects most similar to the query vector.
  """
  raise NotImplementedError

@@ -669,11 +638,11 @@ class VectorStore(ABC):

  Args:
  embedding: Embedding to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents most similar to the query vector.
+ List of `Document` objects most similar to the query vector.
  """
  # This is a temporary workaround to make the similarity search
  # asynchronous. The proper solution is to make the similarity search
@@ -697,17 +666,15 @@ class VectorStore(ABC):

  Args:
  query: Text to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
- Default is 20.
  lambda_mult: Number between 0 and 1 that determines the degree
  of diversity among the results with 0 corresponding
  to maximum diversity and 1 to minimum diversity.
- Defaults to 0.5.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents selected by maximal marginal relevance.
+ List of `Document` objects selected by maximal marginal relevance.
  """
  raise NotImplementedError

@@ -726,17 +693,15 @@ class VectorStore(ABC):

  Args:
  query: Text to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
- Default is 20.
  lambda_mult: Number between 0 and 1 that determines the degree
  of diversity among the results with 0 corresponding
  to maximum diversity and 1 to minimum diversity.
- Defaults to 0.5.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents selected by maximal marginal relevance.
+ List of `Document` objects selected by maximal marginal relevance.
  """
  # This is a temporary workaround to make the similarity search
  # asynchronous. The proper solution is to make the similarity search
@@ -766,17 +731,15 @@ class VectorStore(ABC):

  Args:
  embedding: Embedding to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
- Default is 20.
  lambda_mult: Number between 0 and 1 that determines the degree
  of diversity among the results with 0 corresponding
  to maximum diversity and 1 to minimum diversity.
- Defaults to 0.5.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents selected by maximal marginal relevance.
+ List of `Document` objects selected by maximal marginal relevance.
  """
  raise NotImplementedError

@@ -795,17 +758,15 @@ class VectorStore(ABC):

  Args:
  embedding: Embedding to look up documents similar to.
- k: Number of Documents to return. Defaults to 4.
+ k: Number of Documents to return.
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
- Default is 20.
  lambda_mult: Number between 0 and 1 that determines the degree
  of diversity among the results with 0 corresponding
  to maximum diversity and 1 to minimum diversity.
- Defaults to 0.5.
  **kwargs: Arguments to pass to the search method.

  Returns:
- List of Documents selected by maximal marginal relevance.
+ List of `Document` objects selected by maximal marginal relevance.
  """
  return await run_in_executor(
  None,
@@ -824,15 +785,15 @@ class VectorStore(ABC):
  embedding: Embeddings,
  **kwargs: Any,
  ) -> Self:
- """Return VectorStore initialized from documents and embeddings.
+ """Return `VectorStore` initialized from documents and embeddings.

  Args:
- documents: List of Documents to add to the vectorstore.
+ documents: List of `Document` objects to add to the `VectorStore`.
  embedding: Embedding function to use.
- kwargs: Additional keyword arguments.
+ **kwargs: Additional keyword arguments.

  Returns:
- VectorStore: VectorStore initialized from documents and embeddings.
+ `VectorStore` initialized from documents and embeddings.
  """
  texts = [d.page_content for d in documents]
  metadatas = [d.metadata for d in documents]
@@ -854,15 +815,15 @@ class VectorStore(ABC):
  embedding: Embeddings,
  **kwargs: Any,
  ) -> Self:
- """Async return VectorStore initialized from documents and embeddings.
+ """Async return `VectorStore` initialized from documents and embeddings.

  Args:
- documents: List of Documents to add to the vectorstore.
+ documents: List of `Document` objects to add to the `VectorStore`.
  embedding: Embedding function to use.
- kwargs: Additional keyword arguments.
+ **kwargs: Additional keyword arguments.

  Returns:
- VectorStore: VectorStore initialized from documents and embeddings.
+ `VectorStore` initialized from documents and embeddings.
  """
  texts = [d.page_content for d in documents]
  metadatas = [d.metadata for d in documents]
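The `from_documents` hunks above document the classmethod constructor: it splits each `Document` into `page_content` and `metadata` and delegates to `from_texts`. A hedged usage sketch, assuming `InMemoryVectorStore` and `DeterministicFakeEmbedding` are importable as in recent langchain-core releases (adjust the imports if your version exposes them elsewhere):

```python
# Sketch only: assumes these classes are exported as in recent langchain-core.
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

docs = [
    Document(page_content="cats purr", metadata={"source": "a"}),
    Document(page_content="dogs bark", metadata={"source": "b"}),
]
# from_documents splits page_content/metadata and delegates to from_texts.
store = InMemoryVectorStore.from_documents(docs, DeterministicFakeEmbedding(size=64))
print(store.similarity_search("a purring cat", k=1)[0].metadata)
```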
@@ -883,23 +844,22 @@ class VectorStore(ABC):
  cls: type[VST],
  texts: list[str],
  embedding: Embeddings,
- metadatas: Optional[list[dict]] = None,
+ metadatas: list[dict] | None = None,
  *,
- ids: Optional[list[str]] = None,
+ ids: list[str] | None = None,
  **kwargs: Any,
  ) -> VST:
  """Return VectorStore initialized from texts and embeddings.

  Args:
- texts: Texts to add to the vectorstore.
+ texts: Texts to add to the `VectorStore`.
  embedding: Embedding function to use.
  metadatas: Optional list of metadatas associated with the texts.
- Default is None.
  ids: Optional list of IDs associated with the texts.
- kwargs: Additional keyword arguments.
+ **kwargs: Additional keyword arguments.

  Returns:
- VectorStore: VectorStore initialized from texts and embeddings.
+ VectorStore initialized from texts and embeddings.
  """

  @classmethod
@@ -907,23 +867,22 @@ class VectorStore(ABC):
  cls,
  texts: list[str],
  embedding: Embeddings,
- metadatas: Optional[list[dict]] = None,
+ metadatas: list[dict] | None = None,
  *,
- ids: Optional[list[str]] = None,
+ ids: list[str] | None = None,
  **kwargs: Any,
  ) -> Self:
  """Async return VectorStore initialized from texts and embeddings.

  Args:
- texts: Texts to add to the vectorstore.
+ texts: Texts to add to the `VectorStore`.
  embedding: Embedding function to use.
  metadatas: Optional list of metadatas associated with the texts.
- Default is None.
  ids: Optional list of IDs associated with the texts.
- kwargs: Additional keyword arguments.
+ **kwargs: Additional keyword arguments.

  Returns:
- VectorStore: VectorStore initialized from texts and embeddings.
+ VectorStore initialized from texts and embeddings.
  """
  if ids is not None:
  kwargs["ids"] = ids
@@ -939,17 +898,16 @@ class VectorStore(ABC):
  return tags

  def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
- """Return VectorStoreRetriever initialized from this VectorStore.
+ """Return `VectorStoreRetriever` initialized from this `VectorStore`.

  Args:
  **kwargs: Keyword arguments to pass to the search function.
  Can include:
- search_type (Optional[str]): Defines the type of search that
- the Retriever should perform.
- Can be "similarity" (default), "mmr", or
+ search_type: Defines the type of search that the Retriever should
+ perform. Can be "similarity" (default), "mmr", or
  "similarity_score_threshold".
- search_kwargs (Optional[Dict]): Keyword arguments to pass to the
- search function. Can include things like:
+ search_kwargs: Keyword arguments to pass to the search function. Can
+ include things like:
  k: Amount of documents to return (Default: 4)
  score_threshold: Minimum relevance threshold
  for similarity_score_threshold
@@ -960,41 +918,35 @@ class VectorStore(ABC):
  filter: Filter by document metadata

  Returns:
- VectorStoreRetriever: Retriever class for VectorStore.
+ Retriever class for `VectorStore`.

  Examples:
+ ```python
+ # Retrieve more documents with higher diversity
+ # Useful if your dataset has many similar documents
+ docsearch.as_retriever(
+ search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
+ )

- .. code-block:: python
-
- # Retrieve more documents with higher diversity
- # Useful if your dataset has many similar documents
- docsearch.as_retriever(
- search_type="mmr",
- search_kwargs={'k': 6, 'lambda_mult': 0.25}
- )
-
- # Fetch more documents for the MMR algorithm to consider
- # But only return the top 5
- docsearch.as_retriever(
- search_type="mmr",
- search_kwargs={'k': 5, 'fetch_k': 50}
- )
-
- # Only retrieve documents that have a relevance score
- # Above a certain threshold
- docsearch.as_retriever(
- search_type="similarity_score_threshold",
- search_kwargs={'score_threshold': 0.8}
- )
+ # Fetch more documents for the MMR algorithm to consider
+ # But only return the top 5
+ docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})

- # Only get the single most similar document from the dataset
- docsearch.as_retriever(search_kwargs={'k': 1})
+ # Only retrieve documents that have a relevance score
+ # Above a certain threshold
+ docsearch.as_retriever(
+ search_type="similarity_score_threshold",
+ search_kwargs={"score_threshold": 0.8},
+ )

- # Use a filter to only retrieve documents from a specific paper
- docsearch.as_retriever(
- search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
- )
+ # Only get the single most similar document from the dataset
+ docsearch.as_retriever(search_kwargs={"k": 1})

+ # Use a filter to only retrieve documents from a specific paper
+ docsearch.as_retriever(
+ search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
+ )
+ ```
  """
  tags = kwargs.pop("tags", None) or [*self._get_retriever_tags()]
  return VectorStoreRetriever(vectorstore=self, tags=tags, **kwargs)
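The updated `as_retriever` examples above show how to configure a retriever; here is a hedged end-to-end sketch that also invokes one, assuming `InMemoryVectorStore` and `DeterministicFakeEmbedding` are importable as in recent langchain-core releases:

```python
# Sketch only: assumes these classes are exported as in recent langchain-core.
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=64))
store.add_texts(["cats purr", "dogs bark", "ships sail"])

# Default search_type is "similarity"; search_kwargs are forwarded to the search call.
retriever = store.as_retriever(search_kwargs={"k": 1})
print(retriever.invoke("a purring cat"))  # a list containing the single closest Document
```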
@@ -1028,7 +980,7 @@ class VectorStoreRetriever(BaseRetriever):
  values: Values to validate.

  Returns:
- Values: Validated values.
+ Validated values.

  Raises:
  ValueError: If search_type is not one of the allowed search types.
@@ -1120,10 +1072,10 @@ class VectorStoreRetriever(BaseRetriever):
  return docs

  def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
- """Add documents to the vectorstore.
+ """Add documents to the `VectorStore`.

  Args:
- documents: Documents to add to the vectorstore.
+ documents: Documents to add to the `VectorStore`.
  **kwargs: Other keyword arguments that subclasses might use.

  Returns:
@@ -1134,10 +1086,10 @@ class VectorStoreRetriever(BaseRetriever):
  async def aadd_documents(
  self, documents: list[Document], **kwargs: Any
  ) -> list[str]:
- """Async add documents to the vectorstore.
+ """Async add documents to the `VectorStore`.

  Args:
- documents: Documents to add to the vectorstore.
+ documents: Documents to add to the `VectorStore`.
  **kwargs: Other keyword arguments that subclasses might use.

  Returns: