langchain-core 1.0.0a6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +51 -64
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +20 -22
  8. langchain_core/caches.py +65 -66
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +321 -336
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +436 -513
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +53 -68
  17. langchain_core/document_loaders/base.py +27 -25
  18. langchain_core/document_loaders/blob_loaders.py +1 -1
  19. langchain_core/document_loaders/langsmith.py +44 -48
  20. langchain_core/documents/__init__.py +23 -3
  21. langchain_core/documents/base.py +98 -90
  22. langchain_core/documents/compressor.py +10 -10
  23. langchain_core/documents/transformers.py +34 -35
  24. langchain_core/embeddings/fake.py +50 -54
  25. langchain_core/example_selectors/length_based.py +1 -1
  26. langchain_core/example_selectors/semantic_similarity.py +28 -32
  27. langchain_core/exceptions.py +21 -20
  28. langchain_core/globals.py +3 -151
  29. langchain_core/indexing/__init__.py +1 -1
  30. langchain_core/indexing/api.py +121 -126
  31. langchain_core/indexing/base.py +73 -75
  32. langchain_core/indexing/in_memory.py +4 -6
  33. langchain_core/language_models/__init__.py +14 -29
  34. langchain_core/language_models/_utils.py +58 -61
  35. langchain_core/language_models/base.py +53 -162
  36. langchain_core/language_models/chat_models.py +298 -387
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +42 -36
  39. langchain_core/language_models/llms.py +125 -235
  40. langchain_core/load/dump.py +9 -12
  41. langchain_core/load/load.py +18 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +42 -40
  44. langchain_core/messages/__init__.py +10 -16
  45. langchain_core/messages/ai.py +148 -148
  46. langchain_core/messages/base.py +53 -51
  47. langchain_core/messages/block_translators/__init__.py +19 -22
  48. langchain_core/messages/block_translators/anthropic.py +6 -6
  49. langchain_core/messages/block_translators/bedrock_converse.py +5 -5
  50. langchain_core/messages/block_translators/google_genai.py +10 -7
  51. langchain_core/messages/block_translators/google_vertexai.py +4 -32
  52. langchain_core/messages/block_translators/groq.py +117 -21
  53. langchain_core/messages/block_translators/langchain_v0.py +5 -5
  54. langchain_core/messages/block_translators/openai.py +11 -11
  55. langchain_core/messages/chat.py +2 -6
  56. langchain_core/messages/content.py +337 -328
  57. langchain_core/messages/function.py +6 -10
  58. langchain_core/messages/human.py +24 -31
  59. langchain_core/messages/modifier.py +2 -2
  60. langchain_core/messages/system.py +19 -29
  61. langchain_core/messages/tool.py +74 -90
  62. langchain_core/messages/utils.py +474 -504
  63. langchain_core/output_parsers/__init__.py +13 -10
  64. langchain_core/output_parsers/base.py +61 -61
  65. langchain_core/output_parsers/format_instructions.py +9 -4
  66. langchain_core/output_parsers/json.py +12 -10
  67. langchain_core/output_parsers/list.py +21 -23
  68. langchain_core/output_parsers/openai_functions.py +49 -47
  69. langchain_core/output_parsers/openai_tools.py +16 -21
  70. langchain_core/output_parsers/pydantic.py +13 -14
  71. langchain_core/output_parsers/string.py +5 -5
  72. langchain_core/output_parsers/transform.py +15 -17
  73. langchain_core/output_parsers/xml.py +35 -34
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +18 -18
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +10 -11
  78. langchain_core/outputs/llm_result.py +10 -10
  79. langchain_core/prompt_values.py +11 -17
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -56
  82. langchain_core/prompts/chat.py +275 -325
  83. langchain_core/prompts/dict.py +5 -5
  84. langchain_core/prompts/few_shot.py +81 -88
  85. langchain_core/prompts/few_shot_with_templates.py +11 -13
  86. langchain_core/prompts/image.py +12 -14
  87. langchain_core/prompts/loading.py +4 -6
  88. langchain_core/prompts/message.py +3 -3
  89. langchain_core/prompts/prompt.py +24 -39
  90. langchain_core/prompts/string.py +26 -10
  91. langchain_core/prompts/structured.py +49 -53
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +61 -198
  94. langchain_core/runnables/base.py +1476 -1626
  95. langchain_core/runnables/branch.py +53 -57
  96. langchain_core/runnables/config.py +72 -89
  97. langchain_core/runnables/configurable.py +120 -137
  98. langchain_core/runnables/fallbacks.py +83 -79
  99. langchain_core/runnables/graph.py +91 -97
  100. langchain_core/runnables/graph_ascii.py +27 -28
  101. langchain_core/runnables/graph_mermaid.py +38 -50
  102. langchain_core/runnables/graph_png.py +15 -16
  103. langchain_core/runnables/history.py +135 -148
  104. langchain_core/runnables/passthrough.py +124 -150
  105. langchain_core/runnables/retry.py +46 -51
  106. langchain_core/runnables/router.py +25 -30
  107. langchain_core/runnables/schema.py +75 -80
  108. langchain_core/runnables/utils.py +60 -67
  109. langchain_core/stores.py +85 -121
  110. langchain_core/structured_query.py +8 -8
  111. langchain_core/sys_info.py +27 -29
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +284 -229
  114. langchain_core/tools/convert.py +160 -155
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -11
  117. langchain_core/tools/simple.py +19 -24
  118. langchain_core/tools/structured.py +32 -39
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/base.py +97 -99
  121. langchain_core/tracers/context.py +29 -52
  122. langchain_core/tracers/core.py +49 -53
  123. langchain_core/tracers/evaluation.py +11 -11
  124. langchain_core/tracers/event_stream.py +65 -64
  125. langchain_core/tracers/langchain.py +21 -21
  126. langchain_core/tracers/log_stream.py +45 -45
  127. langchain_core/tracers/memory_stream.py +3 -3
  128. langchain_core/tracers/root_listeners.py +16 -16
  129. langchain_core/tracers/run_collector.py +2 -4
  130. langchain_core/tracers/schemas.py +0 -129
  131. langchain_core/tracers/stdout.py +3 -3
  132. langchain_core/utils/__init__.py +1 -4
  133. langchain_core/utils/_merge.py +2 -2
  134. langchain_core/utils/aiter.py +57 -61
  135. langchain_core/utils/env.py +9 -9
  136. langchain_core/utils/function_calling.py +89 -186
  137. langchain_core/utils/html.py +7 -8
  138. langchain_core/utils/input.py +6 -6
  139. langchain_core/utils/interactive_env.py +1 -1
  140. langchain_core/utils/iter.py +36 -40
  141. langchain_core/utils/json.py +4 -3
  142. langchain_core/utils/json_schema.py +9 -9
  143. langchain_core/utils/mustache.py +8 -10
  144. langchain_core/utils/pydantic.py +33 -35
  145. langchain_core/utils/strings.py +6 -9
  146. langchain_core/utils/usage.py +1 -1
  147. langchain_core/utils/utils.py +66 -62
  148. langchain_core/vectorstores/base.py +182 -216
  149. langchain_core/vectorstores/in_memory.py +101 -176
  150. langchain_core/vectorstores/utils.py +5 -5
  151. langchain_core/version.py +1 -1
  152. langchain_core-1.0.3.dist-info/METADATA +69 -0
  153. langchain_core-1.0.3.dist-info/RECORD +172 -0
  154. {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.3.dist-info}/WHEEL +1 -1
  155. langchain_core/memory.py +0 -120
  156. langchain_core/messages/block_translators/ollama.py +0 -47
  157. langchain_core/prompts/pipeline.py +0 -138
  158. langchain_core/pydantic_v1/__init__.py +0 -30
  159. langchain_core/pydantic_v1/dataclasses.py +0 -23
  160. langchain_core/pydantic_v1/main.py +0 -23
  161. langchain_core/tracers/langchain_v1.py +0 -31
  162. langchain_core/utils/loading.py +0 -35
  163. langchain_core-1.0.0a6.dist-info/METADATA +0 -67
  164. langchain_core-1.0.0a6.dist-info/RECORD +0 -181
  165. langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
langchain_core/vectorstores/base.py

@@ -3,21 +3,7 @@
 One of the most common ways to store and search over unstructured data is to
 embed it and store the resulting embedding vectors, and then query the store
 and retrieve the data that are 'most similar' to the embedded query.
-
-**Class hierarchy:**
-
-.. code-block::
-
-    VectorStore --> <name>  # Examples: Annoy, FAISS, Milvus
-
-    BaseRetriever --> VectorStoreRetriever --> <name>Retriever  # Example: VespaRetriever
-
-**Main helpers:**
-
-.. code-block::
-
-    Embeddings, Document
-"""  # noqa: E501
+"""

 from __future__ import annotations

@@ -25,13 +11,12 @@ import logging
 import math
 import warnings
 from abc import ABC, abstractmethod
+from collections.abc import Callable
 from itertools import cycle
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
     ClassVar,
-    Optional,
     TypeVar,
 )

@@ -62,27 +47,27 @@ class VectorStore(ABC):
     def add_texts(
         self,
         texts: Iterable[str],
-        metadatas: Optional[list[dict]] = None,
+        metadatas: list[dict] | None = None,
         *,
-        ids: Optional[list[str]] = None,
+        ids: list[str] | None = None,
         **kwargs: Any,
     ) -> list[str]:
-        """Run more texts through the embeddings and add to the vectorstore.
+        """Run more texts through the embeddings and add to the `VectorStore`.

         Args:
-            texts: Iterable of strings to add to the vectorstore.
+            texts: Iterable of strings to add to the `VectorStore`.
             metadatas: Optional list of metadatas associated with the texts.
             ids: Optional list of IDs associated with the texts.
-            **kwargs: vectorstore specific parameters.
+            **kwargs: `VectorStore` specific parameters.
                 One of the kwargs should be `ids` which is a list of ids
                 associated with the texts.

         Returns:
-            List of ids from adding the texts into the vectorstore.
+            List of IDs from adding the texts into the `VectorStore`.

         Raises:
             ValueError: If the number of metadatas does not match the number of texts.
-            ValueError: If the number of ids does not match the number of texts.
+            ValueError: If the number of IDs does not match the number of texts.
         """
         if type(self).add_documents != VectorStore.add_documents:
             # This condition is triggered if the subclass has provided
@@ -98,10 +83,10 @@ class VectorStore(ABC):
                 )
                 raise ValueError(msg)
             metadatas_ = iter(metadatas) if metadatas else cycle([{}])
-            ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
+            ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
             docs = [
                 Document(id=id_, page_content=text, metadata=metadata_)
-                for text, metadata_, id_ in zip(texts, metadatas_, ids_)
+                for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
             ]
             if ids is not None:
                 # For backward compatibility
@@ -112,7 +97,7 @@ class VectorStore(ABC):
         raise NotImplementedError(msg)

     @property
-    def embeddings(self) -> Optional[Embeddings]:
+    def embeddings(self) -> Embeddings | None:
         """Access the query embedding object if available."""
         logger.debug(
             "The embeddings property has not been implemented for %s",
@@ -120,16 +105,16 @@ class VectorStore(ABC):
         )
         return None

-    def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> Optional[bool]:
+    def delete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
         """Delete by vector ID or other criteria.

         Args:
-            ids: List of ids to delete. If None, delete all. Default is None.
+            ids: List of IDs to delete. If `None`, delete all.
             **kwargs: Other keyword arguments that subclasses might use.

         Returns:
-            Optional[bool]: True if deletion is successful,
-            False otherwise, None if not implemented.
+            `True` if deletion is successful, `False` otherwise, `None` if not
+            implemented.
         """
         msg = "delete method must be implemented by subclass."
         raise NotImplementedError(msg)
@@ -151,12 +136,10 @@ class VectorStore(ABC):
         some IDs.

         Args:
-            ids: List of ids to retrieve.
+            ids: List of IDs to retrieve.

         Returns:
-            List of Documents.
-
-            .. versionadded:: 0.2.11
+            List of `Document` objects.
         """
         msg = f"{self.__class__.__name__} does not yet support get_by_ids."
         raise NotImplementedError(msg)
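An illustrative sketch (not from the diff) of the ID-based workflow these docstrings describe — `add_texts` with explicit `ids`, then `get_by_ids` and `delete` — using the in-memory store and deterministic fake embeddings that ship with langchain-core; the texts, metadata, and IDs below are invented for illustration.

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store with deterministic (non-ML) embeddings, handy for smoke tests.
store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=64))

# add_texts accepts optional per-text ids; the same ids drive get_by_ids and delete.
store.add_texts(["alpha", "beta"], metadatas=[{"n": 1}, {"n": 2}], ids=["a", "b"])
print(store.get_by_ids(["a"]))  # -> [Document(id='a', page_content='alpha', ...)]
store.delete(ids=["b"])
```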
@@ -179,53 +162,48 @@ class VectorStore(ABC):
         some IDs.

         Args:
-            ids: List of ids to retrieve.
+            ids: List of IDs to retrieve.

         Returns:
-            List of Documents.
-
-            .. versionadded:: 0.2.11
+            List of `Document` objects.
         """
         return await run_in_executor(None, self.get_by_ids, ids)

-    async def adelete(
-        self, ids: Optional[list[str]] = None, **kwargs: Any
-    ) -> Optional[bool]:
+    async def adelete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
         """Async delete by vector ID or other criteria.

         Args:
-            ids: List of ids to delete. If None, delete all. Default is None.
+            ids: List of IDs to delete. If `None`, delete all.
             **kwargs: Other keyword arguments that subclasses might use.

         Returns:
-            Optional[bool]: True if deletion is successful,
-            False otherwise, None if not implemented.
+            `True` if deletion is successful, `False` otherwise, `None` if not
+            implemented.
         """
         return await run_in_executor(None, self.delete, ids, **kwargs)

     async def aadd_texts(
         self,
         texts: Iterable[str],
-        metadatas: Optional[list[dict]] = None,
+        metadatas: list[dict] | None = None,
         *,
-        ids: Optional[list[str]] = None,
+        ids: list[str] | None = None,
         **kwargs: Any,
     ) -> list[str]:
-        """Async run more texts through the embeddings and add to the vectorstore.
+        """Async run more texts through the embeddings and add to the `VectorStore`.

         Args:
-            texts: Iterable of strings to add to the vectorstore.
+            texts: Iterable of strings to add to the `VectorStore`.
             metadatas: Optional list of metadatas associated with the texts.
-                Default is None.
             ids: Optional list
-            **kwargs: vectorstore specific parameters.
+            **kwargs: `VectorStore` specific parameters.

         Returns:
-            List of ids from adding the texts into the vectorstore.
+            List of IDs from adding the texts into the `VectorStore`.

         Raises:
             ValueError: If the number of metadatas does not match the number of texts.
-            ValueError: If the number of ids does not match the number of texts.
+            ValueError: If the number of IDs does not match the number of texts.
         """
         if ids is not None:
             # For backward compatibility
@@ -244,23 +222,24 @@ class VectorStore(ABC):
                 )
                 raise ValueError(msg)
             metadatas_ = iter(metadatas) if metadatas else cycle([{}])
-            ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
+            ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])

             docs = [
                 Document(id=id_, page_content=text, metadata=metadata_)
-                for text, metadata_, id_ in zip(texts, metadatas_, ids_)
+                for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
             ]
             return await self.aadd_documents(docs, **kwargs)
         return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)

     def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
-        """Add or update documents in the vectorstore.
+        """Add or update documents in the `VectorStore`.

         Args:
-            documents: Documents to add to the vectorstore.
-            kwargs: Additional keyword arguments.
-                if kwargs contains ids and documents contain ids,
-                the ids in the kwargs will receive precedence.
+            documents: Documents to add to the `VectorStore`.
+            **kwargs: Additional keyword arguments.
+
+                If kwargs contains IDs and documents contain ids, the IDs in the kwargs
+                will receive precedence.

         Returns:
             List of IDs of the added texts.
@@ -286,11 +265,11 @@ class VectorStore(ABC):
     async def aadd_documents(
         self, documents: list[Document], **kwargs: Any
     ) -> list[str]:
-        """Async run more documents through the embeddings and add to the vectorstore.
+        """Async run more documents through the embeddings and add to the `VectorStore`.

         Args:
-            documents: Documents to add to the vectorstore.
-            kwargs: Additional keyword arguments.
+            documents: Documents to add to the `VectorStore`.
+            **kwargs: Additional keyword arguments.

         Returns:
             List of IDs of the added texts.
@@ -315,17 +294,17 @@ class VectorStore(ABC):
         """Return docs most similar to query using a specified search type.

         Args:
-            query: Input text
-            search_type: Type of search to perform. Can be "similarity",
-                "mmr", or "similarity_score_threshold".
+            query: Input text.
+            search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
+                `'similarity_score_threshold'`.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query.
+            List of `Document` objects most similar to the query.

         Raises:
-            ValueError: If search_type is not one of "similarity",
-                "mmr", or "similarity_score_threshold".
+            ValueError: If `search_type` is not one of `'similarity'`,
+                `'mmr'`, or `'similarity_score_threshold'`.
         """
         if search_type == "similarity":
             return self.similarity_search(query, **kwargs)
@@ -350,16 +329,16 @@ class VectorStore(ABC):

         Args:
             query: Input text.
-            search_type: Type of search to perform. Can be "similarity",
-                "mmr", or "similarity_score_threshold".
+            search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
+                `'similarity_score_threshold'`.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query.
+            List of `Document` objects most similar to the query.

         Raises:
-            ValueError: If search_type is not one of "similarity",
-                "mmr", or "similarity_score_threshold".
+            ValueError: If `search_type` is not one of `'similarity'`,
+                `'mmr'`, or `'similarity_score_threshold'`.
         """
         if search_type == "similarity":
             return await self.asimilarity_search(query, **kwargs)
@@ -384,11 +363,11 @@ class VectorStore(ABC):

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
+            k: Number of `Document` objects to return.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query.
+            List of `Document` objects most similar to the query.
         """

     @staticmethod
@@ -443,7 +422,7 @@ class VectorStore(ABC):
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Tuples of (doc, similarity_score).
+            List of tuples of `(doc, similarity_score)`.
         """
         raise NotImplementedError

@@ -457,7 +436,7 @@ class VectorStore(ABC):
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Tuples of (doc, similarity_score).
+            List of tuples of `(doc, similarity_score)`.
         """
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -475,19 +454,19 @@ class VectorStore(ABC):
         """Default similarity search with relevance scores.

         Modify if necessary in subclass.
-        Return docs and relevance scores in the range [0, 1].
+        Return docs and relevance scores in the range `[0, 1]`.

-        0 is dissimilar, 1 is most similar.
+        `0` is dissimilar, `1` is most similar.

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
-            **kwargs: kwargs to be passed to similarity search. Should include:
-                score_threshold: Optional, a floating point value between 0 to 1 to
-                    filter the resulting set of retrieved docs
+            k: Number of `Document` objects to return.
+            **kwargs: kwargs to be passed to similarity search. Should include
+                `score_threshold`, An optional floating point value between `0` to `1`
+                to filter the resulting set of retrieved docs

         Returns:
-            List of Tuples of (doc, similarity_score)
+            List of tuples of `(doc, similarity_score)`
         """
         relevance_score_fn = self._select_relevance_score_fn()
         docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
@@ -502,19 +481,19 @@ class VectorStore(ABC):
         """Default similarity search with relevance scores.

         Modify if necessary in subclass.
-        Return docs and relevance scores in the range [0, 1].
+        Return docs and relevance scores in the range `[0, 1]`.

-        0 is dissimilar, 1 is most similar.
+        `0` is dissimilar, `1` is most similar.

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
-            **kwargs: kwargs to be passed to similarity search. Should include:
-                score_threshold: Optional, a floating point value between 0 to 1 to
-                    filter the resulting set of retrieved docs
+            k: Number of `Document` objects to return.
+            **kwargs: kwargs to be passed to similarity search. Should include
+                `score_threshold`, An optional floating point value between `0` to `1`
+                to filter the resulting set of retrieved docs

         Returns:
-            List of Tuples of (doc, similarity_score)
+            List of tuples of `(doc, similarity_score)`
         """
         relevance_score_fn = self._select_relevance_score_fn()
         docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
@@ -526,19 +505,19 @@ class VectorStore(ABC):
         k: int = 4,
         **kwargs: Any,
     ) -> list[tuple[Document, float]]:
-        """Return docs and relevance scores in the range [0, 1].
+        """Return docs and relevance scores in the range `[0, 1]`.

-        0 is dissimilar, 1 is most similar.
+        `0` is dissimilar, `1` is most similar.

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
-            **kwargs: kwargs to be passed to similarity search. Should include:
-                score_threshold: Optional, a floating point value between 0 to 1 to
-                    filter the resulting set of retrieved docs.
+            k: Number of `Document` objects to return.
+            **kwargs: kwargs to be passed to similarity search. Should include
+                `score_threshold`, An optional floating point value between `0` to `1`
+                to filter the resulting set of retrieved docs

         Returns:
-            List of Tuples of (doc, similarity_score).
+            List of tuples of `(doc, similarity_score)`.
         """
         score_threshold = kwargs.pop("score_threshold", None)

@@ -575,19 +554,19 @@ class VectorStore(ABC):
         k: int = 4,
         **kwargs: Any,
     ) -> list[tuple[Document, float]]:
-        """Async return docs and relevance scores in the range [0, 1].
+        """Async return docs and relevance scores in the range `[0, 1]`.

-        0 is dissimilar, 1 is most similar.
+        `0` is dissimilar, `1` is most similar.

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
-            **kwargs: kwargs to be passed to similarity search. Should include:
-                score_threshold: Optional, a floating point value between 0 to 1 to
-                    filter the resulting set of retrieved docs
+            k: Number of `Document` objects to return.
+            **kwargs: kwargs to be passed to similarity search. Should include
+                `score_threshold`, An optional floating point value between `0` to `1`
+                to filter the resulting set of retrieved docs

         Returns:
-            List of Tuples of (doc, similarity_score)
+            List of tuples of `(doc, similarity_score)`
         """
         score_threshold = kwargs.pop("score_threshold", None)

@@ -625,11 +604,11 @@ class VectorStore(ABC):

         Args:
             query: Input text.
-            k: Number of Documents to return. Defaults to 4.
+            k: Number of `Document` objects to return.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query.
+            List of `Document` objects most similar to the query.
         """
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -643,11 +622,11 @@ class VectorStore(ABC):

         Args:
             embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
+            k: Number of `Document` objects to return.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query vector.
+            List of `Document` objects most similar to the query vector.
         """
         raise NotImplementedError

@@ -658,11 +637,11 @@ class VectorStore(ABC):

         Args:
             embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
+            k: Number of `Document` objects to return.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents most similar to the query vector.
+            List of `Document` objects most similar to the query vector.
         """
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -686,17 +665,15 @@ class VectorStore(ABC):

         Args:
             query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
-            lambda_mult: Number between 0 and 1 that determines the degree
-                of diversity among the results with 0 corresponding
-                to maximum diversity and 1 to minimum diversity.
-                Defaults to 0.5.
+            k: Number of `Document` objects to return.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
+            lambda_mult: Number between `0` and `1` that determines the degree
+                of diversity among the results with `0` corresponding
+                to maximum diversity and `1` to minimum diversity.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents selected by maximal marginal relevance.
+            List of `Document` objects selected by maximal marginal relevance.
         """
         raise NotImplementedError

@@ -715,17 +692,15 @@ class VectorStore(ABC):

         Args:
             query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
-            lambda_mult: Number between 0 and 1 that determines the degree
-                of diversity among the results with 0 corresponding
-                to maximum diversity and 1 to minimum diversity.
-                Defaults to 0.5.
+            k: Number of `Document` objects to return.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
+            lambda_mult: Number between `0` and `1` that determines the degree
+                of diversity among the results with `0` corresponding
+                to maximum diversity and `1` to minimum diversity.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents selected by maximal marginal relevance.
+            List of `Document` objects selected by maximal marginal relevance.
         """
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
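An illustrative sketch (not from the diff) of the MMR knobs documented in the hunks above, run against the in-memory store with sample texts invented for illustration: `fetch_k` candidates are retrieved by similarity and `k` of them are kept after diversity re-ranking, with `lambda_mult` trading similarity (`1`) against diversity (`0`).

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=64))
store.add_texts(["vector stores", "vector search", "graph databases"])

# Fetch 3 candidates by similarity, keep 2 after diversity re-ranking.
docs = store.max_marginal_relevance_search(
    "vector databases", k=2, fetch_k=3, lambda_mult=0.5
)
print([d.page_content for d in docs])
```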
@@ -755,17 +730,15 @@ class VectorStore(ABC):

         Args:
             embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
-            lambda_mult: Number between 0 and 1 that determines the degree
-                of diversity among the results with 0 corresponding
-                to maximum diversity and 1 to minimum diversity.
-                Defaults to 0.5.
+            k: Number of `Document` objects to return.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
+            lambda_mult: Number between `0` and `1` that determines the degree
+                of diversity among the results with `0` corresponding
+                to maximum diversity and `1` to minimum diversity.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents selected by maximal marginal relevance.
+            List of `Document` objects selected by maximal marginal relevance.
         """
         raise NotImplementedError

@@ -784,17 +757,15 @@ class VectorStore(ABC):

         Args:
             embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
-            lambda_mult: Number between 0 and 1 that determines the degree
-                of diversity among the results with 0 corresponding
-                to maximum diversity and 1 to minimum diversity.
-                Defaults to 0.5.
+            k: Number of `Document` objects to return.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
+            lambda_mult: Number between `0` and `1` that determines the degree
+                of diversity among the results with `0` corresponding
+                to maximum diversity and `1` to minimum diversity.
             **kwargs: Arguments to pass to the search method.

         Returns:
-            List of Documents selected by maximal marginal relevance.
+            List of `Document` objects selected by maximal marginal relevance.
         """
         return await run_in_executor(
             None,
@@ -813,15 +784,15 @@ class VectorStore(ABC):
         embedding: Embeddings,
         **kwargs: Any,
     ) -> Self:
-        """Return VectorStore initialized from documents and embeddings.
+        """Return `VectorStore` initialized from documents and embeddings.

         Args:
-            documents: List of Documents to add to the vectorstore.
+            documents: List of `Document` objects to add to the `VectorStore`.
             embedding: Embedding function to use.
-            kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments.

         Returns:
-            VectorStore: VectorStore initialized from documents and embeddings.
+            `VectorStore` initialized from documents and embeddings.
         """
         texts = [d.page_content for d in documents]
         metadatas = [d.metadata for d in documents]
@@ -843,15 +814,15 @@ class VectorStore(ABC):
         embedding: Embeddings,
         **kwargs: Any,
     ) -> Self:
-        """Async return VectorStore initialized from documents and embeddings.
+        """Async return `VectorStore` initialized from documents and embeddings.

         Args:
-            documents: List of Documents to add to the vectorstore.
+            documents: List of `Document` objects to add to the `VectorStore`.
             embedding: Embedding function to use.
-            kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments.

         Returns:
-            VectorStore: VectorStore initialized from documents and embeddings.
+            `VectorStore` initialized from documents and embeddings.
         """
         texts = [d.page_content for d in documents]
         metadatas = [d.metadata for d in documents]
@@ -872,23 +843,22 @@ class VectorStore(ABC):
         cls: type[VST],
         texts: list[str],
         embedding: Embeddings,
-        metadatas: Optional[list[dict]] = None,
+        metadatas: list[dict] | None = None,
         *,
-        ids: Optional[list[str]] = None,
+        ids: list[str] | None = None,
         **kwargs: Any,
     ) -> VST:
-        """Return VectorStore initialized from texts and embeddings.
+        """Return `VectorStore` initialized from texts and embeddings.

         Args:
-            texts: Texts to add to the vectorstore.
+            texts: Texts to add to the `VectorStore`.
             embedding: Embedding function to use.
             metadatas: Optional list of metadatas associated with the texts.
-                Default is None.
             ids: Optional list of IDs associated with the texts.
-            kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments.

         Returns:
-            VectorStore: VectorStore initialized from texts and embeddings.
+            `VectorStore` initialized from texts and embeddings.
         """

     @classmethod
@@ -896,23 +866,22 @@ class VectorStore(ABC):
         cls,
         texts: list[str],
         embedding: Embeddings,
-        metadatas: Optional[list[dict]] = None,
+        metadatas: list[dict] | None = None,
         *,
-        ids: Optional[list[str]] = None,
+        ids: list[str] | None = None,
         **kwargs: Any,
     ) -> Self:
-        """Async return VectorStore initialized from texts and embeddings.
+        """Async return `VectorStore` initialized from texts and embeddings.

         Args:
-            texts: Texts to add to the vectorstore.
+            texts: Texts to add to the `VectorStore`.
             embedding: Embedding function to use.
             metadatas: Optional list of metadatas associated with the texts.
-                Default is None.
             ids: Optional list of IDs associated with the texts.
-            kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments.

         Returns:
-            VectorStore: VectorStore initialized from texts and embeddings.
+            `VectorStore` initialized from texts and embeddings.
         """
         if ids is not None:
             kwargs["ids"] = ids
@@ -928,60 +897,57 @@ class VectorStore(ABC):
         return tags

     def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
-        """Return VectorStoreRetriever initialized from this VectorStore.
+        """Return `VectorStoreRetriever` initialized from this `VectorStore`.

         Args:
             **kwargs: Keyword arguments to pass to the search function.
                 Can include:
-                search_type (Optional[str]): Defines the type of search that
-                    the Retriever should perform.
-                    Can be "similarity" (default), "mmr", or
-                    "similarity_score_threshold".
-                search_kwargs (Optional[Dict]): Keyword arguments to pass to the
-                    search function. Can include things like:
-                        k: Amount of documents to return (Default: 4)
-                        score_threshold: Minimum relevance threshold
-                            for similarity_score_threshold
-                        fetch_k: Amount of documents to pass to MMR algorithm
-                            (Default: 20)
-                        lambda_mult: Diversity of results returned by MMR;
-                            1 for minimum diversity and 0 for maximum. (Default: 0.5)
-                        filter: Filter by document metadata
+
+                * `search_type`: Defines the type of search that the Retriever should
+                  perform. Can be `'similarity'` (default), `'mmr'`, or
+                  `'similarity_score_threshold'`.
+                * `search_kwargs`: Keyword arguments to pass to the search function. Can
+                  include things like:
+
+                    * `k`: Amount of documents to return (Default: `4`)
+                    * `score_threshold`: Minimum relevance threshold
+                      for `similarity_score_threshold`
+                    * `fetch_k`: Amount of documents to pass to MMR algorithm
+                      (Default: `20`)
+                    * `lambda_mult`: Diversity of results returned by MMR;
+                      `1` for minimum diversity and 0 for maximum. (Default: `0.5`)
+                    * `filter`: Filter by document metadata

         Returns:
-            VectorStoreRetriever: Retriever class for VectorStore.
+            Retriever class for `VectorStore`.

         Examples:
+            ```python
+            # Retrieve more documents with higher diversity
+            # Useful if your dataset has many similar documents
+            docsearch.as_retriever(
+                search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
+            )

-            .. code-block:: python
-
-                # Retrieve more documents with higher diversity
-                # Useful if your dataset has many similar documents
-                docsearch.as_retriever(
-                    search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
-                )
-
-                # Fetch more documents for the MMR algorithm to consider
-                # But only return the top 5
-                docsearch.as_retriever(
-                    search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50}
-                )
-
-                # Only retrieve documents that have a relevance score
-                # Above a certain threshold
-                docsearch.as_retriever(
-                    search_type="similarity_score_threshold",
-                    search_kwargs={"score_threshold": 0.8},
-                )
+            # Fetch more documents for the MMR algorithm to consider
+            # But only return the top 5
+            docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})

-                # Only get the single most similar document from the dataset
-                docsearch.as_retriever(search_kwargs={"k": 1})
+            # Only retrieve documents that have a relevance score
+            # Above a certain threshold
+            docsearch.as_retriever(
+                search_type="similarity_score_threshold",
+                search_kwargs={"score_threshold": 0.8},
+            )

-                # Use a filter to only retrieve documents from a specific paper
-                docsearch.as_retriever(
-                    search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
-                )
+            # Only get the single most similar document from the dataset
+            docsearch.as_retriever(search_kwargs={"k": 1})

+            # Use a filter to only retrieve documents from a specific paper
+            docsearch.as_retriever(
+                search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
+            )
+            ```
         """
         tags = kwargs.pop("tags", None) or [*self._get_retriever_tags()]
         return VectorStoreRetriever(vectorstore=self, tags=tags, **kwargs)
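An illustrative sketch (not from the diff): the `docsearch` object in the docstring examples above is whatever `VectorStore` you already have. A self-contained version using the in-memory store and fake embeddings that ship with langchain-core, with documents invented for illustration:

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

docsearch = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=64))
docsearch.add_documents(
    [
        Document(page_content="vector stores index embeddings", metadata={"topic": "vs"}),
        Document(page_content="retrievers wrap vector stores", metadata={"topic": "retriever"}),
    ]
)

# MMR retriever, as in the first example of the docstring.
retriever = docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 1, "fetch_k": 2})
print(retriever.invoke("vector store"))
```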
@@ -993,7 +959,7 @@ class VectorStoreRetriever(BaseRetriever):
     vectorstore: VectorStore
     """VectorStore to use for retrieval."""
     search_type: str = "similarity"
-    """Type of search to perform. Defaults to "similarity"."""
+    """Type of search to perform."""
     search_kwargs: dict = Field(default_factory=dict)
     """Keyword arguments to pass to the search function."""
     allowed_search_types: ClassVar[Collection[str]] = (
@@ -1015,11 +981,11 @@ class VectorStoreRetriever(BaseRetriever):
             values: Values to validate.

         Returns:
-            Values: Validated values.
+            Validated values.

         Raises:
-            ValueError: If search_type is not one of the allowed search types.
-            ValueError: If score_threshold is not specified with a float value(0~1)
+            ValueError: If `search_type` is not one of the allowed search types.
+            ValueError: If `score_threshold` is not specified with a float value(`0~1`)
         """
         search_type = values.get("search_type", "similarity")
         if search_type not in cls.allowed_search_types:
@@ -1107,10 +1073,10 @@ class VectorStoreRetriever(BaseRetriever):
         return docs

     def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
-        """Add documents to the vectorstore.
+        """Add documents to the `VectorStore`.

         Args:
-            documents: Documents to add to the vectorstore.
+            documents: Documents to add to the `VectorStore`.
             **kwargs: Other keyword arguments that subclasses might use.

         Returns:
@@ -1121,10 +1087,10 @@ class VectorStoreRetriever(BaseRetriever):
     async def aadd_documents(
         self, documents: list[Document], **kwargs: Any
    ) -> list[str]:
-        """Async add documents to the vectorstore.
+        """Async add documents to the `VectorStore`.

         Args:
-            documents: Documents to add to the vectorstore.
+            documents: Documents to add to the `VectorStore`.
             **kwargs: Other keyword arguments that subclasses might use.

         Returns: