langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. See the details linked from the original registry diff page for more information.

Files changed (165)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +52 -65
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +19 -19
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +323 -334
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +441 -507
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +48 -63
  17. langchain_core/document_loaders/base.py +23 -23
  18. langchain_core/document_loaders/langsmith.py +37 -37
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +62 -65
  21. langchain_core/documents/compressor.py +4 -4
  22. langchain_core/documents/transformers.py +28 -29
  23. langchain_core/embeddings/fake.py +50 -54
  24. langchain_core/example_selectors/length_based.py +1 -1
  25. langchain_core/example_selectors/semantic_similarity.py +21 -25
  26. langchain_core/exceptions.py +10 -11
  27. langchain_core/globals.py +3 -151
  28. langchain_core/indexing/api.py +61 -66
  29. langchain_core/indexing/base.py +58 -58
  30. langchain_core/indexing/in_memory.py +3 -3
  31. langchain_core/language_models/__init__.py +14 -27
  32. langchain_core/language_models/_utils.py +270 -84
  33. langchain_core/language_models/base.py +55 -162
  34. langchain_core/language_models/chat_models.py +442 -402
  35. langchain_core/language_models/fake.py +11 -11
  36. langchain_core/language_models/fake_chat_models.py +61 -39
  37. langchain_core/language_models/llms.py +123 -231
  38. langchain_core/load/dump.py +4 -5
  39. langchain_core/load/load.py +18 -28
  40. langchain_core/load/mapping.py +2 -4
  41. langchain_core/load/serializable.py +39 -40
  42. langchain_core/messages/__init__.py +61 -22
  43. langchain_core/messages/ai.py +368 -163
  44. langchain_core/messages/base.py +214 -43
  45. langchain_core/messages/block_translators/__init__.py +111 -0
  46. langchain_core/messages/block_translators/anthropic.py +470 -0
  47. langchain_core/messages/block_translators/bedrock.py +94 -0
  48. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  49. langchain_core/messages/block_translators/google_genai.py +530 -0
  50. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  51. langchain_core/messages/block_translators/groq.py +143 -0
  52. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  53. langchain_core/messages/block_translators/openai.py +1010 -0
  54. langchain_core/messages/chat.py +2 -6
  55. langchain_core/messages/content.py +1423 -0
  56. langchain_core/messages/function.py +6 -10
  57. langchain_core/messages/human.py +41 -38
  58. langchain_core/messages/modifier.py +2 -2
  59. langchain_core/messages/system.py +38 -28
  60. langchain_core/messages/tool.py +96 -103
  61. langchain_core/messages/utils.py +478 -504
  62. langchain_core/output_parsers/__init__.py +1 -14
  63. langchain_core/output_parsers/base.py +58 -61
  64. langchain_core/output_parsers/json.py +7 -8
  65. langchain_core/output_parsers/list.py +5 -7
  66. langchain_core/output_parsers/openai_functions.py +49 -47
  67. langchain_core/output_parsers/openai_tools.py +14 -19
  68. langchain_core/output_parsers/pydantic.py +12 -13
  69. langchain_core/output_parsers/string.py +2 -2
  70. langchain_core/output_parsers/transform.py +15 -17
  71. langchain_core/output_parsers/xml.py +8 -10
  72. langchain_core/outputs/__init__.py +1 -1
  73. langchain_core/outputs/chat_generation.py +18 -18
  74. langchain_core/outputs/chat_result.py +1 -3
  75. langchain_core/outputs/generation.py +8 -8
  76. langchain_core/outputs/llm_result.py +10 -10
  77. langchain_core/prompt_values.py +12 -12
  78. langchain_core/prompts/__init__.py +3 -27
  79. langchain_core/prompts/base.py +45 -55
  80. langchain_core/prompts/chat.py +254 -313
  81. langchain_core/prompts/dict.py +5 -5
  82. langchain_core/prompts/few_shot.py +81 -88
  83. langchain_core/prompts/few_shot_with_templates.py +11 -13
  84. langchain_core/prompts/image.py +12 -14
  85. langchain_core/prompts/loading.py +6 -8
  86. langchain_core/prompts/message.py +3 -3
  87. langchain_core/prompts/prompt.py +24 -39
  88. langchain_core/prompts/string.py +4 -4
  89. langchain_core/prompts/structured.py +42 -50
  90. langchain_core/rate_limiters.py +51 -60
  91. langchain_core/retrievers.py +49 -190
  92. langchain_core/runnables/base.py +1484 -1709
  93. langchain_core/runnables/branch.py +45 -61
  94. langchain_core/runnables/config.py +80 -88
  95. langchain_core/runnables/configurable.py +117 -134
  96. langchain_core/runnables/fallbacks.py +83 -79
  97. langchain_core/runnables/graph.py +85 -95
  98. langchain_core/runnables/graph_ascii.py +27 -28
  99. langchain_core/runnables/graph_mermaid.py +38 -50
  100. langchain_core/runnables/graph_png.py +15 -16
  101. langchain_core/runnables/history.py +135 -148
  102. langchain_core/runnables/passthrough.py +124 -150
  103. langchain_core/runnables/retry.py +46 -51
  104. langchain_core/runnables/router.py +25 -30
  105. langchain_core/runnables/schema.py +79 -74
  106. langchain_core/runnables/utils.py +62 -68
  107. langchain_core/stores.py +81 -115
  108. langchain_core/structured_query.py +8 -8
  109. langchain_core/sys_info.py +27 -29
  110. langchain_core/tools/__init__.py +1 -14
  111. langchain_core/tools/base.py +179 -187
  112. langchain_core/tools/convert.py +131 -139
  113. langchain_core/tools/render.py +10 -10
  114. langchain_core/tools/retriever.py +11 -11
  115. langchain_core/tools/simple.py +19 -24
  116. langchain_core/tools/structured.py +30 -39
  117. langchain_core/tracers/__init__.py +1 -9
  118. langchain_core/tracers/base.py +97 -99
  119. langchain_core/tracers/context.py +29 -52
  120. langchain_core/tracers/core.py +50 -60
  121. langchain_core/tracers/evaluation.py +11 -11
  122. langchain_core/tracers/event_stream.py +115 -70
  123. langchain_core/tracers/langchain.py +21 -21
  124. langchain_core/tracers/log_stream.py +43 -43
  125. langchain_core/tracers/memory_stream.py +3 -3
  126. langchain_core/tracers/root_listeners.py +16 -16
  127. langchain_core/tracers/run_collector.py +2 -4
  128. langchain_core/tracers/schemas.py +0 -129
  129. langchain_core/tracers/stdout.py +3 -3
  130. langchain_core/utils/__init__.py +1 -4
  131. langchain_core/utils/_merge.py +46 -8
  132. langchain_core/utils/aiter.py +57 -61
  133. langchain_core/utils/env.py +9 -9
  134. langchain_core/utils/function_calling.py +89 -191
  135. langchain_core/utils/html.py +7 -8
  136. langchain_core/utils/input.py +6 -6
  137. langchain_core/utils/interactive_env.py +1 -1
  138. langchain_core/utils/iter.py +37 -42
  139. langchain_core/utils/json.py +4 -3
  140. langchain_core/utils/json_schema.py +8 -8
  141. langchain_core/utils/mustache.py +9 -11
  142. langchain_core/utils/pydantic.py +33 -35
  143. langchain_core/utils/strings.py +5 -5
  144. langchain_core/utils/usage.py +1 -1
  145. langchain_core/utils/utils.py +80 -54
  146. langchain_core/vectorstores/base.py +129 -164
  147. langchain_core/vectorstores/in_memory.py +99 -174
  148. langchain_core/vectorstores/utils.py +5 -5
  149. langchain_core/version.py +1 -1
  150. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
  151. langchain_core-1.0.0.dist-info/RECORD +172 -0
  152. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  153. langchain_core/beta/__init__.py +0 -1
  154. langchain_core/beta/runnables/__init__.py +0 -1
  155. langchain_core/beta/runnables/context.py +0 -447
  156. langchain_core/memory.py +0 -120
  157. langchain_core/messages/content_blocks.py +0 -176
  158. langchain_core/prompts/pipeline.py +0 -138
  159. langchain_core/pydantic_v1/__init__.py +0 -30
  160. langchain_core/pydantic_v1/dataclasses.py +0 -23
  161. langchain_core/pydantic_v1/main.py +0 -23
  162. langchain_core/tracers/langchain_v1.py +0 -31
  163. langchain_core/utils/loading.py +0 -35
  164. langchain_core-0.3.79.dist-info/RECORD +0 -174
  165. langchain_core-0.3.79.dist-info/entry_points.txt +0 -4
@@ -3,21 +3,7 @@
3
3
  One of the most common ways to store and search over unstructured data is to
4
4
  embed it and store the resulting embedding vectors, and then query the store
5
5
  and retrieve the data that are 'most similar' to the embedded query.
6
-
7
- **Class hierarchy:**
8
-
9
- .. code-block::
10
-
11
- VectorStore --> <name> # Examples: Annoy, FAISS, Milvus
12
-
13
- BaseRetriever --> VectorStoreRetriever --> <name>Retriever # Example: VespaRetriever
14
-
15
- **Main helpers:**
16
-
17
- .. code-block::
18
-
19
- Embeddings, Document
20
- """ # noqa: E501
6
+ """
21
7
 
22
8
  from __future__ import annotations
23
9
 
@@ -25,13 +11,12 @@ import logging
25
11
  import math
26
12
  import warnings
27
13
  from abc import ABC, abstractmethod
14
+ from collections.abc import Callable
28
15
  from itertools import cycle
29
16
  from typing import (
30
17
  TYPE_CHECKING,
31
18
  Any,
32
- Callable,
33
19
  ClassVar,
34
- Optional,
35
20
  TypeVar,
36
21
  )
37
22
 
@@ -62,15 +47,15 @@ class VectorStore(ABC):
62
47
  def add_texts(
63
48
  self,
64
49
  texts: Iterable[str],
65
- metadatas: Optional[list[dict]] = None,
50
+ metadatas: list[dict] | None = None,
66
51
  *,
67
- ids: Optional[list[str]] = None,
52
+ ids: list[str] | None = None,
68
53
  **kwargs: Any,
69
54
  ) -> list[str]:
70
- """Run more texts through the embeddings and add to the vectorstore.
55
+ """Run more texts through the embeddings and add to the `VectorStore`.
71
56
 
72
57
  Args:
73
- texts: Iterable of strings to add to the vectorstore.
58
+ texts: Iterable of strings to add to the `VectorStore`.
74
59
  metadatas: Optional list of metadatas associated with the texts.
75
60
  ids: Optional list of IDs associated with the texts.
76
61
  **kwargs: vectorstore specific parameters.
@@ -78,7 +63,7 @@ class VectorStore(ABC):
78
63
  associated with the texts.
79
64
 
80
65
  Returns:
81
- List of ids from adding the texts into the vectorstore.
66
+ List of ids from adding the texts into the `VectorStore`.
82
67
 
83
68
  Raises:
84
69
  ValueError: If the number of metadatas does not match the number of texts.
@@ -98,10 +83,10 @@ class VectorStore(ABC):
98
83
  )
99
84
  raise ValueError(msg)
100
85
  metadatas_ = iter(metadatas) if metadatas else cycle([{}])
101
- ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
86
+ ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
102
87
  docs = [
103
88
  Document(id=id_, page_content=text, metadata=metadata_)
104
- for text, metadata_, id_ in zip(texts, metadatas_, ids_)
89
+ for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
105
90
  ]
106
91
  if ids is not None:
107
92
  # For backward compatibility
@@ -112,7 +97,7 @@ class VectorStore(ABC):
112
97
  raise NotImplementedError(msg)
113
98
 
114
99
  @property
115
- def embeddings(self) -> Optional[Embeddings]:
100
+ def embeddings(self) -> Embeddings | None:
116
101
  """Access the query embedding object if available."""
117
102
  logger.debug(
118
103
  "The embeddings property has not been implemented for %s",
@@ -120,16 +105,15 @@ class VectorStore(ABC):
120
105
  )
121
106
  return None
122
107
 
123
- def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> Optional[bool]:
108
+ def delete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
124
109
  """Delete by vector ID or other criteria.
125
110
 
126
111
  Args:
127
- ids: List of ids to delete. If None, delete all. Default is None.
112
+ ids: List of ids to delete. If `None`, delete all.
128
113
  **kwargs: Other keyword arguments that subclasses might use.
129
114
 
130
115
  Returns:
131
- Optional[bool]: True if deletion is successful,
132
- False otherwise, None if not implemented.
116
+ True if deletion is successful, False otherwise, None if not implemented.
133
117
  """
134
118
  msg = "delete method must be implemented by subclass."
135
119
  raise NotImplementedError(msg)
@@ -156,7 +140,7 @@ class VectorStore(ABC):
156
140
  Returns:
157
141
  List of Documents.
158
142
 
159
- .. versionadded:: 0.2.11
143
+ !!! version-added "Added in version 0.2.11"
160
144
  """
161
145
  msg = f"{self.__class__.__name__} does not yet support get_by_ids."
162
146
  raise NotImplementedError(msg)
@@ -184,44 +168,40 @@ class VectorStore(ABC):
184
168
  Returns:
185
169
  List of Documents.
186
170
 
187
- .. versionadded:: 0.2.11
171
+ !!! version-added "Added in version 0.2.11"
188
172
  """
189
173
  return await run_in_executor(None, self.get_by_ids, ids)
190
174
 
191
- async def adelete(
192
- self, ids: Optional[list[str]] = None, **kwargs: Any
193
- ) -> Optional[bool]:
175
+ async def adelete(self, ids: list[str] | None = None, **kwargs: Any) -> bool | None:
194
176
  """Async delete by vector ID or other criteria.
195
177
 
196
178
  Args:
197
- ids: List of ids to delete. If None, delete all. Default is None.
179
+ ids: List of ids to delete. If `None`, delete all.
198
180
  **kwargs: Other keyword arguments that subclasses might use.
199
181
 
200
182
  Returns:
201
- Optional[bool]: True if deletion is successful,
202
- False otherwise, None if not implemented.
183
+ True if deletion is successful, False otherwise, None if not implemented.
203
184
  """
204
185
  return await run_in_executor(None, self.delete, ids, **kwargs)
205
186
 
206
187
  async def aadd_texts(
207
188
  self,
208
189
  texts: Iterable[str],
209
- metadatas: Optional[list[dict]] = None,
190
+ metadatas: list[dict] | None = None,
210
191
  *,
211
- ids: Optional[list[str]] = None,
192
+ ids: list[str] | None = None,
212
193
  **kwargs: Any,
213
194
  ) -> list[str]:
214
- """Async run more texts through the embeddings and add to the vectorstore.
195
+ """Async run more texts through the embeddings and add to the `VectorStore`.
215
196
 
216
197
  Args:
217
- texts: Iterable of strings to add to the vectorstore.
198
+ texts: Iterable of strings to add to the `VectorStore`.
218
199
  metadatas: Optional list of metadatas associated with the texts.
219
- Default is None.
220
200
  ids: Optional list
221
201
  **kwargs: vectorstore specific parameters.
222
202
 
223
203
  Returns:
224
- List of ids from adding the texts into the vectorstore.
204
+ List of ids from adding the texts into the `VectorStore`.
225
205
 
226
206
  Raises:
227
207
  ValueError: If the number of metadatas does not match the number of texts.
@@ -244,11 +224,11 @@ class VectorStore(ABC):
244
224
  )
245
225
  raise ValueError(msg)
246
226
  metadatas_ = iter(metadatas) if metadatas else cycle([{}])
247
- ids_: Iterator[Optional[str]] = iter(ids) if ids else cycle([None])
227
+ ids_: Iterator[str | None] = iter(ids) if ids else cycle([None])
248
228
 
249
229
  docs = [
250
230
  Document(id=id_, page_content=text, metadata=metadata_)
251
- for text, metadata_, id_ in zip(texts, metadatas_, ids_)
231
+ for text, metadata_, id_ in zip(texts, metadatas_, ids_, strict=False)
252
232
  ]
253
233
  return await self.aadd_documents(docs, **kwargs)
254
234
  return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
@@ -257,8 +237,8 @@ class VectorStore(ABC):
257
237
  """Add or update documents in the vectorstore.
258
238
 
259
239
  Args:
260
- documents: Documents to add to the vectorstore.
261
- kwargs: Additional keyword arguments.
240
+ documents: Documents to add to the `VectorStore`.
241
+ **kwargs: Additional keyword arguments.
262
242
  if kwargs contains ids and documents contain ids,
263
243
  the ids in the kwargs will receive precedence.
264
244
 
@@ -286,11 +266,11 @@ class VectorStore(ABC):
286
266
  async def aadd_documents(
287
267
  self, documents: list[Document], **kwargs: Any
288
268
  ) -> list[str]:
289
- """Async run more documents through the embeddings and add to the vectorstore.
269
+ """Async run more documents through the embeddings and add to the `VectorStore`.
290
270
 
291
271
  Args:
292
- documents: Documents to add to the vectorstore.
293
- kwargs: Additional keyword arguments.
272
+ documents: Documents to add to the `VectorStore`.
273
+ **kwargs: Additional keyword arguments.
294
274
 
295
275
  Returns:
296
276
  List of IDs of the added texts.
@@ -321,7 +301,7 @@ class VectorStore(ABC):
321
301
  **kwargs: Arguments to pass to the search method.
322
302
 
323
303
  Returns:
324
- List of Documents most similar to the query.
304
+ List of `Document` objects most similar to the query.
325
305
 
326
306
  Raises:
327
307
  ValueError: If search_type is not one of "similarity",
@@ -355,7 +335,7 @@ class VectorStore(ABC):
355
335
  **kwargs: Arguments to pass to the search method.
356
336
 
357
337
  Returns:
358
- List of Documents most similar to the query.
338
+ List of `Document` objects most similar to the query.
359
339
 
360
340
  Raises:
361
341
  ValueError: If search_type is not one of "similarity",
@@ -384,11 +364,11 @@ class VectorStore(ABC):
384
364
 
385
365
  Args:
386
366
  query: Input text.
387
- k: Number of Documents to return. Defaults to 4.
367
+ k: Number of Documents to return.
388
368
  **kwargs: Arguments to pass to the search method.
389
369
 
390
370
  Returns:
391
- List of Documents most similar to the query.
371
+ List of `Document` objects most similar to the query.
392
372
  """
393
373
 
394
374
  @staticmethod
@@ -443,7 +423,7 @@ class VectorStore(ABC):
443
423
  **kwargs: Arguments to pass to the search method.
444
424
 
445
425
  Returns:
446
- List of Tuples of (doc, similarity_score).
426
+ List of Tuples of `(doc, similarity_score)`.
447
427
  """
448
428
  raise NotImplementedError
449
429
 
@@ -457,7 +437,7 @@ class VectorStore(ABC):
457
437
  **kwargs: Arguments to pass to the search method.
458
438
 
459
439
  Returns:
460
- List of Tuples of (doc, similarity_score).
440
+ List of Tuples of `(doc, similarity_score)`.
461
441
  """
462
442
  # This is a temporary workaround to make the similarity search
463
443
  # asynchronous. The proper solution is to make the similarity search
@@ -475,19 +455,19 @@ class VectorStore(ABC):
475
455
  """Default similarity search with relevance scores.
476
456
 
477
457
  Modify if necessary in subclass.
478
- Return docs and relevance scores in the range [0, 1].
458
+ Return docs and relevance scores in the range `[0, 1]`.
479
459
 
480
- 0 is dissimilar, 1 is most similar.
460
+ `0` is dissimilar, `1` is most similar.
481
461
 
482
462
  Args:
483
463
  query: Input text.
484
- k: Number of Documents to return. Defaults to 4.
485
- **kwargs: kwargs to be passed to similarity search. Should include:
486
- score_threshold: Optional, a floating point value between 0 to 1 to
487
- filter the resulting set of retrieved docs
464
+ k: Number of Documents to return.
465
+ **kwargs: kwargs to be passed to similarity search. Should include
466
+ `score_threshold`, An optional floating point value between `0` to `1`
467
+ to filter the resulting set of retrieved docs
488
468
 
489
469
  Returns:
490
- List of Tuples of (doc, similarity_score)
470
+ List of Tuples of `(doc, similarity_score)`
491
471
  """
492
472
  relevance_score_fn = self._select_relevance_score_fn()
493
473
  docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
@@ -502,19 +482,19 @@ class VectorStore(ABC):
502
482
  """Default similarity search with relevance scores.
503
483
 
504
484
  Modify if necessary in subclass.
505
- Return docs and relevance scores in the range [0, 1].
485
+ Return docs and relevance scores in the range `[0, 1]`.
506
486
 
507
- 0 is dissimilar, 1 is most similar.
487
+ `0` is dissimilar, `1` is most similar.
508
488
 
509
489
  Args:
510
490
  query: Input text.
511
- k: Number of Documents to return. Defaults to 4.
512
- **kwargs: kwargs to be passed to similarity search. Should include:
513
- score_threshold: Optional, a floating point value between 0 to 1 to
514
- filter the resulting set of retrieved docs
491
+ k: Number of Documents to return.
492
+ **kwargs: kwargs to be passed to similarity search. Should include
493
+ `score_threshold`, An optional floating point value between `0` to `1`
494
+ to filter the resulting set of retrieved docs
515
495
 
516
496
  Returns:
517
- List of Tuples of (doc, similarity_score)
497
+ List of Tuples of `(doc, similarity_score)`
518
498
  """
519
499
  relevance_score_fn = self._select_relevance_score_fn()
520
500
  docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
@@ -526,19 +506,19 @@ class VectorStore(ABC):
526
506
  k: int = 4,
527
507
  **kwargs: Any,
528
508
  ) -> list[tuple[Document, float]]:
529
- """Return docs and relevance scores in the range [0, 1].
509
+ """Return docs and relevance scores in the range `[0, 1]`.
530
510
 
531
- 0 is dissimilar, 1 is most similar.
511
+ `0` is dissimilar, `1` is most similar.
532
512
 
533
513
  Args:
534
514
  query: Input text.
535
- k: Number of Documents to return. Defaults to 4.
536
- **kwargs: kwargs to be passed to similarity search. Should include:
537
- score_threshold: Optional, a floating point value between 0 to 1 to
538
- filter the resulting set of retrieved docs.
515
+ k: Number of Documents to return.
516
+ **kwargs: kwargs to be passed to similarity search. Should include
517
+ `score_threshold`, An optional floating point value between `0` to `1`
518
+ to filter the resulting set of retrieved docs
539
519
 
540
520
  Returns:
541
- List of Tuples of (doc, similarity_score).
521
+ List of Tuples of `(doc, similarity_score)`.
542
522
  """
543
523
  score_threshold = kwargs.pop("score_threshold", None)
544
524
 
@@ -575,19 +555,19 @@ class VectorStore(ABC):
575
555
  k: int = 4,
576
556
  **kwargs: Any,
577
557
  ) -> list[tuple[Document, float]]:
578
- """Async return docs and relevance scores in the range [0, 1].
558
+ """Async return docs and relevance scores in the range `[0, 1]`.
579
559
 
580
- 0 is dissimilar, 1 is most similar.
560
+ `0` is dissimilar, `1` is most similar.
581
561
 
582
562
  Args:
583
563
  query: Input text.
584
- k: Number of Documents to return. Defaults to 4.
585
- **kwargs: kwargs to be passed to similarity search. Should include:
586
- score_threshold: Optional, a floating point value between 0 to 1 to
587
- filter the resulting set of retrieved docs
564
+ k: Number of Documents to return.
565
+ **kwargs: kwargs to be passed to similarity search. Should include
566
+ `score_threshold`, An optional floating point value between `0` to `1`
567
+ to filter the resulting set of retrieved docs
588
568
 
589
569
  Returns:
590
- List of Tuples of (doc, similarity_score)
570
+ List of Tuples of `(doc, similarity_score)`
591
571
  """
592
572
  score_threshold = kwargs.pop("score_threshold", None)
593
573
 
@@ -625,11 +605,11 @@ class VectorStore(ABC):
625
605
 
626
606
  Args:
627
607
  query: Input text.
628
- k: Number of Documents to return. Defaults to 4.
608
+ k: Number of Documents to return.
629
609
  **kwargs: Arguments to pass to the search method.
630
610
 
631
611
  Returns:
632
- List of Documents most similar to the query.
612
+ List of `Document` objects most similar to the query.
633
613
  """
634
614
  # This is a temporary workaround to make the similarity search
635
615
  # asynchronous. The proper solution is to make the similarity search
@@ -643,11 +623,11 @@ class VectorStore(ABC):
643
623
 
644
624
  Args:
645
625
  embedding: Embedding to look up documents similar to.
646
- k: Number of Documents to return. Defaults to 4.
626
+ k: Number of Documents to return.
647
627
  **kwargs: Arguments to pass to the search method.
648
628
 
649
629
  Returns:
650
- List of Documents most similar to the query vector.
630
+ List of `Document` objects most similar to the query vector.
651
631
  """
652
632
  raise NotImplementedError
653
633
 
@@ -658,11 +638,11 @@ class VectorStore(ABC):
658
638
 
659
639
  Args:
660
640
  embedding: Embedding to look up documents similar to.
661
- k: Number of Documents to return. Defaults to 4.
641
+ k: Number of Documents to return.
662
642
  **kwargs: Arguments to pass to the search method.
663
643
 
664
644
  Returns:
665
- List of Documents most similar to the query vector.
645
+ List of `Document` objects most similar to the query vector.
666
646
  """
667
647
  # This is a temporary workaround to make the similarity search
668
648
  # asynchronous. The proper solution is to make the similarity search
@@ -686,17 +666,15 @@ class VectorStore(ABC):
686
666
 
687
667
  Args:
688
668
  query: Text to look up documents similar to.
689
- k: Number of Documents to return. Defaults to 4.
669
+ k: Number of Documents to return.
690
670
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
691
- Default is 20.
692
671
  lambda_mult: Number between 0 and 1 that determines the degree
693
672
  of diversity among the results with 0 corresponding
694
673
  to maximum diversity and 1 to minimum diversity.
695
- Defaults to 0.5.
696
674
  **kwargs: Arguments to pass to the search method.
697
675
 
698
676
  Returns:
699
- List of Documents selected by maximal marginal relevance.
677
+ List of `Document` objects selected by maximal marginal relevance.
700
678
  """
701
679
  raise NotImplementedError
702
680
 
@@ -715,17 +693,15 @@ class VectorStore(ABC):
715
693
 
716
694
  Args:
717
695
  query: Text to look up documents similar to.
718
- k: Number of Documents to return. Defaults to 4.
696
+ k: Number of Documents to return.
719
697
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
720
- Default is 20.
721
698
  lambda_mult: Number between 0 and 1 that determines the degree
722
699
  of diversity among the results with 0 corresponding
723
700
  to maximum diversity and 1 to minimum diversity.
724
- Defaults to 0.5.
725
701
  **kwargs: Arguments to pass to the search method.
726
702
 
727
703
  Returns:
728
- List of Documents selected by maximal marginal relevance.
704
+ List of `Document` objects selected by maximal marginal relevance.
729
705
  """
730
706
  # This is a temporary workaround to make the similarity search
731
707
  # asynchronous. The proper solution is to make the similarity search
@@ -755,17 +731,15 @@ class VectorStore(ABC):
755
731
 
756
732
  Args:
757
733
  embedding: Embedding to look up documents similar to.
758
- k: Number of Documents to return. Defaults to 4.
734
+ k: Number of Documents to return.
759
735
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
760
- Default is 20.
761
736
  lambda_mult: Number between 0 and 1 that determines the degree
762
737
  of diversity among the results with 0 corresponding
763
738
  to maximum diversity and 1 to minimum diversity.
764
- Defaults to 0.5.
765
739
  **kwargs: Arguments to pass to the search method.
766
740
 
767
741
  Returns:
768
- List of Documents selected by maximal marginal relevance.
742
+ List of `Document` objects selected by maximal marginal relevance.
769
743
  """
770
744
  raise NotImplementedError
771
745
 
@@ -784,17 +758,15 @@ class VectorStore(ABC):
784
758
 
785
759
  Args:
786
760
  embedding: Embedding to look up documents similar to.
787
- k: Number of Documents to return. Defaults to 4.
761
+ k: Number of Documents to return.
788
762
  fetch_k: Number of Documents to fetch to pass to MMR algorithm.
789
- Default is 20.
790
763
  lambda_mult: Number between 0 and 1 that determines the degree
791
764
  of diversity among the results with 0 corresponding
792
765
  to maximum diversity and 1 to minimum diversity.
793
- Defaults to 0.5.
794
766
  **kwargs: Arguments to pass to the search method.
795
767
 
796
768
  Returns:
797
- List of Documents selected by maximal marginal relevance.
769
+ List of `Document` objects selected by maximal marginal relevance.
798
770
  """
799
771
  return await run_in_executor(
800
772
  None,
@@ -813,15 +785,15 @@ class VectorStore(ABC):
813
785
  embedding: Embeddings,
814
786
  **kwargs: Any,
815
787
  ) -> Self:
816
- """Return VectorStore initialized from documents and embeddings.
788
+ """Return `VectorStore` initialized from documents and embeddings.
817
789
 
818
790
  Args:
819
- documents: List of Documents to add to the vectorstore.
791
+ documents: List of `Document` objects to add to the `VectorStore`.
820
792
  embedding: Embedding function to use.
821
- kwargs: Additional keyword arguments.
793
+ **kwargs: Additional keyword arguments.
822
794
 
823
795
  Returns:
824
- VectorStore: VectorStore initialized from documents and embeddings.
796
+ `VectorStore` initialized from documents and embeddings.
825
797
  """
826
798
  texts = [d.page_content for d in documents]
827
799
  metadatas = [d.metadata for d in documents]
@@ -843,15 +815,15 @@ class VectorStore(ABC):
843
815
  embedding: Embeddings,
844
816
  **kwargs: Any,
845
817
  ) -> Self:
846
- """Async return VectorStore initialized from documents and embeddings.
818
+ """Async return `VectorStore` initialized from documents and embeddings.
847
819
 
848
820
  Args:
849
- documents: List of Documents to add to the vectorstore.
821
+ documents: List of `Document` objects to add to the `VectorStore`.
850
822
  embedding: Embedding function to use.
851
- kwargs: Additional keyword arguments.
823
+ **kwargs: Additional keyword arguments.
852
824
 
853
825
  Returns:
854
- VectorStore: VectorStore initialized from documents and embeddings.
826
+ `VectorStore` initialized from documents and embeddings.
855
827
  """
856
828
  texts = [d.page_content for d in documents]
857
829
  metadatas = [d.metadata for d in documents]
@@ -872,23 +844,22 @@ class VectorStore(ABC):
872
844
  cls: type[VST],
873
845
  texts: list[str],
874
846
  embedding: Embeddings,
875
- metadatas: Optional[list[dict]] = None,
847
+ metadatas: list[dict] | None = None,
876
848
  *,
877
- ids: Optional[list[str]] = None,
849
+ ids: list[str] | None = None,
878
850
  **kwargs: Any,
879
851
  ) -> VST:
880
852
  """Return VectorStore initialized from texts and embeddings.
881
853
 
882
854
  Args:
883
- texts: Texts to add to the vectorstore.
855
+ texts: Texts to add to the `VectorStore`.
884
856
  embedding: Embedding function to use.
885
857
  metadatas: Optional list of metadatas associated with the texts.
886
- Default is None.
887
858
  ids: Optional list of IDs associated with the texts.
888
- kwargs: Additional keyword arguments.
859
+ **kwargs: Additional keyword arguments.
889
860
 
890
861
  Returns:
891
- VectorStore: VectorStore initialized from texts and embeddings.
862
+ VectorStore initialized from texts and embeddings.
892
863
  """
893
864
 
894
865
  @classmethod
@@ -896,23 +867,22 @@ class VectorStore(ABC):
896
867
  cls,
897
868
  texts: list[str],
898
869
  embedding: Embeddings,
899
- metadatas: Optional[list[dict]] = None,
870
+ metadatas: list[dict] | None = None,
900
871
  *,
901
- ids: Optional[list[str]] = None,
872
+ ids: list[str] | None = None,
902
873
  **kwargs: Any,
903
874
  ) -> Self:
904
875
  """Async return VectorStore initialized from texts and embeddings.
905
876
 
906
877
  Args:
907
- texts: Texts to add to the vectorstore.
878
+ texts: Texts to add to the `VectorStore`.
908
879
  embedding: Embedding function to use.
909
880
  metadatas: Optional list of metadatas associated with the texts.
910
- Default is None.
911
881
  ids: Optional list of IDs associated with the texts.
912
- kwargs: Additional keyword arguments.
882
+ **kwargs: Additional keyword arguments.
913
883
 
914
884
  Returns:
915
- VectorStore: VectorStore initialized from texts and embeddings.
885
+ VectorStore initialized from texts and embeddings.
916
886
  """
917
887
  if ids is not None:
918
888
  kwargs["ids"] = ids
@@ -928,17 +898,16 @@ class VectorStore(ABC):
928
898
  return tags
929
899
 
930
900
  def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
931
- """Return VectorStoreRetriever initialized from this VectorStore.
901
+ """Return `VectorStoreRetriever` initialized from this `VectorStore`.
932
902
 
933
903
  Args:
934
904
  **kwargs: Keyword arguments to pass to the search function.
935
905
  Can include:
936
- search_type (Optional[str]): Defines the type of search that
937
- the Retriever should perform.
938
- Can be "similarity" (default), "mmr", or
906
+ search_type: Defines the type of search that the Retriever should
907
+ perform. Can be "similarity" (default), "mmr", or
939
908
  "similarity_score_threshold".
940
- search_kwargs (Optional[Dict]): Keyword arguments to pass to the
941
- search function. Can include things like:
909
+ search_kwargs: Keyword arguments to pass to the search function. Can
910
+ include things like:
942
911
  k: Amount of documents to return (Default: 4)
943
912
  score_threshold: Minimum relevance threshold
944
913
  for similarity_score_threshold
@@ -949,39 +918,35 @@ class VectorStore(ABC):
949
918
  filter: Filter by document metadata
950
919
 
951
920
  Returns:
952
- VectorStoreRetriever: Retriever class for VectorStore.
921
+ Retriever class for `VectorStore`.
953
922
 
954
923
  Examples:
924
+ ```python
925
+ # Retrieve more documents with higher diversity
926
+ # Useful if your dataset has many similar documents
927
+ docsearch.as_retriever(
928
+ search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
929
+ )
955
930
 
956
- .. code-block:: python
957
-
958
- # Retrieve more documents with higher diversity
959
- # Useful if your dataset has many similar documents
960
- docsearch.as_retriever(
961
- search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
962
- )
963
-
964
- # Fetch more documents for the MMR algorithm to consider
965
- # But only return the top 5
966
- docsearch.as_retriever(
967
- search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50}
968
- )
969
-
970
- # Only retrieve documents that have a relevance score
971
- # Above a certain threshold
972
- docsearch.as_retriever(
973
- search_type="similarity_score_threshold",
974
- search_kwargs={"score_threshold": 0.8},
975
- )
931
+ # Fetch more documents for the MMR algorithm to consider
932
+ # But only return the top 5
933
+ docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 50})
976
934
 
977
- # Only get the single most similar document from the dataset
978
- docsearch.as_retriever(search_kwargs={"k": 1})
935
+ # Only retrieve documents that have a relevance score
936
+ # Above a certain threshold
937
+ docsearch.as_retriever(
938
+ search_type="similarity_score_threshold",
939
+ search_kwargs={"score_threshold": 0.8},
940
+ )
979
941
 
980
- # Use a filter to only retrieve documents from a specific paper
981
- docsearch.as_retriever(
982
- search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
983
- )
942
+ # Only get the single most similar document from the dataset
943
+ docsearch.as_retriever(search_kwargs={"k": 1})
984
944
 
945
+ # Use a filter to only retrieve documents from a specific paper
946
+ docsearch.as_retriever(
947
+ search_kwargs={"filter": {"paper_title": "GPT-4 Technical Report"}}
948
+ )
949
+ ```
985
950
  """
986
951
  tags = kwargs.pop("tags", None) or [*self._get_retriever_tags()]
987
952
  return VectorStoreRetriever(vectorstore=self, tags=tags, **kwargs)
@@ -1015,7 +980,7 @@ class VectorStoreRetriever(BaseRetriever):
1015
980
  values: Values to validate.
1016
981
 
1017
982
  Returns:
1018
- Values: Validated values.
983
+ Validated values.
1019
984
 
1020
985
  Raises:
1021
986
  ValueError: If search_type is not one of the allowed search types.
@@ -1107,10 +1072,10 @@ class VectorStoreRetriever(BaseRetriever):
1107
1072
  return docs
1108
1073
 
1109
1074
  def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
1110
- """Add documents to the vectorstore.
1075
+ """Add documents to the `VectorStore`.
1111
1076
 
1112
1077
  Args:
1113
- documents: Documents to add to the vectorstore.
1078
+ documents: Documents to add to the `VectorStore`.
1114
1079
  **kwargs: Other keyword arguments that subclasses might use.
1115
1080
 
1116
1081
  Returns:
@@ -1121,10 +1086,10 @@ class VectorStoreRetriever(BaseRetriever):
1121
1086
  async def aadd_documents(
1122
1087
  self, documents: list[Document], **kwargs: Any
1123
1088
  ) -> list[str]:
1124
- """Async add documents to the vectorstore.
1089
+ """Async add documents to the `VectorStore`.
1125
1090
 
1126
1091
  Args:
1127
- documents: Documents to add to the vectorstore.
1092
+ documents: Documents to add to the `VectorStore`.
1128
1093
  **kwargs: Other keyword arguments that subclasses might use.
1129
1094
 
1130
1095
  Returns: