langchain-core 1.0.0a8__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

This release of langchain-core is flagged as potentially problematic; review the registry advisory before upgrading.
Files changed (142)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +0 -1
  3. langchain_core/_api/beta_decorator.py +17 -20
  4. langchain_core/_api/deprecation.py +30 -35
  5. langchain_core/_import_utils.py +1 -1
  6. langchain_core/agents.py +10 -9
  7. langchain_core/caches.py +46 -56
  8. langchain_core/callbacks/__init__.py +1 -8
  9. langchain_core/callbacks/base.py +232 -243
  10. langchain_core/callbacks/file.py +33 -33
  11. langchain_core/callbacks/manager.py +353 -416
  12. langchain_core/callbacks/stdout.py +21 -22
  13. langchain_core/callbacks/streaming_stdout.py +32 -32
  14. langchain_core/callbacks/usage.py +54 -51
  15. langchain_core/chat_history.py +43 -58
  16. langchain_core/document_loaders/base.py +21 -21
  17. langchain_core/document_loaders/langsmith.py +22 -22
  18. langchain_core/documents/__init__.py +0 -1
  19. langchain_core/documents/base.py +46 -49
  20. langchain_core/documents/transformers.py +28 -29
  21. langchain_core/embeddings/fake.py +50 -54
  22. langchain_core/example_selectors/semantic_similarity.py +4 -6
  23. langchain_core/exceptions.py +7 -8
  24. langchain_core/indexing/api.py +19 -25
  25. langchain_core/indexing/base.py +24 -24
  26. langchain_core/language_models/__init__.py +11 -27
  27. langchain_core/language_models/_utils.py +53 -54
  28. langchain_core/language_models/base.py +30 -24
  29. langchain_core/language_models/chat_models.py +123 -148
  30. langchain_core/language_models/fake_chat_models.py +7 -7
  31. langchain_core/language_models/llms.py +14 -16
  32. langchain_core/load/dump.py +3 -4
  33. langchain_core/load/load.py +7 -16
  34. langchain_core/load/serializable.py +37 -36
  35. langchain_core/messages/__init__.py +1 -16
  36. langchain_core/messages/ai.py +122 -123
  37. langchain_core/messages/base.py +31 -31
  38. langchain_core/messages/block_translators/__init__.py +17 -17
  39. langchain_core/messages/block_translators/anthropic.py +3 -3
  40. langchain_core/messages/block_translators/bedrock_converse.py +3 -3
  41. langchain_core/messages/block_translators/google_genai.py +5 -4
  42. langchain_core/messages/block_translators/google_vertexai.py +4 -32
  43. langchain_core/messages/block_translators/groq.py +117 -21
  44. langchain_core/messages/block_translators/langchain_v0.py +3 -3
  45. langchain_core/messages/block_translators/openai.py +5 -5
  46. langchain_core/messages/chat.py +2 -6
  47. langchain_core/messages/content.py +222 -209
  48. langchain_core/messages/function.py +6 -10
  49. langchain_core/messages/human.py +17 -24
  50. langchain_core/messages/modifier.py +2 -2
  51. langchain_core/messages/system.py +12 -22
  52. langchain_core/messages/tool.py +53 -69
  53. langchain_core/messages/utils.py +399 -417
  54. langchain_core/output_parsers/__init__.py +1 -14
  55. langchain_core/output_parsers/base.py +46 -47
  56. langchain_core/output_parsers/json.py +3 -4
  57. langchain_core/output_parsers/list.py +2 -2
  58. langchain_core/output_parsers/openai_functions.py +46 -44
  59. langchain_core/output_parsers/openai_tools.py +11 -16
  60. langchain_core/output_parsers/pydantic.py +10 -11
  61. langchain_core/output_parsers/string.py +2 -2
  62. langchain_core/output_parsers/transform.py +2 -2
  63. langchain_core/output_parsers/xml.py +1 -1
  64. langchain_core/outputs/__init__.py +1 -1
  65. langchain_core/outputs/chat_generation.py +14 -14
  66. langchain_core/outputs/generation.py +6 -6
  67. langchain_core/outputs/llm_result.py +5 -5
  68. langchain_core/prompt_values.py +11 -11
  69. langchain_core/prompts/__init__.py +3 -23
  70. langchain_core/prompts/base.py +33 -38
  71. langchain_core/prompts/chat.py +222 -229
  72. langchain_core/prompts/dict.py +3 -3
  73. langchain_core/prompts/few_shot.py +76 -83
  74. langchain_core/prompts/few_shot_with_templates.py +7 -9
  75. langchain_core/prompts/image.py +12 -14
  76. langchain_core/prompts/loading.py +1 -1
  77. langchain_core/prompts/message.py +3 -3
  78. langchain_core/prompts/prompt.py +20 -23
  79. langchain_core/prompts/string.py +20 -8
  80. langchain_core/prompts/structured.py +26 -27
  81. langchain_core/rate_limiters.py +50 -58
  82. langchain_core/retrievers.py +41 -182
  83. langchain_core/runnables/base.py +565 -597
  84. langchain_core/runnables/branch.py +8 -8
  85. langchain_core/runnables/config.py +37 -44
  86. langchain_core/runnables/configurable.py +9 -10
  87. langchain_core/runnables/fallbacks.py +9 -9
  88. langchain_core/runnables/graph.py +46 -50
  89. langchain_core/runnables/graph_ascii.py +19 -18
  90. langchain_core/runnables/graph_mermaid.py +20 -31
  91. langchain_core/runnables/graph_png.py +7 -7
  92. langchain_core/runnables/history.py +22 -22
  93. langchain_core/runnables/passthrough.py +11 -11
  94. langchain_core/runnables/retry.py +3 -3
  95. langchain_core/runnables/router.py +2 -2
  96. langchain_core/runnables/schema.py +33 -33
  97. langchain_core/runnables/utils.py +30 -34
  98. langchain_core/stores.py +72 -102
  99. langchain_core/sys_info.py +27 -29
  100. langchain_core/tools/__init__.py +1 -14
  101. langchain_core/tools/base.py +70 -71
  102. langchain_core/tools/convert.py +100 -104
  103. langchain_core/tools/render.py +9 -9
  104. langchain_core/tools/retriever.py +7 -7
  105. langchain_core/tools/simple.py +6 -7
  106. langchain_core/tools/structured.py +18 -24
  107. langchain_core/tracers/__init__.py +1 -9
  108. langchain_core/tracers/base.py +35 -35
  109. langchain_core/tracers/context.py +12 -17
  110. langchain_core/tracers/event_stream.py +3 -3
  111. langchain_core/tracers/langchain.py +8 -8
  112. langchain_core/tracers/log_stream.py +17 -18
  113. langchain_core/tracers/memory_stream.py +3 -3
  114. langchain_core/tracers/root_listeners.py +2 -2
  115. langchain_core/tracers/schemas.py +0 -129
  116. langchain_core/tracers/stdout.py +1 -2
  117. langchain_core/utils/__init__.py +1 -1
  118. langchain_core/utils/aiter.py +32 -32
  119. langchain_core/utils/env.py +5 -5
  120. langchain_core/utils/function_calling.py +59 -154
  121. langchain_core/utils/html.py +4 -4
  122. langchain_core/utils/input.py +3 -3
  123. langchain_core/utils/interactive_env.py +1 -1
  124. langchain_core/utils/iter.py +20 -20
  125. langchain_core/utils/json.py +1 -1
  126. langchain_core/utils/json_schema.py +2 -2
  127. langchain_core/utils/mustache.py +5 -5
  128. langchain_core/utils/pydantic.py +17 -17
  129. langchain_core/utils/strings.py +5 -5
  130. langchain_core/utils/utils.py +25 -28
  131. langchain_core/vectorstores/base.py +55 -87
  132. langchain_core/vectorstores/in_memory.py +83 -85
  133. langchain_core/vectorstores/utils.py +2 -2
  134. langchain_core/version.py +1 -1
  135. {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc2.dist-info}/METADATA +23 -11
  136. langchain_core-1.0.0rc2.dist-info/RECORD +172 -0
  137. langchain_core/memory.py +0 -120
  138. langchain_core/pydantic_v1/__init__.py +0 -30
  139. langchain_core/pydantic_v1/dataclasses.py +0 -23
  140. langchain_core/pydantic_v1/main.py +0 -23
  141. langchain_core-1.0.0a8.dist-info/RECORD +0 -176
  142. {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc2.dist-info}/WHEEL +0 -0
langchain_core/document_loaders/langsmith.py
@@ -22,22 +22,22 @@ class LangSmithLoader(BaseLoader):
 
     ??? note "Lazy load"
 
-        .. code-block:: python
+        ```python
+        from langchain_core.document_loaders import LangSmithLoader
 
-            from langchain_core.document_loaders import LangSmithLoader
+        loader = LangSmithLoader(dataset_id="...", limit=100)
+        docs = []
+        for doc in loader.lazy_load():
+            docs.append(doc)
+        ```
 
-            loader = LangSmithLoader(dataset_id="...", limit=100)
-            docs = []
-            for doc in loader.lazy_load():
-                docs.append(doc)
-
-        .. code-block:: python
-
-            # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```python
+        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```
 
     !!! version-added "Added in version 0.2.34"
 
-    """  # noqa: E501
+    """
 
     def __init__(
         self,
@@ -60,15 +60,15 @@ class LangSmithLoader(BaseLoader):
         """Create a LangSmith loader.
 
         Args:
-            dataset_id: The ID of the dataset to filter by. Defaults to None.
-            dataset_name: The name of the dataset to filter by. Defaults to None.
-            content_key: The inputs key to set as Document page content. ``'.'`` characters
-                are interpreted as nested keys. E.g. ``content_key="first.second"`` will
+            dataset_id: The ID of the dataset to filter by.
+            dataset_name: The name of the dataset to filter by.
+            content_key: The inputs key to set as Document page content. `'.'` characters
+                are interpreted as nested keys. E.g. `content_key="first.second"` will
                 result in
-                ``Document(page_content=format_content(example.inputs["first"]["second"]))``
+                `Document(page_content=format_content(example.inputs["first"]["second"]))`
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
-            example_ids: The IDs of the examples to filter by. Defaults to None.
+            example_ids: The IDs of the examples to filter by.
             as_of: The dataset version tag OR
                 timestamp to retrieve the examples as of.
                 Response examples will only be those that were present at the time
@@ -76,17 +76,17 @@ class LangSmithLoader(BaseLoader):
             splits: A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs. Defaults to True.
-            offset: The offset to start from. Defaults to 0.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
-            metadata: Metadata to filter by. Defaults to None.
+            metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
             client: LangSmith Client. If not provided will be initialized from below args.
             client_kwargs: Keyword args to pass to LangSmith client init. Should only be
-                specified if ``client`` isn't.
+                specified if `client` isn't.
 
         Raises:
-            ValueError: If both ``client`` and ``client_kwargs`` are provided.
+            ValueError: If both `client` and `client_kwargs` are provided.
         """  # noqa: E501
         if client and client_kwargs:
             raise ValueError
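Note (not part of the diff): the hunk above ends on the `client` / `client_kwargs` guard. A minimal sketch of how that exclusivity surfaces to callers, using placeholder values; it assumes the guard runs before any client construction, as the hunk suggests.

```python
from langchain_core.document_loaders import LangSmithLoader

# Passing both a ready-made client and client_kwargs trips the guard above.
try:
    LangSmithLoader(
        dataset_name="my-dataset",          # placeholder dataset name
        client=object(),                    # stand-in for a langsmith.Client
        client_kwargs={"api_key": "..."},   # elided key, for illustration only
    )
except ValueError:
    print("client and client_kwargs are mutually exclusive")
```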
langchain_core/documents/__init__.py
@@ -2,7 +2,6 @@
 
 **Document** module is a collection of classes that handle documents
 and their transformations.
-
 """
 
 from typing import TYPE_CHECKING
langchain_core/documents/base.py
@@ -57,52 +57,51 @@ class Blob(BaseMedia):
 
     Example: Initialize a blob from in-memory data
 
-        .. code-block:: python
+        ```python
+        from langchain_core.documents import Blob
 
-            from langchain_core.documents import Blob
+        blob = Blob.from_data("Hello, world!")
 
-            blob = Blob.from_data("Hello, world!")
+        # Read the blob as a string
+        print(blob.as_string())
 
-            # Read the blob as a string
-            print(blob.as_string())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-            # Read the blob as bytes
-            print(blob.as_bytes())
-
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
     Example: Load from memory and specify mime-type and metadata
 
-        .. code-block:: python
-
-            from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-            blob = Blob.from_data(
-                data="Hello, world!",
-                mime_type="text/plain",
-                metadata={"source": "https://example.com"},
-            )
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
     Example: Load the blob from a file
 
-        .. code-block:: python
-
-            from langchain_core.documents import Blob
-
-            blob = Blob.from_path("path/to/file.txt")
+        ```python
+        from langchain_core.documents import Blob
 
-            # Read the blob as a string
-            print(blob.as_string())
+        blob = Blob.from_path("path/to/file.txt")
 
-            # Read the blob as bytes
-            print(blob.as_bytes())
+        # Read the blob as a string
+        print(blob.as_string())
 
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """
 
     data: bytes | str | None = None
@@ -112,7 +111,7 @@ class Blob(BaseMedia):
     encoding: str = "utf-8"
     """Encoding to use if decoding the bytes into a string.
 
-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
     """
     path: PathLike | None = None
     """Location where the original content was found."""
@@ -128,7 +127,7 @@ class Blob(BaseMedia):
 
         If a path is associated with the blob, it will default to the path location.
 
-        Unless explicitly set via a metadata field called "source", in which
+        Unless explicitly set via a metadata field called `"source"`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:
@@ -212,11 +211,11 @@ class Blob(BaseMedia):
        """Load the blob from a path like object.
 
        Args:
-            path: path like object to file to be read
+            path: Path-like object to file to be read
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            guess_type: If True, the mimetype will be guessed from the file extension,
-                if a mime-type was not provided
+            mime_type: If provided, will be set as the mime-type of the data
+            guess_type: If `True`, the mimetype will be guessed from the file extension,
+                if a mime-type was not provided
            metadata: Metadata to associate with the blob
 
        Returns:
@@ -249,10 +248,10 @@ class Blob(BaseMedia):
        """Initialize the blob from in-memory data.
 
        Args:
-            data: the in-memory data associated with the blob
+            data: The in-memory data associated with the blob
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            path: if provided, will be set as the source from which the data came
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
            metadata: Metadata to associate with the blob
 
        Returns:
@@ -278,15 +277,13 @@ class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.
 
     Example:
+        ```python
+        from langchain_core.documents import Document
 
-        .. code-block:: python
-
-            from langchain_core.documents import Document
-
-            document = Document(
-                page_content="Hello, world!", metadata={"source": "https://example.com"}
-            )
-
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """
 
     page_content: str
@@ -306,7 +303,7 @@ class Document(BaseMedia):
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.
 
        Returns:
            ["langchain", "schema", "document"]
langchain_core/documents/transformers.py
@@ -20,35 +20,34 @@ class BaseDocumentTransformer(ABC):
     sequence of transformed Documents.
 
     Example:
-        .. code-block:: python
-
-            class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
-                embeddings: Embeddings
-                similarity_fn: Callable = cosine_similarity
-                similarity_threshold: float = 0.95
-
-                class Config:
-                    arbitrary_types_allowed = True
-
-                def transform_documents(
-                    self, documents: Sequence[Document], **kwargs: Any
-                ) -> Sequence[Document]:
-                    stateful_documents = get_stateful_documents(documents)
-                    embedded_documents = _get_embeddings_from_stateful_docs(
-                        self.embeddings, stateful_documents
-                    )
-                    included_idxs = _filter_similar_embeddings(
-                        embedded_documents,
-                        self.similarity_fn,
-                        self.similarity_threshold,
-                    )
-                    return [stateful_documents[i] for i in sorted(included_idxs)]
-
-                async def atransform_documents(
-                    self, documents: Sequence[Document], **kwargs: Any
-                ) -> Sequence[Document]:
-                    raise NotImplementedError
-
+        ```python
+        class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
+            embeddings: Embeddings
+            similarity_fn: Callable = cosine_similarity
+            similarity_threshold: float = 0.95
+
+            class Config:
+                arbitrary_types_allowed = True
+
+            def transform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                stateful_documents = get_stateful_documents(documents)
+                embedded_documents = _get_embeddings_from_stateful_docs(
+                    self.embeddings, stateful_documents
+                )
+                included_idxs = _filter_similar_embeddings(
+                    embedded_documents,
+                    self.similarity_fn,
+                    self.similarity_threshold,
+                )
+                return [stateful_documents[i] for i in sorted(included_idxs)]
+
+            async def atransform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                raise NotImplementedError
+        ```
     """
 
     @abstractmethod
langchain_core/embeddings/fake.py
@@ -18,40 +18,38 @@ class FakeEmbeddings(Embeddings, BaseModel):
 
     This embedding model creates embeddings by sampling from a normal distribution.
 
-    Do not use this outside of testing, as it is not a real embedding model.
+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.
 
     Instantiate:
-        .. code-block:: python
+        ```python
+        from langchain_core.embeddings import FakeEmbeddings
 
-            from langchain_core.embeddings import FakeEmbeddings
-
-            embed = FakeEmbeddings(size=100)
+        embed = FakeEmbeddings(size=100)
+        ```
 
     Embed single text:
-        .. code-block:: python
-
-            input_text = "The meaning of life is 42"
-            vector = embed.embed_query(input_text)
-            print(vector[:3])
-
-        .. code-block:: python
-
-            [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```python
+        input_text = "The meaning of life is 42"
+        vector = embed.embed_query(input_text)
+        print(vector[:3])
+        ```
+        ```python
+        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```
 
     Embed multiple texts:
-        .. code-block:: python
-
-            input_texts = ["Document 1...", "Document 2..."]
-            vectors = embed.embed_documents(input_texts)
-            print(len(vectors))
-            # The first 3 coordinates for the first vector
-            print(vectors[0][:3])
-
-        .. code-block:: python
-
-            2
-            [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
-
+        ```python
+        input_texts = ["Document 1...", "Document 2..."]
+        vectors = embed.embed_documents(input_texts)
+        print(len(vectors))
+        # The first 3 coordinates for the first vector
+        print(vectors[0][:3])
+        ```
+        ```python
+        2
+        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
+        ```
     """
 
     size: int
@@ -75,40 +73,38 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
     This embedding model creates embeddings by sampling from a normal distribution
     with a seed based on the hash of the text.
 
-    Do not use this outside of testing, as it is not a real embedding model.
+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.
 
     Instantiate:
-        .. code-block:: python
+        ```python
+        from langchain_core.embeddings import DeterministicFakeEmbedding
 
-            from langchain_core.embeddings import DeterministicFakeEmbedding
-
-            embed = DeterministicFakeEmbedding(size=100)
+        embed = DeterministicFakeEmbedding(size=100)
+        ```
 
     Embed single text:
-        .. code-block:: python
-
-            input_text = "The meaning of life is 42"
-            vector = embed.embed_query(input_text)
-            print(vector[:3])
-
-        .. code-block:: python
-
-            [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```python
+        input_text = "The meaning of life is 42"
+        vector = embed.embed_query(input_text)
+        print(vector[:3])
+        ```
+        ```python
+        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```
 
     Embed multiple texts:
-        .. code-block:: python
-
-            input_texts = ["Document 1...", "Document 2..."]
-            vectors = embed.embed_documents(input_texts)
-            print(len(vectors))
-            # The first 3 coordinates for the first vector
-            print(vectors[0][:3])
-
-        .. code-block:: python
-
-            2
-            [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
-
+        ```python
+        input_texts = ["Document 1...", "Document 2..."]
+        vectors = embed.embed_documents(input_texts)
+        print(len(vectors))
+        # The first 3 coordinates for the first vector
+        print(vectors[0][:3])
+        ```
+        ```python
+        2
+        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
+        ```
    """
 
    size: int
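Note (not part of the diff): the class docstring above says the vectors are seeded from the hash of the text; a tiny sketch of why that matters for tests.

```python
from langchain_core.embeddings import DeterministicFakeEmbedding

embed = DeterministicFakeEmbedding(size=8)

# Same text -> same fake vector, so assertions stay stable across runs.
assert embed.embed_query("hello") == embed.embed_query("hello")
# Different text -> (almost certainly) a different vector.
assert embed.embed_query("hello") != embed.embed_query("world")
```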
langchain_core/example_selectors/semantic_similarity.py
@@ -154,7 +154,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
             examples: List of examples to use in the prompt.
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
-            k: Number of examples to select. Default is 4.
+            k: Number of examples to select.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
@@ -198,7 +198,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
             examples: List of examples to use in the prompt.
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
-            k: Number of examples to select. Default is 4.
+            k: Number of examples to select.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
@@ -285,9 +285,8 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
             examples: List of examples to use in the prompt.
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
-            k: Number of examples to select. Default is 4.
+            k: Number of examples to select.
             fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
@@ -333,9 +332,8 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
             examples: List of examples to use in the prompt.
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
-            k: Number of examples to select. Default is 4.
+            k: Number of examples to select.
             fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-                Default is 20.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
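Note (not part of the diff): the "Default is 4" / "Default is 20" phrases are dropped from the docstrings above, but the keyword arguments can still be set explicitly. A sketch using the in-memory test components shipped in this same package; the example data is made up.

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.vectorstores import InMemoryVectorStore

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "sunny", "output": "gloomy"},
]

# k (number of examples returned) is passed explicitly instead of relying on the default.
selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    DeterministicFakeEmbedding(size=16),
    InMemoryVectorStore,
    k=2,
)
print(selector.select_examples({"input": "cheerful"}))
```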
langchain_core/exceptions.py
@@ -16,7 +16,7 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
     """Exception that output parsers should raise to signify a parsing error.
 
     This exists to differentiate parsing errors from other code or execution errors
-    that also may arise inside the output parser. OutputParserExceptions will be
+    that also may arise inside the output parser. `OutputParserException` will be
     available to catch and handle in ways to fix the parsing error, while other
     errors will be raised.
     """
@@ -28,24 +28,23 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
         llm_output: str | None = None,
         send_to_llm: bool = False,  # noqa: FBT001,FBT002
     ):
-        """Create an OutputParserException.
+        """Create an `OutputParserException`.
 
         Args:
             error: The error that's being re-raised or an error message.
             observation: String explanation of error which can be passed to a
-                model to try and remediate the issue. Defaults to None.
+                model to try and remediate the issue.
             llm_output: String model output which is error-ing.
-                Defaults to None.
+
             send_to_llm: Whether to send the observation and llm_output back to an Agent
-                after an OutputParserException has been raised.
+                after an `OutputParserException` has been raised.
                 This gives the underlying model driving the agent the context that the
                 previous output was improperly structured, in the hopes that it will
                 update the output to the correct format.
-                Defaults to False.
 
         Raises:
-            ValueError: If ``send_to_llm`` is True but either observation or
-                ``llm_output`` are not provided.
+            ValueError: If `send_to_llm` is True but either observation or
+                `llm_output` are not provided.
         """
         if isinstance(error, str):
             error = create_message(
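Note (not part of the diff): a small sketch of the `send_to_llm` contract described above; both `observation` and `llm_output` are supplied, so the constructor's ValueError guard is satisfied. The strings are invented.

```python
from langchain_core.exceptions import OutputParserException

try:
    raise OutputParserException(
        "Could not parse model output",
        observation="Output was not valid JSON; please return a single JSON object.",
        llm_output='{"answer": ...',  # truncated model output, for illustration
        send_to_llm=True,
    )
except OutputParserException as exc:
    # An agent can feed exc.observation / exc.llm_output back to the model.
    print(exc.observation)
```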
langchain_core/indexing/api.py
@@ -299,9 +299,9 @@ def index(
     are not able to specify the uid of the document.
 
     !!! warning "Behavior changed in 0.3.25"
-        Added ``scoped_full`` cleanup mode.
+        Added `scoped_full` cleanup mode.
 
-    !!! important
+    !!! warning
 
         * In full mode, the loader should be returning
          the entire dataset, and not just a subset of the dataset.
@@ -315,7 +315,7 @@ def index(
          chunks, and we index them using a batch size of 5, we'll have 3 batches
          all with the same source id. In general, to avoid doing too much
          redundant work select as big a batch size as possible.
-        * The ``scoped_full`` mode is suitable if determining an appropriate batch size
+        * The `scoped_full` mode is suitable if determining an appropriate batch size
          is challenging or if your data loader cannot return the entire dataset at
          once. This mode keeps track of source IDs in memory, which should be fine
          for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -326,8 +326,8 @@ def index(
        record_manager: Timestamped set to keep track of which documents were
            updated.
        vector_store: VectorStore or DocumentIndex to index the documents into.
-        batch_size: Batch size to use when indexing. Default is 100.
-        cleanup: How to handle clean up of documents. Default is None.
+        batch_size: Batch size to use when indexing.
+        cleanup: How to handle clean up of documents.
 
            - incremental: Cleans up all documents that haven't been updated AND
              that are associated with source ids that were seen during indexing.
@@ -342,15 +342,12 @@ def index(
              source ids that were seen during indexing.
            - None: Do not delete any documents.
        source_id_key: Optional key that helps identify the original source
-            of the document. Default is None.
+            of the document.
        cleanup_batch_size: Batch size to use when cleaning up documents.
-            Default is 1_000.
        force_update: Force update documents even if they are present in the
            record manager. Useful if you are re-indexing with updated embeddings.
-            Default is False.
        key_encoder: Hashing algorithm to use for hashing the document content and
-            metadata. Default is "sha1".
-            Other options include "blake2b", "sha256", and "sha512".
+            metadata. Options include "blake2b", "sha256", and "sha512".
 
            !!! version-added "Added in version 0.3.66"
 
@@ -381,8 +378,8 @@ def index(
        ValueError: If vectorstore does not have
            "delete" and "add_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If ``vectorstore`` is not a VectorStore or a DocumentIndex.
-        AssertionError: If ``source_id`` is None when cleanup mode is incremental.
+        TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+        AssertionError: If `source_id` is None when cleanup mode is incremental.
            (should be unreachable code).
    """
    # Behavior is deprecated, but we keep it for backwards compatibility.
@@ -640,9 +637,9 @@ async def aindex(
     are not able to specify the uid of the document.
 
     !!! warning "Behavior changed in 0.3.25"
-        Added ``scoped_full`` cleanup mode.
+        Added `scoped_full` cleanup mode.
 
-    !!! important
+    !!! warning
 
         * In full mode, the loader should be returning
          the entire dataset, and not just a subset of the dataset.
@@ -656,7 +653,7 @@ async def aindex(
          chunks, and we index them using a batch size of 5, we'll have 3 batches
          all with the same source id. In general, to avoid doing too much
          redundant work select as big a batch size as possible.
-        * The ``scoped_full`` mode is suitable if determining an appropriate batch size
+        * The `scoped_full` mode is suitable if determining an appropriate batch size
          is challenging or if your data loader cannot return the entire dataset at
          once. This mode keeps track of source IDs in memory, which should be fine
          for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -667,8 +664,8 @@ async def aindex(
        record_manager: Timestamped set to keep track of which documents were
            updated.
        vector_store: VectorStore or DocumentIndex to index the documents into.
-        batch_size: Batch size to use when indexing. Default is 100.
-        cleanup: How to handle clean up of documents. Default is None.
+        batch_size: Batch size to use when indexing.
+        cleanup: How to handle clean up of documents.
 
            - incremental: Cleans up all documents that haven't been updated AND
              that are associated with source ids that were seen during indexing.
@@ -683,15 +680,12 @@ async def aindex(
              source ids that were seen during indexing.
            - None: Do not delete any documents.
        source_id_key: Optional key that helps identify the original source
-            of the document. Default is None.
+            of the document.
        cleanup_batch_size: Batch size to use when cleaning up documents.
-            Default is 1_000.
        force_update: Force update documents even if they are present in the
            record manager. Useful if you are re-indexing with updated embeddings.
-            Default is False.
        key_encoder: Hashing algorithm to use for hashing the document content and
-            metadata. Default is "sha1".
-            Other options include "blake2b", "sha256", and "sha512".
+            metadata. Options include "blake2b", "sha256", and "sha512".
 
            !!! version-added "Added in version 0.3.66"
 
@@ -722,9 +716,9 @@ async def aindex(
        ValueError: If vectorstore does not have
            "adelete" and "aadd_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If ``vector_store`` is not a VectorStore or DocumentIndex.
-        AssertionError: If ``source_id_key`` is None when cleanup mode is
-            incremental or ``scoped_full`` (should be unreachable).
+        TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+        AssertionError: If `source_id_key` is None when cleanup mode is
+            incremental or `scoped_full` (should be unreachable).
    """
    # Behavior is deprecated, but we keep it for backwards compatibility.
    # # Warn only once per process.
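Note (not part of the diff): the rewritten `key_encoder` line above no longer states the default, so here is a sketch of pinning it explicitly, built from the in-memory pieces shipped in langchain_core. The documents and namespace are placeholders.

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

record_manager = InMemoryRecordManager(namespace="demo")
record_manager.create_schema()
vector_store = InMemoryVectorStore(DeterministicFakeEmbedding(size=16))

docs = [
    Document(page_content="hello", metadata={"source": "a.txt"}),
    Document(page_content="world", metadata={"source": "a.txt"}),
]

# scoped_full only cleans up stale documents whose source id was seen in this
# run; key_encoder is pinned rather than relying on the default hash.
result = index(
    docs,
    record_manager,
    vector_store,
    cleanup="scoped_full",
    source_id_key="source",
    key_encoder="sha256",
)
print(result)  # counts of added / updated / skipped / deleted documents
```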