langchain-core 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic; see the registry's advisory page for more details.

Files changed (60)
  1. langchain_core/agents.py +2 -4
  2. langchain_core/caches.py +13 -6
  3. langchain_core/chat_history.py +5 -5
  4. langchain_core/document_loaders/base.py +6 -4
  5. langchain_core/document_loaders/blob_loaders.py +1 -1
  6. langchain_core/document_loaders/langsmith.py +9 -10
  7. langchain_core/documents/__init__.py +24 -3
  8. langchain_core/documents/base.py +72 -59
  9. langchain_core/documents/compressor.py +6 -6
  10. langchain_core/documents/transformers.py +2 -2
  11. langchain_core/embeddings/fake.py +2 -2
  12. langchain_core/example_selectors/semantic_similarity.py +7 -7
  13. langchain_core/exceptions.py +2 -2
  14. langchain_core/indexing/__init__.py +1 -1
  15. langchain_core/indexing/api.py +62 -62
  16. langchain_core/indexing/base.py +16 -16
  17. langchain_core/indexing/in_memory.py +2 -2
  18. langchain_core/language_models/__init__.py +6 -5
  19. langchain_core/language_models/base.py +2 -2
  20. langchain_core/language_models/fake_chat_models.py +1 -1
  21. langchain_core/language_models/llms.py +4 -6
  22. langchain_core/load/dump.py +1 -1
  23. langchain_core/load/serializable.py +4 -1
  24. langchain_core/messages/__init__.py +9 -0
  25. langchain_core/messages/ai.py +11 -7
  26. langchain_core/messages/base.py +4 -0
  27. langchain_core/messages/block_translators/google_genai.py +4 -2
  28. langchain_core/messages/content.py +4 -4
  29. langchain_core/messages/utils.py +13 -13
  30. langchain_core/output_parsers/__init__.py +17 -1
  31. langchain_core/output_parsers/base.py +3 -0
  32. langchain_core/output_parsers/format_instructions.py +9 -4
  33. langchain_core/output_parsers/json.py +5 -2
  34. langchain_core/output_parsers/list.py +16 -16
  35. langchain_core/output_parsers/openai_tools.py +2 -2
  36. langchain_core/output_parsers/pydantic.py +1 -1
  37. langchain_core/output_parsers/string.py +3 -3
  38. langchain_core/output_parsers/xml.py +28 -25
  39. langchain_core/outputs/generation.py +2 -3
  40. langchain_core/prompt_values.py +0 -6
  41. langchain_core/prompts/base.py +5 -3
  42. langchain_core/prompts/chat.py +60 -52
  43. langchain_core/prompts/structured.py +12 -8
  44. langchain_core/retrievers.py +41 -37
  45. langchain_core/runnables/base.py +14 -14
  46. langchain_core/runnables/configurable.py +3 -3
  47. langchain_core/runnables/graph.py +7 -3
  48. langchain_core/tools/base.py +66 -12
  49. langchain_core/tools/convert.py +8 -5
  50. langchain_core/tools/retriever.py +6 -5
  51. langchain_core/tools/structured.py +7 -5
  52. langchain_core/tracers/log_stream.py +2 -2
  53. langchain_core/utils/strings.py +1 -4
  54. langchain_core/utils/utils.py +12 -5
  55. langchain_core/vectorstores/base.py +73 -69
  56. langchain_core/vectorstores/in_memory.py +2 -2
  57. langchain_core/version.py +1 -1
  58. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/METADATA +1 -1
  59. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/RECORD +60 -60
  60. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
langchain_core/agents.py CHANGED
@@ -5,12 +5,10 @@
5
5
 
6
6
  !!! warning
7
7
  New agents should be built using the
8
- [langgraph library](https://github.com/langchain-ai/langgraph), which provides a
8
+ [`langchain` library](https://pypi.org/project/langchain/), which provides a
9
9
  simpler and more flexible way to define agents.
10
10
 
11
- Please see the
12
- [migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
13
- information on how to migrate existing agents to modern langgraph agents.
11
+ See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
14
12
 
15
13
  Agents use language models to choose a sequence of actions to take.
16
14
 
langchain_core/caches.py CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
4
4
 
5
- !!! warning
6
- This is a beta feature! Please be wary of deploying experimental code to production
5
+ !!! warning "Beta feature"
6
+ This is a beta feature. Please be wary of deploying experimental code to production
7
7
  unless you've taken appropriate precautions.
8
8
 
9
9
  A cache is useful for two reasons:
@@ -49,17 +49,18 @@ class BaseCache(ABC):
49
49
  """Look up based on `prompt` and `llm_string`.
50
50
 
51
51
  A cache implementation is expected to generate a key from the 2-tuple
52
- of prompt and llm_string (e.g., by concatenating them with a delimiter).
52
+ of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
53
53
 
54
54
  Args:
55
55
  prompt: A string representation of the prompt.
56
56
  In the case of a chat model, the prompt is a non-trivial
57
57
  serialization of the prompt into the language model.
58
58
  llm_string: A string representation of the LLM configuration.
59
+
59
60
  This is used to capture the invocation parameters of the LLM
60
61
  (e.g., model name, temperature, stop tokens, max tokens, etc.).
61
- These invocation parameters are serialized into a string
62
- representation.
62
+
63
+ These invocation parameters are serialized into a string representation.
63
64
 
64
65
  Returns:
65
66
  On a cache miss, return `None`. On a cache hit, return the cached value.
@@ -78,8 +79,10 @@ class BaseCache(ABC):
78
79
  In the case of a chat model, the prompt is a non-trivial
79
80
  serialization of the prompt into the language model.
80
81
  llm_string: A string representation of the LLM configuration.
82
+
81
83
  This is used to capture the invocation parameters of the LLM
82
84
  (e.g., model name, temperature, stop tokens, max tokens, etc.).
85
+
83
86
  These invocation parameters are serialized into a string
84
87
  representation.
85
88
  return_val: The value to be cached. The value is a list of `Generation`
@@ -94,15 +97,17 @@ class BaseCache(ABC):
94
97
  """Async look up based on `prompt` and `llm_string`.
95
98
 
96
99
  A cache implementation is expected to generate a key from the 2-tuple
97
- of prompt and llm_string (e.g., by concatenating them with a delimiter).
100
+ of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
98
101
 
99
102
  Args:
100
103
  prompt: A string representation of the prompt.
101
104
  In the case of a chat model, the prompt is a non-trivial
102
105
  serialization of the prompt into the language model.
103
106
  llm_string: A string representation of the LLM configuration.
107
+
104
108
  This is used to capture the invocation parameters of the LLM
105
109
  (e.g., model name, temperature, stop tokens, max tokens, etc.).
110
+
106
111
  These invocation parameters are serialized into a string
107
112
  representation.
108
113
 
@@ -125,8 +130,10 @@ class BaseCache(ABC):
125
130
  In the case of a chat model, the prompt is a non-trivial
126
131
  serialization of the prompt into the language model.
127
132
  llm_string: A string representation of the LLM configuration.
133
+
128
134
  This is used to capture the invocation parameters of the LLM
129
135
  (e.g., model name, temperature, stop tokens, max tokens, etc.).
136
+
130
137
  These invocation parameters are serialized into a string
131
138
  representation.
132
139
  return_val: The value to be cached. The value is a list of `Generation`
@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
121
121
  This method may be deprecated in a future release.
122
122
 
123
123
  Args:
124
- message: The human message to add to the store.
124
+ message: The `HumanMessage` to add to the store.
125
125
  """
126
126
  if isinstance(message, HumanMessage):
127
127
  self.add_message(message)
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
129
129
  self.add_message(HumanMessage(content=message))
130
130
 
131
131
  def add_ai_message(self, message: AIMessage | str) -> None:
132
- """Convenience method for adding an AI message string to the store.
132
+ """Convenience method for adding an `AIMessage` string to the store.
133
133
 
134
134
  !!! note
135
135
  This is a convenience method. Code should favor the bulk `add_messages`
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
138
138
  This method may be deprecated in a future release.
139
139
 
140
140
  Args:
141
- message: The AI message to add.
141
+ message: The `AIMessage` to add.
142
142
  """
143
143
  if isinstance(message, AIMessage):
144
144
  self.add_message(message)
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
173
173
  in an efficient manner to avoid unnecessary round-trips to the underlying store.
174
174
 
175
175
  Args:
176
- messages: A sequence of BaseMessage objects to store.
176
+ messages: A sequence of `BaseMessage` objects to store.
177
177
  """
178
178
  for message in messages:
179
179
  self.add_message(message)
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
182
182
  """Async add a list of messages.
183
183
 
184
184
  Args:
185
- messages: A sequence of BaseMessage objects to store.
185
+ messages: A sequence of `BaseMessage` objects to store.
186
186
  """
187
187
  await run_in_executor(None, self.add_messages, messages)
188
188
 
@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
27
27
  """Interface for Document Loader.
28
28
 
29
29
  Implementations should implement the lazy-loading method using generators
30
- to avoid loading all Documents into memory at once.
30
+ to avoid loading all documents into memory at once.
31
31
 
32
32
  `load` is provided just for user convenience and should not be overridden.
33
33
  """
@@ -53,9 +53,11 @@ class BaseLoader(ABC): # noqa: B024
53
53
  def load_and_split(
54
54
  self, text_splitter: TextSplitter | None = None
55
55
  ) -> list[Document]:
56
- """Load Documents and split into chunks. Chunks are returned as `Document`.
56
+ """Load `Document` and split into chunks. Chunks are returned as `Document`.
57
57
 
58
- Do not override this method. It should be considered to be deprecated!
58
+ !!! danger
59
+
60
+ Do not override this method. It should be considered to be deprecated!
59
61
 
60
62
  Args:
61
63
  text_splitter: `TextSplitter` instance to use for splitting documents.
@@ -135,7 +137,7 @@ class BaseBlobParser(ABC):
135
137
  """
136
138
 
137
139
  def parse(self, blob: Blob) -> list[Document]:
138
- """Eagerly parse the blob into a `Document` or `Document` objects.
140
+ """Eagerly parse the blob into a `Document` or list of `Document` objects.
139
141
 
140
142
  This is a convenience method for interactive development environment.
141
143
 
@@ -28,7 +28,7 @@ class BlobLoader(ABC):
28
28
  def yield_blobs(
29
29
  self,
30
30
  ) -> Iterable[Blob]:
31
- """A lazy loader for raw data represented by LangChain's Blob object.
31
+ """A lazy loader for raw data represented by LangChain's `Blob` object.
32
32
 
33
33
  Returns:
34
34
  A generator over blobs
@@ -14,13 +14,13 @@ from langchain_core.documents import Document
14
14
 
15
15
 
16
16
  class LangSmithLoader(BaseLoader):
17
- """Load LangSmith Dataset examples as Documents.
17
+ """Load LangSmith Dataset examples as `Document` objects.
18
18
 
19
- Loads the example inputs as the Document page content and places the entire example
20
- into the Document metadata. This allows you to easily create few-shot example
21
- retrievers from the loaded documents.
19
+ Loads the example inputs as the `Document` page content and places the entire
20
+ example into the `Document` metadata. This allows you to easily create few-shot
21
+ example retrievers from the loaded documents.
22
22
 
23
- ??? note "Lazy load"
23
+ ??? note "Lazy loading example"
24
24
 
25
25
  ```python
26
26
  from langchain_core.document_loaders import LangSmithLoader
@@ -66,12 +66,11 @@ class LangSmithLoader(BaseLoader):
66
66
  format_content: Function for converting the content extracted from the example
67
67
  inputs into a string. Defaults to JSON-encoding the contents.
68
68
  example_ids: The IDs of the examples to filter by.
69
- as_of: The dataset version tag OR
70
- timestamp to retrieve the examples as of.
71
- Response examples will only be those that were present at the time
72
- of the tagged (or timestamped) version.
69
+ as_of: The dataset version tag or timestamp to retrieve the examples as of.
70
+ Response examples will only be those that were present at the time of
71
+ the tagged (or timestamped) version.
73
72
  splits: A list of dataset splits, which are
74
- divisions of your dataset such as 'train', 'test', or 'validation'.
73
+ divisions of your dataset such as `train`, `test`, or `validation`.
75
74
  Returns examples only from the specified splits.
76
75
  inline_s3_urls: Whether to inline S3 URLs.
77
76
  offset: The offset to start from.
@@ -1,7 +1,28 @@
1
- """Documents module.
1
+ """Documents module for data retrieval and processing workflows.
2
2
 
3
- **Document** module is a collection of classes that handle documents
4
- and their transformations.
3
+ This module provides core abstractions for handling data in retrieval-augmented
4
+ generation (RAG) pipelines, vector stores, and document processing workflows.
5
+
6
+ !!! warning "Documents vs. message content"
7
+ This module is distinct from `langchain_core.messages.content`, which provides
8
+ multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
9
+ messages).
10
+
11
+ **Key distinction:**
12
+
13
+ - **Documents** (this module): For **data retrieval and processing workflows**
14
+ - Vector stores, retrievers, RAG pipelines
15
+ - Text chunking, embedding, and semantic search
16
+ - Example: Chunks of a PDF stored in a vector database
17
+
18
+ - **Content Blocks** (`messages.content`): For **LLM conversational I/O**
19
+ - Multimodal message content sent to/from models
20
+ - Tool calls, reasoning, citations within chat
21
+ - Example: An image sent to a vision model in a chat message (via
22
+ [`ImageContentBlock`][langchain.messages.ImageContentBlock])
23
+
24
+ While both can represent similar data types (text, files), they serve different
25
+ architectural purposes in LangChain applications.
5
26
  """
6
27
 
7
28
  from typing import TYPE_CHECKING
@@ -1,4 +1,16 @@
1
- """Base classes for media and documents."""
1
+ """Base classes for media and documents.
2
+
3
+ This module contains core abstractions for **data retrieval and processing workflows**:
4
+
5
+ - `BaseMedia`: Base class providing `id` and `metadata` fields
6
+ - `Blob`: Raw data loading (files, binary data) - used by document loaders
7
+ - `Document`: Text content for retrieval (RAG, vector stores, semantic search)
8
+
9
+ !!! note "Not for LLM chat messages"
10
+ These classes are for data processing pipelines, not LLM I/O. For multimodal
11
+ content in chat messages (images, audio in conversations), see
12
+ `langchain.messages` content blocks instead.
13
+ """
2
14
 
3
15
  from __future__ import annotations
4
16
 
@@ -19,20 +31,18 @@ PathLike = str | PurePath
19
31
 
20
32
 
21
33
  class BaseMedia(Serializable):
22
- """Use to represent media content.
23
-
24
- Media objects can be used to represent raw data, such as text or binary data.
34
+ """Base class for content used in retrieval and data processing workflows.
25
35
 
26
- LangChain Media objects allow associating metadata and an optional identifier
27
- with the content.
36
+ Provides common fields for content that needs to be stored, indexed, or searched.
28
37
 
29
- The presence of an ID and metadata make it easier to store, index, and search
30
- over the content in a structured way.
38
+ !!! note
39
+ For multimodal content in **chat messages** (images, audio sent to/from LLMs),
40
+ use `langchain.messages` content blocks instead.
31
41
  """
32
42
 
33
43
  # The ID field is optional at the moment.
34
44
  # It will likely become required in a future major release after
35
- # it has been adopted by enough vectorstore implementations.
45
+ # it has been adopted by enough VectorStore implementations.
36
46
  id: str | None = Field(default=None, coerce_numbers_to_str=True)
37
47
  """An optional identifier for the document.
38
48
 
@@ -45,65 +55,64 @@ class BaseMedia(Serializable):
45
55
 
46
56
 
47
57
  class Blob(BaseMedia):
48
- """Blob represents raw data by either reference or value.
58
+ """Raw data abstraction for document loading and file processing.
49
59
 
50
- Provides an interface to materialize the blob in different representations, and
51
- help to decouple the development of data loaders from the downstream parsing of
52
- the raw data.
60
+ Represents raw bytes or text, either in-memory or by file reference. Used
61
+ primarily by document loaders to decouple data loading from parsing.
53
62
 
54
- Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
63
+ Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
55
64
 
56
- Example: Initialize a blob from in-memory data
65
+ ???+ example "Initialize a blob from in-memory data"
57
66
 
58
- ```python
59
- from langchain_core.documents import Blob
67
+ ```python
68
+ from langchain_core.documents import Blob
60
69
 
61
- blob = Blob.from_data("Hello, world!")
70
+ blob = Blob.from_data("Hello, world!")
62
71
 
63
- # Read the blob as a string
64
- print(blob.as_string())
72
+ # Read the blob as a string
73
+ print(blob.as_string())
65
74
 
66
- # Read the blob as bytes
67
- print(blob.as_bytes())
75
+ # Read the blob as bytes
76
+ print(blob.as_bytes())
68
77
 
69
- # Read the blob as a byte stream
70
- with blob.as_bytes_io() as f:
71
- print(f.read())
72
- ```
78
+ # Read the blob as a byte stream
79
+ with blob.as_bytes_io() as f:
80
+ print(f.read())
81
+ ```
73
82
 
74
- Example: Load from memory and specify mime-type and metadata
83
+ ??? example "Load from memory and specify MIME type and metadata"
75
84
 
76
- ```python
77
- from langchain_core.documents import Blob
85
+ ```python
86
+ from langchain_core.documents import Blob
78
87
 
79
- blob = Blob.from_data(
80
- data="Hello, world!",
81
- mime_type="text/plain",
82
- metadata={"source": "https://example.com"},
83
- )
84
- ```
88
+ blob = Blob.from_data(
89
+ data="Hello, world!",
90
+ mime_type="text/plain",
91
+ metadata={"source": "https://example.com"},
92
+ )
93
+ ```
85
94
 
86
- Example: Load the blob from a file
95
+ ??? example "Load the blob from a file"
87
96
 
88
- ```python
89
- from langchain_core.documents import Blob
97
+ ```python
98
+ from langchain_core.documents import Blob
90
99
 
91
- blob = Blob.from_path("path/to/file.txt")
100
+ blob = Blob.from_path("path/to/file.txt")
92
101
 
93
- # Read the blob as a string
94
- print(blob.as_string())
102
+ # Read the blob as a string
103
+ print(blob.as_string())
95
104
 
96
- # Read the blob as bytes
97
- print(blob.as_bytes())
105
+ # Read the blob as bytes
106
+ print(blob.as_bytes())
98
107
 
99
- # Read the blob as a byte stream
100
- with blob.as_bytes_io() as f:
101
- print(f.read())
102
- ```
108
+ # Read the blob as a byte stream
109
+ with blob.as_bytes_io() as f:
110
+ print(f.read())
111
+ ```
103
112
  """
104
113
 
105
114
  data: bytes | str | None = None
106
- """Raw data associated with the blob."""
115
+ """Raw data associated with the `Blob`."""
107
116
  mimetype: str | None = None
108
117
  """MimeType not to be confused with a file extension."""
109
118
  encoding: str = "utf-8"
@@ -123,7 +132,7 @@ class Blob(BaseMedia):
123
132
  def source(self) -> str | None:
124
133
  """The source location of the blob as string if known otherwise none.
125
134
 
126
- If a path is associated with the blob, it will default to the path location.
135
+ If a path is associated with the `Blob`, it will default to the path location.
127
136
 
128
137
  Unless explicitly set via a metadata field called `"source"`, in which
129
138
  case that value will be used instead.
@@ -211,13 +220,13 @@ class Blob(BaseMedia):
211
220
  Args:
212
221
  path: Path-like object to file to be read
213
222
  encoding: Encoding to use if decoding the bytes into a string
214
- mime_type: If provided, will be set as the mime-type of the data
215
- guess_type: If `True`, the mimetype will be guessed from the file extension,
216
- if a mime-type was not provided
217
- metadata: Metadata to associate with the blob
223
+ mime_type: If provided, will be set as the MIME type of the data
224
+ guess_type: If `True`, the MIME type will be guessed from the file
225
+ extension, if a MIME type was not provided
226
+ metadata: Metadata to associate with the `Blob`
218
227
 
219
228
  Returns:
220
- Blob instance
229
+ `Blob` instance
221
230
  """
222
231
  if mime_type is None and guess_type:
223
232
  mimetype = mimetypes.guess_type(path)[0] if guess_type else None
@@ -243,17 +252,17 @@ class Blob(BaseMedia):
243
252
  path: str | None = None,
244
253
  metadata: dict | None = None,
245
254
  ) -> Blob:
246
- """Initialize the blob from in-memory data.
255
+ """Initialize the `Blob` from in-memory data.
247
256
 
248
257
  Args:
249
- data: The in-memory data associated with the blob
258
+ data: The in-memory data associated with the `Blob`
250
259
  encoding: Encoding to use if decoding the bytes into a string
251
- mime_type: If provided, will be set as the mime-type of the data
260
+ mime_type: If provided, will be set as the MIME type of the data
252
261
  path: If provided, will be set as the source from which the data came
253
- metadata: Metadata to associate with the blob
262
+ metadata: Metadata to associate with the `Blob`
254
263
 
255
264
  Returns:
256
- Blob instance
265
+ `Blob` instance
257
266
  """
258
267
  return cls(
259
268
  data=data,
@@ -274,6 +283,10 @@ class Blob(BaseMedia):
274
283
  class Document(BaseMedia):
275
284
  """Class for storing a piece of text and associated metadata.
276
285
 
286
+ !!! note
287
+ `Document` is for **retrieval workflows**, not chat I/O. For sending text
288
+ to an LLM in a conversation, use message types from `langchain.messages`.
289
+
277
290
  Example:
278
291
  ```python
279
292
  from langchain_core.documents import Document
@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
21
21
 
22
22
  This abstraction is primarily used for post-processing of retrieved documents.
23
23
 
24
- Documents matching a given query are first retrieved.
24
+ `Document` objects matching a given query are first retrieved.
25
25
 
26
26
  Then the list of documents can be further processed.
27
27
 
28
28
  For example, one could re-rank the retrieved documents using an LLM.
29
29
 
30
30
  !!! note
31
- Users should favor using a RunnableLambda instead of sub-classing from this
31
+ Users should favor using a `RunnableLambda` instead of sub-classing from this
32
32
  interface.
33
33
 
34
34
  """
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
43
43
  """Compress retrieved documents given the query context.
44
44
 
45
45
  Args:
46
- documents: The retrieved documents.
46
+ documents: The retrieved `Document` objects.
47
47
  query: The query context.
48
- callbacks: Optional callbacks to run during compression.
48
+ callbacks: Optional `Callbacks` to run during compression.
49
49
 
50
50
  Returns:
51
51
  The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
61
61
  """Async compress retrieved documents given the query context.
62
62
 
63
63
  Args:
64
- documents: The retrieved documents.
64
+ documents: The retrieved `Document` objects.
65
65
  query: The query context.
66
- callbacks: Optional callbacks to run during compression.
66
+ callbacks: Optional `Callbacks` to run during compression.
67
67
 
68
68
  Returns:
69
69
  The compressed documents.
@@ -16,8 +16,8 @@ if TYPE_CHECKING:
16
16
  class BaseDocumentTransformer(ABC):
17
17
  """Abstract base class for document transformation.
18
18
 
19
- A document transformation takes a sequence of Documents and returns a
20
- sequence of transformed Documents.
19
+ A document transformation takes a sequence of `Document` objects and returns a
20
+ sequence of transformed `Document` objects.
21
21
 
22
22
  Example:
23
23
  ```python
@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
18
18
 
19
19
  This embedding model creates embeddings by sampling from a normal distribution.
20
20
 
21
- !!! warning
21
+ !!! danger "Toy model"
22
22
  Do not use this outside of testing, as it is not a real embedding model.
23
23
 
24
24
  Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
73
73
  This embedding model creates embeddings by sampling from a normal distribution
74
74
  with a seed based on the hash of the text.
75
75
 
76
- !!! warning
76
+ !!! danger "Toy model"
77
77
  Do not use this outside of testing, as it is not a real embedding model.
78
78
 
79
79
  Instantiate:
@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
41
41
  """Optional keys to filter input to. If provided, the search is based on
42
42
  the input variables instead of all variables."""
43
43
  vectorstore_kwargs: dict[str, Any] | None = None
44
- """Extra arguments passed to similarity_search function of the vectorstore."""
44
+ """Extra arguments passed to similarity_search function of the `VectorStore`."""
45
45
 
46
46
  model_config = ConfigDict(
47
47
  arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
159
159
  instead of all variables.
160
160
  example_keys: If provided, keys to filter examples to.
161
161
  vectorstore_kwargs: Extra arguments passed to similarity_search function
162
- of the vectorstore.
162
+ of the `VectorStore`.
163
163
  vectorstore_cls_kwargs: optional kwargs containing url for vector store
164
164
 
165
165
  Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
203
203
  instead of all variables.
204
204
  example_keys: If provided, keys to filter examples to.
205
205
  vectorstore_kwargs: Extra arguments passed to similarity_search function
206
- of the vectorstore.
206
+ of the `VectorStore`.
207
207
  vectorstore_cls_kwargs: optional kwargs containing url for vector store
208
208
 
209
209
  Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
286
286
  embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
287
287
  vectorstore_cls: A vector store DB interface class, e.g. FAISS.
288
288
  k: Number of examples to select.
289
- fetch_k: Number of Documents to fetch to pass to MMR algorithm.
289
+ fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
290
290
  input_keys: If provided, the search is based on the input variables
291
291
  instead of all variables.
292
292
  example_keys: If provided, keys to filter examples to.
293
293
  vectorstore_kwargs: Extra arguments passed to similarity_search function
294
- of the vectorstore.
294
+ of the `VectorStore`.
295
295
  vectorstore_cls_kwargs: optional kwargs containing url for vector store
296
296
 
297
297
  Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
333
333
  embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
334
334
  vectorstore_cls: A vector store DB interface class, e.g. FAISS.
335
335
  k: Number of examples to select.
336
- fetch_k: Number of Documents to fetch to pass to MMR algorithm.
336
+ fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
337
337
  input_keys: If provided, the search is based on the input variables
338
338
  instead of all variables.
339
339
  example_keys: If provided, keys to filter examples to.
340
340
  vectorstore_kwargs: Extra arguments passed to similarity_search function
341
- of the vectorstore.
341
+ of the `VectorStore`.
342
342
  vectorstore_cls_kwargs: optional kwargs containing url for vector store
343
343
 
344
344
  Returns:
@@ -86,6 +86,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
86
86
  """
87
87
  return (
88
88
  f"{message}\n"
89
- "For troubleshooting, visit: https://python.langchain.com/docs/"
90
- f"troubleshooting/errors/{error_code.value} "
89
+ "For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
90
+ f"/errors/{error_code.value} "
91
91
  )
@@ -1,7 +1,7 @@
1
1
  """Code to help indexing data into a vectorstore.
2
2
 
3
3
  This package contains helper logic to help deal with indexing data into
4
- a vectorstore while avoiding duplicated content and over-writing content
4
+ a `VectorStore` while avoiding duplicated content and over-writing content
5
5
  if it's unchanged.
6
6
  """
7
7