langchain-core 1.0.0rc3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of langchain-core might be problematic.

Files changed (76)
  1. langchain_core/agents.py +2 -4
  2. langchain_core/caches.py +16 -7
  3. langchain_core/callbacks/base.py +0 -4
  4. langchain_core/callbacks/manager.py +0 -11
  5. langchain_core/chat_history.py +5 -5
  6. langchain_core/document_loaders/base.py +6 -4
  7. langchain_core/document_loaders/blob_loaders.py +1 -1
  8. langchain_core/document_loaders/langsmith.py +9 -13
  9. langchain_core/documents/__init__.py +24 -3
  10. langchain_core/documents/base.py +72 -61
  11. langchain_core/documents/compressor.py +6 -6
  12. langchain_core/documents/transformers.py +6 -6
  13. langchain_core/embeddings/fake.py +2 -2
  14. langchain_core/example_selectors/semantic_similarity.py +7 -7
  15. langchain_core/exceptions.py +2 -2
  16. langchain_core/indexing/__init__.py +1 -1
  17. langchain_core/indexing/api.py +62 -62
  18. langchain_core/indexing/base.py +20 -22
  19. langchain_core/indexing/in_memory.py +2 -4
  20. langchain_core/language_models/__init__.py +6 -5
  21. langchain_core/language_models/base.py +7 -8
  22. langchain_core/language_models/chat_models.py +84 -78
  23. langchain_core/language_models/fake_chat_models.py +1 -1
  24. langchain_core/language_models/llms.py +20 -18
  25. langchain_core/load/dump.py +6 -8
  26. langchain_core/load/serializable.py +4 -1
  27. langchain_core/messages/__init__.py +9 -0
  28. langchain_core/messages/ai.py +11 -7
  29. langchain_core/messages/base.py +4 -0
  30. langchain_core/messages/block_translators/google_genai.py +5 -3
  31. langchain_core/messages/content.py +4 -4
  32. langchain_core/messages/utils.py +17 -17
  33. langchain_core/output_parsers/__init__.py +17 -1
  34. langchain_core/output_parsers/base.py +3 -0
  35. langchain_core/output_parsers/format_instructions.py +9 -4
  36. langchain_core/output_parsers/json.py +5 -2
  37. langchain_core/output_parsers/list.py +16 -16
  38. langchain_core/output_parsers/openai_tools.py +2 -2
  39. langchain_core/output_parsers/pydantic.py +1 -1
  40. langchain_core/output_parsers/string.py +3 -3
  41. langchain_core/output_parsers/xml.py +28 -25
  42. langchain_core/outputs/generation.py +2 -3
  43. langchain_core/prompt_values.py +0 -6
  44. langchain_core/prompts/base.py +5 -3
  45. langchain_core/prompts/chat.py +60 -52
  46. langchain_core/prompts/string.py +5 -2
  47. langchain_core/prompts/structured.py +12 -8
  48. langchain_core/rate_limiters.py +1 -3
  49. langchain_core/retrievers.py +41 -37
  50. langchain_core/runnables/base.py +25 -29
  51. langchain_core/runnables/branch.py +9 -9
  52. langchain_core/runnables/config.py +2 -4
  53. langchain_core/runnables/configurable.py +3 -3
  54. langchain_core/runnables/fallbacks.py +1 -1
  55. langchain_core/runnables/graph.py +7 -3
  56. langchain_core/runnables/retry.py +1 -1
  57. langchain_core/runnables/schema.py +2 -5
  58. langchain_core/runnables/utils.py +3 -3
  59. langchain_core/stores.py +4 -6
  60. langchain_core/tools/base.py +68 -14
  61. langchain_core/tools/convert.py +8 -7
  62. langchain_core/tools/retriever.py +6 -5
  63. langchain_core/tools/structured.py +7 -5
  64. langchain_core/tracers/event_stream.py +4 -1
  65. langchain_core/tracers/log_stream.py +6 -3
  66. langchain_core/utils/function_calling.py +8 -0
  67. langchain_core/utils/json_schema.py +1 -1
  68. langchain_core/utils/strings.py +1 -4
  69. langchain_core/utils/utils.py +12 -5
  70. langchain_core/vectorstores/base.py +130 -130
  71. langchain_core/vectorstores/in_memory.py +4 -4
  72. langchain_core/vectorstores/utils.py +1 -1
  73. langchain_core/version.py +1 -1
  74. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/METADATA +8 -7
  75. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/RECORD +76 -76
  76. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
langchain_core/agents.py CHANGED
@@ -5,12 +5,10 @@
 
 !!! warning
     New agents should be built using the
-    [langgraph library](https://github.com/langchain-ai/langgraph), which provides a
+    [`langchain` library](https://pypi.org/project/langchain/), which provides a
     simpler and more flexible way to define agents.
 
-    Please see the
-    [migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
-    information on how to migrate existing agents to modern langgraph agents.
+    See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
 
 Agents use language models to choose a sequence of actions to take.
 
langchain_core/caches.py CHANGED
@@ -1,7 +1,9 @@
-"""`caches` provides an optional caching layer for language models.
+"""Optional caching layer for language models.
 
-!!! warning
-    This is a beta feature! Please be wary of deploying experimental code to production
+Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
+
+!!! warning "Beta feature"
+    This is a beta feature. Please be wary of deploying experimental code to production
     unless you've taken appropriate precautions.
 
 A cache is useful for two reasons:
@@ -47,17 +49,18 @@ class BaseCache(ABC):
         """Look up based on `prompt` and `llm_string`.
 
         A cache implementation is expected to generate a key from the 2-tuple
-        of prompt and llm_string (e.g., by concatenating them with a delimiter).
+        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
 
         Args:
             prompt: A string representation of the prompt.
                 In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
+
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
-                These invocation parameters are serialized into a string
-                representation.
+
+                These invocation parameters are serialized into a string representation.
 
         Returns:
             On a cache miss, return `None`. On a cache hit, return the cached value.
@@ -76,8 +79,10 @@ class BaseCache(ABC):
                 In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
+
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
+
                 These invocation parameters are serialized into a string
                 representation.
             return_val: The value to be cached. The value is a list of `Generation`
@@ -92,15 +97,17 @@ class BaseCache(ABC):
         """Async look up based on `prompt` and `llm_string`.
 
         A cache implementation is expected to generate a key from the 2-tuple
-        of prompt and llm_string (e.g., by concatenating them with a delimiter).
+        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
 
         Args:
             prompt: A string representation of the prompt.
                 In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
+
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
+
                 These invocation parameters are serialized into a string
                 representation.
 
@@ -123,8 +130,10 @@ class BaseCache(ABC):
                 In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
+
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
+
                 These invocation parameters are serialized into a string
                 representation.
             return_val: The value to be cached. The value is a list of `Generation`
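For orientation, a minimal sketch of the `lookup`/`update` contract these docstrings describe: a dict-backed cache keyed on the `(prompt, llm_string)` 2-tuple. The class name is invented for illustration; `InMemoryCache` in this module is the shipped reference implementation.

```python
from typing import Any

from langchain_core.caches import RETURN_VAL_TYPE, BaseCache


class DictCache(BaseCache):
    """Toy cache keyed on the (prompt, llm_string) 2-tuple (illustrative only)."""

    def __init__(self) -> None:
        self._store: dict[tuple[str, str], RETURN_VAL_TYPE] = {}

    def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        # Cache miss -> None; cache hit -> the cached list of Generation objects.
        return self._store.get((prompt, llm_string))

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        self._store[(prompt, llm_string)] = return_val

    def clear(self, **kwargs: Any) -> None:
        self._store.clear()
```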
langchain_core/callbacks/base.py CHANGED
@@ -420,8 +420,6 @@ class RunManagerMixin:
                 (includes inherited tags).
             metadata: The metadata associated with the custom event
                 (includes inherited metadata).
-
-        !!! version-added "Added in version 0.2.15"
         """
 
 
@@ -882,8 +880,6 @@ class AsyncCallbackHandler(BaseCallbackHandler):
                 (includes inherited tags).
             metadata: The metadata associated with the custom event
                 (includes inherited metadata).
-
-        !!! version-added "Added in version 0.2.15"
         """
 
 
@@ -1566,9 +1566,6 @@ class CallbackManager(BaseCallbackManager):
1566
1566
 
1567
1567
  Raises:
1568
1568
  ValueError: If additional keyword arguments are passed.
1569
-
1570
- !!! version-added "Added in version 0.2.14"
1571
-
1572
1569
  """
1573
1570
  if not self.handlers:
1574
1571
  return
@@ -2042,8 +2039,6 @@ class AsyncCallbackManager(BaseCallbackManager):
2042
2039
 
2043
2040
  Raises:
2044
2041
  ValueError: If additional keyword arguments are passed.
2045
-
2046
- !!! version-added "Added in version 0.2.14"
2047
2042
  """
2048
2043
  if not self.handlers:
2049
2044
  return
@@ -2555,9 +2550,6 @@ async def adispatch_custom_event(
2555
2550
  This is due to a limitation in asyncio for python <= 3.10 that prevents
2556
2551
  LangChain from automatically propagating the config object on the user's
2557
2552
  behalf.
2558
-
2559
- !!! version-added "Added in version 0.2.15"
2560
-
2561
2553
  """
2562
2554
  # Import locally to prevent circular imports.
2563
2555
  from langchain_core.runnables.config import ( # noqa: PLC0415
@@ -2630,9 +2622,6 @@ def dispatch_custom_event(
2630
2622
  foo_ = RunnableLambda(foo)
2631
2623
  foo_.invoke({"a": "1"}, {"callbacks": [CustomCallbackManager()]})
2632
2624
  ```
2633
-
2634
- !!! version-added "Added in version 0.2.15"
2635
-
2636
2625
  """
2637
2626
  # Import locally to prevent circular imports.
2638
2627
  from langchain_core.runnables.config import ( # noqa: PLC0415
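The docstrings above still document `dispatch_custom_event`; a hedged sketch of emitting and consuming a custom event (the handler class and event name here are invented for illustration):

```python
from typing import Any

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.callbacks.manager import dispatch_custom_event
from langchain_core.runnables import RunnableLambda


class PrintEvents(BaseCallbackHandler):
    def on_custom_event(self, name: str, data: Any, **kwargs: Any) -> None:
        # Receives events emitted via dispatch_custom_event; run_id, tags,
        # and metadata arrive as keyword arguments.
        print(f"{name}: {data}")


def step(payload: dict) -> dict:
    # Emit a custom event mid-run; the active callback manager routes it.
    dispatch_custom_event("my_event", {"keys": list(payload)})
    return payload


RunnableLambda(step).invoke({"a": "1"}, {"callbacks": [PrintEvents()]})
```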
langchain_core/chat_history.py CHANGED
@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
         This method may be deprecated in a future release.
 
         Args:
-            message: The human message to add to the store.
+            message: The `HumanMessage` to add to the store.
         """
         if isinstance(message, HumanMessage):
             self.add_message(message)
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
             self.add_message(HumanMessage(content=message))
 
     def add_ai_message(self, message: AIMessage | str) -> None:
-        """Convenience method for adding an AI message string to the store.
+        """Convenience method for adding an `AIMessage` string to the store.
 
         !!! note
             This is a convenience method. Code should favor the bulk `add_messages`
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
         This method may be deprecated in a future release.
 
         Args:
-            message: The AI message to add.
+            message: The `AIMessage` to add.
         """
         if isinstance(message, AIMessage):
             self.add_message(message)
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
         in an efficient manner to avoid unnecessary round-trips to the underlying store.
 
         Args:
-            messages: A sequence of BaseMessage objects to store.
+            messages: A sequence of `BaseMessage` objects to store.
         """
         for message in messages:
             self.add_message(message)
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
         """Async add a list of messages.
 
         Args:
-            messages: A sequence of BaseMessage objects to store.
+            messages: A sequence of `BaseMessage` objects to store.
         """
         await run_in_executor(None, self.add_messages, messages)
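A short sketch of the bulk `add_messages` path these docstrings recommend over the per-message convenience helpers, using the in-memory history shipped with this module:

```python
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage

history = InMemoryChatMessageHistory()
# One round-trip to the store instead of one call per message.
history.add_messages(
    [
        HumanMessage(content="What is LangChain?"),
        AIMessage(content="A framework for building LLM applications."),
    ]
)
print(history.messages)
```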
 
langchain_core/document_loaders/base.py CHANGED
@@ -27,7 +27,7 @@ class BaseLoader(ABC):  # noqa: B024
     """Interface for Document Loader.
 
     Implementations should implement the lazy-loading method using generators
-    to avoid loading all Documents into memory at once.
+    to avoid loading all documents into memory at once.
 
     `load` is provided just for user convenience and should not be overridden.
     """
@@ -53,9 +53,11 @@ class BaseLoader(ABC):  # noqa: B024
     def load_and_split(
         self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as `Document`.
+        """Load `Document` and split into chunks. Chunks are returned as `Document`.
 
-        Do not override this method. It should be considered to be deprecated!
+        !!! danger
+
+            Do not override this method. It should be considered to be deprecated!
 
         Args:
             text_splitter: `TextSplitter` instance to use for splitting documents.
@@ -135,7 +137,7 @@ class BaseBlobParser(ABC):
         """
 
     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a `Document` or `Document` objects.
+        """Eagerly parse the blob into a `Document` or list of `Document` objects.
 
         This is a convenience method for interactive development environment.
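As the `BaseLoader` docstring above suggests, implementations should provide a generator-based `lazy_load` and leave `load`/`load_and_split` alone. A minimal sketch (the class name and metadata keys are illustrative):

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LineLoader(BaseLoader):
    """Yield one Document per line so the whole file never sits in memory."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        with open(self.path, encoding="utf-8") as f:
            for lineno, line in enumerate(f):
                yield Document(
                    page_content=line.rstrip("\n"),
                    metadata={"source": self.path, "line": lineno},
                )
```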
 
langchain_core/document_loaders/blob_loaders.py CHANGED
@@ -28,7 +28,7 @@ class BlobLoader(ABC):
     def yield_blobs(
         self,
     ) -> Iterable[Blob]:
-        """A lazy loader for raw data represented by LangChain's Blob object.
+        """A lazy loader for raw data represented by LangChain's `Blob` object.
 
         Returns:
             A generator over blobs
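A sketch of the `yield_blobs` contract: lazily materialize `Blob` objects from a directory. The class name and glob pattern are invented; `langchain_community` ships a full `FileSystemBlobLoader`.

```python
from collections.abc import Iterable
from pathlib import Path

from langchain_core.document_loaders import Blob, BlobLoader


class TextDirBlobLoader(BlobLoader):
    def __init__(self, root: str) -> None:
        self.root = Path(root)

    def yield_blobs(self) -> Iterable[Blob]:
        # Lazily yield one Blob per file; parsing is left to a BaseBlobParser.
        for path in sorted(self.root.rglob("*.txt")):
            yield Blob.from_path(path)
```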
langchain_core/document_loaders/langsmith.py CHANGED
@@ -14,13 +14,13 @@ from langchain_core.documents import Document
 
 
 class LangSmithLoader(BaseLoader):
-    """Load LangSmith Dataset examples as Documents.
+    """Load LangSmith Dataset examples as `Document` objects.
 
-    Loads the example inputs as the Document page content and places the entire example
-    into the Document metadata. This allows you to easily create few-shot example
-    retrievers from the loaded documents.
+    Loads the example inputs as the `Document` page content and places the entire
+    example into the `Document` metadata. This allows you to easily create few-shot
+    example retrievers from the loaded documents.
 
-    ??? note "Lazy load"
+    ??? note "Lazy loading example"
 
         ```python
         from langchain_core.document_loaders import LangSmithLoader
@@ -34,9 +34,6 @@ class LangSmithLoader(BaseLoader):
         ```python
         # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
         ```
-
-    !!! version-added "Added in version 0.2.34"
-
     """
 
     def __init__(
@@ -69,12 +66,11 @@ class LangSmithLoader(BaseLoader):
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
             example_ids: The IDs of the examples to filter by.
-            as_of: The dataset version tag OR
-                timestamp to retrieve the examples as of.
-                Response examples will only be those that were present at the time
-                of the tagged (or timestamped) version.
+            as_of: The dataset version tag or timestamp to retrieve the examples as of.
+                Response examples will only be those that were present at the time of
+                the tagged (or timestamped) version.
             splits: A list of dataset splits, which are
-                divisions of your dataset such as 'train', 'test', or 'validation'.
+                divisions of your dataset such as `train`, `test`, or `validation`.
                 Returns examples only from the specified splits.
             inline_s3_urls: Whether to inline S3 URLs.
             offset: The offset to start from.
langchain_core/documents/__init__.py CHANGED
@@ -1,7 +1,28 @@
-"""Documents module.
+"""Documents module for data retrieval and processing workflows.
 
-**Document** module is a collection of classes that handle documents
-and their transformations.
+This module provides core abstractions for handling data in retrieval-augmented
+generation (RAG) pipelines, vector stores, and document processing workflows.
+
+!!! warning "Documents vs. message content"
+    This module is distinct from `langchain_core.messages.content`, which provides
+    multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
+    messages).
+
+    **Key distinction:**
+
+    - **Documents** (this module): For **data retrieval and processing workflows**
+        - Vector stores, retrievers, RAG pipelines
+        - Text chunking, embedding, and semantic search
+        - Example: Chunks of a PDF stored in a vector database
+
+    - **Content Blocks** (`messages.content`): For **LLM conversational I/O**
+        - Multimodal message content sent to/from models
+        - Tool calls, reasoning, citations within chat
+        - Example: An image sent to a vision model in a chat message (via
+          [`ImageContentBlock`][langchain.messages.ImageContentBlock])
+
+    While both can represent similar data types (text, files), they serve different
+    architectural purposes in LangChain applications.
 """
 
 from typing import TYPE_CHECKING
langchain_core/documents/base.py CHANGED
@@ -1,4 +1,16 @@
-"""Base classes for media and documents."""
+"""Base classes for media and documents.
+
+This module contains core abstractions for **data retrieval and processing workflows**:
+
+- `BaseMedia`: Base class providing `id` and `metadata` fields
+- `Blob`: Raw data loading (files, binary data) - used by document loaders
+- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
+
+!!! note "Not for LLM chat messages"
+    These classes are for data processing pipelines, not LLM I/O. For multimodal
+    content in chat messages (images, audio in conversations), see
+    `langchain.messages` content blocks instead.
+"""
 
 from __future__ import annotations
 
@@ -19,27 +31,23 @@ PathLike = str | PurePath
 
 
 class BaseMedia(Serializable):
-    """Use to represent media content.
-
-    Media objects can be used to represent raw data, such as text or binary data.
+    """Base class for content used in retrieval and data processing workflows.
 
-    LangChain Media objects allow associating metadata and an optional identifier
-    with the content.
+    Provides common fields for content that needs to be stored, indexed, or searched.
 
-    The presence of an ID and metadata make it easier to store, index, and search
-    over the content in a structured way.
+    !!! note
+        For multimodal content in **chat messages** (images, audio sent to/from LLMs),
+        use `langchain.messages` content blocks instead.
     """
 
     # The ID field is optional at the moment.
     # It will likely become required in a future major release after
-    # it has been adopted by enough vectorstore implementations.
+    # it has been adopted by enough VectorStore implementations.
    id: str | None = Field(default=None, coerce_numbers_to_str=True)
    """An optional identifier for the document.
 
    Ideally this should be unique across the document collection and formatted
    as a UUID, but this will not be enforced.
-
-    !!! version-added "Added in version 0.2.11"
    """
 
    metadata: dict = Field(default_factory=dict)
@@ -47,65 +55,64 @@ class BaseMedia(Serializable):
 
 
 class Blob(BaseMedia):
-    """Blob represents raw data by either reference or value.
+    """Raw data abstraction for document loading and file processing.
 
-    Provides an interface to materialize the blob in different representations, and
-    help to decouple the development of data loaders from the downstream parsing of
-    the raw data.
+    Represents raw bytes or text, either in-memory or by file reference. Used
+    primarily by document loaders to decouple data loading from parsing.
 
-    Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
+    Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
 
-    Example: Initialize a blob from in-memory data
+    ???+ example "Initialize a blob from in-memory data"
 
-        ```python
-        from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-        blob = Blob.from_data("Hello, world!")
+        blob = Blob.from_data("Hello, world!")
 
-        # Read the blob as a string
-        print(blob.as_string())
+        # Read the blob as a string
+        print(blob.as_string())
 
-        # Read the blob as bytes
-        print(blob.as_bytes())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-        # Read the blob as a byte stream
-        with blob.as_bytes_io() as f:
-            print(f.read())
-        ```
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
-    Example: Load from memory and specify mime-type and metadata
+    ??? example "Load from memory and specify MIME type and metadata"
 
-        ```python
-        from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-        blob = Blob.from_data(
-            data="Hello, world!",
-            mime_type="text/plain",
-            metadata={"source": "https://example.com"},
-        )
-        ```
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
-    Example: Load the blob from a file
+    ??? example "Load the blob from a file"
 
-        ```python
-        from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-        blob = Blob.from_path("path/to/file.txt")
+        blob = Blob.from_path("path/to/file.txt")
 
-        # Read the blob as a string
-        print(blob.as_string())
+        # Read the blob as a string
+        print(blob.as_string())
 
-        # Read the blob as bytes
-        print(blob.as_bytes())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-        # Read the blob as a byte stream
-        with blob.as_bytes_io() as f:
-            print(f.read())
-        ```
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
    """
 
    data: bytes | str | None = None
-    """Raw data associated with the blob."""
+    """Raw data associated with the `Blob`."""
    mimetype: str | None = None
    """MimeType not to be confused with a file extension."""
    encoding: str = "utf-8"
@@ -125,7 +132,7 @@ class Blob(BaseMedia):
    def source(self) -> str | None:
        """The source location of the blob as string if known otherwise none.
 
-        If a path is associated with the blob, it will default to the path location.
+        If a path is associated with the `Blob`, it will default to the path location.
 
        Unless explicitly set via a metadata field called `"source"`, in which
        case that value will be used instead.
@@ -213,13 +220,13 @@ class Blob(BaseMedia):
        Args:
            path: Path-like object to file to be read
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: If provided, will be set as the mime-type of the data
-            guess_type: If `True`, the mimetype will be guessed from the file extension,
-                if a mime-type was not provided
-            metadata: Metadata to associate with the blob
+            mime_type: If provided, will be set as the MIME type of the data
+            guess_type: If `True`, the MIME type will be guessed from the file
+                extension, if a MIME type was not provided
+            metadata: Metadata to associate with the `Blob`
 
        Returns:
-            Blob instance
+            `Blob` instance
        """
        if mime_type is None and guess_type:
            mimetype = mimetypes.guess_type(path)[0] if guess_type else None
@@ -245,17 +252,17 @@ class Blob(BaseMedia):
        path: str | None = None,
        metadata: dict | None = None,
    ) -> Blob:
-        """Initialize the blob from in-memory data.
+        """Initialize the `Blob` from in-memory data.
 
        Args:
-            data: The in-memory data associated with the blob
+            data: The in-memory data associated with the `Blob`
            encoding: Encoding to use if decoding the bytes into a string
-            mime_type: If provided, will be set as the mime-type of the data
+            mime_type: If provided, will be set as the MIME type of the data
            path: If provided, will be set as the source from which the data came
-            metadata: Metadata to associate with the blob
+            metadata: Metadata to associate with the `Blob`
 
        Returns:
-            Blob instance
+            `Blob` instance
        """
        return cls(
            data=data,
@@ -276,6 +283,10 @@ class Blob(BaseMedia):
 class Document(BaseMedia):
    """Class for storing a piece of text and associated metadata.
 
+    !!! note
+        `Document` is for **retrieval workflows**, not chat I/O. For sending text
+        to an LLM in a conversation, use message types from `langchain.messages`.
+
    Example:
        ```python
        from langchain_core.documents import Document
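Tying the `BaseMedia`/`Document` fields above together, a small sketch (the UUID-formatted `id` is a recommendation, not enforced):

```python
import uuid

from langchain_core.documents import Document

doc = Document(
    page_content="LangChain is a framework for building LLM applications.",
    id=str(uuid.uuid4()),  # optional; ideally unique across the collection
    metadata={"source": "https://example.com"},
)
print(doc.id, doc.metadata["source"])
```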
langchain_core/documents/compressor.py CHANGED
@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
    This abstraction is primarily used for post-processing of retrieved documents.
 
-    Documents matching a given query are first retrieved.
+    `Document` objects matching a given query are first retrieved.
 
    Then the list of documents can be further processed.
 
    For example, one could re-rank the retrieved documents using an LLM.
 
    !!! note
-        Users should favor using a RunnableLambda instead of sub-classing from this
+        Users should favor using a `RunnableLambda` instead of sub-classing from this
        interface.
 
    """
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
        """Compress retrieved documents given the query context.
 
        Args:
-            documents: The retrieved documents.
+            documents: The retrieved `Document` objects.
            query: The query context.
-            callbacks: Optional callbacks to run during compression.
+            callbacks: Optional `Callbacks` to run during compression.
 
        Returns:
            The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
        """Async compress retrieved documents given the query context.
 
        Args:
-            documents: The retrieved documents.
+            documents: The retrieved `Document` objects.
            query: The query context.
-            callbacks: Optional callbacks to run during compression.
+            callbacks: Optional `Callbacks` to run during compression.
 
        Returns:
            The compressed documents.
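Per the note above favoring `RunnableLambda` over subclassing `BaseDocumentCompressor`, a sketch of that shape; the length filter is an arbitrary stand-in for a real re-ranker:

```python
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def keep_substantial(documents: list[Document]) -> list[Document]:
    # Stand-in post-processing step: drop very short retrieved chunks.
    return [doc for doc in documents if len(doc.page_content) > 100]


compressor = RunnableLambda(keep_substantial)
# compressed = compressor.invoke(retrieved_docs)
```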
langchain_core/documents/transformers.py CHANGED
@@ -16,8 +16,8 @@ if TYPE_CHECKING:
 class BaseDocumentTransformer(ABC):
    """Abstract base class for document transformation.
 
-    A document transformation takes a sequence of Documents and returns a
-    sequence of transformed Documents.
+    A document transformation takes a sequence of `Document` objects and returns a
+    sequence of transformed `Document` objects.
 
    Example:
        ```python
@@ -57,10 +57,10 @@ class BaseDocumentTransformer(ABC):
        """Transform a list of documents.
 
        Args:
-            documents: A sequence of Documents to be transformed.
+            documents: A sequence of `Document` objects to be transformed.
 
        Returns:
-            A sequence of transformed Documents.
+            A sequence of transformed `Document` objects.
        """
 
    async def atransform_documents(
@@ -69,10 +69,10 @@ class BaseDocumentTransformer(ABC):
        """Asynchronously transform a list of documents.
 
        Args:
-            documents: A sequence of Documents to be transformed.
+            documents: A sequence of `Document` objects to be transformed.
 
        Returns:
-            A sequence of transformed Documents.
+            A sequence of transformed `Document` objects.
        """
        return await run_in_executor(
            None, self.transform_documents, documents, **kwargs
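A minimal `BaseDocumentTransformer` sketch matching the contract above. Only the sync method needs implementing, since `atransform_documents` falls back to an executor as shown in the diff; the lowercasing transform is illustrative:

```python
from collections.abc import Sequence
from typing import Any

from langchain_core.documents import Document
from langchain_core.documents.transformers import BaseDocumentTransformer


class LowercaseTransformer(BaseDocumentTransformer):
    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        # Return new Document objects; metadata is carried through unchanged.
        return [
            Document(page_content=doc.page_content.lower(), metadata=doc.metadata)
            for doc in documents
        ]
```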
langchain_core/embeddings/fake.py CHANGED
@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
 
    This embedding model creates embeddings by sampling from a normal distribution.
 
-    !!! warning
+    !!! danger "Toy model"
        Do not use this outside of testing, as it is not a real embedding model.
 
    Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
    This embedding model creates embeddings by sampling from a normal distribution
    with a seed based on the hash of the text.
 
-    !!! warning
+    !!! danger "Toy model"
        Do not use this outside of testing, as it is not a real embedding model.
 
    Instantiate:
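Test-only usage consistent with the "Toy model" warnings above: `DeterministicFakeEmbedding` derives its sampling seed from the text's hash, so the same input yields the same vector, which keeps test assertions stable.

```python
from langchain_core.embeddings import DeterministicFakeEmbedding

embedder = DeterministicFakeEmbedding(size=256)  # vector dimensionality
first = embedder.embed_query("hello")
second = embedder.embed_query("hello")
assert first == second  # same text -> same vector, handy in tests
```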