langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (172)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/document_loaders/base.py

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from langchain_core.runnables import run_in_executor
 
@@ -15,6 +15,13 @@ if TYPE_CHECKING:
     from langchain_core.documents import Document
     from langchain_core.documents.base import Blob
 
+try:
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+    _HAS_TEXT_SPLITTERS = True
+except ImportError:
+    _HAS_TEXT_SPLITTERS = False
+
 
 class BaseLoader(ABC):  # noqa: B024
     """Interface for Document Loader.
@@ -28,37 +35,47 @@ class BaseLoader(ABC):  # noqa: B024
     # Sub-classes should not implement this method directly. Instead, they
     # should implement the lazy load method.
     def load(self) -> list[Document]:
-        """Load data into Document objects."""
+        """Load data into `Document` objects.
+
+        Returns:
+            The documents.
+        """
         return list(self.lazy_load())
 
     async def aload(self) -> list[Document]:
-        """Load data into Document objects."""
+        """Load data into `Document` objects.
+
+        Returns:
+            The documents.
+        """
         return [document async for document in self.alazy_load()]
 
     def load_and_split(
-        self, text_splitter: Optional[TextSplitter] = None
+        self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as Documents.
+        """Load Documents and split into chunks. Chunks are returned as `Document`.
 
         Do not override this method. It should be considered to be deprecated!
 
         Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter.
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.
+
+        Raises:
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.
 
         Returns:
-            List of Documents.
+            List of `Document`.
         """
         if text_splitter is None:
-            try:
-                from langchain_text_splitters import RecursiveCharacterTextSplitter
-            except ImportError as e:
+            if not _HAS_TEXT_SPLITTERS:
                 msg = (
                     "Unable to import from langchain_text_splitters. Please specify "
                     "text_splitter or install langchain_text_splitters with "
                     "`pip install -U langchain-text-splitters`."
                 )
-                raise ImportError(msg) from e
+                raise ImportError(msg)
 
             text_splitter_: TextSplitter = RecursiveCharacterTextSplitter()
         else:
@@ -69,14 +86,22 @@ class BaseLoader(ABC):  # noqa: B024
     # Attention: This method will be upgraded into an abstractmethod once it's
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for Documents."""
+        """A lazy loader for `Document`.
+
+        Yields:
+            The `Document` objects.
+        """
         if type(self).load != BaseLoader.load:
             return iter(self.load())
         msg = f"{self.__class__.__name__} does not implement lazy_load()"
         raise NotImplementedError(msg)
 
     async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for Documents."""
+        """A lazy loader for `Document`.
+
+        Yields:
+            The `Document` objects.
+        """
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
         while True:
@@ -90,7 +115,7 @@ class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.
 
     A blob parser provides a way to parse raw data stored in a blob into one
-    or more documents.
+    or more `Document` objects.
 
     The parser can be composed with blob loaders, making it easy to reuse
     a parser independent of how the blob was originally loaded.
@@ -103,25 +128,25 @@ class BaseBlobParser(ABC):
         Subclasses are required to implement this method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            Generator of documents
+            Generator of `Document` objects
         """
 
     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a document or documents.
+        """Eagerly parse the blob into a `Document` or `Document` objects.
 
         This is a convenience method for interactive development environment.
 
-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.
 
         Subclasses should generally not over-ride this parse method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            List of documents
+            List of `Document` objects
         """
         return list(self.lazy_parse(blob))
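To make the reworked `BaseLoader` contract above concrete, here is a minimal sketch (not part of the package) of a custom loader: only `lazy_load` is implemented, and the inherited `load()` builds the list on top of it. The `InMemoryTextLoader` name and its metadata layout are hypothetical.

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class InMemoryTextLoader(BaseLoader):  # hypothetical example loader
    """Yield one Document per input string."""

    def __init__(self, texts: list[str]) -> None:
        self.texts = texts

    def lazy_load(self) -> Iterator[Document]:
        # Subclasses implement lazy_load; BaseLoader.load()/aload() wrap it.
        for i, text in enumerate(self.texts):
            yield Document(page_content=text, metadata={"index": i})


docs = InMemoryTextLoader(["hello", "world"]).load()

# Per the hunks above, load_and_split() called without a text_splitter now
# consults the module-level _HAS_TEXT_SPLITTERS flag and raises ImportError at
# call time when langchain-text-splitters is not installed.
```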
langchain_core/document_loaders/langsmith.py

@@ -3,8 +3,8 @@
 import datetime
 import json
 import uuid
-from collections.abc import Iterator, Sequence
-from typing import Any, Callable, Optional, Union
+from collections.abc import Callable, Iterator, Sequence
+from typing import Any
 
 from langsmith import Client as LangSmithClient
 from typing_extensions import override
@@ -20,55 +20,55 @@ class LangSmithLoader(BaseLoader):
     into the Document metadata. This allows you to easily create few-shot example
     retrievers from the loaded documents.
 
-    .. dropdown:: Lazy load
+    ??? note "Lazy load"
 
-        .. code-block:: python
+        ```python
+        from langchain_core.document_loaders import LangSmithLoader
 
-            from langchain_core.document_loaders import LangSmithLoader
+        loader = LangSmithLoader(dataset_id="...", limit=100)
+        docs = []
+        for doc in loader.lazy_load():
+            docs.append(doc)
+        ```
 
-            loader = LangSmithLoader(dataset_id="...", limit=100)
-            docs = []
-            for doc in loader.lazy_load():
-                docs.append(doc)
+        ```python
+        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```
 
-        .. code-block:: pycon
+    !!! version-added "Added in version 0.2.34"
 
-            # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
-
-    .. versionadded:: 0.2.34
-
-    """  # noqa: E501
+    """
 
     def __init__(
         self,
         *,
-        dataset_id: Optional[Union[uuid.UUID, str]] = None,
-        dataset_name: Optional[str] = None,
-        example_ids: Optional[Sequence[Union[uuid.UUID, str]]] = None,
-        as_of: Optional[Union[datetime.datetime, str]] = None,
-        splits: Optional[Sequence[str]] = None,
+        dataset_id: uuid.UUID | str | None = None,
+        dataset_name: str | None = None,
+        example_ids: Sequence[uuid.UUID | str] | None = None,
+        as_of: datetime.datetime | str | None = None,
+        splits: Sequence[str] | None = None,
         inline_s3_urls: bool = True,
         offset: int = 0,
-        limit: Optional[int] = None,
-        metadata: Optional[dict] = None,
-        filter: Optional[str] = None,  # noqa: A002
+        limit: int | None = None,
+        metadata: dict | None = None,
+        filter: str | None = None,  # noqa: A002
         content_key: str = "",
-        format_content: Optional[Callable[..., str]] = None,
-        client: Optional[LangSmithClient] = None,
+        format_content: Callable[..., str] | None = None,
+        client: LangSmithClient | None = None,
         **client_kwargs: Any,
     ) -> None:
         """Create a LangSmith loader.
 
         Args:
-            dataset_id: The ID of the dataset to filter by. Defaults to None.
-            dataset_name: The name of the dataset to filter by. Defaults to None.
-            content_key: The inputs key to set as Document page content. ``'.'`` characters
-                are interpreted as nested keys. E.g. ``content_key="first.second"`` will
+            dataset_id: The ID of the dataset to filter by.
+            dataset_name: The name of the dataset to filter by.
+            content_key: The inputs key to set as Document page content. `'.'` characters
+                are interpreted as nested keys. E.g. `content_key="first.second"` will
                 result in
-                ``Document(page_content=format_content(example.inputs["first"]["second"]))``
+                `Document(page_content=format_content(example.inputs["first"]["second"]))`
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
-            example_ids: The IDs of the examples to filter by. Defaults to None.
+            example_ids: The IDs of the examples to filter by.
             as_of: The dataset version tag OR
                 timestamp to retrieve the examples as of.
                 Response examples will only be those that were present at the time
@@ -76,14 +76,17 @@ class LangSmithLoader(BaseLoader):
             splits: A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs. Defaults to True.
-            offset: The offset to start from. Defaults to 0.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
-            metadata: Metadata to filter by. Defaults to None.
+            metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
             client: LangSmith Client. If not provided will be initialized from below args.
             client_kwargs: Keyword args to pass to LangSmith client init. Should only be
-                specified if ``client`` isn't.
+                specified if `client` isn't.
+
+        Raises:
+            ValueError: If both `client` and `client_kwargs` are provided.
         """  # noqa: E501
         if client and client_kwargs:
             raise ValueError
@@ -126,7 +129,7 @@ class LangSmithLoader(BaseLoader):
             yield Document(content_str, metadata=metadata)
 
 
-def _stringify(x: Union[str, dict]) -> str:
+def _stringify(x: str | dict) -> str:
     if isinstance(x, str):
         return x
     try:
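The `content_key` nesting described in the docstring above can be sketched with a small, hypothetical call; the dataset name and input shape are placeholders, and LangSmith credentials plus an existing dataset are assumed.

```python
from langchain_core.document_loaders import LangSmithLoader

# Assumes example inputs shaped like {"question": {"text": "..."}}:
# the '.' in content_key selects the nested key.
loader = LangSmithLoader(
    dataset_name="my-dataset",  # placeholder
    content_key="question.text",
    format_content=str,  # default behavior would JSON-encode the extracted value
    limit=10,
)

for doc in loader.lazy_load():
    # Full example inputs/outputs land in the metadata, per the class docstring.
    print(doc.page_content, doc.metadata["outputs"])
```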
langchain_core/documents/__init__.py

@@ -2,7 +2,6 @@
 
 **Document** module is a collection of classes that handle documents
 and their transformations.
-
 """
 
 from typing import TYPE_CHECKING
langchain_core/documents/base.py

@@ -6,7 +6,7 @@ import contextlib
 import mimetypes
 from io import BufferedReader, BytesIO
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 from pydantic import ConfigDict, Field, model_validator
 
@@ -15,7 +15,7 @@ from langchain_core.load.serializable import Serializable
 if TYPE_CHECKING:
     from collections.abc import Generator
 
-PathLike = Union[str, PurePath]
+PathLike = str | PurePath
 
 
 class BaseMedia(Serializable):
@@ -33,13 +33,13 @@ class BaseMedia(Serializable):
     # The ID field is optional at the moment.
     # It will likely become required in a future major release after
     # it has been adopted by enough vectorstore implementations.
-    id: Optional[str] = Field(default=None, coerce_numbers_to_str=True)
+    id: str | None = Field(default=None, coerce_numbers_to_str=True)
     """An optional identifier for the document.
 
     Ideally this should be unique across the document collection and formatted
     as a UUID, but this will not be enforced.
 
-    .. versionadded:: 0.2.11
+    !!! version-added "Added in version 0.2.11"
     """
 
     metadata: dict = Field(default_factory=dict)
@@ -57,64 +57,63 @@ class Blob(BaseMedia):
 
     Example: Initialize a blob from in-memory data
 
-        .. code-block:: python
+        ```python
+        from langchain_core.documents import Blob
 
-            from langchain_core.documents import Blob
+        blob = Blob.from_data("Hello, world!")
 
-            blob = Blob.from_data("Hello, world!")
+        # Read the blob as a string
+        print(blob.as_string())
 
-            # Read the blob as a string
-            print(blob.as_string())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-            # Read the blob as bytes
-            print(blob.as_bytes())
-
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
     Example: Load from memory and specify mime-type and metadata
 
-        .. code-block:: python
-
-            from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-            blob = Blob.from_data(
-                data="Hello, world!",
-                mime_type="text/plain",
-                metadata={"source": "https://example.com"}
-            )
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
     Example: Load the blob from a file
 
-        .. code-block:: python
+        ```python
+        from langchain_core.documents import Blob
 
-            from langchain_core.documents import Blob
+        blob = Blob.from_path("path/to/file.txt")
 
-            blob = Blob.from_path("path/to/file.txt")
+        # Read the blob as a string
+        print(blob.as_string())
 
-            # Read the blob as a string
-            print(blob.as_string())
-
-            # Read the blob as bytes
-            print(blob.as_bytes())
-
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """
 
-    data: Union[bytes, str, None] = None
+    data: bytes | str | None = None
     """Raw data associated with the blob."""
-    mimetype: Optional[str] = None
+    mimetype: str | None = None
    """MimeType not to be confused with a file extension."""
     encoding: str = "utf-8"
     """Encoding to use if decoding the bytes into a string.
 
-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
     """
-    path: Optional[PathLike] = None
+    path: PathLike | None = None
     """Location where the original content was found."""
 
     model_config = ConfigDict(
@@ -123,16 +122,16 @@ class Blob(BaseMedia):
     )
 
     @property
-    def source(self) -> Optional[str]:
+    def source(self) -> str | None:
         """The source location of the blob as string if known otherwise none.
 
         If a path is associated with the blob, it will default to the path location.
 
-        Unless explicitly set via a metadata field called "source", in which
+        Unless explicitly set via a metadata field called `"source"`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:
-            return cast("Optional[str]", self.metadata["source"])
+            return cast("str | None", self.metadata["source"])
         return str(self.path) if self.path else None
 
     @model_validator(mode="before")
@@ -145,7 +144,14 @@ class Blob(BaseMedia):
         return values
 
     def as_string(self) -> str:
-        """Read data as a string."""
+        """Read data as a string.
+
+        Raises:
+            ValueError: If the blob cannot be represented as a string.
+
+        Returns:
+            The data as a string.
+        """
         if self.data is None and self.path:
             return Path(self.path).read_text(encoding=self.encoding)
         if isinstance(self.data, bytes):
@@ -156,7 +162,14 @@ class Blob(BaseMedia):
         raise ValueError(msg)
 
     def as_bytes(self) -> bytes:
-        """Read data as bytes."""
+        """Read data as bytes.
+
+        Raises:
+            ValueError: If the blob cannot be represented as bytes.
+
+        Returns:
+            The data as bytes.
+        """
         if isinstance(self.data, bytes):
             return self.data
         if isinstance(self.data, str):
@@ -167,8 +180,15 @@ class Blob(BaseMedia):
         raise ValueError(msg)
 
     @contextlib.contextmanager
-    def as_bytes_io(self) -> Generator[Union[BytesIO, BufferedReader], None, None]:
-        """Read data as a byte stream."""
+    def as_bytes_io(self) -> Generator[BytesIO | BufferedReader, None, None]:
+        """Read data as a byte stream.
+
+        Raises:
+            NotImplementedError: If the blob cannot be represented as a byte stream.
+
+        Yields:
+            The data as a byte stream.
+        """
         if isinstance(self.data, bytes):
             yield BytesIO(self.data)
         elif self.data is None and self.path:
@@ -184,18 +204,18 @@ class Blob(BaseMedia):
         path: PathLike,
         *,
         encoding: str = "utf-8",
-        mime_type: Optional[str] = None,
+        mime_type: str | None = None,
         guess_type: bool = True,
-        metadata: Optional[dict] = None,
+        metadata: dict | None = None,
     ) -> Blob:
         """Load the blob from a path like object.
 
         Args:
-            path: path like object to file to be read
+            path: Path-like object to file to be read
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            guess_type: If True, the mimetype will be guessed from the file extension,
-                if a mime-type was not provided
+            mime_type: If provided, will be set as the mime-type of the data
+            guess_type: If `True`, the mimetype will be guessed from the file extension,
+                if a mime-type was not provided
             metadata: Metadata to associate with the blob
 
         Returns:
@@ -218,20 +238,20 @@ class Blob(BaseMedia):
     @classmethod
     def from_data(
         cls,
-        data: Union[str, bytes],
+        data: str | bytes,
         *,
         encoding: str = "utf-8",
-        mime_type: Optional[str] = None,
-        path: Optional[str] = None,
-        metadata: Optional[dict] = None,
+        mime_type: str | None = None,
+        path: str | None = None,
+        metadata: dict | None = None,
     ) -> Blob:
         """Initialize the blob from in-memory data.
 
         Args:
-            data: the in-memory data associated with the blob
+            data: The in-memory data associated with the blob
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            path: if provided, will be set as the source from which the data came
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
             metadata: Metadata to associate with the blob
 
         Returns:
@@ -246,7 +266,7 @@ class Blob(BaseMedia):
         )
 
     def __repr__(self) -> str:
-        """Define the blob representation."""
+        """Return the blob representation."""
         str_repr = f"Blob {id(self)}"
         if self.source:
             str_repr += f" {self.source}"
@@ -257,16 +277,13 @@ class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.
 
     Example:
+        ```python
+        from langchain_core.documents import Document
 
-        .. code-block:: python
-
-            from langchain_core.documents import Document
-
-            document = Document(
-                page_content="Hello, world!",
-                metadata={"source": "https://example.com"}
-            )
-
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """
 
     page_content: str
@@ -277,23 +294,28 @@ class Document(BaseMedia):
         """Pass page_content in as positional or named arg."""
         # my-py is complaining that page_content is not defined on the base class.
         # Here, we're relying on pydantic base class to handle the validation.
-        super().__init__(page_content=page_content, **kwargs)  # type: ignore[call-arg]
+        super().__init__(page_content=page_content, **kwargs)
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return whether this class is serializable."""
+        """Return True as this class is serializable."""
         return True
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.
 
-        Default namespace is ["langchain", "schema", "document"].
+        Returns:
+            ["langchain", "schema", "document"]
         """
         return ["langchain", "schema", "document"]
 
     def __str__(self) -> str:
-        """Override __str__ to restrict it to page_content and metadata."""
+        """Override __str__ to restrict it to page_content and metadata.
+
+        Returns:
+            A string representation of the Document.
+        """
         # The format matches pydantic format for __str__.
         #
         # The purpose of this change is to make sure that user code that
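As a quick illustration of the `source` behavior documented in the hunks above (a sketch, not part of the diff): a `"source"` key in `metadata` takes precedence over the blob's `path`.

```python
from langchain_core.documents import Blob, Document

blob = Blob.from_data(
    "Hello, world!",
    mime_type="text/plain",
    path="local/copy.txt",  # placeholder path
    metadata={"source": "https://example.com"},
)

print(blob.source)       # -> "https://example.com" (metadata wins over the path)
print(blob.as_string())  # -> "Hello, world!"

doc = Document(page_content=blob.as_string(), metadata={"source": blob.source})
print(doc)  # __str__ is restricted to page_content and metadata
```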
langchain_core/documents/compressor.py

@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
 
@@ -19,17 +19,18 @@ if TYPE_CHECKING:
 class BaseDocumentCompressor(BaseModel, ABC):
     """Base class for document compressors.
 
-    This abstraction is primarily used for
-    post-processing of retrieved documents.
+    This abstraction is primarily used for post-processing of retrieved documents.
 
     Documents matching a given query are first retrieved.
+
     Then the list of documents can be further processed.
 
-    For example, one could re-rank the retrieved documents
-    using an LLM.
+    For example, one could re-rank the retrieved documents using an LLM.
+
+    !!! note
+        Users should favor using a RunnableLambda instead of sub-classing from this
+        interface.
 
-    **Note** users should favor using a RunnableLambda
-    instead of sub-classing from this interface.
     """
 
     @abstractmethod
@@ -37,7 +38,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks: Optional[Callbacks] = None,
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Compress retrieved documents given the query context.
 
@@ -48,13 +49,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
         Returns:
             The compressed documents.
+
         """
 
     async def acompress_documents(
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks: Optional[Callbacks] = None,
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Async compress retrieved documents given the query context.
 
@@ -65,6 +67,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
         Returns:
             The compressed documents.
+
         """
         return await run_in_executor(
             None, self.compress_documents, documents, query, callbacks
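The note added above recommends a `RunnableLambda` over subclassing `BaseDocumentCompressor`. A minimal sketch of that approach follows; the length-based filter is an arbitrary stand-in for real post-processing logic.

```python
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def keep_short_docs(documents: list[Document]) -> list[Document]:
    """Toy post-processing step: drop long documents."""
    return [doc for doc in documents if len(doc.page_content) < 500]


# Composable in a chain, e.g. retriever | RunnableLambda(keep_short_docs)
compress = RunnableLambda(keep_short_docs)

docs = [Document(page_content="short"), Document(page_content="x" * 1000)]
print(compress.invoke(docs))  # keeps only the short document
```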