langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (165)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +52 -65
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +19 -19
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +323 -334
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +441 -507
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +48 -63
  17. langchain_core/document_loaders/base.py +23 -23
  18. langchain_core/document_loaders/langsmith.py +37 -37
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +62 -65
  21. langchain_core/documents/compressor.py +4 -4
  22. langchain_core/documents/transformers.py +28 -29
  23. langchain_core/embeddings/fake.py +50 -54
  24. langchain_core/example_selectors/length_based.py +1 -1
  25. langchain_core/example_selectors/semantic_similarity.py +21 -25
  26. langchain_core/exceptions.py +10 -11
  27. langchain_core/globals.py +3 -151
  28. langchain_core/indexing/api.py +61 -66
  29. langchain_core/indexing/base.py +58 -58
  30. langchain_core/indexing/in_memory.py +3 -3
  31. langchain_core/language_models/__init__.py +14 -27
  32. langchain_core/language_models/_utils.py +270 -84
  33. langchain_core/language_models/base.py +55 -162
  34. langchain_core/language_models/chat_models.py +442 -402
  35. langchain_core/language_models/fake.py +11 -11
  36. langchain_core/language_models/fake_chat_models.py +61 -39
  37. langchain_core/language_models/llms.py +123 -231
  38. langchain_core/load/dump.py +4 -5
  39. langchain_core/load/load.py +18 -28
  40. langchain_core/load/mapping.py +2 -4
  41. langchain_core/load/serializable.py +39 -40
  42. langchain_core/messages/__init__.py +61 -22
  43. langchain_core/messages/ai.py +368 -163
  44. langchain_core/messages/base.py +214 -43
  45. langchain_core/messages/block_translators/__init__.py +111 -0
  46. langchain_core/messages/block_translators/anthropic.py +470 -0
  47. langchain_core/messages/block_translators/bedrock.py +94 -0
  48. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  49. langchain_core/messages/block_translators/google_genai.py +530 -0
  50. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  51. langchain_core/messages/block_translators/groq.py +143 -0
  52. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  53. langchain_core/messages/block_translators/openai.py +1010 -0
  54. langchain_core/messages/chat.py +2 -6
  55. langchain_core/messages/content.py +1423 -0
  56. langchain_core/messages/function.py +6 -10
  57. langchain_core/messages/human.py +41 -38
  58. langchain_core/messages/modifier.py +2 -2
  59. langchain_core/messages/system.py +38 -28
  60. langchain_core/messages/tool.py +96 -103
  61. langchain_core/messages/utils.py +478 -504
  62. langchain_core/output_parsers/__init__.py +1 -14
  63. langchain_core/output_parsers/base.py +58 -61
  64. langchain_core/output_parsers/json.py +7 -8
  65. langchain_core/output_parsers/list.py +5 -7
  66. langchain_core/output_parsers/openai_functions.py +49 -47
  67. langchain_core/output_parsers/openai_tools.py +14 -19
  68. langchain_core/output_parsers/pydantic.py +12 -13
  69. langchain_core/output_parsers/string.py +2 -2
  70. langchain_core/output_parsers/transform.py +15 -17
  71. langchain_core/output_parsers/xml.py +8 -10
  72. langchain_core/outputs/__init__.py +1 -1
  73. langchain_core/outputs/chat_generation.py +18 -18
  74. langchain_core/outputs/chat_result.py +1 -3
  75. langchain_core/outputs/generation.py +8 -8
  76. langchain_core/outputs/llm_result.py +10 -10
  77. langchain_core/prompt_values.py +12 -12
  78. langchain_core/prompts/__init__.py +3 -27
  79. langchain_core/prompts/base.py +45 -55
  80. langchain_core/prompts/chat.py +254 -313
  81. langchain_core/prompts/dict.py +5 -5
  82. langchain_core/prompts/few_shot.py +81 -88
  83. langchain_core/prompts/few_shot_with_templates.py +11 -13
  84. langchain_core/prompts/image.py +12 -14
  85. langchain_core/prompts/loading.py +6 -8
  86. langchain_core/prompts/message.py +3 -3
  87. langchain_core/prompts/prompt.py +24 -39
  88. langchain_core/prompts/string.py +4 -4
  89. langchain_core/prompts/structured.py +42 -50
  90. langchain_core/rate_limiters.py +51 -60
  91. langchain_core/retrievers.py +49 -190
  92. langchain_core/runnables/base.py +1484 -1709
  93. langchain_core/runnables/branch.py +45 -61
  94. langchain_core/runnables/config.py +80 -88
  95. langchain_core/runnables/configurable.py +117 -134
  96. langchain_core/runnables/fallbacks.py +83 -79
  97. langchain_core/runnables/graph.py +85 -95
  98. langchain_core/runnables/graph_ascii.py +27 -28
  99. langchain_core/runnables/graph_mermaid.py +38 -50
  100. langchain_core/runnables/graph_png.py +15 -16
  101. langchain_core/runnables/history.py +135 -148
  102. langchain_core/runnables/passthrough.py +124 -150
  103. langchain_core/runnables/retry.py +46 -51
  104. langchain_core/runnables/router.py +25 -30
  105. langchain_core/runnables/schema.py +79 -74
  106. langchain_core/runnables/utils.py +62 -68
  107. langchain_core/stores.py +81 -115
  108. langchain_core/structured_query.py +8 -8
  109. langchain_core/sys_info.py +27 -29
  110. langchain_core/tools/__init__.py +1 -14
  111. langchain_core/tools/base.py +179 -187
  112. langchain_core/tools/convert.py +131 -139
  113. langchain_core/tools/render.py +10 -10
  114. langchain_core/tools/retriever.py +11 -11
  115. langchain_core/tools/simple.py +19 -24
  116. langchain_core/tools/structured.py +30 -39
  117. langchain_core/tracers/__init__.py +1 -9
  118. langchain_core/tracers/base.py +97 -99
  119. langchain_core/tracers/context.py +29 -52
  120. langchain_core/tracers/core.py +50 -60
  121. langchain_core/tracers/evaluation.py +11 -11
  122. langchain_core/tracers/event_stream.py +115 -70
  123. langchain_core/tracers/langchain.py +21 -21
  124. langchain_core/tracers/log_stream.py +43 -43
  125. langchain_core/tracers/memory_stream.py +3 -3
  126. langchain_core/tracers/root_listeners.py +16 -16
  127. langchain_core/tracers/run_collector.py +2 -4
  128. langchain_core/tracers/schemas.py +0 -129
  129. langchain_core/tracers/stdout.py +3 -3
  130. langchain_core/utils/__init__.py +1 -4
  131. langchain_core/utils/_merge.py +46 -8
  132. langchain_core/utils/aiter.py +57 -61
  133. langchain_core/utils/env.py +9 -9
  134. langchain_core/utils/function_calling.py +89 -191
  135. langchain_core/utils/html.py +7 -8
  136. langchain_core/utils/input.py +6 -6
  137. langchain_core/utils/interactive_env.py +1 -1
  138. langchain_core/utils/iter.py +37 -42
  139. langchain_core/utils/json.py +4 -3
  140. langchain_core/utils/json_schema.py +8 -8
  141. langchain_core/utils/mustache.py +9 -11
  142. langchain_core/utils/pydantic.py +33 -35
  143. langchain_core/utils/strings.py +5 -5
  144. langchain_core/utils/usage.py +1 -1
  145. langchain_core/utils/utils.py +80 -54
  146. langchain_core/vectorstores/base.py +129 -164
  147. langchain_core/vectorstores/in_memory.py +99 -174
  148. langchain_core/vectorstores/utils.py +5 -5
  149. langchain_core/version.py +1 -1
  150. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
  151. langchain_core-1.0.0.dist-info/RECORD +172 -0
  152. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  153. langchain_core/beta/__init__.py +0 -1
  154. langchain_core/beta/runnables/__init__.py +0 -1
  155. langchain_core/beta/runnables/context.py +0 -447
  156. langchain_core/memory.py +0 -120
  157. langchain_core/messages/content_blocks.py +0 -176
  158. langchain_core/prompts/pipeline.py +0 -138
  159. langchain_core/pydantic_v1/__init__.py +0 -30
  160. langchain_core/pydantic_v1/dataclasses.py +0 -23
  161. langchain_core/pydantic_v1/main.py +0 -23
  162. langchain_core/tracers/langchain_v1.py +0 -31
  163. langchain_core/utils/loading.py +0 -35
  164. langchain_core-0.3.79.dist-info/RECORD +0 -174
  165. langchain_core-0.3.79.dist-info/entry_points.txt +0 -4

langchain_core/document_loaders/base.py

@@ -3,7 +3,7 @@
  from __future__ import annotations

  from abc import ABC, abstractmethod
- from typing import TYPE_CHECKING, Optional
+ from typing import TYPE_CHECKING

  from langchain_core.runnables import run_in_executor

@@ -35,38 +35,38 @@ class BaseLoader(ABC): # noqa: B024
  # Sub-classes should not implement this method directly. Instead, they
  # should implement the lazy load method.
  def load(self) -> list[Document]:
- """Load data into Document objects.
+ """Load data into `Document` objects.

  Returns:
- the documents.
+ The documents.
  """
  return list(self.lazy_load())

  async def aload(self) -> list[Document]:
- """Load data into Document objects.
+ """Load data into `Document` objects.

  Returns:
- the documents.
+ The documents.
  """
  return [document async for document in self.alazy_load()]

  def load_and_split(
- self, text_splitter: Optional[TextSplitter] = None
+ self, text_splitter: TextSplitter | None = None
  ) -> list[Document]:
- """Load Documents and split into chunks. Chunks are returned as Documents.
+ """Load Documents and split into chunks. Chunks are returned as `Document`.

  Do not override this method. It should be considered to be deprecated!

  Args:
- text_splitter: TextSplitter instance to use for splitting documents.
- Defaults to RecursiveCharacterTextSplitter.
+ text_splitter: `TextSplitter` instance to use for splitting documents.
+ Defaults to `RecursiveCharacterTextSplitter`.

  Raises:
- ImportError: If langchain-text-splitters is not installed
- and no text_splitter is provided.
+ ImportError: If `langchain-text-splitters` is not installed
+ and no `text_splitter` is provided.

  Returns:
- List of Documents.
+ List of `Document`.
  """
  if text_splitter is None:
  if not _HAS_TEXT_SPLITTERS:
@@ -86,10 +86,10 @@ class BaseLoader(ABC): # noqa: B024
  # Attention: This method will be upgraded into an abstractmethod once it's
  # implemented in all the existing subclasses.
  def lazy_load(self) -> Iterator[Document]:
- """A lazy loader for Documents.
+ """A lazy loader for `Document`.

  Yields:
- the documents.
+ The `Document` objects.
  """
  if type(self).load != BaseLoader.load:
  return iter(self.load())
@@ -97,10 +97,10 @@ class BaseLoader(ABC): # noqa: B024
  raise NotImplementedError(msg)

  async def alazy_load(self) -> AsyncIterator[Document]:
- """A lazy loader for Documents.
+ """A lazy loader for `Document`.

  Yields:
- the documents.
+ The `Document` objects.
  """
  iterator = await run_in_executor(None, self.lazy_load)
  done = object()
@@ -115,7 +115,7 @@ class BaseBlobParser(ABC):
  """Abstract interface for blob parsers.

  A blob parser provides a way to parse raw data stored in a blob into one
- or more documents.
+ or more `Document` objects.

  The parser can be composed with blob loaders, making it easy to reuse
  a parser independent of how the blob was originally loaded.
@@ -128,25 +128,25 @@ class BaseBlobParser(ABC):
  Subclasses are required to implement this method.

  Args:
- blob: Blob instance
+ blob: `Blob` instance

  Returns:
- Generator of documents
+ Generator of `Document` objects
  """

  def parse(self, blob: Blob) -> list[Document]:
- """Eagerly parse the blob into a document or documents.
+ """Eagerly parse the blob into a `Document` or `Document` objects.

  This is a convenience method for interactive development environment.

- Production applications should favor the lazy_parse method instead.
+ Production applications should favor the `lazy_parse` method instead.

  Subclasses should generally not over-ride this parse method.

  Args:
- blob: Blob instance
+ blob: `Blob` instance

  Returns:
- List of documents
+ List of `Document` objects
  """
  return list(self.lazy_parse(blob))
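
As these docstrings describe, `lazy_load` is the one method a loader implements; `load` and `aload` are derived from it. A minimal sketch of a custom loader under the 1.0 signatures (the `InMemoryTextLoader` class and its behavior are illustrative, not part of the package):

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class InMemoryTextLoader(BaseLoader):
    """Illustrative loader that yields one Document per input string."""

    def __init__(self, texts: list[str]) -> None:
        self.texts = texts

    def lazy_load(self) -> Iterator[Document]:
        # Only lazy_load is implemented; load() falls back to list(self.lazy_load()).
        for i, text in enumerate(self.texts):
            yield Document(page_content=text, metadata={"index": i})


docs = InMemoryTextLoader(["hello", "world"]).load()
```
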
langchain_core/document_loaders/langsmith.py

@@ -3,8 +3,8 @@
  import datetime
  import json
  import uuid
- from collections.abc import Iterator, Sequence
- from typing import Any, Callable, Optional, Union
+ from collections.abc import Callable, Iterator, Sequence
+ from typing import Any

  from langsmith import Client as LangSmithClient
  from typing_extensions import override
@@ -20,55 +20,55 @@ class LangSmithLoader(BaseLoader):
  into the Document metadata. This allows you to easily create few-shot example
  retrievers from the loaded documents.

- .. dropdown:: Lazy load
+ ??? note "Lazy load"

- .. code-block:: python
+ ```python
+ from langchain_core.document_loaders import LangSmithLoader

- from langchain_core.document_loaders import LangSmithLoader
+ loader = LangSmithLoader(dataset_id="...", limit=100)
+ docs = []
+ for doc in loader.lazy_load():
+ docs.append(doc)
+ ```

- loader = LangSmithLoader(dataset_id="...", limit=100)
- docs = []
- for doc in loader.lazy_load():
- docs.append(doc)
+ ```python
+ # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+ ```

- .. code-block:: python
+ !!! version-added "Added in version 0.2.34"

- # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
-
- .. versionadded:: 0.2.34
-
- """ # noqa: E501
+ """

  def __init__(
  self,
  *,
- dataset_id: Optional[Union[uuid.UUID, str]] = None,
- dataset_name: Optional[str] = None,
- example_ids: Optional[Sequence[Union[uuid.UUID, str]]] = None,
- as_of: Optional[Union[datetime.datetime, str]] = None,
- splits: Optional[Sequence[str]] = None,
+ dataset_id: uuid.UUID | str | None = None,
+ dataset_name: str | None = None,
+ example_ids: Sequence[uuid.UUID | str] | None = None,
+ as_of: datetime.datetime | str | None = None,
+ splits: Sequence[str] | None = None,
  inline_s3_urls: bool = True,
  offset: int = 0,
- limit: Optional[int] = None,
- metadata: Optional[dict] = None,
- filter: Optional[str] = None, # noqa: A002
+ limit: int | None = None,
+ metadata: dict | None = None,
+ filter: str | None = None, # noqa: A002
  content_key: str = "",
- format_content: Optional[Callable[..., str]] = None,
- client: Optional[LangSmithClient] = None,
+ format_content: Callable[..., str] | None = None,
+ client: LangSmithClient | None = None,
  **client_kwargs: Any,
  ) -> None:
  """Create a LangSmith loader.

  Args:
- dataset_id: The ID of the dataset to filter by. Defaults to None.
- dataset_name: The name of the dataset to filter by. Defaults to None.
- content_key: The inputs key to set as Document page content. ``'.'`` characters
- are interpreted as nested keys. E.g. ``content_key="first.second"`` will
+ dataset_id: The ID of the dataset to filter by.
+ dataset_name: The name of the dataset to filter by.
+ content_key: The inputs key to set as Document page content. `'.'` characters
+ are interpreted as nested keys. E.g. `content_key="first.second"` will
  result in
- ``Document(page_content=format_content(example.inputs["first"]["second"]))``
+ `Document(page_content=format_content(example.inputs["first"]["second"]))`
  format_content: Function for converting the content extracted from the example
  inputs into a string. Defaults to JSON-encoding the contents.
- example_ids: The IDs of the examples to filter by. Defaults to None.
+ example_ids: The IDs of the examples to filter by.
  as_of: The dataset version tag OR
  timestamp to retrieve the examples as of.
  Response examples will only be those that were present at the time
@@ -76,17 +76,17 @@ class LangSmithLoader(BaseLoader):
  splits: A list of dataset splits, which are
  divisions of your dataset such as 'train', 'test', or 'validation'.
  Returns examples only from the specified splits.
- inline_s3_urls: Whether to inline S3 URLs. Defaults to True.
- offset: The offset to start from. Defaults to 0.
+ inline_s3_urls: Whether to inline S3 URLs.
+ offset: The offset to start from.
  limit: The maximum number of examples to return.
- metadata: Metadata to filter by. Defaults to None.
+ metadata: Metadata to filter by.
  filter: A structured filter string to apply to the examples.
  client: LangSmith Client. If not provided will be initialized from below args.
  client_kwargs: Keyword args to pass to LangSmith client init. Should only be
- specified if ``client`` isn't.
+ specified if `client` isn't.

  Raises:
- ValueError: If both ``client`` and ``client_kwargs`` are provided.
+ ValueError: If both `client` and `client_kwargs` are provided.
  """ # noqa: E501
  if client and client_kwargs:
  raise ValueError
@@ -129,7 +129,7 @@ class LangSmithLoader(BaseLoader):
  yield Document(content_str, metadata=metadata)


- def _stringify(x: Union[str, dict]) -> str:
+ def _stringify(x: str | dict) -> str:
  if isinstance(x, str):
  return x
  try:
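
Beyond the type-hint changes, this hunk also shows the docstring markup moving from Sphinx/reST directives to MkDocs-Material-style fences and admonitions. A schematic before/after of the admonition styles that appear in these hunks (the classes are illustrative only):

```python
class LoaderDocs03x:
    """Example docstring in the 0.3.x (Sphinx/reST) style.

    .. note::
        Favor ``lazy_load`` in production.

    .. versionadded:: 0.2.34
    """


class LoaderDocs10:
    """The same docstring in the 1.0 (MkDocs Material) style.

    !!! note
        Favor `lazy_load` in production.

    !!! version-added "Added in version 0.2.34"
    """
```
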
langchain_core/documents/__init__.py

@@ -2,7 +2,6 @@

  **Document** module is a collection of classes that handle documents
  and their transformations.
-
  """

  from typing import TYPE_CHECKING
langchain_core/documents/base.py

@@ -6,7 +6,7 @@ import contextlib
  import mimetypes
  from io import BufferedReader, BytesIO
  from pathlib import Path, PurePath
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
+ from typing import TYPE_CHECKING, Any, Literal, cast

  from pydantic import ConfigDict, Field, model_validator

@@ -15,7 +15,7 @@ from langchain_core.load.serializable import Serializable
  if TYPE_CHECKING:
  from collections.abc import Generator

- PathLike = Union[str, PurePath]
+ PathLike = str | PurePath


  class BaseMedia(Serializable):
@@ -33,13 +33,13 @@ class BaseMedia(Serializable):
  # The ID field is optional at the moment.
  # It will likely become required in a future major release after
  # it has been adopted by enough vectorstore implementations.
- id: Optional[str] = Field(default=None, coerce_numbers_to_str=True)
+ id: str | None = Field(default=None, coerce_numbers_to_str=True)
  """An optional identifier for the document.

  Ideally this should be unique across the document collection and formatted
  as a UUID, but this will not be enforced.

- .. versionadded:: 0.2.11
+ !!! version-added "Added in version 0.2.11"
  """

  metadata: dict = Field(default_factory=dict)
@@ -57,64 +57,63 @@ class Blob(BaseMedia):

  Example: Initialize a blob from in-memory data

- .. code-block:: python
+ ```python
+ from langchain_core.documents import Blob

- from langchain_core.documents import Blob
+ blob = Blob.from_data("Hello, world!")

- blob = Blob.from_data("Hello, world!")
+ # Read the blob as a string
+ print(blob.as_string())

- # Read the blob as a string
- print(blob.as_string())
+ # Read the blob as bytes
+ print(blob.as_bytes())

- # Read the blob as bytes
- print(blob.as_bytes())
-
- # Read the blob as a byte stream
- with blob.as_bytes_io() as f:
- print(f.read())
+ # Read the blob as a byte stream
+ with blob.as_bytes_io() as f:
+ print(f.read())
+ ```

  Example: Load from memory and specify mime-type and metadata

- .. code-block:: python
-
- from langchain_core.documents import Blob
+ ```python
+ from langchain_core.documents import Blob

- blob = Blob.from_data(
- data="Hello, world!",
- mime_type="text/plain",
- metadata={"source": "https://example.com"},
- )
+ blob = Blob.from_data(
+ data="Hello, world!",
+ mime_type="text/plain",
+ metadata={"source": "https://example.com"},
+ )
+ ```

  Example: Load the blob from a file

- .. code-block:: python
-
- from langchain_core.documents import Blob
-
- blob = Blob.from_path("path/to/file.txt")
+ ```python
+ from langchain_core.documents import Blob

- # Read the blob as a string
- print(blob.as_string())
+ blob = Blob.from_path("path/to/file.txt")

- # Read the blob as bytes
- print(blob.as_bytes())
+ # Read the blob as a string
+ print(blob.as_string())

- # Read the blob as a byte stream
- with blob.as_bytes_io() as f:
- print(f.read())
+ # Read the blob as bytes
+ print(blob.as_bytes())

+ # Read the blob as a byte stream
+ with blob.as_bytes_io() as f:
+ print(f.read())
+ ```
  """

- data: Union[bytes, str, None] = None
+ data: bytes | str | None = None
  """Raw data associated with the blob."""
- mimetype: Optional[str] = None
+ mimetype: str | None = None
  """MimeType not to be confused with a file extension."""
  encoding: str = "utf-8"
  """Encoding to use if decoding the bytes into a string.

- Use utf-8 as default encoding, if decoding to string.
+ Use `utf-8` as default encoding, if decoding to string.
  """
- path: Optional[PathLike] = None
+ path: PathLike | None = None
  """Location where the original content was found."""

  model_config = ConfigDict(
@@ -123,16 +122,16 @@ class Blob(BaseMedia):
  )

  @property
- def source(self) -> Optional[str]:
+ def source(self) -> str | None:
  """The source location of the blob as string if known otherwise none.

  If a path is associated with the blob, it will default to the path location.

- Unless explicitly set via a metadata field called "source", in which
+ Unless explicitly set via a metadata field called `"source"`, in which
  case that value will be used instead.
  """
  if self.metadata and "source" in self.metadata:
- return cast("Optional[str]", self.metadata["source"])
+ return cast("str | None", self.metadata["source"])
  return str(self.path) if self.path else None

  @model_validator(mode="before")
@@ -181,7 +180,7 @@ class Blob(BaseMedia):
  raise ValueError(msg)

  @contextlib.contextmanager
- def as_bytes_io(self) -> Generator[Union[BytesIO, BufferedReader], None, None]:
+ def as_bytes_io(self) -> Generator[BytesIO | BufferedReader, None, None]:
  """Read data as a byte stream.

  Raises:
@@ -205,18 +204,18 @@ class Blob(BaseMedia):
  path: PathLike,
  *,
  encoding: str = "utf-8",
- mime_type: Optional[str] = None,
+ mime_type: str | None = None,
  guess_type: bool = True,
- metadata: Optional[dict] = None,
+ metadata: dict | None = None,
  ) -> Blob:
  """Load the blob from a path like object.

  Args:
- path: path like object to file to be read
+ path: Path-like object to file to be read
  encoding: Encoding to use if decoding the bytes into a string
- mime_type: if provided, will be set as the mime-type of the data
- guess_type: If True, the mimetype will be guessed from the file extension,
- if a mime-type was not provided
+ mime_type: If provided, will be set as the mime-type of the data
+ guess_type: If `True`, the mimetype will be guessed from the file extension,
+ if a mime-type was not provided
  metadata: Metadata to associate with the blob

  Returns:
@@ -239,20 +238,20 @@ class Blob(BaseMedia):
  @classmethod
  def from_data(
  cls,
- data: Union[str, bytes],
+ data: str | bytes,
  *,
  encoding: str = "utf-8",
- mime_type: Optional[str] = None,
- path: Optional[str] = None,
- metadata: Optional[dict] = None,
+ mime_type: str | None = None,
+ path: str | None = None,
+ metadata: dict | None = None,
  ) -> Blob:
  """Initialize the blob from in-memory data.

  Args:
- data: the in-memory data associated with the blob
+ data: The in-memory data associated with the blob
  encoding: Encoding to use if decoding the bytes into a string
- mime_type: if provided, will be set as the mime-type of the data
- path: if provided, will be set as the source from which the data came
+ mime_type: If provided, will be set as the mime-type of the data
+ path: If provided, will be set as the source from which the data came
  metadata: Metadata to associate with the blob

  Returns:
@@ -278,15 +277,13 @@ class Document(BaseMedia):
  """Class for storing a piece of text and associated metadata.

  Example:
+ ```python
+ from langchain_core.documents import Document

- .. code-block:: python
-
- from langchain_core.documents import Document
-
- document = Document(
- page_content="Hello, world!", metadata={"source": "https://example.com"}
- )
-
+ document = Document(
+ page_content="Hello, world!", metadata={"source": "https://example.com"}
+ )
+ ```
  """

  page_content: str
@@ -306,7 +303,7 @@ class Document(BaseMedia):

  @classmethod
  def get_lc_namespace(cls) -> list[str]:
- """Get the namespace of the langchain object.
+ """Get the namespace of the LangChain object.

  Returns:
  ["langchain", "schema", "document"]
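
The hunks above also document two behaviors worth calling out: `id` is coerced to a string when a number is passed (`coerce_numbers_to_str=True`), and `Blob.source` prefers an explicit `metadata["source"]` over the path. A small sketch of both, as described by the docstrings (illustrative values, not executed against this release):

```python
from langchain_core.documents import Blob, Document

# `id` accepts numbers but stores them as strings (coerce_numbers_to_str=True).
doc = Document(page_content="Hello, world!", id=42)
assert doc.id == "42"

# `source` falls back to the path unless metadata["source"] is set explicitly.
blob = Blob.from_data(
    "payload",
    path="local/file.txt",
    metadata={"source": "https://example.com"},
)
assert blob.source == "https://example.com"
```
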
langchain_core/documents/compressor.py

@@ -3,7 +3,7 @@
  from __future__ import annotations

  from abc import ABC, abstractmethod
- from typing import TYPE_CHECKING, Optional
+ from typing import TYPE_CHECKING

  from pydantic import BaseModel

@@ -27,7 +27,7 @@ class BaseDocumentCompressor(BaseModel, ABC):

  For example, one could re-rank the retrieved documents using an LLM.

- .. note::
+ !!! note
  Users should favor using a RunnableLambda instead of sub-classing from this
  interface.

@@ -38,7 +38,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
  self,
  documents: Sequence[Document],
  query: str,
- callbacks: Optional[Callbacks] = None,
+ callbacks: Callbacks | None = None,
  ) -> Sequence[Document]:
  """Compress retrieved documents given the query context.

@@ -56,7 +56,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
  self,
  documents: Sequence[Document],
  query: str,
- callbacks: Optional[Callbacks] = None,
+ callbacks: Callbacks | None = None,
  ) -> Sequence[Document]:
  """Async compress retrieved documents given the query context.

langchain_core/documents/transformers.py

@@ -20,35 +20,34 @@ class BaseDocumentTransformer(ABC):
  sequence of transformed Documents.

  Example:
- .. code-block:: python
-
- class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
- embeddings: Embeddings
- similarity_fn: Callable = cosine_similarity
- similarity_threshold: float = 0.95
-
- class Config:
- arbitrary_types_allowed = True
-
- def transform_documents(
- self, documents: Sequence[Document], **kwargs: Any
- ) -> Sequence[Document]:
- stateful_documents = get_stateful_documents(documents)
- embedded_documents = _get_embeddings_from_stateful_docs(
- self.embeddings, stateful_documents
- )
- included_idxs = _filter_similar_embeddings(
- embedded_documents,
- self.similarity_fn,
- self.similarity_threshold,
- )
- return [stateful_documents[i] for i in sorted(included_idxs)]
-
- async def atransform_documents(
- self, documents: Sequence[Document], **kwargs: Any
- ) -> Sequence[Document]:
- raise NotImplementedError
-
+ ```python
+ class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
+ embeddings: Embeddings
+ similarity_fn: Callable = cosine_similarity
+ similarity_threshold: float = 0.95
+
+ class Config:
+ arbitrary_types_allowed = True
+
+ def transform_documents(
+ self, documents: Sequence[Document], **kwargs: Any
+ ) -> Sequence[Document]:
+ stateful_documents = get_stateful_documents(documents)
+ embedded_documents = _get_embeddings_from_stateful_docs(
+ self.embeddings, stateful_documents
+ )
+ included_idxs = _filter_similar_embeddings(
+ embedded_documents,
+ self.similarity_fn,
+ self.similarity_threshold,
+ )
+ return [stateful_documents[i] for i in sorted(included_idxs)]
+
+ async def atransform_documents(
+ self, documents: Sequence[Document], **kwargs: Any
+ ) -> Sequence[Document]:
+ raise NotImplementedError
+ ```
  """

  @abstractmethod
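
The docstring example above relies on helper functions that are not shown in this diff. For a self-contained picture of the interface, here is a minimal transformer sketch under the same 1.0 conventions (the class and its truncation behavior are illustrative only, not part of the package):

```python
from collections.abc import Sequence
from typing import Any

from langchain_core.documents import Document
from langchain_core.documents.transformers import BaseDocumentTransformer


class TruncatingTransformer(BaseDocumentTransformer):
    """Illustrative transformer that truncates each Document's content."""

    def __init__(self, max_chars: int = 200) -> None:
        self.max_chars = max_chars

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        # Keep metadata, shorten page_content; only the sync method is required,
        # the async variant falls back to running this in an executor.
        return [
            Document(
                page_content=doc.page_content[: self.max_chars],
                metadata=doc.metadata,
            )
            for doc in documents
        ]
```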