langchain-core 1.0.0a6__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +23 -26
- langchain_core/_api/deprecation.py +51 -64
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +3 -4
- langchain_core/agents.py +55 -48
- langchain_core/caches.py +65 -66
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +321 -336
- langchain_core/callbacks/file.py +44 -44
- langchain_core/callbacks/manager.py +454 -514
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +60 -57
- langchain_core/chat_history.py +53 -68
- langchain_core/document_loaders/base.py +27 -25
- langchain_core/document_loaders/blob_loaders.py +1 -1
- langchain_core/document_loaders/langsmith.py +44 -48
- langchain_core/documents/__init__.py +23 -3
- langchain_core/documents/base.py +102 -94
- langchain_core/documents/compressor.py +10 -10
- langchain_core/documents/transformers.py +34 -35
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/length_based.py +2 -2
- langchain_core/example_selectors/semantic_similarity.py +28 -32
- langchain_core/exceptions.py +21 -20
- langchain_core/globals.py +3 -151
- langchain_core/indexing/__init__.py +1 -1
- langchain_core/indexing/api.py +121 -126
- langchain_core/indexing/base.py +73 -75
- langchain_core/indexing/in_memory.py +4 -6
- langchain_core/language_models/__init__.py +14 -29
- langchain_core/language_models/_utils.py +58 -61
- langchain_core/language_models/base.py +82 -172
- langchain_core/language_models/chat_models.py +329 -402
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +42 -36
- langchain_core/language_models/llms.py +189 -269
- langchain_core/load/dump.py +9 -12
- langchain_core/load/load.py +18 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +42 -40
- langchain_core/messages/__init__.py +10 -16
- langchain_core/messages/ai.py +148 -148
- langchain_core/messages/base.py +53 -51
- langchain_core/messages/block_translators/__init__.py +19 -22
- langchain_core/messages/block_translators/anthropic.py +6 -6
- langchain_core/messages/block_translators/bedrock_converse.py +5 -5
- langchain_core/messages/block_translators/google_genai.py +10 -7
- langchain_core/messages/block_translators/google_vertexai.py +4 -32
- langchain_core/messages/block_translators/groq.py +117 -21
- langchain_core/messages/block_translators/langchain_v0.py +5 -5
- langchain_core/messages/block_translators/openai.py +11 -11
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +339 -330
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +24 -31
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +19 -29
- langchain_core/messages/tool.py +74 -90
- langchain_core/messages/utils.py +484 -510
- langchain_core/output_parsers/__init__.py +13 -10
- langchain_core/output_parsers/base.py +61 -61
- langchain_core/output_parsers/format_instructions.py +9 -4
- langchain_core/output_parsers/json.py +12 -10
- langchain_core/output_parsers/list.py +21 -23
- langchain_core/output_parsers/openai_functions.py +49 -47
- langchain_core/output_parsers/openai_tools.py +30 -23
- langchain_core/output_parsers/pydantic.py +13 -14
- langchain_core/output_parsers/string.py +5 -5
- langchain_core/output_parsers/transform.py +15 -17
- langchain_core/output_parsers/xml.py +35 -34
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +18 -18
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +16 -16
- langchain_core/outputs/llm_result.py +10 -10
- langchain_core/prompt_values.py +13 -19
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +81 -86
- langchain_core/prompts/chat.py +308 -351
- langchain_core/prompts/dict.py +6 -6
- langchain_core/prompts/few_shot.py +81 -88
- langchain_core/prompts/few_shot_with_templates.py +11 -13
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +4 -6
- langchain_core/prompts/message.py +7 -7
- langchain_core/prompts/prompt.py +24 -39
- langchain_core/prompts/string.py +26 -10
- langchain_core/prompts/structured.py +49 -53
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +61 -198
- langchain_core/runnables/base.py +1551 -1656
- langchain_core/runnables/branch.py +68 -70
- langchain_core/runnables/config.py +72 -89
- langchain_core/runnables/configurable.py +145 -161
- langchain_core/runnables/fallbacks.py +102 -96
- langchain_core/runnables/graph.py +91 -97
- langchain_core/runnables/graph_ascii.py +27 -28
- langchain_core/runnables/graph_mermaid.py +42 -51
- langchain_core/runnables/graph_png.py +43 -16
- langchain_core/runnables/history.py +175 -177
- langchain_core/runnables/passthrough.py +151 -167
- langchain_core/runnables/retry.py +46 -51
- langchain_core/runnables/router.py +30 -35
- langchain_core/runnables/schema.py +75 -80
- langchain_core/runnables/utils.py +60 -67
- langchain_core/stores.py +85 -121
- langchain_core/structured_query.py +8 -8
- langchain_core/sys_info.py +29 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +306 -245
- langchain_core/tools/convert.py +160 -155
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -11
- langchain_core/tools/simple.py +19 -24
- langchain_core/tools/structured.py +32 -39
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +97 -99
- langchain_core/tracers/context.py +29 -52
- langchain_core/tracers/core.py +49 -53
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +65 -64
- langchain_core/tracers/langchain.py +21 -21
- langchain_core/tracers/log_stream.py +45 -45
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +16 -16
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +2 -2
- langchain_core/utils/aiter.py +57 -61
- langchain_core/utils/env.py +9 -9
- langchain_core/utils/function_calling.py +94 -188
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +9 -6
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +36 -40
- langchain_core/utils/json.py +4 -3
- langchain_core/utils/json_schema.py +9 -9
- langchain_core/utils/mustache.py +8 -10
- langchain_core/utils/pydantic.py +35 -37
- langchain_core/utils/strings.py +6 -9
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +66 -62
- langchain_core/vectorstores/base.py +182 -216
- langchain_core/vectorstores/in_memory.py +101 -176
- langchain_core/vectorstores/utils.py +5 -5
- langchain_core/version.py +1 -1
- langchain_core-1.0.4.dist-info/METADATA +69 -0
- langchain_core-1.0.4.dist-info/RECORD +172 -0
- {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.4.dist-info}/WHEEL +1 -1
- langchain_core/memory.py +0 -120
- langchain_core/messages/block_translators/ollama.py +0 -47
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-1.0.0a6.dist-info/METADATA +0 -67
- langchain_core-1.0.0a6.dist-info/RECORD +0 -181
- langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
langchain_core/document_loaders/base.py CHANGED
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from langchain_core.runnables import run_in_executor
 
@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
     """Interface for Document Loader.
 
     Implementations should implement the lazy-loading method using generators
-    to avoid loading all
+    to avoid loading all documents into memory at once.
 
     `load` is provided just for user convenience and should not be overridden.
     """
@@ -35,38 +35,40 @@ class BaseLoader(ABC): # noqa: B024
     # Sub-classes should not implement this method directly. Instead, they
     # should implement the lazy load method.
     def load(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
         Returns:
-
+            The documents.
         """
         return list(self.lazy_load())
 
     async def aload(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.
 
         Returns:
-
+            The documents.
         """
         return [document async for document in self.alazy_load()]
 
     def load_and_split(
-        self, text_splitter:
+        self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load
+        """Load `Document` and split into chunks. Chunks are returned as `Document`.
 
-
+        !!! danger
+
+            Do not override this method. It should be considered to be deprecated!
 
         Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.
 
         Raises:
-            ImportError: If langchain-text-splitters is not installed
-                and no text_splitter is provided.
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.
 
         Returns:
-            List of
+            List of `Document`.
         """
         if text_splitter is None:
             if not _HAS_TEXT_SPLITTERS:
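The contract above — implement `lazy_load` as a generator and inherit `load`/`load_and_split` — is easiest to see in a minimal sketch. The `TextFileLoader` name and the one-`Document`-per-file layout are illustrative assumptions, not part of this diff:

```python
from collections.abc import Iterator
from pathlib import Path

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class TextFileLoader(BaseLoader):
    """Hypothetical loader: one Document per .txt file in a directory."""

    def __init__(self, directory: str) -> None:
        self.directory = directory

    def lazy_load(self) -> Iterator[Document]:
        # Yield documents one at a time so callers can stream large
        # corpora without materializing everything in memory.
        for path in sorted(Path(self.directory).glob("*.txt")):
            yield Document(
                page_content=path.read_text(encoding="utf-8"),
                metadata={"source": str(path)},
            )


loader = TextFileLoader("./corpus")
for doc in loader.lazy_load():  # streams; loader.load() collects a list instead
    print(doc.metadata["source"])
```

Because `load()` just collects `lazy_load()`, a single generator implementation provides both APIs.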
@@ -86,10 +88,10 @@ class BaseLoader(ABC): # noqa: B024
     # Attention: This method will be upgraded into an abstractmethod once it's
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.
 
         Yields:
-
+            The `Document` objects.
         """
         if type(self).load != BaseLoader.load:
             return iter(self.load())
@@ -97,10 +99,10 @@ class BaseLoader(ABC): # noqa: B024
         raise NotImplementedError(msg)
 
     async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.
 
         Yields:
-
+            The `Document` objects.
         """
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
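Per the hunk above, the default `alazy_load` hands the synchronous `lazy_load` to a thread executor via `run_in_executor`, so even a sync-only loader can be consumed with `async for`. A sketch reusing the hypothetical `TextFileLoader` from earlier:

```python
import asyncio


async def collect() -> None:
    loader = TextFileLoader("./corpus")  # hypothetical loader sketched above
    async for doc in loader.alazy_load():
        # Iteration is async even though lazy_load itself is synchronous.
        print(doc.metadata["source"])


asyncio.run(collect())
```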
@@ -115,7 +117,7 @@ class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.
 
     A blob parser provides a way to parse raw data stored in a blob into one
-    or more
+    or more `Document` objects.
 
     The parser can be composed with blob loaders, making it easy to reuse
     a parser independent of how the blob was originally loaded.
@@ -128,25 +130,25 @@ class BaseBlobParser(ABC):
         Subclasses are required to implement this method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            Generator of
+            Generator of `Document` objects
         """
 
     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a
+        """Eagerly parse the blob into a `Document` or list of `Document` objects.
 
         This is a convenience method for interactive development environment.
 
-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.
 
         Subclasses should generally not over-ride this parse method.
 
         Args:
-            blob: Blob instance
+            blob: `Blob` instance
 
         Returns:
-            List of
+            List of `Document` objects
         """
         return list(self.lazy_parse(blob))
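A minimal sketch of the parser contract above — implement `lazy_parse` and inherit the eager `parse`. The `LineParser` name and line-splitting behavior are illustrative assumptions:

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseBlobParser, Blob
from langchain_core.documents import Document


class LineParser(BaseBlobParser):
    """Hypothetical parser: one Document per non-empty line of a blob."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        for line in blob.as_string().splitlines():
            if line.strip():
                yield Document(page_content=line, metadata={"source": blob.source})


blob = Blob.from_data("first line\n\nsecond line")
docs = LineParser().parse(blob)  # parse() simply collects lazy_parse()
```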
langchain_core/document_loaders/blob_loaders.py CHANGED
@@ -28,7 +28,7 @@ class BlobLoader(ABC):
     def yield_blobs(
         self,
     ) -> Iterable[Blob]:
-        """A lazy loader for raw data represented by LangChain's Blob object.
+        """A lazy loader for raw data represented by LangChain's `Blob` object.
 
         Returns:
             A generator over blobs
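A hedged sketch of a `BlobLoader` implementation that pairs naturally with a parser like the one above; `DirectoryBlobLoader` is an illustrative name, not an API in this diff:

```python
from collections.abc import Iterable
from pathlib import Path

from langchain_core.document_loaders import Blob, BlobLoader


class DirectoryBlobLoader(BlobLoader):
    """Hypothetical loader: one Blob per file under a directory tree."""

    def __init__(self, root: str) -> None:
        self.root = root

    def yield_blobs(self) -> Iterable[Blob]:
        for path in sorted(Path(self.root).rglob("*")):
            if path.is_file():
                # from_path records the location and guesses the MIME type.
                yield Blob.from_path(path)


for blob in DirectoryBlobLoader("./data").yield_blobs():
    print(blob.source, blob.mimetype)
```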
langchain_core/document_loaders/langsmith.py CHANGED
@@ -3,8 +3,8 @@
 import datetime
 import json
 import uuid
-from collections.abc import Iterator, Sequence
-from typing import Any
+from collections.abc import Callable, Iterator, Sequence
+from typing import Any
 
 from langsmith import Client as LangSmithClient
 from typing_extensions import override
@@ -14,79 +14,75 @@ from langchain_core.documents import Document
 
 
 class LangSmithLoader(BaseLoader):
-    """Load LangSmith Dataset examples as
+    """Load LangSmith Dataset examples as `Document` objects.
 
-    Loads the example inputs as the Document page content and places the entire
-    into the Document metadata. This allows you to easily create few-shot
-    retrievers from the loaded documents.
+    Loads the example inputs as the `Document` page content and places the entire
+    example into the `Document` metadata. This allows you to easily create few-shot
+    example retrievers from the loaded documents.
 
-
+    ??? note "Lazy loading example"
 
-
+        ```python
+        from langchain_core.document_loaders import LangSmithLoader
 
-
+        loader = LangSmithLoader(dataset_id="...", limit=100)
+        docs = []
+        for doc in loader.lazy_load():
+            docs.append(doc)
+        ```
 
-
-
-
-
-
-    .. code-block:: python
-
-        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
-
-    .. versionadded:: 0.2.34
-
-    """ # noqa: E501
+        ```python
+        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```
+    """
 
     def __init__(
         self,
         *,
-        dataset_id:
-        dataset_name:
-        example_ids:
-        as_of:
-        splits:
+        dataset_id: uuid.UUID | str | None = None,
+        dataset_name: str | None = None,
+        example_ids: Sequence[uuid.UUID | str] | None = None,
+        as_of: datetime.datetime | str | None = None,
+        splits: Sequence[str] | None = None,
         inline_s3_urls: bool = True,
         offset: int = 0,
-        limit:
-        metadata:
-        filter:
+        limit: int | None = None,
+        metadata: dict | None = None,
+        filter: str | None = None,  # noqa: A002
         content_key: str = "",
-        format_content:
-        client:
+        format_content: Callable[..., str] | None = None,
+        client: LangSmithClient | None = None,
         **client_kwargs: Any,
     ) -> None:
         """Create a LangSmith loader.
 
         Args:
-            dataset_id: The ID of the dataset to filter by.
-            dataset_name: The name of the dataset to filter by.
-            content_key: The inputs key to set as Document page content.
-                are interpreted as nested keys. E.g.
+            dataset_id: The ID of the dataset to filter by.
+            dataset_name: The name of the dataset to filter by.
+            content_key: The inputs key to set as Document page content. `'.'` characters
+                are interpreted as nested keys. E.g. `content_key="first.second"` will
                 result in
-
+                `Document(page_content=format_content(example.inputs["first"]["second"]))`
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
-            example_ids: The IDs of the examples to filter by.
-            as_of: The dataset version tag
-
-
-                of the tagged (or timestamped) version.
+            example_ids: The IDs of the examples to filter by.
+            as_of: The dataset version tag or timestamp to retrieve the examples as of.
+                Response examples will only be those that were present at the time of
+                the tagged (or timestamped) version.
             splits: A list of dataset splits, which are
-                divisions of your dataset such as
+                divisions of your dataset such as `train`, `test`, or `validation`.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs.
-            offset: The offset to start from.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
-            metadata: Metadata to filter by.
+            metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
             client: LangSmith Client. If not provided will be initialized from below args.
             client_kwargs: Keyword args to pass to LangSmith client init. Should only be
-                specified if
+                specified if `client` isn't.
 
         Raises:
-            ValueError: If both
+            ValueError: If both `client` and `client_kwargs` are provided.
         """ # noqa: E501
         if client and client_kwargs:
             raise ValueError
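Putting the new signature to work — a hedged usage sketch; the dataset name, nested `content_key`, and lambda are placeholders, with behavior as described in the docstring above:

```python
from langchain_core.document_loaders import LangSmithLoader

loader = LangSmithLoader(
    dataset_name="my-dataset",  # placeholder dataset
    content_key="question.text",  # reads example.inputs["question"]["text"]
    format_content=lambda value: str(value).strip(),
    limit=10,
)

for doc in loader.lazy_load():
    # The whole example (inputs, outputs, ...) lands in doc.metadata.
    print(doc.page_content, doc.metadata["inputs"])
```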
@@ -129,7 +125,7 @@ class LangSmithLoader(BaseLoader):
             yield Document(content_str, metadata=metadata)
 
 
-def _stringify(x:
+def _stringify(x: str | dict) -> str:
     if isinstance(x, str):
         return x
     try:
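The hunk truncates `_stringify` after `try:`. Given the docstring above ("Defaults to JSON-encoding the contents"), a plausible reconstruction of the full helper — an assumption, not the shipped source — is:

```python
import json


def _stringify(x: str | dict) -> str:
    if isinstance(x, str):
        return x
    try:
        # Assumed body: JSON-encode non-string inputs, matching the documented
        # default of JSON-encoding example contents.
        return json.dumps(x, indent=2)
    except TypeError:
        # Assumed fallback for values json.dumps cannot serialize.
        return str(x)
```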
langchain_core/documents/__init__.py CHANGED
@@ -1,8 +1,28 @@
-"""Documents module.
+"""Documents module for data retrieval and processing workflows.
 
-
-and
+This module provides core abstractions for handling data in retrieval-augmented
+generation (RAG) pipelines, vector stores, and document processing workflows.
 
+!!! warning "Documents vs. message content"
+    This module is distinct from `langchain_core.messages.content`, which provides
+    multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
+    messages).
+
+    **Key distinction:**
+
+    - **Documents** (this module): For **data retrieval and processing workflows**
+        - Vector stores, retrievers, RAG pipelines
+        - Text chunking, embedding, and semantic search
+        - Example: Chunks of a PDF stored in a vector database
+
+    - **Content Blocks** (`messages.content`): For **LLM conversational I/O**
+        - Multimodal message content sent to/from models
+        - Tool calls, reasoning, citations within chat
+        - Example: An image sent to a vision model in a chat message (via
+          [`ImageContentBlock`][langchain.messages.ImageContentBlock])
+
+While both can represent similar data types (text, files), they serve different
+architectural purposes in LangChain applications.
 """
 
 from typing import TYPE_CHECKING
langchain_core/documents/base.py CHANGED
@@ -1,4 +1,16 @@
-"""Base classes for media and documents.
+"""Base classes for media and documents.
+
+This module contains core abstractions for **data retrieval and processing workflows**:
+
+- `BaseMedia`: Base class providing `id` and `metadata` fields
+- `Blob`: Raw data loading (files, binary data) - used by document loaders
+- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
+
+!!! note "Not for LLM chat messages"
+    These classes are for data processing pipelines, not LLM I/O. For multimodal
+    content in chat messages (images, audio in conversations), see
+    `langchain.messages` content blocks instead.
+"""
 
 from __future__ import annotations
 
@@ -6,7 +18,7 @@ import contextlib
 import mimetypes
 from io import BufferedReader, BytesIO
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Any, Literal,
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 from pydantic import ConfigDict, Field, model_validator
 
@@ -15,31 +27,27 @@ from langchain_core.load.serializable import Serializable
 if TYPE_CHECKING:
     from collections.abc import Generator
 
-    PathLike =
+    PathLike = str | PurePath
 
 
 class BaseMedia(Serializable):
-    """
-
-    Media objects can be used to represent raw data, such as text or binary data.
+    """Base class for content used in retrieval and data processing workflows.
 
-
-    with the content.
+    Provides common fields for content that needs to be stored, indexed, or searched.
 
-
-
+    !!! note
+        For multimodal content in **chat messages** (images, audio sent to/from LLMs),
+        use `langchain.messages` content blocks instead.
     """
 
     # The ID field is optional at the moment.
     # It will likely become required in a future major release after
-    # it has been adopted by enough
-    id:
+    # it has been adopted by enough VectorStore implementations.
+    id: str | None = Field(default=None, coerce_numbers_to_str=True)
     """An optional identifier for the document.
 
     Ideally this should be unique across the document collection and formatted
     as a UUID, but this will not be enforced.
-
-    .. versionadded:: 0.2.11
     """
 
     metadata: dict = Field(default_factory=dict)
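One consequence of the new `id` declaration worth noting: `coerce_numbers_to_str=True` means numeric ids are coerced rather than rejected. A quick sketch using `Document`, which inherits the field:

```python
from langchain_core.documents import Document

# coerce_numbers_to_str=True turns a numeric id into a string
# instead of failing pydantic validation.
doc = Document(page_content="hello", id=123)
assert doc.id == "123"

# The id remains optional for now, as the comment above notes.
assert Document(page_content="hello").id is None
```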
@@ -47,74 +55,72 @@ class BaseMedia(Serializable):
 
 
 class Blob(BaseMedia):
-    """
-
-    Provides an interface to materialize the blob in different representations, and
-    help to decouple the development of data loaders from the downstream parsing of
-    the raw data.
-
-    Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
-
-    Example: Initialize a blob from in-memory data
+    """Raw data abstraction for document loading and file processing.
 
-
+    Represents raw bytes or text, either in-memory or by file reference. Used
+    primarily by document loaders to decouple data loading from parsing.
 
-
+    Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
 
-
+    ???+ example "Initialize a blob from in-memory data"
 
-
-
+        ```python
+        from langchain_core.documents import Blob
 
-
-        print(blob.as_bytes())
+        blob = Blob.from_data("Hello, world!")
 
-
-
-            print(f.read())
+        # Read the blob as a string
+        print(blob.as_string())
 
-
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
-
+    ??? example "Load from memory and specify MIME type and metadata"
 
-
-
-            mime_type="text/plain",
-            metadata={"source": "https://example.com"},
-        )
+        ```python
+        from langchain_core.documents import Blob
 
-
-
-
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
-
+    ??? example "Load the blob from a file"
 
-
+        ```python
+        from langchain_core.documents import Blob
 
-
-        print(blob.as_string())
+        blob = Blob.from_path("path/to/file.txt")
 
-
-
+        # Read the blob as a string
+        print(blob.as_string())
 
-
-
-            print(f.read())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """
 
-    data:
-    """Raw data associated with the
-    mimetype:
-    """
+    data: bytes | str | None = None
+    """Raw data associated with the `Blob`."""
+    mimetype: str | None = None
+    """MIME type, not to be confused with a file extension."""
     encoding: str = "utf-8"
     """Encoding to use if decoding the bytes into a string.
 
-
+    Uses `utf-8` as default encoding if decoding to string.
     """
-    path:
+    path: PathLike | None = None
     """Location where the original content was found."""
 
     model_config = ConfigDict(
@@ -123,16 +129,16 @@ class Blob(BaseMedia):
     )
 
     @property
-    def source(self) ->
+    def source(self) -> str | None:
         """The source location of the blob as string if known otherwise none.
 
-        If a path is associated with the
+        If a path is associated with the `Blob`, it will default to the path location.
 
-        Unless explicitly set via a metadata field called
+        Unless explicitly set via a metadata field called `'source'`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:
-            return cast("
+            return cast("str | None", self.metadata["source"])
         return str(self.path) if self.path else None
 
     @model_validator(mode="before")
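The precedence described in the docstring — `metadata["source"]` wins over `path` — in a small sketch:

```python
from langchain_core.documents import Blob

# An explicit metadata "source" overrides the path.
blob = Blob.from_path("data/notes.txt", metadata={"source": "s3://bucket/notes.txt"})
assert blob.source == "s3://bucket/notes.txt"

# Without the metadata override, source falls back to the path.
assert Blob.from_path("data/notes.txt").source == "data/notes.txt"
```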
@@ -181,7 +187,7 @@
             raise ValueError(msg)
 
     @contextlib.contextmanager
-    def as_bytes_io(self) -> Generator[
+    def as_bytes_io(self) -> Generator[BytesIO | BufferedReader, None, None]:
         """Read data as a byte stream.
 
         Raises:
@@ -205,22 +211,22 @@
         path: PathLike,
         *,
         encoding: str = "utf-8",
-        mime_type:
+        mime_type: str | None = None,
         guess_type: bool = True,
-        metadata:
+        metadata: dict | None = None,
     ) -> Blob:
         """Load the blob from a path like object.
 
         Args:
-            path:
+            path: Path-like object to file to be read
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
-            guess_type: If True
-
-            metadata: Metadata to associate with the
+            mime_type: If provided, will be set as the MIME type of the data
+            guess_type: If `True`, the MIME type will be guessed from the file
+                extension, if a MIME type was not provided
+            metadata: Metadata to associate with the `Blob`
 
         Returns:
-            Blob instance
+            `Blob` instance
         """
         if mime_type is None and guess_type:
             mimetype = mimetypes.guess_type(path)[0] if guess_type else None
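A sketch of the `guess_type` behavior documented above, assuming the host's standard `mimetypes` table:

```python
from langchain_core.documents import Blob

# guess_type=True (the default) derives the MIME type from the extension.
assert Blob.from_path("notes.txt").mimetype == "text/plain"

# An explicit mime_type takes precedence over guessing.
assert Blob.from_path("data.bin", mime_type="application/x-custom").mimetype == (
    "application/x-custom"
)

# guess_type=False leaves the MIME type unset.
assert Blob.from_path("notes.txt", guess_type=False).mimetype is None
```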
@@ -239,24 +245,24 @@
     @classmethod
     def from_data(
         cls,
-        data:
+        data: str | bytes,
         *,
         encoding: str = "utf-8",
-        mime_type:
-        path:
-        metadata:
+        mime_type: str | None = None,
+        path: str | None = None,
+        metadata: dict | None = None,
     ) -> Blob:
-        """Initialize the
+        """Initialize the `Blob` from in-memory data.
 
         Args:
-            data:
+            data: The in-memory data associated with the `Blob`
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
-            path:
-            metadata: Metadata to associate with the
+            mime_type: If provided, will be set as the MIME type of the data
+            path: If provided, will be set as the source from which the data came
+            metadata: Metadata to associate with the `Blob`
 
         Returns:
-            Blob instance
+            `Blob` instance
         """
         return cls(
             data=data,
@@ -277,16 +283,18 @@
 class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.
 
-
-
-
+    !!! note
+        `Document` is for **retrieval workflows**, not chat I/O. For sending text
+        to an LLM in a conversation, use message types from `langchain.messages`.
 
-
-
-
-            page_content="Hello, world!", metadata={"source": "https://example.com"}
-        )
+    Example:
+        ```python
+        from langchain_core.documents import Document
 
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """
 
     page_content: str
@@ -301,12 +309,12 @@
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return True as this class is serializable."""
+        """Return `True` as this class is serializable."""
         return True
 
     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the
+        """Get the namespace of the LangChain object.
 
         Returns:
             ["langchain", "schema", "document"]
@@ -314,10 +322,10 @@
         return ["langchain", "schema", "document"]
 
     def __str__(self) -> str:
-        """Override __str__ to restrict it to page_content and metadata.
+        """Override `__str__` to restrict it to page_content and metadata.
 
         Returns:
-            A string representation of the `Document`.
+            A string representation of the `Document`.
         """
         # The format matches pydantic format for __str__.
         #