langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +45 -70
- langchain_core/_api/deprecation.py +80 -80
- langchain_core/_api/path.py +22 -8
- langchain_core/_import_utils.py +10 -4
- langchain_core/agents.py +25 -21
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +341 -348
- langchain_core/callbacks/file.py +55 -44
- langchain_core/callbacks/manager.py +546 -683
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +35 -36
- langchain_core/callbacks/usage.py +65 -70
- langchain_core/chat_history.py +48 -55
- langchain_core/document_loaders/base.py +46 -21
- langchain_core/document_loaders/langsmith.py +39 -36
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +96 -74
- langchain_core/documents/compressor.py +12 -9
- langchain_core/documents/transformers.py +29 -28
- langchain_core/embeddings/fake.py +56 -57
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +15 -9
- langchain_core/globals.py +4 -163
- langchain_core/indexing/api.py +132 -125
- langchain_core/indexing/base.py +64 -67
- langchain_core/indexing/in_memory.py +26 -6
- langchain_core/language_models/__init__.py +15 -27
- langchain_core/language_models/_utils.py +267 -117
- langchain_core/language_models/base.py +92 -177
- langchain_core/language_models/chat_models.py +547 -407
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +72 -118
- langchain_core/language_models/llms.py +168 -242
- langchain_core/load/dump.py +8 -11
- langchain_core/load/load.py +32 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +50 -56
- langchain_core/messages/__init__.py +36 -51
- langchain_core/messages/ai.py +377 -150
- langchain_core/messages/base.py +239 -47
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -3
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +7 -7
- langchain_core/messages/human.py +44 -38
- langchain_core/messages/modifier.py +3 -2
- langchain_core/messages/system.py +40 -27
- langchain_core/messages/tool.py +160 -58
- langchain_core/messages/utils.py +527 -638
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +68 -104
- langchain_core/output_parsers/json.py +13 -17
- langchain_core/output_parsers/list.py +11 -33
- langchain_core/output_parsers/openai_functions.py +56 -74
- langchain_core/output_parsers/openai_tools.py +68 -109
- langchain_core/output_parsers/pydantic.py +15 -13
- langchain_core/output_parsers/string.py +6 -2
- langchain_core/output_parsers/transform.py +17 -60
- langchain_core/output_parsers/xml.py +34 -44
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +26 -11
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +17 -6
- langchain_core/outputs/llm_result.py +15 -8
- langchain_core/prompt_values.py +29 -123
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +48 -63
- langchain_core/prompts/chat.py +259 -288
- langchain_core/prompts/dict.py +19 -11
- langchain_core/prompts/few_shot.py +84 -90
- langchain_core/prompts/few_shot_with_templates.py +14 -12
- langchain_core/prompts/image.py +19 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +7 -8
- langchain_core/prompts/prompt.py +42 -43
- langchain_core/prompts/string.py +37 -16
- langchain_core/prompts/structured.py +43 -46
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +52 -192
- langchain_core/runnables/base.py +1727 -1683
- langchain_core/runnables/branch.py +52 -73
- langchain_core/runnables/config.py +89 -103
- langchain_core/runnables/configurable.py +128 -130
- langchain_core/runnables/fallbacks.py +93 -82
- langchain_core/runnables/graph.py +127 -127
- langchain_core/runnables/graph_ascii.py +63 -41
- langchain_core/runnables/graph_mermaid.py +87 -70
- langchain_core/runnables/graph_png.py +31 -36
- langchain_core/runnables/history.py +145 -161
- langchain_core/runnables/passthrough.py +141 -144
- langchain_core/runnables/retry.py +84 -68
- langchain_core/runnables/router.py +33 -37
- langchain_core/runnables/schema.py +79 -72
- langchain_core/runnables/utils.py +95 -139
- langchain_core/stores.py +85 -131
- langchain_core/structured_query.py +11 -15
- langchain_core/sys_info.py +31 -32
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +221 -247
- langchain_core/tools/convert.py +144 -161
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -19
- langchain_core/tools/simple.py +52 -29
- langchain_core/tools/structured.py +56 -60
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +103 -112
- langchain_core/tracers/context.py +29 -48
- langchain_core/tracers/core.py +142 -105
- langchain_core/tracers/evaluation.py +30 -34
- langchain_core/tracers/event_stream.py +162 -117
- langchain_core/tracers/langchain.py +34 -36
- langchain_core/tracers/log_stream.py +87 -49
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +18 -34
- langchain_core/tracers/run_collector.py +8 -20
- langchain_core/tracers/schemas.py +0 -125
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +47 -9
- langchain_core/utils/aiter.py +70 -66
- langchain_core/utils/env.py +12 -9
- langchain_core/utils/function_calling.py +139 -206
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +48 -45
- langchain_core/utils/json.py +14 -4
- langchain_core/utils/json_schema.py +159 -43
- langchain_core/utils/mustache.py +32 -25
- langchain_core/utils/pydantic.py +67 -40
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +104 -62
- langchain_core/vectorstores/base.py +131 -179
- langchain_core/vectorstores/in_memory.py +113 -182
- langchain_core/vectorstores/utils.py +23 -17
- langchain_core/version.py +1 -1
- langchain_core-1.0.0.dist-info/METADATA +68 -0
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -448
- langchain_core/memory.py +0 -116
- langchain_core/messages/content_blocks.py +0 -1435
- langchain_core/prompts/pipeline.py +0 -133
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -23
- langchain_core/utils/loading.py +0 -31
- langchain_core/v1/__init__.py +0 -1
- langchain_core/v1/chat_models.py +0 -1047
- langchain_core/v1/messages.py +0 -755
- langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
- langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
- langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/document_loaders/base.py
CHANGED

@@ -3,7 +3,7 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

 from langchain_core.runnables import run_in_executor

@@ -15,6 +15,13 @@ if TYPE_CHECKING:
     from langchain_core.documents import Document
     from langchain_core.documents.base import Blob

+try:
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+    _HAS_TEXT_SPLITTERS = True
+except ImportError:
+    _HAS_TEXT_SPLITTERS = False
+

 class BaseLoader(ABC):  # noqa: B024
     """Interface for Document Loader.
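The module-level guard above replaces the per-call try/except import that the next hunk removes from `load_and_split`. As a minimal sketch of the same optional-dependency pattern in isolation (`some_extra` and its `split` function are hypothetical, not a real package):

```python
# Optional-dependency guard: attempt the import once, at module import time,
# and record the outcome in a flag instead of wrapping every call site.
try:
    import some_extra  # hypothetical optional dependency

    _HAS_SOME_EXTRA = True
except ImportError:
    _HAS_SOME_EXTRA = False


def split_text(text: str) -> list[str]:
    """Split text, failing with an actionable message if the extra is missing."""
    if not _HAS_SOME_EXTRA:
        msg = "Install `some-extra` to use split_text()."
        raise ImportError(msg)
    return some_extra.split(text)
```

The flag costs one import attempt up front and makes every later check a cheap boolean test.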
@@ -28,37 +35,47 @@ class BaseLoader(ABC):  # noqa: B024
     # Sub-classes should not implement this method directly. Instead, they
     # should implement the lazy load method.
     def load(self) -> list[Document]:
-        """Load data into Document objects."""
+        """Load data into `Document` objects.
+
+        Returns:
+            The documents.
+        """
         return list(self.lazy_load())

     async def aload(self) -> list[Document]:
-        """Load data into Document objects."""
+        """Load data into `Document` objects.
+
+        Returns:
+            The documents.
+        """
         return [document async for document in self.alazy_load()]

     def load_and_split(
-        self, text_splitter: Optional[TextSplitter] = None
+        self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as Documents.
+        """Load Documents and split into chunks. Chunks are returned as `Document`.

         Do not override this method. It should be considered to be deprecated!

         Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter.
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.
+
+        Raises:
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.

         Returns:
-            List of Documents.
+            List of `Document`.
         """
         if text_splitter is None:
-            try:
-                from langchain_text_splitters import RecursiveCharacterTextSplitter
-            except ImportError as e:
+            if not _HAS_TEXT_SPLITTERS:
                 msg = (
                     "Unable to import from langchain_text_splitters. Please specify "
                     "text_splitter or install langchain_text_splitters with "
                     "`pip install -U langchain-text-splitters`."
                 )
-                raise ImportError(msg) from e
+                raise ImportError(msg)

             text_splitter_: TextSplitter = RecursiveCharacterTextSplitter()
         else:
@@ -69,14 +86,22 @@ class BaseLoader(ABC):  # noqa: B024
     # Attention: This method will be upgraded into an abstractmethod once it's
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for Documents."""
+        """A lazy loader for `Document`.
+
+        Yields:
+            The `Document` objects.
+        """
         if type(self).load != BaseLoader.load:
             return iter(self.load())
         msg = f"{self.__class__.__name__} does not implement lazy_load()"
         raise NotImplementedError(msg)

     async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for Documents."""
+        """A lazy loader for `Document`.
+
+        Yields:
+            The `Document` objects.
+        """
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
         while True:
@@ -90,7 +115,7 @@ class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.

     A blob parser provides a way to parse raw data stored in a blob into one
-    or more documents.
+    or more `Document` objects.

     The parser can be composed with blob loaders, making it easy to reuse
     a parser independent of how the blob was originally loaded.
@@ -103,25 +128,25 @@ class BaseBlobParser(ABC):
         Subclasses are required to implement this method.

         Args:
-            blob: Blob instance
+            blob: `Blob` instance

         Returns:
-            Generator of documents
+            Generator of `Document` objects
         """

     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a document or documents.
+        """Eagerly parse the blob into a `Document` or `Document` objects.

         This is a convenience method for interactive development environment.

-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.

         Subclasses should generally not over-ride this parse method.

         Args:
-            blob: Blob instance
+            blob: `Blob` instance

         Returns:
-            List of documents
+            List of `Document` objects
         """
         return list(self.lazy_parse(blob))
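Caller-visible behavior is unchanged unless `langchain-text-splitters` is missing, in which case `load_and_split()` still raises `ImportError`, now via the module-level flag. A usage sketch with a toy subclass (`MyLoader` is illustrative, not part of the package):

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class MyLoader(BaseLoader):
    """Toy loader that yields a single document."""

    def lazy_load(self) -> Iterator[Document]:
        yield Document(page_content="Hello, world!")


loader = MyLoader()
try:
    # Raises ImportError if langchain-text-splitters is not installed
    # and no text_splitter argument is supplied.
    chunks = loader.load_and_split()
except ImportError:
    chunks = loader.load()  # fall back to the unsplit documents
print(chunks)
```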
langchain_core/document_loaders/langsmith.py
CHANGED

@@ -3,8 +3,8 @@
 import datetime
 import json
 import uuid
-from collections.abc import Iterator, Sequence
-from typing import Any, Callable, Optional, Union
+from collections.abc import Callable, Iterator, Sequence
+from typing import Any

 from langsmith import Client as LangSmithClient
 from typing_extensions import override
@@ -20,55 +20,55 @@ class LangSmithLoader(BaseLoader):
     into the Document metadata. This allows you to easily create few-shot example
     retrievers from the loaded documents.

-    Lazy load:
-        .. code-block:: python
-
-            from langchain_core.document_loaders import LangSmithLoader
-
-            loader = LangSmithLoader(dataset_id="...", limit=100)
-            docs = []
-            for doc in loader.lazy_load():
-                docs.append(doc)
-
-            # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
-
-    .. versionadded:: 0.2.34
-
-    """  # noqa: E501
+    ??? note "Lazy load"
+
+        ```python
+        from langchain_core.document_loaders import LangSmithLoader
+
+        loader = LangSmithLoader(dataset_id="...", limit=100)
+        docs = []
+        for doc in loader.lazy_load():
+            docs.append(doc)
+        ```
+
+        ```python
+        # -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
+        ```
+
+    !!! version-added "Added in version 0.2.34"
+
+    """

     def __init__(
         self,
         *,
-        dataset_id: Optional[Union[uuid.UUID, str]] = None,
-        dataset_name: Optional[str] = None,
-        example_ids: Optional[Sequence[Union[uuid.UUID, str]]] = None,
-        as_of: Optional[Union[datetime.datetime, str]] = None,
-        splits: Optional[Sequence[str]] = None,
+        dataset_id: uuid.UUID | str | None = None,
+        dataset_name: str | None = None,
+        example_ids: Sequence[uuid.UUID | str] | None = None,
+        as_of: datetime.datetime | str | None = None,
+        splits: Sequence[str] | None = None,
         inline_s3_urls: bool = True,
         offset: int = 0,
-        limit: Optional[int] = None,
-        metadata: Optional[dict] = None,
-        filter: Optional[str] = None,  # noqa: A002
+        limit: int | None = None,
+        metadata: dict | None = None,
+        filter: str | None = None,  # noqa: A002
         content_key: str = "",
-        format_content: Optional[Callable[..., str]] = None,
-        client: Optional[LangSmithClient] = None,
+        format_content: Callable[..., str] | None = None,
+        client: LangSmithClient | None = None,
         **client_kwargs: Any,
     ) -> None:
         """Create a LangSmith loader.

         Args:
-            dataset_id: The ID of the dataset to filter by. Defaults to None.
-            dataset_name: The name of the dataset to filter by. Defaults to None.
-            content_key: The inputs key to set as Document page content. ``'.'`` characters
-                are interpreted as nested keys. E.g. ``content_key="first.second"`` will
+            dataset_id: The ID of the dataset to filter by.
+            dataset_name: The name of the dataset to filter by.
+            content_key: The inputs key to set as Document page content. `'.'` characters
+                are interpreted as nested keys. E.g. `content_key="first.second"` will
                 result in
-                ``Document(page_content=format_content(example.inputs["first"]["second"]))``
+                `Document(page_content=format_content(example.inputs["first"]["second"]))`
             format_content: Function for converting the content extracted from the example
                 inputs into a string. Defaults to JSON-encoding the contents.
-            example_ids: The IDs of the examples to filter by. Defaults to None.
+            example_ids: The IDs of the examples to filter by.
             as_of: The dataset version tag OR
                 timestamp to retrieve the examples as of.
                 Response examples will only be those that were present at the time
@@ -76,14 +76,17 @@ class LangSmithLoader(BaseLoader):
             splits: A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs. Defaults to True.
-            offset: The offset to start from. Defaults to 0.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
-            metadata: Metadata to filter by. Defaults to None.
+            metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
             client: LangSmith Client. If not provided will be initialized from below args.
             client_kwargs: Keyword args to pass to LangSmith client init. Should only be
-                specified if ``client`` isn't.
+                specified if `client` isn't.
+
+        Raises:
+            ValueError: If both `client` and `client_kwargs` are provided.
         """  # noqa: E501
         if client and client_kwargs:
             raise ValueError
@@ -126,7 +129,7 @@ class LangSmithLoader(BaseLoader):
         yield Document(content_str, metadata=metadata)


-def _stringify(x: Union[str, dict]) -> str:
+def _stringify(x: str | dict) -> str:
     if isinstance(x, str):
         return x
     try:
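As a usage sketch of the keyword-only signature above — the dataset name, nested `content_key`, and `format_content` values are illustrative, and a configured LangSmith client/API key is assumed:

```python
from langchain_core.document_loaders import LangSmithLoader

loader = LangSmithLoader(
    dataset_name="my-dataset",  # hypothetical dataset
    content_key="question.text",  # '.' drills into example.inputs["question"]["text"]
    format_content=str,  # replace the default JSON-encoding of the extracted content
    limit=50,
)
for doc in loader.lazy_load():
    # Per the docstring, example inputs/outputs are merged into each
    # Document's metadata.
    print(doc.page_content, doc.metadata["outputs"])
```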
langchain_core/documents/base.py
CHANGED

@@ -6,7 +6,7 @@ import contextlib
 import mimetypes
 from io import BufferedReader, BytesIO
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, cast

 from pydantic import ConfigDict, Field, model_validator

@@ -15,7 +15,7 @@ from langchain_core.load.serializable import Serializable
 if TYPE_CHECKING:
     from collections.abc import Generator

-    PathLike = Union[str, PurePath]
+    PathLike = str | PurePath


 class BaseMedia(Serializable):
@@ -33,13 +33,13 @@ class BaseMedia(Serializable):
     # The ID field is optional at the moment.
     # It will likely become required in a future major release after
     # it has been adopted by enough vectorstore implementations.
-    id: Optional[str] = Field(default=None, coerce_numbers_to_str=True)
+    id: str | None = Field(default=None, coerce_numbers_to_str=True)
     """An optional identifier for the document.

     Ideally this should be unique across the document collection and formatted
     as a UUID, but this will not be enforced.

-    .. versionadded:: 0.2.11
+    !!! version-added "Added in version 0.2.11"
     """

     metadata: dict = Field(default_factory=dict)
@@ -57,64 +57,63 @@ class Blob(BaseMedia):

     Example: Initialize a blob from in-memory data

-        .. code-block:: python
-
-            from langchain_core.documents import Blob
-
-            blob = Blob.from_data("Hello, world!")
-
-            # Read the blob as a string
-            print(blob.as_string())
-
-            # Read the blob as bytes
-            print(blob.as_bytes())
-
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        ```python
+        from langchain_core.documents import Blob
+
+        blob = Blob.from_data("Hello, world!")
+
+        # Read the blob as a string
+        print(blob.as_string())
+
+        # Read the blob as bytes
+        print(blob.as_bytes())
+
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```

     Example: Load from memory and specify mime-type and metadata

-        .. code-block:: python
-
-            from langchain_core.documents import Blob
-
-            blob = Blob.from_data(
-                data="Hello, world!",
-                mime_type="text/plain",
-                metadata={"source": "https://example.com"},
-            )
+        ```python
+        from langchain_core.documents import Blob
+
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```

     Example: Load the blob from a file

-        .. code-block:: python
-
-            from langchain_core.documents import Blob
-
-            blob = Blob.from_path("path/to/file.txt")
-
-            # Read the blob as a string
-            print(blob.as_string())
-
-            # Read the blob as bytes
-            print(blob.as_bytes())
-
-            # Read the blob as a byte stream
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        ```python
+        from langchain_core.documents import Blob
+
+        blob = Blob.from_path("path/to/file.txt")
+
+        # Read the blob as a string
+        print(blob.as_string())
+
+        # Read the blob as bytes
+        print(blob.as_bytes())
+
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """

-    data: Optional[Union[bytes, str]] = None
+    data: bytes | str | None = None
     """Raw data associated with the blob."""
-    mimetype: Optional[str] = None
+    mimetype: str | None = None
     """MimeType not to be confused with a file extension."""
     encoding: str = "utf-8"
     """Encoding to use if decoding the bytes into a string.

-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
     """
-    path: Optional[PathLike] = None
+    path: PathLike | None = None
     """Location where the original content was found."""

     model_config = ConfigDict(
@@ -123,16 +122,16 @@ class Blob(BaseMedia):
     )

     @property
-    def source(self) -> Optional[str]:
+    def source(self) -> str | None:
         """The source location of the blob as string if known otherwise none.

         If a path is associated with the blob, it will default to the path location.

-        Unless explicitly set via a metadata field called "source", in which
+        Unless explicitly set via a metadata field called `"source"`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:
-            return cast("Optional[str]", self.metadata["source"])
+            return cast("str | None", self.metadata["source"])
         return str(self.path) if self.path else None

     @model_validator(mode="before")
@@ -145,7 +144,14 @@ class Blob(BaseMedia):
         return values

     def as_string(self) -> str:
-        """Read data as a string."""
+        """Read data as a string.
+
+        Raises:
+            ValueError: If the blob cannot be represented as a string.
+
+        Returns:
+            The data as a string.
+        """
         if self.data is None and self.path:
             return Path(self.path).read_text(encoding=self.encoding)
         if isinstance(self.data, bytes):
@@ -156,7 +162,14 @@ class Blob(BaseMedia):
         raise ValueError(msg)

     def as_bytes(self) -> bytes:
-        """Read data as bytes."""
+        """Read data as bytes.
+
+        Raises:
+            ValueError: If the blob cannot be represented as bytes.
+
+        Returns:
+            The data as bytes.
+        """
         if isinstance(self.data, bytes):
             return self.data
         if isinstance(self.data, str):
@@ -167,8 +180,15 @@ class Blob(BaseMedia):
         raise ValueError(msg)

     @contextlib.contextmanager
-    def as_bytes_io(self) -> Generator[Union[BytesIO, BufferedReader], None, None]:
-        """Read data as a byte stream."""
+    def as_bytes_io(self) -> Generator[BytesIO | BufferedReader, None, None]:
+        """Read data as a byte stream.
+
+        Raises:
+            NotImplementedError: If the blob cannot be represented as a byte stream.
+
+        Yields:
+            The data as a byte stream.
+        """
         if isinstance(self.data, bytes):
             yield BytesIO(self.data)
         elif self.data is None and self.path:
@@ -184,18 +204,18 @@ class Blob(BaseMedia):
         path: PathLike,
         *,
         encoding: str = "utf-8",
-        mime_type: Optional[str] = None,
+        mime_type: str | None = None,
         guess_type: bool = True,
-        metadata: Optional[dict] = None,
+        metadata: dict | None = None,
     ) -> Blob:
         """Load the blob from a path like object.

         Args:
-            path: path like object to file to be read
+            path: Path-like object to file to be read
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            guess_type: If True, the mimetype will be guessed from the file extension,
-                if a mime-type was not provided
+            mime_type: If provided, will be set as the mime-type of the data
+            guess_type: If `True`, the mimetype will be guessed from the file extension,
+                if a mime-type was not provided
             metadata: Metadata to associate with the blob

         Returns:
@@ -218,20 +238,20 @@ class Blob(BaseMedia):
     @classmethod
     def from_data(
         cls,
-        data: Union[str, bytes],
+        data: str | bytes,
         *,
         encoding: str = "utf-8",
-        mime_type: Optional[str] = None,
-        path: Optional[str] = None,
-        metadata: Optional[dict] = None,
+        mime_type: str | None = None,
+        path: str | None = None,
+        metadata: dict | None = None,
     ) -> Blob:
         """Initialize the blob from in-memory data.

         Args:
-            data: the in-memory data associated with the blob
+            data: The in-memory data associated with the blob
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type: if provided, will be set as the mime-type of the data
-            path: if provided, will be set as the source from which the data came
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
             metadata: Metadata to associate with the blob

         Returns:
@@ -246,7 +266,7 @@ class Blob(BaseMedia):
         )

     def __repr__(self) -> str:
-        """
+        """Return the blob representation."""
         str_repr = f"Blob {id(self)}"
         if self.source:
             str_repr += f" {self.source}"
@@ -257,16 +277,13 @@ class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.

     Example:
+        ```python
+        from langchain_core.documents import Document

-        .. code-block:: python
-
-            from langchain_core.documents import Document
-
-            document = Document(
-                page_content="Hello, world!",
-                metadata={"source": "https://example.com"}
-            )
-
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """

     page_content: str
@@ -277,23 +294,28 @@ class Document(BaseMedia):
         """Pass page_content in as positional or named arg."""
         # my-py is complaining that page_content is not defined on the base class.
         # Here, we're relying on pydantic base class to handle the validation.
-        super().__init__(page_content=page_content, **kwargs)  # type: ignore[call-arg]
+        super().__init__(page_content=page_content, **kwargs)

     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return whether this class is serializable."""
+        """Return True as this class is serializable."""
         return True

     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object.
+        """Get the namespace of the LangChain object.

-        Returns ["langchain", "schema", "document"].
+        Returns:
+            ["langchain", "schema", "document"]
         """
         return ["langchain", "schema", "document"]

     def __str__(self) -> str:
-        """Override __str__ to restrict it to page_content and metadata."""
+        """Override __str__ to restrict it to page_content and metadata.
+
+        Returns:
+            A string representation of the Document.
+        """
         # The format matches pydantic format for __str__.
         #
         # The purpose of this change is to make sure that user code that
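A short sketch of the `source` resolution order documented above: an explicit `"source"` metadata key wins over the stored path (the path and URL here are illustrative):

```python
from langchain_core.documents import Blob

# With only a path, source falls back to the path string.
blob = Blob.from_path("path/to/file.txt")
assert blob.source == "path/to/file.txt"

# A "source" metadata key takes precedence over the path.
blob = Blob.from_data("Hello, world!", metadata={"source": "https://example.com"})
assert blob.source == "https://example.com"
```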
langchain_core/documents/compressor.py
CHANGED

@@ -3,7 +3,7 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

 from pydantic import BaseModel

@@ -19,17 +19,18 @@ if TYPE_CHECKING:
 class BaseDocumentCompressor(BaseModel, ABC):
     """Base class for document compressors.

-    This abstraction is primarily used for
-    post-processing of retrieved documents.
+    This abstraction is primarily used for post-processing of retrieved documents.

     Documents matching a given query are first retrieved.
+
     Then the list of documents can be further processed.

-    For example, one could re-rank the retrieved documents
-    using an LLM.
+    For example, one could re-rank the retrieved documents using an LLM.
+
+    !!! note
+        Users should favor using a RunnableLambda instead of sub-classing from this
+        interface.

-    **Note** users should favor using a RunnableLambda
-    instead of sub-classing from this interface.
     """

     @abstractmethod
@@ -37,7 +38,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks: Optional[Callbacks] = None,
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Compress retrieved documents given the query context.

@@ -48,13 +49,14 @@ class BaseDocumentCompressor(BaseModel, ABC):

         Returns:
             The compressed documents.
+
         """

     async def acompress_documents(
         self,
         documents: Sequence[Document],
         query: str,
-        callbacks: Optional[Callbacks] = None,
+        callbacks: Callbacks | None = None,
     ) -> Sequence[Document]:
         """Async compress retrieved documents given the query context.

@@ -65,6 +67,7 @@ class BaseDocumentCompressor(BaseModel, ABC):

         Returns:
             The compressed documents.
+
         """
         return await run_in_executor(
             None, self.compress_documents, documents, query, callbacks