PyPI - langchain-core - Versions diffs - 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langchain-core might be problematic. Click here for more details.

Files changed (165) hide show

langchain_core/__init__.py +1 -1
langchain_core/_api/__init__.py +3 -4
langchain_core/_api/beta_decorator.py +23 -26
langchain_core/_api/deprecation.py +52 -65
langchain_core/_api/path.py +3 -6
langchain_core/_import_utils.py +3 -4
langchain_core/agents.py +19 -19
langchain_core/caches.py +53 -63
langchain_core/callbacks/__init__.py +1 -8
langchain_core/callbacks/base.py +323 -334
langchain_core/callbacks/file.py +44 -44
langchain_core/callbacks/manager.py +441 -507
langchain_core/callbacks/stdout.py +29 -30
langchain_core/callbacks/streaming_stdout.py +32 -32
langchain_core/callbacks/usage.py +60 -57
langchain_core/chat_history.py +48 -63
langchain_core/document_loaders/base.py +23 -23
langchain_core/document_loaders/langsmith.py +37 -37
langchain_core/documents/__init__.py +0 -1
langchain_core/documents/base.py +62 -65
langchain_core/documents/compressor.py +4 -4
langchain_core/documents/transformers.py +28 -29
langchain_core/embeddings/fake.py +50 -54
langchain_core/example_selectors/length_based.py +1 -1
langchain_core/example_selectors/semantic_similarity.py +21 -25
langchain_core/exceptions.py +10 -11
langchain_core/globals.py +3 -151
langchain_core/indexing/api.py +61 -66
langchain_core/indexing/base.py +58 -58
langchain_core/indexing/in_memory.py +3 -3
langchain_core/language_models/__init__.py +14 -27
langchain_core/language_models/_utils.py +270 -84
langchain_core/language_models/base.py +55 -162
langchain_core/language_models/chat_models.py +442 -402
langchain_core/language_models/fake.py +11 -11
langchain_core/language_models/fake_chat_models.py +61 -39
langchain_core/language_models/llms.py +123 -231
langchain_core/load/dump.py +4 -5
langchain_core/load/load.py +18 -28
langchain_core/load/mapping.py +2 -4
langchain_core/load/serializable.py +39 -40
langchain_core/messages/__init__.py +61 -22
langchain_core/messages/ai.py +368 -163
langchain_core/messages/base.py +214 -43
langchain_core/messages/block_translators/__init__.py +111 -0
langchain_core/messages/block_translators/anthropic.py +470 -0
langchain_core/messages/block_translators/bedrock.py +94 -0
langchain_core/messages/block_translators/bedrock_converse.py +297 -0
langchain_core/messages/block_translators/google_genai.py +530 -0
langchain_core/messages/block_translators/google_vertexai.py +21 -0
langchain_core/messages/block_translators/groq.py +143 -0
langchain_core/messages/block_translators/langchain_v0.py +301 -0
langchain_core/messages/block_translators/openai.py +1010 -0
langchain_core/messages/chat.py +2 -6
langchain_core/messages/content.py +1423 -0
langchain_core/messages/function.py +6 -10
langchain_core/messages/human.py +41 -38
langchain_core/messages/modifier.py +2 -2
langchain_core/messages/system.py +38 -28
langchain_core/messages/tool.py +96 -103
langchain_core/messages/utils.py +478 -504
langchain_core/output_parsers/__init__.py +1 -14
langchain_core/output_parsers/base.py +58 -61
langchain_core/output_parsers/json.py +7 -8
langchain_core/output_parsers/list.py +5 -7
langchain_core/output_parsers/openai_functions.py +49 -47
langchain_core/output_parsers/openai_tools.py +14 -19
langchain_core/output_parsers/pydantic.py +12 -13
langchain_core/output_parsers/string.py +2 -2
langchain_core/output_parsers/transform.py +15 -17
langchain_core/output_parsers/xml.py +8 -10
langchain_core/outputs/__init__.py +1 -1
langchain_core/outputs/chat_generation.py +18 -18
langchain_core/outputs/chat_result.py +1 -3
langchain_core/outputs/generation.py +8 -8
langchain_core/outputs/llm_result.py +10 -10
langchain_core/prompt_values.py +12 -12
langchain_core/prompts/__init__.py +3 -27
langchain_core/prompts/base.py +45 -55
langchain_core/prompts/chat.py +254 -313
langchain_core/prompts/dict.py +5 -5
langchain_core/prompts/few_shot.py +81 -88
langchain_core/prompts/few_shot_with_templates.py +11 -13
langchain_core/prompts/image.py +12 -14
langchain_core/prompts/loading.py +6 -8
langchain_core/prompts/message.py +3 -3
langchain_core/prompts/prompt.py +24 -39
langchain_core/prompts/string.py +4 -4
langchain_core/prompts/structured.py +42 -50
langchain_core/rate_limiters.py +51 -60
langchain_core/retrievers.py +49 -190
langchain_core/runnables/base.py +1484 -1709
langchain_core/runnables/branch.py +45 -61
langchain_core/runnables/config.py +80 -88
langchain_core/runnables/configurable.py +117 -134
langchain_core/runnables/fallbacks.py +83 -79
langchain_core/runnables/graph.py +85 -95
langchain_core/runnables/graph_ascii.py +27 -28
langchain_core/runnables/graph_mermaid.py +38 -50
langchain_core/runnables/graph_png.py +15 -16
langchain_core/runnables/history.py +135 -148
langchain_core/runnables/passthrough.py +124 -150
langchain_core/runnables/retry.py +46 -51
langchain_core/runnables/router.py +25 -30
langchain_core/runnables/schema.py +79 -74
langchain_core/runnables/utils.py +62 -68
langchain_core/stores.py +81 -115
langchain_core/structured_query.py +8 -8
langchain_core/sys_info.py +27 -29
langchain_core/tools/__init__.py +1 -14
langchain_core/tools/base.py +179 -187
langchain_core/tools/convert.py +131 -139
langchain_core/tools/render.py +10 -10
langchain_core/tools/retriever.py +11 -11
langchain_core/tools/simple.py +19 -24
langchain_core/tools/structured.py +30 -39
langchain_core/tracers/__init__.py +1 -9
langchain_core/tracers/base.py +97 -99
langchain_core/tracers/context.py +29 -52
langchain_core/tracers/core.py +50 -60
langchain_core/tracers/evaluation.py +11 -11
langchain_core/tracers/event_stream.py +115 -70
langchain_core/tracers/langchain.py +21 -21
langchain_core/tracers/log_stream.py +43 -43
langchain_core/tracers/memory_stream.py +3 -3
langchain_core/tracers/root_listeners.py +16 -16
langchain_core/tracers/run_collector.py +2 -4
langchain_core/tracers/schemas.py +0 -129
langchain_core/tracers/stdout.py +3 -3
langchain_core/utils/__init__.py +1 -4
langchain_core/utils/_merge.py +46 -8
langchain_core/utils/aiter.py +57 -61
langchain_core/utils/env.py +9 -9
langchain_core/utils/function_calling.py +89 -191
langchain_core/utils/html.py +7 -8
langchain_core/utils/input.py +6 -6
langchain_core/utils/interactive_env.py +1 -1
langchain_core/utils/iter.py +37 -42
langchain_core/utils/json.py +4 -3
langchain_core/utils/json_schema.py +8 -8
langchain_core/utils/mustache.py +9 -11
langchain_core/utils/pydantic.py +33 -35
langchain_core/utils/strings.py +5 -5
langchain_core/utils/usage.py +1 -1
langchain_core/utils/utils.py +80 -54
langchain_core/vectorstores/base.py +129 -164
langchain_core/vectorstores/in_memory.py +99 -174
langchain_core/vectorstores/utils.py +5 -5
langchain_core/version.py +1 -1
{langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
langchain_core-1.0.0.dist-info/RECORD +172 -0
{langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
langchain_core/beta/__init__.py +0 -1
langchain_core/beta/runnables/__init__.py +0 -1
langchain_core/beta/runnables/context.py +0 -447
langchain_core/memory.py +0 -120
langchain_core/messages/content_blocks.py +0 -176
langchain_core/prompts/pipeline.py +0 -138
langchain_core/pydantic_v1/__init__.py +0 -30
langchain_core/pydantic_v1/dataclasses.py +0 -23
langchain_core/pydantic_v1/main.py +0 -23
langchain_core/tracers/langchain_v1.py +0 -31
langchain_core/utils/loading.py +0 -35
langchain_core-0.3.79.dist-info/RECORD +0 -174
langchain_core-0.3.79.dist-info/entry_points.txt +0 -4

langchain_core/language_models/__init__.py CHANGED Viewed

@@ -1,50 +1,35 @@
 """Language models.
-**Language Model** is a type of model that can generate text or complete
-text prompts.
+LangChain has two main classes to work with language models: chat models and
+"old-fashioned" LLMs.
-LangChain has two main classes to work with language models: **Chat Models**
-and "old-fashioned" **LLMs**.
-**Chat Models**
+**Chat models**
 Language models that use a sequence of messages as inputs and return chat messages
-as outputs (as opposed to using plain text). These are traditionally newer models (
-older models are generally LLMs, see below). Chat models support the assignment of
+as outputs (as opposed to using plain text). Chat models support the assignment of
 distinct roles to conversation messages, helping to distinguish messages from the AI,
 users, and instructions such as system messages.
 The key abstraction for chat models is `BaseChatModel`. Implementations
-should inherit from this class. Please see LangChain how-to guides with more
-information on how to implement a custom chat model.
-To implement a custom Chat Model, inherit from `BaseChatModel`. See
-the following guide for more information on how to implement a custom Chat Model:
+should inherit from this class.
-https://python.langchain.com/docs/how_to/custom_chat_model/
+See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
 **LLMs**
 Language models that takes a string as input and returns a string.
-These are traditionally older models (newer models generally are Chat Models,
-see below).
-Although the underlying models are string in, string out, the LangChain wrappers
-also allow these models to take messages as input. This gives them the same interface
-as Chat Models. When messages are passed in as input, they will be formatted into a
-string under the hood before being passed to the underlying model.
-To implement a custom LLM, inherit from `BaseLLM` or `LLM`.
-Please see the following guide for more information on how to implement a custom LLM:
-https://python.langchain.com/docs/how_to/custom_llm/
+These are traditionally older models (newer models generally are chat models).
+Although the underlying models are string in, string out, the LangChain wrappers also
+allow these models to take messages as input. This gives them the same interface as
+chat models. When messages are passed in as input, they will be formatted into a string
+under the hood before being passed to the underlying model.
 """
 from typing import TYPE_CHECKING
 from langchain_core._import_utils import import_attr
+from langchain_core.language_models._utils import is_openai_data_block
 if TYPE_CHECKING:
     from langchain_core.language_models.base import (
@@ -85,6 +70,7 @@ __all__ = (
     "ParrotFakeChatModel",
     "SimpleChatModel",
     "get_tokenizer",
+    "is_openai_data_block",
 )
 _dynamic_imports = {
@@ -104,6 +90,7 @@ _dynamic_imports = {
     "ParrotFakeChatModel": "fake_chat_models",
     "LLM": "llms",
     "BaseLLM": "llms",
+    "is_openai_data_block": "_utils",
 }

langchain_core/language_models/_utils.py CHANGED Viewed

@@ -1,13 +1,47 @@
 import re
 from collections.abc import Sequence
-from typing import Optional
+from typing import (
+    TYPE_CHECKING,
+    Literal,
+    TypedDict,
+    TypeVar,
+)
-from langchain_core.messages import BaseMessage
+if TYPE_CHECKING:
+    from langchain_core.messages import BaseMessage
+from langchain_core.messages.content import (
+    ContentBlock,
+)
-def _is_openai_data_block(block: dict) -> bool:
-    """Check if the block contains multimodal data in OpenAI Chat Completions format."""
+def is_openai_data_block(
+    block: dict, filter_: Literal["image", "audio", "file"] | None = None
+) -> bool:
+    """Check whether a block contains multimodal data in OpenAI Chat Completions format.
+    Supports both data and ID-style blocks (e.g. `'file_data'` and `'file_id'`)
+    If additional keys are present, they are ignored / will not affect outcome as long
+    as the required keys are present and valid.
+    Args:
+        block: The content block to check.
+        filter_: If provided, only return True for blocks matching this specific type.
+            - "image": Only match image_url blocks
+            - "audio": Only match input_audio blocks
+            - "file": Only match file blocks
+            If `None`, match any valid OpenAI data block type. Note that this means that
+            if the block has a valid OpenAI data type but the filter_ is set to a
+            different type, this function will return False.
+    Returns:
+        `True` if the block is a valid OpenAI data block and matches the filter_
+        (if provided).
+    """
     if block.get("type") == "image_url":
+        if filter_ is not None and filter_ != "image":
+            return False
         if (
             (set(block.keys()) <= {"type", "image_url", "detail"})
             and (image_url := block.get("image_url"))
@@ -15,126 +49,278 @@ def _is_openai_data_block(block: dict) -> bool:
         ):
             url = image_url.get("url")
             if isinstance(url, str):
+                # Required per OpenAI spec
+                return True
+            # Ignore `'detail'` since it's optional and specific to OpenAI
+    elif block.get("type") == "input_audio":
+        if filter_ is not None and filter_ != "audio":
+            return False
+        if (audio := block.get("input_audio")) and isinstance(audio, dict):
+            audio_data = audio.get("data")
+            audio_format = audio.get("format")
+            # Both required per OpenAI spec
+            if isinstance(audio_data, str) and isinstance(audio_format, str):
                 return True
     elif block.get("type") == "file":
+        if filter_ is not None and filter_ != "file":
+            return False
         if (file := block.get("file")) and isinstance(file, dict):
             file_data = file.get("file_data")
-            if isinstance(file_data, str):
-                return True
-    elif block.get("type") == "input_audio":
-        if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
-            audio_data = input_audio.get("data")
-            audio_format = input_audio.get("format")
-            if isinstance(audio_data, str) and isinstance(audio_format, str):
+            file_id = file.get("file_id")
+            # Files can be either base64-encoded or pre-uploaded with an ID
+            if isinstance(file_data, str) or isinstance(file_id, str):
                 return True
     else:
         return False
+    # Has no `'type'` key
     return False
-def _parse_data_uri(uri: str) -> Optional[dict]:
-    """Parse a data URI into its components. If parsing fails, return None.
+class ParsedDataUri(TypedDict):
+    source_type: Literal["base64"]
+    data: str
+    mime_type: str
-    Example:
-        .. code-block:: python
+def _parse_data_uri(uri: str) -> ParsedDataUri | None:
+    """Parse a data URI into its components.
-            data_uri = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
-            parsed = _parse_data_uri(data_uri)
+    If parsing fails, return `None`. If either MIME type or data is missing, return
+    `None`.
-            assert parsed == {
-                "source_type": "base64",
-                "mime_type": "image/jpeg",
-                "data": "/9j/4AAQSkZJRg...",
-            }
+    Example:
+        ```python
+        data_uri = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
+        parsed = _parse_data_uri(data_uri)
+        assert parsed == {
+            "source_type": "base64",
+            "mime_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRg...",
+        }
+        ```
     """
     regex = r"^data:(?P<mime_type>[^;]+);base64,(?P<data>.+)$"
     match = re.match(regex, uri)
     if match is None:
         return None
+    mime_type = match.group("mime_type")
+    data = match.group("data")
+    if not mime_type or not data:
+        return None
     return {
         "source_type": "base64",
-        "data": match.group("data"),
-        "mime_type": match.group("mime_type"),
+        "data": data,
+        "mime_type": mime_type,
     }
-def _convert_openai_format_to_data_block(block: dict) -> dict:
-    """Convert OpenAI image content block to standard data content block.
+def _normalize_messages(
+    messages: Sequence["BaseMessage"],
+) -> list["BaseMessage"]:
+    """Normalize message formats to LangChain v1 standard content blocks.
-    If parsing fails, pass-through.
+    Chat models already implement support for:
+    - Images in OpenAI Chat Completions format
+        These will be passed through unchanged
+    - LangChain v1 standard content blocks
-    Args:
-        block: The OpenAI image content block to convert.
+    This function extends support to:
+    - `[Audio](https://platform.openai.com/docs/api-reference/chat/create) and
+        `[file](https://platform.openai.com/docs/api-reference/files) data in OpenAI
+        Chat Completions format
+        - Images are technically supported but we expect chat models to handle them
+            directly; this may change in the future
+    - LangChain v0 standard content blocks for backward compatibility
+    !!! warning "Behavior changed in 1.0.0"
+        In previous versions, this function returned messages in LangChain v0 format.
+        Now, it returns messages in LangChain v1 format, which upgraded chat models now
+        expect to receive when passing back in message history. For backward
+        compatibility, this function will convert v0 message content to v1 format.
+    ??? note "v0 Content Block Schemas"
+        `URLContentBlock`:
+        ```python
+        {
+            mime_type: NotRequired[str]
+            type: Literal['image', 'audio', 'file'],
+            source_type: Literal['url'],
+            url: str,
+        }
+        ```
+        `Base64ContentBlock`:
+        ```python
+        {
+            mime_type: NotRequired[str]
+            type: Literal['image', 'audio', 'file'],
+            source_type: Literal['base64'],
+            data: str,
+        }
+        ```
+        `IDContentBlock`:
+        (In practice, this was never used)
+        ```python
+        {
+            type: Literal["image", "audio", "file"],
+            source_type: Literal["id"],
+            id: str,
+        }
+        ```
+        `PlainTextContentBlock`:
+        ```python
+        {
+            mime_type: NotRequired[str]
+            type: Literal['file'],
+            source_type: Literal['text'],
+            url: str,
+        }
+        ```
+    If a v1 message is passed in, it will be returned as-is, meaning it is safe to
+    always pass in v1 messages to this function for assurance.
+    For posterity, here are the OpenAI Chat Completions schemas we expect:
+    Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
+    png, jpeg/jpg, webp, static gif:
+    {
+        "type": Literal['image_url'],
+        "image_url": {
+            "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
+            "detail": Literal['low', 'high', 'auto'] = 'auto',  # Supported by OpenAI
+        }
+    }
+    Chat Completions audio:
+    {
+        "type": Literal['input_audio'],
+        "input_audio": {
+            "format": Literal['wav', 'mp3'],
+            "data": str = "$BASE64_ENCODED_AUDIO",
+        },
+    }
+    Chat Completions files: either base64 or pre-uploaded file ID
+    {
+        "type": Literal['file'],
+        "file": Union[
+            {
+                "filename": str | None = "$FILENAME",
+                "file_data": str = "$BASE64_ENCODED_FILE",
+            },
+            {
+                "file_id": str = "$FILE_ID",  # For pre-uploaded files to OpenAI
+            },
+        ],
+    }
-    Returns:
-        The converted standard data content block.
-    """
-    if block["type"] == "image_url":
-        parsed = _parse_data_uri(block["image_url"]["url"])
-        if parsed is not None:
-            parsed["type"] = "image"
-            return parsed
-        return block
-    if block["type"] == "file":
-        parsed = _parse_data_uri(block["file"]["file_data"])
-        if parsed is not None:
-            parsed["type"] = "file"
-            if filename := block["file"].get("filename"):
-                parsed["filename"] = filename
-            return parsed
-        return block
-    if block["type"] == "input_audio":
-        data = block["input_audio"].get("data")
-        audio_format = block["input_audio"].get("format")
-        if data and audio_format:
-            return {
-                "type": "audio",
-                "source_type": "base64",
-                "data": data,
-                "mime_type": f"audio/{audio_format}",
-            }
-        return block
-    return block
-def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
-    """Extend support for message formats.
-    Chat models implement support for images in OpenAI Chat Completions format, as well
-    as other multimodal data as standard data blocks. This function extends support to
-    audio and file data in OpenAI Chat Completions format by converting them to standard
-    data blocks.
     """
+    from langchain_core.messages.block_translators.langchain_v0 import (  # noqa: PLC0415
+        _convert_legacy_v0_content_block_to_v1,
+    )
+    from langchain_core.messages.block_translators.openai import (  # noqa: PLC0415
+        _convert_openai_format_to_data_block,
+    )
     formatted_messages = []
     for message in messages:
+        # We preserve input messages - the caller may reuse them elsewhere and expects
+        # them to remain unchanged. We only create a copy if we need to translate.
         formatted_message = message
         if isinstance(message.content, list):
             for idx, block in enumerate(message.content):
+                # OpenAI Chat Completions multimodal data blocks to v1 standard
                 if (
                     isinstance(block, dict)
-                    # Subset to (PDF) files and audio, as most relevant chat models
-                    # support images in OAI format (and some may not yet support the
-                    # standard data block format)
-                    and block.get("type") in {"file", "input_audio"}
-                    and _is_openai_data_block(block)
+                    and block.get("type") in {"input_audio", "file"}
+                    # Discriminate between OpenAI/LC format since they share `'type'`
+                    and is_openai_data_block(block)
                 ):
-                    if formatted_message is message:
-                        formatted_message = message.model_copy()
-                        # Also shallow-copy content
-                        formatted_message.content = list(formatted_message.content)
-                    formatted_message.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
-                        _convert_openai_format_to_data_block(block)
-                    )
+                    formatted_message = _ensure_message_copy(message, formatted_message)
+                    converted_block = _convert_openai_format_to_data_block(block)
+                    _update_content_block(formatted_message, idx, converted_block)
+                # Convert multimodal LangChain v0 to v1 standard content blocks
+                elif (
+                    isinstance(block, dict)
+                    and block.get("type")
+                    in {
+                        "image",
+                        "audio",
+                        "file",
+                    }
+                    and block.get("source_type")  # v1 doesn't have `source_type`
+                    in {
+                        "url",
+                        "base64",
+                        "id",
+                        "text",
+                    }
+                ):
+                    formatted_message = _ensure_message_copy(message, formatted_message)
+                    converted_block = _convert_legacy_v0_content_block_to_v1(block)
+                    _update_content_block(formatted_message, idx, converted_block)
+                    continue
+                # else, pass through blocks that look like they have v1 format unchanged
         formatted_messages.append(formatted_message)
     return formatted_messages
+T = TypeVar("T", bound="BaseMessage")
+def _ensure_message_copy(message: T, formatted_message: T) -> T:
+    """Create a copy of the message if it hasn't been copied yet."""
+    if formatted_message is message:
+        formatted_message = message.model_copy()
+        # Shallow-copy content list to allow modifications
+        formatted_message.content = list(formatted_message.content)
+    return formatted_message
+def _update_content_block(
+    formatted_message: "BaseMessage", idx: int, new_block: ContentBlock | dict
+) -> None:
+    """Update a content block at the given index, handling type issues."""
+    # Type ignore needed because:
+    # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
+    # - When content is str, indexing fails (index error)
+    # - When content is list, the items are `Union[str, dict]` but we're assigning
+    #   `Union[ContentBlock, dict]` where ContentBlock is richer than dict
+    # - This is safe because we only call this when we've verified content is a list and
+    #   we're doing content block conversions
+    formatted_message.content[idx] = new_block  # type: ignore[index, assignment]
+def _update_message_content_to_blocks(message: T, output_version: str) -> T:
+    return message.model_copy(
+        update={
+            "content": message.content_blocks,
+            "response_metadata": {
+                **message.response_metadata,
+                "output_version": output_version,
+            },
+        }
+    )

langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl