PyPI - langchain-core - Versions diffs - 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl - Mend

langchain-core 0.4.0.dev0 (py3-none-any.whl) → 1.0.0a2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

langchain_core/_api/beta_decorator.py +2 -2
langchain_core/_api/deprecation.py +1 -1
langchain_core/beta/runnables/context.py +1 -1
langchain_core/callbacks/base.py +14 -23
langchain_core/callbacks/file.py +13 -2
langchain_core/callbacks/manager.py +74 -157
langchain_core/callbacks/streaming_stdout.py +3 -4
langchain_core/callbacks/usage.py +2 -12
langchain_core/chat_history.py +6 -6
langchain_core/documents/base.py +1 -1
langchain_core/documents/compressor.py +9 -6
langchain_core/indexing/base.py +2 -2
langchain_core/language_models/_utils.py +232 -101
langchain_core/language_models/base.py +35 -23
langchain_core/language_models/chat_models.py +248 -54
langchain_core/language_models/fake_chat_models.py +28 -81
langchain_core/load/dump.py +3 -4
langchain_core/messages/__init__.py +30 -24
langchain_core/messages/ai.py +188 -30
langchain_core/messages/base.py +164 -25
langchain_core/messages/block_translators/__init__.py +89 -0
langchain_core/messages/block_translators/anthropic.py +451 -0
langchain_core/messages/block_translators/bedrock.py +45 -0
langchain_core/messages/block_translators/bedrock_converse.py +47 -0
langchain_core/messages/block_translators/google_genai.py +45 -0
langchain_core/messages/block_translators/google_vertexai.py +47 -0
langchain_core/messages/block_translators/groq.py +45 -0
langchain_core/messages/block_translators/langchain_v0.py +164 -0
langchain_core/messages/block_translators/ollama.py +45 -0
langchain_core/messages/block_translators/openai.py +798 -0
langchain_core/messages/{content_blocks.py → content.py} +303 -278
langchain_core/messages/human.py +29 -9
langchain_core/messages/system.py +29 -9
langchain_core/messages/tool.py +94 -13
langchain_core/messages/utils.py +34 -234
langchain_core/output_parsers/base.py +14 -50
langchain_core/output_parsers/json.py +2 -5
langchain_core/output_parsers/list.py +2 -7
langchain_core/output_parsers/openai_functions.py +5 -28
langchain_core/output_parsers/openai_tools.py +49 -90
langchain_core/output_parsers/pydantic.py +2 -3
langchain_core/output_parsers/transform.py +12 -53
langchain_core/output_parsers/xml.py +9 -17
langchain_core/prompt_values.py +8 -112
langchain_core/prompts/chat.py +1 -3
langchain_core/runnables/base.py +500 -451
langchain_core/runnables/branch.py +1 -1
langchain_core/runnables/fallbacks.py +4 -4
langchain_core/runnables/history.py +1 -1
langchain_core/runnables/passthrough.py +3 -3
langchain_core/runnables/retry.py +1 -1
langchain_core/runnables/router.py +1 -1
langchain_core/structured_query.py +3 -7
langchain_core/tools/base.py +14 -41
langchain_core/tools/convert.py +2 -22
langchain_core/tools/retriever.py +1 -8
langchain_core/tools/structured.py +2 -10
langchain_core/tracers/_streaming.py +6 -7
langchain_core/tracers/base.py +7 -14
langchain_core/tracers/core.py +4 -27
langchain_core/tracers/event_stream.py +4 -15
langchain_core/tracers/langchain.py +3 -14
langchain_core/tracers/log_stream.py +2 -3
langchain_core/utils/_merge.py +45 -7
langchain_core/utils/function_calling.py +22 -9
langchain_core/utils/utils.py +29 -0
langchain_core/version.py +1 -1
{langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
{langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
langchain_core/v1/__init__.py +0 -1
langchain_core/v1/chat_models.py +0 -1047
langchain_core/v1/messages.py +0 -755
{langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
{langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0

langchain_core/messages/{content_blocks.py → content.py} RENAMED Viewed

@@ -4,132 +4,143 @@
     This module is under active development. The API is unstable and subject to
     change in future releases.
-This module provides a standardized data structure for representing inputs to and
-outputs from Large Language Models. The core abstraction is the **Content Block**, a
-``TypedDict`` that can represent a piece of text, an image, a tool call, or other
-structured data.
+This module provides standardized data structures for representing inputs to and
+outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict``.
+**Rationale**
+Different LLM providers use distinct and incompatible API schemas. This module
+provides a unified, provider-agnostic format to facilitate these interactions. A
+message to or from a model is simply a list of content blocks, allowing for the natural
+interleaving of text, images, and other content in a single ordered sequence.
+An adapter for a specific provider is responsible for translating this standard list of
+blocks into the format required by its API.
+**Extensibility**
 Data **not yet mapped** to a standard block may be represented using the
 ``NonStandardContentBlock``, which allows for provider-specific data to be included
 without losing the benefits of type checking and validation.
 Furthermore, provider-specific fields **within** a standard block are fully supported
-by default. However, since current type checkers do not recognize this, we are temporarily
-applying type ignore comments to suppress warnings. In the future,
-`PEP 728 <https://peps.python.org/pep-0728/>`__ will add an extra param, ``extra_items=Any``.
-When this is supported, we will apply it to block signatures to signify to type checkers
-that additional provider-specific fields are allowed.
-**Example with PEP 728 provider-specific fields:**
+by default in the ``extras`` field of each block. This allows for additional metadata
+to be included without breaking the standard structure.
-.. code-block:: python
+.. warning::
+    Do not heavily rely on the ``extras`` field for provider-specific data! This field
+    is subject to deprecation in future releases as we move towards PEP 728.
-    # Note `extra_items=Any`
-    class TextContentBlock(TypedDict, extra_items=Any):
-        type: Literal["text"]
-        id: NotRequired[str]
-        text: str
-        annotations: NotRequired[list[Annotation]]
-        index: NotRequired[int]
+.. note::
+    Following widespread adoption of `PEP 728 <https://peps.python.org/pep-0728/>`__, we
+    will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type
+    checkers that additional provider-specific fields are allowed outside of the
+    ``extras`` field, and that will become the new standard approach to adding
+    provider-specific metadata.
-.. code-block:: python
+    .. dropdown::
-    from langchain_core.messages.content_blocks import TextContentBlock
+        **Example with PEP 728 provider-specific fields:**
-    my_block: TextContentBlock = {
-        # Add required fields
-        "type": "text",
-        "text": "Hello, world!",
-        # Additional fields not specified in the TypedDict
-        # These are valid with PEP 728 and are typed as Any
-        "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
-        "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
-        "custom_field": "any value",
-    }
+        .. code-block:: python
-    openai_data = my_block["openai_metadata"]  # Type: Any
+            # Content block definition
+            # NOTE: `extra_items=Any`
+            class TextContentBlock(TypedDict, extra_items=Any):
+                type: Literal["text"]
+                id: NotRequired[str]
+                text: str
+                annotations: NotRequired[list[Annotation]]
+                index: NotRequired[int]
-.. note::
-    PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
-    from type checkers that don't yet support it. The functionality works correctly
-    in Python 3.13+ and will be fully supported as the ecosystem catches up.
+        .. code-block:: python
-**Rationale**
+            from langchain_core.messages.content import TextContentBlock
+            # Create a text content block with provider-specific fields
+            my_block: TextContentBlock = {
+                # Add required fields
+                "type": "text",
+                "text": "Hello, world!",
+                # Additional fields not specified in the TypedDict
+                # These are valid with PEP 728 and are typed as Any
+                "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
+                "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
+                "custom_field": "any value",
+            }
-Different LLM providers use distinct and incompatible API schemas. This module
-introduces a unified, provider-agnostic format to standardize these interactions. A
-message to or from a model is simply a `list` of `ContentBlock` objects, allowing for
-the natural interleaving of text, images, and other content in a single, ordered
-sequence.
+            # Reading a provider-specific field from an existing block
+            openai_data = my_block["openai_metadata"]  # Type: Any
-An adapter for a specific provider is responsible for translating this standard list of
-blocks into the format required by its API.
+        PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress
+        warnings from type checkers that don't yet support it. The functionality works
+        correctly in Python 3.13+ and will be fully supported as the ecosystem catches
+        up.
 **Key Block Types**
 The module defines several types of content blocks, including:
-- ``TextContentBlock``: Standard text.
-- ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data.
-- ``ToolCallContentBlock``, ``ToolOutputContentBlock``: For function calling.
+- ``TextContentBlock``: Standard text output.
+- ``Citation``: For annotations that link text output to a source document.
+- ``ToolCall``: For function calling.
 - ``ReasoningContentBlock``: To capture a model's thought process.
-- ``Citation``: For annotations that link generated text to a source document.
+- Multimodal data:
+    - ``ImageContentBlock``
+    - ``AudioContentBlock``
+    - ``VideoContentBlock``
+    - ``PlainTextContentBlock`` (e.g. .txt or .md files)
+    - ``FileContentBlock`` (e.g. PDFs, etc.)
 **Example Usage**
 .. code-block:: python
     # Direct construction:
-    from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
-    multimodal_message: AIMessage = [
-        TextContentBlock(type="text", text="What is shown in this image?"),
-        ImageContentBlock(
-            type="image",
-            url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
-            mime_type="image/png",
-        ),
-    ]
-    from langchain_core.messages.content_blocks import create_text_block, create_image_block
-    # Using factory functions:
-    multimodal_message: AIMessage = [
-        create_text_block("What is shown in this image?"),
-        create_image_block(
-            url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
-            mime_type="image/png",
-        ),
-    ]
-"""  # noqa: E501
-import warnings
-from typing import Any, Literal, Optional, Union
-from uuid import uuid4
+    from langchain_core.messages.content import TextContentBlock, ImageContentBlock
+    multimodal_message = AIMessage(content_blocks=
+        [
+            TextContentBlock(type="text", text="What is shown in this image?"),
+            ImageContentBlock(
+                type="image",
+                url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
+                mime_type="image/png",
+            ),
+        ]
+    )
-from typing_extensions import NotRequired, TypedDict, get_args, get_origin
+    # Using factories:
+    from langchain_core.messages.content import create_text_block, create_image_block
+    multimodal_message = AIMessage(content=
+        [
+            create_text_block("What is shown in this image?"),
+            create_image_block(
+                url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
+                mime_type="image/png",
+            ),
+        ]
+    )
+Factory functions offer benefits such as:
+- Automatic ID generation (when not provided)
+- No need to manually specify the ``type`` field
-def _ensure_id(id_val: Optional[str]) -> str:
-    """Ensure the ID is a valid string, generating a new UUID if not provided.
+"""
-    Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
-    LangChain-generated IDs.
+from typing import Any, Literal, Optional, Union, get_args, get_type_hints
-    Args:
-        id_val: Optional string ID value to validate.
+from typing_extensions import NotRequired, TypedDict
-    Returns:
-        A valid string ID, either the provided value or a new UUID.
-    """
-    return id_val or str(f"lc_{uuid4()}")
+from langchain_core.utils.utils import ensure_id
 class Citation(TypedDict):
     """Annotation for citing data from a document.
     .. note::
-        ``start/end`` indices refer to the **response text**,
+        ``start``/``end`` indices refer to the **response text**,
         not the source text. This means that the indices are relative to the model's
         response, not the original document (as specified in the ``url``).
@@ -150,18 +161,12 @@ class Citation(TypedDict):
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     url: NotRequired[str]
     """URL of the document source."""
-    # For future consideration, if needed:
-    # provenance: NotRequired[str]
-    # """Provenance of the document, e.g., "Wikipedia", "arXiv", etc.
-    # Included for future compatibility; not currently implemented.
-    # """
     title: NotRequired[str]
     """Source document title.
@@ -169,12 +174,10 @@ class Citation(TypedDict):
     """
     start_index: NotRequired[int]
-    """Start index of the **response text** (``TextContentBlock.text``) for which the
-    annotation applies."""
+    """Start index of the **response text** (``TextContentBlock.text``)."""
     end_index: NotRequired[int]
-    """End index of the **response text** (``TextContentBlock.text``) for which the
-    annotation applies."""
+    """End index of the **response text** (``TextContentBlock.text``)."""
     cited_text: NotRequired[str]
     """Excerpt of source text being cited."""
@@ -196,10 +199,12 @@ class NonStandardAnnotation(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     value: dict[str, Any]
@@ -228,19 +233,21 @@ class TextContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     text: str
     """Block text."""
     annotations: NotRequired[list[Annotation]]
-    """Citations and other annotations."""
+    """``Citation`` objects and other annotations."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -280,6 +287,7 @@ class ToolCall(TypedDict):
     An identifier is needed to associate a tool call request with a tool
     call result in events when multiple concurrent tool calls are made.
     """
     # TODO: Consider making this NotRequired[str] in the future.
@@ -289,7 +297,7 @@ class ToolCall(TypedDict):
     args: dict[str, Any]
     """The arguments to the tool call."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -299,7 +307,7 @@ class ToolCall(TypedDict):
 class ToolCallChunk(TypedDict):
     """A chunk of a tool call (e.g., as part of a stream).
-    When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
+    When merging ``ToolCallChunk`` objects (e.g., via ``AIMessageChunk.__add__``),
     all string attributes are concatenated. Chunks are only merged if their
     values of ``index`` are equal and not ``None``.
@@ -314,15 +322,21 @@ class ToolCallChunk(TypedDict):
             AIMessageChunk(content="", tool_call_chunks=left_chunks)
             + AIMessageChunk(content="", tool_call_chunks=right_chunks)
         ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
     """
     # TODO: Consider making fields NotRequired[str] in the future.
-    type: NotRequired[Literal["tool_call_chunk"]]
+    type: Literal["tool_call_chunk"]
     """Used for serialization."""
     id: Optional[str]
-    """An identifier associated with the tool call."""
+    """An identifier associated with the tool call.
+    An identifier is needed to associate a tool call request with a tool
+    call result in events when multiple concurrent tool calls are made.
+    """
     name: Optional[str]
     """The name of the tool to be called."""
@@ -330,7 +344,7 @@ class ToolCallChunk(TypedDict):
     args: Optional[str]
     """The arguments to the tool call."""
-    index: Optional[int]
+    index: NotRequired[Union[int, str]]
     """The index of the tool call in a sequence."""
     extras: NotRequired[dict[str, Any]]
@@ -342,6 +356,7 @@ class InvalidToolCall(TypedDict):
     Here we add an ``error`` key to surface errors made during generation
     (e.g., invalid JSON arguments).
     """
     # TODO: Consider making fields NotRequired[str] in the future.
@@ -350,7 +365,12 @@ class InvalidToolCall(TypedDict):
     """Used for discrimination."""
     id: Optional[str]
-    """An identifier associated with the tool call."""
+    """An identifier associated with the tool call.
+    An identifier is needed to associate a tool call request with a tool
+    call result in events when multiple concurrent tool calls are made.
+    """
     name: Optional[str]
     """The name of the tool to be called."""
@@ -361,15 +381,13 @@ class InvalidToolCall(TypedDict):
     error: Optional[str]
     """An error message associated with the tool call."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
     """Provider-specific metadata."""
-# Note: These are not standard tool calls, but rather provider-specific built-in tools.
-# Web search
 class WebSearchCall(TypedDict):
     """Built-in web search tool call."""
@@ -377,16 +395,18 @@ class WebSearchCall(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     query: NotRequired[str]
     """The search query used in the web search tool call."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -400,16 +420,18 @@ class WebSearchResult(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     urls: NotRequired[list[str]]
     """List of URLs returned by the web search tool call."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -423,10 +445,12 @@ class CodeInterpreterCall(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     language: NotRequired[str]
@@ -435,7 +459,7 @@ class CodeInterpreterCall(TypedDict):
     code: NotRequired[str]
     """The code to be executed by the code interpreter."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -447,22 +471,26 @@ class CodeInterpreterOutput(TypedDict):
     Full output of a code interpreter tool call is represented by
     ``CodeInterpreterResult`` which is a list of these blocks.
     """
     type: Literal["code_interpreter_output"]
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     return_code: NotRequired[int]
     """Return code of the executed code.
     Example: ``0`` for success, non-zero for failure.
     """
     stderr: NotRequired[str]
@@ -474,12 +502,6 @@ class CodeInterpreterOutput(TypedDict):
     file_ids: NotRequired[list[str]]
     """List of file IDs generated by the code interpreter."""
-    index: NotRequired[int]
-    """Index of block in aggregate response. Used during streaming."""
-    extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
 class CodeInterpreterResult(TypedDict):
     """Result of a code interpreter tool call."""
@@ -488,16 +510,18 @@ class CodeInterpreterResult(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     output: list[CodeInterpreterOutput]
     """List of outputs from the code interpreter tool call."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -520,10 +544,12 @@ class ReasoningContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     reasoning: NotRequired[str]
@@ -531,9 +557,10 @@ class ReasoningContentBlock(TypedDict):
     Either the thought summary or the raw reasoning text itself. This is often parsed
     from ``<think>`` tags in the model's response.
     """
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     extras: NotRequired[dict[str, Any]]
@@ -559,10 +586,12 @@ class ImageContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     file_id: NotRequired[str]
@@ -572,9 +601,10 @@ class ImageContentBlock(TypedDict):
     """MIME type of the image. Required for base64.
     `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#image>`__
     """
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     url: NotRequired[str]
@@ -584,7 +614,7 @@ class ImageContentBlock(TypedDict):
     """Data as a base64 string."""
     extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
+    """Provider-specific metadata. This shouldn't be used for the image data itself."""
 class VideoContentBlock(TypedDict):
@@ -603,10 +633,12 @@ class VideoContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     file_id: NotRequired[str]
@@ -616,9 +648,10 @@ class VideoContentBlock(TypedDict):
     """MIME type of the video. Required for base64.
     `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#video>`__
     """
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     url: NotRequired[str]
@@ -628,7 +661,7 @@ class VideoContentBlock(TypedDict):
     """Data as a base64 string."""
     extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
+    """Provider-specific metadata. This shouldn't be used for the video data itself."""
 class AudioContentBlock(TypedDict):
@@ -637,7 +670,6 @@ class AudioContentBlock(TypedDict):
     .. note::
         ``create_audio_block`` may also be used as a factory to create an
         ``AudioContentBlock``. Benefits include:
         * Automatic ID generation (when not provided)
         * Required arguments strictly validated at creation time
@@ -647,10 +679,12 @@ class AudioContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     file_id: NotRequired[str]
@@ -660,9 +694,10 @@ class AudioContentBlock(TypedDict):
     """MIME type of the audio. Required for base64.
     `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml#audio>`__
     """
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     url: NotRequired[str]
@@ -672,7 +707,7 @@ class AudioContentBlock(TypedDict):
     """Data as a base64 string."""
     extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
+    """Provider-specific metadata. This shouldn't be used for the audio data itself."""
 class PlainTextContentBlock(TypedDict):
@@ -695,10 +730,12 @@ class PlainTextContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     file_id: NotRequired[str]
@@ -707,7 +744,7 @@ class PlainTextContentBlock(TypedDict):
     mime_type: Literal["text/plain"]
     """MIME type of the file. Required for base64."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     url: NotRequired[str]
@@ -726,7 +763,7 @@ class PlainTextContentBlock(TypedDict):
     """Context for the text, e.g., a description or summary of the text's content."""
     extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
+    """Provider-specific metadata. This shouldn't be used for the data itself."""
 class FileContentBlock(TypedDict):
@@ -752,10 +789,12 @@ class FileContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     file_id: NotRequired[str]
@@ -765,9 +804,10 @@ class FileContentBlock(TypedDict):
     """MIME type of the file. Required for base64.
     `Examples from IANA <https://www.iana.org/assignments/media-types/media-types.xhtml>`__
     """
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
     url: NotRequired[str]
@@ -777,7 +817,7 @@ class FileContentBlock(TypedDict):
     """Data as a base64 string."""
     extras: NotRequired[dict[str, Any]]
-    """Provider-specific metadata."""
+    """Provider-specific metadata. This shouldn't be used for the file data itself."""
 # Future modalities to consider:
@@ -793,7 +833,10 @@ class NonStandardContentBlock(TypedDict):
     The purpose of this block should be to simply hold a provider-specific payload.
     If a provider's non-standard output includes reasoning and tool calls, it should be
     the adapter's job to parse that payload and emit the corresponding standard
-    ReasoningContentBlock and ToolCallContentBlocks.
+    ``ReasoningContentBlock`` and ``ToolCall`` blocks.
+    Has no ``extras`` field, as provider-specific data should be included in the
+    ``value`` field.
     .. note::
         ``create_non_standard_block`` may also be used as a factory to create a
@@ -808,16 +851,18 @@ class NonStandardContentBlock(TypedDict):
     """Type of the content block. Used for discrimination."""
     id: NotRequired[str]
-    """Content block identifier. Either:
+    """Content block identifier.
+    Either:
     - Generated by the provider (e.g., OpenAI's file ID)
     - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
     """
     value: dict[str, Any]
     """Provider-specific data."""
-    index: NotRequired[int]
+    index: NotRequired[Union[int, str]]
     """Index of block in aggregate response. Used during streaming."""
@@ -832,8 +877,8 @@ DataContentBlock = Union[
 ToolContentBlock = Union[
     ToolCall,
+    ToolCallChunk,
     CodeInterpreterCall,
-    CodeInterpreterOutput,
     CodeInterpreterResult,
     WebSearchCall,
     WebSearchResult,
@@ -841,9 +886,7 @@ ToolContentBlock = Union[
 ContentBlock = Union[
     TextContentBlock,
-    ToolCall,
     InvalidToolCall,
-    ToolCallChunk,
     ReasoningContentBlock,
     NonStandardContentBlock,
     DataContentBlock,
@@ -851,135 +894,75 @@ ContentBlock = Union[
 ]
-def _extract_typedict_type_values(union_type: Any) -> set[str]:
-    """Extract the values of the 'type' field from a TypedDict union type."""
-    result: set[str] = set()
-    for value in get_args(union_type):
-        annotation = value.__annotations__["type"]
-        if get_origin(annotation) is Literal:
-            result.update(get_args(annotation))
-        else:
-            msg = f"{value} 'type' is not a Literal"
-            raise ValueError(msg)
-    return result
 KNOWN_BLOCK_TYPES = {
+    # Text output
     "text",
-    "text-plain",
+    "reasoning",
+    # Tools
     "tool_call",
     "invalid_tool_call",
     "tool_call_chunk",
-    "reasoning",
-    "non_standard",
+    # Multimodal data
     "image",
     "audio",
     "file",
+    "text-plain",
     "video",
+    # Server-side tool calls
     "code_interpreter_call",
-    "code_interpreter_output",
     "code_interpreter_result",
     "web_search_call",
     "web_search_result",
+    # Catch-all
+    "non_standard",
 }
+def _get_data_content_block_types() -> tuple[str, ...]:
+    """Get type literals from DataContentBlock union members dynamically."""
+    data_block_types = []
+    for block_type in get_args(DataContentBlock):
+        hints = get_type_hints(block_type)
+        if "type" in hints:
+            type_annotation = hints["type"]
+            if hasattr(type_annotation, "__args__"):
+                # This is a Literal type, get the literal value
+                literal_value = type_annotation.__args__[0]
+                data_block_types.append(literal_value)
+    return tuple(data_block_types)
 def is_data_content_block(block: dict) -> bool:
-    """Check if the content block is a standard data content block.
+    """Check if the provided content block is a standard v1 data content block.
     Args:
         block: The content block to check.
     Returns:
         True if the content block is a data content block, False otherwise.
     """
-    return block.get("type") in (
-        "audio",
-        "image",
-        "video",
-        "file",
-        "text-plain",
-    ) and any(
-        key in block
-        for key in (
-            "url",
-            "base64",
-            "file_id",
-            "text",
-            "source_type",  # backwards compatibility
-        )
-    )
+    if block.get("type") not in _get_data_content_block_types():
+        return False
+    if any(key in block for key in ("url", "base64", "file_id", "text")):
+        return True
-def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
-    """Convert image content block to format expected by OpenAI Chat Completions API."""
-    if "url" in block:
-        return {
-            "type": "image_url",
-            "image_url": {
-                "url": block["url"],
-            },
-        }
-    if "base64" in block or block.get("source_type") == "base64":
-        if "mime_type" not in block:
-            error_message = "mime_type key is required for base64 data."
-            raise ValueError(error_message)
-        mime_type = block["mime_type"]
-        base64_data = block["data"] if "data" in block else block["base64"]
-        return {
-            "type": "image_url",
-            "image_url": {
-                "url": f"data:{mime_type};base64,{base64_data}",
-            },
-        }
-    error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
-    raise ValueError(error_message)
-def convert_to_openai_data_block(block: dict) -> dict:
-    """Format standard data content block to format expected by OpenAI."""
-    if block["type"] == "image":
-        formatted_block = convert_to_openai_image_block(block)
-    elif block["type"] == "file":
-        if "base64" in block or block.get("source_type") == "base64":
-            base64_data = block["data"] if "source_type" in block else block["base64"]
-            file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
-            if filename := block.get("filename"):
-                file["filename"] = filename
-            elif (metadata := block.get("metadata")) and ("filename" in metadata):
-                file["filename"] = metadata["filename"]
-            else:
-                warnings.warn(
-                    "OpenAI may require a filename for file inputs. Specify a filename "
-                    "in the content block: {'type': 'file', 'mime_type': "
-                    "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}",
-                    stacklevel=1,
-                )
-            formatted_block = {"type": "file", "file": file}
-        elif "file_id" in block or block.get("source_type") == "id":
-            file_id = block["id"] if "source_type" in block else block["file_id"]
-            formatted_block = {"type": "file", "file": {"file_id": file_id}}
-        else:
-            error_msg = "Keys base64 or file_id required for file blocks."
-            raise ValueError(error_msg)
-    elif block["type"] == "audio":
-        if "base64" in block or block.get("source_type") == "base64":
-            base64_data = block["data"] if "source_type" in block else block["base64"]
-            audio_format = block["mime_type"].split("/")[-1]
-            formatted_block = {
-                "type": "input_audio",
-                "input_audio": {"data": base64_data, "format": audio_format},
-            }
-        else:
-            error_msg = "Key base64 is required for audio blocks."
-            raise ValueError(error_msg)
-    else:
-        error_msg = f"Block of type {block['type']} is not supported."
-        raise ValueError(error_msg)
+    # Verify data presence based on source type
+    if "source_type" in block:
+        source_type = block["source_type"]
+        if (source_type == "url" and "url" in block) or (
+            source_type == "base64" and "data" in block
+        ):
+            return True
+        if (source_type == "id" and "id" in block) or (
+            source_type == "text" and "url" in block
+        ):
+            return True
-    return formatted_block
+    return False
 def create_text_block(
@@ -987,14 +970,15 @@ def create_text_block(
     *,
     id: Optional[str] = None,
     annotations: Optional[list[Annotation]] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> TextContentBlock:
     """Create a ``TextContentBlock``.
     Args:
         text: The text content of the block.
         id: Content block identifier. Generated automatically if not provided.
-        annotations: Citations and other annotations for the text.
+        annotations: ``Citation``s and other annotations for the text.
         index: Index of block in aggregate response. Used during streaming.
     Returns:
@@ -1008,12 +992,17 @@ def create_text_block(
     block = TextContentBlock(
         type="text",
         text=text,
-        id=_ensure_id(id),
+        id=ensure_id(id),
     )
     if annotations is not None:
         block["annotations"] = annotations
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1024,7 +1013,8 @@ def create_image_block(
     file_id: Optional[str] = None,
     mime_type: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> ImageContentBlock:
     """Create an ``ImageContentBlock``.
@@ -1052,11 +1042,7 @@ def create_image_block(
         msg = "Must provide one of: url, base64, or file_id"
         raise ValueError(msg)
-    if base64 and not mime_type:
-        msg = "mime_type is required when using base64 data"
-        raise ValueError(msg)
-    block = ImageContentBlock(type="image", id=_ensure_id(id))
+    block = ImageContentBlock(type="image", id=ensure_id(id))
     if url is not None:
         block["url"] = url
@@ -1069,6 +1055,10 @@ def create_image_block(
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1079,7 +1069,8 @@ def create_video_block(
     file_id: Optional[str] = None,
     mime_type: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> VideoContentBlock:
     """Create a ``VideoContentBlock``.
@@ -1111,7 +1102,7 @@ def create_video_block(
         msg = "mime_type is required when using base64 data"
         raise ValueError(msg)
-    block = VideoContentBlock(type="video", id=_ensure_id(id))
+    block = VideoContentBlock(type="video", id=ensure_id(id))
     if url is not None:
         block["url"] = url
@@ -1124,6 +1115,10 @@ def create_video_block(
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1134,7 +1129,8 @@ def create_audio_block(
     file_id: Optional[str] = None,
     mime_type: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> AudioContentBlock:
     """Create an ``AudioContentBlock``.
@@ -1166,7 +1162,7 @@ def create_audio_block(
         msg = "mime_type is required when using base64 data"
         raise ValueError(msg)
-    block = AudioContentBlock(type="audio", id=_ensure_id(id))
+    block = AudioContentBlock(type="audio", id=ensure_id(id))
     if url is not None:
         block["url"] = url
@@ -1179,6 +1175,10 @@ def create_audio_block(
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1189,7 +1189,8 @@ def create_file_block(
     file_id: Optional[str] = None,
     mime_type: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> FileContentBlock:
     """Create a ``FileContentBlock``.
@@ -1221,7 +1222,7 @@ def create_file_block(
         msg = "mime_type is required when using base64 data"
         raise ValueError(msg)
-    block = FileContentBlock(type="file", id=_ensure_id(id))
+    block = FileContentBlock(type="file", id=ensure_id(id))
     if url is not None:
         block["url"] = url
@@ -1234,19 +1235,23 @@ def create_file_block(
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
 def create_plaintext_block(
-    text: str,
-    *,
+    text: Optional[str] = None,
     url: Optional[str] = None,
     base64: Optional[str] = None,
     file_id: Optional[str] = None,
     title: Optional[str] = None,
     context: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> PlainTextContentBlock:
     """Create a ``PlainTextContentBlock``.
@@ -1271,10 +1276,11 @@ def create_plaintext_block(
     block = PlainTextContentBlock(
         type="text-plain",
         mime_type="text/plain",
-        text=text,
-        id=_ensure_id(id),
+        id=ensure_id(id),
     )
+    if text is not None:
+        block["text"] = text
     if url is not None:
         block["url"] = url
     if base64 is not None:
@@ -1288,6 +1294,10 @@ def create_plaintext_block(
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1296,7 +1306,8 @@ def create_tool_call(
     args: dict[str, Any],
     *,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> ToolCall:
     """Create a ``ToolCall``.
@@ -1318,19 +1329,24 @@ def create_tool_call(
         type="tool_call",
         name=name,
         args=args,
-        id=_ensure_id(id),
+        id=ensure_id(id),
     )
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
 def create_reasoning_block(
     reasoning: Optional[str] = None,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
+    **kwargs: Any,
 ) -> ReasoningContentBlock:
     """Create a ``ReasoningContentBlock``.
@@ -1350,12 +1366,16 @@ def create_reasoning_block(
     block = ReasoningContentBlock(
         type="reasoning",
         reasoning=reasoning or "",
-        id=_ensure_id(id),
+        id=ensure_id(id),
     )
     if index is not None:
         block["index"] = index
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1367,6 +1387,7 @@ def create_citation(
     end_index: Optional[int] = None,
     cited_text: Optional[str] = None,
     id: Optional[str] = None,
+    **kwargs: Any,
 ) -> Citation:
     """Create a ``Citation``.
@@ -1386,7 +1407,7 @@ def create_citation(
         prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
     """
-    block = Citation(type="citation", id=_ensure_id(id))
+    block = Citation(type="citation", id=ensure_id(id))
     if url is not None:
         block["url"] = url
@@ -1399,6 +1420,10 @@ def create_citation(
     if cited_text is not None:
         block["cited_text"] = cited_text
+    extras = {k: v for k, v in kwargs.items() if v is not None}
+    if extras:
+        block["extras"] = extras
     return block
@@ -1406,7 +1431,7 @@ def create_non_standard_block(
     value: dict[str, Any],
     *,
     id: Optional[str] = None,
-    index: Optional[int] = None,
+    index: Optional[Union[int, str]] = None,
 ) -> NonStandardContentBlock:
     """Create a ``NonStandardContentBlock``.
@@ -1426,7 +1451,7 @@ def create_non_standard_block(
     block = NonStandardContentBlock(
         type="non_standard",
         value=value,
-        id=_ensure_id(id),
+        id=ensure_id(id),
     )
     if index is not None:

langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl