langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. Click here for more details.

Files changed (172) hide show
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
@@ -6,28 +6,28 @@ import asyncio
6
6
  import inspect
7
7
  import json
8
8
  import typing
9
- import warnings
10
9
  from abc import ABC, abstractmethod
11
- from collections.abc import AsyncIterator, Iterator, Sequence
10
+ from collections.abc import AsyncIterator, Callable, Iterator, Sequence
12
11
  from functools import cached_property
13
12
  from operator import itemgetter
14
- from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
13
+ from typing import TYPE_CHECKING, Any, Literal, cast
15
14
 
16
- from pydantic import BaseModel, ConfigDict, Field, model_validator
15
+ from pydantic import BaseModel, ConfigDict, Field
17
16
  from typing_extensions import override
18
17
 
19
- from langchain_core._api import deprecated
20
18
  from langchain_core.caches import BaseCache
21
19
  from langchain_core.callbacks import (
22
20
  AsyncCallbackManager,
23
21
  AsyncCallbackManagerForLLMRun,
24
- BaseCallbackManager,
25
22
  CallbackManager,
26
23
  CallbackManagerForLLMRun,
27
24
  Callbacks,
28
25
  )
29
26
  from langchain_core.globals import get_llm_cache
30
- from langchain_core.language_models._utils import _normalize_messages
27
+ from langchain_core.language_models._utils import (
28
+ _normalize_messages,
29
+ _update_message_content_to_blocks,
30
+ )
31
31
  from langchain_core.language_models.base import (
32
32
  BaseLanguageModel,
33
33
  LangSmithParams,
@@ -36,16 +36,21 @@ from langchain_core.language_models.base import (
36
36
  from langchain_core.load import dumpd, dumps
37
37
  from langchain_core.messages import (
38
38
  AIMessage,
39
+ AIMessageChunk,
39
40
  AnyMessage,
40
41
  BaseMessage,
41
- BaseMessageChunk,
42
- HumanMessage,
43
42
  convert_to_messages,
44
- convert_to_openai_image_block,
45
43
  is_data_content_block,
46
44
  message_chunk_to_message,
47
45
  )
48
- from langchain_core.messages.ai import _LC_ID_PREFIX
46
+ from langchain_core.messages import content as types
47
+ from langchain_core.messages.block_translators.openai import (
48
+ convert_to_openai_image_block,
49
+ )
50
+ from langchain_core.output_parsers.openai_tools import (
51
+ JsonOutputKeyToolsParser,
52
+ PydanticToolsParser,
53
+ )
49
54
  from langchain_core.outputs import (
50
55
  ChatGeneration,
51
56
  ChatGenerationChunk,
@@ -65,6 +70,7 @@ from langchain_core.utils.function_calling import (
65
70
  convert_to_openai_tool,
66
71
  )
67
72
  from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
73
+ from langchain_core.utils.utils import LC_ID_PREFIX, from_env
68
74
 
69
75
  if TYPE_CHECKING:
70
76
  import uuid
@@ -78,6 +84,11 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
78
84
  if hasattr(error, "response"):
79
85
  response = error.response
80
86
  metadata: dict = {}
87
+ if hasattr(response, "json"):
88
+ try:
89
+ metadata["body"] = response.json()
90
+ except Exception:
91
+ metadata["body"] = getattr(response, "text", None)
81
92
  if hasattr(response, "headers"):
82
93
  try:
83
94
  metadata["headers"] = dict(response.headers)
@@ -97,17 +108,18 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
97
108
 
98
109
 
99
110
  def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
100
- """Format messages for tracing in on_chat_model_start.
111
+ """Format messages for tracing in `on_chat_model_start`.
101
112
 
102
113
  - Update image content blocks to OpenAI Chat Completions format (backward
103
114
  compatibility).
104
- - Add "type" key to content blocks that have a single key.
115
+ - Add `type` key to content blocks that have a single key.
105
116
 
106
117
  Args:
107
118
  messages: List of messages to format.
108
119
 
109
120
  Returns:
110
121
  List of messages formatted for tracing.
122
+
111
123
  """
112
124
  messages_to_trace = []
113
125
  for message in messages:
@@ -119,7 +131,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
119
131
  if (
120
132
  block.get("type") == "image"
121
133
  and is_data_content_block(block)
122
- and block.get("source_type") != "id"
134
+ and not ("file_id" in block or block.get("source_type") == "id")
123
135
  ):
124
136
  if message_to_trace is message:
125
137
  # Shallow copy
@@ -129,6 +141,22 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
129
141
  message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
130
142
  convert_to_openai_image_block(block)
131
143
  )
144
+ elif (
145
+ block.get("type") == "file"
146
+ and is_data_content_block(block) # v0 (image/audio/file) or v1
147
+ and "base64" in block
148
+ # Backward compat: convert v1 base64 blocks to v0
149
+ ):
150
+ if message_to_trace is message:
151
+ # Shallow copy
152
+ message_to_trace = message.model_copy()
153
+ message_to_trace.content = list(message_to_trace.content)
154
+
155
+ message_to_trace.content[idx] = { # type: ignore[index]
156
+ **{k: v for k, v in block.items() if k != "base64"},
157
+ "data": block["base64"],
158
+ "source_type": "base64",
159
+ }
132
160
  elif len(block) == 1 and "type" not in block:
133
161
  # Tracing assumes all content blocks have a "type" key. Here
134
162
  # we add this key if it is missing, and there's an obvious
@@ -142,8 +170,6 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
142
170
  "type": key,
143
171
  key: block[key],
144
172
  }
145
- else:
146
- pass
147
173
  messages_to_trace.append(message_to_trace)
148
174
 
149
175
  return messages_to_trace
@@ -153,10 +179,14 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
153
179
  """Generate from a stream.
154
180
 
155
181
  Args:
156
- stream: Iterator of ChatGenerationChunk.
182
+ stream: Iterator of `ChatGenerationChunk`.
183
+
184
+ Raises:
185
+ ValueError: If no generations are found in the stream.
157
186
 
158
187
  Returns:
159
- ChatResult: Chat result.
188
+ Chat result.
189
+
160
190
  """
161
191
  generation = next(stream, None)
162
192
  if generation:
@@ -180,16 +210,17 @@ async def agenerate_from_stream(
180
210
  """Async generate from a stream.
181
211
 
182
212
  Args:
183
- stream: Iterator of ChatGenerationChunk.
213
+ stream: Iterator of `ChatGenerationChunk`.
184
214
 
185
215
  Returns:
186
- ChatResult: Chat result.
216
+ Chat result.
217
+
187
218
  """
188
219
  chunks = [chunk async for chunk in stream]
189
220
  return await run_in_executor(None, generate_from_stream, iter(chunks))
190
221
 
191
222
 
192
- def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) -> dict:
223
+ def _format_ls_structured_output(ls_structured_output_format: dict | None) -> dict:
193
224
  if ls_structured_output_format:
194
225
  try:
195
226
  ls_structured_output_format_dict = {
@@ -208,136 +239,99 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) ->
208
239
  return ls_structured_output_format_dict
209
240
 
210
241
 
211
- class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
212
- """Base class for chat models.
242
+ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
243
+ r"""Base class for chat models.
213
244
 
214
245
  Key imperative methods:
215
246
  Methods that actually call the underlying model.
216
247
 
217
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
218
- | Method | Input | Output | Description |
219
- +===========================+================================================================+=====================================================================+==================================================================================================+
220
- | `invoke` | str | list[dict | tuple | BaseMessage] | PromptValue | BaseMessage | A single chat model call. |
221
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
222
- | `ainvoke` | ''' | BaseMessage | Defaults to running invoke in an async executor. |
223
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
224
- | `stream` | ''' | Iterator[BaseMessageChunk] | Defaults to yielding output of invoke. |
225
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
226
- | `astream` | ''' | AsyncIterator[BaseMessageChunk] | Defaults to yielding output of ainvoke. |
227
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
228
- | `astream_events` | ''' | AsyncIterator[StreamEvent] | Event types: 'on_chat_model_start', 'on_chat_model_stream', 'on_chat_model_end'. |
229
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
230
- | `batch` | list['''] | list[BaseMessage] | Defaults to running invoke in concurrent threads. |
231
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
232
- | `abatch` | list['''] | list[BaseMessage] | Defaults to running ainvoke in concurrent threads. |
233
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
234
- | `batch_as_completed` | list['''] | Iterator[tuple[int, Union[BaseMessage, Exception]]] | Defaults to running invoke in concurrent threads. |
235
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
236
- | `abatch_as_completed` | list['''] | AsyncIterator[tuple[int, Union[BaseMessage, Exception]]] | Defaults to running ainvoke in concurrent threads. |
237
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
238
-
239
- This table provides a brief overview of the main imperative methods. Please see the base Runnable reference for full documentation.
248
+ This table provides a brief overview of the main imperative methods. Please see the base `Runnable` reference for full documentation.
249
+
250
+ | Method | Input | Output | Description |
251
+ | ---------------------- | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------- |
252
+ | `invoke` | `str` \| `list[dict | tuple | BaseMessage]` \| `PromptValue` | `BaseMessage` | A single chat model call. |
253
+ | `ainvoke` | `'''` | `BaseMessage` | Defaults to running `invoke` in an async executor. |
254
+ | `stream` | `'''` | `Iterator[BaseMessageChunk]` | Defaults to yielding output of `invoke`. |
255
+ | `astream` | `'''` | `AsyncIterator[BaseMessageChunk]` | Defaults to yielding output of `ainvoke`. |
256
+ | `astream_events` | `'''` | `AsyncIterator[StreamEvent]` | Event types: `on_chat_model_start`, `on_chat_model_stream`, `on_chat_model_end`. |
257
+ | `batch` | `list[''']` | `list[BaseMessage]` | Defaults to running `invoke` in concurrent threads. |
258
+ | `abatch` | `list[''']` | `list[BaseMessage]` | Defaults to running `ainvoke` in concurrent threads. |
259
+ | `batch_as_completed` | `list[''']` | `Iterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `invoke` in concurrent threads. |
260
+ | `abatch_as_completed` | `list[''']` | `AsyncIterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `ainvoke` in concurrent threads. |
240
261
 
241
262
  Key declarative methods:
242
- Methods for creating another Runnable using the ChatModel.
243
-
244
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
245
- | Method | Description |
246
- +==================================+===========================================================================================================+
247
- | `bind_tools` | Create ChatModel that can call tools. |
248
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
249
- | `with_structured_output` | Create wrapper that structures model output using schema. |
250
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
251
- | `with_retry` | Create wrapper that retries model calls on failure. |
252
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
253
- | `with_fallbacks` | Create wrapper that falls back to other models on failure. |
254
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
255
- | `configurable_fields` | Specify init args of the model that can be configured at runtime via the RunnableConfig. |
256
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
257
- | `configurable_alternatives` | Specify alternative models which can be swapped in at runtime via the RunnableConfig. |
258
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
263
+ Methods for creating another `Runnable` using the chat model.
259
264
 
260
265
  This table provides a brief overview of the main declarative methods. Please see the reference for each method for full documentation.
261
266
 
267
+ | Method | Description |
268
+ | ---------------------------- | ------------------------------------------------------------------------------------------ |
269
+ | `bind_tools` | Create chat model that can call tools. |
270
+ | `with_structured_output` | Create wrapper that structures model output using schema. |
271
+ | `with_retry` | Create wrapper that retries model calls on failure. |
272
+ | `with_fallbacks` | Create wrapper that falls back to other models on failure. |
273
+ | `configurable_fields` | Specify init args of the model that can be configured at runtime via the `RunnableConfig`. |
274
+ | `configurable_alternatives` | Specify alternative models which can be swapped in at runtime via the `RunnableConfig`. |
275
+
262
276
  Creating custom chat model:
263
277
  Custom chat model implementations should inherit from this class.
264
278
  Please reference the table below for information about which
265
279
  methods and properties are required or optional for implementations.
266
280
 
267
- +----------------------------------+--------------------------------------------------------------------+-------------------+
268
- | Method/Property | Description | Required/Optional |
269
- +==================================+====================================================================+===================+
281
+ | Method/Property | Description | Required |
282
+ | -------------------------------- | ------------------------------------------------------------------ | ----------------- |
270
283
  | `_generate` | Use to generate a chat result from a prompt | Required |
271
- +----------------------------------+--------------------------------------------------------------------+-------------------+
272
284
  | `_llm_type` (property) | Used to uniquely identify the type of the model. Used for logging. | Required |
273
- +----------------------------------+--------------------------------------------------------------------+-------------------+
274
285
  | `_identifying_params` (property) | Represent model parameterization for tracing purposes. | Optional |
275
- +----------------------------------+--------------------------------------------------------------------+-------------------+
276
286
  | `_stream` | Use to implement streaming | Optional |
277
- +----------------------------------+--------------------------------------------------------------------+-------------------+
278
287
  | `_agenerate` | Use to implement a native async method | Optional |
279
- +----------------------------------+--------------------------------------------------------------------+-------------------+
280
288
  | `_astream` | Use to implement async version of `_stream` | Optional |
281
- +----------------------------------+--------------------------------------------------------------------+-------------------+
282
-
283
- Follow the guide for more information on how to implement a custom Chat Model:
284
- [Guide](https://python.langchain.com/docs/how_to/custom_chat_model/).
285
289
 
286
290
  """ # noqa: E501
287
291
 
288
- callback_manager: Optional[BaseCallbackManager] = deprecated(
289
- name="callback_manager", since="0.1.7", removal="1.0", alternative="callbacks"
290
- )(
291
- Field(
292
- default=None,
293
- exclude=True,
294
- description="Callback manager to add to the run trace.",
295
- )
296
- )
297
-
298
- rate_limiter: Optional[BaseRateLimiter] = Field(default=None, exclude=True)
292
+ rate_limiter: BaseRateLimiter | None = Field(default=None, exclude=True)
299
293
  "An optional rate limiter to use for limiting the number of requests."
300
294
 
301
- disable_streaming: Union[bool, Literal["tool_calling"]] = False
295
+ disable_streaming: bool | Literal["tool_calling"] = False
302
296
  """Whether to disable streaming for this model.
303
297
 
304
- If streaming is bypassed, then ``stream()``/``astream()``/``astream_events()`` will
305
- defer to ``invoke()``/``ainvoke()``.
298
+ If streaming is bypassed, then `stream`/`astream`/`astream_events` will
299
+ defer to `invoke`/`ainvoke`.
306
300
 
307
- - If True, will always bypass streaming case.
308
- - If ``'tool_calling'``, will bypass streaming case only when the model is called
309
- with a ``tools`` keyword argument. In other words, LangChain will automatically
310
- switch to non-streaming behavior (``invoke()``) only when the tools argument is
311
- provided. This offers the best of both worlds.
312
- - If False (default), will always use streaming case if available.
301
+ - If `True`, will always bypass streaming case.
302
+ - If `'tool_calling'`, will bypass streaming case only when the model is called
303
+ with a `tools` keyword argument. In other words, LangChain will automatically
304
+ switch to non-streaming behavior (`invoke`) only when the tools argument is
305
+ provided. This offers the best of both worlds.
306
+ - If `False` (Default), will always use streaming case if available.
313
307
 
314
- The main reason for this flag is that code might be written using ``.stream()`` and
308
+ The main reason for this flag is that code might be written using `stream` and
315
309
  a user may want to swap out a given model for another model whose the implementation
316
310
  does not properly support streaming.
317
311
  """
318
312
 
319
- @model_validator(mode="before")
320
- @classmethod
321
- def raise_deprecation(cls, values: dict) -> Any:
322
- """Raise deprecation warning if callback_manager is used.
313
+ output_version: str | None = Field(
314
+ default_factory=from_env("LC_OUTPUT_VERSION", default=None)
315
+ )
316
+ """Version of `AIMessage` output format to store in message content.
323
317
 
324
- Args:
325
- values (Dict): Values to validate.
318
+ `AIMessage.content_blocks` will lazily parse the contents of `content` into a
319
+ standard format. This flag can be used to additionally store the standard format
320
+ in message content, e.g., for serialization purposes.
326
321
 
327
- Returns:
328
- Dict: Validated values.
322
+ Supported values:
329
323
 
330
- Raises:
331
- DeprecationWarning: If callback_manager is used.
332
- """
333
- if values.get("callback_manager") is not None:
334
- warnings.warn(
335
- "callback_manager is deprecated. Please use callbacks instead.",
336
- DeprecationWarning,
337
- stacklevel=5,
338
- )
339
- values["callbacks"] = values.pop("callback_manager", None)
340
- return values
324
+ - `'v0'`: provider-specific format in content (can lazily-parse with
325
+ `content_blocks`)
326
+ - `'v1'`: standardized format in content (consistent with `content_blocks`)
327
+
328
+ Partner packages (e.g.,
329
+ [`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
330
+ field to roll out new content formats in a backward-compatible way.
331
+
332
+ !!! version-added "Added in version 1.0"
333
+
334
+ """
341
335
 
342
336
  model_config = ConfigDict(
343
337
  arbitrary_types_allowed=True,
@@ -352,7 +346,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
352
346
  @property
353
347
  @override
354
348
  def OutputType(self) -> Any:
355
- """Get the output type for this runnable."""
349
+ """Get the output type for this `Runnable`."""
356
350
  return AnyMessage
357
351
 
358
352
  def _convert_input(self, model_input: LanguageModelInput) -> PromptValue:
@@ -372,35 +366,38 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
372
366
  def invoke(
373
367
  self,
374
368
  input: LanguageModelInput,
375
- config: Optional[RunnableConfig] = None,
369
+ config: RunnableConfig | None = None,
376
370
  *,
377
- stop: Optional[list[str]] = None,
371
+ stop: list[str] | None = None,
378
372
  **kwargs: Any,
379
- ) -> BaseMessage:
373
+ ) -> AIMessage:
380
374
  config = ensure_config(config)
381
375
  return cast(
382
- "ChatGeneration",
383
- self.generate_prompt(
384
- [self._convert_input(input)],
385
- stop=stop,
386
- callbacks=config.get("callbacks"),
387
- tags=config.get("tags"),
388
- metadata=config.get("metadata"),
389
- run_name=config.get("run_name"),
390
- run_id=config.pop("run_id", None),
391
- **kwargs,
392
- ).generations[0][0],
393
- ).message
376
+ "AIMessage",
377
+ cast(
378
+ "ChatGeneration",
379
+ self.generate_prompt(
380
+ [self._convert_input(input)],
381
+ stop=stop,
382
+ callbacks=config.get("callbacks"),
383
+ tags=config.get("tags"),
384
+ metadata=config.get("metadata"),
385
+ run_name=config.get("run_name"),
386
+ run_id=config.pop("run_id", None),
387
+ **kwargs,
388
+ ).generations[0][0],
389
+ ).message,
390
+ )
394
391
 
395
392
  @override
396
393
  async def ainvoke(
397
394
  self,
398
395
  input: LanguageModelInput,
399
- config: Optional[RunnableConfig] = None,
396
+ config: RunnableConfig | None = None,
400
397
  *,
401
- stop: Optional[list[str]] = None,
398
+ stop: list[str] | None = None,
402
399
  **kwargs: Any,
403
- ) -> BaseMessage:
400
+ ) -> AIMessage:
404
401
  config = ensure_config(config)
405
402
  llm_result = await self.agenerate_prompt(
406
403
  [self._convert_input(input)],
@@ -412,15 +409,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
412
409
  run_id=config.pop("run_id", None),
413
410
  **kwargs,
414
411
  )
415
- return cast("ChatGeneration", llm_result.generations[0][0]).message
412
+ return cast(
413
+ "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message
414
+ )
416
415
 
417
416
  def _should_stream(
418
417
  self,
419
418
  *,
420
419
  async_api: bool,
421
- run_manager: Optional[
422
- Union[CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun]
423
- ] = None,
420
+ run_manager: CallbackManagerForLLMRun
421
+ | AsyncCallbackManagerForLLMRun
422
+ | None = None,
424
423
  **kwargs: Any,
425
424
  ) -> bool:
426
425
  """Determine if a given model call should hit the streaming API."""
@@ -445,6 +444,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
445
444
  if "stream" in kwargs:
446
445
  return kwargs["stream"]
447
446
 
447
+ if "streaming" in self.model_fields_set:
448
+ streaming_value = getattr(self, "streaming", None)
449
+ if isinstance(streaming_value, bool):
450
+ return streaming_value
451
+
448
452
  # Check if any streaming callback handlers have been passed in.
449
453
  handlers = run_manager.handlers if run_manager else []
450
454
  return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)
@@ -453,15 +457,15 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
453
457
  def stream(
454
458
  self,
455
459
  input: LanguageModelInput,
456
- config: Optional[RunnableConfig] = None,
460
+ config: RunnableConfig | None = None,
457
461
  *,
458
- stop: Optional[list[str]] = None,
462
+ stop: list[str] | None = None,
459
463
  **kwargs: Any,
460
- ) -> Iterator[BaseMessageChunk]:
464
+ ) -> Iterator[AIMessageChunk]:
461
465
  if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
462
- # model doesn't implement streaming, so use default implementation
466
+ # Model doesn't implement streaming, so use default implementation
463
467
  yield cast(
464
- "BaseMessageChunk",
468
+ "AIMessageChunk",
465
469
  self.invoke(input, config=config, stop=stop, **kwargs),
466
470
  )
467
471
  else:
@@ -506,16 +510,51 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
506
510
 
507
511
  try:
508
512
  input_messages = _normalize_messages(messages)
509
- run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
513
+ run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
514
+ yielded = False
515
+ index = -1
516
+ index_type = ""
510
517
  for chunk in self._stream(input_messages, stop=stop, **kwargs):
511
518
  if chunk.message.id is None:
512
519
  chunk.message.id = run_id
513
520
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
521
+ if self.output_version == "v1":
522
+ # Overwrite .content with .content_blocks
523
+ chunk.message = _update_message_content_to_blocks(
524
+ chunk.message, "v1"
525
+ )
526
+ for block in cast(
527
+ "list[types.ContentBlock]", chunk.message.content
528
+ ):
529
+ if block["type"] != index_type:
530
+ index_type = block["type"]
531
+ index = index + 1
532
+ if "index" not in block:
533
+ block["index"] = index
514
534
  run_manager.on_llm_new_token(
515
535
  cast("str", chunk.message.content), chunk=chunk
516
536
  )
517
537
  chunks.append(chunk)
518
- yield chunk.message
538
+ yield cast("AIMessageChunk", chunk.message)
539
+ yielded = True
540
+
541
+ # Yield a final empty chunk with chunk_position="last" if not yet
542
+ # yielded
543
+ if (
544
+ yielded
545
+ and isinstance(chunk.message, AIMessageChunk)
546
+ and not chunk.message.chunk_position
547
+ ):
548
+ empty_content: str | list = (
549
+ "" if isinstance(chunk.message.content, str) else []
550
+ )
551
+ msg_chunk = AIMessageChunk(
552
+ content=empty_content, chunk_position="last", id=run_id
553
+ )
554
+ run_manager.on_llm_new_token(
555
+ "", chunk=ChatGenerationChunk(message=msg_chunk)
556
+ )
557
+ yield msg_chunk
519
558
  except BaseException as e:
520
559
  generations_with_error_metadata = _generate_response_from_error(e)
521
560
  chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -528,7 +567,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
528
567
  generations = [generations_with_error_metadata]
529
568
  run_manager.on_llm_error(
530
569
  e,
531
- response=LLMResult(generations=generations), # type: ignore[arg-type]
570
+ response=LLMResult(generations=generations),
532
571
  )
533
572
  raise
534
573
 
@@ -544,15 +583,15 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
544
583
  async def astream(
545
584
  self,
546
585
  input: LanguageModelInput,
547
- config: Optional[RunnableConfig] = None,
586
+ config: RunnableConfig | None = None,
548
587
  *,
549
- stop: Optional[list[str]] = None,
588
+ stop: list[str] | None = None,
550
589
  **kwargs: Any,
551
- ) -> AsyncIterator[BaseMessageChunk]:
590
+ ) -> AsyncIterator[AIMessageChunk]:
552
591
  if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
553
592
  # No async or sync stream is implemented, so fall back to ainvoke
554
593
  yield cast(
555
- "BaseMessageChunk",
594
+ "AIMessageChunk",
556
595
  await self.ainvoke(input, config=config, stop=stop, **kwargs),
557
596
  )
558
597
  return
@@ -599,7 +638,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
599
638
 
600
639
  try:
601
640
  input_messages = _normalize_messages(messages)
602
- run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
641
+ run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
642
+ yielded = False
643
+ index = -1
644
+ index_type = ""
603
645
  async for chunk in self._astream(
604
646
  input_messages,
605
647
  stop=stop,
@@ -608,11 +650,42 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
608
650
  if chunk.message.id is None:
609
651
  chunk.message.id = run_id
610
652
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
653
+ if self.output_version == "v1":
654
+ # Overwrite .content with .content_blocks
655
+ chunk.message = _update_message_content_to_blocks(
656
+ chunk.message, "v1"
657
+ )
658
+ for block in cast(
659
+ "list[types.ContentBlock]", chunk.message.content
660
+ ):
661
+ if block["type"] != index_type:
662
+ index_type = block["type"]
663
+ index = index + 1
664
+ if "index" not in block:
665
+ block["index"] = index
611
666
  await run_manager.on_llm_new_token(
612
667
  cast("str", chunk.message.content), chunk=chunk
613
668
  )
614
669
  chunks.append(chunk)
615
- yield chunk.message
670
+ yield cast("AIMessageChunk", chunk.message)
671
+ yielded = True
672
+
673
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
674
+ if (
675
+ yielded
676
+ and isinstance(chunk.message, AIMessageChunk)
677
+ and not chunk.message.chunk_position
678
+ ):
679
+ empty_content: str | list = (
680
+ "" if isinstance(chunk.message.content, str) else []
681
+ )
682
+ msg_chunk = AIMessageChunk(
683
+ content=empty_content, chunk_position="last", id=run_id
684
+ )
685
+ await run_manager.on_llm_new_token(
686
+ "", chunk=ChatGenerationChunk(message=msg_chunk)
687
+ )
688
+ yield msg_chunk
616
689
  except BaseException as e:
617
690
  generations_with_error_metadata = _generate_response_from_error(e)
618
691
  chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -622,7 +695,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
622
695
  generations = [generations_with_error_metadata]
623
696
  await run_manager.on_llm_error(
624
697
  e,
625
- response=LLMResult(generations=generations), # type: ignore[arg-type]
698
+ response=LLMResult(generations=generations),
626
699
  )
627
700
  raise
628
701
 
@@ -638,7 +711,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
638
711
 
639
712
  # --- Custom methods ---
640
713
 
641
- def _combine_llm_outputs(self, llm_outputs: list[Optional[dict]]) -> dict: # noqa: ARG002
714
+ def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict: # noqa: ARG002
642
715
  return {}
643
716
 
644
717
  def _convert_cached_generations(self, cache_val: list) -> list[ChatGeneration]:
@@ -653,6 +726,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
653
726
 
654
727
  Returns:
655
728
  List of ChatGeneration objects.
729
+
656
730
  """
657
731
  converted_generations = []
658
732
  for gen in cache_val:
@@ -666,12 +740,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
666
740
  converted_generations.append(chat_gen)
667
741
  else:
668
742
  # Already a ChatGeneration or other expected type
743
+ if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
744
+ # We zero out cost on cache hits
745
+ gen.message = gen.message.model_copy(
746
+ update={
747
+ "usage_metadata": {
748
+ **(gen.message.usage_metadata or {}),
749
+ "total_cost": 0,
750
+ }
751
+ }
752
+ )
669
753
  converted_generations.append(gen)
670
754
  return converted_generations
671
755
 
672
756
  def _get_invocation_params(
673
757
  self,
674
- stop: Optional[list[str]] = None,
758
+ stop: list[str] | None = None,
675
759
  **kwargs: Any,
676
760
  ) -> dict:
677
761
  params = self.dict()
@@ -680,7 +764,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
680
764
 
681
765
  def _get_ls_params(
682
766
  self,
683
- stop: Optional[list[str]] = None,
767
+ stop: list[str] | None = None,
684
768
  **kwargs: Any,
685
769
  ) -> LangSmithParams:
686
770
  """Get standard params for tracing."""
@@ -697,7 +781,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
697
781
  ls_params["ls_stop"] = stop
698
782
 
699
783
  # model
700
- if hasattr(self, "model") and isinstance(self.model, str):
784
+ if "model" in kwargs and isinstance(kwargs["model"], str):
785
+ ls_params["ls_model_name"] = kwargs["model"]
786
+ elif hasattr(self, "model") and isinstance(self.model, str):
701
787
  ls_params["ls_model_name"] = self.model
702
788
  elif hasattr(self, "model_name") and isinstance(self.model_name, str):
703
789
  ls_params["ls_model_name"] = self.model_name
@@ -716,7 +802,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
716
802
 
717
803
  return ls_params
718
804
 
719
- def _get_llm_string(self, stop: Optional[list[str]] = None, **kwargs: Any) -> str:
805
+ def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:
720
806
  if self.is_lc_serializable():
721
807
  params = {**kwargs, "stop": stop}
722
808
  param_string = str(sorted(params.items()))
@@ -733,13 +819,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
733
819
  def generate(
734
820
  self,
735
821
  messages: list[list[BaseMessage]],
736
- stop: Optional[list[str]] = None,
822
+ stop: list[str] | None = None,
737
823
  callbacks: Callbacks = None,
738
824
  *,
739
- tags: Optional[list[str]] = None,
740
- metadata: Optional[dict[str, Any]] = None,
741
- run_name: Optional[str] = None,
742
- run_id: Optional[uuid.UUID] = None,
825
+ tags: list[str] | None = None,
826
+ metadata: dict[str, Any] | None = None,
827
+ run_name: str | None = None,
828
+ run_id: uuid.UUID | None = None,
743
829
  **kwargs: Any,
744
830
  ) -> LLMResult:
745
831
  """Pass a sequence of prompts to the model and return model generations.
@@ -748,16 +834,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
748
834
  API.
749
835
 
750
836
  Use this method when you want to:
751
- 1. take advantage of batched calls,
752
- 2. need more output from the model than just the top generated value,
753
- 3. are building chains that are agnostic to the underlying language model
754
- type (e.g., pure text completion models vs chat models).
837
+
838
+ 1. Take advantage of batched calls,
839
+ 2. Need more output from the model than just the top generated value,
840
+ 3. Are building chains that are agnostic to the underlying language model
841
+ type (e.g., pure text completion models vs chat models).
755
842
 
756
843
  Args:
757
844
  messages: List of list of messages.
758
845
  stop: Stop words to use when generating. Model output is cut off at the
759
846
  first occurrence of any of these substrings.
760
- callbacks: Callbacks to pass through. Used for executing additional
847
+ callbacks: `Callbacks` to pass through. Used for executing additional
761
848
  functionality, such as logging or streaming, throughout generation.
762
849
  tags: The tags to apply.
763
850
  metadata: The metadata to apply.
@@ -767,8 +854,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
767
854
  to the model provider API call.
768
855
 
769
856
  Returns:
770
- An LLMResult, which contains a list of candidate Generations for each input
771
- prompt and additional model provider-specific output.
857
+ An `LLMResult`, which contains a list of candidate `Generations` for each
858
+ input prompt and additional model provider-specific output.
859
+
772
860
  """
773
861
  ls_structured_output_format = kwargs.pop(
774
862
  "ls_structured_output_format", None
@@ -825,20 +913,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
825
913
  run_managers[i].on_llm_error(
826
914
  e,
827
915
  response=LLMResult(
828
- generations=[generations_with_error_metadata] # type: ignore[list-item]
916
+ generations=[generations_with_error_metadata]
829
917
  ),
830
918
  )
831
919
  raise
832
920
  flattened_outputs = [
833
- LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[list-item]
921
+ LLMResult(generations=[res.generations], llm_output=res.llm_output)
834
922
  for res in results
835
923
  ]
836
924
  llm_output = self._combine_llm_outputs([res.llm_output for res in results])
837
925
  generations = [res.generations for res in results]
838
- output = LLMResult(generations=generations, llm_output=llm_output) # type: ignore[arg-type]
926
+ output = LLMResult(generations=generations, llm_output=llm_output)
839
927
  if run_managers:
840
928
  run_infos = []
841
- for manager, flattened_output in zip(run_managers, flattened_outputs):
929
+ for manager, flattened_output in zip(
930
+ run_managers, flattened_outputs, strict=False
931
+ ):
842
932
  manager.on_llm_end(flattened_output)
843
933
  run_infos.append(RunInfo(run_id=manager.run_id))
844
934
  output.run = run_infos
@@ -847,13 +937,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
847
937
  async def agenerate(
848
938
  self,
849
939
  messages: list[list[BaseMessage]],
850
- stop: Optional[list[str]] = None,
940
+ stop: list[str] | None = None,
851
941
  callbacks: Callbacks = None,
852
942
  *,
853
- tags: Optional[list[str]] = None,
854
- metadata: Optional[dict[str, Any]] = None,
855
- run_name: Optional[str] = None,
856
- run_id: Optional[uuid.UUID] = None,
943
+ tags: list[str] | None = None,
944
+ metadata: dict[str, Any] | None = None,
945
+ run_name: str | None = None,
946
+ run_id: uuid.UUID | None = None,
857
947
  **kwargs: Any,
858
948
  ) -> LLMResult:
859
949
  """Asynchronously pass a sequence of prompts to a model and return generations.
@@ -862,16 +952,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
862
952
  API.
863
953
 
864
954
  Use this method when you want to:
865
- 1. take advantage of batched calls,
866
- 2. need more output from the model than just the top generated value,
867
- 3. are building chains that are agnostic to the underlying language model
868
- type (e.g., pure text completion models vs chat models).
955
+
956
+ 1. Take advantage of batched calls,
957
+ 2. Need more output from the model than just the top generated value,
958
+ 3. Are building chains that are agnostic to the underlying language model
959
+ type (e.g., pure text completion models vs chat models).
869
960
 
870
961
  Args:
871
962
  messages: List of list of messages.
872
963
  stop: Stop words to use when generating. Model output is cut off at the
873
964
  first occurrence of any of these substrings.
874
- callbacks: Callbacks to pass through. Used for executing additional
965
+ callbacks: `Callbacks` to pass through. Used for executing additional
875
966
  functionality, such as logging or streaming, throughout generation.
876
967
  tags: The tags to apply.
877
968
  metadata: The metadata to apply.
@@ -881,8 +972,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
881
972
  to the model provider API call.
882
973
 
883
974
  Returns:
884
- An LLMResult, which contains a list of candidate Generations for each input
885
- prompt and additional model provider-specific output.
975
+ An `LLMResult`, which contains a list of candidate `Generations` for each
976
+ input prompt and additional model provider-specific output.
977
+
886
978
  """
887
979
  ls_structured_output_format = kwargs.pop(
888
980
  "ls_structured_output_format", None
@@ -944,7 +1036,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
944
1036
  await run_managers[i].on_llm_error(
945
1037
  res,
946
1038
  response=LLMResult(
947
- generations=[generations_with_error_metadata] # type: ignore[list-item]
1039
+ generations=[generations_with_error_metadata]
948
1040
  ),
949
1041
  )
950
1042
  exceptions.append(res)
@@ -954,27 +1046,27 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
954
1046
  *[
955
1047
  run_manager.on_llm_end(
956
1048
  LLMResult(
957
- generations=[res.generations], # type: ignore[list-item, union-attr]
1049
+ generations=[res.generations], # type: ignore[union-attr]
958
1050
  llm_output=res.llm_output, # type: ignore[union-attr]
959
1051
  )
960
1052
  )
961
- for run_manager, res in zip(run_managers, results)
1053
+ for run_manager, res in zip(run_managers, results, strict=False)
962
1054
  if not isinstance(res, Exception)
963
1055
  ]
964
1056
  )
965
1057
  raise exceptions[0]
966
1058
  flattened_outputs = [
967
- LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[list-item, union-attr]
1059
+ LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[union-attr]
968
1060
  for res in results
969
1061
  ]
970
1062
  llm_output = self._combine_llm_outputs([res.llm_output for res in results]) # type: ignore[union-attr]
971
1063
  generations = [res.generations for res in results] # type: ignore[union-attr]
972
- output = LLMResult(generations=generations, llm_output=llm_output) # type: ignore[arg-type]
1064
+ output = LLMResult(generations=generations, llm_output=llm_output)
973
1065
  await asyncio.gather(
974
1066
  *[
975
1067
  run_manager.on_llm_end(flattened_output)
976
1068
  for run_manager, flattened_output in zip(
977
- run_managers, flattened_outputs
1069
+ run_managers, flattened_outputs, strict=False
978
1070
  )
979
1071
  ]
980
1072
  )
@@ -988,7 +1080,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
988
1080
  def generate_prompt(
989
1081
  self,
990
1082
  prompts: list[PromptValue],
991
- stop: Optional[list[str]] = None,
1083
+ stop: list[str] | None = None,
992
1084
  callbacks: Callbacks = None,
993
1085
  **kwargs: Any,
994
1086
  ) -> LLMResult:
@@ -999,7 +1091,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
999
1091
  async def agenerate_prompt(
1000
1092
  self,
1001
1093
  prompts: list[PromptValue],
1002
- stop: Optional[list[str]] = None,
1094
+ stop: list[str] | None = None,
1003
1095
  callbacks: Callbacks = None,
1004
1096
  **kwargs: Any,
1005
1097
  ) -> LLMResult:
@@ -1011,8 +1103,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1011
1103
  def _generate_with_cache(
1012
1104
  self,
1013
1105
  messages: list[BaseMessage],
1014
- stop: Optional[list[str]] = None,
1015
- run_manager: Optional[CallbackManagerForLLMRun] = None,
1106
+ stop: list[str] | None = None,
1107
+ run_manager: CallbackManagerForLLMRun | None = None,
1016
1108
  **kwargs: Any,
1017
1109
  ) -> ChatResult:
1018
1110
  llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()
@@ -1048,15 +1140,53 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1048
1140
  **kwargs,
1049
1141
  ):
1050
1142
  chunks: list[ChatGenerationChunk] = []
1143
+ run_id: str | None = (
1144
+ f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
1145
+ )
1146
+ yielded = False
1147
+ index = -1
1148
+ index_type = ""
1051
1149
  for chunk in self._stream(messages, stop=stop, **kwargs):
1052
1150
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
1151
+ if self.output_version == "v1":
1152
+ # Overwrite .content with .content_blocks
1153
+ chunk.message = _update_message_content_to_blocks(
1154
+ chunk.message, "v1"
1155
+ )
1156
+ for block in cast(
1157
+ "list[types.ContentBlock]", chunk.message.content
1158
+ ):
1159
+ if block["type"] != index_type:
1160
+ index_type = block["type"]
1161
+ index = index + 1
1162
+ if "index" not in block:
1163
+ block["index"] = index
1053
1164
  if run_manager:
1054
1165
  if chunk.message.id is None:
1055
- chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
1166
+ chunk.message.id = run_id
1056
1167
  run_manager.on_llm_new_token(
1057
1168
  cast("str", chunk.message.content), chunk=chunk
1058
1169
  )
1059
1170
  chunks.append(chunk)
1171
+ yielded = True
1172
+
1173
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
1174
+ if (
1175
+ yielded
1176
+ and isinstance(chunk.message, AIMessageChunk)
1177
+ and not chunk.message.chunk_position
1178
+ ):
1179
+ empty_content: str | list = (
1180
+ "" if isinstance(chunk.message.content, str) else []
1181
+ )
1182
+ chunk = ChatGenerationChunk(
1183
+ message=AIMessageChunk(
1184
+ content=empty_content, chunk_position="last", id=run_id
1185
+ )
1186
+ )
1187
+ if run_manager:
1188
+ run_manager.on_llm_new_token("", chunk=chunk)
1189
+ chunks.append(chunk)
1060
1190
  result = generate_from_stream(iter(chunks))
1061
1191
  elif inspect.signature(self._generate).parameters.get("run_manager"):
1062
1192
  result = self._generate(
@@ -1065,10 +1195,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1065
1195
  else:
1066
1196
  result = self._generate(messages, stop=stop, **kwargs)
1067
1197
 
1198
+ if self.output_version == "v1":
1199
+ # Overwrite .content with .content_blocks
1200
+ for generation in result.generations:
1201
+ generation.message = _update_message_content_to_blocks(
1202
+ generation.message, "v1"
1203
+ )
1204
+
1068
1205
  # Add response metadata to each generation
1069
1206
  for idx, generation in enumerate(result.generations):
1070
1207
  if run_manager and generation.message.id is None:
1071
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1208
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1072
1209
  generation.message.response_metadata = _gen_info_and_msg_metadata(
1073
1210
  generation
1074
1211
  )
@@ -1084,8 +1221,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  async def _agenerate_with_cache(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()
@@ -1121,15 +1258,53 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  **kwargs,
  ):
  chunks: list[ChatGenerationChunk] = []
+ run_id: str | None = (
+ f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+ )
+ yielded = False
+ index = -1
+ index_type = ""
  async for chunk in self._astream(messages, stop=stop, **kwargs):
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+ if self.output_version == "v1":
+ # Overwrite .content with .content_blocks
+ chunk.message = _update_message_content_to_blocks(
+ chunk.message, "v1"
+ )
+ for block in cast(
+ "list[types.ContentBlock]", chunk.message.content
+ ):
+ if block["type"] != index_type:
+ index_type = block["type"]
+ index = index + 1
+ if "index" not in block:
+ block["index"] = index
  if run_manager:
  if chunk.message.id is None:
- chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+ chunk.message.id = run_id
  await run_manager.on_llm_new_token(
  cast("str", chunk.message.content), chunk=chunk
  )
  chunks.append(chunk)
+ yielded = True
+
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
+ if (
+ yielded
+ and isinstance(chunk.message, AIMessageChunk)
+ and not chunk.message.chunk_position
+ ):
+ empty_content: str | list = (
+ "" if isinstance(chunk.message.content, str) else []
+ )
+ chunk = ChatGenerationChunk(
+ message=AIMessageChunk(
+ content=empty_content, chunk_position="last", id=run_id
+ )
+ )
+ if run_manager:
+ await run_manager.on_llm_new_token("", chunk=chunk)
+ chunks.append(chunk)
  result = generate_from_stream(iter(chunks))
  elif inspect.signature(self._agenerate).parameters.get("run_manager"):
  result = await self._agenerate(
@@ -1138,10 +1313,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  else:
  result = await self._agenerate(messages, stop=stop, **kwargs)

+ if self.output_version == "v1":
+ # Overwrite .content with .content_blocks
+ for generation in result.generations:
+ generation.message = _update_message_content_to_blocks(
+ generation.message, "v1"
+ )
+
  # Add response metadata to each generation
  for idx, generation in enumerate(result.generations):
  if run_manager and generation.message.id is None:
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
  generation.message.response_metadata = _gen_info_and_msg_metadata(
  generation
  )
@@ -1158,20 +1340,40 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def _generate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
- """Top Level call."""
+ """Generate the result.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Returns:
+ The chat result.
+ """

  async def _agenerate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
- """Top Level call."""
+ """Generate the result.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Returns:
+ The chat result.
+ """
  return await run_in_executor(
  None,
  self._generate,
@@ -1184,19 +1386,41 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def _stream(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> Iterator[ChatGenerationChunk]:
+ """Stream the output of the model.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Yields:
+ The chat generation chunks.
+ """
  raise NotImplementedError

  async def _astream(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> AsyncIterator[ChatGenerationChunk]:
+ """Stream the output of the model.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Yields:
+ The chat generation chunks.
+ """
  iterator = await run_in_executor(
  None,
  self._stream,
@@ -1217,40 +1441,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  break
  yield item # type: ignore[misc]

- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- def __call__(
- self,
- messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- callbacks: Callbacks = None,
- **kwargs: Any,
- ) -> BaseMessage:
- """Call the model.
-
- Args:
- messages: List of messages.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- callbacks: Callbacks to pass through. Used for executing additional
- functionality, such as logging or streaming, throughout generation.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The model output message.
- """
- generation = self.generate(
- [messages], stop=stop, callbacks=callbacks, **kwargs
- ).generations[0][0]
- if isinstance(generation, ChatGeneration):
- return generation.message
- msg = "Unexpected generation type"
- raise ValueError(msg)
-
  async def _call_async(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
+ stop: list[str] | None = None,
  callbacks: Callbacks = None,
  **kwargs: Any,
  ) -> BaseMessage:
@@ -1263,86 +1457,6 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  msg = "Unexpected generation type"
  raise ValueError(msg)

- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- def call_as_llm(
- self, message: str, stop: Optional[list[str]] = None, **kwargs: Any
- ) -> str:
- """Call the model.
-
- Args:
- message: The input message.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The model output string.
- """
- return self.predict(message, stop=stop, **kwargs)
-
- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- @override
- def predict(
- self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
- ) -> str:
- """Predict the next message.
-
- Args:
- text: The input message.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The predicted output string.
- """
- stop_ = None if stop is None else list(stop)
- result = self([HumanMessage(content=text)], stop=stop_, **kwargs)
- if isinstance(result.content, str):
- return result.content
- msg = "Cannot use predict when output is not a string."
- raise ValueError(msg)
-
- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- @override
- def predict_messages(
- self,
- messages: list[BaseMessage],
- *,
- stop: Optional[Sequence[str]] = None,
- **kwargs: Any,
- ) -> BaseMessage:
- stop_ = None if stop is None else list(stop)
- return self(messages, stop=stop_, **kwargs)
-
- @deprecated("0.1.7", alternative="ainvoke", removal="1.0")
- @override
- async def apredict(
- self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
- ) -> str:
- stop_ = None if stop is None else list(stop)
- result = await self._call_async(
- [HumanMessage(content=text)], stop=stop_, **kwargs
- )
- if isinstance(result.content, str):
- return result.content
- msg = "Cannot use predict when output is not a string."
- raise ValueError(msg)
-
- @deprecated("0.1.7", alternative="ainvoke", removal="1.0")
- @override
- async def apredict_messages(
- self,
- messages: list[BaseMessage],
- *,
- stop: Optional[Sequence[str]] = None,
- **kwargs: Any,
- ) -> BaseMessage:
- stop_ = None if stop is None else list(stop)
- return await self._call_async(messages, stop=stop_, **kwargs)
-
  @property
  @abstractmethod
  def _llm_type(self) -> str:
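The removals above complete the 0.1.7 deprecation cycle: `__call__`, `call_as_llm`, `predict`, `predict_messages`, `apredict`, and `apredict_messages` are gone, leaving `invoke`/`ainvoke` as the supported entry points. A rough migration sketch (`ChatModel` is a placeholder for any concrete `BaseChatModel` subclass, as in the docstring examples below):

```python
from langchain_core.messages import HumanMessage

model = ChatModel(model="model-name")  # placeholder concrete chat model

# Removed in this release:
#   model([HumanMessage(content="hi")])   # __call__
#   model.predict("hi")                   # predict
#   model.call_as_llm("hi")               # call_as_llm

# Supported path: invoke() accepts a string or a list of messages
# and returns an AIMessage.
reply = model.invoke([HumanMessage(content="hi")])
print(reply.content)
```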
@@ -1358,12 +1472,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def bind_tools(
  self,
  tools: Sequence[
- Union[typing.Dict[str, Any], type, Callable, BaseTool] # noqa: UP006
+ typing.Dict[str, Any] | type | Callable | BaseTool # noqa: UP006
  ],
  *,
- tool_choice: Optional[Union[str]] = None,
+ tool_choice: str | None = None,
  **kwargs: Any,
- ) -> Runnable[LanguageModelInput, BaseMessage]:
+ ) -> Runnable[LanguageModelInput, AIMessage]:
  """Bind tools to the model.

  Args:
@@ -1372,16 +1486,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

  Returns:
  A Runnable that returns a message.
+
  """
  raise NotImplementedError

  def with_structured_output(
  self,
- schema: Union[typing.Dict, type], # noqa: UP006
+ schema: typing.Dict | type, # noqa: UP006
  *,
  include_raw: bool = False,
  **kwargs: Any,
- ) -> Runnable[LanguageModelInput, Union[typing.Dict, BaseModel]]: # noqa: UP006
+ ) -> Runnable[LanguageModelInput, typing.Dict | BaseModel]: # noqa: UP006
  """Model wrapper that returns outputs formatted to match the given schema.

  Args:
@@ -1389,102 +1504,130 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

  - an OpenAI function/tool schema,
  - a JSON Schema,
- - a TypedDict class,
+ - a `TypedDict` class,
  - or a Pydantic class.

- If ``schema`` is a Pydantic class then the model output will be a
+ If `schema` is a Pydantic class then the model output will be a
  Pydantic instance of that class, and the model-generated fields will be
  validated by the Pydantic class. Otherwise the model output will be a
- dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
- for more on how to properly specify types and descriptions of
- schema fields when specifying a Pydantic or TypedDict class.
+ dict and will not be validated.
+
+ See `langchain_core.utils.function_calling.convert_to_openai_tool` for
+ more on how to properly specify types and descriptions of schema fields
+ when specifying a Pydantic or `TypedDict` class.

  include_raw:
- If False then only the parsed structured output is returned. If
- an error occurs during model output parsing it will be raised. If True
- then both the raw model response (a BaseMessage) and the parsed model
+ If `False` then only the parsed structured output is returned. If
+ an error occurs during model output parsing it will be raised. If `True`
+ then both the raw model response (a `BaseMessage`) and the parsed model
  response will be returned. If an error occurs during output parsing it
- will be caught and returned as well. The final output is always a dict
- with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.
+ will be caught and returned as well.
+
+ The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
+ `'parsing_error'`.
+
+ Raises:
+ ValueError: If there are any unsupported `kwargs`.
+ NotImplementedError: If the model does not implement
+ `with_structured_output()`.

  Returns:
- A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.
+ A `Runnable` that takes same inputs as a
+ `langchain_core.language_models.chat.BaseChatModel`. If `include_raw` is
+ `False` and `schema` is a Pydantic class, `Runnable` outputs an instance
+ of `schema` (i.e., a Pydantic object). Otherwise, if `include_raw` is
+ `False` then `Runnable` outputs a `dict`.
+
+ If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:

- If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
- an instance of ``schema`` (i.e., a Pydantic object).
+ - `'raw'`: `BaseMessage`
+ - `'parsed'`: `None` if there was a parsing error, otherwise the type
+ depends on the `schema` as described above.
+ - `'parsing_error'`: `BaseException | None`

- Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+ Example: Pydantic schema (`include_raw=False`):

- If ``include_raw`` is True, then Runnable outputs a dict with keys:
+ ```python
+ from pydantic import BaseModel

- - ``'raw'``: BaseMessage
- - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
- - ``'parsing_error'``: Optional[BaseException]

- Example: Pydantic schema (include_raw=False):
- .. code-block:: python
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- from pydantic import BaseModel
+ answer: str
+ justification: str

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str

- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(AnswerWithJustification)
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(AnswerWithJustification)

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+
+ # -> AnswerWithJustification(
+ # answer='They weigh the same',
+ # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+ # )
+ ```
+
+ Example: Pydantic schema (`include_raw=True`):
+
+ ```python
+ from pydantic import BaseModel
+
+
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- # -> AnswerWithJustification(
- # answer='They weigh the same',
- # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
- # )
+ answer: str
+ justification: str

- Example: Pydantic schema (include_raw=True):
- .. code-block:: python

- from pydantic import BaseModel
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(
+ AnswerWithJustification, include_raw=True
+ )

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+ # -> {
+ # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
+ # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
+ # 'parsing_error': None
+ # }
+ ```

- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)
+ Example: `dict` schema (`include_raw=False`):

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
- # -> {
- # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
- # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
- # 'parsing_error': None
- # }
+ ```python
+ from pydantic import BaseModel
+ from langchain_core.utils.function_calling import convert_to_openai_tool

- Example: Dict schema (include_raw=False):
- .. code-block:: python

- from pydantic import BaseModel
- from langchain_core.utils.function_calling import convert_to_openai_tool
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str
+ answer: str
+ justification: str

- dict_schema = convert_to_openai_tool(AnswerWithJustification)
- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(dict_schema)

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
- # -> {
- # 'answer': 'They weigh the same',
- # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
- # }
+ dict_schema = convert_to_openai_tool(AnswerWithJustification)
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(dict_schema)

- .. versionchanged:: 0.2.26
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+ # -> {
+ # 'answer': 'They weigh the same',
+ # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+ # }
+ ```

- Added support for TypedDict class.
+ !!! warning "Behavior changed in 0.2.26"
+ Added support for TypedDict class.

  """ # noqa: E501
  _ = kwargs.pop("method", None)
@@ -1493,11 +1636,6 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  msg = f"Received unsupported arguments {kwargs}"
  raise ValueError(msg)

- from langchain_core.output_parsers.openai_tools import (
- JsonOutputKeyToolsParser,
- PydanticToolsParser,
- )
-
  if type(self).bind_tools is BaseChatModel.bind_tools:
  msg = "with_structured_output is not implemented for this model."
  raise NotImplementedError(msg)
@@ -1534,15 +1672,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  class SimpleChatModel(BaseChatModel):
  """Simplified implementation for a chat model to inherit from.

- **Note** This implementation is primarily here for backwards compatibility.
- For new implementations, please use `BaseChatModel` directly.
+ !!! note
+ This implementation is primarily here for backwards compatibility. For new
+ implementations, please use `BaseChatModel` directly.
+
  """

  def _generate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs)
@@ -1554,8 +1694,8 @@ class SimpleChatModel(BaseChatModel):
  def _call(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> str:
  """Simpler interface."""
@@ -1563,8 +1703,8 @@ class SimpleChatModel(BaseChatModel):
  async def _agenerate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  return await run_in_executor(
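As the `SimpleChatModel` note above says, new chat models should subclass `BaseChatModel` directly. A hedged, minimal sketch of such a subclass (the echo behavior and class name are illustrative only, not part of this diff):

```python
from typing import Any

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class EchoChatModel(BaseChatModel):
    """Toy model that echoes the last input message back."""

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Echo the most recent message as the model's reply.
        message = AIMessage(content=str(messages[-1].content))
        return ChatResult(generations=[ChatGeneration(message=message)])

    @property
    def _llm_type(self) -> str:
        return "echo-chat-model"


reply = EchoChatModel().invoke("hello")  # reply is an AIMessage echoing the input
```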
@@ -1578,7 +1718,7 @@ class SimpleChatModel(BaseChatModel):


  def _gen_info_and_msg_metadata(
- generation: Union[ChatGeneration, ChatGenerationChunk],
+ generation: ChatGeneration | ChatGenerationChunk,
  ) -> dict:
  return {
  **(generation.generation_info or {}),