langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (74)
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +232 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +248 -54
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +30 -24
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +164 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +798 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +303 -278
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +34 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0
langchain_core/language_models/chat_models.py
@@ -27,7 +27,10 @@ from langchain_core.callbacks import (
     Callbacks,
 )
 from langchain_core.globals import get_llm_cache
-from langchain_core.language_models._utils import _normalize_messages
+from langchain_core.language_models._utils import (
+    _normalize_messages,
+    _update_message_content_to_blocks,
+)
 from langchain_core.language_models.base import (
     BaseLanguageModel,
     LangSmithParams,
@@ -36,16 +39,18 @@ from langchain_core.language_models.base import (
 from langchain_core.load import dumpd, dumps
 from langchain_core.messages import (
     AIMessage,
+    AIMessageChunk,
     AnyMessage,
     BaseMessage,
-    BaseMessageChunk,
     HumanMessage,
     convert_to_messages,
-    convert_to_openai_image_block,
     is_data_content_block,
     message_chunk_to_message,
 )
-from langchain_core.messages.ai import _LC_ID_PREFIX
+from langchain_core.messages.block_translators.openai import (
+    convert_to_openai_data_block,
+    convert_to_openai_image_block,
+)
 from langchain_core.outputs import (
     ChatGeneration,
     ChatGenerationChunk,
@@ -65,6 +70,7 @@ from langchain_core.utils.function_calling import (
     convert_to_openai_tool,
 )
 from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
+from langchain_core.utils.utils import LC_ID_PREFIX, from_env

 if TYPE_CHECKING:
     import uuid
@@ -78,6 +84,11 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
     if hasattr(error, "response"):
         response = error.response
         metadata: dict = {}
+        if hasattr(response, "json"):
+            try:
+                metadata["body"] = response.json()
+            except Exception:
+                metadata["body"] = getattr(response, "text", None)
         if hasattr(response, "headers"):
             try:
                 metadata["headers"] = dict(response.headers)
@@ -97,17 +108,18 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:


 def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
-    """Format messages for tracing in on_chat_model_start.
+    """Format messages for tracing in ``on_chat_model_start``.

     - Update image content blocks to OpenAI Chat Completions format (backward
       compatibility).
-    - Add "type" key to content blocks that have a single key.
+    - Add ``type`` key to content blocks that have a single key.

     Args:
         messages: List of messages to format.

     Returns:
         List of messages formatted for tracing.
+
     """
     messages_to_trace = []
     for message in messages:
@@ -119,7 +131,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                 if (
                     block.get("type") == "image"
                     and is_data_content_block(block)
-                    and block.get("source_type") != "id"
+                    and not ("file_id" in block or block.get("source_type") == "id")
                 ):
                     if message_to_trace is message:
                         # Shallow copy
@@ -129,6 +141,19 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                     message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
                         convert_to_openai_image_block(block)
                     )
+                elif (
+                    block.get("type") == "file"
+                    and is_data_content_block(block)
+                    and "base64" in block
+                ):
+                    if message_to_trace is message:
+                        # Shallow copy
+                        message_to_trace = message.model_copy()
+                        message_to_trace.content = list(message_to_trace.content)
+
+                    message_to_trace.content[idx] = convert_to_openai_data_block(  # type: ignore[index]
+                        block
+                    )
                 elif len(block) == 1 and "type" not in block:
                     # Tracing assumes all content blocks have a "type" key. Here
                     # we add this key if it is missing, and there's an obvious
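
For reference, the new ``elif`` branch targets v1-style file blocks that carry inline base64 data. A small sketch of such a block (the payload is a stand-in):

    from langchain_core.messages import HumanMessage, is_data_content_block

    # A v1-style file content block: raw data under a "base64" key.
    file_block = {
        "type": "file",
        "base64": "aGVsbG8gd29ybGQ=",  # stand-in payload
        "mime_type": "text/plain",
    }
    assert is_data_content_block(file_block)

    # When a message containing this block is traced, the branch above
    # rewrites it with convert_to_openai_data_block into OpenAI Chat
    # Completions format before on_chat_model_start fires.
    msg = HumanMessage(content=[file_block])
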
@@ -153,10 +178,11 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
     """Generate from a stream.

     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.

     Returns:
         ChatResult: Chat result.
+
     """
     generation = next(stream, None)
     if generation:
@@ -180,10 +206,11 @@ async def agenerate_from_stream(
     """Async generate from a stream.

     Args:
-        stream: Iterator of ChatGenerationChunk.
+        stream: Iterator of ``ChatGenerationChunk``.

     Returns:
         ChatResult: Chat result.
+
     """
     chunks = [chunk async for chunk in stream]
     return await run_in_executor(None, generate_from_stream, iter(chunks))
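
Both helpers collapse a stream of chunks into a single ``ChatResult``. A quick sketch of direct use with synthetic chunks:

    from langchain_core.language_models.chat_models import generate_from_stream
    from langchain_core.messages import AIMessageChunk
    from langchain_core.outputs import ChatGenerationChunk

    chunks = [
        ChatGenerationChunk(message=AIMessageChunk(content="Hello, ")),
        ChatGenerationChunk(message=AIMessageChunk(content="world!")),
    ]
    result = generate_from_stream(iter(chunks))
    print(result.generations[0].message.content)  # "Hello, world!"
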
@@ -208,7 +235,7 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) ->
     return ls_structured_output_format_dict


-class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
+class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
     """Base class for chat models.

     Key imperative methods:
@@ -311,15 +338,38 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
       provided. This offers the best of both worlds.
     - If False (default), will always use streaming case if available.

-    The main reason for this flag is that code might be written using ``.stream()`` and
+    The main reason for this flag is that code might be written using ``stream()`` and
     a user may want to swap out a given model for another model whose the implementation
     does not properly support streaming.
+
+    """
+
+    output_version: Optional[str] = Field(
+        default_factory=from_env("LC_OUTPUT_VERSION", default=None)
+    )
+    """Version of ``AIMessage`` output format to store in message content.
+
+    ``AIMessage.content_blocks`` will lazily parse the contents of ``content`` into a
+    standard format. This flag can be used to additionally store the standard format
+    in message content, e.g., for serialization purposes.
+
+    Supported values:
+
+    - ``"v0"``: provider-specific format in content (can lazily-parse with
+      ``.content_blocks``)
+    - ``"v1"``: standardized format in content (consistent with ``.content_blocks``)
+
+    Partner packages (e.g., ``langchain-openai``) can also use this field to roll out
+    new content formats in a backward-compatible way.
+
+    .. versionadded:: 1.0
+
     """

     @model_validator(mode="before")
     @classmethod
     def raise_deprecation(cls, values: dict) -> Any:
-        """Raise deprecation warning if callback_manager is used.
+        """Raise deprecation warning if ``callback_manager`` is used.

         Args:
             values (Dict): Values to validate.
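
The field can be set per model instance as well as through the ``LC_OUTPUT_VERSION`` environment variable picked up by ``from_env``. A minimal sketch of opting in, assuming a provider package that exposes the field (model name illustrative):

    from langchain_openai import ChatOpenAI  # any BaseChatModel subclass

    # Equivalent to exporting LC_OUTPUT_VERSION=v1 before startup.
    model = ChatOpenAI(model="gpt-4.1-mini", output_version="v1")

    msg = model.invoke("Hello!")
    # With "v1", content is stored in the standardized block format, so it
    # should be consistent with the lazily parsed content_blocks view.
    assert msg.content == msg.content_blocks
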
@@ -328,7 +378,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             Dict: Validated values.

         Raises:
-            DeprecationWarning: If callback_manager is used.
+            DeprecationWarning: If ``callback_manager`` is used.
+
         """
         if values.get("callback_manager") is not None:
             warnings.warn(
@@ -376,21 +427,24 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         return cast(
-            "ChatGeneration",
-            self.generate_prompt(
-                [self._convert_input(input)],
-                stop=stop,
-                callbacks=config.get("callbacks"),
-                tags=config.get("tags"),
-                metadata=config.get("metadata"),
-                run_name=config.get("run_name"),
-                run_id=config.pop("run_id", None),
-                **kwargs,
-            ).generations[0][0],
-        ).message
+            "AIMessage",
+            cast(
+                "ChatGeneration",
+                self.generate_prompt(
+                    [self._convert_input(input)],
+                    stop=stop,
+                    callbacks=config.get("callbacks"),
+                    tags=config.get("tags"),
+                    metadata=config.get("metadata"),
+                    run_name=config.get("run_name"),
+                    run_id=config.pop("run_id", None),
+                    **kwargs,
+                ).generations[0][0],
+            ).message,
+        )

     @override
     async def ainvoke(
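
Because ``BaseChatModel`` is now generic over ``AIMessage``, ``invoke`` and ``ainvoke`` are typed to return ``AIMessage`` rather than ``BaseMessage``. A sketch of what that means for callers (helper function hypothetical):

    from langchain_core.language_models.chat_models import BaseChatModel
    from langchain_core.messages import AIMessage

    def total_tokens(model: BaseChatModel, prompt: str) -> int:
        """Hypothetical helper: read token usage without a cast."""
        msg: AIMessage = model.invoke(prompt)  # typed BaseMessage before 1.0
        # usage_metadata and tool_calls exist only on AIMessage, so this
        # previously needed an isinstance check or cast to type-check.
        return (msg.usage_metadata or {}).get("total_tokens", 0)
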
@@ -400,7 +454,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> BaseMessage:
+    ) -> AIMessage:
         config = ensure_config(config)
         llm_result = await self.agenerate_prompt(
             [self._convert_input(input)],
@@ -412,7 +466,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             run_id=config.pop("run_id", None),
             **kwargs,
         )
-        return cast("ChatGeneration", llm_result.generations[0][0]).message
+        return cast(
+            "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message
+        )

     def _should_stream(
         self,
@@ -457,11 +513,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> Iterator[BaseMessageChunk]:
+    ) -> Iterator[AIMessageChunk]:
         if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
             # model doesn't implement streaming, so use default implementation
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 self.invoke(input, config=config, stop=stop, **kwargs),
             )
         else:
@@ -506,16 +562,41 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         try:
             input_messages = _normalize_messages(messages)
-            run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+            yielded = False
             for chunk in self._stream(input_messages, stop=stop, **kwargs):
                 if chunk.message.id is None:
                     chunk.message.id = run_id
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
                 chunks.append(chunk)
-                yield chunk.message
+                yield cast("AIMessageChunk", chunk.message)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet
+            # yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                msg_chunk = AIMessageChunk(
+                    content=empty_content, chunk_position="last", id=run_id
+                )
+                run_manager.on_llm_new_token(
+                    "", chunk=ChatGenerationChunk(message=msg_chunk)
+                )
+                yield msg_chunk
         except BaseException as e:
             generations_with_error_metadata = _generate_response_from_error(e)
             chat_generation_chunk = merge_chat_generation_chunks(chunks)
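
Streams now terminate with a chunk whose ``chunk_position`` is ``"last"``; if the provider's final chunk did not set it, an empty synthetic chunk is appended. A sketch of a consumer relying on the sentinel (``model`` is an assumed chat model instance):

    from typing import Optional

    from langchain_core.messages import AIMessageChunk

    final: Optional[AIMessageChunk] = None
    for chunk in model.stream("Write a haiku"):
        final = chunk if final is None else final + chunk  # chunks aggregate with +
        if chunk.chunk_position == "last":
            # Guaranteed terminal chunk, per the logic added above.
            print("stream complete; run id:", final.id)
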
@@ -528,7 +609,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 generations = [generations_with_error_metadata]
             run_manager.on_llm_error(
                 e,
-                response=LLMResult(generations=generations),  # type: ignore[arg-type]
+                response=LLMResult(generations=generations),
             )
             raise

@@ -548,11 +629,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
-    ) -> AsyncIterator[BaseMessageChunk]:
+    ) -> AsyncIterator[AIMessageChunk]:
         if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
             # No async or sync stream is implemented, so fall back to ainvoke
             yield cast(
-                "BaseMessageChunk",
+                "AIMessageChunk",
                 await self.ainvoke(input, config=config, stop=stop, **kwargs),
             )
             return
@@ -599,7 +680,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         try:
             input_messages = _normalize_messages(messages)
-            run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
+            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
+            yielded = False
             async for chunk in self._astream(
                 input_messages,
                 stop=stop,
@@ -608,11 +690,34 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 if chunk.message.id is None:
                     chunk.message.id = run_id
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 await run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
                 chunks.append(chunk)
-                yield chunk.message
+                yield cast("AIMessageChunk", chunk.message)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                msg_chunk = AIMessageChunk(
+                    content=empty_content, chunk_position="last", id=run_id
+                )
+                await run_manager.on_llm_new_token(
+                    "", chunk=ChatGenerationChunk(message=msg_chunk)
+                )
+                yield msg_chunk
         except BaseException as e:
             generations_with_error_metadata = _generate_response_from_error(e)
             chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -653,6 +758,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             List of ChatGeneration objects.
+
         """
         converted_generations = []
         for gen in cache_val:
@@ -666,6 +772,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 converted_generations.append(chat_gen)
             else:
                 # Already a ChatGeneration or other expected type
+                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
+                    # We zero out cost on cache hits
+                    gen.message = gen.message.model_copy(
+                        update={
+                            "usage_metadata": {
+                                **(gen.message.usage_metadata or {}),
+                                "total_cost": 0,
+                            }
+                        }
+                    )
                 converted_generations.append(gen)
         return converted_generations

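Because a cache hit incurs no new provider charge, cached generations now come back with ``total_cost`` forced to ``0`` in ``usage_metadata``. A sketch of the observable behavior, assuming ``model`` is a chat model with caching enabled:

    from langchain_core.caches import InMemoryCache
    from langchain_core.globals import set_llm_cache

    set_llm_cache(InMemoryCache())

    first = model.invoke("What is 2 + 2?")   # real call; provider-reported usage
    second = model.invoke("What is 2 + 2?")  # same prompt: served from cache
    # Token counts are preserved, but cost is zeroed by the
    # model_copy(update=...) shown above.
    assert (second.usage_metadata or {}).get("total_cost") == 0
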
@@ -768,7 +884,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-            prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -825,17 +942,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                     run_managers[i].on_llm_error(
                         e,
                         response=LLMResult(
-                            generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                            generations=[generations_with_error_metadata]
                         ),
                     )
                 raise
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])
         generations = [res.generations for res in results]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         if run_managers:
             run_infos = []
             for manager, flattened_output in zip(run_managers, flattened_outputs):
@@ -882,7 +999,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             An LLMResult, which contains a list of candidate Generations for each input
-            prompt and additional model provider-specific output.
+            prompt and additional model provider-specific output.
+
         """
         ls_structured_output_format = kwargs.pop(
             "ls_structured_output_format", None
@@ -944,7 +1062,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 await run_managers[i].on_llm_error(
                     res,
                     response=LLMResult(
-                        generations=[generations_with_error_metadata]  # type: ignore[list-item]
+                        generations=[generations_with_error_metadata]
                     ),
                 )
                 exceptions.append(res)
@@ -954,7 +1072,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             *[
                 run_manager.on_llm_end(
                     LLMResult(
-                        generations=[res.generations],  # type: ignore[list-item, union-attr]
+                        generations=[res.generations],  # type: ignore[union-attr]
                         llm_output=res.llm_output,  # type: ignore[union-attr]
                     )
                 )
@@ -964,12 +1082,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             )
             raise exceptions[0]
         flattened_outputs = [
-            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[list-item, union-attr]
+            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[union-attr]
             for res in results
         ]
         llm_output = self._combine_llm_outputs([res.llm_output for res in results])  # type: ignore[union-attr]
         generations = [res.generations for res in results]  # type: ignore[union-attr]
-        output = LLMResult(generations=generations, llm_output=llm_output)  # type: ignore[arg-type]
+        output = LLMResult(generations=generations, llm_output=llm_output)
         await asyncio.gather(
             *[
                 run_manager.on_llm_end(flattened_output)
@@ -1048,15 +1166,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             for chunk in self._stream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._generate).parameters.get("run_manager"):
             result = self._generate(
@@ -1065,10 +1211,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             result = self._generate(messages, stop=stop, **kwargs)

+        if self.output_version == "v1":
+            # Overwrite .content with .content_blocks
+            for generation in result.generations:
+                generation.message = _update_message_content_to_blocks(
+                    generation.message, "v1"
+                )
+
         # Add response metadata to each generation
         for idx, generation in enumerate(result.generations):
             if run_manager and generation.message.id is None:
-                generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+                generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
             generation.message.response_metadata = _gen_info_and_msg_metadata(
                 generation
             )
@@ -1121,15 +1274,43 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             **kwargs,
         ):
             chunks: list[ChatGenerationChunk] = []
+            run_id: Optional[str] = (
+                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+            )
+            yielded = False
             async for chunk in self._astream(messages, stop=stop, **kwargs):
                 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                if self.output_version == "v1":
+                    # Overwrite .content with .content_blocks
+                    chunk.message = _update_message_content_to_blocks(
+                        chunk.message, "v1"
+                    )
                 if run_manager:
                     if chunk.message.id is None:
-                        chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+                        chunk.message.id = run_id
                     await run_manager.on_llm_new_token(
                         cast("str", chunk.message.content), chunk=chunk
                     )
                 chunks.append(chunk)
+                yielded = True
+
+            # Yield a final empty chunk with chunk_position="last" if not yet yielded
+            if (
+                yielded
+                and isinstance(chunk.message, AIMessageChunk)
+                and not chunk.message.chunk_position
+            ):
+                empty_content: Union[str, list] = (
+                    "" if isinstance(chunk.message.content, str) else []
+                )
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=empty_content, chunk_position="last", id=run_id
+                    )
+                )
+                if run_manager:
+                    await run_manager.on_llm_new_token("", chunk=chunk)
+                chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         elif inspect.signature(self._agenerate).parameters.get("run_manager"):
             result = await self._agenerate(
1135
1316
  result = await self._agenerate(
@@ -1138,10 +1319,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1138
1319
  else:
1139
1320
  result = await self._agenerate(messages, stop=stop, **kwargs)
1140
1321
 
1322
+ if self.output_version == "v1":
1323
+ # Overwrite .content with .content_blocks
1324
+ for generation in result.generations:
1325
+ generation.message = _update_message_content_to_blocks(
1326
+ generation.message, "v1"
1327
+ )
1328
+
1141
1329
  # Add response metadata to each generation
1142
1330
  for idx, generation in enumerate(result.generations):
1143
1331
  if run_manager and generation.message.id is None:
1144
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1332
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1145
1333
  generation.message.response_metadata = _gen_info_and_msg_metadata(
1146
1334
  generation
1147
1335
  )
@@ -1238,6 +1426,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The model output message.
+
         """
         generation = self.generate(
             [messages], stop=stop, callbacks=callbacks, **kwargs
@@ -1278,6 +1467,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The model output string.
+
         """
         return self.predict(message, stop=stop, **kwargs)

@@ -1297,6 +1487,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

         Returns:
             The predicted output string.
+
         """
         stop_ = None if stop is None else list(stop)
         result = self([HumanMessage(content=text)], stop=stop_, **kwargs)
@@ -1363,7 +1554,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         *,
         tool_choice: Optional[Union[str]] = None,
         **kwargs: Any,
-    ) -> Runnable[LanguageModelInput, BaseMessage]:
+    ) -> Runnable[LanguageModelInput, AIMessage]:
         """Bind tools to the model.

         Args:
1372
1563
 
1373
1564
  Returns:
1374
1565
  A Runnable that returns a message.
1566
+
1375
1567
  """
1376
1568
  raise NotImplementedError
1377
1569
 
@@ -1534,8 +1726,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
 class SimpleChatModel(BaseChatModel):
     """Simplified implementation for a chat model to inherit from.

-    **Note** This implementation is primarily here for backwards compatibility.
-    For new implementations, please use `BaseChatModel` directly.
+    .. note::
+        This implementation is primarily here for backwards compatibility. For new
+        implementations, please use ``BaseChatModel`` directly.
+
     """

     def _generate(