langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. Click here for more details.

Files changed (172) hide show
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
@@ -6,28 +6,28 @@ import asyncio
6
6
  import inspect
7
7
  import json
8
8
  import typing
9
- import warnings
10
9
  from abc import ABC, abstractmethod
11
- from collections.abc import AsyncIterator, Iterator, Sequence
10
+ from collections.abc import AsyncIterator, Callable, Iterator, Sequence
12
11
  from functools import cached_property
13
12
  from operator import itemgetter
14
- from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
13
+ from typing import TYPE_CHECKING, Any, Literal, cast
15
14
 
16
- from pydantic import BaseModel, ConfigDict, Field, model_validator
15
+ from pydantic import BaseModel, ConfigDict, Field
17
16
  from typing_extensions import override
18
17
 
19
- from langchain_core._api import deprecated
20
18
  from langchain_core.caches import BaseCache
21
19
  from langchain_core.callbacks import (
22
20
  AsyncCallbackManager,
23
21
  AsyncCallbackManagerForLLMRun,
24
- BaseCallbackManager,
25
22
  CallbackManager,
26
23
  CallbackManagerForLLMRun,
27
24
  Callbacks,
28
25
  )
29
26
  from langchain_core.globals import get_llm_cache
30
- from langchain_core.language_models._utils import _normalize_messages
27
+ from langchain_core.language_models._utils import (
28
+ _normalize_messages,
29
+ _update_message_content_to_blocks,
30
+ )
31
31
  from langchain_core.language_models.base import (
32
32
  BaseLanguageModel,
33
33
  LangSmithParams,
@@ -36,16 +36,21 @@ from langchain_core.language_models.base import (
36
36
  from langchain_core.load import dumpd, dumps
37
37
  from langchain_core.messages import (
38
38
  AIMessage,
39
+ AIMessageChunk,
39
40
  AnyMessage,
40
41
  BaseMessage,
41
- BaseMessageChunk,
42
- HumanMessage,
43
42
  convert_to_messages,
44
- convert_to_openai_image_block,
45
43
  is_data_content_block,
46
44
  message_chunk_to_message,
47
45
  )
48
- from langchain_core.messages.ai import _LC_ID_PREFIX
46
+ from langchain_core.messages import content as types
47
+ from langchain_core.messages.block_translators.openai import (
48
+ convert_to_openai_image_block,
49
+ )
50
+ from langchain_core.output_parsers.openai_tools import (
51
+ JsonOutputKeyToolsParser,
52
+ PydanticToolsParser,
53
+ )
49
54
  from langchain_core.outputs import (
50
55
  ChatGeneration,
51
56
  ChatGenerationChunk,
@@ -65,6 +70,7 @@ from langchain_core.utils.function_calling import (
65
70
  convert_to_openai_tool,
66
71
  )
67
72
  from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
73
+ from langchain_core.utils.utils import LC_ID_PREFIX, from_env
68
74
 
69
75
  if TYPE_CHECKING:
70
76
  import uuid
@@ -78,6 +84,11 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
78
84
  if hasattr(error, "response"):
79
85
  response = error.response
80
86
  metadata: dict = {}
87
+ if hasattr(response, "json"):
88
+ try:
89
+ metadata["body"] = response.json()
90
+ except Exception:
91
+ metadata["body"] = getattr(response, "text", None)
81
92
  if hasattr(response, "headers"):
82
93
  try:
83
94
  metadata["headers"] = dict(response.headers)
@@ -97,17 +108,18 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
97
108
 
98
109
 
99
110
  def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
100
- """Format messages for tracing in on_chat_model_start.
111
+ """Format messages for tracing in `on_chat_model_start`.
101
112
 
102
113
  - Update image content blocks to OpenAI Chat Completions format (backward
103
114
  compatibility).
104
- - Add "type" key to content blocks that have a single key.
115
+ - Add `type` key to content blocks that have a single key.
105
116
 
106
117
  Args:
107
118
  messages: List of messages to format.
108
119
 
109
120
  Returns:
110
121
  List of messages formatted for tracing.
122
+
111
123
  """
112
124
  messages_to_trace = []
113
125
  for message in messages:
@@ -119,7 +131,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
119
131
  if (
120
132
  block.get("type") == "image"
121
133
  and is_data_content_block(block)
122
- and block.get("source_type") != "id"
134
+ and not ("file_id" in block or block.get("source_type") == "id")
123
135
  ):
124
136
  if message_to_trace is message:
125
137
  # Shallow copy
@@ -129,6 +141,22 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
129
141
  message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
130
142
  convert_to_openai_image_block(block)
131
143
  )
144
+ elif (
145
+ block.get("type") == "file"
146
+ and is_data_content_block(block) # v0 (image/audio/file) or v1
147
+ and "base64" in block
148
+ # Backward compat: convert v1 base64 blocks to v0
149
+ ):
150
+ if message_to_trace is message:
151
+ # Shallow copy
152
+ message_to_trace = message.model_copy()
153
+ message_to_trace.content = list(message_to_trace.content)
154
+
155
+ message_to_trace.content[idx] = { # type: ignore[index]
156
+ **{k: v for k, v in block.items() if k != "base64"},
157
+ "data": block["base64"],
158
+ "source_type": "base64",
159
+ }
132
160
  elif len(block) == 1 and "type" not in block:
133
161
  # Tracing assumes all content blocks have a "type" key. Here
134
162
  # we add this key if it is missing, and there's an obvious
@@ -142,8 +170,6 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
142
170
  "type": key,
143
171
  key: block[key],
144
172
  }
145
- else:
146
- pass
147
173
  messages_to_trace.append(message_to_trace)
148
174
 
149
175
  return messages_to_trace
@@ -153,10 +179,14 @@ def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
153
179
  """Generate from a stream.
154
180
 
155
181
  Args:
156
- stream: Iterator of ChatGenerationChunk.
182
+ stream: Iterator of `ChatGenerationChunk`.
183
+
184
+ Raises:
185
+ ValueError: If no generations are found in the stream.
157
186
 
158
187
  Returns:
159
- ChatResult: Chat result.
188
+ Chat result.
189
+
160
190
  """
161
191
  generation = next(stream, None)
162
192
  if generation:
@@ -180,16 +210,17 @@ async def agenerate_from_stream(
180
210
  """Async generate from a stream.
181
211
 
182
212
  Args:
183
- stream: Iterator of ChatGenerationChunk.
213
+ stream: Iterator of `ChatGenerationChunk`.
184
214
 
185
215
  Returns:
186
- ChatResult: Chat result.
216
+ Chat result.
217
+
187
218
  """
188
219
  chunks = [chunk async for chunk in stream]
189
220
  return await run_in_executor(None, generate_from_stream, iter(chunks))
190
221
 
191
222
 
192
- def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) -> dict:
223
+ def _format_ls_structured_output(ls_structured_output_format: dict | None) -> dict:
193
224
  if ls_structured_output_format:
194
225
  try:
195
226
  ls_structured_output_format_dict = {
@@ -208,136 +239,99 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) ->
208
239
  return ls_structured_output_format_dict
209
240
 
210
241
 
211
- class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
212
- """Base class for chat models.
242
+ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
243
+ r"""Base class for chat models.
213
244
 
214
245
  Key imperative methods:
215
246
  Methods that actually call the underlying model.
216
247
 
217
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
218
- | Method | Input | Output | Description |
219
- +===========================+================================================================+=====================================================================+==================================================================================================+
220
- | `invoke` | str | list[dict | tuple | BaseMessage] | PromptValue | BaseMessage | A single chat model call. |
221
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
222
- | `ainvoke` | ''' | BaseMessage | Defaults to running invoke in an async executor. |
223
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
224
- | `stream` | ''' | Iterator[BaseMessageChunk] | Defaults to yielding output of invoke. |
225
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
226
- | `astream` | ''' | AsyncIterator[BaseMessageChunk] | Defaults to yielding output of ainvoke. |
227
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
228
- | `astream_events` | ''' | AsyncIterator[StreamEvent] | Event types: 'on_chat_model_start', 'on_chat_model_stream', 'on_chat_model_end'. |
229
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
230
- | `batch` | list['''] | list[BaseMessage] | Defaults to running invoke in concurrent threads. |
231
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
232
- | `abatch` | list['''] | list[BaseMessage] | Defaults to running ainvoke in concurrent threads. |
233
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
234
- | `batch_as_completed` | list['''] | Iterator[tuple[int, Union[BaseMessage, Exception]]] | Defaults to running invoke in concurrent threads. |
235
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
236
- | `abatch_as_completed` | list['''] | AsyncIterator[tuple[int, Union[BaseMessage, Exception]]] | Defaults to running ainvoke in concurrent threads. |
237
- +---------------------------+----------------------------------------------------------------+---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
238
-
239
- This table provides a brief overview of the main imperative methods. Please see the base Runnable reference for full documentation.
248
+ This table provides a brief overview of the main imperative methods. Please see the base `Runnable` reference for full documentation.
249
+
250
+ | Method | Input | Output | Description |
251
+ | ---------------------- | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------- |
252
+ | `invoke` | `str` \| `list[dict | tuple | BaseMessage]` \| `PromptValue` | `BaseMessage` | A single chat model call. |
253
+ | `ainvoke` | `'''` | `BaseMessage` | Defaults to running `invoke` in an async executor. |
254
+ | `stream` | `'''` | `Iterator[BaseMessageChunk]` | Defaults to yielding output of `invoke`. |
255
+ | `astream` | `'''` | `AsyncIterator[BaseMessageChunk]` | Defaults to yielding output of `ainvoke`. |
256
+ | `astream_events` | `'''` | `AsyncIterator[StreamEvent]` | Event types: `on_chat_model_start`, `on_chat_model_stream`, `on_chat_model_end`. |
257
+ | `batch` | `list[''']` | `list[BaseMessage]` | Defaults to running `invoke` in concurrent threads. |
258
+ | `abatch` | `list[''']` | `list[BaseMessage]` | Defaults to running `ainvoke` in concurrent threads. |
259
+ | `batch_as_completed` | `list[''']` | `Iterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `invoke` in concurrent threads. |
260
+ | `abatch_as_completed` | `list[''']` | `AsyncIterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `ainvoke` in concurrent threads. |
240
261
 
241
262
  Key declarative methods:
242
- Methods for creating another Runnable using the ChatModel.
243
-
244
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
245
- | Method | Description |
246
- +==================================+===========================================================================================================+
247
- | `bind_tools` | Create ChatModel that can call tools. |
248
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
249
- | `with_structured_output` | Create wrapper that structures model output using schema. |
250
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
251
- | `with_retry` | Create wrapper that retries model calls on failure. |
252
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
253
- | `with_fallbacks` | Create wrapper that falls back to other models on failure. |
254
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
255
- | `configurable_fields` | Specify init args of the model that can be configured at runtime via the RunnableConfig. |
256
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
257
- | `configurable_alternatives` | Specify alternative models which can be swapped in at runtime via the RunnableConfig. |
258
- +----------------------------------+-----------------------------------------------------------------------------------------------------------+
263
+ Methods for creating another `Runnable` using the chat model.
259
264
 
260
265
  This table provides a brief overview of the main declarative methods. Please see the reference for each method for full documentation.
261
266
 
267
+ | Method | Description |
268
+ | ---------------------------- | ------------------------------------------------------------------------------------------ |
269
+ | `bind_tools` | Create chat model that can call tools. |
270
+ | `with_structured_output` | Create wrapper that structures model output using schema. |
271
+ | `with_retry` | Create wrapper that retries model calls on failure. |
272
+ | `with_fallbacks` | Create wrapper that falls back to other models on failure. |
273
+ | `configurable_fields` | Specify init args of the model that can be configured at runtime via the `RunnableConfig`. |
274
+ | `configurable_alternatives` | Specify alternative models which can be swapped in at runtime via the `RunnableConfig`. |
275
+
262
276
  Creating custom chat model:
263
277
  Custom chat model implementations should inherit from this class.
264
278
  Please reference the table below for information about which
265
279
  methods and properties are required or optional for implementations.
266
280
 
267
- +----------------------------------+--------------------------------------------------------------------+-------------------+
268
- | Method/Property | Description | Required/Optional |
269
- +==================================+====================================================================+===================+
281
+ | Method/Property | Description | Required |
282
+ | -------------------------------- | ------------------------------------------------------------------ | ----------------- |
270
283
  | `_generate` | Use to generate a chat result from a prompt | Required |
271
- +----------------------------------+--------------------------------------------------------------------+-------------------+
272
284
  | `_llm_type` (property) | Used to uniquely identify the type of the model. Used for logging. | Required |
273
- +----------------------------------+--------------------------------------------------------------------+-------------------+
274
285
  | `_identifying_params` (property) | Represent model parameterization for tracing purposes. | Optional |
275
- +----------------------------------+--------------------------------------------------------------------+-------------------+
276
286
  | `_stream` | Use to implement streaming | Optional |
277
- +----------------------------------+--------------------------------------------------------------------+-------------------+
278
287
  | `_agenerate` | Use to implement a native async method | Optional |
279
- +----------------------------------+--------------------------------------------------------------------+-------------------+
280
288
  | `_astream` | Use to implement async version of `_stream` | Optional |
281
- +----------------------------------+--------------------------------------------------------------------+-------------------+
282
-
283
- Follow the guide for more information on how to implement a custom Chat Model:
284
- [Guide](https://python.langchain.com/docs/how_to/custom_chat_model/).
285
289
 
286
290
  """ # noqa: E501
287
291
 
288
- callback_manager: Optional[BaseCallbackManager] = deprecated(
289
- name="callback_manager", since="0.1.7", removal="1.0", alternative="callbacks"
290
- )(
291
- Field(
292
- default=None,
293
- exclude=True,
294
- description="Callback manager to add to the run trace.",
295
- )
296
- )
297
-
298
- rate_limiter: Optional[BaseRateLimiter] = Field(default=None, exclude=True)
292
+ rate_limiter: BaseRateLimiter | None = Field(default=None, exclude=True)
299
293
  "An optional rate limiter to use for limiting the number of requests."
300
294
 
301
- disable_streaming: Union[bool, Literal["tool_calling"]] = False
295
+ disable_streaming: bool | Literal["tool_calling"] = False
302
296
  """Whether to disable streaming for this model.
303
297
 
304
- If streaming is bypassed, then ``stream()``/``astream()``/``astream_events()`` will
305
- defer to ``invoke()``/``ainvoke()``.
298
+ If streaming is bypassed, then `stream`/`astream`/`astream_events` will
299
+ defer to `invoke`/`ainvoke`.
306
300
 
307
- - If True, will always bypass streaming case.
308
- - If ``'tool_calling'``, will bypass streaming case only when the model is called
309
- with a ``tools`` keyword argument. In other words, LangChain will automatically
310
- switch to non-streaming behavior (``invoke()``) only when the tools argument is
311
- provided. This offers the best of both worlds.
312
- - If False (default), will always use streaming case if available.
301
+ - If `True`, will always bypass streaming case.
302
+ - If `'tool_calling'`, will bypass streaming case only when the model is called
303
+ with a `tools` keyword argument. In other words, LangChain will automatically
304
+ switch to non-streaming behavior (`invoke`) only when the tools argument is
305
+ provided. This offers the best of both worlds.
306
+ - If `False` (Default), will always use streaming case if available.
313
307
 
314
- The main reason for this flag is that code might be written using ``.stream()`` and
308
+ The main reason for this flag is that code might be written using `stream` and
315
309
  a user may want to swap out a given model for another model whose the implementation
316
310
  does not properly support streaming.
317
311
  """
318
312
 
319
- @model_validator(mode="before")
320
- @classmethod
321
- def raise_deprecation(cls, values: dict) -> Any:
322
- """Raise deprecation warning if callback_manager is used.
313
+ output_version: str | None = Field(
314
+ default_factory=from_env("LC_OUTPUT_VERSION", default=None)
315
+ )
316
+ """Version of `AIMessage` output format to store in message content.
323
317
 
324
- Args:
325
- values (Dict): Values to validate.
318
+ `AIMessage.content_blocks` will lazily parse the contents of `content` into a
319
+ standard format. This flag can be used to additionally store the standard format
320
+ in message content, e.g., for serialization purposes.
326
321
 
327
- Returns:
328
- Dict: Validated values.
322
+ Supported values:
329
323
 
330
- Raises:
331
- DeprecationWarning: If callback_manager is used.
332
- """
333
- if values.get("callback_manager") is not None:
334
- warnings.warn(
335
- "callback_manager is deprecated. Please use callbacks instead.",
336
- DeprecationWarning,
337
- stacklevel=5,
338
- )
339
- values["callbacks"] = values.pop("callback_manager", None)
340
- return values
324
+ - `'v0'`: provider-specific format in content (can lazily-parse with
325
+ `content_blocks`)
326
+ - `'v1'`: standardized format in content (consistent with `content_blocks`)
327
+
328
+ Partner packages (e.g.,
329
+ [`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
330
+ field to roll out new content formats in a backward-compatible way.
331
+
332
+ !!! version-added "Added in version 1.0"
333
+
334
+ """
341
335
 
342
336
  model_config = ConfigDict(
343
337
  arbitrary_types_allowed=True,
@@ -352,7 +346,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
352
346
  @property
353
347
  @override
354
348
  def OutputType(self) -> Any:
355
- """Get the output type for this runnable."""
349
+ """Get the output type for this `Runnable`."""
356
350
  return AnyMessage
357
351
 
358
352
  def _convert_input(self, model_input: LanguageModelInput) -> PromptValue:
@@ -372,35 +366,38 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
372
366
  def invoke(
373
367
  self,
374
368
  input: LanguageModelInput,
375
- config: Optional[RunnableConfig] = None,
369
+ config: RunnableConfig | None = None,
376
370
  *,
377
- stop: Optional[list[str]] = None,
371
+ stop: list[str] | None = None,
378
372
  **kwargs: Any,
379
- ) -> BaseMessage:
373
+ ) -> AIMessage:
380
374
  config = ensure_config(config)
381
375
  return cast(
382
- "ChatGeneration",
383
- self.generate_prompt(
384
- [self._convert_input(input)],
385
- stop=stop,
386
- callbacks=config.get("callbacks"),
387
- tags=config.get("tags"),
388
- metadata=config.get("metadata"),
389
- run_name=config.get("run_name"),
390
- run_id=config.pop("run_id", None),
391
- **kwargs,
392
- ).generations[0][0],
393
- ).message
376
+ "AIMessage",
377
+ cast(
378
+ "ChatGeneration",
379
+ self.generate_prompt(
380
+ [self._convert_input(input)],
381
+ stop=stop,
382
+ callbacks=config.get("callbacks"),
383
+ tags=config.get("tags"),
384
+ metadata=config.get("metadata"),
385
+ run_name=config.get("run_name"),
386
+ run_id=config.pop("run_id", None),
387
+ **kwargs,
388
+ ).generations[0][0],
389
+ ).message,
390
+ )
394
391
 
395
392
  @override
396
393
  async def ainvoke(
397
394
  self,
398
395
  input: LanguageModelInput,
399
- config: Optional[RunnableConfig] = None,
396
+ config: RunnableConfig | None = None,
400
397
  *,
401
- stop: Optional[list[str]] = None,
398
+ stop: list[str] | None = None,
402
399
  **kwargs: Any,
403
- ) -> BaseMessage:
400
+ ) -> AIMessage:
404
401
  config = ensure_config(config)
405
402
  llm_result = await self.agenerate_prompt(
406
403
  [self._convert_input(input)],
@@ -412,15 +409,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
412
409
  run_id=config.pop("run_id", None),
413
410
  **kwargs,
414
411
  )
415
- return cast("ChatGeneration", llm_result.generations[0][0]).message
412
+ return cast(
413
+ "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message
414
+ )
416
415
 
417
416
  def _should_stream(
418
417
  self,
419
418
  *,
420
419
  async_api: bool,
421
- run_manager: Optional[
422
- Union[CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun]
423
- ] = None,
420
+ run_manager: CallbackManagerForLLMRun
421
+ | AsyncCallbackManagerForLLMRun
422
+ | None = None,
424
423
  **kwargs: Any,
425
424
  ) -> bool:
426
425
  """Determine if a given model call should hit the streaming API."""
@@ -445,6 +444,11 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
445
444
  if "stream" in kwargs:
446
445
  return kwargs["stream"]
447
446
 
447
+ if "streaming" in self.model_fields_set:
448
+ streaming_value = getattr(self, "streaming", None)
449
+ if isinstance(streaming_value, bool):
450
+ return streaming_value
451
+
448
452
  # Check if any streaming callback handlers have been passed in.
449
453
  handlers = run_manager.handlers if run_manager else []
450
454
  return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)
@@ -453,15 +457,15 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
453
457
  def stream(
454
458
  self,
455
459
  input: LanguageModelInput,
456
- config: Optional[RunnableConfig] = None,
460
+ config: RunnableConfig | None = None,
457
461
  *,
458
- stop: Optional[list[str]] = None,
462
+ stop: list[str] | None = None,
459
463
  **kwargs: Any,
460
- ) -> Iterator[BaseMessageChunk]:
464
+ ) -> Iterator[AIMessageChunk]:
461
465
  if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
462
- # model doesn't implement streaming, so use default implementation
466
+ # Model doesn't implement streaming, so use default implementation
463
467
  yield cast(
464
- "BaseMessageChunk",
468
+ "AIMessageChunk",
465
469
  self.invoke(input, config=config, stop=stop, **kwargs),
466
470
  )
467
471
  else:
@@ -506,16 +510,51 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
506
510
 
507
511
  try:
508
512
  input_messages = _normalize_messages(messages)
509
- run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
513
+ run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
514
+ yielded = False
515
+ index = -1
516
+ index_type = ""
510
517
  for chunk in self._stream(input_messages, stop=stop, **kwargs):
511
518
  if chunk.message.id is None:
512
519
  chunk.message.id = run_id
513
520
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
521
+ if self.output_version == "v1":
522
+ # Overwrite .content with .content_blocks
523
+ chunk.message = _update_message_content_to_blocks(
524
+ chunk.message, "v1"
525
+ )
526
+ for block in cast(
527
+ "list[types.ContentBlock]", chunk.message.content
528
+ ):
529
+ if block["type"] != index_type:
530
+ index_type = block["type"]
531
+ index = index + 1
532
+ if "index" not in block:
533
+ block["index"] = index
514
534
  run_manager.on_llm_new_token(
515
535
  cast("str", chunk.message.content), chunk=chunk
516
536
  )
517
537
  chunks.append(chunk)
518
- yield chunk.message
538
+ yield cast("AIMessageChunk", chunk.message)
539
+ yielded = True
540
+
541
+ # Yield a final empty chunk with chunk_position="last" if not yet
542
+ # yielded
543
+ if (
544
+ yielded
545
+ and isinstance(chunk.message, AIMessageChunk)
546
+ and not chunk.message.chunk_position
547
+ ):
548
+ empty_content: str | list = (
549
+ "" if isinstance(chunk.message.content, str) else []
550
+ )
551
+ msg_chunk = AIMessageChunk(
552
+ content=empty_content, chunk_position="last", id=run_id
553
+ )
554
+ run_manager.on_llm_new_token(
555
+ "", chunk=ChatGenerationChunk(message=msg_chunk)
556
+ )
557
+ yield msg_chunk
519
558
  except BaseException as e:
520
559
  generations_with_error_metadata = _generate_response_from_error(e)
521
560
  chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -528,7 +567,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
528
567
  generations = [generations_with_error_metadata]
529
568
  run_manager.on_llm_error(
530
569
  e,
531
- response=LLMResult(generations=generations), # type: ignore[arg-type]
570
+ response=LLMResult(generations=generations),
532
571
  )
533
572
  raise
534
573
 
@@ -544,15 +583,15 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
544
583
  async def astream(
545
584
  self,
546
585
  input: LanguageModelInput,
547
- config: Optional[RunnableConfig] = None,
586
+ config: RunnableConfig | None = None,
548
587
  *,
549
- stop: Optional[list[str]] = None,
588
+ stop: list[str] | None = None,
550
589
  **kwargs: Any,
551
- ) -> AsyncIterator[BaseMessageChunk]:
590
+ ) -> AsyncIterator[AIMessageChunk]:
552
591
  if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
553
592
  # No async or sync stream is implemented, so fall back to ainvoke
554
593
  yield cast(
555
- "BaseMessageChunk",
594
+ "AIMessageChunk",
556
595
  await self.ainvoke(input, config=config, stop=stop, **kwargs),
557
596
  )
558
597
  return
@@ -599,7 +638,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
599
638
 
600
639
  try:
601
640
  input_messages = _normalize_messages(messages)
602
- run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id)))
641
+ run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
642
+ yielded = False
643
+ index = -1
644
+ index_type = ""
603
645
  async for chunk in self._astream(
604
646
  input_messages,
605
647
  stop=stop,
@@ -608,11 +650,42 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
608
650
  if chunk.message.id is None:
609
651
  chunk.message.id = run_id
610
652
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
653
+ if self.output_version == "v1":
654
+ # Overwrite .content with .content_blocks
655
+ chunk.message = _update_message_content_to_blocks(
656
+ chunk.message, "v1"
657
+ )
658
+ for block in cast(
659
+ "list[types.ContentBlock]", chunk.message.content
660
+ ):
661
+ if block["type"] != index_type:
662
+ index_type = block["type"]
663
+ index = index + 1
664
+ if "index" not in block:
665
+ block["index"] = index
611
666
  await run_manager.on_llm_new_token(
612
667
  cast("str", chunk.message.content), chunk=chunk
613
668
  )
614
669
  chunks.append(chunk)
615
- yield chunk.message
670
+ yield cast("AIMessageChunk", chunk.message)
671
+ yielded = True
672
+
673
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
674
+ if (
675
+ yielded
676
+ and isinstance(chunk.message, AIMessageChunk)
677
+ and not chunk.message.chunk_position
678
+ ):
679
+ empty_content: str | list = (
680
+ "" if isinstance(chunk.message.content, str) else []
681
+ )
682
+ msg_chunk = AIMessageChunk(
683
+ content=empty_content, chunk_position="last", id=run_id
684
+ )
685
+ await run_manager.on_llm_new_token(
686
+ "", chunk=ChatGenerationChunk(message=msg_chunk)
687
+ )
688
+ yield msg_chunk
616
689
  except BaseException as e:
617
690
  generations_with_error_metadata = _generate_response_from_error(e)
618
691
  chat_generation_chunk = merge_chat_generation_chunks(chunks)
@@ -622,7 +695,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
622
695
  generations = [generations_with_error_metadata]
623
696
  await run_manager.on_llm_error(
624
697
  e,
625
- response=LLMResult(generations=generations), # type: ignore[arg-type]
698
+ response=LLMResult(generations=generations),
626
699
  )
627
700
  raise
628
701
 
@@ -638,7 +711,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
638
711
 
639
712
  # --- Custom methods ---
640
713
 
641
- def _combine_llm_outputs(self, llm_outputs: list[Optional[dict]]) -> dict: # noqa: ARG002
714
+ def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict: # noqa: ARG002
642
715
  return {}
643
716
 
644
717
  def _convert_cached_generations(self, cache_val: list) -> list[ChatGeneration]:
@@ -653,6 +726,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
653
726
 
654
727
  Returns:
655
728
  List of ChatGeneration objects.
729
+
656
730
  """
657
731
  converted_generations = []
658
732
  for gen in cache_val:
@@ -666,12 +740,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
666
740
  converted_generations.append(chat_gen)
667
741
  else:
668
742
  # Already a ChatGeneration or other expected type
743
+ if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
744
+ # We zero out cost on cache hits
745
+ gen.message = gen.message.model_copy(
746
+ update={
747
+ "usage_metadata": {
748
+ **(gen.message.usage_metadata or {}),
749
+ "total_cost": 0,
750
+ }
751
+ }
752
+ )
669
753
  converted_generations.append(gen)
670
754
  return converted_generations
671
755
 
672
756
  def _get_invocation_params(
673
757
  self,
674
- stop: Optional[list[str]] = None,
758
+ stop: list[str] | None = None,
675
759
  **kwargs: Any,
676
760
  ) -> dict:
677
761
  params = self.dict()
@@ -680,7 +764,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
680
764
 
681
765
  def _get_ls_params(
682
766
  self,
683
- stop: Optional[list[str]] = None,
767
+ stop: list[str] | None = None,
684
768
  **kwargs: Any,
685
769
  ) -> LangSmithParams:
686
770
  """Get standard params for tracing."""
@@ -697,7 +781,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
697
781
  ls_params["ls_stop"] = stop
698
782
 
699
783
  # model
700
- if hasattr(self, "model") and isinstance(self.model, str):
784
+ if "model" in kwargs and isinstance(kwargs["model"], str):
785
+ ls_params["ls_model_name"] = kwargs["model"]
786
+ elif hasattr(self, "model") and isinstance(self.model, str):
701
787
  ls_params["ls_model_name"] = self.model
702
788
  elif hasattr(self, "model_name") and isinstance(self.model_name, str):
703
789
  ls_params["ls_model_name"] = self.model_name
@@ -716,7 +802,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
716
802
 
717
803
  return ls_params
718
804
 
719
- def _get_llm_string(self, stop: Optional[list[str]] = None, **kwargs: Any) -> str:
805
+ def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:
720
806
  if self.is_lc_serializable():
721
807
  params = {**kwargs, "stop": stop}
722
808
  param_string = str(sorted(params.items()))
@@ -733,13 +819,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
733
819
  def generate(
734
820
  self,
735
821
  messages: list[list[BaseMessage]],
736
- stop: Optional[list[str]] = None,
822
+ stop: list[str] | None = None,
737
823
  callbacks: Callbacks = None,
738
824
  *,
739
- tags: Optional[list[str]] = None,
740
- metadata: Optional[dict[str, Any]] = None,
741
- run_name: Optional[str] = None,
742
- run_id: Optional[uuid.UUID] = None,
825
+ tags: list[str] | None = None,
826
+ metadata: dict[str, Any] | None = None,
827
+ run_name: str | None = None,
828
+ run_id: uuid.UUID | None = None,
743
829
  **kwargs: Any,
744
830
  ) -> LLMResult:
745
831
  """Pass a sequence of prompts to the model and return model generations.
@@ -748,16 +834,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
748
834
  API.
749
835
 
750
836
  Use this method when you want to:
751
- 1. take advantage of batched calls,
752
- 2. need more output from the model than just the top generated value,
753
- 3. are building chains that are agnostic to the underlying language model
754
- type (e.g., pure text completion models vs chat models).
837
+
838
+ 1. Take advantage of batched calls,
839
+ 2. Need more output from the model than just the top generated value,
840
+ 3. Are building chains that are agnostic to the underlying language model
841
+ type (e.g., pure text completion models vs chat models).
755
842
 
756
843
  Args:
757
844
  messages: List of list of messages.
758
845
  stop: Stop words to use when generating. Model output is cut off at the
759
846
  first occurrence of any of these substrings.
760
- callbacks: Callbacks to pass through. Used for executing additional
847
+ callbacks: `Callbacks` to pass through. Used for executing additional
761
848
  functionality, such as logging or streaming, throughout generation.
762
849
  tags: The tags to apply.
763
850
  metadata: The metadata to apply.
@@ -767,8 +854,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
767
854
  to the model provider API call.
768
855
 
769
856
  Returns:
770
- An LLMResult, which contains a list of candidate Generations for each input
771
- prompt and additional model provider-specific output.
857
+ An `LLMResult`, which contains a list of candidate `Generations` for each
858
+ input prompt and additional model provider-specific output.
859
+
772
860
  """
773
861
  ls_structured_output_format = kwargs.pop(
774
862
  "ls_structured_output_format", None
@@ -825,20 +913,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
825
913
  run_managers[i].on_llm_error(
826
914
  e,
827
915
  response=LLMResult(
828
- generations=[generations_with_error_metadata] # type: ignore[list-item]
916
+ generations=[generations_with_error_metadata]
829
917
  ),
830
918
  )
831
919
  raise
832
920
  flattened_outputs = [
833
- LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[list-item]
921
+ LLMResult(generations=[res.generations], llm_output=res.llm_output)
834
922
  for res in results
835
923
  ]
836
924
  llm_output = self._combine_llm_outputs([res.llm_output for res in results])
837
925
  generations = [res.generations for res in results]
838
- output = LLMResult(generations=generations, llm_output=llm_output) # type: ignore[arg-type]
926
+ output = LLMResult(generations=generations, llm_output=llm_output)
839
927
  if run_managers:
840
928
  run_infos = []
841
- for manager, flattened_output in zip(run_managers, flattened_outputs):
929
+ for manager, flattened_output in zip(
930
+ run_managers, flattened_outputs, strict=False
931
+ ):
842
932
  manager.on_llm_end(flattened_output)
843
933
  run_infos.append(RunInfo(run_id=manager.run_id))
844
934
  output.run = run_infos
@@ -847,13 +937,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
847
937
  async def agenerate(
848
938
  self,
849
939
  messages: list[list[BaseMessage]],
850
- stop: Optional[list[str]] = None,
940
+ stop: list[str] | None = None,
851
941
  callbacks: Callbacks = None,
852
942
  *,
853
- tags: Optional[list[str]] = None,
854
- metadata: Optional[dict[str, Any]] = None,
855
- run_name: Optional[str] = None,
856
- run_id: Optional[uuid.UUID] = None,
943
+ tags: list[str] | None = None,
944
+ metadata: dict[str, Any] | None = None,
945
+ run_name: str | None = None,
946
+ run_id: uuid.UUID | None = None,
857
947
  **kwargs: Any,
858
948
  ) -> LLMResult:
859
949
  """Asynchronously pass a sequence of prompts to a model and return generations.
@@ -862,16 +952,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
862
952
  API.
863
953
 
864
954
  Use this method when you want to:
865
- 1. take advantage of batched calls,
866
- 2. need more output from the model than just the top generated value,
867
- 3. are building chains that are agnostic to the underlying language model
868
- type (e.g., pure text completion models vs chat models).
955
+
956
+ 1. Take advantage of batched calls,
957
+ 2. Need more output from the model than just the top generated value,
958
+ 3. Are building chains that are agnostic to the underlying language model
959
+ type (e.g., pure text completion models vs chat models).
869
960
 
870
961
  Args:
871
962
  messages: List of list of messages.
872
963
  stop: Stop words to use when generating. Model output is cut off at the
873
964
  first occurrence of any of these substrings.
874
- callbacks: Callbacks to pass through. Used for executing additional
965
+ callbacks: `Callbacks` to pass through. Used for executing additional
875
966
  functionality, such as logging or streaming, throughout generation.
876
967
  tags: The tags to apply.
877
968
  metadata: The metadata to apply.
@@ -881,8 +972,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
881
972
  to the model provider API call.
882
973
 
883
974
  Returns:
884
- An LLMResult, which contains a list of candidate Generations for each input
885
- prompt and additional model provider-specific output.
975
+ An `LLMResult`, which contains a list of candidate `Generations` for each
976
+ input prompt and additional model provider-specific output.
977
+
886
978
  """
887
979
  ls_structured_output_format = kwargs.pop(
888
980
  "ls_structured_output_format", None
@@ -944,7 +1036,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
944
1036
  await run_managers[i].on_llm_error(
945
1037
  res,
946
1038
  response=LLMResult(
947
- generations=[generations_with_error_metadata] # type: ignore[list-item]
1039
+ generations=[generations_with_error_metadata]
948
1040
  ),
949
1041
  )
950
1042
  exceptions.append(res)
@@ -954,27 +1046,27 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
954
1046
  *[
955
1047
  run_manager.on_llm_end(
956
1048
  LLMResult(
957
- generations=[res.generations], # type: ignore[list-item, union-attr]
1049
+ generations=[res.generations], # type: ignore[union-attr]
958
1050
  llm_output=res.llm_output, # type: ignore[union-attr]
959
1051
  )
960
1052
  )
961
- for run_manager, res in zip(run_managers, results)
1053
+ for run_manager, res in zip(run_managers, results, strict=False)
962
1054
  if not isinstance(res, Exception)
963
1055
  ]
964
1056
  )
965
1057
  raise exceptions[0]
966
1058
  flattened_outputs = [
967
- LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[list-item, union-attr]
1059
+ LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[union-attr]
968
1060
  for res in results
969
1061
  ]
970
1062
  llm_output = self._combine_llm_outputs([res.llm_output for res in results]) # type: ignore[union-attr]
971
1063
  generations = [res.generations for res in results] # type: ignore[union-attr]
972
- output = LLMResult(generations=generations, llm_output=llm_output) # type: ignore[arg-type]
1064
+ output = LLMResult(generations=generations, llm_output=llm_output)
973
1065
  await asyncio.gather(
974
1066
  *[
975
1067
  run_manager.on_llm_end(flattened_output)
976
1068
  for run_manager, flattened_output in zip(
977
- run_managers, flattened_outputs
1069
+ run_managers, flattened_outputs, strict=False
978
1070
  )
979
1071
  ]
980
1072
  )
@@ -988,7 +1080,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
988
1080
  def generate_prompt(
989
1081
  self,
990
1082
  prompts: list[PromptValue],
991
- stop: Optional[list[str]] = None,
1083
+ stop: list[str] | None = None,
992
1084
  callbacks: Callbacks = None,
993
1085
  **kwargs: Any,
994
1086
  ) -> LLMResult:
@@ -999,7 +1091,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
999
1091
  async def agenerate_prompt(
1000
1092
  self,
1001
1093
  prompts: list[PromptValue],
1002
- stop: Optional[list[str]] = None,
1094
+ stop: list[str] | None = None,
1003
1095
  callbacks: Callbacks = None,
1004
1096
  **kwargs: Any,
1005
1097
  ) -> LLMResult:
@@ -1011,8 +1103,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1011
1103
  def _generate_with_cache(
1012
1104
  self,
1013
1105
  messages: list[BaseMessage],
1014
- stop: Optional[list[str]] = None,
1015
- run_manager: Optional[CallbackManagerForLLMRun] = None,
1106
+ stop: list[str] | None = None,
1107
+ run_manager: CallbackManagerForLLMRun | None = None,
1016
1108
  **kwargs: Any,
1017
1109
  ) -> ChatResult:
1018
1110
  llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()
@@ -1048,15 +1140,53 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1048
1140
  **kwargs,
1049
1141
  ):
1050
1142
  chunks: list[ChatGenerationChunk] = []
1143
+ run_id: str | None = (
1144
+ f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
1145
+ )
1146
+ yielded = False
1147
+ index = -1
1148
+ index_type = ""
1051
1149
  for chunk in self._stream(messages, stop=stop, **kwargs):
1052
1150
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
1151
+ if self.output_version == "v1":
1152
+ # Overwrite .content with .content_blocks
1153
+ chunk.message = _update_message_content_to_blocks(
1154
+ chunk.message, "v1"
1155
+ )
1156
+ for block in cast(
1157
+ "list[types.ContentBlock]", chunk.message.content
1158
+ ):
1159
+ if block["type"] != index_type:
1160
+ index_type = block["type"]
1161
+ index = index + 1
1162
+ if "index" not in block:
1163
+ block["index"] = index
1053
1164
  if run_manager:
1054
1165
  if chunk.message.id is None:
1055
- chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
1166
+ chunk.message.id = run_id
1056
1167
  run_manager.on_llm_new_token(
1057
1168
  cast("str", chunk.message.content), chunk=chunk
1058
1169
  )
1059
1170
  chunks.append(chunk)
1171
+ yielded = True
1172
+
1173
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
1174
+ if (
1175
+ yielded
1176
+ and isinstance(chunk.message, AIMessageChunk)
1177
+ and not chunk.message.chunk_position
1178
+ ):
1179
+ empty_content: str | list = (
1180
+ "" if isinstance(chunk.message.content, str) else []
1181
+ )
1182
+ chunk = ChatGenerationChunk(
1183
+ message=AIMessageChunk(
1184
+ content=empty_content, chunk_position="last", id=run_id
1185
+ )
1186
+ )
1187
+ if run_manager:
1188
+ run_manager.on_llm_new_token("", chunk=chunk)
1189
+ chunks.append(chunk)
1060
1190
  result = generate_from_stream(iter(chunks))
1061
1191
  elif inspect.signature(self._generate).parameters.get("run_manager"):
1062
1192
  result = self._generate(
@@ -1065,10 +1195,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
1065
1195
  else:
1066
1196
  result = self._generate(messages, stop=stop, **kwargs)
1067
1197
 
1198
+ if self.output_version == "v1":
1199
+ # Overwrite .content with .content_blocks
1200
+ for generation in result.generations:
1201
+ generation.message = _update_message_content_to_blocks(
1202
+ generation.message, "v1"
1203
+ )
1204
+
1068
1205
  # Add response metadata to each generation
1069
1206
  for idx, generation in enumerate(result.generations):
1070
1207
  if run_manager and generation.message.id is None:
1071
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1208
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
1072
1209
  generation.message.response_metadata = _gen_info_and_msg_metadata(
1073
1210
  generation
1074
1211
  )
@@ -1084,8 +1221,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  async def _agenerate_with_cache(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()
@@ -1121,15 +1258,53 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  **kwargs,
  ):
  chunks: list[ChatGenerationChunk] = []
+ run_id: str | None = (
+ f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
+ )
+ yielded = False
+ index = -1
+ index_type = ""
  async for chunk in self._astream(messages, stop=stop, **kwargs):
  chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+ if self.output_version == "v1":
+ # Overwrite .content with .content_blocks
+ chunk.message = _update_message_content_to_blocks(
+ chunk.message, "v1"
+ )
+ for block in cast(
+ "list[types.ContentBlock]", chunk.message.content
+ ):
+ if block["type"] != index_type:
+ index_type = block["type"]
+ index = index + 1
+ if "index" not in block:
+ block["index"] = index
  if run_manager:
  if chunk.message.id is None:
- chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}"
+ chunk.message.id = run_id
  await run_manager.on_llm_new_token(
  cast("str", chunk.message.content), chunk=chunk
  )
  chunks.append(chunk)
+ yielded = True
+
+ # Yield a final empty chunk with chunk_position="last" if not yet yielded
+ if (
+ yielded
+ and isinstance(chunk.message, AIMessageChunk)
+ and not chunk.message.chunk_position
+ ):
+ empty_content: str | list = (
+ "" if isinstance(chunk.message.content, str) else []
+ )
+ chunk = ChatGenerationChunk(
+ message=AIMessageChunk(
+ content=empty_content, chunk_position="last", id=run_id
+ )
+ )
+ if run_manager:
+ await run_manager.on_llm_new_token("", chunk=chunk)
+ chunks.append(chunk)
  result = generate_from_stream(iter(chunks))
  elif inspect.signature(self._agenerate).parameters.get("run_manager"):
  result = await self._agenerate(
@@ -1138,10 +1313,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  else:
  result = await self._agenerate(messages, stop=stop, **kwargs)

+ if self.output_version == "v1":
+ # Overwrite .content with .content_blocks
+ for generation in result.generations:
+ generation.message = _update_message_content_to_blocks(
+ generation.message, "v1"
+ )
+
  # Add response metadata to each generation
  for idx, generation in enumerate(result.generations):
  if run_manager and generation.message.id is None:
- generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
+ generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}"
  generation.message.response_metadata = _gen_info_and_msg_metadata(
  generation
  )
@@ -1158,20 +1340,40 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def _generate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
- """Top Level call."""
+ """Generate the result.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Returns:
+ The chat result.
+ """

  async def _agenerate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
- """Top Level call."""
+ """Generate the result.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Returns:
+ The chat result.
+ """
  return await run_in_executor(
  None,
  self._generate,
@@ -1184,19 +1386,41 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def _stream(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> Iterator[ChatGenerationChunk]:
+ """Stream the output of the model.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Yields:
+ The chat generation chunks.
+ """
  raise NotImplementedError

  async def _astream(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> AsyncIterator[ChatGenerationChunk]:
+ """Stream the output of the model.
+
+ Args:
+ messages: The messages to generate from.
+ stop: Optional list of stop words to use when generating.
+ run_manager: Optional callback manager to use for this call.
+ **kwargs: Additional keyword arguments to pass to the model.
+
+ Yields:
+ The chat generation chunks.
+ """
  iterator = await run_in_executor(
  None,
  self._stream,
@@ -1217,40 +1441,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  break
  yield item # type: ignore[misc]

- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- def __call__(
- self,
- messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- callbacks: Callbacks = None,
- **kwargs: Any,
- ) -> BaseMessage:
- """Call the model.
-
- Args:
- messages: List of messages.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- callbacks: Callbacks to pass through. Used for executing additional
- functionality, such as logging or streaming, throughout generation.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The model output message.
- """
- generation = self.generate(
- [messages], stop=stop, callbacks=callbacks, **kwargs
- ).generations[0][0]
- if isinstance(generation, ChatGeneration):
- return generation.message
- msg = "Unexpected generation type"
- raise ValueError(msg)
-
  async def _call_async(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
+ stop: list[str] | None = None,
  callbacks: Callbacks = None,
  **kwargs: Any,
  ) -> BaseMessage:
@@ -1263,86 +1457,6 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  msg = "Unexpected generation type"
  raise ValueError(msg)

- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- def call_as_llm(
- self, message: str, stop: Optional[list[str]] = None, **kwargs: Any
- ) -> str:
- """Call the model.
-
- Args:
- message: The input message.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The model output string.
- """
- return self.predict(message, stop=stop, **kwargs)
-
- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- @override
- def predict(
- self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
- ) -> str:
- """Predict the next message.
-
- Args:
- text: The input message.
- stop: Stop words to use when generating. Model output is cut off at the
- first occurrence of any of these substrings.
- **kwargs: Arbitrary additional keyword arguments. These are usually passed
- to the model provider API call.
-
- Returns:
- The predicted output string.
- """
- stop_ = None if stop is None else list(stop)
- result = self([HumanMessage(content=text)], stop=stop_, **kwargs)
- if isinstance(result.content, str):
- return result.content
- msg = "Cannot use predict when output is not a string."
- raise ValueError(msg)
-
- @deprecated("0.1.7", alternative="invoke", removal="1.0")
- @override
- def predict_messages(
- self,
- messages: list[BaseMessage],
- *,
- stop: Optional[Sequence[str]] = None,
- **kwargs: Any,
- ) -> BaseMessage:
- stop_ = None if stop is None else list(stop)
- return self(messages, stop=stop_, **kwargs)
-
- @deprecated("0.1.7", alternative="ainvoke", removal="1.0")
- @override
- async def apredict(
- self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
- ) -> str:
- stop_ = None if stop is None else list(stop)
- result = await self._call_async(
- [HumanMessage(content=text)], stop=stop_, **kwargs
- )
- if isinstance(result.content, str):
- return result.content
- msg = "Cannot use predict when output is not a string."
- raise ValueError(msg)
-
- @deprecated("0.1.7", alternative="ainvoke", removal="1.0")
- @override
- async def apredict_messages(
- self,
- messages: list[BaseMessage],
- *,
- stop: Optional[Sequence[str]] = None,
- **kwargs: Any,
- ) -> BaseMessage:
- stop_ = None if stop is None else list(stop)
- return await self._call_async(messages, stop=stop_, **kwargs)
-
  @property
  @abstractmethod
  def _llm_type(self) -> str:
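The removals above complete the 0.1.7 deprecation cycle: `__call__`, `call_as_llm`, `predict`, `predict_messages`, `apredict`, and `apredict_messages` are gone, leaving `invoke`/`ainvoke` as the supported entry points. A rough migration sketch (`ChatModel` is a placeholder for any concrete `BaseChatModel` subclass, as in the docstring examples below):

```python
from langchain_core.messages import HumanMessage

model = ChatModel(model="model-name")  # placeholder concrete chat model

# Removed in this release:
#   model([HumanMessage(content="hi")])   # __call__
#   model.predict("hi")                   # predict
#   model.call_as_llm("hi")               # call_as_llm

# Supported path: invoke() accepts a string or a list of messages
# and returns an AIMessage.
reply = model.invoke([HumanMessage(content="hi")])
print(reply.content)
```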
@@ -1358,12 +1472,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  def bind_tools(
  self,
  tools: Sequence[
- Union[typing.Dict[str, Any], type, Callable, BaseTool] # noqa: UP006
+ typing.Dict[str, Any] | type | Callable | BaseTool # noqa: UP006
  ],
  *,
- tool_choice: Optional[Union[str]] = None,
+ tool_choice: str | None = None,
  **kwargs: Any,
- ) -> Runnable[LanguageModelInput, BaseMessage]:
+ ) -> Runnable[LanguageModelInput, AIMessage]:
  """Bind tools to the model.

  Args:
@@ -1372,16 +1486,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

  Returns:
  A Runnable that returns a message.
+
  """
  raise NotImplementedError

  def with_structured_output(
  self,
- schema: Union[typing.Dict, type], # noqa: UP006
+ schema: typing.Dict | type, # noqa: UP006
  *,
  include_raw: bool = False,
  **kwargs: Any,
- ) -> Runnable[LanguageModelInput, Union[typing.Dict, BaseModel]]: # noqa: UP006
+ ) -> Runnable[LanguageModelInput, typing.Dict | BaseModel]: # noqa: UP006
  """Model wrapper that returns outputs formatted to match the given schema.

  Args:
@@ -1389,102 +1504,130 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

  - an OpenAI function/tool schema,
  - a JSON Schema,
- - a TypedDict class,
+ - a `TypedDict` class,
  - or a Pydantic class.

- If ``schema`` is a Pydantic class then the model output will be a
+ If `schema` is a Pydantic class then the model output will be a
  Pydantic instance of that class, and the model-generated fields will be
  validated by the Pydantic class. Otherwise the model output will be a
- dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
- for more on how to properly specify types and descriptions of
- schema fields when specifying a Pydantic or TypedDict class.
+ dict and will not be validated.
+
+ See `langchain_core.utils.function_calling.convert_to_openai_tool` for
+ more on how to properly specify types and descriptions of schema fields
+ when specifying a Pydantic or `TypedDict` class.

  include_raw:
- If False then only the parsed structured output is returned. If
- an error occurs during model output parsing it will be raised. If True
- then both the raw model response (a BaseMessage) and the parsed model
+ If `False` then only the parsed structured output is returned. If
+ an error occurs during model output parsing it will be raised. If `True`
+ then both the raw model response (a `BaseMessage`) and the parsed model
  response will be returned. If an error occurs during output parsing it
- will be caught and returned as well. The final output is always a dict
- with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.
+ will be caught and returned as well.
+
+ The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
+ `'parsing_error'`.
+
+ Raises:
+ ValueError: If there are any unsupported `kwargs`.
+ NotImplementedError: If the model does not implement
+ `with_structured_output()`.

  Returns:
- A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.
+ A `Runnable` that takes same inputs as a
+ `langchain_core.language_models.chat.BaseChatModel`. If `include_raw` is
+ `False` and `schema` is a Pydantic class, `Runnable` outputs an instance
+ of `schema` (i.e., a Pydantic object). Otherwise, if `include_raw` is
+ `False` then `Runnable` outputs a `dict`.
+
+ If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:

- If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
- an instance of ``schema`` (i.e., a Pydantic object).
+ - `'raw'`: `BaseMessage`
+ - `'parsed'`: `None` if there was a parsing error, otherwise the type
+ depends on the `schema` as described above.
+ - `'parsing_error'`: `BaseException | None`

- Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+ Example: Pydantic schema (`include_raw=False`):

- If ``include_raw`` is True, then Runnable outputs a dict with keys:
+ ```python
+ from pydantic import BaseModel

- - ``'raw'``: BaseMessage
- - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
- - ``'parsing_error'``: Optional[BaseException]

- Example: Pydantic schema (include_raw=False):
- .. code-block:: python
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- from pydantic import BaseModel
+ answer: str
+ justification: str

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str

- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(AnswerWithJustification)
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(AnswerWithJustification)

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+
+ # -> AnswerWithJustification(
+ # answer='They weigh the same',
+ # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+ # )
+ ```
+
+ Example: Pydantic schema (`include_raw=True`):
+
+ ```python
+ from pydantic import BaseModel
+
+
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- # -> AnswerWithJustification(
- # answer='They weigh the same',
- # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
- # )
+ answer: str
+ justification: str

- Example: Pydantic schema (include_raw=True):
- .. code-block:: python

- from pydantic import BaseModel
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(
+ AnswerWithJustification, include_raw=True
+ )

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+ # -> {
+ # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
+ # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
+ # 'parsing_error': None
+ # }
+ ```

- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)
+ Example: `dict` schema (`include_raw=False`):

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
- # -> {
- # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
- # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
- # 'parsing_error': None
- # }
+ ```python
+ from pydantic import BaseModel
+ from langchain_core.utils.function_calling import convert_to_openai_tool

- Example: Dict schema (include_raw=False):
- .. code-block:: python

- from pydantic import BaseModel
- from langchain_core.utils.function_calling import convert_to_openai_tool
+ class AnswerWithJustification(BaseModel):
+ '''An answer to the user question along with justification for the answer.'''

- class AnswerWithJustification(BaseModel):
- '''An answer to the user question along with justification for the answer.'''
- answer: str
- justification: str
+ answer: str
+ justification: str

- dict_schema = convert_to_openai_tool(AnswerWithJustification)
- llm = ChatModel(model="model-name", temperature=0)
- structured_llm = llm.with_structured_output(dict_schema)

- structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
- # -> {
- # 'answer': 'They weigh the same',
- # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
- # }
+ dict_schema = convert_to_openai_tool(AnswerWithJustification)
+ model = ChatModel(model="model-name", temperature=0)
+ structured_model = model.with_structured_output(dict_schema)

- .. versionchanged:: 0.2.26
+ structured_model.invoke(
+ "What weighs more a pound of bricks or a pound of feathers"
+ )
+ # -> {
+ # 'answer': 'They weigh the same',
+ # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+ # }
+ ```

- Added support for TypedDict class.
+ !!! warning "Behavior changed in 0.2.26"
+ Added support for TypedDict class.

  """ # noqa: E501
  _ = kwargs.pop("method", None)
@@ -1493,11 +1636,6 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  msg = f"Received unsupported arguments {kwargs}"
  raise ValueError(msg)

- from langchain_core.output_parsers.openai_tools import (
- JsonOutputKeyToolsParser,
- PydanticToolsParser,
- )
-
  if type(self).bind_tools is BaseChatModel.bind_tools:
  msg = "with_structured_output is not implemented for this model."
  raise NotImplementedError(msg)
@@ -1534,15 +1672,17 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
  class SimpleChatModel(BaseChatModel):
  """Simplified implementation for a chat model to inherit from.

- **Note** This implementation is primarily here for backwards compatibility.
- For new implementations, please use `BaseChatModel` directly.
+ !!! note
+ This implementation is primarily here for backwards compatibility. For new
+ implementations, please use `BaseChatModel` directly.
+
  """

  def _generate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs)
@@ -1554,8 +1694,8 @@ class SimpleChatModel(BaseChatModel):
  def _call(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[CallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: CallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> str:
  """Simpler interface."""
@@ -1563,8 +1703,8 @@ class SimpleChatModel(BaseChatModel):
  async def _agenerate(
  self,
  messages: list[BaseMessage],
- stop: Optional[list[str]] = None,
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+ stop: list[str] | None = None,
+ run_manager: AsyncCallbackManagerForLLMRun | None = None,
  **kwargs: Any,
  ) -> ChatResult:
  return await run_in_executor(
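As the `SimpleChatModel` note above says, new chat models should subclass `BaseChatModel` directly. A hedged, minimal sketch of such a subclass (the echo behavior and class name are illustrative only, not part of this diff):

```python
from typing import Any

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class EchoChatModel(BaseChatModel):
    """Toy model that echoes the last input message back."""

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Echo the most recent message as the model's reply.
        message = AIMessage(content=str(messages[-1].content))
        return ChatResult(generations=[ChatGeneration(message=message)])

    @property
    def _llm_type(self) -> str:
        return "echo-chat-model"


reply = EchoChatModel().invoke("hello")  # reply is an AIMessage echoing the input
```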
@@ -1578,7 +1718,7 @@ class SimpleChatModel(BaseChatModel):


  def _gen_info_and_msg_metadata(
- generation: Union[ChatGeneration, ChatGenerationChunk],
+ generation: ChatGeneration | ChatGenerationChunk,
  ) -> dict:
  return {
  **(generation.generation_info or {}),