langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic. Click here for more details.

Files changed (165)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +52 -65
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +19 -19
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +323 -334
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +441 -507
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +48 -63
  17. langchain_core/document_loaders/base.py +23 -23
  18. langchain_core/document_loaders/langsmith.py +37 -37
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +62 -65
  21. langchain_core/documents/compressor.py +4 -4
  22. langchain_core/documents/transformers.py +28 -29
  23. langchain_core/embeddings/fake.py +50 -54
  24. langchain_core/example_selectors/length_based.py +1 -1
  25. langchain_core/example_selectors/semantic_similarity.py +21 -25
  26. langchain_core/exceptions.py +10 -11
  27. langchain_core/globals.py +3 -151
  28. langchain_core/indexing/api.py +61 -66
  29. langchain_core/indexing/base.py +58 -58
  30. langchain_core/indexing/in_memory.py +3 -3
  31. langchain_core/language_models/__init__.py +14 -27
  32. langchain_core/language_models/_utils.py +270 -84
  33. langchain_core/language_models/base.py +55 -162
  34. langchain_core/language_models/chat_models.py +442 -402
  35. langchain_core/language_models/fake.py +11 -11
  36. langchain_core/language_models/fake_chat_models.py +61 -39
  37. langchain_core/language_models/llms.py +123 -231
  38. langchain_core/load/dump.py +4 -5
  39. langchain_core/load/load.py +18 -28
  40. langchain_core/load/mapping.py +2 -4
  41. langchain_core/load/serializable.py +39 -40
  42. langchain_core/messages/__init__.py +61 -22
  43. langchain_core/messages/ai.py +368 -163
  44. langchain_core/messages/base.py +214 -43
  45. langchain_core/messages/block_translators/__init__.py +111 -0
  46. langchain_core/messages/block_translators/anthropic.py +470 -0
  47. langchain_core/messages/block_translators/bedrock.py +94 -0
  48. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  49. langchain_core/messages/block_translators/google_genai.py +530 -0
  50. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  51. langchain_core/messages/block_translators/groq.py +143 -0
  52. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  53. langchain_core/messages/block_translators/openai.py +1010 -0
  54. langchain_core/messages/chat.py +2 -6
  55. langchain_core/messages/content.py +1423 -0
  56. langchain_core/messages/function.py +6 -10
  57. langchain_core/messages/human.py +41 -38
  58. langchain_core/messages/modifier.py +2 -2
  59. langchain_core/messages/system.py +38 -28
  60. langchain_core/messages/tool.py +96 -103
  61. langchain_core/messages/utils.py +478 -504
  62. langchain_core/output_parsers/__init__.py +1 -14
  63. langchain_core/output_parsers/base.py +58 -61
  64. langchain_core/output_parsers/json.py +7 -8
  65. langchain_core/output_parsers/list.py +5 -7
  66. langchain_core/output_parsers/openai_functions.py +49 -47
  67. langchain_core/output_parsers/openai_tools.py +14 -19
  68. langchain_core/output_parsers/pydantic.py +12 -13
  69. langchain_core/output_parsers/string.py +2 -2
  70. langchain_core/output_parsers/transform.py +15 -17
  71. langchain_core/output_parsers/xml.py +8 -10
  72. langchain_core/outputs/__init__.py +1 -1
  73. langchain_core/outputs/chat_generation.py +18 -18
  74. langchain_core/outputs/chat_result.py +1 -3
  75. langchain_core/outputs/generation.py +8 -8
  76. langchain_core/outputs/llm_result.py +10 -10
  77. langchain_core/prompt_values.py +12 -12
  78. langchain_core/prompts/__init__.py +3 -27
  79. langchain_core/prompts/base.py +45 -55
  80. langchain_core/prompts/chat.py +254 -313
  81. langchain_core/prompts/dict.py +5 -5
  82. langchain_core/prompts/few_shot.py +81 -88
  83. langchain_core/prompts/few_shot_with_templates.py +11 -13
  84. langchain_core/prompts/image.py +12 -14
  85. langchain_core/prompts/loading.py +6 -8
  86. langchain_core/prompts/message.py +3 -3
  87. langchain_core/prompts/prompt.py +24 -39
  88. langchain_core/prompts/string.py +4 -4
  89. langchain_core/prompts/structured.py +42 -50
  90. langchain_core/rate_limiters.py +51 -60
  91. langchain_core/retrievers.py +49 -190
  92. langchain_core/runnables/base.py +1484 -1709
  93. langchain_core/runnables/branch.py +45 -61
  94. langchain_core/runnables/config.py +80 -88
  95. langchain_core/runnables/configurable.py +117 -134
  96. langchain_core/runnables/fallbacks.py +83 -79
  97. langchain_core/runnables/graph.py +85 -95
  98. langchain_core/runnables/graph_ascii.py +27 -28
  99. langchain_core/runnables/graph_mermaid.py +38 -50
  100. langchain_core/runnables/graph_png.py +15 -16
  101. langchain_core/runnables/history.py +135 -148
  102. langchain_core/runnables/passthrough.py +124 -150
  103. langchain_core/runnables/retry.py +46 -51
  104. langchain_core/runnables/router.py +25 -30
  105. langchain_core/runnables/schema.py +79 -74
  106. langchain_core/runnables/utils.py +62 -68
  107. langchain_core/stores.py +81 -115
  108. langchain_core/structured_query.py +8 -8
  109. langchain_core/sys_info.py +27 -29
  110. langchain_core/tools/__init__.py +1 -14
  111. langchain_core/tools/base.py +179 -187
  112. langchain_core/tools/convert.py +131 -139
  113. langchain_core/tools/render.py +10 -10
  114. langchain_core/tools/retriever.py +11 -11
  115. langchain_core/tools/simple.py +19 -24
  116. langchain_core/tools/structured.py +30 -39
  117. langchain_core/tracers/__init__.py +1 -9
  118. langchain_core/tracers/base.py +97 -99
  119. langchain_core/tracers/context.py +29 -52
  120. langchain_core/tracers/core.py +50 -60
  121. langchain_core/tracers/evaluation.py +11 -11
  122. langchain_core/tracers/event_stream.py +115 -70
  123. langchain_core/tracers/langchain.py +21 -21
  124. langchain_core/tracers/log_stream.py +43 -43
  125. langchain_core/tracers/memory_stream.py +3 -3
  126. langchain_core/tracers/root_listeners.py +16 -16
  127. langchain_core/tracers/run_collector.py +2 -4
  128. langchain_core/tracers/schemas.py +0 -129
  129. langchain_core/tracers/stdout.py +3 -3
  130. langchain_core/utils/__init__.py +1 -4
  131. langchain_core/utils/_merge.py +46 -8
  132. langchain_core/utils/aiter.py +57 -61
  133. langchain_core/utils/env.py +9 -9
  134. langchain_core/utils/function_calling.py +89 -191
  135. langchain_core/utils/html.py +7 -8
  136. langchain_core/utils/input.py +6 -6
  137. langchain_core/utils/interactive_env.py +1 -1
  138. langchain_core/utils/iter.py +37 -42
  139. langchain_core/utils/json.py +4 -3
  140. langchain_core/utils/json_schema.py +8 -8
  141. langchain_core/utils/mustache.py +9 -11
  142. langchain_core/utils/pydantic.py +33 -35
  143. langchain_core/utils/strings.py +5 -5
  144. langchain_core/utils/usage.py +1 -1
  145. langchain_core/utils/utils.py +80 -54
  146. langchain_core/vectorstores/base.py +129 -164
  147. langchain_core/vectorstores/in_memory.py +99 -174
  148. langchain_core/vectorstores/utils.py +5 -5
  149. langchain_core/version.py +1 -1
  150. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
  151. langchain_core-1.0.0.dist-info/RECORD +172 -0
  152. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  153. langchain_core/beta/__init__.py +0 -1
  154. langchain_core/beta/runnables/__init__.py +0 -1
  155. langchain_core/beta/runnables/context.py +0 -447
  156. langchain_core/memory.py +0 -120
  157. langchain_core/messages/content_blocks.py +0 -176
  158. langchain_core/prompts/pipeline.py +0 -138
  159. langchain_core/pydantic_v1/__init__.py +0 -30
  160. langchain_core/pydantic_v1/dataclasses.py +0 -23
  161. langchain_core/pydantic_v1/main.py +0 -23
  162. langchain_core/tracers/langchain_v1.py +0 -31
  163. langchain_core/utils/loading.py +0 -35
  164. langchain_core-0.3.79.dist-info/RECORD +0 -174
  165. langchain_core-0.3.79.dist-info/entry_points.txt +0 -4
@@ -1,50 +1,35 @@
1
1
  """Language models.
2
2
 
3
- **Language Model** is a type of model that can generate text or complete
4
- text prompts.
3
+ LangChain has two main classes to work with language models: chat models and
4
+ "old-fashioned" LLMs.
5
5
 
6
- LangChain has two main classes to work with language models: **Chat Models**
7
- and "old-fashioned" **LLMs**.
8
-
9
- **Chat Models**
6
+ **Chat models**
10
7
 
11
8
  Language models that use a sequence of messages as inputs and return chat messages
12
- as outputs (as opposed to using plain text). These are traditionally newer models (
13
- older models are generally LLMs, see below). Chat models support the assignment of
9
+ as outputs (as opposed to using plain text). Chat models support the assignment of
14
10
  distinct roles to conversation messages, helping to distinguish messages from the AI,
15
11
  users, and instructions such as system messages.
16
12
 
17
13
  The key abstraction for chat models is `BaseChatModel`. Implementations
18
- should inherit from this class. Please see LangChain how-to guides with more
19
- information on how to implement a custom chat model.
20
-
21
- To implement a custom Chat Model, inherit from `BaseChatModel`. See
22
- the following guide for more information on how to implement a custom Chat Model:
14
+ should inherit from this class.
23
15
 
24
- https://python.langchain.com/docs/how_to/custom_chat_model/
16
+ See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
25
17
 
26
18
  **LLMs**
27
19
 
28
20
  Language models that take a string as input and return a string.
29
- These are traditionally older models (newer models generally are Chat Models,
30
- see below).
31
-
32
- Although the underlying models are string in, string out, the LangChain wrappers
33
- also allow these models to take messages as input. This gives them the same interface
34
- as Chat Models. When messages are passed in as input, they will be formatted into a
35
- string under the hood before being passed to the underlying model.
36
-
37
- To implement a custom LLM, inherit from `BaseLLM` or `LLM`.
38
- Please see the following guide for more information on how to implement a custom LLM:
39
-
40
- https://python.langchain.com/docs/how_to/custom_llm/
41
-
21
+ These are traditionally older models (newer models generally are chat models).
42
22
 
23
+ Although the underlying models are string in, string out, the LangChain wrappers also
24
+ allow these models to take messages as input. This gives them the same interface as
25
+ chat models. When messages are passed in as input, they will be formatted into a string
26
+ under the hood before being passed to the underlying model.
43
27
  """
44
28
 
45
29
  from typing import TYPE_CHECKING
46
30
 
47
31
  from langchain_core._import_utils import import_attr
32
+ from langchain_core.language_models._utils import is_openai_data_block
48
33
 
49
34
  if TYPE_CHECKING:
50
35
  from langchain_core.language_models.base import (
@@ -85,6 +70,7 @@ __all__ = (
85
70
  "ParrotFakeChatModel",
86
71
  "SimpleChatModel",
87
72
  "get_tokenizer",
73
+ "is_openai_data_block",
88
74
  )
89
75
 
90
76
  _dynamic_imports = {
@@ -104,6 +90,7 @@ _dynamic_imports = {
104
90
  "ParrotFakeChatModel": "fake_chat_models",
105
91
  "LLM": "llms",
106
92
  "BaseLLM": "llms",
93
+ "is_openai_data_block": "_utils",
107
94
  }
108
95
 
109
96
 
@@ -1,13 +1,47 @@
1
1
  import re
2
2
  from collections.abc import Sequence
3
- from typing import Optional
3
+ from typing import (
4
+ TYPE_CHECKING,
5
+ Literal,
6
+ TypedDict,
7
+ TypeVar,
8
+ )
4
9
 
5
- from langchain_core.messages import BaseMessage
10
+ if TYPE_CHECKING:
11
+ from langchain_core.messages import BaseMessage
12
+ from langchain_core.messages.content import (
13
+ ContentBlock,
14
+ )
6
15
 
7
16
 
8
- def _is_openai_data_block(block: dict) -> bool:
9
- """Check if the block contains multimodal data in OpenAI Chat Completions format."""
17
+ def is_openai_data_block(
18
+ block: dict, filter_: Literal["image", "audio", "file"] | None = None
19
+ ) -> bool:
20
+ """Check whether a block contains multimodal data in OpenAI Chat Completions format.
21
+
22
+ Supports both data and ID-style blocks (e.g. `'file_data'` and `'file_id'`).
23
+
24
+ If additional keys are present, they are ignored / will not affect outcome as long
25
+ as the required keys are present and valid.
26
+
27
+ Args:
28
+ block: The content block to check.
29
+ filter_: If provided, only return True for blocks matching this specific type.
30
+ - "image": Only match image_url blocks
31
+ - "audio": Only match input_audio blocks
32
+ - "file": Only match file blocks
33
+ If `None`, match any valid OpenAI data block type. If `filter_` is set and
34
+ the block is a valid OpenAI data block of a different type, this function
35
+ returns `False`.
36
+
37
+ Returns:
38
+ `True` if the block is a valid OpenAI data block and matches the filter_
39
+ (if provided).
40
+
41
+ """
10
42
  if block.get("type") == "image_url":
43
+ if filter_ is not None and filter_ != "image":
44
+ return False
11
45
  if (
12
46
  (set(block.keys()) <= {"type", "image_url", "detail"})
13
47
  and (image_url := block.get("image_url"))
@@ -15,126 +49,278 @@ def _is_openai_data_block(block: dict) -> bool:
15
49
  ):
16
50
  url = image_url.get("url")
17
51
  if isinstance(url, str):
52
+ # Required per OpenAI spec
53
+ return True
54
+ # Ignore `'detail'` since it's optional and specific to OpenAI
55
+
56
+ elif block.get("type") == "input_audio":
57
+ if filter_ is not None and filter_ != "audio":
58
+ return False
59
+ if (audio := block.get("input_audio")) and isinstance(audio, dict):
60
+ audio_data = audio.get("data")
61
+ audio_format = audio.get("format")
62
+ # Both required per OpenAI spec
63
+ if isinstance(audio_data, str) and isinstance(audio_format, str):
18
64
  return True
19
65
 
20
66
  elif block.get("type") == "file":
67
+ if filter_ is not None and filter_ != "file":
68
+ return False
21
69
  if (file := block.get("file")) and isinstance(file, dict):
22
70
  file_data = file.get("file_data")
23
- if isinstance(file_data, str):
24
- return True
25
-
26
- elif block.get("type") == "input_audio":
27
- if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
28
- audio_data = input_audio.get("data")
29
- audio_format = input_audio.get("format")
30
- if isinstance(audio_data, str) and isinstance(audio_format, str):
71
+ file_id = file.get("file_id")
72
+ # Files can be either base64-encoded or pre-uploaded with an ID
73
+ if isinstance(file_data, str) or isinstance(file_id, str):
31
74
  return True
32
75
 
33
76
  else:
34
77
  return False
35
78
 
79
+ # Has no `'type'` key
36
80
  return False
37
81
 
38
82
 
39
- def _parse_data_uri(uri: str) -> Optional[dict]:
40
- """Parse a data URI into its components. If parsing fails, return None.
83
+ class ParsedDataUri(TypedDict):
84
+ source_type: Literal["base64"]
85
+ data: str
86
+ mime_type: str
41
87
 
42
- Example:
43
88
 
44
- .. code-block:: python
89
+ def _parse_data_uri(uri: str) -> ParsedDataUri | None:
90
+ """Parse a data URI into its components.
45
91
 
46
- data_uri = "..."
47
- parsed = _parse_data_uri(data_uri)
92
+ If parsing fails, return `None`. If either MIME type or data is missing, return
93
+ `None`.
48
94
 
49
- assert parsed == {
50
- "source_type": "base64",
51
- "mime_type": "image/jpeg",
52
- "data": "/9j/4AAQSkZJRg...",
53
- }
95
+ Example:
96
+ ```python
97
+ data_uri = "..."
98
+ parsed = _parse_data_uri(data_uri)
54
99
 
100
+ assert parsed == {
101
+ "source_type": "base64",
102
+ "mime_type": "image/jpeg",
103
+ "data": "/9j/4AAQSkZJRg...",
104
+ }
105
+ ```
55
106
  """
56
107
  regex = r"^data:(?P<mime_type>[^;]+);base64,(?P<data>.+)$"
57
108
  match = re.match(regex, uri)
58
109
  if match is None:
59
110
  return None
111
+
112
+ mime_type = match.group("mime_type")
113
+ data = match.group("data")
114
+ if not mime_type or not data:
115
+ return None
116
+
60
117
  return {
61
118
  "source_type": "base64",
62
- "data": match.group("data"),
63
- "mime_type": match.group("mime_type"),
119
+ "data": data,
120
+ "mime_type": mime_type,
64
121
  }
65
122
 
66
123
 
67
- def _convert_openai_format_to_data_block(block: dict) -> dict:
68
- """Convert OpenAI image content block to standard data content block.
124
+ def _normalize_messages(
125
+ messages: Sequence["BaseMessage"],
126
+ ) -> list["BaseMessage"]:
127
+ """Normalize message formats to LangChain v1 standard content blocks.
69
128
 
70
- If parsing fails, pass-through.
129
+ Chat models already implement support for:
130
+ - Images in OpenAI Chat Completions format
131
+ These will be passed through unchanged
132
+ - LangChain v1 standard content blocks
71
133
 
72
- Args:
73
- block: The OpenAI image content block to convert.
134
+ This function extends support to:
135
+ - [Audio](https://platform.openai.com/docs/api-reference/chat/create) and
136
+ [file](https://platform.openai.com/docs/api-reference/files) data in OpenAI
137
+ Chat Completions format
138
+ - Images are technically supported but we expect chat models to handle them
139
+ directly; this may change in the future
140
+ - LangChain v0 standard content blocks for backward compatibility
141
+
142
+ !!! warning "Behavior changed in 1.0.0"
143
+ In previous versions, this function returned messages in LangChain v0 format.
144
+ Now, it returns messages in LangChain v1 format, which upgraded chat models now
145
+ expect to receive when passing back in message history. For backward
146
+ compatibility, this function will convert v0 message content to v1 format.
147
+
148
+ ??? note "v0 Content Block Schemas"
149
+
150
+ `URLContentBlock`:
151
+
152
+ ```python
153
+ {
154
+ mime_type: NotRequired[str]
155
+ type: Literal['image', 'audio', 'file'],
156
+ source_type: Literal['url'],
157
+ url: str,
158
+ }
159
+ ```
160
+
161
+ `Base64ContentBlock`:
162
+
163
+ ```python
164
+ {
165
+ mime_type: NotRequired[str]
166
+ type: Literal['image', 'audio', 'file'],
167
+ source_type: Literal['base64'],
168
+ data: str,
169
+ }
170
+ ```
171
+
172
+ `IDContentBlock`:
173
+
174
+ (In practice, this was never used)
175
+
176
+ ```python
177
+ {
178
+ type: Literal["image", "audio", "file"],
179
+ source_type: Literal["id"],
180
+ id: str,
181
+ }
182
+ ```
183
+
184
+ `PlainTextContentBlock`:
185
+
186
+ ```python
187
+ {
188
+ mime_type: NotRequired[str]
189
+ type: Literal['file'],
190
+ source_type: Literal['text'],
191
+ url: str,
192
+ }
193
+ ```
194
+
195
+ If a v1 message is passed in, it will be returned as-is, meaning it is safe to
196
+ always pass in v1 messages to this function for assurance.
197
+
198
+ For posterity, here are the OpenAI Chat Completions schemas we expect:
199
+
200
+ Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
201
+ png, jpeg/jpg, webp, static gif:
202
+ {
203
+ "type": Literal['image_url'],
204
+ "image_url": {
205
+ "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
206
+ "detail": Literal['low', 'high', 'auto'] = 'auto', # Supported by OpenAI
207
+ }
208
+ }
209
+
210
+ Chat Completions audio:
211
+ {
212
+ "type": Literal['input_audio'],
213
+ "input_audio": {
214
+ "format": Literal['wav', 'mp3'],
215
+ "data": str = "$BASE64_ENCODED_AUDIO",
216
+ },
217
+ }
218
+
219
+ Chat Completions files: either base64 or pre-uploaded file ID
220
+ {
221
+ "type": Literal['file'],
222
+ "file": Union[
223
+ {
224
+ "filename": str | None = "$FILENAME",
225
+ "file_data": str = "$BASE64_ENCODED_FILE",
226
+ },
227
+ {
228
+ "file_id": str = "$FILE_ID", # For pre-uploaded files to OpenAI
229
+ },
230
+ ],
231
+ }
74
232
 
75
- Returns:
76
- The converted standard data content block.
77
- """
78
- if block["type"] == "image_url":
79
- parsed = _parse_data_uri(block["image_url"]["url"])
80
- if parsed is not None:
81
- parsed["type"] = "image"
82
- return parsed
83
- return block
84
-
85
- if block["type"] == "file":
86
- parsed = _parse_data_uri(block["file"]["file_data"])
87
- if parsed is not None:
88
- parsed["type"] = "file"
89
- if filename := block["file"].get("filename"):
90
- parsed["filename"] = filename
91
- return parsed
92
- return block
93
-
94
- if block["type"] == "input_audio":
95
- data = block["input_audio"].get("data")
96
- audio_format = block["input_audio"].get("format")
97
- if data and audio_format:
98
- return {
99
- "type": "audio",
100
- "source_type": "base64",
101
- "data": data,
102
- "mime_type": f"audio/{audio_format}",
103
- }
104
- return block
105
-
106
- return block
107
-
108
-
109
- def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
110
- """Extend support for message formats.
111
-
112
- Chat models implement support for images in OpenAI Chat Completions format, as well
113
- as other multimodal data as standard data blocks. This function extends support to
114
- audio and file data in OpenAI Chat Completions format by converting them to standard
115
- data blocks.
116
233
  """
234
+ from langchain_core.messages.block_translators.langchain_v0 import ( # noqa: PLC0415
235
+ _convert_legacy_v0_content_block_to_v1,
236
+ )
237
+ from langchain_core.messages.block_translators.openai import ( # noqa: PLC0415
238
+ _convert_openai_format_to_data_block,
239
+ )
240
+
117
241
  formatted_messages = []
118
242
  for message in messages:
243
+ # We preserve input messages - the caller may reuse them elsewhere and expects
244
+ # them to remain unchanged. We only create a copy if we need to translate.
119
245
  formatted_message = message
246
+
120
247
  if isinstance(message.content, list):
121
248
  for idx, block in enumerate(message.content):
249
+ # OpenAI Chat Completions multimodal data blocks to v1 standard
122
250
  if (
123
251
  isinstance(block, dict)
124
- # Subset to (PDF) files and audio, as most relevant chat models
125
- # support images in OAI format (and some may not yet support the
126
- # standard data block format)
127
- and block.get("type") in {"file", "input_audio"}
128
- and _is_openai_data_block(block)
252
+ and block.get("type") in {"input_audio", "file"}
253
+ # Discriminate between OpenAI/LC format since they share `'type'`
254
+ and is_openai_data_block(block)
129
255
  ):
130
- if formatted_message is message:
131
- formatted_message = message.model_copy()
132
- # Also shallow-copy content
133
- formatted_message.content = list(formatted_message.content)
134
-
135
- formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
136
- _convert_openai_format_to_data_block(block)
137
- )
256
+ formatted_message = _ensure_message_copy(message, formatted_message)
257
+
258
+ converted_block = _convert_openai_format_to_data_block(block)
259
+ _update_content_block(formatted_message, idx, converted_block)
260
+
261
+ # Convert multimodal LangChain v0 to v1 standard content blocks
262
+ elif (
263
+ isinstance(block, dict)
264
+ and block.get("type")
265
+ in {
266
+ "image",
267
+ "audio",
268
+ "file",
269
+ }
270
+ and block.get("source_type") # v1 doesn't have `source_type`
271
+ in {
272
+ "url",
273
+ "base64",
274
+ "id",
275
+ "text",
276
+ }
277
+ ):
278
+ formatted_message = _ensure_message_copy(message, formatted_message)
279
+
280
+ converted_block = _convert_legacy_v0_content_block_to_v1(block)
281
+ _update_content_block(formatted_message, idx, converted_block)
282
+ continue
283
+
284
+ # else, pass through blocks that look like they have v1 format unchanged
285
+
138
286
  formatted_messages.append(formatted_message)
139
287
 
140
288
  return formatted_messages
289
+
290
+
291
+ T = TypeVar("T", bound="BaseMessage")
292
+
293
+
294
+ def _ensure_message_copy(message: T, formatted_message: T) -> T:
295
+ """Create a copy of the message if it hasn't been copied yet."""
296
+ if formatted_message is message:
297
+ formatted_message = message.model_copy()
298
+ # Shallow-copy content list to allow modifications
299
+ formatted_message.content = list(formatted_message.content)
300
+ return formatted_message
301
+
302
+
303
+ def _update_content_block(
304
+ formatted_message: "BaseMessage", idx: int, new_block: ContentBlock | dict
305
+ ) -> None:
306
+ """Update a content block at the given index, handling type issues."""
307
+ # Type ignore needed because:
308
+ # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
309
+ # - When content is str, indexing fails (index error)
310
+ # - When content is list, the items are `Union[str, dict]` but we're assigning
311
+ # `Union[ContentBlock, dict]` where ContentBlock is richer than dict
312
+ # - This is safe because we only call this when we've verified content is a list and
313
+ # we're doing content block conversions
314
+ formatted_message.content[idx] = new_block # type: ignore[index, assignment]
315
+
316
+
317
+ def _update_message_content_to_blocks(message: T, output_version: str) -> T:
318
+ return message.model_copy(
319
+ update={
320
+ "content": message.content_blocks,
321
+ "response_metadata": {
322
+ **message.response_metadata,
323
+ "output_version": output_version,
324
+ },
325
+ }
326
+ )