langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

This release of langchain-core has been flagged as potentially problematic.

Files changed (172)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +45 -70
  4. langchain_core/_api/deprecation.py +80 -80
  5. langchain_core/_api/path.py +22 -8
  6. langchain_core/_import_utils.py +10 -4
  7. langchain_core/agents.py +25 -21
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +341 -348
  11. langchain_core/callbacks/file.py +55 -44
  12. langchain_core/callbacks/manager.py +546 -683
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +35 -36
  15. langchain_core/callbacks/usage.py +65 -70
  16. langchain_core/chat_history.py +48 -55
  17. langchain_core/document_loaders/base.py +46 -21
  18. langchain_core/document_loaders/langsmith.py +39 -36
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +96 -74
  21. langchain_core/documents/compressor.py +12 -9
  22. langchain_core/documents/transformers.py +29 -28
  23. langchain_core/embeddings/fake.py +56 -57
  24. langchain_core/env.py +2 -3
  25. langchain_core/example_selectors/base.py +12 -0
  26. langchain_core/example_selectors/length_based.py +1 -1
  27. langchain_core/example_selectors/semantic_similarity.py +21 -25
  28. langchain_core/exceptions.py +15 -9
  29. langchain_core/globals.py +4 -163
  30. langchain_core/indexing/api.py +132 -125
  31. langchain_core/indexing/base.py +64 -67
  32. langchain_core/indexing/in_memory.py +26 -6
  33. langchain_core/language_models/__init__.py +15 -27
  34. langchain_core/language_models/_utils.py +267 -117
  35. langchain_core/language_models/base.py +92 -177
  36. langchain_core/language_models/chat_models.py +547 -407
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +72 -118
  39. langchain_core/language_models/llms.py +168 -242
  40. langchain_core/load/dump.py +8 -11
  41. langchain_core/load/load.py +32 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +50 -56
  44. langchain_core/messages/__init__.py +36 -51
  45. langchain_core/messages/ai.py +377 -150
  46. langchain_core/messages/base.py +239 -47
  47. langchain_core/messages/block_translators/__init__.py +111 -0
  48. langchain_core/messages/block_translators/anthropic.py +470 -0
  49. langchain_core/messages/block_translators/bedrock.py +94 -0
  50. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  51. langchain_core/messages/block_translators/google_genai.py +530 -0
  52. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  53. langchain_core/messages/block_translators/groq.py +143 -0
  54. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  55. langchain_core/messages/block_translators/openai.py +1010 -0
  56. langchain_core/messages/chat.py +2 -3
  57. langchain_core/messages/content.py +1423 -0
  58. langchain_core/messages/function.py +7 -7
  59. langchain_core/messages/human.py +44 -38
  60. langchain_core/messages/modifier.py +3 -2
  61. langchain_core/messages/system.py +40 -27
  62. langchain_core/messages/tool.py +160 -58
  63. langchain_core/messages/utils.py +527 -638
  64. langchain_core/output_parsers/__init__.py +1 -14
  65. langchain_core/output_parsers/base.py +68 -104
  66. langchain_core/output_parsers/json.py +13 -17
  67. langchain_core/output_parsers/list.py +11 -33
  68. langchain_core/output_parsers/openai_functions.py +56 -74
  69. langchain_core/output_parsers/openai_tools.py +68 -109
  70. langchain_core/output_parsers/pydantic.py +15 -13
  71. langchain_core/output_parsers/string.py +6 -2
  72. langchain_core/output_parsers/transform.py +17 -60
  73. langchain_core/output_parsers/xml.py +34 -44
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +26 -11
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +17 -6
  78. langchain_core/outputs/llm_result.py +15 -8
  79. langchain_core/prompt_values.py +29 -123
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -63
  82. langchain_core/prompts/chat.py +259 -288
  83. langchain_core/prompts/dict.py +19 -11
  84. langchain_core/prompts/few_shot.py +84 -90
  85. langchain_core/prompts/few_shot_with_templates.py +14 -12
  86. langchain_core/prompts/image.py +19 -14
  87. langchain_core/prompts/loading.py +6 -8
  88. langchain_core/prompts/message.py +7 -8
  89. langchain_core/prompts/prompt.py +42 -43
  90. langchain_core/prompts/string.py +37 -16
  91. langchain_core/prompts/structured.py +43 -46
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +52 -192
  94. langchain_core/runnables/base.py +1727 -1683
  95. langchain_core/runnables/branch.py +52 -73
  96. langchain_core/runnables/config.py +89 -103
  97. langchain_core/runnables/configurable.py +128 -130
  98. langchain_core/runnables/fallbacks.py +93 -82
  99. langchain_core/runnables/graph.py +127 -127
  100. langchain_core/runnables/graph_ascii.py +63 -41
  101. langchain_core/runnables/graph_mermaid.py +87 -70
  102. langchain_core/runnables/graph_png.py +31 -36
  103. langchain_core/runnables/history.py +145 -161
  104. langchain_core/runnables/passthrough.py +141 -144
  105. langchain_core/runnables/retry.py +84 -68
  106. langchain_core/runnables/router.py +33 -37
  107. langchain_core/runnables/schema.py +79 -72
  108. langchain_core/runnables/utils.py +95 -139
  109. langchain_core/stores.py +85 -131
  110. langchain_core/structured_query.py +11 -15
  111. langchain_core/sys_info.py +31 -32
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +221 -247
  114. langchain_core/tools/convert.py +144 -161
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -19
  117. langchain_core/tools/simple.py +52 -29
  118. langchain_core/tools/structured.py +56 -60
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/_streaming.py +6 -7
  121. langchain_core/tracers/base.py +103 -112
  122. langchain_core/tracers/context.py +29 -48
  123. langchain_core/tracers/core.py +142 -105
  124. langchain_core/tracers/evaluation.py +30 -34
  125. langchain_core/tracers/event_stream.py +162 -117
  126. langchain_core/tracers/langchain.py +34 -36
  127. langchain_core/tracers/log_stream.py +87 -49
  128. langchain_core/tracers/memory_stream.py +3 -3
  129. langchain_core/tracers/root_listeners.py +18 -34
  130. langchain_core/tracers/run_collector.py +8 -20
  131. langchain_core/tracers/schemas.py +0 -125
  132. langchain_core/tracers/stdout.py +3 -3
  133. langchain_core/utils/__init__.py +1 -4
  134. langchain_core/utils/_merge.py +47 -9
  135. langchain_core/utils/aiter.py +70 -66
  136. langchain_core/utils/env.py +12 -9
  137. langchain_core/utils/function_calling.py +139 -206
  138. langchain_core/utils/html.py +7 -8
  139. langchain_core/utils/input.py +6 -6
  140. langchain_core/utils/interactive_env.py +6 -2
  141. langchain_core/utils/iter.py +48 -45
  142. langchain_core/utils/json.py +14 -4
  143. langchain_core/utils/json_schema.py +159 -43
  144. langchain_core/utils/mustache.py +32 -25
  145. langchain_core/utils/pydantic.py +67 -40
  146. langchain_core/utils/strings.py +5 -5
  147. langchain_core/utils/usage.py +1 -1
  148. langchain_core/utils/utils.py +104 -62
  149. langchain_core/vectorstores/base.py +131 -179
  150. langchain_core/vectorstores/in_memory.py +113 -182
  151. langchain_core/vectorstores/utils.py +23 -17
  152. langchain_core/version.py +1 -1
  153. langchain_core-1.0.0.dist-info/METADATA +68 -0
  154. langchain_core-1.0.0.dist-info/RECORD +172 -0
  155. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  156. langchain_core/beta/__init__.py +0 -1
  157. langchain_core/beta/runnables/__init__.py +0 -1
  158. langchain_core/beta/runnables/context.py +0 -448
  159. langchain_core/memory.py +0 -116
  160. langchain_core/messages/content_blocks.py +0 -1435
  161. langchain_core/prompts/pipeline.py +0 -133
  162. langchain_core/pydantic_v1/__init__.py +0 -30
  163. langchain_core/pydantic_v1/dataclasses.py +0 -23
  164. langchain_core/pydantic_v1/main.py +0 -23
  165. langchain_core/tracers/langchain_v1.py +0 -23
  166. langchain_core/utils/loading.py +0 -31
  167. langchain_core/v1/__init__.py +0 -1
  168. langchain_core/v1/chat_models.py +0 -1047
  169. langchain_core/v1/messages.py +0 -755
  170. langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
  171. langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
  172. langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/messages/block_translators/openai.py (new file)
@@ -0,0 +1,1010 @@
1
+ """Derivations of standard content blocks from OpenAI content."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import warnings
7
+ from collections.abc import Iterable
8
+ from typing import TYPE_CHECKING, Any, Literal, cast
9
+
10
+ from langchain_core.language_models._utils import (
11
+ _parse_data_uri,
12
+ is_openai_data_block,
13
+ )
14
+ from langchain_core.messages import content as types
15
+
16
+ if TYPE_CHECKING:
17
+ from langchain_core.messages import AIMessage, AIMessageChunk
18
+
19
+
20
+ def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
21
+ """Convert `ImageContentBlock` to format expected by OpenAI Chat Completions."""
22
+ if "url" in block:
23
+ return {
24
+ "type": "image_url",
25
+ "image_url": {
26
+ "url": block["url"],
27
+ },
28
+ }
29
+ if "base64" in block or block.get("source_type") == "base64":
30
+ if "mime_type" not in block:
31
+ error_message = "mime_type key is required for base64 data."
32
+ raise ValueError(error_message)
33
+ mime_type = block["mime_type"]
34
+ base64_data = block["data"] if "data" in block else block["base64"]
35
+ return {
36
+ "type": "image_url",
37
+ "image_url": {
38
+ "url": f"data:{mime_type};base64,{base64_data}",
39
+ },
40
+ }
41
+ error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
42
+ raise ValueError(error_message)
43
+
44
+
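A minimal usage sketch of the two supported inputs (the block values below are illustrative, not taken from the package):

# Hypothetical blocks, shown only to illustrate the mapping above.
convert_to_openai_image_block({"type": "image", "url": "https://example.com/cat.png"})
# -> {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}

convert_to_openai_image_block(
    {"type": "image", "mime_type": "image/png", "base64": "iVBORw0KGgo..."}
)
# -> {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}}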
45
+ def convert_to_openai_data_block(
46
+ block: dict, api: Literal["chat/completions", "responses"] = "chat/completions"
47
+ ) -> dict:
48
+ """Format standard data content block to format expected by OpenAI.
49
+
50
+ "Standard data content block" can include old-style LangChain v0 blocks
51
+ (URLContentBlock, Base64ContentBlock, IDContentBlock) or the newer v1 blocks.
52
+ """
53
+ if block["type"] == "image":
54
+ chat_completions_block = convert_to_openai_image_block(block)
55
+ if api == "responses":
56
+ formatted_block = {
57
+ "type": "input_image",
58
+ "image_url": chat_completions_block["image_url"]["url"],
59
+ }
60
+ if chat_completions_block["image_url"].get("detail"):
61
+ formatted_block["detail"] = chat_completions_block["image_url"][
62
+ "detail"
63
+ ]
64
+ else:
65
+ formatted_block = chat_completions_block
66
+
67
+ elif block["type"] == "file":
68
+ if block.get("source_type") == "base64" or "base64" in block:
69
+ # Handle v0 format (Base64CB): {"source_type": "base64", "data": "...", ...}
70
+ # Handle v1 format (FileContentBlock): {"base64": "...", ...}
71
+ base64_data = block["data"] if "source_type" in block else block["base64"]
72
+ file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
73
+ if filename := block.get("filename"):
74
+ file["filename"] = filename
75
+ elif (extras := block.get("extras")) and ("filename" in extras):
76
+ file["filename"] = extras["filename"]
77
+ elif (extras := block.get("metadata")) and ("filename" in extras):
78
+ # Backward compat
79
+ file["filename"] = extras["filename"]
80
+ else:
81
+ # Can't infer filename
82
+ warnings.warn(
83
+ "OpenAI may require a filename for file uploads. Specify a filename"
84
+ " in the content block, e.g.: {'type': 'file', 'mime_type': "
85
+ "'...', 'base64': '...', 'filename': 'my-file.pdf'}",
86
+ stacklevel=1,
87
+ )
88
+ formatted_block = {"type": "file", "file": file}
89
+ if api == "responses":
90
+ formatted_block = {"type": "input_file", **formatted_block["file"]}
91
+ elif block.get("source_type") == "id" or "file_id" in block:
92
+ # Handle v0 format (IDContentBlock): {"source_type": "id", "id": "...", ...}
93
+ # Handle v1 format (IDCB): {"file_id": "...", ...}
94
+ file_id = block["id"] if "source_type" in block else block["file_id"]
95
+ formatted_block = {"type": "file", "file": {"file_id": file_id}}
96
+ if api == "responses":
97
+ formatted_block = {"type": "input_file", **formatted_block["file"]}
98
+ elif "url" in block: # Intentionally do not check for source_type="url"
99
+ if api == "chat/completions":
100
+ error_msg = "OpenAI Chat Completions does not support file URLs."
101
+ raise ValueError(error_msg)
102
+ # Only supported by Responses API; return in that format
103
+ formatted_block = {"type": "input_file", "file_url": block["url"]}
104
+ else:
105
+ error_msg = "Keys base64, url, or file_id required for file blocks."
106
+ raise ValueError(error_msg)
107
+
108
+ elif block["type"] == "audio":
109
+ if "base64" in block or block.get("source_type") == "base64":
110
+ # Handle v0 format: {"source_type": "base64", "data": "...", ...}
111
+ # Handle v1 format: {"base64": "...", ...}
112
+ base64_data = block["data"] if "source_type" in block else block["base64"]
113
+ audio_format = block["mime_type"].split("/")[-1]
114
+ formatted_block = {
115
+ "type": "input_audio",
116
+ "input_audio": {"data": base64_data, "format": audio_format},
117
+ }
118
+ else:
119
+ error_msg = "Key base64 is required for audio blocks."
120
+ raise ValueError(error_msg)
121
+ else:
122
+ error_msg = f"Block of type {block['type']} is not supported."
123
+ raise ValueError(error_msg)
124
+
125
+ return formatted_block
126
+
127
+
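A rough sketch of how the same base64 file block renders for the two OpenAI APIs (values are illustrative):

block = {
    "type": "file",
    "mime_type": "application/pdf",
    "base64": "JVBERi0xLjQ...",
    "filename": "report.pdf",
}
convert_to_openai_data_block(block)
# -> {"type": "file",
#     "file": {"file_data": "data:application/pdf;base64,JVBERi0xLjQ...",
#              "filename": "report.pdf"}}
convert_to_openai_data_block(block, api="responses")
# -> {"type": "input_file",
#     "file_data": "data:application/pdf;base64,JVBERi0xLjQ...",
#     "filename": "report.pdf"}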
128
+ # v1 / Chat Completions
129
+ def _convert_to_v1_from_chat_completions(
130
+ message: AIMessage,
131
+ ) -> list[types.ContentBlock]:
132
+ """Mutate a Chat Completions message to v1 format."""
133
+ content_blocks: list[types.ContentBlock] = []
134
+ if isinstance(message.content, str):
135
+ if message.content:
136
+ content_blocks = [{"type": "text", "text": message.content}]
137
+ else:
138
+ content_blocks = []
139
+
140
+ for tool_call in message.tool_calls:
141
+ content_blocks.append(
142
+ {
143
+ "type": "tool_call",
144
+ "name": tool_call["name"],
145
+ "args": tool_call["args"],
146
+ "id": tool_call.get("id"),
147
+ }
148
+ )
149
+
150
+ return content_blocks
151
+
152
+
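A small sketch of the expected mapping (the message text and tool call below are hypothetical):

from langchain_core.messages import AIMessage

msg = AIMessage(
    content="Let me check the weather.",
    tool_calls=[
        {"type": "tool_call", "name": "get_weather",
         "args": {"city": "Paris"}, "id": "call_1"}
    ],
)
_convert_to_v1_from_chat_completions(msg)
# -> [{"type": "text", "text": "Let me check the weather."},
#     {"type": "tool_call", "name": "get_weather",
#      "args": {"city": "Paris"}, "id": "call_1"}]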
153
+ def _convert_to_v1_from_chat_completions_input(
154
+ content: list[types.ContentBlock],
155
+ ) -> list[types.ContentBlock]:
156
+ """Convert OpenAI Chat Completions format blocks to v1 format.
157
+
158
+ During the `content_blocks` parsing process, we wrap blocks not recognized as a v1
159
+ block as a `'non_standard'` block with the original block stored in the `value`
160
+ field. This function attempts to unpack those blocks and convert any blocks that
161
+ might be OpenAI format to v1 ContentBlocks.
162
+
163
+ If conversion fails, the block is left as a `'non_standard'` block.
164
+
165
+ Args:
166
+ content: List of content blocks to process.
167
+
168
+ Returns:
169
+ Updated list with OpenAI blocks converted to v1 format.
170
+ """
171
+ from langchain_core.messages import content as types # noqa: PLC0415
172
+
173
+ converted_blocks = []
174
+ unpacked_blocks: list[dict[str, Any]] = [
175
+ cast("dict[str, Any]", block)
176
+ if block.get("type") != "non_standard"
177
+ else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks
178
+ for block in content
179
+ ]
180
+ for block in unpacked_blocks:
181
+ if block.get("type") in {
182
+ "image_url",
183
+ "input_audio",
184
+ "file",
185
+ } and is_openai_data_block(block):
186
+ converted_block = _convert_openai_format_to_data_block(block)
187
+ # If conversion succeeded, use it; otherwise keep as non_standard
188
+ if (
189
+ isinstance(converted_block, dict)
190
+ and converted_block.get("type") in types.KNOWN_BLOCK_TYPES
191
+ ):
192
+ converted_blocks.append(cast("types.ContentBlock", converted_block))
193
+ else:
194
+ converted_blocks.append({"type": "non_standard", "value": block})
195
+ elif block.get("type") in types.KNOWN_BLOCK_TYPES:
196
+ converted_blocks.append(cast("types.ContentBlock", block))
197
+ else:
198
+ converted_blocks.append({"type": "non_standard", "value": block})
199
+
200
+ return converted_blocks
201
+
202
+
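Sketch of the unwrapping behaviour, assuming `is_openai_data_block` accepts the plain-URL `image_url` form (blocks below are illustrative):

content = [
    {"type": "text", "text": "Describe this image."},
    {"type": "non_standard",
     "value": {"type": "image_url",
               "image_url": {"url": "https://example.com/cat.png"}}},
]
_convert_to_v1_from_chat_completions_input(content)
# The text block is already a known v1 type and passes through unchanged.
# The wrapped image_url block is converted via _convert_openai_format_to_data_block,
# yielding roughly {"type": "image", "url": "https://example.com/cat.png"};
# if conversion failed it would stay wrapped as a "non_standard" block.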
203
+ def _convert_to_v1_from_chat_completions_chunk(
204
+ chunk: AIMessageChunk,
205
+ ) -> list[types.ContentBlock]:
206
+ """Mutate a Chat Completions chunk to v1 format."""
207
+ content_blocks: list[types.ContentBlock] = []
208
+ if isinstance(chunk.content, str):
209
+ if chunk.content:
210
+ content_blocks = [{"type": "text", "text": chunk.content}]
211
+ else:
212
+ content_blocks = []
213
+
214
+ if chunk.chunk_position == "last":
215
+ for tool_call in chunk.tool_calls:
216
+ content_blocks.append(
217
+ {
218
+ "type": "tool_call",
219
+ "name": tool_call["name"],
220
+ "args": tool_call["args"],
221
+ "id": tool_call.get("id"),
222
+ }
223
+ )
224
+
225
+ else:
226
+ for tool_call_chunk in chunk.tool_call_chunks:
227
+ tc: types.ToolCallChunk = {
228
+ "type": "tool_call_chunk",
229
+ "id": tool_call_chunk.get("id"),
230
+ "name": tool_call_chunk.get("name"),
231
+ "args": tool_call_chunk.get("args"),
232
+ }
233
+ if (idx := tool_call_chunk.get("index")) is not None:
234
+ tc["index"] = idx
235
+ content_blocks.append(tc)
236
+
237
+ return content_blocks
238
+
239
+
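Sketch for a mid-stream chunk (hypothetical values); because `chunk_position` is not `"last"`, partial tool-call chunks are emitted rather than finalized tool calls:

from langchain_core.messages import AIMessageChunk

chunk = AIMessageChunk(
    content="",
    tool_call_chunks=[
        {"type": "tool_call_chunk", "name": "get_weather",
         "args": '{"city": "Pa', "id": "call_1", "index": 0}
    ],
)
_convert_to_v1_from_chat_completions_chunk(chunk)
# -> [{"type": "tool_call_chunk", "id": "call_1", "name": "get_weather",
#      "args": '{"city": "Pa', "index": 0}]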
240
+ def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage:
241
+ """Convert a v1 message to the Chat Completions format."""
242
+ if isinstance(message.content, list):
243
+ new_content: list = []
244
+ for block in message.content:
245
+ if isinstance(block, dict):
246
+ block_type = block.get("type")
247
+ if block_type == "text":
248
+ # Strip annotations
249
+ new_content.append({"type": "text", "text": block["text"]})
250
+ elif block_type in ("reasoning", "tool_call"):
251
+ pass
252
+ else:
253
+ new_content.append(block)
254
+ else:
255
+ new_content.append(block)
256
+ return message.model_copy(update={"content": new_content})
257
+
258
+ return message
259
+
260
+
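Sketch of the trip back to Chat Completions content (illustrative blocks): annotations are stripped from text blocks, and reasoning blocks are dropped entirely:

from langchain_core.messages import AIMessage

msg = AIMessage(
    content=[
        {"type": "reasoning", "reasoning": "Check the docs first."},
        {"type": "text", "text": "See the guide.",
         "annotations": [{"type": "citation", "url": "https://example.com/guide"}]},
    ]
)
_convert_from_v1_to_chat_completions(msg).content
# -> [{"type": "text", "text": "See the guide."}]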
261
+ # Responses
262
+ _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__"
263
+
264
+
265
+ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage:
266
+ """Convert v0 AIMessage into `output_version="responses/v1"` format."""
267
+ from langchain_core.messages import AIMessageChunk # noqa: PLC0415
268
+
269
+ # Only update ChatOpenAI v0.3 AIMessages
270
+ is_chatopenai_v03 = (
271
+ isinstance(message.content, list)
272
+ and all(isinstance(b, dict) for b in message.content)
273
+ ) and (
274
+ any(
275
+ item in message.additional_kwargs
276
+ for item in [
277
+ "reasoning",
278
+ "tool_outputs",
279
+ "refusal",
280
+ _FUNCTION_CALL_IDS_MAP_KEY,
281
+ ]
282
+ )
283
+ or (
284
+ isinstance(message.id, str)
285
+ and message.id.startswith("msg_")
286
+ and (response_id := message.response_metadata.get("id"))
287
+ and isinstance(response_id, str)
288
+ and response_id.startswith("resp_")
289
+ )
290
+ )
291
+ if not is_chatopenai_v03:
292
+ return message
293
+
294
+ content_order = [
295
+ "reasoning",
296
+ "code_interpreter_call",
297
+ "mcp_call",
298
+ "image_generation_call",
299
+ "text",
300
+ "refusal",
301
+ "function_call",
302
+ "computer_call",
303
+ "mcp_list_tools",
304
+ "mcp_approval_request",
305
+ # N. B. "web_search_call" and "file_search_call" were not passed back
306
+ # in v0.3
307
+ ]
308
+
309
+ # Build a bucket for every known block type
310
+ buckets: dict[str, list] = {key: [] for key in content_order}
311
+ unknown_blocks = []
312
+
313
+ # Reasoning
314
+ if reasoning := message.additional_kwargs.get("reasoning"):
315
+ if isinstance(message, AIMessageChunk) and message.chunk_position != "last":
316
+ buckets["reasoning"].append({**reasoning, "type": "reasoning"})
317
+ else:
318
+ buckets["reasoning"].append(reasoning)
319
+
320
+ # Refusal
321
+ if refusal := message.additional_kwargs.get("refusal"):
322
+ buckets["refusal"].append({"type": "refusal", "refusal": refusal})
323
+
324
+ # Text
325
+ for block in message.content:
326
+ if isinstance(block, dict) and block.get("type") == "text":
327
+ block_copy = block.copy()
328
+ if isinstance(message.id, str) and message.id.startswith("msg_"):
329
+ block_copy["id"] = message.id
330
+ buckets["text"].append(block_copy)
331
+ else:
332
+ unknown_blocks.append(block)
333
+
334
+ # Function calls
335
+ function_call_ids = message.additional_kwargs.get(_FUNCTION_CALL_IDS_MAP_KEY)
336
+ if (
337
+ isinstance(message, AIMessageChunk)
338
+ and len(message.tool_call_chunks) == 1
339
+ and message.chunk_position != "last"
340
+ ):
341
+ # Isolated chunk
342
+ tool_call_chunk = message.tool_call_chunks[0]
343
+ function_call = {
344
+ "type": "function_call",
345
+ "name": tool_call_chunk.get("name"),
346
+ "arguments": tool_call_chunk.get("args"),
347
+ "call_id": tool_call_chunk.get("id"),
348
+ }
349
+ if function_call_ids is not None and (
350
+ _id := function_call_ids.get(tool_call_chunk.get("id"))
351
+ ):
352
+ function_call["id"] = _id
353
+ buckets["function_call"].append(function_call)
354
+ else:
355
+ for tool_call in message.tool_calls:
356
+ function_call = {
357
+ "type": "function_call",
358
+ "name": tool_call["name"],
359
+ "arguments": json.dumps(tool_call["args"], ensure_ascii=False),
360
+ "call_id": tool_call["id"],
361
+ }
362
+ if function_call_ids is not None and (
363
+ _id := function_call_ids.get(tool_call["id"])
364
+ ):
365
+ function_call["id"] = _id
366
+ buckets["function_call"].append(function_call)
367
+
368
+ # Tool outputs
369
+ tool_outputs = message.additional_kwargs.get("tool_outputs", [])
370
+ for block in tool_outputs:
371
+ if isinstance(block, dict) and (key := block.get("type")) and key in buckets:
372
+ buckets[key].append(block)
373
+ else:
374
+ unknown_blocks.append(block)
375
+
376
+ # Re-assemble the content list in the canonical order
377
+ new_content = []
378
+ for key in content_order:
379
+ new_content.extend(buckets[key])
380
+ new_content.extend(unknown_blocks)
381
+
382
+ new_additional_kwargs = dict(message.additional_kwargs)
383
+ new_additional_kwargs.pop("reasoning", None)
384
+ new_additional_kwargs.pop("refusal", None)
385
+ new_additional_kwargs.pop("tool_outputs", None)
386
+
387
+ if "id" in message.response_metadata:
388
+ new_id = message.response_metadata["id"]
389
+ else:
390
+ new_id = message.id
391
+
392
+ return message.model_copy(
393
+ update={
394
+ "content": new_content,
395
+ "additional_kwargs": new_additional_kwargs,
396
+ "id": new_id,
397
+ },
398
+ deep=False,
399
+ )
400
+
401
+
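A rough sketch of the v0.3-to-`responses/v1` reshaping (hypothetical message; exact field contents depend on what ChatOpenAI placed in `additional_kwargs`):

from langchain_core.messages import AIMessage

msg = AIMessage(
    content=[{"type": "text", "text": "Hi"}],
    additional_kwargs={"reasoning": {"type": "reasoning", "id": "rs_1", "summary": []}},
    response_metadata={"id": "resp_123"},
    id="msg_abc",
)
out = _convert_from_v03_ai_message(msg)
# out.content is roughly:
#   [{"type": "reasoning", "id": "rs_1", "summary": []},
#    {"type": "text", "text": "Hi", "id": "msg_abc"}]
# out.id == "resp_123", and "reasoning" is removed from out.additional_kwargs.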
402
+ def _convert_openai_format_to_data_block(
403
+ block: dict,
404
+ ) -> types.ContentBlock | dict[Any, Any]:
405
+ """Convert OpenAI image/audio/file content block to respective v1 multimodal block.
406
+
407
+ We expect that the incoming block is verified to be in OpenAI Chat Completions
408
+ format.
409
+
410
+ If parsing fails, passes block through unchanged.
411
+
412
+ Mappings (Chat Completions to LangChain v1):
413
+ - Image -> `ImageContentBlock`
414
+ - Audio -> `AudioContentBlock`
415
+ - File -> `FileContentBlock`
416
+
417
+ """
418
+
419
+ # Extract extra keys to put them in `extras`
420
+ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]:
421
+ """Extract unknown keys from block to preserve as extras."""
422
+ return {k: v for k, v in block_dict.items() if k not in known_keys}
423
+
424
+ # base64-style image block
425
+ if (block["type"] == "image_url") and (
426
+ parsed := _parse_data_uri(block["image_url"]["url"])
427
+ ):
428
+ known_keys = {"type", "image_url"}
429
+ extras = _extract_extras(block, known_keys)
430
+
431
+ # Also extract extras from nested image_url dict
432
+ image_url_known_keys = {"url"}
433
+ image_url_extras = _extract_extras(block["image_url"], image_url_known_keys)
434
+
435
+ # Merge extras
436
+ all_extras = {**extras}
437
+ for key, value in image_url_extras.items():
438
+ if key == "detail": # Don't rename
439
+ all_extras["detail"] = value
440
+ else:
441
+ all_extras[f"image_url_{key}"] = value
442
+
443
+ return types.create_image_block(
444
+ # Even though this is labeled as `url`, it can be base64-encoded
445
+ base64=parsed["data"],
446
+ mime_type=parsed["mime_type"],
447
+ **all_extras,
448
+ )
449
+
450
+ # url-style image block
451
+ if (block["type"] == "image_url") and isinstance(
452
+ block["image_url"].get("url"), str
453
+ ):
454
+ known_keys = {"type", "image_url"}
455
+ extras = _extract_extras(block, known_keys)
456
+
457
+ image_url_known_keys = {"url"}
458
+ image_url_extras = _extract_extras(block["image_url"], image_url_known_keys)
459
+
460
+ all_extras = {**extras}
461
+ for key, value in image_url_extras.items():
462
+ if key == "detail": # Don't rename
463
+ all_extras["detail"] = value
464
+ else:
465
+ all_extras[f"image_url_{key}"] = value
466
+
467
+ return types.create_image_block(
468
+ url=block["image_url"]["url"],
469
+ **all_extras,
470
+ )
471
+
472
+ # base64-style audio block
473
+ # audio is only represented via raw data, no url or ID option
474
+ if block["type"] == "input_audio":
475
+ known_keys = {"type", "input_audio"}
476
+ extras = _extract_extras(block, known_keys)
477
+
478
+ # Also extract extras from nested audio dict
479
+ audio_known_keys = {"data", "format"}
480
+ audio_extras = _extract_extras(block["input_audio"], audio_known_keys)
481
+
482
+ all_extras = {**extras}
483
+ for key, value in audio_extras.items():
484
+ all_extras[f"audio_{key}"] = value
485
+
486
+ return types.create_audio_block(
487
+ base64=block["input_audio"]["data"],
488
+ mime_type=f"audio/{block['input_audio']['format']}",
489
+ **all_extras,
490
+ )
491
+
492
+ # id-style file block
493
+ if block.get("type") == "file" and "file_id" in block.get("file", {}):
494
+ known_keys = {"type", "file"}
495
+ extras = _extract_extras(block, known_keys)
496
+
497
+ file_known_keys = {"file_id"}
498
+ file_extras = _extract_extras(block["file"], file_known_keys)
499
+
500
+ all_extras = {**extras}
501
+ for key, value in file_extras.items():
502
+ all_extras[f"file_{key}"] = value
503
+
504
+ return types.create_file_block(
505
+ file_id=block["file"]["file_id"],
506
+ **all_extras,
507
+ )
508
+
509
+ # base64-style file block
510
+ if (block["type"] == "file") and (
511
+ parsed := _parse_data_uri(block["file"]["file_data"])
512
+ ):
513
+ known_keys = {"type", "file"}
514
+ extras = _extract_extras(block, known_keys)
515
+
516
+ file_known_keys = {"file_data", "filename"}
517
+ file_extras = _extract_extras(block["file"], file_known_keys)
518
+
519
+ all_extras = {**extras}
520
+ for key, value in file_extras.items():
521
+ all_extras[f"file_{key}"] = value
522
+
523
+ filename = block["file"].get("filename")
524
+ return types.create_file_block(
525
+ base64=parsed["data"],
526
+ mime_type="application/pdf",
527
+ filename=filename,
528
+ **all_extras,
529
+ )
530
+
531
+ # Escape hatch
532
+ return block
533
+
534
+
535
+ # v1 / Responses
536
+ def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation:
537
+ annotation_type = annotation.get("type")
538
+
539
+ if annotation_type == "url_citation":
540
+ known_fields = {
541
+ "type",
542
+ "url",
543
+ "title",
544
+ "cited_text",
545
+ "start_index",
546
+ "end_index",
547
+ }
548
+ url_citation = cast("types.Citation", {})
549
+ for field in ("end_index", "start_index", "title"):
550
+ if field in annotation:
551
+ url_citation[field] = annotation[field]
552
+ url_citation["type"] = "citation"
553
+ url_citation["url"] = annotation["url"]
554
+ for field, value in annotation.items():
555
+ if field not in known_fields:
556
+ if "extras" not in url_citation:
557
+ url_citation["extras"] = {}
558
+ url_citation["extras"][field] = value
559
+ return url_citation
560
+
561
+ if annotation_type == "file_citation":
562
+ known_fields = {
563
+ "type",
564
+ "title",
565
+ "cited_text",
566
+ "start_index",
567
+ "end_index",
568
+ "filename",
569
+ }
570
+ document_citation: types.Citation = {"type": "citation"}
571
+ if "filename" in annotation:
572
+ document_citation["title"] = annotation["filename"]
573
+ for field, value in annotation.items():
574
+ if field not in known_fields:
575
+ if "extras" not in document_citation:
576
+ document_citation["extras"] = {}
577
+ document_citation["extras"][field] = value
578
+
579
+ return document_citation
580
+
581
+ # TODO: standardise container_file_citation?
582
+ non_standard_annotation: types.NonStandardAnnotation = {
583
+ "type": "non_standard_annotation",
584
+ "value": annotation,
585
+ }
586
+ return non_standard_annotation
587
+
588
+
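For example, a hypothetical `url_citation` annotation maps onto the v1 `citation` shape, with unrecognized fields preserved under `extras`:

_convert_annotation_to_v1(
    {
        "type": "url_citation",
        "url": "https://example.com",
        "title": "Example",
        "start_index": 0,
        "end_index": 12,
        "source_id": "src_1",  # not a known field -> lands in "extras"
    }
)
# -> {"end_index": 12, "start_index": 0, "title": "Example",
#     "type": "citation", "url": "https://example.com",
#     "extras": {"source_id": "src_1"}}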
589
+ def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]:
590
+ if "summary" not in block:
591
+ yield cast("types.ReasoningContentBlock", block)
592
+ return
593
+
594
+ known_fields = {"type", "reasoning", "id", "index"}
595
+ unknown_fields = [
596
+ field for field in block if field != "summary" and field not in known_fields
597
+ ]
598
+ if unknown_fields:
599
+ block["extras"] = {}
600
+ for field in unknown_fields:
601
+ block["extras"][field] = block.pop(field)
602
+
603
+ if not block["summary"]:
604
+ # [{'id': 'rs_...', 'summary': [], 'type': 'reasoning', 'index': 0}]
605
+ block = {k: v for k, v in block.items() if k != "summary"}
606
+ if "index" in block:
607
+ meaningful_idx = f"{block['index']}_0"
608
+ block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}"
609
+ yield cast("types.ReasoningContentBlock", block)
610
+ return
611
+
612
+ # Common part for every exploded line, except 'summary'
613
+ common = {k: v for k, v in block.items() if k in known_fields}
614
+
615
+ # Optional keys that must appear only in the first exploded item
616
+ first_only = block.pop("extras", None)
617
+
618
+ for idx, part in enumerate(block["summary"]):
619
+ new_block = dict(common)
620
+ new_block["reasoning"] = part.get("text", "")
621
+ if idx == 0 and first_only:
622
+ new_block.update(first_only)
623
+ if "index" in new_block:
624
+ summary_index = part.get("index", 0)
625
+ meaningful_idx = f"{new_block['index']}_{summary_index}"
626
+ new_block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}"
627
+
628
+ yield cast("types.ReasoningContentBlock", new_block)
629
+
630
+
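Sketch: a Responses `reasoning` item with a two-part summary (hypothetical) is exploded into one v1 reasoning block per summary part:

list(
    _explode_reasoning(
        {
            "type": "reasoning",
            "id": "rs_1",
            "summary": [
                {"type": "summary_text", "text": "Step one"},
                {"type": "summary_text", "text": "Step two"},
            ],
        }
    )
)
# -> [{"type": "reasoning", "id": "rs_1", "reasoning": "Step one"},
#     {"type": "reasoning", "id": "rs_1", "reasoning": "Step two"}]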
631
+ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock]:
632
+ """Convert a Responses message to v1 format."""
633
+
634
+ def _iter_blocks() -> Iterable[types.ContentBlock]:
635
+ for raw_block in message.content:
636
+ if not isinstance(raw_block, dict):
637
+ continue
638
+ block = raw_block.copy()
639
+ block_type = block.get("type")
640
+
641
+ if block_type == "text":
642
+ if "text" not in block:
643
+ block["text"] = ""
644
+ if "annotations" in block:
645
+ block["annotations"] = [
646
+ _convert_annotation_to_v1(a) for a in block["annotations"]
647
+ ]
648
+ if "index" in block:
649
+ block["index"] = f"lc_txt_{block['index']}"
650
+ yield cast("types.TextContentBlock", block)
651
+
652
+ elif block_type == "reasoning":
653
+ yield from _explode_reasoning(block)
654
+
655
+ elif block_type == "image_generation_call" and (
656
+ result := block.get("result")
657
+ ):
658
+ new_block = {"type": "image", "base64": result}
659
+ if output_format := block.get("output_format"):
660
+ new_block["mime_type"] = f"image/{output_format}"
661
+ if "id" in block:
662
+ new_block["id"] = block["id"]
663
+ if "index" in block:
664
+ new_block["index"] = f"lc_img_{block['index']}"
665
+ for extra_key in (
666
+ "status",
667
+ "background",
668
+ "output_format",
669
+ "quality",
670
+ "revised_prompt",
671
+ "size",
672
+ ):
673
+ if extra_key in block:
674
+ if "extras" not in new_block:
675
+ new_block["extras"] = {}
676
+ new_block["extras"][extra_key] = block[extra_key]
677
+ yield cast("types.ImageContentBlock", new_block)
678
+
679
+ elif block_type == "function_call":
680
+ tool_call_block: (
681
+ types.ToolCall | types.InvalidToolCall | types.ToolCallChunk | None
682
+ ) = None
683
+ call_id = block.get("call_id", "")
684
+
685
+ from langchain_core.messages import AIMessageChunk # noqa: PLC0415
686
+
687
+ if (
688
+ isinstance(message, AIMessageChunk)
689
+ and len(message.tool_call_chunks) == 1
690
+ and message.chunk_position != "last"
691
+ ):
692
+ tool_call_block = message.tool_call_chunks[0].copy() # type: ignore[assignment]
693
+ elif call_id:
694
+ for tool_call in message.tool_calls or []:
695
+ if tool_call.get("id") == call_id:
696
+ tool_call_block = {
697
+ "type": "tool_call",
698
+ "name": tool_call["name"],
699
+ "args": tool_call["args"],
700
+ "id": tool_call.get("id"),
701
+ }
702
+ break
703
+ else:
704
+ for invalid_tool_call in message.invalid_tool_calls or []:
705
+ if invalid_tool_call.get("id") == call_id:
706
+ tool_call_block = invalid_tool_call.copy()
707
+ break
708
+ else:
709
+ pass
710
+ if tool_call_block:
711
+ if "id" in block:
712
+ if "extras" not in tool_call_block:
713
+ tool_call_block["extras"] = {}
714
+ tool_call_block["extras"]["item_id"] = block["id"]
715
+ if "index" in block:
716
+ tool_call_block["index"] = f"lc_tc_{block['index']}"
717
+ yield tool_call_block
718
+
719
+ elif block_type == "web_search_call":
720
+ web_search_call = {
721
+ "type": "server_tool_call",
722
+ "name": "web_search",
723
+ "args": {},
724
+ "id": block["id"],
725
+ }
726
+ if "index" in block:
727
+ web_search_call["index"] = f"lc_wsc_{block['index']}"
728
+
729
+ sources: dict[str, Any] | None = None
730
+ if "action" in block and isinstance(block["action"], dict):
731
+ if "sources" in block["action"]:
732
+ sources = block["action"]["sources"]
733
+ web_search_call["args"] = {
734
+ k: v for k, v in block["action"].items() if k != "sources"
735
+ }
736
+ for key in block:
737
+ if key not in ("type", "id", "action", "status", "index"):
738
+ web_search_call[key] = block[key]
739
+
740
+ yield cast("types.ServerToolCall", web_search_call)
741
+
742
+ # If .content already has web_search_result, don't add
743
+ if not any(
744
+ isinstance(other_block, dict)
745
+ and other_block.get("type") == "web_search_result"
746
+ and other_block.get("id") == block["id"]
747
+ for other_block in message.content
748
+ ):
749
+ web_search_result = {
750
+ "type": "server_tool_result",
751
+ "tool_call_id": block["id"],
752
+ }
753
+ if sources:
754
+ web_search_result["output"] = {"sources": sources}
755
+
756
+ status = block.get("status")
757
+ if status == "failed":
758
+ web_search_result["status"] = "error"
759
+ elif status == "completed":
760
+ web_search_result["status"] = "success"
761
+ elif status:
762
+ web_search_result["extras"] = {"status": status}
763
+ else:
764
+ pass
765
+ if "index" in block and isinstance(block["index"], int):
766
+ web_search_result["index"] = f"lc_wsr_{block['index'] + 1}"
767
+ yield cast("types.ServerToolResult", web_search_result)
768
+
769
+ elif block_type == "file_search_call":
770
+ file_search_call = {
771
+ "type": "server_tool_call",
772
+ "name": "file_search",
773
+ "id": block["id"],
774
+ "args": {"queries": block.get("queries", [])},
775
+ }
776
+ if "index" in block:
777
+ file_search_call["index"] = f"lc_fsc_{block['index']}"
778
+
779
+ for key in block:
780
+ if key not in (
781
+ "type",
782
+ "id",
783
+ "queries",
784
+ "results",
785
+ "status",
786
+ "index",
787
+ ):
788
+ file_search_call[key] = block[key]
789
+
790
+ yield cast("types.ServerToolCall", file_search_call)
791
+
792
+ file_search_result = {
793
+ "type": "server_tool_result",
794
+ "tool_call_id": block["id"],
795
+ }
796
+ if file_search_output := block.get("results"):
797
+ file_search_result["output"] = file_search_output
798
+
799
+ status = block.get("status")
800
+ if status == "failed":
801
+ file_search_result["status"] = "error"
802
+ elif status == "completed":
803
+ file_search_result["status"] = "success"
804
+ elif status:
805
+ file_search_result["extras"] = {"status": status}
806
+ else:
807
+ pass
808
+ if "index" in block and isinstance(block["index"], int):
809
+ file_search_result["index"] = f"lc_fsr_{block['index'] + 1}"
810
+ yield cast("types.ServerToolResult", file_search_result)
811
+
812
+ elif block_type == "code_interpreter_call":
813
+ code_interpreter_call = {
814
+ "type": "server_tool_call",
815
+ "name": "code_interpreter",
816
+ "id": block["id"],
817
+ }
818
+ if "code" in block:
819
+ code_interpreter_call["args"] = {"code": block["code"]}
820
+ if "index" in block:
821
+ code_interpreter_call["index"] = f"lc_cic_{block['index']}"
822
+ known_fields = {
823
+ "type",
824
+ "id",
825
+ "outputs",
826
+ "status",
827
+ "code",
828
+ "extras",
829
+ "index",
830
+ }
831
+ for key in block:
832
+ if key not in known_fields:
833
+ if "extras" not in code_interpreter_call:
834
+ code_interpreter_call["extras"] = {}
835
+ code_interpreter_call["extras"][key] = block[key]
836
+
837
+ code_interpreter_result = {
838
+ "type": "server_tool_result",
839
+ "tool_call_id": block["id"],
840
+ }
841
+ if "outputs" in block:
842
+ code_interpreter_result["output"] = block["outputs"]
843
+
844
+ status = block.get("status")
845
+ if status == "failed":
846
+ code_interpreter_result["status"] = "error"
847
+ elif status == "completed":
848
+ code_interpreter_result["status"] = "success"
849
+ elif status:
850
+ code_interpreter_result["extras"] = {"status": status}
851
+ else:
852
+ pass
853
+ if "index" in block and isinstance(block["index"], int):
854
+ code_interpreter_result["index"] = f"lc_cir_{block['index'] + 1}"
855
+
856
+ yield cast("types.ServerToolCall", code_interpreter_call)
857
+ yield cast("types.ServerToolResult", code_interpreter_result)
858
+
859
+ elif block_type == "mcp_call":
860
+ mcp_call = {
861
+ "type": "server_tool_call",
862
+ "name": "remote_mcp",
863
+ "id": block["id"],
864
+ }
865
+ if (arguments := block.get("arguments")) and isinstance(arguments, str):
866
+ try:
867
+ mcp_call["args"] = json.loads(block["arguments"])
868
+ except json.JSONDecodeError:
869
+ mcp_call["extras"] = {"arguments": arguments}
870
+ if "name" in block:
871
+ if "extras" not in mcp_call:
872
+ mcp_call["extras"] = {}
873
+ mcp_call["extras"]["tool_name"] = block["name"]
874
+ if "server_label" in block:
875
+ if "extras" not in mcp_call:
876
+ mcp_call["extras"] = {}
877
+ mcp_call["extras"]["server_label"] = block["server_label"]
878
+ if "index" in block:
879
+ mcp_call["index"] = f"lc_mcp_{block['index']}"
880
+ known_fields = {
881
+ "type",
882
+ "id",
883
+ "arguments",
884
+ "name",
885
+ "server_label",
886
+ "output",
887
+ "error",
888
+ "extras",
889
+ "index",
890
+ }
891
+ for key in block:
892
+ if key not in known_fields:
893
+ if "extras" not in mcp_call:
894
+ mcp_call["extras"] = {}
895
+ mcp_call["extras"][key] = block[key]
896
+
897
+ yield cast("types.ServerToolCall", mcp_call)
898
+
899
+ mcp_result = {
900
+ "type": "server_tool_result",
901
+ "tool_call_id": block["id"],
902
+ }
903
+ if mcp_output := block.get("output"):
904
+ mcp_result["output"] = mcp_output
905
+
906
+ error = block.get("error")
907
+ if error:
908
+ if "extras" not in mcp_result:
909
+ mcp_result["extras"] = {}
910
+ mcp_result["extras"]["error"] = error
911
+ mcp_result["status"] = "error"
912
+ else:
913
+ mcp_result["status"] = "success"
914
+
915
+ if "index" in block and isinstance(block["index"], int):
916
+ mcp_result["index"] = f"lc_mcpr_{block['index'] + 1}"
917
+ yield cast("types.ServerToolResult", mcp_result)
918
+
919
+ elif block_type == "mcp_list_tools":
920
+ mcp_list_tools_call = {
921
+ "type": "server_tool_call",
922
+ "name": "mcp_list_tools",
923
+ "args": {},
924
+ "id": block["id"],
925
+ }
926
+ if "server_label" in block:
927
+ mcp_list_tools_call["extras"] = {}
928
+ mcp_list_tools_call["extras"]["server_label"] = block[
929
+ "server_label"
930
+ ]
931
+ if "index" in block:
932
+ mcp_list_tools_call["index"] = f"lc_mlt_{block['index']}"
933
+ known_fields = {
934
+ "type",
935
+ "id",
936
+ "name",
937
+ "server_label",
938
+ "tools",
939
+ "error",
940
+ "extras",
941
+ "index",
942
+ }
943
+ for key in block:
944
+ if key not in known_fields:
945
+ if "extras" not in mcp_list_tools_call:
946
+ mcp_list_tools_call["extras"] = {}
947
+ mcp_list_tools_call["extras"][key] = block[key]
948
+
949
+ yield cast("types.ServerToolCall", mcp_list_tools_call)
950
+
951
+ mcp_list_tools_result = {
952
+ "type": "server_tool_result",
953
+ "tool_call_id": block["id"],
954
+ }
955
+ if mcp_output := block.get("tools"):
956
+ mcp_list_tools_result["output"] = mcp_output
957
+
958
+ error = block.get("error")
959
+ if error:
960
+ if "extras" not in mcp_list_tools_result:
961
+ mcp_list_tools_result["extras"] = {}
962
+ mcp_list_tools_result["extras"]["error"] = error
963
+ mcp_list_tools_result["status"] = "error"
964
+ else:
965
+ mcp_list_tools_result["status"] = "success"
966
+
967
+ if "index" in block and isinstance(block["index"], int):
968
+ mcp_list_tools_result["index"] = f"lc_mltr_{block['index'] + 1}"
969
+ yield cast("types.ServerToolResult", mcp_list_tools_result)
970
+
971
+ elif block_type in types.KNOWN_BLOCK_TYPES:
972
+ yield cast("types.ContentBlock", block)
973
+ else:
974
+ new_block = {"type": "non_standard", "value": block}
975
+ if "index" in new_block["value"]:
976
+ new_block["index"] = f"lc_ns_{new_block['value'].pop('index')}"
977
+ yield cast("types.NonStandardContentBlock", new_block)
978
+
979
+ return list(_iter_blocks())
980
+
981
+
982
+ def translate_content(message: AIMessage) -> list[types.ContentBlock]:
983
+ """Derive standard content blocks from a message with OpenAI content."""
984
+ if isinstance(message.content, str):
985
+ return _convert_to_v1_from_chat_completions(message)
986
+ message = _convert_from_v03_ai_message(message)
987
+ return _convert_to_v1_from_responses(message)
988
+
989
+
990
+ def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
991
+ """Derive standard content blocks from a message chunk with OpenAI content."""
992
+ if isinstance(message.content, str):
993
+ return _convert_to_v1_from_chat_completions_chunk(message)
994
+ message = _convert_from_v03_ai_message(message) # type: ignore[assignment]
995
+ return _convert_to_v1_from_responses(message)
996
+
997
+
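End-to-end sketch with a hypothetical Responses-style message: string content goes through the Chat Completions helper, while list content is routed through the v0.3 check and the Responses converter:

from langchain_core.messages import AIMessage

msg = AIMessage(
    content=[
        {"type": "reasoning", "id": "rs_1",
         "summary": [{"type": "summary_text", "text": "Thinking..."}]},
        {"type": "text", "text": "Hello", "annotations": []},
    ]
)
translate_content(msg)
# -> roughly:
# [{"type": "reasoning", "id": "rs_1", "reasoning": "Thinking..."},
#  {"type": "text", "text": "Hello", "annotations": []}]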
998
+ def _register_openai_translator() -> None:
999
+ """Register the OpenAI translator with the central registry.
1000
+
1001
+ Runs automatically when the module is imported.
1002
+ """
1003
+ from langchain_core.messages.block_translators import ( # noqa: PLC0415
1004
+ register_translator,
1005
+ )
1006
+
1007
+ register_translator("openai", translate_content, translate_content_chunk)
1008
+
1009
+
1010
+ _register_openai_translator()