langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. langchain_core/_api/beta_decorator.py +2 -2
  2. langchain_core/_api/deprecation.py +1 -1
  3. langchain_core/beta/runnables/context.py +1 -1
  4. langchain_core/callbacks/base.py +14 -23
  5. langchain_core/callbacks/file.py +13 -2
  6. langchain_core/callbacks/manager.py +74 -157
  7. langchain_core/callbacks/streaming_stdout.py +3 -4
  8. langchain_core/callbacks/usage.py +2 -12
  9. langchain_core/chat_history.py +6 -6
  10. langchain_core/documents/base.py +1 -1
  11. langchain_core/documents/compressor.py +9 -6
  12. langchain_core/indexing/base.py +2 -2
  13. langchain_core/language_models/_utils.py +232 -101
  14. langchain_core/language_models/base.py +35 -23
  15. langchain_core/language_models/chat_models.py +248 -54
  16. langchain_core/language_models/fake_chat_models.py +28 -81
  17. langchain_core/load/dump.py +3 -4
  18. langchain_core/messages/__init__.py +30 -24
  19. langchain_core/messages/ai.py +188 -30
  20. langchain_core/messages/base.py +164 -25
  21. langchain_core/messages/block_translators/__init__.py +89 -0
  22. langchain_core/messages/block_translators/anthropic.py +451 -0
  23. langchain_core/messages/block_translators/bedrock.py +45 -0
  24. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  25. langchain_core/messages/block_translators/google_genai.py +45 -0
  26. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  27. langchain_core/messages/block_translators/groq.py +45 -0
  28. langchain_core/messages/block_translators/langchain_v0.py +164 -0
  29. langchain_core/messages/block_translators/ollama.py +45 -0
  30. langchain_core/messages/block_translators/openai.py +798 -0
  31. langchain_core/messages/{content_blocks.py → content.py} +303 -278
  32. langchain_core/messages/human.py +29 -9
  33. langchain_core/messages/system.py +29 -9
  34. langchain_core/messages/tool.py +94 -13
  35. langchain_core/messages/utils.py +34 -234
  36. langchain_core/output_parsers/base.py +14 -50
  37. langchain_core/output_parsers/json.py +2 -5
  38. langchain_core/output_parsers/list.py +2 -7
  39. langchain_core/output_parsers/openai_functions.py +5 -28
  40. langchain_core/output_parsers/openai_tools.py +49 -90
  41. langchain_core/output_parsers/pydantic.py +2 -3
  42. langchain_core/output_parsers/transform.py +12 -53
  43. langchain_core/output_parsers/xml.py +9 -17
  44. langchain_core/prompt_values.py +8 -112
  45. langchain_core/prompts/chat.py +1 -3
  46. langchain_core/runnables/base.py +500 -451
  47. langchain_core/runnables/branch.py +1 -1
  48. langchain_core/runnables/fallbacks.py +4 -4
  49. langchain_core/runnables/history.py +1 -1
  50. langchain_core/runnables/passthrough.py +3 -3
  51. langchain_core/runnables/retry.py +1 -1
  52. langchain_core/runnables/router.py +1 -1
  53. langchain_core/structured_query.py +3 -7
  54. langchain_core/tools/base.py +14 -41
  55. langchain_core/tools/convert.py +2 -22
  56. langchain_core/tools/retriever.py +1 -8
  57. langchain_core/tools/structured.py +2 -10
  58. langchain_core/tracers/_streaming.py +6 -7
  59. langchain_core/tracers/base.py +7 -14
  60. langchain_core/tracers/core.py +4 -27
  61. langchain_core/tracers/event_stream.py +4 -15
  62. langchain_core/tracers/langchain.py +3 -14
  63. langchain_core/tracers/log_stream.py +2 -3
  64. langchain_core/utils/_merge.py +45 -7
  65. langchain_core/utils/function_calling.py +22 -9
  66. langchain_core/utils/utils.py +29 -0
  67. langchain_core/version.py +1 -1
  68. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
  69. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
  70. langchain_core/v1/__init__.py +0 -1
  71. langchain_core/v1/chat_models.py +0 -1047
  72. langchain_core/v1/messages.py +0 -755
  73. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
  74. {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0
langchain_core/messages/block_translators/openai.py (new file)
@@ -0,0 +1,798 @@
"""Derivations of standard content blocks from OpenAI content."""

from __future__ import annotations

import json
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Optional, Union, cast

from langchain_core.language_models._utils import (
    _is_openai_data_block,
    _parse_data_uri,
)
from langchain_core.messages import content as types

if TYPE_CHECKING:
    from langchain_core.messages import AIMessage, AIMessageChunk


def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
    """Convert ``ImageContentBlock`` to format expected by OpenAI Chat Completions."""
    if "url" in block:
        return {
            "type": "image_url",
            "image_url": {
                "url": block["url"],
            },
        }
    if "base64" in block or block.get("source_type") == "base64":
        if "mime_type" not in block:
            error_message = "mime_type key is required for base64 data."
            raise ValueError(error_message)
        mime_type = block["mime_type"]
        base64_data = block["data"] if "data" in block else block["base64"]
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{base64_data}",
            },
        }
    error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
    raise ValueError(error_message)


def convert_to_openai_data_block(block: dict) -> dict:
    """Format standard data content block to format expected by OpenAI."""
    if block["type"] == "image":
        formatted_block = convert_to_openai_image_block(block)

    elif block["type"] == "file":
        if "base64" in block or block.get("source_type") == "base64":
            # Handle v0 format: {"source_type": "base64", "data": "...", ...}
            # Handle v1 format: {"base64": "...", ...}
            base64_data = block["data"] if "source_type" in block else block["base64"]
            file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
            if filename := block.get("filename"):
                file["filename"] = filename
            elif (extras := block.get("extras")) and ("filename" in extras):
                file["filename"] = extras["filename"]
            elif (extras := block.get("metadata")) and ("filename" in extras):
                # Backward compat
                file["filename"] = extras["filename"]
            else:
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
                    "in the content block: {'type': 'file', 'mime_type': "
                    "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}",
                    stacklevel=1,
                )
            formatted_block = {"type": "file", "file": file}
        elif "file_id" in block or block.get("source_type") == "id":
            # Handle v0 format: {"source_type": "id", "id": "...", ...}
            # Handle v1 format: {"file_id": "...", ...}
            file_id = block["id"] if "source_type" in block else block["file_id"]
            formatted_block = {"type": "file", "file": {"file_id": file_id}}
        else:
            error_msg = "Keys base64 or file_id required for file blocks."
            raise ValueError(error_msg)

    elif block["type"] == "audio":
        if "base64" in block or block.get("source_type") == "base64":
            # Handle v0 format: {"source_type": "base64", "data": "...", ...}
            # Handle v1 format: {"base64": "...", ...}
            base64_data = block["data"] if "source_type" in block else block["base64"]
            audio_format = block["mime_type"].split("/")[-1]
            formatted_block = {
                "type": "input_audio",
                "input_audio": {"data": base64_data, "format": audio_format},
            }
        else:
            error_msg = "Key base64 is required for audio blocks."
            raise ValueError(error_msg)
    else:
        error_msg = f"Block of type {block['type']} is not supported."
        raise ValueError(error_msg)

    return formatted_block

# v1 / Chat Completions
def _convert_to_v1_from_chat_completions(
    message: AIMessage,
) -> list[types.ContentBlock]:
    """Mutate a Chat Completions message to v1 format."""
    content_blocks: list[types.ContentBlock] = []
    if isinstance(message.content, str):
        if message.content:
            content_blocks = [{"type": "text", "text": message.content}]
        else:
            content_blocks = []

    for tool_call in message.tool_calls:
        content_blocks.append(
            {
                "type": "tool_call",
                "name": tool_call["name"],
                "args": tool_call["args"],
                "id": tool_call.get("id"),
            }
        )

    return content_blocks


def _convert_to_v1_from_chat_completions_input(
    blocks: list[types.ContentBlock],
) -> list[types.ContentBlock]:
    """Convert OpenAI Chat Completions format blocks to v1 format.

    Processes non_standard blocks that might be OpenAI format and converts them
    to proper ContentBlocks. If conversion fails, leaves them as non_standard.

    Args:
        blocks: List of content blocks to process.

    Returns:
        Updated list with OpenAI blocks converted to v1 format.
    """
    from langchain_core.messages import content as types

    converted_blocks = []
    unpacked_blocks: list[dict[str, Any]] = [
        cast("dict[str, Any]", block)
        if block.get("type") != "non_standard"
        else block["value"]  # type: ignore[typeddict-item]  # this is only non-standard blocks
        for block in blocks
    ]
    for block in unpacked_blocks:
        if block.get("type") in {
            "image_url",
            "input_audio",
            "file",
        } and _is_openai_data_block(block):
            converted_block = _convert_openai_format_to_data_block(block)
            # If conversion succeeded, use it; otherwise keep as non_standard
            if (
                isinstance(converted_block, dict)
                and converted_block.get("type") in types.KNOWN_BLOCK_TYPES
            ):
                converted_blocks.append(cast("types.ContentBlock", converted_block))
            else:
                converted_blocks.append({"type": "non_standard", "value": block})
        elif block.get("type") in types.KNOWN_BLOCK_TYPES:
            converted_blocks.append(cast("types.ContentBlock", block))
        else:
            converted_blocks.append({"type": "non_standard", "value": block})

    return converted_blocks


def _convert_to_v1_from_chat_completions_chunk(
    chunk: AIMessageChunk,
) -> list[types.ContentBlock]:
    """Mutate a Chat Completions chunk to v1 format."""
    content_blocks: list[types.ContentBlock] = []
    if isinstance(chunk.content, str):
        if chunk.content:
            content_blocks = [{"type": "text", "text": chunk.content}]
        else:
            content_blocks = []

    if chunk.chunk_position == "last":
        for tool_call in chunk.tool_calls:
            content_blocks.append(
                {
                    "type": "tool_call",
                    "name": tool_call["name"],
                    "args": tool_call["args"],
                    "id": tool_call.get("id"),
                }
            )

    else:
        for tool_call_chunk in chunk.tool_call_chunks:
            tc: types.ToolCallChunk = {
                "type": "tool_call_chunk",
                "id": tool_call_chunk.get("id"),
                "name": tool_call_chunk.get("name"),
                "args": tool_call_chunk.get("args"),
            }
            if (idx := tool_call_chunk.get("index")) is not None:
                tc["index"] = idx
            content_blocks.append(tc)

    return content_blocks


def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage:
    """Convert a v1 message to the Chat Completions format."""
    if isinstance(message.content, list):
        new_content: list = []
        for block in message.content:
            if isinstance(block, dict):
                block_type = block.get("type")
                if block_type == "text":
                    # Strip annotations
                    new_content.append({"type": "text", "text": block["text"]})
                elif block_type in ("reasoning", "tool_call"):
                    pass
                else:
                    new_content.append(block)
            else:
                new_content.append(block)
        return message.model_copy(update={"content": new_content})

    return message

# Responses
_FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__"


def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage:
    """Convert v0 AIMessage into ``output_version="responses/v1"`` format."""
    from langchain_core.messages import AIMessageChunk

    # Only update ChatOpenAI v0.3 AIMessages
    is_chatopenai_v03 = (
        isinstance(message.content, list)
        and all(isinstance(b, dict) for b in message.content)
    ) and (
        any(
            item in message.additional_kwargs
            for item in [
                "reasoning",
                "tool_outputs",
                "refusal",
                _FUNCTION_CALL_IDS_MAP_KEY,
            ]
        )
        or (
            isinstance(message.id, str)
            and message.id.startswith("msg_")
            and (response_id := message.response_metadata.get("id"))
            and isinstance(response_id, str)
            and response_id.startswith("resp_")
        )
    )
    if not is_chatopenai_v03:
        return message

    content_order = [
        "reasoning",
        "code_interpreter_call",
        "mcp_call",
        "image_generation_call",
        "text",
        "refusal",
        "function_call",
        "computer_call",
        "mcp_list_tools",
        "mcp_approval_request",
        # N. B. "web_search_call" and "file_search_call" were not passed back in
        # in v0.3
    ]

    # Build a bucket for every known block type
    buckets: dict[str, list] = {key: [] for key in content_order}
    unknown_blocks = []

    # Reasoning
    if reasoning := message.additional_kwargs.get("reasoning"):
        if isinstance(message, AIMessageChunk) and message.chunk_position != "last":
            buckets["reasoning"].append({**reasoning, "type": "reasoning"})
        else:
            buckets["reasoning"].append(reasoning)

    # Refusal
    if refusal := message.additional_kwargs.get("refusal"):
        buckets["refusal"].append({"type": "refusal", "refusal": refusal})

    # Text
    for block in message.content:
        if isinstance(block, dict) and block.get("type") == "text":
            block_copy = block.copy()
            if isinstance(message.id, str) and message.id.startswith("msg_"):
                block_copy["id"] = message.id
            buckets["text"].append(block_copy)
        else:
            unknown_blocks.append(block)

    # Function calls
    function_call_ids = message.additional_kwargs.get(_FUNCTION_CALL_IDS_MAP_KEY)
    if (
        isinstance(message, AIMessageChunk)
        and len(message.tool_call_chunks) == 1
        and message.chunk_position != "last"
    ):
        # Isolated chunk
        tool_call_chunk = message.tool_call_chunks[0]
        function_call = {
            "type": "function_call",
            "name": tool_call_chunk.get("name"),
            "arguments": tool_call_chunk.get("args"),
            "call_id": tool_call_chunk.get("id"),
        }
        if function_call_ids is not None and (
            _id := function_call_ids.get(tool_call_chunk.get("id"))
        ):
            function_call["id"] = _id
        buckets["function_call"].append(function_call)
    else:
        for tool_call in message.tool_calls:
            function_call = {
                "type": "function_call",
                "name": tool_call["name"],
                "arguments": json.dumps(tool_call["args"], ensure_ascii=False),
                "call_id": tool_call["id"],
            }
            if function_call_ids is not None and (
                _id := function_call_ids.get(tool_call["id"])
            ):
                function_call["id"] = _id
            buckets["function_call"].append(function_call)

    # Tool outputs
    tool_outputs = message.additional_kwargs.get("tool_outputs", [])
    for block in tool_outputs:
        if isinstance(block, dict) and (key := block.get("type")) and key in buckets:
            buckets[key].append(block)
        else:
            unknown_blocks.append(block)

    # Re-assemble the content list in the canonical order
    new_content = []
    for key in content_order:
        new_content.extend(buckets[key])
    new_content.extend(unknown_blocks)

    new_additional_kwargs = dict(message.additional_kwargs)
    new_additional_kwargs.pop("reasoning", None)
    new_additional_kwargs.pop("refusal", None)
    new_additional_kwargs.pop("tool_outputs", None)

    if "id" in message.response_metadata:
        new_id = message.response_metadata["id"]
    else:
        new_id = message.id

    return message.model_copy(
        update={
            "content": new_content,
            "additional_kwargs": new_additional_kwargs,
            "id": new_id,
        },
        deep=False,
    )

def _convert_openai_format_to_data_block(
    block: dict,
) -> Union[types.ContentBlock, dict[Any, Any]]:
    """Convert OpenAI image/audio/file content block to respective v1 multimodal block.

    We expect that the incoming block is verified to be in OpenAI Chat Completions
    format.

    If parsing fails, passes block through unchanged.

    Mappings (Chat Completions to LangChain v1):
    - Image -> `ImageContentBlock`
    - Audio -> `AudioContentBlock`
    - File -> `FileContentBlock`

    """

    # Extract extra keys to put them in `extras`
    def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]:
        """Extract unknown keys from block to preserve as extras."""
        return {k: v for k, v in block_dict.items() if k not in known_keys}

    # base64-style image block
    if (block["type"] == "image_url") and (
        parsed := _parse_data_uri(block["image_url"]["url"])
    ):
        known_keys = {"type", "image_url"}
        extras = _extract_extras(block, known_keys)

        # Also extract extras from nested image_url dict
        image_url_known_keys = {"url"}
        image_url_extras = _extract_extras(block["image_url"], image_url_known_keys)

        # Merge extras
        all_extras = {**extras}
        for key, value in image_url_extras.items():
            if key == "detail":  # Don't rename
                all_extras["detail"] = value
            else:
                all_extras[f"image_url_{key}"] = value

        return types.create_image_block(
            # Even though this is labeled as `url`, it can be base64-encoded
            base64=parsed["data"],
            mime_type=parsed["mime_type"],
            **all_extras,
        )

    # url-style image block
    if (block["type"] == "image_url") and isinstance(
        block["image_url"].get("url"), str
    ):
        known_keys = {"type", "image_url"}
        extras = _extract_extras(block, known_keys)

        image_url_known_keys = {"url"}
        image_url_extras = _extract_extras(block["image_url"], image_url_known_keys)

        all_extras = {**extras}
        for key, value in image_url_extras.items():
            if key == "detail":  # Don't rename
                all_extras["detail"] = value
            else:
                all_extras[f"image_url_{key}"] = value

        return types.create_image_block(
            url=block["image_url"]["url"],
            **all_extras,
        )

    # base64-style audio block
    # audio is only represented via raw data, no url or ID option
    if block["type"] == "input_audio":
        known_keys = {"type", "input_audio"}
        extras = _extract_extras(block, known_keys)

        # Also extract extras from nested audio dict
        audio_known_keys = {"data", "format"}
        audio_extras = _extract_extras(block["input_audio"], audio_known_keys)

        all_extras = {**extras}
        for key, value in audio_extras.items():
            all_extras[f"audio_{key}"] = value

        return types.create_audio_block(
            base64=block["input_audio"]["data"],
            mime_type=f"audio/{block['input_audio']['format']}",
            **all_extras,
        )

    # id-style file block
    if block.get("type") == "file" and "file_id" in block.get("file", {}):
        known_keys = {"type", "file"}
        extras = _extract_extras(block, known_keys)

        file_known_keys = {"file_id"}
        file_extras = _extract_extras(block["file"], file_known_keys)

        all_extras = {**extras}
        for key, value in file_extras.items():
            all_extras[f"file_{key}"] = value

        return types.create_file_block(
            file_id=block["file"]["file_id"],
            **all_extras,
        )

    # base64-style file block
    if (block["type"] == "file") and (
        parsed := _parse_data_uri(block["file"]["file_data"])
    ):
        known_keys = {"type", "file"}
        extras = _extract_extras(block, known_keys)

        file_known_keys = {"file_data", "filename"}
        file_extras = _extract_extras(block["file"], file_known_keys)

        all_extras = {**extras}
        for key, value in file_extras.items():
            all_extras[f"file_{key}"] = value

        filename = block["file"].get("filename")
        return types.create_file_block(
            base64=parsed["data"],
            mime_type="application/pdf",
            filename=filename,
            **all_extras,
        )

    # Escape hatch
    return block

# v1 / Responses
def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation:
    annotation_type = annotation.get("type")

    if annotation_type == "url_citation":
        known_fields = {
            "type",
            "url",
            "title",
            "cited_text",
            "start_index",
            "end_index",
        }
        url_citation = cast("types.Citation", {})
        for field in ("end_index", "start_index", "title"):
            if field in annotation:
                url_citation[field] = annotation[field]
        url_citation["type"] = "citation"
        url_citation["url"] = annotation["url"]
        for field, value in annotation.items():
            if field not in known_fields:
                if "extras" not in url_citation:
                    url_citation["extras"] = {}
                url_citation["extras"][field] = value
        return url_citation

    if annotation_type == "file_citation":
        known_fields = {
            "type",
            "title",
            "cited_text",
            "start_index",
            "end_index",
            "filename",
        }
        document_citation: types.Citation = {"type": "citation"}
        if "filename" in annotation:
            document_citation["title"] = annotation["filename"]
        for field, value in annotation.items():
            if field not in known_fields:
                if "extras" not in document_citation:
                    document_citation["extras"] = {}
                document_citation["extras"][field] = value

        return document_citation

    # TODO: standardise container_file_citation?
    non_standard_annotation: types.NonStandardAnnotation = {
        "type": "non_standard_annotation",
        "value": annotation,
    }
    return non_standard_annotation


def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]:
    if "summary" not in block:
        yield cast("types.ReasoningContentBlock", block)
        return

    known_fields = {"type", "reasoning", "id", "index"}
    unknown_fields = [
        field for field in block if field != "summary" and field not in known_fields
    ]
    if unknown_fields:
        block["extras"] = {}
        for field in unknown_fields:
            block["extras"][field] = block.pop(field)

    if not block["summary"]:
        # [{'id': 'rs_...', 'summary': [], 'type': 'reasoning', 'index': 0}]
        block = {k: v for k, v in block.items() if k != "summary"}
        if "index" in block:
            meaningful_idx = f"{block['index']}_0"
            block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}"
        yield cast("types.ReasoningContentBlock", block)
        return

    # Common part for every exploded line, except 'summary'
    common = {k: v for k, v in block.items() if k in known_fields}

    # Optional keys that must appear only in the first exploded item
    first_only = block.pop("extras", None)

    for idx, part in enumerate(block["summary"]):
        new_block = dict(common)
        new_block["reasoning"] = part.get("text", "")
        if idx == 0 and first_only:
            new_block.update(first_only)
        if "index" in new_block:
            summary_index = part.get("index", 0)
            meaningful_idx = f"{new_block['index']}_{summary_index}"
            new_block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}"

        yield cast("types.ReasoningContentBlock", new_block)

def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock]:
    """Convert a Responses message to v1 format."""

    def _iter_blocks() -> Iterable[types.ContentBlock]:
        for raw_block in message.content:
            if not isinstance(raw_block, dict):
                continue
            block = raw_block.copy()
            block_type = block.get("type")

            if block_type == "text":
                if "text" not in block:
                    block["text"] = ""
                if "annotations" in block:
                    block["annotations"] = [
                        _convert_annotation_to_v1(a) for a in block["annotations"]
                    ]
                if "index" in block:
                    block["index"] = f"lc_txt_{block['index']}"
                yield cast("types.TextContentBlock", block)

            elif block_type == "reasoning":
                yield from _explode_reasoning(block)

            elif block_type == "image_generation_call" and (
                result := block.get("result")
            ):
                new_block = {"type": "image", "base64": result}
                if output_format := block.get("output_format"):
                    new_block["mime_type"] = f"image/{output_format}"
                if "id" in block:
                    new_block["id"] = block["id"]
                if "index" in block:
                    new_block["index"] = f"lc_img_{block['index']}"
                for extra_key in (
                    "status",
                    "background",
                    "output_format",
                    "quality",
                    "revised_prompt",
                    "size",
                ):
                    if extra_key in block:
                        if "extras" not in new_block:
                            new_block["extras"] = {}
                        new_block["extras"][extra_key] = block[extra_key]
                yield cast("types.ImageContentBlock", new_block)

            elif block_type == "function_call":
                tool_call_block: Optional[
                    Union[types.ToolCall, types.InvalidToolCall, types.ToolCallChunk]
                ] = None
                call_id = block.get("call_id", "")

                from langchain_core.messages import AIMessageChunk

                if (
                    isinstance(message, AIMessageChunk)
                    and len(message.tool_call_chunks) == 1
                    and message.chunk_position != "last"
                ):
                    tool_call_block = message.tool_call_chunks[0].copy()  # type: ignore[assignment]
                elif call_id:
                    for tool_call in message.tool_calls or []:
                        if tool_call.get("id") == call_id:
                            tool_call_block = {
                                "type": "tool_call",
                                "name": tool_call["name"],
                                "args": tool_call["args"],
                                "id": tool_call.get("id"),
                            }
                            break
                    else:
                        for invalid_tool_call in message.invalid_tool_calls or []:
                            if invalid_tool_call.get("id") == call_id:
                                tool_call_block = invalid_tool_call.copy()
                                break
                else:
                    pass
                if tool_call_block:
                    if "id" in block:
                        if "extras" not in tool_call_block:
                            tool_call_block["extras"] = {}
                        tool_call_block["extras"]["item_id"] = block["id"]
                    if "index" in block:
                        tool_call_block["index"] = f"lc_tc_{block['index']}"
                    yield tool_call_block

            elif block_type == "web_search_call":
                web_search_call = {"type": "web_search_call", "id": block["id"]}
                if "index" in block:
                    web_search_call["index"] = f"lc_wsc_{block['index']}"
                if (
                    "action" in block
                    and isinstance(block["action"], dict)
                    and block["action"].get("type") == "search"
                    and "query" in block["action"]
                ):
                    web_search_call["query"] = block["action"]["query"]
                for key in block:
                    if key not in ("type", "id", "index"):
                        web_search_call[key] = block[key]

                yield cast("types.WebSearchCall", web_search_call)

                # If .content already has web_search_result, don't add
                if not any(
                    isinstance(other_block, dict)
                    and other_block.get("type") == "web_search_result"
                    and other_block.get("id") == block["id"]
                    for other_block in message.content
                ):
                    web_search_result = {"type": "web_search_result", "id": block["id"]}
                    if "index" in block and isinstance(block["index"], int):
                        web_search_result["index"] = f"lc_wsr_{block['index'] + 1}"
                    yield cast("types.WebSearchResult", web_search_result)

            elif block_type == "code_interpreter_call":
                code_interpreter_call = {
                    "type": "code_interpreter_call",
                    "id": block["id"],
                }
                if "code" in block:
                    code_interpreter_call["code"] = block["code"]
                if "index" in block:
                    code_interpreter_call["index"] = f"lc_cic_{block['index']}"
                known_fields = {"type", "id", "language", "code", "extras", "index"}
                for key in block:
                    if key not in known_fields:
                        if "extras" not in code_interpreter_call:
                            code_interpreter_call["extras"] = {}
                        code_interpreter_call["extras"][key] = block[key]

                code_interpreter_result = {
                    "type": "code_interpreter_result",
                    "id": block["id"],
                }
                if "outputs" in block:
                    code_interpreter_result["outputs"] = block["outputs"]
                    for output in block["outputs"]:
                        if (
                            isinstance(output, dict)
                            and (output_type := output.get("type"))
                            and output_type == "logs"
                        ):
                            if "output" not in code_interpreter_result:
                                code_interpreter_result["output"] = []
                            code_interpreter_result["output"].append(
                                {
                                    "type": "code_interpreter_output",
                                    "stdout": output.get("logs", ""),
                                }
                            )

                if "status" in block:
                    code_interpreter_result["status"] = block["status"]
                if "index" in block and isinstance(block["index"], int):
                    code_interpreter_result["index"] = f"lc_cir_{block['index'] + 1}"

                yield cast("types.CodeInterpreterCall", code_interpreter_call)
                yield cast("types.CodeInterpreterResult", code_interpreter_result)

            elif block_type in types.KNOWN_BLOCK_TYPES:
                yield cast("types.ContentBlock", block)
            else:
                new_block = {"type": "non_standard", "value": block}
                if "index" in new_block["value"]:
                    new_block["index"] = f"lc_ns_{new_block['value'].pop('index')}"
                yield cast("types.NonStandardContentBlock", new_block)

    return list(_iter_blocks())


def translate_content(message: AIMessage) -> list[types.ContentBlock]:
    """Derive standard content blocks from a message with OpenAI content."""
    if isinstance(message.content, str):
        return _convert_to_v1_from_chat_completions(message)
    message = _convert_from_v03_ai_message(message)
    return _convert_to_v1_from_responses(message)


def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
    """Derive standard content blocks from a message chunk with OpenAI content."""
    if isinstance(message.content, str):
        return _convert_to_v1_from_chat_completions_chunk(message)
    message = _convert_from_v03_ai_message(message)  # type: ignore[assignment]
    return _convert_to_v1_from_responses(message)


def _register_openai_translator() -> None:
    """Register the OpenAI translator with the central registry.

    Run automatically when the module is imported.
    """
    from langchain_core.messages.block_translators import register_translator

    register_translator("openai", translate_content, translate_content_chunk)


_register_openai_translator()