langchain-core 0.3.75__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)
  1. langchain_core/language_models/_utils.py +233 -68
  2. langchain_core/language_models/base.py +2 -1
  3. langchain_core/language_models/chat_models.py +196 -33
  4. langchain_core/language_models/fake_chat_models.py +22 -6
  5. langchain_core/messages/__init__.py +74 -4
  6. langchain_core/messages/ai.py +191 -26
  7. langchain_core/messages/base.py +164 -25
  8. langchain_core/messages/block_translators/__init__.py +89 -0
  9. langchain_core/messages/block_translators/anthropic.py +451 -0
  10. langchain_core/messages/block_translators/bedrock.py +45 -0
  11. langchain_core/messages/block_translators/bedrock_converse.py +47 -0
  12. langchain_core/messages/block_translators/google_genai.py +45 -0
  13. langchain_core/messages/block_translators/google_vertexai.py +47 -0
  14. langchain_core/messages/block_translators/groq.py +45 -0
  15. langchain_core/messages/block_translators/langchain_v0.py +297 -0
  16. langchain_core/messages/block_translators/ollama.py +45 -0
  17. langchain_core/messages/block_translators/openai.py +586 -0
  18. langchain_core/messages/content.py +1568 -0
  19. langchain_core/messages/human.py +29 -9
  20. langchain_core/messages/system.py +29 -9
  21. langchain_core/messages/tool.py +30 -27
  22. langchain_core/messages/utils.py +12 -5
  23. langchain_core/prompt_values.py +1 -1
  24. langchain_core/runnables/base.py +1 -1
  25. langchain_core/utils/_merge.py +44 -6
  26. langchain_core/utils/utils.py +29 -0
  27. langchain_core/version.py +1 -1
  28. {langchain_core-0.3.75.dist-info → langchain_core-1.0.0a1.dist-info}/METADATA +2 -2
  29. {langchain_core-0.3.75.dist-info → langchain_core-1.0.0a1.dist-info}/RECORD +31 -21
  30. langchain_core/messages/content_blocks.py +0 -155
  31. {langchain_core-0.3.75.dist-info → langchain_core-1.0.0a1.dist-info}/WHEEL +0 -0
  32. {langchain_core-0.3.75.dist-info → langchain_core-1.0.0a1.dist-info}/entry_points.txt +0 -0
langchain_core/language_models/_utils.py

@@ -1,12 +1,30 @@
  import re
  from collections.abc import Sequence
- from typing import Optional
+ from typing import (
+     TYPE_CHECKING,
+     Literal,
+     Optional,
+     TypedDict,
+     TypeVar,
+     Union,
+ )

- from langchain_core.messages import BaseMessage
+ if TYPE_CHECKING:
+     from langchain_core.messages import BaseMessage
+     from langchain_core.messages.content import (
+         ContentBlock,
+     )


  def _is_openai_data_block(block: dict) -> bool:
-     """Check if the block contains multimodal data in OpenAI Chat Completions format."""
+     """Check if the block contains multimodal data in OpenAI Chat Completions format.
+
+     Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``)
+
+     If additional keys are present, they are ignored / will not affect outcome as long
+     as the required keys are present and valid.
+
+     """
      if block.get("type") == "image_url":
          if (
              (set(block.keys()) <= {"type", "image_url", "detail"})
@@ -15,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool:
          ):
              url = image_url.get("url")
              if isinstance(url, str):
+                 # Required per OpenAI spec
+                 return True
+             # Ignore `'detail'` since it's optional and specific to OpenAI
+
+     elif block.get("type") == "input_audio":
+         if (audio := block.get("input_audio")) and isinstance(audio, dict):
+             audio_data = audio.get("data")
+             audio_format = audio.get("format")
+             # Both required per OpenAI spec
+             if isinstance(audio_data, str) and isinstance(audio_format, str):
                  return True

      elif block.get("type") == "file":
          if (file := block.get("file")) and isinstance(file, dict):
              file_data = file.get("file_data")
-             if isinstance(file_data, str):
-                 return True
-
-     elif block.get("type") == "input_audio":
-         if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
-             audio_data = input_audio.get("data")
-             audio_format = input_audio.get("format")
-             if isinstance(audio_data, str) and isinstance(audio_format, str):
+             file_id = file.get("file_id")
+             # Files can be either base64-encoded or pre-uploaded with an ID
+             if isinstance(file_data, str) or isinstance(file_id, str):
                  return True

      else:
          return False

+     # Has no `'type'` key
      return False


- def _parse_data_uri(uri: str) -> Optional[dict]:
-     """Parse a data URI into its components. If parsing fails, return None.
+ class ParsedDataUri(TypedDict):
+     source_type: Literal["base64"]
+     data: str
+     mime_type: str
+
+
+ def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]:
+     """Parse a data URI into its components.
+
+     If parsing fails, return None. If either MIME type or data is missing, return None.

      Example:

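As a rough illustration of the expanded `_is_openai_data_block` check above (an internal helper; the payloads below are placeholders and this is a sketch, not taken from the package's tests), the function now also accepts OpenAI `input_audio` blocks and `file` blocks that reference a pre-uploaded `file_id`:

from langchain_core.language_models._utils import _is_openai_data_block

# OpenAI Chat Completions audio block: both "data" and "format" are required
audio_block = {
    "type": "input_audio",
    "input_audio": {"data": "<base64-encoded audio>", "format": "wav"},
}

# OpenAI file block referencing a pre-uploaded file by ID (newly accepted)
file_id_block = {"type": "file", "file": {"file_id": "file-abc123"}}

# Missing the required "format" key, so the block is not treated as OpenAI data
incomplete_audio_block = {"type": "input_audio", "input_audio": {"data": "<base64>"}}

assert _is_openai_data_block(audio_block)
assert _is_openai_data_block(file_id_block)
assert not _is_openai_data_block(incomplete_audio_block)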
@@ -57,84 +89,217 @@ def _parse_data_uri(uri: str) -> Optional[dict]:
      match = re.match(regex, uri)
      if match is None:
          return None
+
+     mime_type = match.group("mime_type")
+     data = match.group("data")
+     if not mime_type or not data:
+         return None
+
      return {
          "source_type": "base64",
-         "data": match.group("data"),
-         "mime_type": match.group("mime_type"),
+         "data": data,
+         "mime_type": mime_type,
      }


- def _convert_openai_format_to_data_block(block: dict) -> dict:
-     """Convert OpenAI image content block to standard data content block.
+ def _normalize_messages(
+     messages: Sequence["BaseMessage"],
+ ) -> list["BaseMessage"]:
+     """Normalize message formats to LangChain v1 standard content blocks.

-     If parsing fails, pass-through.
+     Chat models already implement support for:
+     - Images in OpenAI Chat Completions format
+       These will be passed through unchanged
+     - LangChain v1 standard content blocks

-     Args:
-         block: The OpenAI image content block to convert.
+     This function extends support to:
+     - `Audio <https://platform.openai.com/docs/api-reference/chat/create>`__ and
+       `file <https://platform.openai.com/docs/api-reference/files>`__ data in OpenAI
+       Chat Completions format
+     - Images are technically supported but we expect chat models to handle them
+       directly; this may change in the future
+     - LangChain v0 standard content blocks for backward compatibility

-     Returns:
-         The converted standard data content block.
-     """
-     if block["type"] == "image_url":
-         parsed = _parse_data_uri(block["image_url"]["url"])
-         if parsed is not None:
-             parsed["type"] = "image"
-             return parsed
-         return block
-
-     if block["type"] == "file":
-         parsed = _parse_data_uri(block["file"]["file_data"])
-         if parsed is not None:
-             parsed["type"] = "file"
-             if filename := block["file"].get("filename"):
-                 parsed["filename"] = filename
-             return parsed
-         return block
-
-     if block["type"] == "input_audio":
-         data = block["input_audio"].get("data")
-         audio_format = block["input_audio"].get("format")
-         if data and audio_format:
-             return {
-                 "type": "audio",
-                 "source_type": "base64",
-                 "data": data,
-                 "mime_type": f"audio/{audio_format}",
+     .. versionchanged:: 1.0.0
+         In previous versions, this function returned messages in LangChain v0 format.
+         Now, it returns messages in LangChain v1 format, which upgraded chat models now
+         expect to receive when passing back in message history. For backward
+         compatibility, this function will convert v0 message content to v1 format.
+
+     .. dropdown:: v0 Content Block Schemas
+
+         ``URLContentBlock``:
+
+         .. codeblock::
+
+             {
+                 mime_type: NotRequired[str]
+                 type: Literal['image', 'audio', 'file'],
+                 source_type: Literal['url'],
+                 url: str,
              }
-         return block

-     return block
+         ``Base64ContentBlock``:
+
+         .. codeblock::
+
+             {
+                 mime_type: NotRequired[str]
+                 type: Literal['image', 'audio', 'file'],
+                 source_type: Literal['base64'],
+                 data: str,
+             }

+         ``IDContentBlock``:

- def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
-     """Extend support for message formats.
+         (In practice, this was never used)
+
+         .. codeblock::
+
+             {
+                 type: Literal['image', 'audio', 'file'],
+                 source_type: Literal['id'],
+                 id: str,
+             }
+
+         ``PlainTextContentBlock``:
+
+         .. codeblock::
+
+             {
+                 mime_type: NotRequired[str]
+                 type: Literal['file'],
+                 source_type: Literal['text'],
+                 url: str,
+             }
+
+     If a v1 message is passed in, it will be returned as-is, meaning it is safe to
+     always pass in v1 messages to this function for assurance.
+
+     For posterity, here are the OpenAI Chat Completions schemas we expect:
+
+     Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
+     png, jpeg/jpg, webp, static gif:
+     {
+         "type": Literal['image_url'],
+         "image_url": {
+             "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
+             "detail": Literal['low', 'high', 'auto'] = 'auto',  # Supported by OpenAI
+         }
+     }
+
+     Chat Completions audio:
+     {
+         "type": Literal['input_audio'],
+         "input_audio": {
+             "format": Literal['wav', 'mp3'],
+             "data": str = "$BASE64_ENCODED_AUDIO",
+         },
+     }
+
+     Chat Completions files: either base64 or pre-uploaded file ID
+     {
+         "type": Literal['file'],
+         "file": Union[
+             {
+                 "filename": Optional[str] = "$FILENAME",
+                 "file_data": str = "$BASE64_ENCODED_FILE",
+             },
+             {
+                 "file_id": str = "$FILE_ID",  # For pre-uploaded files to OpenAI
+             },
+         ],
+     }

-     Chat models implement support for images in OpenAI Chat Completions format, as well
-     as other multimodal data as standard data blocks. This function extends support to
-     audio and file data in OpenAI Chat Completions format by converting them to standard
-     data blocks.
      """
+     from langchain_core.messages.block_translators.langchain_v0 import (
+         _convert_legacy_v0_content_block_to_v1,
+         _convert_openai_format_to_data_block,
+     )
+
      formatted_messages = []
      for message in messages:
+         # We preserve input messages - the caller may reuse them elsewhere and expects
+         # them to remain unchanged. We only create a copy if we need to translate.
          formatted_message = message
+
          if isinstance(message.content, list):
              for idx, block in enumerate(message.content):
+                 # OpenAI Chat Completions multimodal data blocks to v1 standard
                  if (
                      isinstance(block, dict)
-                     # Subset to (PDF) files and audio, as most relevant chat models
-                     # support images in OAI format (and some may not yet support the
-                     # standard data block format)
-                     and block.get("type") in {"file", "input_audio"}
+                     and block.get("type") in {"input_audio", "file"}
+                     # Discriminate between OpenAI/LC format since they share `'type'`
                      and _is_openai_data_block(block)
                  ):
-                     if formatted_message is message:
-                         formatted_message = message.model_copy()
-                         # Also shallow-copy content
-                         formatted_message.content = list(formatted_message.content)
-
-                     formatted_message.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
-                         _convert_openai_format_to_data_block(block)
-                     )
+                     formatted_message = _ensure_message_copy(message, formatted_message)
+
+                     converted_block = _convert_openai_format_to_data_block(block)
+                     _update_content_block(formatted_message, idx, converted_block)
+
+                 # Convert multimodal LangChain v0 to v1 standard content blocks
+                 elif (
+                     isinstance(block, dict)
+                     and block.get("type")
+                     in {
+                         "image",
+                         "audio",
+                         "file",
+                     }
+                     and block.get("source_type")  # v1 doesn't have `source_type`
+                     in {
+                         "url",
+                         "base64",
+                         "id",
+                         "text",
+                     }
+                 ):
+                     formatted_message = _ensure_message_copy(message, formatted_message)
+
+                     converted_block = _convert_legacy_v0_content_block_to_v1(block)
+                     _update_content_block(formatted_message, idx, converted_block)
+                     continue
+
+                 # else, pass through blocks that look like they have v1 format unchanged
+
          formatted_messages.append(formatted_message)

      return formatted_messages
+
+
+ T = TypeVar("T", bound="BaseMessage")
+
+
+ def _ensure_message_copy(message: T, formatted_message: T) -> T:
+     """Create a copy of the message if it hasn't been copied yet."""
+     if formatted_message is message:
+         formatted_message = message.model_copy()
+         # Shallow-copy content list to allow modifications
+         formatted_message.content = list(formatted_message.content)
+     return formatted_message
+
+
+ def _update_content_block(
+     formatted_message: "BaseMessage", idx: int, new_block: Union[ContentBlock, dict]
+ ) -> None:
+     """Update a content block at the given index, handling type issues."""
+     # Type ignore needed because:
+     # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
+     # - When content is str, indexing fails (index error)
+     # - When content is list, the items are `Union[str, dict]` but we're assigning
+     #   `Union[ContentBlock, dict]` where ContentBlock is richer than dict
+     # - This is safe because we only call this when we've verified content is a list and
+     #   we're doing content block conversions
+     formatted_message.content[idx] = new_block  # type: ignore[index, assignment]
+
+
+ def _update_message_content_to_blocks(message: T, output_version: str) -> T:
+     return message.model_copy(
+         update={
+             "content": message.content_blocks,
+             "response_metadata": {
+                 **message.response_metadata,
+                 "output_version": output_version,
+             },
+         }
+     )
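Taken together, the `_normalize_messages` changes mean OpenAI-style audio/file blocks and legacy v0 blocks (those carrying `source_type`) are translated to v1 content blocks while the caller's messages are left untouched. A minimal usage sketch, assuming the behavior described in the hunk above (private helper, placeholder data; the exact shape of the resulting v1 blocks comes from the `block_translators` modules added elsewhere in this release):

from langchain_core.language_models._utils import _normalize_messages
from langchain_core.messages import HumanMessage

message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe these attachments."},
        # OpenAI Chat Completions audio block -> translated to a v1 content block
        {
            "type": "input_audio",
            "input_audio": {"data": "<base64-encoded audio>", "format": "mp3"},
        },
        # Legacy LangChain v0 block (has `source_type`) -> translated to v1
        {
            "type": "image",
            "source_type": "base64",
            "data": "<base64-encoded image>",
            "mime_type": "image/png",
        },
    ]
)

[normalized] = _normalize_messages([message])

# The input message is preserved; a copy is only returned because at least
# one block needed translation.
assert normalized is not message
assert message.content[1]["type"] == "input_audio"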
langchain_core/language_models/base.py

@@ -23,6 +23,7 @@ from langchain_core._api import deprecated
  from langchain_core.caches import BaseCache
  from langchain_core.callbacks import Callbacks
  from langchain_core.messages import (
+     AIMessage,
      AnyMessage,
      BaseMessage,
      MessageLikeRepresentation,
@@ -85,7 +86,7 @@ def _get_token_ids_default_method(text: str) -> list[int]:
  LanguageModelInput = Union[PromptValue, str, Sequence[MessageLikeRepresentation]]
  LanguageModelOutput = Union[BaseMessage, str]
  LanguageModelLike = Runnable[LanguageModelInput, LanguageModelOutput]
- LanguageModelOutputVar = TypeVar("LanguageModelOutputVar", BaseMessage, str)
+ LanguageModelOutputVar = TypeVar("LanguageModelOutputVar", AIMessage, str)


  def _get_verbosity() -> bool:
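The `LanguageModelOutputVar` change narrows the TypeVar constraint from `BaseMessage` to `AIMessage`. A small sketch of what a constrained TypeVar like this implies for callers (the `OutputVar` name and `passthrough` helper below are hypothetical, used only to mirror the constraint):

from typing import TypeVar

from langchain_core.messages import AIMessage

# Mirrors the constraint above: values must be an AIMessage or a str.
OutputVar = TypeVar("OutputVar", AIMessage, str)


def passthrough(output: OutputVar) -> OutputVar:
    # With a constrained TypeVar, the return type resolves to whichever
    # constraint the argument matched (AIMessage or str).
    return output


passthrough(AIMessage(content="hi"))  # OK
passthrough("hi")  # OK
# passthrough(HumanMessage(content="hi"))  # now flagged by type checkers:
# HumanMessage no longer satisfies the AIMessage/str constraint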