langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/_api/beta_decorator.py +2 -2
- langchain_core/_api/deprecation.py +1 -1
- langchain_core/beta/runnables/context.py +1 -1
- langchain_core/callbacks/base.py +14 -23
- langchain_core/callbacks/file.py +13 -2
- langchain_core/callbacks/manager.py +74 -157
- langchain_core/callbacks/streaming_stdout.py +3 -4
- langchain_core/callbacks/usage.py +2 -12
- langchain_core/chat_history.py +6 -6
- langchain_core/documents/base.py +1 -1
- langchain_core/documents/compressor.py +9 -6
- langchain_core/indexing/base.py +2 -2
- langchain_core/language_models/_utils.py +232 -101
- langchain_core/language_models/base.py +35 -23
- langchain_core/language_models/chat_models.py +248 -54
- langchain_core/language_models/fake_chat_models.py +28 -81
- langchain_core/load/dump.py +3 -4
- langchain_core/messages/__init__.py +30 -24
- langchain_core/messages/ai.py +188 -30
- langchain_core/messages/base.py +164 -25
- langchain_core/messages/block_translators/__init__.py +89 -0
- langchain_core/messages/block_translators/anthropic.py +451 -0
- langchain_core/messages/block_translators/bedrock.py +45 -0
- langchain_core/messages/block_translators/bedrock_converse.py +47 -0
- langchain_core/messages/block_translators/google_genai.py +45 -0
- langchain_core/messages/block_translators/google_vertexai.py +47 -0
- langchain_core/messages/block_translators/groq.py +45 -0
- langchain_core/messages/block_translators/langchain_v0.py +164 -0
- langchain_core/messages/block_translators/ollama.py +45 -0
- langchain_core/messages/block_translators/openai.py +798 -0
- langchain_core/messages/{content_blocks.py → content.py} +303 -278
- langchain_core/messages/human.py +29 -9
- langchain_core/messages/system.py +29 -9
- langchain_core/messages/tool.py +94 -13
- langchain_core/messages/utils.py +34 -234
- langchain_core/output_parsers/base.py +14 -50
- langchain_core/output_parsers/json.py +2 -5
- langchain_core/output_parsers/list.py +2 -7
- langchain_core/output_parsers/openai_functions.py +5 -28
- langchain_core/output_parsers/openai_tools.py +49 -90
- langchain_core/output_parsers/pydantic.py +2 -3
- langchain_core/output_parsers/transform.py +12 -53
- langchain_core/output_parsers/xml.py +9 -17
- langchain_core/prompt_values.py +8 -112
- langchain_core/prompts/chat.py +1 -3
- langchain_core/runnables/base.py +500 -451
- langchain_core/runnables/branch.py +1 -1
- langchain_core/runnables/fallbacks.py +4 -4
- langchain_core/runnables/history.py +1 -1
- langchain_core/runnables/passthrough.py +3 -3
- langchain_core/runnables/retry.py +1 -1
- langchain_core/runnables/router.py +1 -1
- langchain_core/structured_query.py +3 -7
- langchain_core/tools/base.py +14 -41
- langchain_core/tools/convert.py +2 -22
- langchain_core/tools/retriever.py +1 -8
- langchain_core/tools/structured.py +2 -10
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +7 -14
- langchain_core/tracers/core.py +4 -27
- langchain_core/tracers/event_stream.py +4 -15
- langchain_core/tracers/langchain.py +3 -14
- langchain_core/tracers/log_stream.py +2 -3
- langchain_core/utils/_merge.py +45 -7
- langchain_core/utils/function_calling.py +22 -9
- langchain_core/utils/utils.py +29 -0
- langchain_core/version.py +1 -1
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/METADATA +7 -9
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/RECORD +71 -64
- langchain_core/v1/__init__.py +0 -1
- langchain_core/v1/chat_models.py +0 -1047
- langchain_core/v1/messages.py +0 -755
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/WHEEL +0 -0
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0a2.dist-info}/entry_points.txt +0 -0
langchain_core/callbacks/usage.py
CHANGED
@@ -4,16 +4,14 @@ import threading
 from collections.abc import Generator
 from contextlib import contextmanager
 from contextvars import ContextVar
-from typing import Any, Optional
+from typing import Any, Optional
 
 from typing_extensions import override
 
 from langchain_core.callbacks import BaseCallbackHandler
 from langchain_core.messages import AIMessage
 from langchain_core.messages.ai import UsageMetadata, add_usage
-from langchain_core.messages.utils import convert_from_v1_message
 from langchain_core.outputs import ChatGeneration, LLMResult
-from langchain_core.v1.messages import AIMessage as AIMessageV1
 
 
 class UsageMetadataCallbackHandler(BaseCallbackHandler):
@@ -60,17 +58,9 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
         return str(self.usage_metadata)
 
     @override
-    def on_llm_end(
-        self, response: Union[LLMResult, AIMessageV1], **kwargs: Any
-    ) -> None:
+    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
         """Collect token usage."""
         # Check for usage_metadata (langchain-core >= 0.2.2)
-        if isinstance(response, AIMessageV1):
-            response = LLMResult(
-                generations=[
-                    [ChatGeneration(message=convert_from_v1_message(response))]
-                ]
-            )
         try:
             generation = response.generations[0][0]
         except IndexError:
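For reference, a minimal sketch of how the simplified ``on_llm_end`` hook above is exercised. The direct call stands in for what a chat model does internally after generating a response, and the token counts are placeholders:

from langchain_core.callbacks.usage import UsageMetadataCallbackHandler
from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, LLMResult

# An AIMessage carrying provider-reported token counts.
message = AIMessage(
    content="Hello!",
    usage_metadata={"input_tokens": 5, "output_tokens": 2, "total_tokens": 7},
    response_metadata={"model_name": "demo-model"},
)

handler = UsageMetadataCallbackHandler()
# A chat model normally invokes this hook via its callback manager.
handler.on_llm_end(LLMResult(generations=[[ChatGeneration(message=message)]]))
print(handler.usage_metadata)  # usage aggregated per model name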
langchain_core/chat_history.py
CHANGED
@@ -117,9 +117,9 @@ class BaseChatMessageHistory(ABC):
     def add_user_message(self, message: Union[HumanMessage, str]) -> None:
         """Convenience method for adding a human message string to the store.
 
-
-
-
+        .. note::
+            This is a convenience method. Code should favor the bulk ``add_messages``
+            interface instead to save on round-trips to the persistence layer.
 
         This method may be deprecated in a future release.
 
@@ -134,9 +134,9 @@ class BaseChatMessageHistory(ABC):
     def add_ai_message(self, message: Union[AIMessage, str]) -> None:
         """Convenience method for adding an AI message string to the store.
 
-
-
-
+        .. note::
+            This is a convenience method. Code should favor the bulk ``add_messages``
+            interface instead to save on round-trips to the persistence layer.
 
         This method may be deprecated in a future release.
 
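The notes added above point to the bulk ``add_messages`` interface; a minimal sketch of an in-memory history that implements it (the class is illustrative, not part of langchain-core):

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage


class ListBackedHistory(BaseChatMessageHistory):
    """Illustrative store that keeps messages in a plain list."""

    def __init__(self) -> None:
        self.messages: list[BaseMessage] = []

    def add_messages(self, messages: list[BaseMessage]) -> None:
        # One round-trip for the whole batch, as the note recommends.
        self.messages.extend(messages)

    def clear(self) -> None:
        self.messages = []


history = ListBackedHistory()
# Preferred over separate add_user_message(...) / add_ai_message(...) calls.
history.add_messages([HumanMessage("hi"), AIMessage("hello!")])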
langchain_core/documents/base.py
CHANGED
@@ -277,7 +277,7 @@ class Document(BaseMedia):
         """Pass page_content in as positional or named arg."""
         # my-py is complaining that page_content is not defined on the base class.
         # Here, we're relying on pydantic base class to handle the validation.
-        super().__init__(page_content=page_content, **kwargs)
+        super().__init__(page_content=page_content, **kwargs)
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
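The constructor touched above accepts ``page_content`` positionally or by name, for example:

from langchain_core.documents import Document

# Equivalent spellings; metadata stays optional.
doc = Document("release notes", metadata={"source": "notes.txt"})
same = Document(page_content="release notes", metadata={"source": "notes.txt"})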
langchain_core/documents/compressor.py
CHANGED
@@ -19,17 +19,18 @@ if TYPE_CHECKING:
 class BaseDocumentCompressor(BaseModel, ABC):
     """Base class for document compressors.
 
-    This abstraction is primarily used for
-    post-processing of retrieved documents.
+    This abstraction is primarily used for post-processing of retrieved documents.
 
     Documents matching a given query are first retrieved.
+
     Then the list of documents can be further processed.
 
-    For example, one could re-rank the retrieved documents
-    using an LLM.
+    For example, one could re-rank the retrieved documents using an LLM.
+
+    .. note::
+        Users should favor using a RunnableLambda instead of sub-classing from this
+        interface.
 
-    **Note** users should favor using a RunnableLambda
-    instead of sub-classing from this interface.
     """
 
     @abstractmethod
@@ -48,6 +49,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
         Returns:
             The compressed documents.
+
         """
 
     async def acompress_documents(
@@ -65,6 +67,7 @@ class BaseDocumentCompressor(BaseModel, ABC):
 
         Returns:
            The compressed documents.
+
         """
         return await run_in_executor(
             None, self.compress_documents, documents, query, callbacks
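The note added to the docstring recommends a RunnableLambda for document post-processing rather than a new subclass; a minimal sketch, with a made-up length-based rule standing in for a real re-ranker:

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def rerank_by_length(documents: list[Document]) -> list[Document]:
    # Placeholder post-processing: shortest documents first.
    return sorted(documents, key=lambda doc: len(doc.page_content))


post_process = RunnableLambda(rerank_by_length)
docs = [Document("a much longer retrieved passage"), Document("short hit")]
print(post_process.invoke(docs))  # the short document comes first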
langchain_core/indexing/base.py
CHANGED
@@ -488,8 +488,8 @@ class DeleteResponse(TypedDict, total=False):
     failed: Sequence[str]
     """The IDs that failed to be deleted.
 
-
-
+    .. warning::
+        Deleting an ID that does not exist is **NOT** considered a failure.
     """
 
     num_failed: int
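Because ``DeleteResponse`` is declared with ``total=False``, a store can report only the fields it knows. A hypothetical result for a delete call that included an ID that was never indexed, which per the warning above is not a failure:

from langchain_core.indexing.base import DeleteResponse

# Deleting ["a", "b", "never-indexed"]: the unknown ID is simply ignored.
response: DeleteResponse = {"failed": [], "num_failed": 0}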
langchain_core/language_models/_utils.py
CHANGED
@@ -1,14 +1,30 @@
-import copy
 import re
 from collections.abc import Sequence
-from typing import
-
-
-
+from typing import (
+    TYPE_CHECKING,
+    Literal,
+    Optional,
+    TypedDict,
+    TypeVar,
+    Union,
+)
+
+if TYPE_CHECKING:
+    from langchain_core.messages import BaseMessage
+    from langchain_core.messages.content import (
+        ContentBlock,
+    )
 
 
 def _is_openai_data_block(block: dict) -> bool:
-    """Check if the block contains multimodal data in OpenAI Chat Completions format.
+    """Check if the block contains multimodal data in OpenAI Chat Completions format.
+
+    Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``)
+
+    If additional keys are present, they are ignored / will not affect outcome as long
+    as the required keys are present and valid.
+
+    """
     if block.get("type") == "image_url":
         if (
             (set(block.keys()) <= {"type", "image_url", "detail"})
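For concreteness, these are the Chat Completions part shapes the docstring above describes; ``_is_openai_data_block`` is a private helper and the values are placeholders, so treat this as a sketch of the documented behavior (the full checks appear in the next hunk):

from langchain_core.language_models._utils import _is_openai_data_block

image_part = {
    "type": "image_url",
    "image_url": {"url": "https://example.com/cat.png", "detail": "auto"},
}
audio_part = {
    "type": "input_audio",
    "input_audio": {"data": "<base64 audio>", "format": "wav"},
}
file_by_data = {"type": "file", "file": {"file_data": "<base64 pdf>", "filename": "a.pdf"}}
file_by_id = {"type": "file", "file": {"file_id": "file-abc123"}}

# All four satisfy the predicate; extra keys such as `filename` are ignored.
assert all(
    _is_openai_data_block(part)
    for part in (image_part, audio_part, file_by_data, file_by_id)
)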
@@ -17,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool:
         ):
             url = image_url.get("url")
             if isinstance(url, str):
+                # Required per OpenAI spec
+                return True
+            # Ignore `'detail'` since it's optional and specific to OpenAI
+
+    elif block.get("type") == "input_audio":
+        if (audio := block.get("input_audio")) and isinstance(audio, dict):
+            audio_data = audio.get("data")
+            audio_format = audio.get("format")
+            # Both required per OpenAI spec
+            if isinstance(audio_data, str) and isinstance(audio_format, str):
                 return True
 
     elif block.get("type") == "file":
         if (file := block.get("file")) and isinstance(file, dict):
             file_data = file.get("file_data")
-
-
-
-    elif block.get("type") == "input_audio":
-        if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
-            audio_data = input_audio.get("data")
-            audio_format = input_audio.get("format")
-            if isinstance(audio_data, str) and isinstance(audio_format, str):
+            file_id = file.get("file_id")
+            # Files can be either base64-encoded or pre-uploaded with an ID
+            if isinstance(file_data, str) or isinstance(file_id, str):
                 return True
 
     else:
         return False
 
+    # Has no `'type'` key
     return False
 
 
-
-""
+class ParsedDataUri(TypedDict):
+    source_type: Literal["base64"]
+    data: str
+    mime_type: str
+
+
+def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]:
+    """Parse a data URI into its components.
+
+    If parsing fails, return None. If either MIME type or data is missing, return None.
 
     Example:
 
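A quick round-trip for the data-URI helper introduced above. ``_parse_data_uri`` is private API and its regex is not shown in this hunk, so this assumes the documented behavior (parse failures and missing parts yield ``None``):

from langchain_core.language_models._utils import _parse_data_uri

parsed = _parse_data_uri("data:image/png;base64,iVBORw0KGgo=")
assert parsed == {
    "source_type": "base64",
    "data": "iVBORw0KGgo=",
    "mime_type": "image/png",
}
assert _parse_data_uri("not-a-data-uri") is None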
@@ -59,118 +89,219 @@ def _parse_data_uri(uri: str) -> Optional[dict]:
     match = re.match(regex, uri)
     if match is None:
         return None
+
+    mime_type = match.group("mime_type")
+    data = match.group("data")
+    if not mime_type or not data:
+        return None
+
     return {
         "source_type": "base64",
-        "data":
-        "mime_type":
+        "data": data,
+        "mime_type": mime_type,
     }
 
 
-def
-""
+def _normalize_messages(
+    messages: Sequence["BaseMessage"],
+) -> list["BaseMessage"]:
+    """Normalize message formats to LangChain v1 standard content blocks.
 
-
+    Chat models already implement support for:
+    - Images in OpenAI Chat Completions format
+      These will be passed through unchanged
+    - LangChain v1 standard content blocks
 
-
-
+    This function extends support to:
+    - `Audio <https://platform.openai.com/docs/api-reference/chat/create>`__ and
+      `file <https://platform.openai.com/docs/api-reference/files>`__ data in OpenAI
+      Chat Completions format
+    - Images are technically supported but we expect chat models to handle them
+      directly; this may change in the future
+    - LangChain v0 standard content blocks for backward compatibility
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return block
-
-    if block["type"] == "input_audio":
-        data = block["input_audio"].get("data")
-        audio_format = block["input_audio"].get("format")
-        if data and audio_format:
-            return {
-                "type": "audio",
-                "source_type": "base64",
-                "data": data,
-                "mime_type": f"audio/{audio_format}",
+    .. versionchanged:: 1.0.0
+        In previous versions, this function returned messages in LangChain v0 format.
+        Now, it returns messages in LangChain v1 format, which upgraded chat models now
+        expect to receive when passing back in message history. For backward
+        compatibility, this function will convert v0 message content to v1 format.
+
+    .. dropdown:: v0 Content Block Schemas
+
+        ``URLContentBlock``:
+
+        .. codeblock::
+
+            {
+                mime_type: NotRequired[str]
+                type: Literal['image', 'audio', 'file'],
+                source_type: Literal['url'],
+                url: str,
             }
-        return block
 
-
+        ``Base64ContentBlock``:
+
+        .. codeblock::
+
+            {
+                mime_type: NotRequired[str]
+                type: Literal['image', 'audio', 'file'],
+                source_type: Literal['base64'],
+                data: str,
+            }
 
+        ``IDContentBlock``:
 
-
-
+        (In practice, this was never used)
+
+        .. codeblock::
+
+            {
+                type: Literal['image', 'audio', 'file'],
+                source_type: Literal['id'],
+                id: str,
+            }
+
+        ``PlainTextContentBlock``:
+
+        .. codeblock::
+
+            {
+                mime_type: NotRequired[str]
+                type: Literal['file'],
+                source_type: Literal['text'],
+                url: str,
+            }
+
+    If a v1 message is passed in, it will be returned as-is, meaning it is safe to
+    always pass in v1 messages to this function for assurance.
+
+    For posterity, here are the OpenAI Chat Completions schemas we expect:
+
+    Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
+    png, jpeg/jpg, webp, static gif:
+    {
+        "type": Literal['image_url'],
+        "image_url": {
+            "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
+            "detail": Literal['low', 'high', 'auto'] = 'auto',  # Supported by OpenAI
+        }
+    }
+
+    Chat Completions audio:
+    {
+        "type": Literal['input_audio'],
+        "input_audio": {
+            "format": Literal['wav', 'mp3'],
+            "data": str = "$BASE64_ENCODED_AUDIO",
+        },
+    }
+
+    Chat Completions files: either base64 or pre-uploaded file ID
+    {
+        "type": Literal['file'],
+        "file": Union[
+            {
+                "filename": Optional[str] = "$FILENAME",
+                "file_data": str = "$BASE64_ENCODED_FILE",
+            },
+            {
+                "file_id": str = "$FILE_ID",  # For pre-uploaded files to OpenAI
+            },
+        ],
+    }
 
-    Chat models implement support for images in OpenAI Chat Completions format, as well
-    as other multimodal data as standard data blocks. This function extends support to
-    audio and file data in OpenAI Chat Completions format by converting them to standard
-    data blocks.
     """
+    from langchain_core.messages.block_translators.langchain_v0 import (
+        _convert_legacy_v0_content_block_to_v1,
+    )
+    from langchain_core.messages.block_translators.openai import (
+        _convert_openai_format_to_data_block,
+    )
+
     formatted_messages = []
     for message in messages:
+        # We preserve input messages - the caller may reuse them elsewhere and expects
+        # them to remain unchanged. We only create a copy if we need to translate.
         formatted_message = message
+
         if isinstance(message.content, list):
             for idx, block in enumerate(message.content):
+                # OpenAI Chat Completions multimodal data blocks to v1 standard
                 if (
                     isinstance(block, dict)
-
-                    #
-                    # standard data block format)
-                    and block.get("type") in {"file", "input_audio"}
+                    and block.get("type") in {"input_audio", "file"}
+                    # Discriminate between OpenAI/LC format since they share `'type'`
                     and _is_openai_data_block(block)
                 ):
-
-                        formatted_message = message.model_copy()
-                        # Also shallow-copy content
-                        formatted_message.content = list(formatted_message.content)
-
-                    formatted_message.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
-                        _convert_openai_format_to_data_block(block)
-                    )
-        formatted_messages.append(formatted_message)
-
-    return formatted_messages
+                    formatted_message = _ensure_message_copy(message, formatted_message)
 
+                    converted_block = _convert_openai_format_to_data_block(block)
+                    _update_content_block(formatted_message, idx, converted_block)
 
-
-
-
-    Chat models implement support for images in OpenAI Chat Completions format, as well
-    as other multimodal data as standard data blocks. This function extends support to
-    audio and file data in OpenAI Chat Completions format by converting them to standard
-    data blocks.
-    """
-    formatted_messages = []
-    for message in messages:
-        formatted_message = message
-        if isinstance(message.content, list):
-            for idx, block in enumerate(message.content):
-                if (
+                # Convert multimodal LangChain v0 to v1 standard content blocks
+                elif (
                     isinstance(block, dict)
-
-
-
-
-
+                    and block.get("type")
+                    in {
+                        "image",
+                        "audio",
+                        "file",
+                    }
+                    and block.get("source_type")  # v1 doesn't have `source_type`
+                    in {
+                        "url",
+                        "base64",
+                        "id",
+                        "text",
+                    }
                 ):
-
-
-
-
-
-
-
-
+                    formatted_message = _ensure_message_copy(message, formatted_message)
+
+                    converted_block = _convert_legacy_v0_content_block_to_v1(block)
+                    _update_content_block(formatted_message, idx, converted_block)
+                    continue
+
+                # else, pass through blocks that look like they have v1 format unchanged
+
         formatted_messages.append(formatted_message)
 
     return formatted_messages
+
+
+T = TypeVar("T", bound="BaseMessage")
+
+
+def _ensure_message_copy(message: T, formatted_message: T) -> T:
+    """Create a copy of the message if it hasn't been copied yet."""
+    if formatted_message is message:
+        formatted_message = message.model_copy()
+        # Shallow-copy content list to allow modifications
+        formatted_message.content = list(formatted_message.content)
+    return formatted_message
+
+
+def _update_content_block(
+    formatted_message: "BaseMessage", idx: int, new_block: Union[ContentBlock, dict]
+) -> None:
+    """Update a content block at the given index, handling type issues."""
+    # Type ignore needed because:
+    # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
+    # - When content is str, indexing fails (index error)
+    # - When content is list, the items are `Union[str, dict]` but we're assigning
+    #   `Union[ContentBlock, dict]` where ContentBlock is richer than dict
+    # - This is safe because we only call this when we've verified content is a list and
+    #   we're doing content block conversions
+    formatted_message.content[idx] = new_block  # type: ignore[index, assignment]
+
+
+def _update_message_content_to_blocks(message: T, output_version: str) -> T:
+    return message.model_copy(
+        update={
+            "content": message.content_blocks,
+            "response_metadata": {
+                **message.response_metadata,
+                "output_version": output_version,
+            },
+        }
+    )