langchain-core 1.0.0a5__py3-none-any.whl → 1.0.0a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/_api/__init__.py +3 -3
- langchain_core/_api/beta_decorator.py +6 -6
- langchain_core/_api/deprecation.py +21 -29
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +2 -3
- langchain_core/agents.py +10 -11
- langchain_core/caches.py +7 -7
- langchain_core/callbacks/base.py +91 -91
- langchain_core/callbacks/file.py +11 -11
- langchain_core/callbacks/manager.py +86 -89
- langchain_core/callbacks/stdout.py +8 -8
- langchain_core/callbacks/usage.py +4 -4
- langchain_core/chat_history.py +1 -37
- langchain_core/document_loaders/base.py +2 -2
- langchain_core/document_loaders/langsmith.py +15 -15
- langchain_core/documents/base.py +16 -16
- langchain_core/documents/compressor.py +4 -4
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +17 -19
- langchain_core/exceptions.py +3 -3
- langchain_core/globals.py +3 -151
- langchain_core/indexing/api.py +44 -43
- langchain_core/indexing/base.py +30 -30
- langchain_core/indexing/in_memory.py +3 -3
- langchain_core/language_models/_utils.py +5 -7
- langchain_core/language_models/base.py +18 -132
- langchain_core/language_models/chat_models.py +118 -227
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +35 -29
- langchain_core/language_models/llms.py +91 -201
- langchain_core/load/dump.py +1 -1
- langchain_core/load/load.py +11 -12
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +2 -4
- langchain_core/messages/ai.py +17 -20
- langchain_core/messages/base.py +28 -26
- langchain_core/messages/block_translators/__init__.py +17 -7
- langchain_core/messages/block_translators/anthropic.py +3 -3
- langchain_core/messages/block_translators/bedrock_converse.py +2 -2
- langchain_core/messages/block_translators/google_genai.py +502 -20
- langchain_core/messages/block_translators/langchain_v0.py +2 -2
- langchain_core/messages/block_translators/openai.py +6 -6
- langchain_core/messages/content.py +120 -124
- langchain_core/messages/human.py +7 -7
- langchain_core/messages/system.py +7 -7
- langchain_core/messages/tool.py +24 -24
- langchain_core/messages/utils.py +67 -79
- langchain_core/output_parsers/base.py +12 -14
- langchain_core/output_parsers/json.py +4 -4
- langchain_core/output_parsers/list.py +3 -5
- langchain_core/output_parsers/openai_functions.py +3 -3
- langchain_core/output_parsers/openai_tools.py +3 -3
- langchain_core/output_parsers/pydantic.py +2 -2
- langchain_core/output_parsers/transform.py +13 -15
- langchain_core/output_parsers/xml.py +7 -9
- langchain_core/outputs/chat_generation.py +4 -4
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +2 -2
- langchain_core/outputs/llm_result.py +5 -5
- langchain_core/prompts/__init__.py +1 -5
- langchain_core/prompts/base.py +10 -15
- langchain_core/prompts/chat.py +31 -82
- langchain_core/prompts/dict.py +2 -2
- langchain_core/prompts/few_shot.py +5 -5
- langchain_core/prompts/few_shot_with_templates.py +4 -4
- langchain_core/prompts/loading.py +3 -5
- langchain_core/prompts/prompt.py +4 -16
- langchain_core/prompts/string.py +2 -1
- langchain_core/prompts/structured.py +16 -23
- langchain_core/rate_limiters.py +3 -4
- langchain_core/retrievers.py +14 -14
- langchain_core/runnables/base.py +938 -1054
- langchain_core/runnables/branch.py +36 -40
- langchain_core/runnables/config.py +27 -35
- langchain_core/runnables/configurable.py +108 -124
- langchain_core/runnables/fallbacks.py +76 -72
- langchain_core/runnables/graph.py +39 -45
- langchain_core/runnables/graph_ascii.py +9 -11
- langchain_core/runnables/graph_mermaid.py +18 -19
- langchain_core/runnables/graph_png.py +8 -9
- langchain_core/runnables/history.py +114 -127
- langchain_core/runnables/passthrough.py +113 -139
- langchain_core/runnables/retry.py +43 -48
- langchain_core/runnables/router.py +23 -28
- langchain_core/runnables/schema.py +42 -44
- langchain_core/runnables/utils.py +28 -31
- langchain_core/stores.py +9 -13
- langchain_core/structured_query.py +8 -8
- langchain_core/tools/base.py +63 -115
- langchain_core/tools/convert.py +31 -35
- langchain_core/tools/render.py +1 -1
- langchain_core/tools/retriever.py +4 -4
- langchain_core/tools/simple.py +13 -17
- langchain_core/tools/structured.py +12 -15
- langchain_core/tracers/base.py +62 -64
- langchain_core/tracers/context.py +17 -35
- langchain_core/tracers/core.py +49 -53
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +58 -60
- langchain_core/tracers/langchain.py +13 -13
- langchain_core/tracers/log_stream.py +22 -24
- langchain_core/tracers/root_listeners.py +14 -14
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +8 -8
- langchain_core/tracers/stdout.py +2 -1
- langchain_core/utils/__init__.py +0 -3
- langchain_core/utils/_merge.py +2 -2
- langchain_core/utils/aiter.py +24 -28
- langchain_core/utils/env.py +4 -4
- langchain_core/utils/function_calling.py +31 -41
- langchain_core/utils/html.py +3 -4
- langchain_core/utils/input.py +3 -3
- langchain_core/utils/iter.py +15 -19
- langchain_core/utils/json.py +3 -2
- langchain_core/utils/json_schema.py +6 -6
- langchain_core/utils/mustache.py +3 -5
- langchain_core/utils/pydantic.py +16 -18
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +29 -29
- langchain_core/vectorstores/base.py +18 -21
- langchain_core/vectorstores/in_memory.py +14 -87
- langchain_core/vectorstores/utils.py +2 -2
- langchain_core/version.py +1 -1
- {langchain_core-1.0.0a5.dist-info → langchain_core-1.0.0a7.dist-info}/METADATA +10 -31
- langchain_core-1.0.0a7.dist-info/RECORD +176 -0
- {langchain_core-1.0.0a5.dist-info → langchain_core-1.0.0a7.dist-info}/WHEEL +1 -1
- langchain_core/messages/block_translators/ollama.py +0 -47
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-1.0.0a5.dist-info/RECORD +0 -181
- langchain_core-1.0.0a5.dist-info/entry_points.txt +0 -4
|
@@ -1,35 +1,517 @@
|
|
|
1
1
|
"""Derivations of standard content blocks from Google (GenAI) content."""
|
|
2
2
|
|
|
3
|
-
import warnings
|
|
3
|
+
import base64
|
|
4
|
+
import re
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from typing import Any, cast
|
|
4
7
|
|
|
5
8
|
from langchain_core.messages import AIMessage, AIMessageChunk
|
|
6
9
|
from langchain_core.messages import content as types
|
|
10
|
+
from langchain_core.messages.content import Citation, create_citation
|
|
7
11
|
|
|
8
|
-
WARNED = False
|
|
9
12
|
|
|
13
|
+
def _bytes_to_b64_str(bytes_: bytes) -> str:
|
|
14
|
+
"""Convert bytes to base64 encoded string."""
|
|
15
|
+
return base64.b64encode(bytes_).decode("utf-8")
|
|
10
16
|
|
|
11
|
-
|
|
17
|
+
|
|
18
|
+
def translate_grounding_metadata_to_citations(
|
|
19
|
+
grounding_metadata: dict[str, Any],
|
|
20
|
+
) -> list[Citation]:
|
|
21
|
+
"""Translate Google AI grounding metadata to LangChain Citations.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
grounding_metadata: Google AI grounding metadata containing web search
|
|
25
|
+
queries, grounding chunks, and grounding supports.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
List of Citation content blocks derived from the grounding metadata.
|
|
29
|
+
|
|
30
|
+
Example:
|
|
31
|
+
>>> metadata = {
|
|
32
|
+
... "web_search_queries": ["UEFA Euro 2024 winner"],
|
|
33
|
+
... "grounding_chunks": [
|
|
34
|
+
... {
|
|
35
|
+
... "web": {
|
|
36
|
+
... "uri": "https://uefa.com/euro2024",
|
|
37
|
+
... "title": "UEFA Euro 2024 Results",
|
|
38
|
+
... }
|
|
39
|
+
... }
|
|
40
|
+
... ],
|
|
41
|
+
... "grounding_supports": [
|
|
42
|
+
... {
|
|
43
|
+
... "segment": {
|
|
44
|
+
... "start_index": 0,
|
|
45
|
+
... "end_index": 47,
|
|
46
|
+
... "text": "Spain won the UEFA Euro 2024 championship",
|
|
47
|
+
... },
|
|
48
|
+
... "grounding_chunk_indices": [0],
|
|
49
|
+
... }
|
|
50
|
+
... ],
|
|
51
|
+
... }
|
|
52
|
+
>>> citations = translate_grounding_metadata_to_citations(metadata)
|
|
53
|
+
>>> len(citations)
|
|
54
|
+
1
|
|
55
|
+
>>> citations[0]["url"]
|
|
56
|
+
'https://uefa.com/euro2024'
|
|
57
|
+
"""
|
|
58
|
+
if not grounding_metadata:
|
|
59
|
+
return []
|
|
60
|
+
|
|
61
|
+
grounding_chunks = grounding_metadata.get("grounding_chunks", [])
|
|
62
|
+
grounding_supports = grounding_metadata.get("grounding_supports", [])
|
|
63
|
+
web_search_queries = grounding_metadata.get("web_search_queries", [])
|
|
64
|
+
|
|
65
|
+
citations: list[Citation] = []
|
|
66
|
+
|
|
67
|
+
for support in grounding_supports:
|
|
68
|
+
segment = support.get("segment", {})
|
|
69
|
+
chunk_indices = support.get("grounding_chunk_indices", [])
|
|
70
|
+
|
|
71
|
+
start_index = segment.get("start_index")
|
|
72
|
+
end_index = segment.get("end_index")
|
|
73
|
+
cited_text = segment.get("text")
|
|
74
|
+
|
|
75
|
+
# Create a citation for each referenced chunk
|
|
76
|
+
for chunk_index in chunk_indices:
|
|
77
|
+
if chunk_index < len(grounding_chunks):
|
|
78
|
+
chunk = grounding_chunks[chunk_index]
|
|
79
|
+
web_info = chunk.get("web", {})
|
|
80
|
+
|
|
81
|
+
citation = create_citation(
|
|
82
|
+
url=web_info.get("uri"),
|
|
83
|
+
title=web_info.get("title"),
|
|
84
|
+
start_index=start_index,
|
|
85
|
+
end_index=end_index,
|
|
86
|
+
cited_text=cited_text,
|
|
87
|
+
extras={
|
|
88
|
+
"google_ai_metadata": {
|
|
89
|
+
"web_search_queries": web_search_queries,
|
|
90
|
+
"grounding_chunk_index": chunk_index,
|
|
91
|
+
"confidence_scores": support.get("confidence_scores", []),
|
|
92
|
+
}
|
|
93
|
+
},
|
|
94
|
+
)
|
|
95
|
+
citations.append(citation)
|
|
96
|
+
|
|
97
|
+
return citations
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _convert_to_v1_from_genai_input(
|
|
101
|
+
content: list[types.ContentBlock],
|
|
102
|
+
) -> list[types.ContentBlock]:
|
|
103
|
+
"""Convert Google GenAI format blocks to v1 format.
|
|
104
|
+
|
|
105
|
+
Called when message isn't an `AIMessage` or `model_provider` isn't set on
|
|
106
|
+
`response_metadata`.
|
|
107
|
+
|
|
108
|
+
During the `.content_blocks` parsing process, we wrap blocks not recognized as a v1
|
|
109
|
+
block as a ``'non_standard'`` block with the original block stored in the ``value``
|
|
110
|
+
field. This function attempts to unpack those blocks and convert any blocks that
|
|
111
|
+
might be GenAI format to v1 ContentBlocks.
|
|
112
|
+
|
|
113
|
+
If conversion fails, the block is left as a ``'non_standard'`` block.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
content: List of content blocks to process.
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
Updated list with GenAI blocks converted to v1 format.
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
def _iter_blocks() -> Iterable[types.ContentBlock]:
|
|
123
|
+
blocks: list[dict[str, Any]] = [
|
|
124
|
+
cast("dict[str, Any]", block)
|
|
125
|
+
if block.get("type") != "non_standard"
|
|
126
|
+
else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks
|
|
127
|
+
for block in content
|
|
128
|
+
]
|
|
129
|
+
for block in blocks:
|
|
130
|
+
num_keys = len(block)
|
|
131
|
+
block_type = block.get("type")
|
|
132
|
+
|
|
133
|
+
if num_keys == 1 and (text := block.get("text")):
|
|
134
|
+
# This is probably a TextContentBlock
|
|
135
|
+
yield {"type": "text", "text": text}
|
|
136
|
+
|
|
137
|
+
elif (
|
|
138
|
+
num_keys == 1
|
|
139
|
+
and (document := block.get("document"))
|
|
140
|
+
and isinstance(document, dict)
|
|
141
|
+
and "format" in document
|
|
142
|
+
):
|
|
143
|
+
# Handle document format conversion
|
|
144
|
+
doc_format = document.get("format")
|
|
145
|
+
source = document.get("source", {})
|
|
146
|
+
|
|
147
|
+
if doc_format == "pdf" and "bytes" in source:
|
|
148
|
+
# PDF document with byte data
|
|
149
|
+
file_block: types.FileContentBlock = {
|
|
150
|
+
"type": "file",
|
|
151
|
+
"base64": source["bytes"]
|
|
152
|
+
if isinstance(source["bytes"], str)
|
|
153
|
+
else _bytes_to_b64_str(source["bytes"]),
|
|
154
|
+
"mime_type": "application/pdf",
|
|
155
|
+
}
|
|
156
|
+
# Preserve extra fields
|
|
157
|
+
extras = {
|
|
158
|
+
key: value
|
|
159
|
+
for key, value in document.items()
|
|
160
|
+
if key not in {"format", "source"}
|
|
161
|
+
}
|
|
162
|
+
if extras:
|
|
163
|
+
file_block["extras"] = extras
|
|
164
|
+
yield file_block
|
|
165
|
+
|
|
166
|
+
elif doc_format == "txt" and "text" in source:
|
|
167
|
+
# Text document
|
|
168
|
+
plain_text_block: types.PlainTextContentBlock = {
|
|
169
|
+
"type": "text-plain",
|
|
170
|
+
"text": source["text"],
|
|
171
|
+
"mime_type": "text/plain",
|
|
172
|
+
}
|
|
173
|
+
# Preserve extra fields
|
|
174
|
+
extras = {
|
|
175
|
+
key: value
|
|
176
|
+
for key, value in document.items()
|
|
177
|
+
if key not in {"format", "source"}
|
|
178
|
+
}
|
|
179
|
+
if extras:
|
|
180
|
+
plain_text_block["extras"] = extras
|
|
181
|
+
yield plain_text_block
|
|
182
|
+
|
|
183
|
+
else:
|
|
184
|
+
# Unknown document format
|
|
185
|
+
yield {"type": "non_standard", "value": block}
|
|
186
|
+
|
|
187
|
+
elif (
|
|
188
|
+
num_keys == 1
|
|
189
|
+
and (image := block.get("image"))
|
|
190
|
+
and isinstance(image, dict)
|
|
191
|
+
and "format" in image
|
|
192
|
+
):
|
|
193
|
+
# Handle image format conversion
|
|
194
|
+
img_format = image.get("format")
|
|
195
|
+
source = image.get("source", {})
|
|
196
|
+
|
|
197
|
+
if "bytes" in source:
|
|
198
|
+
# Image with byte data
|
|
199
|
+
image_block: types.ImageContentBlock = {
|
|
200
|
+
"type": "image",
|
|
201
|
+
"base64": source["bytes"]
|
|
202
|
+
if isinstance(source["bytes"], str)
|
|
203
|
+
else _bytes_to_b64_str(source["bytes"]),
|
|
204
|
+
"mime_type": f"image/{img_format}",
|
|
205
|
+
}
|
|
206
|
+
# Preserve extra fields
|
|
207
|
+
extras = {}
|
|
208
|
+
for key, value in image.items():
|
|
209
|
+
if key not in {"format", "source"}:
|
|
210
|
+
extras[key] = value
|
|
211
|
+
if extras:
|
|
212
|
+
image_block["extras"] = extras
|
|
213
|
+
yield image_block
|
|
214
|
+
|
|
215
|
+
else:
|
|
216
|
+
# Image without byte data
|
|
217
|
+
yield {"type": "non_standard", "value": block}
|
|
218
|
+
|
|
219
|
+
elif block_type == "file_data" and "file_uri" in block:
|
|
220
|
+
# Handle FileData URI-based content
|
|
221
|
+
uri_file_block: types.FileContentBlock = {
|
|
222
|
+
"type": "file",
|
|
223
|
+
"url": block["file_uri"],
|
|
224
|
+
}
|
|
225
|
+
if mime_type := block.get("mime_type"):
|
|
226
|
+
uri_file_block["mime_type"] = mime_type
|
|
227
|
+
yield uri_file_block
|
|
228
|
+
|
|
229
|
+
elif block_type == "function_call" and "name" in block:
|
|
230
|
+
# Handle function calls
|
|
231
|
+
tool_call_block: types.ToolCall = {
|
|
232
|
+
"type": "tool_call",
|
|
233
|
+
"name": block["name"],
|
|
234
|
+
"args": block.get("args", {}),
|
|
235
|
+
"id": block.get("id", ""),
|
|
236
|
+
}
|
|
237
|
+
yield tool_call_block
|
|
238
|
+
|
|
239
|
+
elif block_type == "executable_code":
|
|
240
|
+
server_tool_call_input: types.ServerToolCall = {
|
|
241
|
+
"type": "server_tool_call",
|
|
242
|
+
"name": "code_interpreter",
|
|
243
|
+
"args": {
|
|
244
|
+
"code": block.get("executable_code", ""),
|
|
245
|
+
"language": block.get("language", "python"),
|
|
246
|
+
},
|
|
247
|
+
"id": block.get("id", ""),
|
|
248
|
+
}
|
|
249
|
+
yield server_tool_call_input
|
|
250
|
+
|
|
251
|
+
elif block_type == "code_execution_result":
|
|
252
|
+
outcome = block.get("outcome", 1)
|
|
253
|
+
status = "success" if outcome == 1 else "error"
|
|
254
|
+
server_tool_result_input: types.ServerToolResult = {
|
|
255
|
+
"type": "server_tool_result",
|
|
256
|
+
"tool_call_id": block.get("tool_call_id", ""),
|
|
257
|
+
"status": status, # type: ignore[typeddict-item]
|
|
258
|
+
"output": block.get("code_execution_result", ""),
|
|
259
|
+
}
|
|
260
|
+
if outcome is not None:
|
|
261
|
+
server_tool_result_input["extras"] = {"outcome": outcome}
|
|
262
|
+
yield server_tool_result_input
|
|
263
|
+
|
|
264
|
+
elif block.get("type") in types.KNOWN_BLOCK_TYPES:
|
|
265
|
+
# We see a standard block type, so we just cast it, even if
|
|
266
|
+
# we don't fully understand it. This may be dangerous, but
|
|
267
|
+
# it's better than losing information.
|
|
268
|
+
yield cast("types.ContentBlock", block)
|
|
269
|
+
|
|
270
|
+
else:
|
|
271
|
+
# We don't understand this block at all.
|
|
272
|
+
yield {"type": "non_standard", "value": block}
|
|
273
|
+
|
|
274
|
+
return list(_iter_blocks())
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
|
278
|
+
"""Convert Google GenAI message content to v1 format.
|
|
279
|
+
|
|
280
|
+
Calling `.content_blocks` on an `AIMessage` where `response_metadata.model_provider`
|
|
281
|
+
is set to `'google_genai'` will invoke this function to parse the content into
|
|
282
|
+
standard content blocks for returning.
|
|
283
|
+
|
|
284
|
+
Args:
|
|
285
|
+
message: The AIMessage or AIMessageChunk to convert.
|
|
286
|
+
|
|
287
|
+
Returns:
|
|
288
|
+
List of standard content blocks derived from the message content.
|
|
289
|
+
"""
|
|
290
|
+
if isinstance(message.content, str):
|
|
291
|
+
# String content -> TextContentBlock (only add if non-empty in case of audio)
|
|
292
|
+
string_blocks: list[types.ContentBlock] = []
|
|
293
|
+
if message.content:
|
|
294
|
+
string_blocks.append({"type": "text", "text": message.content})
|
|
295
|
+
|
|
296
|
+
# Add any missing tool calls from message.tool_calls field
|
|
297
|
+
content_tool_call_ids = {
|
|
298
|
+
block.get("id")
|
|
299
|
+
for block in string_blocks
|
|
300
|
+
if isinstance(block, dict) and block.get("type") == "tool_call"
|
|
301
|
+
}
|
|
302
|
+
for tool_call in message.tool_calls:
|
|
303
|
+
id_ = tool_call.get("id")
|
|
304
|
+
if id_ and id_ not in content_tool_call_ids:
|
|
305
|
+
string_tool_call_block: types.ToolCall = {
|
|
306
|
+
"type": "tool_call",
|
|
307
|
+
"id": id_,
|
|
308
|
+
"name": tool_call["name"],
|
|
309
|
+
"args": tool_call["args"],
|
|
310
|
+
}
|
|
311
|
+
string_blocks.append(string_tool_call_block)
|
|
312
|
+
|
|
313
|
+
# Handle audio from additional_kwargs if present (for empty content cases)
|
|
314
|
+
audio_data = message.additional_kwargs.get("audio")
|
|
315
|
+
if audio_data and isinstance(audio_data, bytes):
|
|
316
|
+
audio_block: types.AudioContentBlock = {
|
|
317
|
+
"type": "audio",
|
|
318
|
+
"base64": _bytes_to_b64_str(audio_data),
|
|
319
|
+
"mime_type": "audio/wav", # Default to WAV for Google GenAI
|
|
320
|
+
}
|
|
321
|
+
string_blocks.append(audio_block)
|
|
322
|
+
|
|
323
|
+
grounding_metadata = message.response_metadata.get("grounding_metadata")
|
|
324
|
+
if grounding_metadata:
|
|
325
|
+
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
|
326
|
+
|
|
327
|
+
for block in string_blocks:
|
|
328
|
+
if block["type"] == "text" and citations:
|
|
329
|
+
# Add citations to the first text block only
|
|
330
|
+
block["annotations"] = cast("list[types.Annotation]", citations)
|
|
331
|
+
break
|
|
332
|
+
|
|
333
|
+
return string_blocks
|
|
334
|
+
|
|
335
|
+
if not isinstance(message.content, list):
|
|
336
|
+
# Unexpected content type, attempt to represent as text
|
|
337
|
+
return [{"type": "text", "text": str(message.content)}]
|
|
338
|
+
|
|
339
|
+
converted_blocks: list[types.ContentBlock] = []
|
|
340
|
+
|
|
341
|
+
for item in message.content:
|
|
342
|
+
if isinstance(item, str):
|
|
343
|
+
# Conversation history strings
|
|
344
|
+
|
|
345
|
+
# Citations are handled below after all blocks are converted
|
|
346
|
+
converted_blocks.append({"type": "text", "text": item}) # TextContentBlock
|
|
347
|
+
|
|
348
|
+
elif isinstance(item, dict):
|
|
349
|
+
item_type = item.get("type")
|
|
350
|
+
if item_type == "image_url":
|
|
351
|
+
# Convert image_url to standard image block (base64)
|
|
352
|
+
# (since the original implementation returned as url-base64 CC style)
|
|
353
|
+
image_url = item.get("image_url", {})
|
|
354
|
+
url = image_url.get("url", "")
|
|
355
|
+
if url:
|
|
356
|
+
# Extract base64 data
|
|
357
|
+
match = re.match(r"data:([^;]+);base64,(.+)", url)
|
|
358
|
+
if match:
|
|
359
|
+
# Data URI provided
|
|
360
|
+
mime_type, base64_data = match.groups()
|
|
361
|
+
converted_blocks.append(
|
|
362
|
+
{
|
|
363
|
+
"type": "image",
|
|
364
|
+
"base64": base64_data,
|
|
365
|
+
"mime_type": mime_type,
|
|
366
|
+
}
|
|
367
|
+
)
|
|
368
|
+
else:
|
|
369
|
+
# Assume it's raw base64 without data URI
|
|
370
|
+
try:
|
|
371
|
+
# Validate base64 and decode for mime type detection
|
|
372
|
+
decoded_bytes = base64.b64decode(url, validate=True)
|
|
373
|
+
|
|
374
|
+
image_url_b64_block = {
|
|
375
|
+
"type": "image",
|
|
376
|
+
"base64": url,
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
try:
|
|
380
|
+
import filetype # type: ignore[import-not-found] # noqa: PLC0415
|
|
381
|
+
|
|
382
|
+
# Guess mime type based on file bytes
|
|
383
|
+
mime_type = None
|
|
384
|
+
kind = filetype.guess(decoded_bytes)
|
|
385
|
+
if kind:
|
|
386
|
+
mime_type = kind.mime
|
|
387
|
+
if mime_type:
|
|
388
|
+
image_url_b64_block["mime_type"] = mime_type
|
|
389
|
+
except ImportError:
|
|
390
|
+
# filetype library not available, skip type detection
|
|
391
|
+
pass
|
|
392
|
+
|
|
393
|
+
converted_blocks.append(
|
|
394
|
+
cast("types.ImageContentBlock", image_url_b64_block)
|
|
395
|
+
)
|
|
396
|
+
except Exception:
|
|
397
|
+
# Not valid base64, treat as non-standard
|
|
398
|
+
converted_blocks.append(
|
|
399
|
+
{"type": "non_standard", "value": item}
|
|
400
|
+
)
|
|
401
|
+
else:
|
|
402
|
+
# This likely won't be reached according to previous implementations
|
|
403
|
+
converted_blocks.append({"type": "non_standard", "value": item})
|
|
404
|
+
msg = "Image URL not a data URI; appending as non-standard block."
|
|
405
|
+
raise ValueError(msg)
|
|
406
|
+
elif item_type == "function_call":
|
|
407
|
+
# Handle Google GenAI function calls
|
|
408
|
+
function_call_block: types.ToolCall = {
|
|
409
|
+
"type": "tool_call",
|
|
410
|
+
"name": item.get("name", ""),
|
|
411
|
+
"args": item.get("args", {}),
|
|
412
|
+
"id": item.get("id", ""),
|
|
413
|
+
}
|
|
414
|
+
converted_blocks.append(function_call_block)
|
|
415
|
+
elif item_type == "file_data":
|
|
416
|
+
# Handle FileData URI-based content
|
|
417
|
+
file_block: types.FileContentBlock = {
|
|
418
|
+
"type": "file",
|
|
419
|
+
"url": item.get("file_uri", ""),
|
|
420
|
+
}
|
|
421
|
+
if mime_type := item.get("mime_type"):
|
|
422
|
+
file_block["mime_type"] = mime_type
|
|
423
|
+
converted_blocks.append(file_block)
|
|
424
|
+
elif item_type == "thinking":
|
|
425
|
+
# Handle the 'thinking' type: package thoughts as a reasoning block
|
|
426
|
+
reasoning_block: types.ReasoningContentBlock = {
|
|
427
|
+
"type": "reasoning",
|
|
428
|
+
"reasoning": item.get("thinking", ""),
|
|
429
|
+
}
|
|
430
|
+
if signature := item.get("signature"):
|
|
431
|
+
reasoning_block["extras"] = {"signature": signature}
|
|
432
|
+
|
|
433
|
+
converted_blocks.append(reasoning_block)
|
|
434
|
+
elif item_type == "executable_code":
|
|
435
|
+
# Convert to standard server tool call block at the moment
|
|
436
|
+
server_tool_call_block: types.ServerToolCall = {
|
|
437
|
+
"type": "server_tool_call",
|
|
438
|
+
"name": "code_interpreter",
|
|
439
|
+
"args": {
|
|
440
|
+
"code": item.get("executable_code", ""),
|
|
441
|
+
"language": item.get("language", "python"), # Default to python
|
|
442
|
+
},
|
|
443
|
+
"id": item.get("id", ""),
|
|
444
|
+
}
|
|
445
|
+
converted_blocks.append(server_tool_call_block)
|
|
446
|
+
elif item_type == "code_execution_result":
|
|
447
|
+
# Map outcome to status: OUTCOME_OK (1) → success, else → error
|
|
448
|
+
outcome = item.get("outcome", 1)
|
|
449
|
+
status = "success" if outcome == 1 else "error"
|
|
450
|
+
server_tool_result_block: types.ServerToolResult = {
|
|
451
|
+
"type": "server_tool_result",
|
|
452
|
+
"tool_call_id": item.get("tool_call_id", ""),
|
|
453
|
+
"status": status, # type: ignore[typeddict-item]
|
|
454
|
+
"output": item.get("code_execution_result", ""),
|
|
455
|
+
}
|
|
456
|
+
# Preserve original outcome in extras
|
|
457
|
+
if outcome is not None:
|
|
458
|
+
server_tool_result_block["extras"] = {"outcome": outcome}
|
|
459
|
+
converted_blocks.append(server_tool_result_block)
|
|
460
|
+
else:
|
|
461
|
+
# Unknown type, preserve as non-standard
|
|
462
|
+
converted_blocks.append({"type": "non_standard", "value": item})
|
|
463
|
+
else:
|
|
464
|
+
# Non-dict, non-string content
|
|
465
|
+
converted_blocks.append({"type": "non_standard", "value": item})
|
|
466
|
+
|
|
467
|
+
grounding_metadata = message.response_metadata.get("grounding_metadata")
|
|
468
|
+
if grounding_metadata:
|
|
469
|
+
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
|
470
|
+
|
|
471
|
+
for block in converted_blocks:
|
|
472
|
+
if block["type"] == "text" and citations:
|
|
473
|
+
# Add citations to text blocks (only the first text block)
|
|
474
|
+
block["annotations"] = cast("list[types.Annotation]", citations)
|
|
475
|
+
break
|
|
476
|
+
|
|
477
|
+
# Audio is stored on the message.additional_kwargs
|
|
478
|
+
audio_data = message.additional_kwargs.get("audio")
|
|
479
|
+
if audio_data and isinstance(audio_data, bytes):
|
|
480
|
+
audio_block_kwargs: types.AudioContentBlock = {
|
|
481
|
+
"type": "audio",
|
|
482
|
+
"base64": _bytes_to_b64_str(audio_data),
|
|
483
|
+
"mime_type": "audio/wav", # Default to WAV for Google GenAI
|
|
484
|
+
}
|
|
485
|
+
converted_blocks.append(audio_block_kwargs)
|
|
486
|
+
|
|
487
|
+
# Add any missing tool calls from message.tool_calls field
|
|
488
|
+
content_tool_call_ids = {
|
|
489
|
+
block.get("id")
|
|
490
|
+
for block in converted_blocks
|
|
491
|
+
if isinstance(block, dict) and block.get("type") == "tool_call"
|
|
492
|
+
}
|
|
493
|
+
for tool_call in message.tool_calls:
|
|
494
|
+
id_ = tool_call.get("id")
|
|
495
|
+
if id_ and id_ not in content_tool_call_ids:
|
|
496
|
+
missing_tool_call_block: types.ToolCall = {
|
|
497
|
+
"type": "tool_call",
|
|
498
|
+
"id": id_,
|
|
499
|
+
"name": tool_call["name"],
|
|
500
|
+
"args": tool_call["args"],
|
|
501
|
+
}
|
|
502
|
+
converted_blocks.append(missing_tool_call_block)
|
|
503
|
+
|
|
504
|
+
return converted_blocks
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
|
12
508
|
"""Derive standard content blocks from a message with Google (GenAI) content."""
|
|
13
|
-
|
|
14
|
-
if not WARNED:
|
|
15
|
-
warning_message = (
|
|
16
|
-
"Content block standardization is not yet fully supported for Google GenAI."
|
|
17
|
-
)
|
|
18
|
-
warnings.warn(warning_message, stacklevel=2)
|
|
19
|
-
WARNED = True
|
|
20
|
-
raise NotImplementedError
|
|
509
|
+
return _convert_to_v1_from_genai(message)
|
|
21
510
|
|
|
22
511
|
|
|
23
|
-
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
|
512
|
+
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
|
24
513
|
"""Derive standard content blocks from a chunk with Google (GenAI) content."""
|
|
25
|
-
|
|
26
|
-
if not WARNED:
|
|
27
|
-
warning_message = (
|
|
28
|
-
"Content block standardization is not yet fully supported for Google GenAI."
|
|
29
|
-
)
|
|
30
|
-
warnings.warn(warning_message, stacklevel=2)
|
|
31
|
-
WARNED = True
|
|
32
|
-
raise NotImplementedError
|
|
514
|
+
return _convert_to_v1_from_genai(message)
|
|
33
515
|
|
|
34
516
|
|
|
35
517
|
def _register_google_genai_translator() -> None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Derivations of standard content blocks from LangChain v0 multimodal content."""
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from langchain_core.messages import content as types
|
|
6
6
|
|
|
@@ -45,7 +45,7 @@ def _convert_v0_multimodal_input_to_v1(
|
|
|
45
45
|
|
|
46
46
|
def _convert_legacy_v0_content_block_to_v1(
|
|
47
47
|
block: dict,
|
|
48
|
-
) ->
|
|
48
|
+
) -> types.ContentBlock | dict:
|
|
49
49
|
"""Convert a LangChain v0 content block to v1 format.
|
|
50
50
|
|
|
51
51
|
Preserves unknown keys as extras to avoid data loss.
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import json
|
|
6
6
|
import warnings
|
|
7
7
|
from collections.abc import Iterable
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Literal,
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
9
9
|
|
|
10
10
|
from langchain_core.language_models._utils import (
|
|
11
11
|
_parse_data_uri,
|
|
@@ -401,7 +401,7 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage:
|
|
|
401
401
|
|
|
402
402
|
def _convert_openai_format_to_data_block(
|
|
403
403
|
block: dict,
|
|
404
|
-
) ->
|
|
404
|
+
) -> types.ContentBlock | dict[Any, Any]:
|
|
405
405
|
"""Convert OpenAI image/audio/file content block to respective v1 multimodal block.
|
|
406
406
|
|
|
407
407
|
We expect that the incoming block is verified to be in OpenAI Chat Completions
|
|
@@ -677,9 +677,9 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
|
|
677
677
|
yield cast("types.ImageContentBlock", new_block)
|
|
678
678
|
|
|
679
679
|
elif block_type == "function_call":
|
|
680
|
-
tool_call_block:
|
|
681
|
-
|
|
682
|
-
|
|
680
|
+
tool_call_block: (
|
|
681
|
+
types.ToolCall | types.InvalidToolCall | types.ToolCallChunk | None
|
|
682
|
+
) = None
|
|
683
683
|
call_id = block.get("call_id", "")
|
|
684
684
|
|
|
685
685
|
from langchain_core.messages import AIMessageChunk # noqa: PLC0415
|
|
@@ -726,7 +726,7 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
|
|
726
726
|
if "index" in block:
|
|
727
727
|
web_search_call["index"] = f"lc_wsc_{block['index']}"
|
|
728
728
|
|
|
729
|
-
sources:
|
|
729
|
+
sources: dict[str, Any] | None = None
|
|
730
730
|
if "action" in block and isinstance(block["action"], dict):
|
|
731
731
|
if "sources" in block["action"]:
|
|
732
732
|
sources = block["action"]["sources"]
|