langchain-core 0.4.0.dev0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +45 -70
- langchain_core/_api/deprecation.py +80 -80
- langchain_core/_api/path.py +22 -8
- langchain_core/_import_utils.py +10 -4
- langchain_core/agents.py +25 -21
- langchain_core/caches.py +53 -63
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +341 -348
- langchain_core/callbacks/file.py +55 -44
- langchain_core/callbacks/manager.py +546 -683
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +35 -36
- langchain_core/callbacks/usage.py +65 -70
- langchain_core/chat_history.py +48 -55
- langchain_core/document_loaders/base.py +46 -21
- langchain_core/document_loaders/langsmith.py +39 -36
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +96 -74
- langchain_core/documents/compressor.py +12 -9
- langchain_core/documents/transformers.py +29 -28
- langchain_core/embeddings/fake.py +56 -57
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/example_selectors/length_based.py +1 -1
- langchain_core/example_selectors/semantic_similarity.py +21 -25
- langchain_core/exceptions.py +15 -9
- langchain_core/globals.py +4 -163
- langchain_core/indexing/api.py +132 -125
- langchain_core/indexing/base.py +64 -67
- langchain_core/indexing/in_memory.py +26 -6
- langchain_core/language_models/__init__.py +15 -27
- langchain_core/language_models/_utils.py +267 -117
- langchain_core/language_models/base.py +92 -177
- langchain_core/language_models/chat_models.py +547 -407
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +72 -118
- langchain_core/language_models/llms.py +168 -242
- langchain_core/load/dump.py +8 -11
- langchain_core/load/load.py +32 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +50 -56
- langchain_core/messages/__init__.py +36 -51
- langchain_core/messages/ai.py +377 -150
- langchain_core/messages/base.py +239 -47
- langchain_core/messages/block_translators/__init__.py +111 -0
- langchain_core/messages/block_translators/anthropic.py +470 -0
- langchain_core/messages/block_translators/bedrock.py +94 -0
- langchain_core/messages/block_translators/bedrock_converse.py +297 -0
- langchain_core/messages/block_translators/google_genai.py +530 -0
- langchain_core/messages/block_translators/google_vertexai.py +21 -0
- langchain_core/messages/block_translators/groq.py +143 -0
- langchain_core/messages/block_translators/langchain_v0.py +301 -0
- langchain_core/messages/block_translators/openai.py +1010 -0
- langchain_core/messages/chat.py +2 -3
- langchain_core/messages/content.py +1423 -0
- langchain_core/messages/function.py +7 -7
- langchain_core/messages/human.py +44 -38
- langchain_core/messages/modifier.py +3 -2
- langchain_core/messages/system.py +40 -27
- langchain_core/messages/tool.py +160 -58
- langchain_core/messages/utils.py +527 -638
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +68 -104
- langchain_core/output_parsers/json.py +13 -17
- langchain_core/output_parsers/list.py +11 -33
- langchain_core/output_parsers/openai_functions.py +56 -74
- langchain_core/output_parsers/openai_tools.py +68 -109
- langchain_core/output_parsers/pydantic.py +15 -13
- langchain_core/output_parsers/string.py +6 -2
- langchain_core/output_parsers/transform.py +17 -60
- langchain_core/output_parsers/xml.py +34 -44
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +26 -11
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +17 -6
- langchain_core/outputs/llm_result.py +15 -8
- langchain_core/prompt_values.py +29 -123
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +48 -63
- langchain_core/prompts/chat.py +259 -288
- langchain_core/prompts/dict.py +19 -11
- langchain_core/prompts/few_shot.py +84 -90
- langchain_core/prompts/few_shot_with_templates.py +14 -12
- langchain_core/prompts/image.py +19 -14
- langchain_core/prompts/loading.py +6 -8
- langchain_core/prompts/message.py +7 -8
- langchain_core/prompts/prompt.py +42 -43
- langchain_core/prompts/string.py +37 -16
- langchain_core/prompts/structured.py +43 -46
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +52 -192
- langchain_core/runnables/base.py +1727 -1683
- langchain_core/runnables/branch.py +52 -73
- langchain_core/runnables/config.py +89 -103
- langchain_core/runnables/configurable.py +128 -130
- langchain_core/runnables/fallbacks.py +93 -82
- langchain_core/runnables/graph.py +127 -127
- langchain_core/runnables/graph_ascii.py +63 -41
- langchain_core/runnables/graph_mermaid.py +87 -70
- langchain_core/runnables/graph_png.py +31 -36
- langchain_core/runnables/history.py +145 -161
- langchain_core/runnables/passthrough.py +141 -144
- langchain_core/runnables/retry.py +84 -68
- langchain_core/runnables/router.py +33 -37
- langchain_core/runnables/schema.py +79 -72
- langchain_core/runnables/utils.py +95 -139
- langchain_core/stores.py +85 -131
- langchain_core/structured_query.py +11 -15
- langchain_core/sys_info.py +31 -32
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +221 -247
- langchain_core/tools/convert.py +144 -161
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -19
- langchain_core/tools/simple.py +52 -29
- langchain_core/tools/structured.py +56 -60
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +103 -112
- langchain_core/tracers/context.py +29 -48
- langchain_core/tracers/core.py +142 -105
- langchain_core/tracers/evaluation.py +30 -34
- langchain_core/tracers/event_stream.py +162 -117
- langchain_core/tracers/langchain.py +34 -36
- langchain_core/tracers/log_stream.py +87 -49
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +18 -34
- langchain_core/tracers/run_collector.py +8 -20
- langchain_core/tracers/schemas.py +0 -125
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +47 -9
- langchain_core/utils/aiter.py +70 -66
- langchain_core/utils/env.py +12 -9
- langchain_core/utils/function_calling.py +139 -206
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +6 -6
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +48 -45
- langchain_core/utils/json.py +14 -4
- langchain_core/utils/json_schema.py +159 -43
- langchain_core/utils/mustache.py +32 -25
- langchain_core/utils/pydantic.py +67 -40
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +104 -62
- langchain_core/vectorstores/base.py +131 -179
- langchain_core/vectorstores/in_memory.py +113 -182
- langchain_core/vectorstores/utils.py +23 -17
- langchain_core/version.py +1 -1
- langchain_core-1.0.0.dist-info/METADATA +68 -0
- langchain_core-1.0.0.dist-info/RECORD +172 -0
- {langchain_core-0.4.0.dev0.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -448
- langchain_core/memory.py +0 -116
- langchain_core/messages/content_blocks.py +0 -1435
- langchain_core/prompts/pipeline.py +0 -133
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -23
- langchain_core/utils/loading.py +0 -31
- langchain_core/v1/__init__.py +0 -1
- langchain_core/v1/chat_models.py +0 -1047
- langchain_core/v1/messages.py +0 -755
- langchain_core-0.4.0.dev0.dist-info/METADATA +0 -108
- langchain_core-0.4.0.dev0.dist-info/RECORD +0 -177
- langchain_core-0.4.0.dev0.dist-info/entry_points.txt +0 -4
langchain_core/messages/utils.py
CHANGED
|
@@ -14,16 +14,13 @@ import inspect
|
|
|
14
14
|
import json
|
|
15
15
|
import logging
|
|
16
16
|
import math
|
|
17
|
-
from collections.abc import Iterable, Sequence
|
|
17
|
+
from collections.abc import Callable, Iterable, Sequence
|
|
18
18
|
from functools import partial
|
|
19
19
|
from typing import (
|
|
20
20
|
TYPE_CHECKING,
|
|
21
21
|
Annotated,
|
|
22
22
|
Any,
|
|
23
|
-
Callable,
|
|
24
23
|
Literal,
|
|
25
|
-
Optional,
|
|
26
|
-
Union,
|
|
27
24
|
cast,
|
|
28
25
|
overload,
|
|
29
26
|
)
|
|
@@ -31,29 +28,33 @@ from typing import (
|
|
|
31
28
|
from pydantic import Discriminator, Field, Tag
|
|
32
29
|
|
|
33
30
|
from langchain_core.exceptions import ErrorCode, create_message
|
|
34
|
-
from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
|
|
35
31
|
from langchain_core.messages.ai import AIMessage, AIMessageChunk
|
|
36
32
|
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
|
|
33
|
+
from langchain_core.messages.block_translators.openai import (
|
|
34
|
+
convert_to_openai_data_block,
|
|
35
|
+
)
|
|
37
36
|
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
|
37
|
+
from langchain_core.messages.content import (
|
|
38
|
+
is_data_content_block,
|
|
39
|
+
)
|
|
38
40
|
from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
|
|
39
41
|
from langchain_core.messages.human import HumanMessage, HumanMessageChunk
|
|
40
42
|
from langchain_core.messages.modifier import RemoveMessage
|
|
41
43
|
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
|
|
42
44
|
from langchain_core.messages.tool import ToolCall, ToolMessage, ToolMessageChunk
|
|
43
|
-
from langchain_core.v1.messages import AIMessage as AIMessageV1
|
|
44
|
-
from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1
|
|
45
|
-
from langchain_core.v1.messages import HumanMessage as HumanMessageV1
|
|
46
|
-
from langchain_core.v1.messages import MessageV1, MessageV1Types
|
|
47
|
-
from langchain_core.v1.messages import SystemMessage as SystemMessageV1
|
|
48
|
-
from langchain_core.v1.messages import ToolMessage as ToolMessageV1
|
|
49
45
|
|
|
50
46
|
if TYPE_CHECKING:
|
|
51
|
-
from langchain_text_splitters import TextSplitter
|
|
52
|
-
|
|
53
47
|
from langchain_core.language_models import BaseLanguageModel
|
|
54
48
|
from langchain_core.prompt_values import PromptValue
|
|
55
49
|
from langchain_core.runnables.base import Runnable
|
|
56
50
|
|
|
51
|
+
try:
|
|
52
|
+
from langchain_text_splitters import TextSplitter
|
|
53
|
+
|
|
54
|
+
_HAS_LANGCHAIN_TEXT_SPLITTERS = True
|
|
55
|
+
except ImportError:
|
|
56
|
+
_HAS_LANGCHAIN_TEXT_SPLITTERS = False
|
|
57
|
+
|
|
57
58
|
logger = logging.getLogger(__name__)
|
|
58
59
|
|
|
59
60
|
|
|
@@ -71,34 +72,32 @@ def _get_type(v: Any) -> str:
|
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
AnyMessage = Annotated[
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
|
|
87
|
-
],
|
|
75
|
+
Annotated[AIMessage, Tag(tag="ai")]
|
|
76
|
+
| Annotated[HumanMessage, Tag(tag="human")]
|
|
77
|
+
| Annotated[ChatMessage, Tag(tag="chat")]
|
|
78
|
+
| Annotated[SystemMessage, Tag(tag="system")]
|
|
79
|
+
| Annotated[FunctionMessage, Tag(tag="function")]
|
|
80
|
+
| Annotated[ToolMessage, Tag(tag="tool")]
|
|
81
|
+
| Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")]
|
|
82
|
+
| Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")]
|
|
83
|
+
| Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")]
|
|
84
|
+
| Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")]
|
|
85
|
+
| Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")]
|
|
86
|
+
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
|
|
88
87
|
Field(discriminator=Discriminator(_get_type)),
|
|
89
88
|
]
|
|
89
|
+
""""A type representing any defined `Message` or `MessageChunk` type."""
|
|
90
90
|
|
|
91
91
|
|
|
92
92
|
def get_buffer_string(
|
|
93
93
|
messages: Sequence[BaseMessage], human_prefix: str = "Human", ai_prefix: str = "AI"
|
|
94
94
|
) -> str:
|
|
95
|
-
r"""Convert a sequence of
|
|
95
|
+
r"""Convert a sequence of messages to strings and concatenate them into one string.
|
|
96
96
|
|
|
97
97
|
Args:
|
|
98
98
|
messages: Messages to be converted to strings.
|
|
99
|
-
human_prefix: The prefix to prepend to contents of
|
|
100
|
-
|
|
101
|
-
ai_prefix: THe prefix to prepend to contents of AIMessages. Default is "AI".
|
|
99
|
+
human_prefix: The prefix to prepend to contents of `HumanMessage`s.
|
|
100
|
+
ai_prefix: The prefix to prepend to contents of `AIMessage`.
|
|
102
101
|
|
|
103
102
|
Returns:
|
|
104
103
|
A single string concatenation of all input messages.
|
|
@@ -107,17 +106,16 @@ def get_buffer_string(
|
|
|
107
106
|
ValueError: If an unsupported message type is encountered.
|
|
108
107
|
|
|
109
108
|
Example:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
109
|
+
```python
|
|
110
|
+
from langchain_core import AIMessage, HumanMessage
|
|
111
|
+
|
|
112
|
+
messages = [
|
|
113
|
+
HumanMessage(content="Hi, how are you?"),
|
|
114
|
+
AIMessage(content="Good, how are you?"),
|
|
115
|
+
]
|
|
116
|
+
get_buffer_string(messages)
|
|
117
|
+
# -> "Human: Hi, how are you?\nAI: Good, how are you?"
|
|
118
|
+
```
|
|
121
119
|
"""
|
|
122
120
|
string_messages = []
|
|
123
121
|
for m in messages:
|
|
@@ -136,7 +134,7 @@ def get_buffer_string(
|
|
|
136
134
|
else:
|
|
137
135
|
msg = f"Got unsupported message type: {m}"
|
|
138
136
|
raise ValueError(msg) # noqa: TRY004
|
|
139
|
-
message = f"{role}: {m.text
|
|
137
|
+
message = f"{role}: {m.text}"
|
|
140
138
|
if isinstance(m, AIMessage) and "function_call" in m.additional_kwargs:
|
|
141
139
|
message += f"{m.additional_kwargs['function_call']}"
|
|
142
140
|
string_messages.append(message)
|
|
@@ -177,19 +175,20 @@ def _message_from_dict(message: dict) -> BaseMessage:
|
|
|
177
175
|
|
|
178
176
|
|
|
179
177
|
def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
|
|
180
|
-
"""Convert a sequence of messages from dicts to Message objects.
|
|
178
|
+
"""Convert a sequence of messages from dicts to `Message` objects.
|
|
181
179
|
|
|
182
180
|
Args:
|
|
183
181
|
messages: Sequence of messages (as dicts) to convert.
|
|
184
182
|
|
|
185
183
|
Returns:
|
|
186
184
|
list of messages (BaseMessages).
|
|
185
|
+
|
|
187
186
|
"""
|
|
188
187
|
return [_message_from_dict(m) for m in messages]
|
|
189
188
|
|
|
190
189
|
|
|
191
|
-
def message_chunk_to_message(chunk:
|
|
192
|
-
"""Convert a message chunk to a
|
|
190
|
+
def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
|
|
191
|
+
"""Convert a message chunk to a `Message`.
|
|
193
192
|
|
|
194
193
|
Args:
|
|
195
194
|
chunk: Message chunk to convert.
|
|
@@ -202,43 +201,45 @@ def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage:
|
|
|
202
201
|
# chunk classes always have the equivalent non-chunk class as their first parent
|
|
203
202
|
ignore_keys = ["type"]
|
|
204
203
|
if isinstance(chunk, AIMessageChunk):
|
|
205
|
-
ignore_keys.
|
|
204
|
+
ignore_keys.extend(["tool_call_chunks", "chunk_position"])
|
|
206
205
|
return chunk.__class__.__mro__[1](
|
|
207
206
|
**{k: v for k, v in chunk.__dict__.items() if k not in ignore_keys}
|
|
208
207
|
)
|
|
209
208
|
|
|
210
209
|
|
|
211
|
-
MessageLikeRepresentation =
|
|
212
|
-
BaseMessage
|
|
213
|
-
|
|
210
|
+
MessageLikeRepresentation = (
|
|
211
|
+
BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]
|
|
212
|
+
)
|
|
213
|
+
"""A type representing the various ways a message can be represented."""
|
|
214
214
|
|
|
215
215
|
|
|
216
216
|
def _create_message_from_message_type(
|
|
217
217
|
message_type: str,
|
|
218
218
|
content: str,
|
|
219
|
-
name:
|
|
220
|
-
tool_call_id:
|
|
221
|
-
tool_calls:
|
|
222
|
-
id:
|
|
219
|
+
name: str | None = None,
|
|
220
|
+
tool_call_id: str | None = None,
|
|
221
|
+
tool_calls: list[dict[str, Any]] | None = None,
|
|
222
|
+
id: str | None = None,
|
|
223
223
|
**additional_kwargs: Any,
|
|
224
224
|
) -> BaseMessage:
|
|
225
|
-
"""Create a message from a
|
|
225
|
+
"""Create a message from a `Message` type and content string.
|
|
226
226
|
|
|
227
227
|
Args:
|
|
228
|
-
message_type: (str) the type of the message (e.g.,
|
|
228
|
+
message_type: (str) the type of the message (e.g., `'human'`, `'ai'`, etc.).
|
|
229
229
|
content: (str) the content string.
|
|
230
|
-
name: (str) the name of the message.
|
|
231
|
-
tool_call_id: (str) the tool call id.
|
|
232
|
-
tool_calls: (list[dict[str, Any]]) the tool calls.
|
|
233
|
-
id: (str) the id of the message.
|
|
230
|
+
name: (str) the name of the message.
|
|
231
|
+
tool_call_id: (str) the tool call id.
|
|
232
|
+
tool_calls: (list[dict[str, Any]]) the tool calls.
|
|
233
|
+
id: (str) the id of the message.
|
|
234
234
|
additional_kwargs: (dict[str, Any]) additional keyword arguments.
|
|
235
235
|
|
|
236
236
|
Returns:
|
|
237
237
|
a message of the appropriate type.
|
|
238
238
|
|
|
239
239
|
Raises:
|
|
240
|
-
ValueError: if the message type is not one of
|
|
241
|
-
|
|
240
|
+
ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
|
|
241
|
+
`'assistant'`, `'function'`, `'tool'`, `'system'`, or
|
|
242
|
+
`'developer'`.
|
|
242
243
|
"""
|
|
243
244
|
kwargs: dict[str, Any] = {}
|
|
244
245
|
if name is not None:
|
|
@@ -287,6 +288,9 @@ def _create_message_from_message_type(
|
|
|
287
288
|
message = FunctionMessage(content=content, **kwargs)
|
|
288
289
|
elif message_type == "tool":
|
|
289
290
|
artifact = kwargs.get("additional_kwargs", {}).pop("artifact", None)
|
|
291
|
+
status = kwargs.get("additional_kwargs", {}).pop("status", None)
|
|
292
|
+
if status is not None:
|
|
293
|
+
kwargs["status"] = status
|
|
290
294
|
message = ToolMessage(content=content, artifact=artifact, **kwargs)
|
|
291
295
|
elif message_type == "remove":
|
|
292
296
|
message = RemoveMessage(**kwargs)
|
|
@@ -300,150 +304,27 @@ def _create_message_from_message_type(
|
|
|
300
304
|
return message
|
|
301
305
|
|
|
302
306
|
|
|
303
|
-
def _create_message_from_message_type_v1(
|
|
304
|
-
message_type: str,
|
|
305
|
-
content: str,
|
|
306
|
-
name: Optional[str] = None,
|
|
307
|
-
tool_call_id: Optional[str] = None,
|
|
308
|
-
tool_calls: Optional[list[dict[str, Any]]] = None,
|
|
309
|
-
id: Optional[str] = None,
|
|
310
|
-
**kwargs: Any,
|
|
311
|
-
) -> MessageV1:
|
|
312
|
-
"""Create a message from a message type and content string.
|
|
313
|
-
|
|
314
|
-
Args:
|
|
315
|
-
message_type: (str) the type of the message (e.g., "human", "ai", etc.).
|
|
316
|
-
content: (str) the content string.
|
|
317
|
-
name: (str) the name of the message. Default is None.
|
|
318
|
-
tool_call_id: (str) the tool call id. Default is None.
|
|
319
|
-
tool_calls: (list[dict[str, Any]]) the tool calls. Default is None.
|
|
320
|
-
id: (str) the id of the message. Default is None.
|
|
321
|
-
kwargs: (dict[str, Any]) additional keyword arguments.
|
|
322
|
-
|
|
323
|
-
Returns:
|
|
324
|
-
a message of the appropriate type.
|
|
325
|
-
|
|
326
|
-
Raises:
|
|
327
|
-
ValueError: if the message type is not one of "human", "user", "ai",
|
|
328
|
-
"assistant", "tool", "system", or "developer".
|
|
329
|
-
"""
|
|
330
|
-
if name is not None:
|
|
331
|
-
kwargs["name"] = name
|
|
332
|
-
if tool_call_id is not None:
|
|
333
|
-
kwargs["tool_call_id"] = tool_call_id
|
|
334
|
-
if kwargs and (response_metadata := kwargs.pop("response_metadata", None)):
|
|
335
|
-
kwargs["response_metadata"] = response_metadata
|
|
336
|
-
if id is not None:
|
|
337
|
-
kwargs["id"] = id
|
|
338
|
-
if tool_calls is not None:
|
|
339
|
-
kwargs["tool_calls"] = []
|
|
340
|
-
for tool_call in tool_calls:
|
|
341
|
-
# Convert OpenAI-format tool call to LangChain format.
|
|
342
|
-
if "function" in tool_call:
|
|
343
|
-
args = tool_call["function"]["arguments"]
|
|
344
|
-
if isinstance(args, str):
|
|
345
|
-
args = json.loads(args, strict=False)
|
|
346
|
-
kwargs["tool_calls"].append(
|
|
347
|
-
{
|
|
348
|
-
"name": tool_call["function"]["name"],
|
|
349
|
-
"args": args,
|
|
350
|
-
"id": tool_call["id"],
|
|
351
|
-
"type": "tool_call",
|
|
352
|
-
}
|
|
353
|
-
)
|
|
354
|
-
else:
|
|
355
|
-
kwargs["tool_calls"].append(tool_call)
|
|
356
|
-
if message_type in {"human", "user"}:
|
|
357
|
-
message: MessageV1 = HumanMessageV1(content=content, **kwargs)
|
|
358
|
-
elif message_type in {"ai", "assistant"}:
|
|
359
|
-
message = AIMessageV1(content=content, **kwargs)
|
|
360
|
-
elif message_type in {"system", "developer"}:
|
|
361
|
-
if message_type == "developer":
|
|
362
|
-
kwargs["custom_role"] = "developer"
|
|
363
|
-
message = SystemMessageV1(content=content, **kwargs)
|
|
364
|
-
elif message_type == "tool":
|
|
365
|
-
artifact = kwargs.pop("artifact", None)
|
|
366
|
-
message = ToolMessageV1(content=content, artifact=artifact, **kwargs)
|
|
367
|
-
else:
|
|
368
|
-
msg = (
|
|
369
|
-
f"Unexpected message type: '{message_type}'. Use one of 'human',"
|
|
370
|
-
f" 'user', 'ai', 'assistant', 'function', 'tool', 'system', or 'developer'."
|
|
371
|
-
)
|
|
372
|
-
msg = create_message(message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE)
|
|
373
|
-
raise ValueError(msg)
|
|
374
|
-
return message
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
def convert_from_v1_message(message: MessageV1) -> BaseMessage:
|
|
378
|
-
"""Compatibility layer to convert v1 messages to current messages.
|
|
379
|
-
|
|
380
|
-
Args:
|
|
381
|
-
message: MessageV1 instance to convert.
|
|
382
|
-
|
|
383
|
-
Returns:
|
|
384
|
-
BaseMessage: Converted message instance.
|
|
385
|
-
"""
|
|
386
|
-
content = cast("Union[str, list[str | dict]]", message.content)
|
|
387
|
-
if isinstance(message, AIMessageV1):
|
|
388
|
-
return AIMessage(
|
|
389
|
-
content=content,
|
|
390
|
-
id=message.id,
|
|
391
|
-
name=message.name,
|
|
392
|
-
tool_calls=message.tool_calls,
|
|
393
|
-
response_metadata=cast("dict", message.response_metadata),
|
|
394
|
-
)
|
|
395
|
-
if isinstance(message, AIMessageChunkV1):
|
|
396
|
-
return AIMessageChunk(
|
|
397
|
-
content=content,
|
|
398
|
-
id=message.id,
|
|
399
|
-
name=message.name,
|
|
400
|
-
tool_call_chunks=message.tool_call_chunks,
|
|
401
|
-
response_metadata=cast("dict", message.response_metadata),
|
|
402
|
-
)
|
|
403
|
-
if isinstance(message, HumanMessageV1):
|
|
404
|
-
return HumanMessage(
|
|
405
|
-
content=content,
|
|
406
|
-
id=message.id,
|
|
407
|
-
name=message.name,
|
|
408
|
-
)
|
|
409
|
-
if isinstance(message, SystemMessageV1):
|
|
410
|
-
return SystemMessage(
|
|
411
|
-
content=content,
|
|
412
|
-
id=message.id,
|
|
413
|
-
)
|
|
414
|
-
if isinstance(message, ToolMessageV1):
|
|
415
|
-
return ToolMessage(
|
|
416
|
-
content=content,
|
|
417
|
-
id=message.id,
|
|
418
|
-
tool_call_id=message.tool_call_id,
|
|
419
|
-
artifact=message.artifact,
|
|
420
|
-
name=message.name,
|
|
421
|
-
status=message.status,
|
|
422
|
-
)
|
|
423
|
-
message = f"Unsupported message type: {type(message)}"
|
|
424
|
-
raise NotImplementedError(message)
|
|
425
|
-
|
|
426
|
-
|
|
427
307
|
def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
|
428
|
-
"""Instantiate a
|
|
308
|
+
"""Instantiate a `Message` from a variety of message formats.
|
|
429
309
|
|
|
430
310
|
The message format can be one of the following:
|
|
431
311
|
|
|
432
|
-
- BaseMessagePromptTemplate
|
|
433
|
-
- BaseMessage
|
|
434
|
-
- 2-tuple of (role string, template); e.g., (
|
|
312
|
+
- `BaseMessagePromptTemplate`
|
|
313
|
+
- `BaseMessage`
|
|
314
|
+
- 2-tuple of (role string, template); e.g., (`'human'`, `'{user_input}'`)
|
|
435
315
|
- dict: a message dict with role and content keys
|
|
436
|
-
- string: shorthand for (
|
|
316
|
+
- string: shorthand for (`'human'`, template); e.g., `'{user_input}'`
|
|
437
317
|
|
|
438
318
|
Args:
|
|
439
319
|
message: a representation of a message in one of the supported formats.
|
|
440
320
|
|
|
441
321
|
Returns:
|
|
442
|
-
|
|
322
|
+
An instance of a message or a message template.
|
|
443
323
|
|
|
444
324
|
Raises:
|
|
445
325
|
NotImplementedError: if the message type is not supported.
|
|
446
326
|
ValueError: if the message dict does not contain the required keys.
|
|
327
|
+
|
|
447
328
|
"""
|
|
448
329
|
if isinstance(message, BaseMessage):
|
|
449
330
|
message_ = message
|
|
@@ -471,66 +352,6 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
|
|
471
352
|
message_ = _create_message_from_message_type(
|
|
472
353
|
msg_type, msg_content, **msg_kwargs
|
|
473
354
|
)
|
|
474
|
-
elif isinstance(message, MessageV1Types):
|
|
475
|
-
message_ = convert_from_v1_message(message)
|
|
476
|
-
else:
|
|
477
|
-
msg = f"Unsupported message type: {type(message)}"
|
|
478
|
-
msg = create_message(message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE)
|
|
479
|
-
raise NotImplementedError(msg)
|
|
480
|
-
|
|
481
|
-
return message_
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
def _convert_to_message_v1(message: MessageLikeRepresentation) -> MessageV1:
|
|
485
|
-
"""Instantiate a message from a variety of message formats.
|
|
486
|
-
|
|
487
|
-
The message format can be one of the following:
|
|
488
|
-
|
|
489
|
-
- BaseMessagePromptTemplate
|
|
490
|
-
- BaseMessage
|
|
491
|
-
- 2-tuple of (role string, template); e.g., ("human", "{user_input}")
|
|
492
|
-
- dict: a message dict with role and content keys
|
|
493
|
-
- string: shorthand for ("human", template); e.g., "{user_input}"
|
|
494
|
-
|
|
495
|
-
Args:
|
|
496
|
-
message: a representation of a message in one of the supported formats.
|
|
497
|
-
|
|
498
|
-
Returns:
|
|
499
|
-
an instance of a message or a message template.
|
|
500
|
-
|
|
501
|
-
Raises:
|
|
502
|
-
NotImplementedError: if the message type is not supported.
|
|
503
|
-
ValueError: if the message dict does not contain the required keys.
|
|
504
|
-
"""
|
|
505
|
-
if isinstance(message, MessageV1Types):
|
|
506
|
-
if isinstance(message, AIMessageChunkV1):
|
|
507
|
-
message_: MessageV1 = message.to_message()
|
|
508
|
-
else:
|
|
509
|
-
message_ = message
|
|
510
|
-
elif isinstance(message, str):
|
|
511
|
-
message_ = _create_message_from_message_type_v1("human", message)
|
|
512
|
-
elif isinstance(message, Sequence) and len(message) == 2:
|
|
513
|
-
# mypy doesn't realise this can't be a string given the previous branch
|
|
514
|
-
message_type_str, template = message # type: ignore[misc]
|
|
515
|
-
message_ = _create_message_from_message_type_v1(message_type_str, template)
|
|
516
|
-
elif isinstance(message, dict):
|
|
517
|
-
msg_kwargs = message.copy()
|
|
518
|
-
try:
|
|
519
|
-
try:
|
|
520
|
-
msg_type = msg_kwargs.pop("role")
|
|
521
|
-
except KeyError:
|
|
522
|
-
msg_type = msg_kwargs.pop("type")
|
|
523
|
-
# None msg content is not allowed
|
|
524
|
-
msg_content = msg_kwargs.pop("content") or ""
|
|
525
|
-
except KeyError as e:
|
|
526
|
-
msg = f"Message dict must contain 'role' and 'content' keys, got {message}"
|
|
527
|
-
msg = create_message(
|
|
528
|
-
message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE
|
|
529
|
-
)
|
|
530
|
-
raise ValueError(msg) from e
|
|
531
|
-
message_ = _create_message_from_message_type_v1(
|
|
532
|
-
msg_type, msg_content, **msg_kwargs
|
|
533
|
-
)
|
|
534
355
|
else:
|
|
535
356
|
msg = f"Unsupported message type: {type(message)}"
|
|
536
357
|
msg = create_message(message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE)
|
|
@@ -540,7 +361,7 @@ def _convert_to_message_v1(message: MessageLikeRepresentation) -> MessageV1:
|
|
|
540
361
|
|
|
541
362
|
|
|
542
363
|
def convert_to_messages(
|
|
543
|
-
messages:
|
|
364
|
+
messages: Iterable[MessageLikeRepresentation] | PromptValue,
|
|
544
365
|
) -> list[BaseMessage]:
|
|
545
366
|
"""Convert a sequence of messages to a list of messages.
|
|
546
367
|
|
|
@@ -549,34 +370,16 @@ def convert_to_messages(
|
|
|
549
370
|
|
|
550
371
|
Returns:
|
|
551
372
|
list of messages (BaseMessages).
|
|
373
|
+
|
|
552
374
|
"""
|
|
553
375
|
# Import here to avoid circular imports
|
|
554
|
-
from langchain_core.prompt_values import PromptValue
|
|
376
|
+
from langchain_core.prompt_values import PromptValue # noqa: PLC0415
|
|
555
377
|
|
|
556
378
|
if isinstance(messages, PromptValue):
|
|
557
379
|
return messages.to_messages()
|
|
558
380
|
return [_convert_to_message(m) for m in messages]
|
|
559
381
|
|
|
560
382
|
|
|
561
|
-
def convert_to_messages_v1(
|
|
562
|
-
messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
|
|
563
|
-
) -> list[MessageV1]:
|
|
564
|
-
"""Convert a sequence of messages to a list of messages.
|
|
565
|
-
|
|
566
|
-
Args:
|
|
567
|
-
messages: Sequence of messages to convert.
|
|
568
|
-
|
|
569
|
-
Returns:
|
|
570
|
-
list of messages (BaseMessages).
|
|
571
|
-
"""
|
|
572
|
-
# Import here to avoid circular imports
|
|
573
|
-
from langchain_core.prompt_values import PromptValue
|
|
574
|
-
|
|
575
|
-
if isinstance(messages, PromptValue):
|
|
576
|
-
return messages.to_messages(message_version="v1")
|
|
577
|
-
return [_convert_to_message_v1(m) for m in messages]
|
|
578
|
-
|
|
579
|
-
|
|
580
383
|
def _runnable_support(func: Callable) -> Callable:
|
|
581
384
|
@overload
|
|
582
385
|
def wrapped(
|
|
@@ -589,13 +392,14 @@ def _runnable_support(func: Callable) -> Callable:
|
|
|
589
392
|
) -> list[BaseMessage]: ...
|
|
590
393
|
|
|
591
394
|
def wrapped(
|
|
592
|
-
messages:
|
|
395
|
+
messages: Sequence[MessageLikeRepresentation] | None = None,
|
|
593
396
|
**kwargs: Any,
|
|
594
|
-
) ->
|
|
595
|
-
list[BaseMessage]
|
|
596
|
-
Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
|
|
597
|
-
|
|
598
|
-
|
|
397
|
+
) -> (
|
|
398
|
+
list[BaseMessage]
|
|
399
|
+
| Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
|
|
400
|
+
):
|
|
401
|
+
# Import locally to prevent circular import.
|
|
402
|
+
from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
|
|
599
403
|
|
|
600
404
|
if messages is not None:
|
|
601
405
|
return func(messages, **kwargs)
|
|
@@ -607,74 +411,89 @@ def _runnable_support(func: Callable) -> Callable:
|
|
|
607
411
|
|
|
608
412
|
@_runnable_support
|
|
609
413
|
def filter_messages(
|
|
610
|
-
messages:
|
|
414
|
+
messages: Iterable[MessageLikeRepresentation] | PromptValue,
|
|
611
415
|
*,
|
|
612
|
-
include_names:
|
|
613
|
-
exclude_names:
|
|
614
|
-
include_types:
|
|
615
|
-
exclude_types:
|
|
616
|
-
include_ids:
|
|
617
|
-
exclude_ids:
|
|
618
|
-
exclude_tool_calls:
|
|
416
|
+
include_names: Sequence[str] | None = None,
|
|
417
|
+
exclude_names: Sequence[str] | None = None,
|
|
418
|
+
include_types: Sequence[str | type[BaseMessage]] | None = None,
|
|
419
|
+
exclude_types: Sequence[str | type[BaseMessage]] | None = None,
|
|
420
|
+
include_ids: Sequence[str] | None = None,
|
|
421
|
+
exclude_ids: Sequence[str] | None = None,
|
|
422
|
+
exclude_tool_calls: Sequence[str] | bool | None = None,
|
|
619
423
|
) -> list[BaseMessage]:
|
|
620
|
-
"""Filter messages based on name
|
|
424
|
+
"""Filter messages based on `name`, `type` or `id`.
|
|
621
425
|
|
|
622
426
|
Args:
|
|
623
427
|
messages: Sequence Message-like objects to filter.
|
|
624
|
-
include_names: Message names to include.
|
|
625
|
-
exclude_names: Messages names to exclude.
|
|
626
|
-
include_types: Message types to include. Can be specified as string names
|
|
627
|
-
|
|
628
|
-
SystemMessage
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
428
|
+
include_names: Message names to include.
|
|
429
|
+
exclude_names: Messages names to exclude.
|
|
430
|
+
include_types: Message types to include. Can be specified as string names
|
|
431
|
+
(e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
|
|
432
|
+
classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
|
|
433
|
+
|
|
434
|
+
exclude_types: Message types to exclude. Can be specified as string names
|
|
435
|
+
(e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
|
|
436
|
+
classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
|
|
437
|
+
|
|
438
|
+
include_ids: Message IDs to include.
|
|
439
|
+
exclude_ids: Message IDs to exclude.
|
|
440
|
+
exclude_tool_calls: Tool call IDs to exclude.
|
|
635
441
|
Can be one of the following:
|
|
636
|
-
- `True`: all
|
|
442
|
+
- `True`: all `AIMessage`s with tool calls and all
|
|
443
|
+
`ToolMessage` objects will be excluded.
|
|
637
444
|
- a sequence of tool call IDs to exclude:
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
445
|
+
- `ToolMessage` objects with the corresponding tool call ID will be
|
|
446
|
+
excluded.
|
|
447
|
+
- The `tool_calls` in the AIMessage will be updated to exclude
|
|
448
|
+
matching tool calls. If all `tool_calls` are filtered from an
|
|
449
|
+
AIMessage, the whole message is excluded.
|
|
641
450
|
|
|
642
451
|
Returns:
|
|
643
|
-
A list of Messages that meets at least one of the incl_
|
|
644
|
-
of the excl_
|
|
452
|
+
A list of Messages that meets at least one of the `incl_*` conditions and none
|
|
453
|
+
of the `excl_*` conditions. If not `incl_*` conditions are specified then
|
|
645
454
|
anything that is not explicitly excluded will be included.
|
|
646
455
|
|
|
647
456
|
Raises:
|
|
648
|
-
ValueError
|
|
457
|
+
ValueError: If two incompatible arguments are provided.
|
|
649
458
|
|
|
650
459
|
Example:
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
AIMessage("steve-o", id="bar", name="example_assistant"),
|
|
659
|
-
HumanMessage("what's your favorite color", id="baz",),
|
|
660
|
-
AIMessage("silicon blue", id="blah",),
|
|
661
|
-
]
|
|
662
|
-
|
|
663
|
-
filter_messages(
|
|
664
|
-
messages,
|
|
665
|
-
incl_names=("example_user", "example_assistant"),
|
|
666
|
-
incl_types=("system",),
|
|
667
|
-
excl_ids=("bar",),
|
|
668
|
-
)
|
|
669
|
-
|
|
670
|
-
.. code-block:: python
|
|
671
|
-
|
|
672
|
-
[
|
|
673
|
-
SystemMessage("you're a good assistant."),
|
|
674
|
-
HumanMessage("what's your name", id="foo", name="example_user"),
|
|
675
|
-
]
|
|
460
|
+
```python
|
|
461
|
+
from langchain_core.messages import (
|
|
462
|
+
filter_messages,
|
|
463
|
+
AIMessage,
|
|
464
|
+
HumanMessage,
|
|
465
|
+
SystemMessage,
|
|
466
|
+
)
|
|
676
467
|
|
|
677
|
-
|
|
468
|
+
messages = [
|
|
469
|
+
SystemMessage("you're a good assistant."),
|
|
470
|
+
HumanMessage("what's your name", id="foo", name="example_user"),
|
|
471
|
+
AIMessage("steve-o", id="bar", name="example_assistant"),
|
|
472
|
+
HumanMessage(
|
|
473
|
+
"what's your favorite color",
|
|
474
|
+
id="baz",
|
|
475
|
+
),
|
|
476
|
+
AIMessage(
|
|
477
|
+
"silicon blue",
|
|
478
|
+
id="blah",
|
|
479
|
+
),
|
|
480
|
+
]
|
|
481
|
+
|
|
482
|
+
filter_messages(
|
|
483
|
+
messages,
|
|
484
|
+
incl_names=("example_user", "example_assistant"),
|
|
485
|
+
incl_types=("system",),
|
|
486
|
+
excl_ids=("bar",),
|
|
487
|
+
)
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
```python
|
|
491
|
+
[
|
|
492
|
+
SystemMessage("you're a good assistant."),
|
|
493
|
+
HumanMessage("what's your name", id="foo", name="example_user"),
|
|
494
|
+
]
|
|
495
|
+
```
|
|
496
|
+
"""
|
|
678
497
|
messages = convert_to_messages(messages)
|
|
679
498
|
filtered: list[BaseMessage] = []
|
|
680
499
|
for msg in messages:
|
|
@@ -736,76 +555,107 @@ def filter_messages(
|
|
|
736
555
|
|
|
737
556
|
@_runnable_support
|
|
738
557
|
def merge_message_runs(
|
|
739
|
-
messages:
|
|
558
|
+
messages: Iterable[MessageLikeRepresentation] | PromptValue,
|
|
740
559
|
*,
|
|
741
560
|
chunk_separator: str = "\n",
|
|
742
561
|
) -> list[BaseMessage]:
|
|
743
562
|
r"""Merge consecutive Messages of the same type.
|
|
744
563
|
|
|
745
|
-
|
|
746
|
-
|
|
564
|
+
!!! note
|
|
565
|
+
`ToolMessage` objects are not merged, as each has a distinct tool call id that
|
|
566
|
+
can't be merged.
|
|
747
567
|
|
|
748
568
|
Args:
|
|
749
569
|
messages: Sequence Message-like objects to merge.
|
|
750
570
|
chunk_separator: Specify the string to be inserted between message chunks.
|
|
751
|
-
Default is "\n".
|
|
752
571
|
|
|
753
572
|
Returns:
|
|
754
573
|
list of BaseMessages with consecutive runs of message types merged into single
|
|
755
574
|
messages. By default, if two messages being merged both have string contents,
|
|
756
|
-
the merged content is a concatenation of the two strings with a new-line
|
|
575
|
+
the merged content is a concatenation of the two strings with a new-line
|
|
576
|
+
separator.
|
|
757
577
|
The separator inserted between message chunks can be controlled by specifying
|
|
758
|
-
any string with
|
|
759
|
-
content blocks, the merged content is a list of content blocks.
|
|
578
|
+
any string with `chunk_separator`. If at least one of the messages has a list
|
|
579
|
+
of content blocks, the merged content is a list of content blocks.
|
|
760
580
|
|
|
761
581
|
Example:
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
)
|
|
582
|
+
```python
|
|
583
|
+
from langchain_core.messages import (
|
|
584
|
+
merge_message_runs,
|
|
585
|
+
AIMessage,
|
|
586
|
+
HumanMessage,
|
|
587
|
+
SystemMessage,
|
|
588
|
+
ToolCall,
|
|
589
|
+
)
|
|
771
590
|
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
591
|
+
messages = [
|
|
592
|
+
SystemMessage("you're a good assistant."),
|
|
593
|
+
HumanMessage(
|
|
594
|
+
"what's your favorite color",
|
|
595
|
+
id="foo",
|
|
596
|
+
),
|
|
597
|
+
HumanMessage(
|
|
598
|
+
"wait your favorite food",
|
|
599
|
+
id="bar",
|
|
600
|
+
),
|
|
601
|
+
AIMessage(
|
|
602
|
+
"my favorite colo",
|
|
603
|
+
tool_calls=[
|
|
604
|
+
ToolCall(
|
|
605
|
+
name="blah_tool", args={"x": 2}, id="123", type="tool_call"
|
|
606
|
+
)
|
|
607
|
+
],
|
|
608
|
+
id="baz",
|
|
609
|
+
),
|
|
610
|
+
AIMessage(
|
|
611
|
+
[{"type": "text", "text": "my favorite dish is lasagna"}],
|
|
612
|
+
tool_calls=[
|
|
613
|
+
ToolCall(
|
|
614
|
+
name="blah_tool",
|
|
615
|
+
args={"x": -10},
|
|
616
|
+
id="456",
|
|
617
|
+
type="tool_call",
|
|
618
|
+
)
|
|
619
|
+
],
|
|
620
|
+
id="blur",
|
|
621
|
+
),
|
|
622
|
+
]
|
|
623
|
+
|
|
624
|
+
merge_message_runs(messages)
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
```python
|
|
628
|
+
[
|
|
629
|
+
SystemMessage("you're a good assistant."),
|
|
630
|
+
HumanMessage(
|
|
631
|
+
"what's your favorite color\\n"
|
|
632
|
+
"wait your favorite food", id="foo",
|
|
633
|
+
),
|
|
634
|
+
AIMessage(
|
|
635
|
+
[
|
|
777
636
|
"my favorite colo",
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
"my favorite colo",
|
|
798
|
-
{"type": "text", "text": "my favorite dish is lasagna"}
|
|
799
|
-
],
|
|
800
|
-
tool_calls=[
|
|
801
|
-
ToolCall({"name": "blah_tool", "args": {"x": 2}, "id": "123", "type": "tool_call"}),
|
|
802
|
-
ToolCall({"name": "blah_tool", "args": {"x": -10}, "id": "456", "type": "tool_call"})
|
|
803
|
-
]
|
|
804
|
-
id="baz"
|
|
805
|
-
),
|
|
806
|
-
]
|
|
637
|
+
{"type": "text", "text": "my favorite dish is lasagna"}
|
|
638
|
+
],
|
|
639
|
+
tool_calls=[
|
|
640
|
+
ToolCall({
|
|
641
|
+
"name": "blah_tool",
|
|
642
|
+
"args": {"x": 2},
|
|
643
|
+
"id": "123",
|
|
644
|
+
"type": "tool_call"
|
|
645
|
+
}),
|
|
646
|
+
ToolCall({
|
|
647
|
+
"name": "blah_tool",
|
|
648
|
+
"args": {"x": -10},
|
|
649
|
+
"id": "456",
|
|
650
|
+
"type": "tool_call"
|
|
651
|
+
})
|
|
652
|
+
]
|
|
653
|
+
id="baz"
|
|
654
|
+
),
|
|
655
|
+
]
|
|
807
656
|
|
|
808
|
-
|
|
657
|
+
```
|
|
658
|
+
"""
|
|
809
659
|
if not messages:
|
|
810
660
|
return []
|
|
811
661
|
messages = convert_to_messages(messages)
|
|
@@ -836,165 +686,161 @@ def merge_message_runs(
|
|
|
836
686
|
# init not at runtime.
|
|
837
687
|
@_runnable_support
|
|
838
688
|
def trim_messages(
|
|
839
|
-
messages:
|
|
689
|
+
messages: Iterable[MessageLikeRepresentation] | PromptValue,
|
|
840
690
|
*,
|
|
841
691
|
max_tokens: int,
|
|
842
|
-
token_counter:
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
BaseLanguageModel,
|
|
846
|
-
],
|
|
692
|
+
token_counter: Callable[[list[BaseMessage]], int]
|
|
693
|
+
| Callable[[BaseMessage], int]
|
|
694
|
+
| BaseLanguageModel,
|
|
847
695
|
strategy: Literal["first", "last"] = "last",
|
|
848
696
|
allow_partial: bool = False,
|
|
849
|
-
end_on:
|
|
850
|
-
|
|
851
|
-
] = None,
|
|
852
|
-
start_on: Optional[
|
|
853
|
-
Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
|
|
854
|
-
] = None,
|
|
697
|
+
end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
|
|
698
|
+
start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
|
|
855
699
|
include_system: bool = False,
|
|
856
|
-
text_splitter:
|
|
700
|
+
text_splitter: Callable[[str], list[str]] | TextSplitter | None = None,
|
|
857
701
|
) -> list[BaseMessage]:
|
|
858
702
|
r"""Trim messages to be below a token count.
|
|
859
703
|
|
|
860
|
-
trim_messages can be used to reduce the size of a chat history to a specified
|
|
861
|
-
|
|
704
|
+
`trim_messages` can be used to reduce the size of a chat history to a specified
|
|
705
|
+
token or message count.
|
|
862
706
|
|
|
863
707
|
In either case, if passing the trimmed chat history back into a chat model
|
|
864
708
|
directly, the resulting chat history should usually satisfy the following
|
|
865
709
|
properties:
|
|
866
710
|
|
|
867
711
|
1. The resulting chat history should be valid. Most chat models expect that chat
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
Please see the following link for more information about messages:
|
|
873
|
-
https://python.langchain.com/docs/concepts/#messages
|
|
712
|
+
history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`
|
|
713
|
+
followed by a `HumanMessage`. To achieve this, set `start_on='human'`.
|
|
714
|
+
In addition, generally a `ToolMessage` can only appear after an `AIMessage`
|
|
715
|
+
that involved a tool call.
|
|
874
716
|
2. It includes recent messages and drops old messages in the chat history.
|
|
875
|
-
|
|
717
|
+
To achieve this set the `strategy='last'`.
|
|
876
718
|
3. Usually, the new chat history should include the `SystemMessage` if it
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
719
|
+
was present in the original chat history since the `SystemMessage` includes
|
|
720
|
+
special instructions to the chat model. The `SystemMessage` is almost always
|
|
721
|
+
the first message in the history if present. To achieve this set the
|
|
722
|
+
`include_system=True`.
|
|
881
723
|
|
|
882
|
-
|
|
883
|
-
|
|
724
|
+
!!! note
|
|
725
|
+
The examples below show how to configure `trim_messages` to achieve a behavior
|
|
726
|
+
consistent with the above properties.
|
|
884
727
|
|
|
885
728
|
Args:
|
|
886
729
|
messages: Sequence of Message-like objects to trim.
|
|
887
730
|
max_tokens: Max token count of trimmed messages.
|
|
888
|
-
token_counter: Function or llm for counting tokens in a BaseMessage or a
|
|
889
|
-
BaseMessage
|
|
890
|
-
BaseLanguageModel.get_num_tokens_from_messages() will be used.
|
|
731
|
+
token_counter: Function or llm for counting tokens in a `BaseMessage` or a
|
|
732
|
+
list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
|
|
733
|
+
`BaseLanguageModel.get_num_tokens_from_messages()` will be used.
|
|
891
734
|
Set to `len` to count the number of **messages** in the chat history.
|
|
892
735
|
|
|
893
|
-
|
|
894
|
-
Use `count_tokens_approximately` to get fast, approximate token
|
|
736
|
+
!!! note
|
|
737
|
+
Use `count_tokens_approximately` to get fast, approximate token
|
|
738
|
+
counts.
|
|
895
739
|
This is recommended for using `trim_messages` on the hot path, where
|
|
896
740
|
exact token counting is not necessary.
|
|
897
741
|
|
|
898
742
|
strategy: Strategy for trimming.
|
|
899
|
-
-
|
|
900
|
-
-
|
|
901
|
-
Default is "last".
|
|
743
|
+
- `'first'`: Keep the first `<= n_count` tokens of the messages.
|
|
744
|
+
- `'last'`: Keep the last `<= n_count` tokens of the messages.
|
|
902
745
|
allow_partial: Whether to split a message if only part of the message can be
|
|
903
|
-
included. If
|
|
904
|
-
are included. If
|
|
746
|
+
included. If `strategy='last'` then the last partial contents of a message
|
|
747
|
+
are included. If `strategy='first'` then the first partial contents of a
|
|
905
748
|
message are included.
|
|
906
|
-
Default is False.
|
|
907
749
|
end_on: The message type to end on. If specified then every message after the
|
|
908
|
-
last occurrence of this type is ignored. If
|
|
909
|
-
is done before we attempt to get the last
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
AIMessage
|
|
914
|
-
|
|
750
|
+
last occurrence of this type is ignored. If `strategy='last'` then this
|
|
751
|
+
is done before we attempt to get the last `max_tokens`. If
|
|
752
|
+
`strategy='first'` then this is done after we get the first
|
|
753
|
+
`max_tokens`. Can be specified as string names (e.g. `'system'`,
|
|
754
|
+
`'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
|
|
755
|
+
`SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
|
|
756
|
+
type or a list of types.
|
|
757
|
+
|
|
915
758
|
start_on: The message type to start on. Should only be specified if
|
|
916
|
-
|
|
759
|
+
`strategy='last'`. If specified then every message before
|
|
917
760
|
the first occurrence of this type is ignored. This is done after we trim
|
|
918
|
-
the initial messages to the last
|
|
919
|
-
apply to a SystemMessage at index 0 if
|
|
920
|
-
specified as string names (e.g.
|
|
921
|
-
BaseMessage classes (e.g. SystemMessage
|
|
922
|
-
be a single type or a list of types.
|
|
923
|
-
|
|
924
|
-
include_system: Whether to keep the SystemMessage if there is one at index
|
|
925
|
-
Should only be specified if
|
|
926
|
-
|
|
927
|
-
text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for
|
|
761
|
+
the initial messages to the last `max_tokens`. Does not
|
|
762
|
+
apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
|
|
763
|
+
specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
|
|
764
|
+
as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
|
|
765
|
+
`AIMessage`, ...). Can be a single type or a list of types.
|
|
766
|
+
|
|
767
|
+
include_system: Whether to keep the `SystemMessage` if there is one at index
|
|
768
|
+
`0`. Should only be specified if `strategy="last"`.
|
|
769
|
+
text_splitter: Function or `langchain_text_splitters.TextSplitter` for
|
|
928
770
|
splitting the string contents of a message. Only used if
|
|
929
|
-
|
|
930
|
-
from a partial message will be included. if
|
|
771
|
+
`allow_partial=True`. If `strategy='last'` then the last split tokens
|
|
772
|
+
from a partial message will be included. if `strategy='first'` then the
|
|
931
773
|
first split tokens from a partial message will be included. Token splitter
|
|
932
774
|
assumes that separators are kept, so that split contents can be directly
|
|
933
775
|
concatenated to recreate the original text. Defaults to splitting on
|
|
934
776
|
newlines.
|
|
935
777
|
|
|
936
778
|
Returns:
|
|
937
|
-
|
|
779
|
+
List of trimmed `BaseMessage`.
|
|
938
780
|
|
|
939
781
|
Raises:
|
|
940
782
|
ValueError: if two incompatible arguments are specified or an unrecognized
|
|
941
|
-
|
|
783
|
+
`strategy` is specified.
|
|
942
784
|
|
|
943
785
|
Example:
|
|
944
|
-
Trim chat history based on token count, keeping the SystemMessage if
|
|
945
|
-
present, and ensuring that the chat history starts with a HumanMessage (
|
|
946
|
-
or a SystemMessage followed by a HumanMessage).
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
)
|
|
957
|
-
|
|
958
|
-
messages = [
|
|
959
|
-
SystemMessage("you're a good assistant, you always respond with a joke."),
|
|
960
|
-
HumanMessage("i wonder why it's called langchain"),
|
|
961
|
-
AIMessage(
|
|
962
|
-
'Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'
|
|
963
|
-
),
|
|
964
|
-
HumanMessage("and who is harrison chasing anyways"),
|
|
965
|
-
AIMessage(
|
|
966
|
-
"Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"
|
|
967
|
-
),
|
|
968
|
-
HumanMessage("what do you call a speechless parrot"),
|
|
969
|
-
]
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
trim_messages(
|
|
973
|
-
messages,
|
|
974
|
-
max_tokens=45,
|
|
975
|
-
strategy="last",
|
|
976
|
-
token_counter=ChatOpenAI(model="gpt-4o"),
|
|
977
|
-
# Most chat models expect that chat history starts with either:
|
|
978
|
-
# (1) a HumanMessage or
|
|
979
|
-
# (2) a SystemMessage followed by a HumanMessage
|
|
980
|
-
start_on="human",
|
|
981
|
-
# Usually, we want to keep the SystemMessage
|
|
982
|
-
# if it's present in the original history.
|
|
983
|
-
# The SystemMessage has special instructions for the model.
|
|
984
|
-
include_system=True,
|
|
985
|
-
allow_partial=False,
|
|
986
|
-
)
|
|
786
|
+
Trim chat history based on token count, keeping the `SystemMessage` if
|
|
787
|
+
present, and ensuring that the chat history starts with a `HumanMessage` (
|
|
788
|
+
or a `SystemMessage` followed by a `HumanMessage`).
|
|
789
|
+
|
|
790
|
+
```python
|
|
791
|
+
from langchain_core.messages import (
|
|
792
|
+
AIMessage,
|
|
793
|
+
HumanMessage,
|
|
794
|
+
BaseMessage,
|
|
795
|
+
SystemMessage,
|
|
796
|
+
trim_messages,
|
|
797
|
+
)
|
|
987
798
|
|
|
988
|
-
|
|
799
|
+
messages = [
|
|
800
|
+
SystemMessage("you're a good assistant, you always respond with a joke."),
|
|
801
|
+
HumanMessage("i wonder why it's called langchain"),
|
|
802
|
+
AIMessage(
|
|
803
|
+
'Well, I guess they thought "WordRope" and "SentenceString" just '
|
|
804
|
+
"didn't have the same ring to it!"
|
|
805
|
+
),
|
|
806
|
+
HumanMessage("and who is harrison chasing anyways"),
|
|
807
|
+
AIMessage(
|
|
808
|
+
"Hmmm let me think.\n\nWhy, he's probably chasing after the last "
|
|
809
|
+
"cup of coffee in the office!"
|
|
810
|
+
),
|
|
811
|
+
HumanMessage("what do you call a speechless parrot"),
|
|
812
|
+
]
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
trim_messages(
|
|
816
|
+
messages,
|
|
817
|
+
max_tokens=45,
|
|
818
|
+
strategy="last",
|
|
819
|
+
token_counter=ChatOpenAI(model="gpt-4o"),
|
|
820
|
+
# Most chat models expect that chat history starts with either:
|
|
821
|
+
# (1) a HumanMessage or
|
|
822
|
+
# (2) a SystemMessage followed by a HumanMessage
|
|
823
|
+
start_on="human",
|
|
824
|
+
# Usually, we want to keep the SystemMessage
|
|
825
|
+
# if it's present in the original history.
|
|
826
|
+
# The SystemMessage has special instructions for the model.
|
|
827
|
+
include_system=True,
|
|
828
|
+
allow_partial=False,
|
|
829
|
+
)
|
|
830
|
+
```
|
|
989
831
|
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
832
|
+
```python
|
|
833
|
+
[
|
|
834
|
+
SystemMessage(
|
|
835
|
+
content="you're a good assistant, you always respond with a joke."
|
|
836
|
+
),
|
|
837
|
+
HumanMessage(content="what do you call a speechless parrot"),
|
|
838
|
+
]
|
|
839
|
+
```
|
|
994
840
|
|
|
995
|
-
Trim chat history based on the message count, keeping the SystemMessage if
|
|
996
|
-
present, and ensuring that the chat history starts with a HumanMessage (
|
|
997
|
-
or a SystemMessage followed by a HumanMessage).
|
|
841
|
+
Trim chat history based on the message count, keeping the `SystemMessage` if
|
|
842
|
+
present, and ensuring that the chat history starts with a `HumanMessage` (
|
|
843
|
+
or a `SystemMessage` followed by a `HumanMessage`).
|
|
998
844
|
|
|
999
845
|
trim_messages(
|
|
1000
846
|
messages,
|
|
@@ -1016,72 +862,96 @@ def trim_messages(
|
|
|
1016
862
|
allow_partial=False,
|
|
1017
863
|
)
|
|
1018
864
|
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
865
|
+
```python
|
|
866
|
+
[
|
|
867
|
+
SystemMessage(
|
|
868
|
+
content="you're a good assistant, you always respond with a joke."
|
|
869
|
+
),
|
|
870
|
+
HumanMessage(content="and who is harrison chasing anyways"),
|
|
871
|
+
AIMessage(
|
|
872
|
+
content="Hmmm let me think.\n\nWhy, he's probably chasing after "
|
|
873
|
+
"the last cup of coffee in the office!"
|
|
874
|
+
),
|
|
875
|
+
HumanMessage(content="what do you call a speechless parrot"),
|
|
876
|
+
]
|
|
877
|
+
```
|
|
1029
878
|
Trim chat history using a custom token counter function that counts the
|
|
1030
879
|
number of tokens in each message.
|
|
1031
880
|
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
{"type": "text", "text": "This is the FIRST 4 token block."},
|
|
1040
|
-
{"type": "text", "text": "This is the SECOND 4 token block."},
|
|
1041
|
-
],
|
|
1042
|
-
id="second",
|
|
1043
|
-
),
|
|
1044
|
-
HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
|
|
1045
|
-
AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
|
|
1046
|
-
]
|
|
1047
|
-
|
|
1048
|
-
def dummy_token_counter(messages: list[BaseMessage]) -> int:
|
|
1049
|
-
# treat each message like it adds 3 default tokens at the beginning
|
|
1050
|
-
# of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
|
|
1051
|
-
# per message.
|
|
1052
|
-
|
|
1053
|
-
default_content_len = 4
|
|
1054
|
-
default_msg_prefix_len = 3
|
|
1055
|
-
default_msg_suffix_len = 3
|
|
1056
|
-
|
|
1057
|
-
count = 0
|
|
1058
|
-
for msg in messages:
|
|
1059
|
-
if isinstance(msg.content, str):
|
|
1060
|
-
count += default_msg_prefix_len + default_content_len + default_msg_suffix_len
|
|
1061
|
-
if isinstance(msg.content, list):
|
|
1062
|
-
count += default_msg_prefix_len + len(msg.content) * default_content_len + default_msg_suffix_len
|
|
1063
|
-
return count
|
|
1064
|
-
|
|
1065
|
-
First 30 tokens, allowing partial messages:
|
|
1066
|
-
.. code-block:: python
|
|
1067
|
-
|
|
1068
|
-
trim_messages(
|
|
1069
|
-
messages,
|
|
1070
|
-
max_tokens=30,
|
|
1071
|
-
token_counter=dummy_token_counter,
|
|
1072
|
-
strategy="first",
|
|
1073
|
-
allow_partial=True,
|
|
1074
|
-
)
|
|
1075
|
-
|
|
1076
|
-
.. code-block:: python
|
|
1077
|
-
|
|
881
|
+
```python
|
|
882
|
+
messages = [
|
|
883
|
+
SystemMessage("This is a 4 token text. The full message is 10 tokens."),
|
|
884
|
+
HumanMessage(
|
|
885
|
+
"This is a 4 token text. The full message is 10 tokens.", id="first"
|
|
886
|
+
),
|
|
887
|
+
AIMessage(
|
|
1078
888
|
[
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
889
|
+
{"type": "text", "text": "This is the FIRST 4 token block."},
|
|
890
|
+
{"type": "text", "text": "This is the SECOND 4 token block."},
|
|
891
|
+
],
|
|
892
|
+
id="second",
|
|
893
|
+
),
|
|
894
|
+
HumanMessage(
|
|
895
|
+
"This is a 4 token text. The full message is 10 tokens.", id="third"
|
|
896
|
+
),
|
|
897
|
+
AIMessage(
|
|
898
|
+
"This is a 4 token text. The full message is 10 tokens.",
|
|
899
|
+
id="fourth",
|
|
900
|
+
),
|
|
901
|
+
]
|
|
902
|
+
|
|
903
|
+
|
|
904
|
+
def dummy_token_counter(messages: list[BaseMessage]) -> int:
|
|
905
|
+
# treat each message like it adds 3 default tokens at the beginning
|
|
906
|
+
# of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
|
|
907
|
+
# per message.
|
|
908
|
+
|
|
909
|
+
default_content_len = 4
|
|
910
|
+
default_msg_prefix_len = 3
|
|
911
|
+
default_msg_suffix_len = 3
|
|
912
|
+
|
|
913
|
+
count = 0
|
|
914
|
+
for msg in messages:
|
|
915
|
+
if isinstance(msg.content, str):
|
|
916
|
+
count += (
|
|
917
|
+
default_msg_prefix_len
|
|
918
|
+
+ default_content_len
|
|
919
|
+
+ default_msg_suffix_len
|
|
920
|
+
)
|
|
921
|
+
if isinstance(msg.content, list):
|
|
922
|
+
count += (
|
|
923
|
+
default_msg_prefix_len
|
|
924
|
+
+ len(msg.content) * default_content_len
|
|
925
|
+
+ default_msg_suffix_len
|
|
926
|
+
)
|
|
927
|
+
return count
|
|
928
|
+
```
|
|
1083
929
|
|
|
1084
|
-
|
|
930
|
+
First 30 tokens, allowing partial messages:
|
|
931
|
+
```python
|
|
932
|
+
trim_messages(
|
|
933
|
+
messages,
|
|
934
|
+
max_tokens=30,
|
|
935
|
+
token_counter=dummy_token_counter,
|
|
936
|
+
strategy="first",
|
|
937
|
+
allow_partial=True,
|
|
938
|
+
)
|
|
939
|
+
```
|
|
940
|
+
|
|
941
|
+
```python
|
|
942
|
+
[
|
|
943
|
+
SystemMessage("This is a 4 token text. The full message is 10 tokens."),
|
|
944
|
+
HumanMessage(
|
|
945
|
+
"This is a 4 token text. The full message is 10 tokens.",
|
|
946
|
+
id="first",
|
|
947
|
+
),
|
|
948
|
+
AIMessage(
|
|
949
|
+
[{"type": "text", "text": "This is the FIRST 4 token block."}],
|
|
950
|
+
id="second",
|
|
951
|
+
),
|
|
952
|
+
]
|
|
953
|
+
```
|
|
954
|
+
"""
|
|
1085
955
|
# Validate arguments
|
|
1086
956
|
if start_on and strategy == "first":
|
|
1087
957
|
msg = "start_on parameter is only valid with strategy='last'"
|
|
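The 30-token budget in the `strategy="first"` example works out exactly under `dummy_token_counter`. A quick sanity check, reusing `messages` and `dummy_token_counter` from the docstring above (the `assert` is an illustrative sketch, not library code):

```python
result = trim_messages(
    messages,
    max_tokens=30,
    token_counter=dummy_token_counter,
    strategy="first",
    allow_partial=True,
)
# SystemMessage (string content):       3 + 4 + 3 = 10
# HumanMessage  (string content):       3 + 4 + 3 = 10
# AIMessage, one block kept (partial):  3 + 1 * 4 + 3 = 10
assert dummy_token_counter(result) == 30
```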
@@ -1112,17 +982,12 @@ def trim_messages(
         )
         raise ValueError(msg)

-    try:
-        from langchain_text_splitters import TextSplitter
-    except ImportError:
-        text_splitter_fn: Optional[Callable] = cast(Optional[Callable], text_splitter)
+    if _HAS_LANGCHAIN_TEXT_SPLITTERS and isinstance(text_splitter, TextSplitter):
+        text_splitter_fn = text_splitter.split_text
+    elif text_splitter:
+        text_splitter_fn = cast("Callable", text_splitter)
     else:
-        if isinstance(text_splitter, TextSplitter):
-            text_splitter_fn = text_splitter.split_text
-        else:
-            text_splitter_fn = text_splitter
-
-    text_splitter_fn = text_splitter_fn or _default_text_splitter
+        text_splitter_fn = _default_text_splitter

     if strategy == "first":
         return _first_max_tokens(
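This hunk replaces the old `try`/`except ImportError` dispatch with an explicit `_HAS_LANGCHAIN_TEXT_SPLITTERS` check. A minimal sketch of the branches from the caller's side (the word-based counter and whitespace splitter here are illustrative, not library code):

```python
from langchain_core.messages import BaseMessage, HumanMessage, trim_messages


def count_words(messages: list[BaseMessage]) -> int:
    # Toy token counter: one "token" per whitespace-separated word.
    return sum(len(str(m.content).split()) for m in messages)


# A plain callable takes the `elif text_splitter:` branch; a
# langchain-text-splitters TextSplitter instance would be dispatched to its
# .split_text method, and omitting text_splitter falls back to the default.
trim_messages(
    [HumanMessage("one two three four five six")],
    max_tokens=3,
    token_counter=count_words,
    strategy="first",
    allow_partial=True,
    text_splitter=lambda text: text.split(" "),
)
```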
@@ -1149,65 +1014,90 @@ def trim_messages(


 def convert_to_openai_messages(
-    messages: Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
+    messages: MessageLikeRepresentation | Sequence[MessageLikeRepresentation],
     *,
     text_format: Literal["string", "block"] = "string",
-) -> Union[dict, list[dict]]:
+    include_id: bool = False,
+) -> dict | list[dict]:
     """Convert LangChain messages into OpenAI message dicts.

     Args:
         messages: Message-like object or iterable of objects whose contents are
             in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
         text_format: How to format string or text block contents:
-
-            - "string":
+            - `'string'`:
                 If a message has a string content, this is left as a string. If
-                a message has content blocks that are all of type 'text', these are
-                joined with a newline to make a single string. If a message has
-                content blocks and at least one isn't of type 'text', then
+                a message has content blocks that are all of type `'text'`, these
+                are joined with a newline to make a single string. If a message has
+                content blocks and at least one isn't of type `'text'`, then
                 all blocks are left as dicts.
-            - "block":
+            - `'block'`:
                 If a message has a string content, this is turned into a list
-                with a single content block of type 'text'. If a message has content
-                blocks these are left as is.
+                with a single content block of type `'text'`. If a message has
+                content blocks these are left as is.
+        include_id: Whether to include message ids in the openai messages, if they
+            are present in the source messages.
+
+    Raises:
+        ValueError: if an unrecognized `text_format` is specified, or if a message
+            content block is missing expected keys.

     Returns:
         The return type depends on the input type:
-            - dict:
-                If a single message-like object is passed in, a single OpenAI message
-                dict is returned.
-            - list[dict]:
-                If a sequence of message-like objects are passed in, a list of OpenAI
-                message dicts is returned.
-
-    Example:

-
+        - dict:
+            If a single message-like object is passed in, a single OpenAI message
+            dict is returned.
+        - list[dict]:
+            If a sequence of message-like objects are passed in, a list of OpenAI
+            message dicts is returned.

-        …
+    Example:
+        ```python
+        from langchain_core.messages import (
+            convert_to_openai_messages,
+            AIMessage,
+            SystemMessage,
+            ToolMessage,
+        )

-        …
+        messages = [
+            SystemMessage([{"type": "text", "text": "foo"}]),
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "whats in this"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
+                    },
+                ],
+            },
+            AIMessage(
+                "",
+                tool_calls=[
+                    {
+                        "name": "analyze",
+                        "args": {"baz": "buz"},
+                        "id": "1",
+                        "type": "tool_call",
+                    }
+                ],
+            ),
+            ToolMessage("foobar", tool_call_id="1", name="bar"),
+            {"role": "assistant", "content": "thats nice"},
+        ]
+        oai_messages = convert_to_openai_messages(messages)
+        # -> [
+        #     {'role': 'system', 'content': 'foo'},
+        #     {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
+        #     {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1', 'function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
+        #     {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
+        #     {'role': 'assistant', 'content': 'thats nice'}
+        # ]
+        ```
+
+    !!! version-added "Added in version 0.3.11"

     """  # noqa: E501
     if text_format not in {"string", "block"}:
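A small sketch of the `text_format` semantics described in the docstring, for a message whose content blocks are all of type `'text'`:

```python
from langchain_core.messages import HumanMessage, convert_to_openai_messages

msg = HumanMessage([{"type": "text", "text": "a"}, {"type": "text", "text": "b"}])

convert_to_openai_messages(msg, text_format="string")
# -> {'role': 'user', 'content': 'a\nb'}  # all-text blocks joined by newline

convert_to_openai_messages(msg, text_format="block")
# -> {'role': 'user', 'content': [{'type': 'text', 'text': 'a'}, {'type': 'text', 'text': 'b'}]}
```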
@@ -1216,16 +1106,15 @@ def convert_to_openai_messages(

     oai_messages: list = []

-    if is_single := isinstance(messages, (BaseMessage, dict, str)):
+    if is_single := isinstance(messages, (BaseMessage, dict, str)):
         messages = [messages]

-
-    messages = convert_to_messages(messages)  # type: ignore[arg-type]
+    messages = convert_to_messages(messages)

     for i, message in enumerate(messages):
         oai_msg: dict = {"role": _get_message_openai_role(message)}
         tool_messages: list = []
-        content: Union[str, list[dict]]
+        content: str | list[dict]

         if message.name:
             oai_msg["name"] = message.name
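The `is_single` flag set here is what produces the single-dict vs list-of-dicts return shape documented above; a small sketch:

```python
from langchain_core.messages import HumanMessage, convert_to_openai_messages

# A single message-like input yields a single dict...
convert_to_openai_messages(HumanMessage("hello"))
# -> {'role': 'user', 'content': 'hello'}

# ...while a sequence yields a list of dicts.
convert_to_openai_messages(
    [HumanMessage("hello"), {"role": "assistant", "content": "hi"}]
)
# -> [{'role': 'user', 'content': 'hello'}, {'role': 'assistant', 'content': 'hi'}]
```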
@@ -1235,6 +1124,8 @@ def convert_to_openai_messages(
             oai_msg["refusal"] = message.additional_kwargs["refusal"]
         if isinstance(message, ToolMessage):
             oai_msg["tool_call_id"] = message.tool_call_id
+        if include_id and message.id:
+            oai_msg["id"] = message.id

         if not message.content:
             content = "" if text_format == "string" else []
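The new `include_id` branch carries source-message ids into the output; a sketch of the intended effect (output shape inferred from the surrounding code):

```python
from langchain_core.messages import AIMessage, convert_to_openai_messages

msg = AIMessage("hi", id="run-abc123")
convert_to_openai_messages(msg, include_id=True)
# -> {'role': 'assistant', 'content': 'hi', 'id': 'run-abc123'}
```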
@@ -1475,7 +1366,7 @@ def convert_to_openai_messages(
                         },
                     }
                 )
-            elif block.get("type") == "thinking":
+            elif block.get("type") in ["thinking", "reasoning"]:
                 content.append(block)
             else:
                 err = (
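With this change, `'reasoning'` blocks are passed through alongside `'thinking'` blocks instead of falling into the error branch. An illustrative sketch (the reasoning-block shape below is hypothetical, not a documented format):

```python
from langchain_core.messages import AIMessage, convert_to_openai_messages

msg = AIMessage(
    [
        {"type": "reasoning", "reasoning": "scratchpad text"},  # hypothetical block
        {"type": "text", "text": "final answer"},
    ]
)
# Mixed block types are kept as a list of dicts; the reasoning block is
# appended unchanged rather than rejected.
convert_to_openai_messages(msg)
```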
@@ -1506,10 +1397,8 @@ def _first_max_tokens(
     max_tokens: int,
     token_counter: Callable[[list[BaseMessage]], int],
     text_splitter: Callable[[str], list[str]],
-    partial_strategy: Optional[Literal["first", "last"]] = None,
-    end_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
+    partial_strategy: Literal["first", "last"] | None = None,
+    end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
 ) -> list[BaseMessage]:
     messages = list(messages)
     if not messages:
@@ -1626,12 +1515,8 @@ def _last_max_tokens(
     text_splitter: Callable[[str], list[str]],
     allow_partial: bool = False,
     include_system: bool = False,
-    start_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
-    end_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
+    start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
+    end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
 ) -> list[BaseMessage]:
     messages = list(messages)
     if len(messages) == 0:
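`start_on`/`end_on` accept the same union throughout: a type string, a message class, or a sequence of either. Via the public `trim_messages` API (values illustrative; `messages` and `dummy_token_counter` as in the docstring example above):

```python
from langchain_core.messages import HumanMessage, trim_messages

trim_messages(
    messages,
    max_tokens=30,
    token_counter=dummy_token_counter,
    strategy="last",
    include_system=True,
    start_on=("human", HumanMessage),  # string and class forms are equivalent
)
```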
@@ -1707,11 +1592,15 @@ def _msg_to_chunk(message: BaseMessage) -> BaseMessageChunk:
 def _chunk_to_msg(chunk: BaseMessageChunk) -> BaseMessage:
     if chunk.__class__ in _CHUNK_MSG_MAP:
         return _CHUNK_MSG_MAP[chunk.__class__](
-            **chunk.model_dump(exclude={"type", "tool_call_chunks"})
+            **chunk.model_dump(exclude={"type", "tool_call_chunks", "chunk_position"})
         )
     for chunk_cls, msg_cls in _CHUNK_MSG_MAP.items():
         if isinstance(chunk, chunk_cls):
-            return msg_cls(**chunk.model_dump(exclude={"type", "tool_call_chunks"}))
+            return msg_cls(
+                **chunk.model_dump(
+                    exclude={"type", "tool_call_chunks", "chunk_position"}
+                )
+            )

     msg = (
         f"Unrecognized message chunk class {chunk.__class__}. Supported classes are "
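The conversion above drops chunk-only fields (now including `chunk_position`) so the remaining dump matches the non-chunk constructor. A sketch of the same pattern applied by hand:

```python
from langchain_core.messages import AIMessage, AIMessageChunk

# Summing chunks is how streamed messages are accumulated.
chunk = AIMessageChunk(content="Hello") + AIMessageChunk(content=" world")

# Excluding chunk-only fields leaves kwargs that AIMessage can accept.
msg = AIMessage(
    **chunk.model_dump(exclude={"type", "tool_call_chunks", "chunk_position"})
)
assert msg.content == "Hello world"
```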
@@ -1728,7 +1617,7 @@ def _default_text_splitter(text: str) -> list[str]:

 def _is_message_type(
     message: BaseMessage,
-    type_: Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]],
+    type_: str | type[BaseMessage] | Sequence[str | type[BaseMessage]],
 ) -> bool:
     types = [type_] if isinstance(type_, (str, type)) else type_
     types_str = [t for t in types if isinstance(t, str)]
@@ -1788,28 +1677,28 @@ def count_tokens_approximately(
     Args:
         messages: List of messages to count tokens for.
         chars_per_token: Number of characters per token to use for the approximation.
-            Default is 4 (one token corresponds to ~4 chars for common English text).
-            You can also specify float values for more fine-grained control.
-            See more here
-        extra_tokens_per_message: Number of extra tokens to add per message.
-            Default is 3 (special tokens, including beginning/end of message).
-            You can also specify float values for more fine-grained control.
-            See more here
-            https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+            One token corresponds to ~4 chars for common English text.
+            You can also specify `float` values for more fine-grained control.
+            [See more here](https://platform.openai.com/tokenizer).
+        extra_tokens_per_message: Number of extra tokens to add per message, e.g.
+            special tokens, including beginning/end of message.
+            You can also specify `float` values for more fine-grained control.
+            [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
         count_name: Whether to include message names in the count.
             Enabled by default.

     Returns:
         Approximate number of tokens in the messages.

-
-    This is a simple approximation that may not match the exact token count
-    used by specific models. For accurate counts, use model-specific tokenizers.
-
+    !!! note
+        This is a simple approximation that may not match the exact token count used by
+        specific models. For accurate counts, use model-specific tokenizers.

     Warning:
         This function does not currently support counting image tokens.

-    .. versionadded:: 0.3.46
+    !!! version-added "Added in version 0.3.46"
+
     """
     token_count = 0.0
     for message in convert_to_messages(messages):
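The arithmetic behind the approximation, assuming the defaults noted in the docstring (`chars_per_token=4`, `extra_tokens_per_message=3`): each message contributes roughly `len(content) / 4 + 3` tokens. A worked example chosen to divide evenly:

```python
from langchain_core.messages import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately

msgs = [HumanMessage("Hello there, general Kenobi!")]  # 28 characters
count_tokens_approximately(msgs)
# ~ 28 / 4 + 3 = 10
```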