langchain-core 1.0.0a6__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +3 -4
- langchain_core/_api/beta_decorator.py +23 -26
- langchain_core/_api/deprecation.py +51 -64
- langchain_core/_api/path.py +3 -6
- langchain_core/_import_utils.py +3 -4
- langchain_core/agents.py +55 -48
- langchain_core/caches.py +65 -66
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +321 -336
- langchain_core/callbacks/file.py +44 -44
- langchain_core/callbacks/manager.py +454 -514
- langchain_core/callbacks/stdout.py +29 -30
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +60 -57
- langchain_core/chat_history.py +53 -68
- langchain_core/document_loaders/base.py +27 -25
- langchain_core/document_loaders/blob_loaders.py +1 -1
- langchain_core/document_loaders/langsmith.py +44 -48
- langchain_core/documents/__init__.py +23 -3
- langchain_core/documents/base.py +102 -94
- langchain_core/documents/compressor.py +10 -10
- langchain_core/documents/transformers.py +34 -35
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/length_based.py +2 -2
- langchain_core/example_selectors/semantic_similarity.py +28 -32
- langchain_core/exceptions.py +21 -20
- langchain_core/globals.py +3 -151
- langchain_core/indexing/__init__.py +1 -1
- langchain_core/indexing/api.py +121 -126
- langchain_core/indexing/base.py +73 -75
- langchain_core/indexing/in_memory.py +4 -6
- langchain_core/language_models/__init__.py +14 -29
- langchain_core/language_models/_utils.py +58 -61
- langchain_core/language_models/base.py +82 -172
- langchain_core/language_models/chat_models.py +329 -402
- langchain_core/language_models/fake.py +11 -11
- langchain_core/language_models/fake_chat_models.py +42 -36
- langchain_core/language_models/llms.py +189 -269
- langchain_core/load/dump.py +9 -12
- langchain_core/load/load.py +18 -28
- langchain_core/load/mapping.py +2 -4
- langchain_core/load/serializable.py +42 -40
- langchain_core/messages/__init__.py +10 -16
- langchain_core/messages/ai.py +148 -148
- langchain_core/messages/base.py +53 -51
- langchain_core/messages/block_translators/__init__.py +19 -22
- langchain_core/messages/block_translators/anthropic.py +6 -6
- langchain_core/messages/block_translators/bedrock_converse.py +5 -5
- langchain_core/messages/block_translators/google_genai.py +10 -7
- langchain_core/messages/block_translators/google_vertexai.py +4 -32
- langchain_core/messages/block_translators/groq.py +117 -21
- langchain_core/messages/block_translators/langchain_v0.py +5 -5
- langchain_core/messages/block_translators/openai.py +11 -11
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +339 -330
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +24 -31
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +19 -29
- langchain_core/messages/tool.py +74 -90
- langchain_core/messages/utils.py +484 -510
- langchain_core/output_parsers/__init__.py +13 -10
- langchain_core/output_parsers/base.py +61 -61
- langchain_core/output_parsers/format_instructions.py +9 -4
- langchain_core/output_parsers/json.py +12 -10
- langchain_core/output_parsers/list.py +21 -23
- langchain_core/output_parsers/openai_functions.py +49 -47
- langchain_core/output_parsers/openai_tools.py +30 -23
- langchain_core/output_parsers/pydantic.py +13 -14
- langchain_core/output_parsers/string.py +5 -5
- langchain_core/output_parsers/transform.py +15 -17
- langchain_core/output_parsers/xml.py +35 -34
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +18 -18
- langchain_core/outputs/chat_result.py +1 -3
- langchain_core/outputs/generation.py +16 -16
- langchain_core/outputs/llm_result.py +10 -10
- langchain_core/prompt_values.py +13 -19
- langchain_core/prompts/__init__.py +3 -27
- langchain_core/prompts/base.py +81 -86
- langchain_core/prompts/chat.py +308 -351
- langchain_core/prompts/dict.py +6 -6
- langchain_core/prompts/few_shot.py +81 -88
- langchain_core/prompts/few_shot_with_templates.py +11 -13
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +4 -6
- langchain_core/prompts/message.py +7 -7
- langchain_core/prompts/prompt.py +24 -39
- langchain_core/prompts/string.py +26 -10
- langchain_core/prompts/structured.py +49 -53
- langchain_core/rate_limiters.py +51 -60
- langchain_core/retrievers.py +61 -198
- langchain_core/runnables/base.py +1551 -1656
- langchain_core/runnables/branch.py +68 -70
- langchain_core/runnables/config.py +72 -89
- langchain_core/runnables/configurable.py +145 -161
- langchain_core/runnables/fallbacks.py +102 -96
- langchain_core/runnables/graph.py +91 -97
- langchain_core/runnables/graph_ascii.py +27 -28
- langchain_core/runnables/graph_mermaid.py +42 -51
- langchain_core/runnables/graph_png.py +43 -16
- langchain_core/runnables/history.py +175 -177
- langchain_core/runnables/passthrough.py +151 -167
- langchain_core/runnables/retry.py +46 -51
- langchain_core/runnables/router.py +30 -35
- langchain_core/runnables/schema.py +75 -80
- langchain_core/runnables/utils.py +60 -67
- langchain_core/stores.py +85 -121
- langchain_core/structured_query.py +8 -8
- langchain_core/sys_info.py +29 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +306 -245
- langchain_core/tools/convert.py +160 -155
- langchain_core/tools/render.py +10 -10
- langchain_core/tools/retriever.py +12 -11
- langchain_core/tools/simple.py +19 -24
- langchain_core/tools/structured.py +32 -39
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +97 -99
- langchain_core/tracers/context.py +29 -52
- langchain_core/tracers/core.py +49 -53
- langchain_core/tracers/evaluation.py +11 -11
- langchain_core/tracers/event_stream.py +65 -64
- langchain_core/tracers/langchain.py +21 -21
- langchain_core/tracers/log_stream.py +45 -45
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +16 -16
- langchain_core/tracers/run_collector.py +2 -4
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +3 -3
- langchain_core/utils/__init__.py +1 -4
- langchain_core/utils/_merge.py +2 -2
- langchain_core/utils/aiter.py +57 -61
- langchain_core/utils/env.py +9 -9
- langchain_core/utils/function_calling.py +94 -188
- langchain_core/utils/html.py +7 -8
- langchain_core/utils/input.py +9 -6
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +36 -40
- langchain_core/utils/json.py +4 -3
- langchain_core/utils/json_schema.py +9 -9
- langchain_core/utils/mustache.py +8 -10
- langchain_core/utils/pydantic.py +35 -37
- langchain_core/utils/strings.py +6 -9
- langchain_core/utils/usage.py +1 -1
- langchain_core/utils/utils.py +66 -62
- langchain_core/vectorstores/base.py +182 -216
- langchain_core/vectorstores/in_memory.py +101 -176
- langchain_core/vectorstores/utils.py +5 -5
- langchain_core/version.py +1 -1
- langchain_core-1.0.4.dist-info/METADATA +69 -0
- langchain_core-1.0.4.dist-info/RECORD +172 -0
- {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.4.dist-info}/WHEEL +1 -1
- langchain_core/memory.py +0 -120
- langchain_core/messages/block_translators/ollama.py +0 -47
- langchain_core/prompts/pipeline.py +0 -138
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core/tracers/langchain_v1.py +0 -31
- langchain_core/utils/loading.py +0 -35
- langchain_core-1.0.0a6.dist-info/METADATA +0 -67
- langchain_core-1.0.0a6.dist-info/RECORD +0 -181
- langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
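Editor's note: the pattern behind most of the line churn above is a typing-style migration. `typing.Optional` and `typing.Union` annotations are rewritten as PEP 604 `X | Y` unions, and `Callable` moves from `typing` to `collections.abc`. A minimal sketch of the before/after shape, using a hypothetical function rather than code taken from the package:

```python
# Hypothetical signatures illustrating the migration; not langchain-core code.

# 1.0.0a6 style:
#   from typing import Callable, Optional, Union
#   def trim(msgs: Union[list, tuple], counter: Optional[Callable] = None) -> list: ...

# 1.0.4 style:
from collections.abc import Callable


def trim(msgs: list | tuple, counter: Callable[[list], int] | None = None) -> list:
    # Same runtime behavior; only the annotation style changes.
    return list(msgs)
```

The reconstructed diff for `langchain_core/messages/utils.py` below shows this migration in detail, along with docstring reformatting (RST `.. code-block::` examples become fenced Markdown) and one functional addition to `convert_to_openai_messages`.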
langchain_core/messages/utils.py
CHANGED
@@ -5,7 +5,6 @@ Some examples of what you can do with these functions include:
 * Convert messages to strings (serialization)
 * Convert messages from dicts to Message objects (deserialization)
 * Filter messages from a list of messages based on name, type or id etc.
-
 """
 
 from __future__ import annotations
@@ -15,16 +14,13 @@ import inspect
 import json
 import logging
 import math
-from collections.abc import Iterable, Sequence
+from collections.abc import Callable, Iterable, Sequence
 from functools import partial
 from typing import (
     TYPE_CHECKING,
     Annotated,
     Any,
-    Callable,
     Literal,
-    Optional,
-    Union,
     cast,
     overload,
 )
@@ -76,22 +72,21 @@ def _get_type(v: Any) -> str:
 
 
 AnyMessage = Annotated[
-    Union[
-        Annotated[AIMessage, Tag(tag="ai")],
-        Annotated[HumanMessage, Tag(tag="human")],
-        Annotated[ChatMessage, Tag(tag="chat")],
-        Annotated[SystemMessage, Tag(tag="system")],
-        Annotated[FunctionMessage, Tag(tag="function")],
-        Annotated[ToolMessage, Tag(tag="tool")],
-        Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")],
-        Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")],
-        Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")],
-        Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")],
-        Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")],
-        Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
-    ],
+    Annotated[AIMessage, Tag(tag="ai")]
+    | Annotated[HumanMessage, Tag(tag="human")]
+    | Annotated[ChatMessage, Tag(tag="chat")]
+    | Annotated[SystemMessage, Tag(tag="system")]
+    | Annotated[FunctionMessage, Tag(tag="function")]
+    | Annotated[ToolMessage, Tag(tag="tool")]
+    | Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")]
+    | Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")]
+    | Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")]
+    | Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")]
+    | Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")]
+    | Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
     Field(discriminator=Discriminator(_get_type)),
 ]
+"""A type representing any defined `Message` or `MessageChunk` type."""
 
 
 def get_buffer_string(
@@ -101,10 +96,8 @@ def get_buffer_string(
 
     Args:
         messages: Messages to be converted to strings.
-        human_prefix: The prefix to prepend to contents of
-            ``HumanMessage``. Default is ``'Human'``.
-        ai_prefix: The prefix to prepend to contents of ``AIMessage``. Default is
-            ``'AI'``.
+        human_prefix: The prefix to prepend to contents of `HumanMessage`s.
+        ai_prefix: The prefix to prepend to contents of `AIMessage`.
 
     Returns:
         A single string concatenation of all input messages.
@@ -113,17 +106,16 @@
         ValueError: If an unsupported message type is encountered.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core import AIMessage, HumanMessage
-
-            messages = [
-                HumanMessage(content="Hi, how are you?"),
-                AIMessage(content="Good, how are you?"),
-            ]
-            get_buffer_string(messages)
-            # -> "Human: Hi, how are you?\nAI: Good, how are you?"
-
+        ```python
+        from langchain_core import AIMessage, HumanMessage
+
+        messages = [
+            HumanMessage(content="Hi, how are you?"),
+            AIMessage(content="Good, how are you?"),
+        ]
+        get_buffer_string(messages)
+        # -> "Human: Hi, how are you?\nAI: Good, how are you?"
+        ```
     """
     string_messages = []
     for m in messages:
@@ -183,7 +175,7 @@ def _message_from_dict(message: dict) -> BaseMessage:
 
 
 def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
-    """Convert a sequence of messages from dicts to Message objects.
+    """Convert a sequence of messages from dicts to `Message` objects.
 
     Args:
         messages: Sequence of messages (as dicts) to convert.
@@ -196,7 +188,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
 
 
 def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
-    """Convert a message chunk to a message.
+    """Convert a message chunk to a `Message`.
 
     Args:
         chunk: Message chunk to convert.
@@ -215,38 +207,39 @@ def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
     )
 
 
-MessageLikeRepresentation = Union[
-    BaseMessage, list[str], tuple[str, str], str, dict[str, Any]
-]
+MessageLikeRepresentation = (
+    BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]
+)
+"""A type representing the various ways a message can be represented."""
 
 
 def _create_message_from_message_type(
     message_type: str,
     content: str,
-    name: Optional[str] = None,
-    tool_call_id: Optional[str] = None,
-    tool_calls: Optional[list[dict[str, Any]]] = None,
-    id: Optional[str] = None,
+    name: str | None = None,
+    tool_call_id: str | None = None,
+    tool_calls: list[dict[str, Any]] | None = None,
+    id: str | None = None,
     **additional_kwargs: Any,
 ) -> BaseMessage:
-    """Create a message from a message type and content string.
+    """Create a message from a `Message` type and content string.
 
     Args:
-        message_type: (str) the type of the message (e.g., "human", "ai", etc.).
+        message_type: (str) the type of the message (e.g., `'human'`, `'ai'`, etc.).
         content: (str) the content string.
-        name: (str) the name of the message.
-        tool_call_id: (str) the tool call id.
-        tool_calls: (list[dict[str, Any]]) the tool calls.
-        id: (str) the id of the message.
+        name: (str) the name of the message.
+        tool_call_id: (str) the tool call id.
+        tool_calls: (list[dict[str, Any]]) the tool calls.
+        id: (str) the id of the message.
        additional_kwargs: (dict[str, Any]) additional keyword arguments.
 
     Returns:
         a message of the appropriate type.
 
     Raises:
-        ValueError: if the message type is not one of "human", "user", "ai",
-            "assistant", "function", "tool", "system", or
-            "developer".
+        ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
+            `'assistant'`, `'function'`, `'tool'`, `'system'`, or
+            `'developer'`.
     """
     kwargs: dict[str, Any] = {}
     if name is not None:
@@ -312,21 +305,21 @@ def _create_message_from_message_type(
 
 
 def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
-    """Instantiate a message from a variety of message formats.
+    """Instantiate a `Message` from a variety of message formats.
 
     The message format can be one of the following:
 
-    - BaseMessagePromptTemplate
-    - BaseMessage
-    - 2-tuple of (role string, template); e.g., ("human", "{user_input}")
+    - `BaseMessagePromptTemplate`
+    - `BaseMessage`
+    - 2-tuple of (role string, template); e.g., (`'human'`, `'{user_input}'`)
     - dict: a message dict with role and content keys
-    - string: shorthand for ("human", template); e.g., "{user_input}"
+    - string: shorthand for (`'human'`, template); e.g., `'{user_input}'`
 
     Args:
         message: a representation of a message in one of the supported formats.
 
     Returns:
-        an instance of a message or a message template.
+        An instance of a message or a message template.
 
     Raises:
         NotImplementedError: if the message type is not supported.
@@ -335,12 +328,16 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
     """
     if isinstance(message, BaseMessage):
         message_ = message
-    elif isinstance(message, str):
-        message_ = _create_message_from_message_type("human", message)
-    elif isinstance(message, Sequence) and len(message) == 2:
-        # mypy doesn't realise this can't be a string given the previous branch
-        message_type_str, template = message  # type: ignore[misc]
-        message_ = _create_message_from_message_type(message_type_str, template)
+    elif isinstance(message, Sequence):
+        if isinstance(message, str):
+            message_ = _create_message_from_message_type("human", message)
+        else:
+            try:
+                message_type_str, template = message
+            except ValueError as e:
+                msg = "Message as a sequence must be (role string, template)"
+                raise NotImplementedError(msg) from e
+            message_ = _create_message_from_message_type(message_type_str, template)
     elif isinstance(message, dict):
         msg_kwargs = message.copy()
         try:
@@ -368,7 +365,7 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
 
 
 def convert_to_messages(
-    messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
+    messages: Iterable[MessageLikeRepresentation] | PromptValue,
 ) -> list[BaseMessage]:
     """Convert a sequence of messages to a list of messages.
 
@@ -399,12 +396,12 @@ def _runnable_support(func: Callable) -> Callable:
     ) -> list[BaseMessage]: ...
 
     def wrapped(
-        messages: Optional[Sequence[MessageLikeRepresentation]] = None,
+        messages: Sequence[MessageLikeRepresentation] | None = None,
         **kwargs: Any,
-    ) -> Union[
-        list[BaseMessage],
-        Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]],
-    ]:
+    ) -> (
+        list[BaseMessage]
+        | Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
+    ):
         # Import locally to prevent circular import.
         from langchain_core.runnables.base import RunnableLambda  # noqa: PLC0415
 
@@ -418,89 +415,88 @@ def _runnable_support(func: Callable) -> Callable:
 
 @_runnable_support
 def filter_messages(
-    messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
+    messages: Iterable[MessageLikeRepresentation] | PromptValue,
     *,
-    include_names: Optional[Sequence[str]] = None,
-    exclude_names: Optional[Sequence[str]] = None,
-    include_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
-    exclude_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
-    include_ids: Optional[Sequence[str]] = None,
-    exclude_ids: Optional[Sequence[str]] = None,
-    exclude_tool_calls: Optional[Union[Sequence[str], bool]] = None,
+    include_names: Sequence[str] | None = None,
+    exclude_names: Sequence[str] | None = None,
+    include_types: Sequence[str | type[BaseMessage]] | None = None,
+    exclude_types: Sequence[str | type[BaseMessage]] | None = None,
+    include_ids: Sequence[str] | None = None,
+    exclude_ids: Sequence[str] | None = None,
+    exclude_tool_calls: Sequence[str] | bool | None = None,
 ) -> list[BaseMessage]:
-    """Filter messages based on name, type or id.
+    """Filter messages based on `name`, `type` or `id`.
 
     Args:
         messages: Sequence Message-like objects to filter.
-        include_names: Message names to include.
-        exclude_names: Messages names to exclude.
+        include_names: Message names to include.
+        exclude_names: Messages names to exclude.
         include_types: Message types to include. Can be specified as string names
-            (e.g. ``"system"``, ``"human"``, ``"ai"``, ...) or as ``BaseMessage``
-            classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
-
+            (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
+            classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
+
         exclude_types: Message types to exclude. Can be specified as string names
-            (e.g. ``"system"``, ``"human"``, ``"ai"``, ...) or as ``BaseMessage``
-            classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
-
-        include_ids: Message IDs to include.
-        exclude_ids: Message IDs to exclude.
-        exclude_tool_calls: Tool call IDs to exclude.
+            (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
+            classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
+
+        include_ids: Message IDs to include.
+        exclude_ids: Message IDs to exclude.
+        exclude_tool_calls: Tool call IDs to exclude.
             Can be one of the following:
-            - ``True``: all ``AIMessage`` objects with tool calls and all
-              ``ToolMessage`` objects will be excluded.
+            - `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
+              objects will be excluded.
             - a sequence of tool call IDs to exclude:
-                - ``ToolMessage`` objects with the corresponding tool call ID will be
-                  excluded.
-                - The ``tool_calls`` in the AIMessage will be updated to exclude
-                  matching tool calls. If all ``tool_calls`` are filtered from an
-                  AIMessage, the whole message is excluded.
+                - `ToolMessage` objects with the corresponding tool call ID will be
+                  excluded.
+                - The `tool_calls` in the AIMessage will be updated to exclude
+                  matching tool calls. If all `tool_calls` are filtered from an
+                  AIMessage, the whole message is excluded.
 
     Returns:
-        A list of Messages that meets at least one of the ``incl_*`` conditions and none
-        of the ``excl_*`` conditions. If not ``incl_*`` conditions are specified then
+        A list of Messages that meets at least one of the `incl_*` conditions and none
+        of the `excl_*` conditions. If not `incl_*` conditions are specified then
         anything that is not explicitly excluded will be included.
 
     Raises:
-        ValueError if two incompatible arguments are provided.
+        ValueError: If two incompatible arguments are provided.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                filter_messages,
-                AIMessage,
-                HumanMessage,
-                SystemMessage,
-            )
-
-            messages = [
-                SystemMessage("you're a good assistant."),
-                HumanMessage("what's your name", id="foo", name="example_user"),
-                AIMessage("steve-o", id="bar", name="example_assistant"),
-                HumanMessage(
-                    "what's your favorite color",
-                    id="baz",
-                ),
-                AIMessage(
-                    "silicon blue",
-                    id="blah",
-                ),
-            ]
-
-            filter_messages(
-                messages,
-                incl_names=("example_user", "example_assistant"),
-                incl_types=("system",),
-                excl_ids=("bar",),
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage("you're a good assistant."),
-                HumanMessage("what's your name", id="foo", name="example_user"),
-            ]
+        ```python
+        from langchain_core.messages import (
+            filter_messages,
+            AIMessage,
+            HumanMessage,
+            SystemMessage,
+        )
 
+        messages = [
+            SystemMessage("you're a good assistant."),
+            HumanMessage("what's your name", id="foo", name="example_user"),
+            AIMessage("steve-o", id="bar", name="example_assistant"),
+            HumanMessage(
+                "what's your favorite color",
+                id="baz",
+            ),
+            AIMessage(
+                "silicon blue",
+                id="blah",
+            ),
+        ]
+
+        filter_messages(
+            messages,
+            incl_names=("example_user", "example_assistant"),
+            incl_types=("system",),
+            excl_ids=("bar",),
+        )
+        ```
+
+        ```python
+        [
+            SystemMessage("you're a good assistant."),
+            HumanMessage("what's your name", id="foo", name="example_user"),
+        ]
+        ```
     """
     messages = convert_to_messages(messages)
     filtered: list[BaseMessage] = []
@@ -563,20 +559,19 @@ def filter_messages(
 
 @_runnable_support
 def merge_message_runs(
-    messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
+    messages: Iterable[MessageLikeRepresentation] | PromptValue,
     *,
     chunk_separator: str = "\n",
 ) -> list[BaseMessage]:
     r"""Merge consecutive Messages of the same type.
 
-    .. note::
-        ToolMessages are not merged, as each has a distinct tool call id that can't be
-        merged.
+    !!! note
+        `ToolMessage` objects are not merged, as each has a distinct tool call id that
+        can't be merged.
 
     Args:
         messages: Sequence Message-like objects to merge.
         chunk_separator: Specify the string to be inserted between message chunks.
-            Default is ``'\n'``.
 
     Returns:
         list of BaseMessages with consecutive runs of message types merged into single
@@ -584,87 +579,86 @@ def merge_message_runs(
         the merged content is a concatenation of the two strings with a new-line
         separator.
         The separator inserted between message chunks can be controlled by specifying
-        any string with ``chunk_separator``. If at least one of the messages has a list
+        any string with `chunk_separator`. If at least one of the messages has a list
         of content blocks, the merged content is a list of content blocks.
 
     Example:
+        ```python
+        from langchain_core.messages import (
+            merge_message_runs,
+            AIMessage,
+            HumanMessage,
+            SystemMessage,
+            ToolCall,
+        )
 
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                merge_message_runs,
-                AIMessage,
-                HumanMessage,
-                SystemMessage,
-                ToolCall,
-            )
-
-            messages = [
-                SystemMessage("you're a good assistant."),
-                HumanMessage(
-                    "what's your favorite color",
-                    id="foo",
-                ),
-                HumanMessage(
-                    "wait your favorite food",
-                    id="bar",
-                ),
-                AIMessage(
+        messages = [
+            SystemMessage("you're a good assistant."),
+            HumanMessage(
+                "what's your favorite color",
+                id="foo",
+            ),
+            HumanMessage(
+                "wait your favorite food",
+                id="bar",
+            ),
+            AIMessage(
+                "my favorite colo",
+                tool_calls=[
+                    ToolCall(
+                        name="blah_tool", args={"x": 2}, id="123", type="tool_call"
+                    )
+                ],
+                id="baz",
+            ),
+            AIMessage(
+                [{"type": "text", "text": "my favorite dish is lasagna"}],
+                tool_calls=[
+                    ToolCall(
+                        name="blah_tool",
+                        args={"x": -10},
+                        id="456",
+                        type="tool_call",
+                    )
+                ],
+                id="blur",
+            ),
+        ]
+
+        merge_message_runs(messages)
+        ```
+
+        ```python
+        [
+            SystemMessage("you're a good assistant."),
+            HumanMessage(
+                "what's your favorite color\\n"
+                "wait your favorite food", id="foo",
+            ),
+            AIMessage(
+                [
                     "my favorite colo",
-                    tool_calls=[
-                        ToolCall(
-                            name="blah_tool", args={"x": 2}, id="123", type="tool_call"
-                        )
-                    ],
-                    id="baz",
-                ),
-                AIMessage(
-                    [{"type": "text", "text": "my favorite dish is lasagna"}],
-                    tool_calls=[
-                        ToolCall(
-                            name="blah_tool",
-                            args={"x": -10},
-                            id="456",
-                            type="tool_call",
-                        )
-                    ],
-                    id="blur",
-                ),
-            ]
-
-            merge_message_runs(messages)
-
-        .. code-block:: python
-
-            [
-                SystemMessage("you're a good assistant."),
-                HumanMessage(
-                    "what's your favorite color\\n"
-                    "wait your favorite food", id="foo",
-                ),
-                AIMessage(
-                    [
-                        "my favorite colo",
-                        {"type": "text", "text": "my favorite dish is lasagna"}
-                    ],
-                    tool_calls=[
-                        ToolCall({
-                            "name": "blah_tool",
-                            "args": {"x": 2},
-                            "id": "123",
-                            "type": "tool_call"
-                        }),
-                        ToolCall({
-                            "name": "blah_tool",
-                            "args": {"x": -10},
-                            "id": "456",
-                            "type": "tool_call"
-                        })
-                    ]
-                    id="baz"
-                ),
-            ]
+                {"type": "text", "text": "my favorite dish is lasagna"}
+            ],
+                tool_calls=[
+                    ToolCall({
+                        "name": "blah_tool",
+                        "args": {"x": 2},
+                        "id": "123",
+                        "type": "tool_call"
+                    }),
+                    ToolCall({
+                        "name": "blah_tool",
+                        "args": {"x": -10},
+                        "id": "456",
+                        "type": "tool_call"
+                    })
+                ]
+                id="baz"
+            ),
+        ]
 
+        ```
     """
     if not messages:
         return []
@@ -696,174 +690,161 @@ def merge_message_runs(
 # init not at runtime.
 @_runnable_support
 def trim_messages(
-    messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
+    messages: Iterable[MessageLikeRepresentation] | PromptValue,
     *,
     max_tokens: int,
-    token_counter: Union[
-        Callable[[list[BaseMessage]], int],
-        Callable[[BaseMessage], int],
-        BaseLanguageModel,
-    ],
+    token_counter: Callable[[list[BaseMessage]], int]
+    | Callable[[BaseMessage], int]
+    | BaseLanguageModel,
     strategy: Literal["first", "last"] = "last",
     allow_partial: bool = False,
-    end_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
-    start_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
+    end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
+    start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
     include_system: bool = False,
-    text_splitter: Optional[Union[Callable[[str], list[str]], TextSplitter]] = None,
+    text_splitter: Callable[[str], list[str]] | TextSplitter | None = None,
 ) -> list[BaseMessage]:
     r"""Trim messages to be below a token count.
 
-    ``trim_messages`` can be used to reduce the size of a chat history to a specified
-    token count.
+    `trim_messages` can be used to reduce the size of a chat history to a specified
+    token or message count.
 
     In either case, if passing the trimmed chat history back into a chat model
     directly, the resulting chat history should usually satisfy the following
     properties:
 
     1. The resulting chat history should be valid. Most chat models expect that chat
-       history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
-       followed by a ``HumanMessage``. To achieve this, set ``start_on="human"``.
-       In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
-       that involved a tool call.
-       Please see the following link for more information about messages:
-       https://python.langchain.com/docs/concepts/#messages
+       history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`
+       followed by a `HumanMessage`. To achieve this, set `start_on='human'`.
+       In addition, generally a `ToolMessage` can only appear after an `AIMessage`
+       that involved a tool call.
     2. It includes recent messages and drops old messages in the chat history.
-       To achieve this set the ``strategy="last"``.
-    3. Usually, the new chat history should include the ``SystemMessage`` if it
-       was present in the original chat history since the ``SystemMessage`` includes
-       special instructions to the chat model. The ``SystemMessage`` is almost always
-       the first message in the history if present. To achieve this set the
-       ``include_system=True``.
-
-    .. note::
-        The examples below show how to configure ``trim_messages`` to achieve a behavior
+       To achieve this set the `strategy='last'`.
+    3. Usually, the new chat history should include the `SystemMessage` if it
+       was present in the original chat history since the `SystemMessage` includes
+       special instructions to the chat model. The `SystemMessage` is almost always
+       the first message in the history if present. To achieve this set the
+       `include_system=True`.
+
+    !!! note
+        The examples below show how to configure `trim_messages` to achieve a behavior
         consistent with the above properties.
 
     Args:
         messages: Sequence of Message-like objects to trim.
         max_tokens: Max token count of trimmed messages.
-        token_counter: Function or llm for counting tokens in a ``BaseMessage`` or a
-            list of ``BaseMessage``. If a ``BaseLanguageModel`` is passed in then
-            ``BaseLanguageModel.get_num_tokens_from_messages()`` will be used.
-            Set to ``len`` to count the number of **messages** in the chat history.
+        token_counter: Function or llm for counting tokens in a `BaseMessage` or a
+            list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
+            `BaseLanguageModel.get_num_tokens_from_messages()` will be used.
+            Set to `len` to count the number of **messages** in the chat history.
 
-            .. note::
-                Use ``count_tokens_approximately`` to get fast, approximate token
+            !!! note
+                Use `count_tokens_approximately` to get fast, approximate token
                 counts.
-                This is recommended for using ``trim_messages`` on the hot path, where
+                This is recommended for using `trim_messages` on the hot path, where
                 exact token counting is not necessary.
 
         strategy: Strategy for trimming.
-            - ``"first"``: Keep the first ``<= n_count`` tokens of the messages.
-            - ``"last"``: Keep the last ``<= n_count`` tokens of the messages.
-            Default is ``'last'``.
+            - `'first'`: Keep the first `<= n_count` tokens of the messages.
+            - `'last'`: Keep the last `<= n_count` tokens of the messages.
         allow_partial: Whether to split a message if only part of the message can be
-            included. If ``strategy='last'`` then the last partial contents of a message
-            are included. If ``strategy='first'`` then the first partial contents of a
+            included. If `strategy='last'` then the last partial contents of a message
+            are included. If `strategy='first'` then the first partial contents of a
             message are included.
-            Default is False.
         end_on: The message type to end on. If specified then every message after the
-            last occurrence of this type is ignored. If ``strategy='last'`` then this
-            is done before we attempt to get the last ``max_tokens``. If
-            ``strategy='first'`` then this is done after we get the first
-            ``max_tokens``. Can be specified as string names (e.g. ``"system"``,
-            ``"human"``, ``"ai"``, ...) or as ``BaseMessage`` classes (e.g.
-            ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Can be a single
+            last occurrence of this type is ignored. If `strategy='last'` then this
+            is done before we attempt to get the last `max_tokens`. If
+            `strategy='first'` then this is done after we get the first
+            `max_tokens`. Can be specified as string names (e.g. `'system'`,
+            `'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
+            `SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
             type or a list of types.
-            Default is None.
+
         start_on: The message type to start on. Should only be specified if
-            ``strategy='last'``. If specified then every message before
+            `strategy='last'`. If specified then every message before
             the first occurrence of this type is ignored. This is done after we trim
-            the initial messages to the last ``max_tokens``. Does not
-            apply to a ``SystemMessage`` at index 0 if ``include_system=True``. Can be
-            specified as string names (e.g. ``"system"``, ``"human"``, ``"ai"``, ...) or
-            as ``BaseMessage`` classes (e.g. ``SystemMessage``, ``HumanMessage``,
-            ``AIMessage``, ...). Can be a single type or a list of types.
-            Default is None.
-        include_system: Whether to keep the SystemMessage if there is one at index 0.
-            Should only be specified if ``strategy="last"``.
-            Default is False.
-        text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for
+            the initial messages to the last `max_tokens`. Does not
+            apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
+            specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
+            as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
+            `AIMessage`, ...). Can be a single type or a list of types.
+
+        include_system: Whether to keep the `SystemMessage` if there is one at index
+            `0`. Should only be specified if `strategy="last"`.
+        text_splitter: Function or `langchain_text_splitters.TextSplitter` for
             splitting the string contents of a message. Only used if
-            ``allow_partial=True``. If ``strategy='last'`` then the last split tokens
-            from a partial message will be included. if ``strategy='first'`` then the
+            `allow_partial=True`. If `strategy='last'` then the last split tokens
+            from a partial message will be included. if `strategy='first'` then the
             first split tokens from a partial message will be included. Token splitter
             assumes that separators are kept, so that split contents can be directly
             concatenated to recreate the original text. Defaults to splitting on
             newlines.
 
     Returns:
-        list of trimmed BaseMessages.
+        List of trimmed `BaseMessage`.
 
     Raises:
         ValueError: if two incompatible arguments are specified or an unrecognized
-            ``strategy`` is specified.
+            `strategy` is specified.
 
     Example:
-        Trim chat history based on token count, keeping the ``SystemMessage`` if
-        present, and ensuring that the chat history starts with a ``HumanMessage`` (
-        or a ``SystemMessage`` followed by a ``HumanMessage``).
-
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                AIMessage,
-                HumanMessage,
-                BaseMessage,
-                SystemMessage,
-                trim_messages,
-            )
-
-            messages = [
-                SystemMessage(
-                    "you're a good assistant, you always respond with a joke."
-                ),
-                HumanMessage("i wonder why it's called langchain"),
-                AIMessage(
-                    'Well, I guess they thought "WordRope" and "SentenceString" just '
-                    "didn't have the same ring to it!"
-                ),
-                HumanMessage("and who is harrison chasing anyways"),
-                AIMessage(
-                    "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
-                    "cup of coffee in the office!"
-                ),
-                HumanMessage("what do you call a speechless parrot"),
-            ]
-
-
-            trim_messages(
-                messages,
-                max_tokens=45,
-                strategy="last",
-                token_counter=ChatOpenAI(model="gpt-4o"),
-                # Most chat models expect that chat history starts with either:
-                # (1) a HumanMessage or
-                # (2) a SystemMessage followed by a HumanMessage
-                start_on="human",
-                # Usually, we want to keep the SystemMessage
-                # if it's present in the original history.
-                # The SystemMessage has special instructions for the model.
-                include_system=True,
-                allow_partial=False,
-            )
+        Trim chat history based on token count, keeping the `SystemMessage` if
+        present, and ensuring that the chat history starts with a `HumanMessage` (
+        or a `SystemMessage` followed by a `HumanMessage`).
+
+        ```python
+        from langchain_core.messages import (
+            AIMessage,
+            HumanMessage,
+            BaseMessage,
+            SystemMessage,
+            trim_messages,
+        )
 
-        .. code-block:: python
+        messages = [
+            SystemMessage("you're a good assistant, you always respond with a joke."),
+            HumanMessage("i wonder why it's called langchain"),
+            AIMessage(
+                'Well, I guess they thought "WordRope" and "SentenceString" just '
+                "didn't have the same ring to it!"
+            ),
+            HumanMessage("and who is harrison chasing anyways"),
+            AIMessage(
+                "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
+                "cup of coffee in the office!"
+            ),
+            HumanMessage("what do you call a speechless parrot"),
+        ]
+
+
+        trim_messages(
+            messages,
+            max_tokens=45,
+            strategy="last",
+            token_counter=ChatOpenAI(model="gpt-4o"),
+            # Most chat models expect that chat history starts with either:
+            # (1) a HumanMessage or
+            # (2) a SystemMessage followed by a HumanMessage
+            start_on="human",
+            # Usually, we want to keep the SystemMessage
+            # if it's present in the original history.
+            # The SystemMessage has special instructions for the model.
+            include_system=True,
+            allow_partial=False,
+        )
+        ```
 
-            [
-                SystemMessage(
-                    content="you're a good assistant, you always respond with a joke."
-                ),
-                HumanMessage(content="what do you call a speechless parrot"),
-            ]
+        ```python
+        [
+            SystemMessage(
+                content="you're a good assistant, you always respond with a joke."
+            ),
+            HumanMessage(content="what do you call a speechless parrot"),
+        ]
+        ```
 
-        Trim chat history based on the message count, keeping the ``SystemMessage`` if
-        present, and ensuring that the chat history starts with a ``HumanMessage`` (
-        or a ``SystemMessage`` followed by a ``HumanMessage``).
+        Trim chat history based on the message count, keeping the `SystemMessage` if
+        present, and ensuring that the chat history starts with a `HumanMessage` (
+        or a `SystemMessage` followed by a `HumanMessage`).
 
         trim_messages(
             messages,
@@ -885,100 +866,95 @@ def trim_messages(
             allow_partial=False,
         )
 
-        .. code-block:: python
-
-            [
-                SystemMessage(
-                    content="you're a good assistant, you always respond with a joke."
-                ),
-                HumanMessage(content="and who is harrison chasing anyways"),
-                AIMessage(
-                    content="Hmmm let me think.\n\nWhy, he's probably chasing after "
-                    "the last cup of coffee in the office!"
-                ),
-                HumanMessage(content="what do you call a speechless parrot"),
-            ]
-
-
+        ```python
+        [
+            SystemMessage(
+                content="you're a good assistant, you always respond with a joke."
+            ),
+            HumanMessage(content="and who is harrison chasing anyways"),
+            AIMessage(
+                content="Hmmm let me think.\n\nWhy, he's probably chasing after "
+                "the last cup of coffee in the office!"
+            ),
+            HumanMessage(content="what do you call a speechless parrot"),
+        ]
+        ```
         Trim chat history using a custom token counter function that counts the
         number of tokens in each message.
 
-        .. code-block:: python
-
-            messages = [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.", id="first"
-                ),
-                AIMessage(
-                    [
-                        {"type": "text", "text": "This is the FIRST 4 token block."},
-                        {"type": "text", "text": "This is the SECOND 4 token block."},
-                    ],
-                    id="second",
-                ),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.", id="third"
-                ),
-                AIMessage(
-                    "This is a 4 token text. The full message is 10 tokens.",
-                    id="fourth",
-                ),
-            ]
-
-
-            def dummy_token_counter(messages: list[BaseMessage]) -> int:
-                # treat each message like it adds 3 default tokens at the beginning
-                # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
-                # per message.
-
-                default_content_len = 4
-                default_msg_prefix_len = 3
-                default_msg_suffix_len = 3
-
-                count = 0
-                for msg in messages:
-                    if isinstance(msg.content, str):
-                        count += (
-                            default_msg_prefix_len
-                            + default_content_len
-                            + default_msg_suffix_len
-                        )
-                    if isinstance(msg.content, list):
-                        count += (
-                            default_msg_prefix_len
-                            + len(msg.content) * default_content_len
-                            + default_msg_suffix_len
-                        )
-                return count
-
-        First 30 tokens, allowing partial messages:
-        .. code-block:: python
-
-            trim_messages(
-                messages,
-                max_tokens=30,
-                token_counter=dummy_token_counter,
-                strategy="first",
-                allow_partial=True,
-            )
-
-        .. code-block:: python
-
+        ```python
+        messages = [
+            SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.", id="first"
+            ),
+            AIMessage(
                 [
-                SystemMessage(
-                    "This is a 4 token text. The full message is 10 tokens."
-                ),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.",
-                    id="first",
-                ),
-                AIMessage(
-                    [{"type": "text", "text": "This is the FIRST 4 token block."}],
-                    id="second",
-                ),
-            ]
+                    {"type": "text", "text": "This is the FIRST 4 token block."},
+                    {"type": "text", "text": "This is the SECOND 4 token block."},
+                ],
+                id="second",
+            ),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.", id="third"
+            ),
+            AIMessage(
+                "This is a 4 token text. The full message is 10 tokens.",
+                id="fourth",
+            ),
+        ]
+
+
+        def dummy_token_counter(messages: list[BaseMessage]) -> int:
+            # treat each message like it adds 3 default tokens at the beginning
+            # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
+            # per message.
+
+            default_content_len = 4
+            default_msg_prefix_len = 3
+            default_msg_suffix_len = 3
+
+            count = 0
+            for msg in messages:
+                if isinstance(msg.content, str):
+                    count += (
+                        default_msg_prefix_len
+                        + default_content_len
+                        + default_msg_suffix_len
+                    )
+                if isinstance(msg.content, list):
+                    count += (
+                        default_msg_prefix_len
+                        + len(msg.content) * default_content_len
+                        + default_msg_suffix_len
+                    )
+            return count
+        ```
 
+        First 30 tokens, allowing partial messages:
+        ```python
+        trim_messages(
+            messages,
+            max_tokens=30,
+            token_counter=dummy_token_counter,
+            strategy="first",
+            allow_partial=True,
+        )
+        ```
+
+        ```python
+        [
+            SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.",
+                id="first",
+            ),
+            AIMessage(
+                [{"type": "text", "text": "This is the FIRST 4 token block."}],
+                id="second",
+            ),
+        ]
+        ```
     """
     # Validate arguments
     if start_on and strategy == "first":
@@ -1042,88 +1018,90 @@ def trim_messages(
 
 
 def convert_to_openai_messages(
-    messages: Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
+    messages: MessageLikeRepresentation | Sequence[MessageLikeRepresentation],
     *,
     text_format: Literal["string", "block"] = "string",
-) -> Union[dict, list[dict]]:
+    include_id: bool = False,
+) -> dict | list[dict]:
     """Convert LangChain messages into OpenAI message dicts.
 
     Args:
         messages: Message-like object or iterable of objects whose contents are
             in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
         text_format: How to format string or text block contents:
-            - ``"string"``:
-                If a message has a string content, this is left as a string. If
-                a message has content blocks that are all of type ``"text"``, these
-                are joined with a newline to make a single string. If a message has
-                content blocks and at least one isn't of type ``"text"``, then
-                all blocks are left as dicts.
-            - ``"block"``:
-                If a message has a string content, this is turned into a list
-                with a single content block of type ``"text"``. If a message has
-                content blocks these are left as is.
+            - `'string'`:
+                If a message has a string content, this is left as a string. If
+                a message has content blocks that are all of type `'text'`, these
+                are joined with a newline to make a single string. If a message has
+                content blocks and at least one isn't of type `'text'`, then
+                all blocks are left as dicts.
+            - `'block'`:
+                If a message has a string content, this is turned into a list
+                with a single content block of type `'text'`. If a message has
+                content blocks these are left as is.
+        include_id: Whether to include message IDs in the openai messages, if they
+            are present in the source messages.
 
     Raises:
-        ValueError: if an unrecognized ``text_format`` is specified, or if a message
+        ValueError: if an unrecognized `text_format` is specified, or if a message
             content block is missing expected keys.
 
     Returns:
         The return type depends on the input type:
 
         - dict:
-            If a single message-like object is passed in, a single OpenAI message
-            dict is returned.
+            If a single message-like object is passed in, a single OpenAI message
+            dict is returned.
         - list[dict]:
-            If a sequence of message-like objects are passed in, a list of OpenAI
-            message dicts is returned.
+            If a sequence of message-like objects are passed in, a list of OpenAI
+            message dicts is returned.
 
     Example:
+        ```python
+        from langchain_core.messages import (
+            convert_to_openai_messages,
+            AIMessage,
+            SystemMessage,
+            ToolMessage,
+        )
 
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                convert_to_openai_messages,
-                AIMessage,
-                SystemMessage,
-                ToolMessage,
-            )
-
-            messages = [
-                SystemMessage([{"type": "text", "text": "foo"}]),
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "whats in this"},
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
-                        },
-                    ],
-                },
-                AIMessage(
-                    "",
-                    tool_calls=[
-                        {
-                            "name": "analyze",
-                            "args": {"baz": "buz"},
-                            "id": "1",
-                            "type": "tool_call",
-                        }
-                    ],
-                ),
-                ToolMessage("foobar", tool_call_id="1", name="bar"),
-                {"role": "assistant", "content": "thats nice"},
-            ]
-            oai_messages = convert_to_openai_messages(messages)
-            # -> [
-            # {'role': 'system', 'content': 'foo'},
-            # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
-            # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
-            # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
-            # {'role': 'assistant', 'content': 'thats nice'}
-            # ]
-
-        .. versionadded:: 0.3.11
+        messages = [
+            SystemMessage([{"type": "text", "text": "foo"}]),
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "whats in this"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
+                    },
+                ],
+            },
+            AIMessage(
+                "",
+                tool_calls=[
+                    {
+                        "name": "analyze",
+                        "args": {"baz": "buz"},
+                        "id": "1",
+                        "type": "tool_call",
+                    }
+                ],
+            ),
+            ToolMessage("foobar", tool_call_id="1", name="bar"),
+            {"role": "assistant", "content": "thats nice"},
+        ]
+        oai_messages = convert_to_openai_messages(messages)
+        # -> [
+        # {'role': 'system', 'content': 'foo'},
+        # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
+        # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
+        # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
+        # {'role': 'assistant', 'content': 'thats nice'}
+        # ]
+        ```
+
+        !!! version-added "Added in `langchain-core` 0.3.11"
 
     """  # noqa: E501
     if text_format not in {"string", "block"}:
@@ -1140,7 +1118,7 @@
     for i, message in enumerate(messages):
         oai_msg: dict = {"role": _get_message_openai_role(message)}
         tool_messages: list = []
-        content: Union[str, list[dict]]
+        content: str | list[dict]
 
         if message.name:
             oai_msg["name"] = message.name
@@ -1150,6 +1128,8 @@
             oai_msg["refusal"] = message.additional_kwargs["refusal"]
         if isinstance(message, ToolMessage):
             oai_msg["tool_call_id"] = message.tool_call_id
+        if include_id and message.id:
+            oai_msg["id"] = message.id
 
         if not message.content:
             content = "" if text_format == "string" else []
@@ -1421,10 +1401,8 @@ def _first_max_tokens(
    max_tokens: int,
    token_counter: Callable[[list[BaseMessage]], int],
    text_splitter: Callable[[str], list[str]],
-    partial_strategy: Optional[Literal["first", "last"]] = None,
-    end_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
+    partial_strategy: Literal["first", "last"] | None = None,
+    end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
 ) -> list[BaseMessage]:
     messages = list(messages)
     if not messages:
@@ -1541,12 +1519,8 @@ def _last_max_tokens(
     text_splitter: Callable[[str], list[str]],
     allow_partial: bool = False,
     include_system: bool = False,
-    start_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
-    end_on: Optional[
-        Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
-    ] = None,
+    start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
+    end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
 ) -> list[BaseMessage]:
     messages = list(messages)
     if len(messages) == 0:
@@ -1647,7 +1621,7 @@ def _default_text_splitter(text: str) -> list[str]:
 
 def _is_message_type(
     message: BaseMessage,
-    type_: Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]],
+    type_: str | type[BaseMessage] | Sequence[str | type[BaseMessage]],
 ) -> bool:
     types = [type_] if isinstance(type_, (str, type)) else type_
     types_str = [t for t in types if isinstance(t, str)]
@@ -1707,27 +1681,27 @@ def count_tokens_approximately(
     Args:
         messages: List of messages to count tokens for.
         chars_per_token: Number of characters per token to use for the approximation.
-            One token corresponds to ~4 chars for common English text.
-            You can also specify float values for more fine-grained control.
-            See more here: https://platform.openai.com/tokenizer
-        extra_tokens_per_message: Number of extra tokens to add per message.
-            Default is 3 (special tokens, including beginning/end of message).
-            You can also specify float values for more fine-grained control.
-            See more here: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+            One token corresponds to ~4 chars for common English text.
+            You can also specify `float` values for more fine-grained control.
+            [See more here](https://platform.openai.com/tokenizer).
+        extra_tokens_per_message: Number of extra tokens to add per message, e.g.
+            special tokens, including beginning/end of message.
+            You can also specify `float` values for more fine-grained control.
+            [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
         count_name: Whether to include message names in the count.
             Enabled by default.
 
     Returns:
         Approximate number of tokens in the messages.
 
-    Note:
+    !!! note
         This is a simple approximation that may not match the exact token count used by
         specific models. For accurate counts, use model-specific tokenizers.
 
     Warning:
         This function does not currently support counting image tokens.
 
-    .. versionadded:: 0.3.46
+    !!! version-added "Added in `langchain-core` 0.3.46"
 
     """
     token_count = 0.0