langchain-core 1.0.0a6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +51 -64
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +20 -22
  8. langchain_core/caches.py +65 -66
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +321 -336
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +436 -513
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +53 -68
  17. langchain_core/document_loaders/base.py +27 -25
  18. langchain_core/document_loaders/blob_loaders.py +1 -1
  19. langchain_core/document_loaders/langsmith.py +44 -48
  20. langchain_core/documents/__init__.py +23 -3
  21. langchain_core/documents/base.py +98 -90
  22. langchain_core/documents/compressor.py +10 -10
  23. langchain_core/documents/transformers.py +34 -35
  24. langchain_core/embeddings/fake.py +50 -54
  25. langchain_core/example_selectors/length_based.py +1 -1
  26. langchain_core/example_selectors/semantic_similarity.py +28 -32
  27. langchain_core/exceptions.py +21 -20
  28. langchain_core/globals.py +3 -151
  29. langchain_core/indexing/__init__.py +1 -1
  30. langchain_core/indexing/api.py +121 -126
  31. langchain_core/indexing/base.py +73 -75
  32. langchain_core/indexing/in_memory.py +4 -6
  33. langchain_core/language_models/__init__.py +14 -29
  34. langchain_core/language_models/_utils.py +58 -61
  35. langchain_core/language_models/base.py +53 -162
  36. langchain_core/language_models/chat_models.py +298 -387
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +42 -36
  39. langchain_core/language_models/llms.py +125 -235
  40. langchain_core/load/dump.py +9 -12
  41. langchain_core/load/load.py +18 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +42 -40
  44. langchain_core/messages/__init__.py +10 -16
  45. langchain_core/messages/ai.py +148 -148
  46. langchain_core/messages/base.py +53 -51
  47. langchain_core/messages/block_translators/__init__.py +19 -22
  48. langchain_core/messages/block_translators/anthropic.py +6 -6
  49. langchain_core/messages/block_translators/bedrock_converse.py +5 -5
  50. langchain_core/messages/block_translators/google_genai.py +10 -7
  51. langchain_core/messages/block_translators/google_vertexai.py +4 -32
  52. langchain_core/messages/block_translators/groq.py +117 -21
  53. langchain_core/messages/block_translators/langchain_v0.py +5 -5
  54. langchain_core/messages/block_translators/openai.py +11 -11
  55. langchain_core/messages/chat.py +2 -6
  56. langchain_core/messages/content.py +337 -328
  57. langchain_core/messages/function.py +6 -10
  58. langchain_core/messages/human.py +24 -31
  59. langchain_core/messages/modifier.py +2 -2
  60. langchain_core/messages/system.py +19 -29
  61. langchain_core/messages/tool.py +74 -90
  62. langchain_core/messages/utils.py +474 -504
  63. langchain_core/output_parsers/__init__.py +13 -10
  64. langchain_core/output_parsers/base.py +61 -61
  65. langchain_core/output_parsers/format_instructions.py +9 -4
  66. langchain_core/output_parsers/json.py +12 -10
  67. langchain_core/output_parsers/list.py +21 -23
  68. langchain_core/output_parsers/openai_functions.py +49 -47
  69. langchain_core/output_parsers/openai_tools.py +16 -21
  70. langchain_core/output_parsers/pydantic.py +13 -14
  71. langchain_core/output_parsers/string.py +5 -5
  72. langchain_core/output_parsers/transform.py +15 -17
  73. langchain_core/output_parsers/xml.py +35 -34
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +18 -18
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +10 -11
  78. langchain_core/outputs/llm_result.py +10 -10
  79. langchain_core/prompt_values.py +11 -17
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +48 -56
  82. langchain_core/prompts/chat.py +275 -325
  83. langchain_core/prompts/dict.py +5 -5
  84. langchain_core/prompts/few_shot.py +81 -88
  85. langchain_core/prompts/few_shot_with_templates.py +11 -13
  86. langchain_core/prompts/image.py +12 -14
  87. langchain_core/prompts/loading.py +4 -6
  88. langchain_core/prompts/message.py +3 -3
  89. langchain_core/prompts/prompt.py +24 -39
  90. langchain_core/prompts/string.py +26 -10
  91. langchain_core/prompts/structured.py +49 -53
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +61 -198
  94. langchain_core/runnables/base.py +1476 -1626
  95. langchain_core/runnables/branch.py +53 -57
  96. langchain_core/runnables/config.py +72 -89
  97. langchain_core/runnables/configurable.py +120 -137
  98. langchain_core/runnables/fallbacks.py +83 -79
  99. langchain_core/runnables/graph.py +91 -97
  100. langchain_core/runnables/graph_ascii.py +27 -28
  101. langchain_core/runnables/graph_mermaid.py +38 -50
  102. langchain_core/runnables/graph_png.py +15 -16
  103. langchain_core/runnables/history.py +135 -148
  104. langchain_core/runnables/passthrough.py +124 -150
  105. langchain_core/runnables/retry.py +46 -51
  106. langchain_core/runnables/router.py +25 -30
  107. langchain_core/runnables/schema.py +75 -80
  108. langchain_core/runnables/utils.py +60 -67
  109. langchain_core/stores.py +85 -121
  110. langchain_core/structured_query.py +8 -8
  111. langchain_core/sys_info.py +27 -29
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +284 -229
  114. langchain_core/tools/convert.py +160 -155
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -11
  117. langchain_core/tools/simple.py +19 -24
  118. langchain_core/tools/structured.py +32 -39
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/base.py +97 -99
  121. langchain_core/tracers/context.py +29 -52
  122. langchain_core/tracers/core.py +49 -53
  123. langchain_core/tracers/evaluation.py +11 -11
  124. langchain_core/tracers/event_stream.py +65 -64
  125. langchain_core/tracers/langchain.py +21 -21
  126. langchain_core/tracers/log_stream.py +45 -45
  127. langchain_core/tracers/memory_stream.py +3 -3
  128. langchain_core/tracers/root_listeners.py +16 -16
  129. langchain_core/tracers/run_collector.py +2 -4
  130. langchain_core/tracers/schemas.py +0 -129
  131. langchain_core/tracers/stdout.py +3 -3
  132. langchain_core/utils/__init__.py +1 -4
  133. langchain_core/utils/_merge.py +2 -2
  134. langchain_core/utils/aiter.py +57 -61
  135. langchain_core/utils/env.py +9 -9
  136. langchain_core/utils/function_calling.py +89 -186
  137. langchain_core/utils/html.py +7 -8
  138. langchain_core/utils/input.py +6 -6
  139. langchain_core/utils/interactive_env.py +1 -1
  140. langchain_core/utils/iter.py +36 -40
  141. langchain_core/utils/json.py +4 -3
  142. langchain_core/utils/json_schema.py +9 -9
  143. langchain_core/utils/mustache.py +8 -10
  144. langchain_core/utils/pydantic.py +33 -35
  145. langchain_core/utils/strings.py +6 -9
  146. langchain_core/utils/usage.py +1 -1
  147. langchain_core/utils/utils.py +66 -62
  148. langchain_core/vectorstores/base.py +182 -216
  149. langchain_core/vectorstores/in_memory.py +101 -176
  150. langchain_core/vectorstores/utils.py +5 -5
  151. langchain_core/version.py +1 -1
  152. langchain_core-1.0.3.dist-info/METADATA +69 -0
  153. langchain_core-1.0.3.dist-info/RECORD +172 -0
  154. {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.3.dist-info}/WHEEL +1 -1
  155. langchain_core/memory.py +0 -120
  156. langchain_core/messages/block_translators/ollama.py +0 -47
  157. langchain_core/prompts/pipeline.py +0 -138
  158. langchain_core/pydantic_v1/__init__.py +0 -30
  159. langchain_core/pydantic_v1/dataclasses.py +0 -23
  160. langchain_core/pydantic_v1/main.py +0 -23
  161. langchain_core/tracers/langchain_v1.py +0 -31
  162. langchain_core/utils/loading.py +0 -35
  163. langchain_core-1.0.0a6.dist-info/METADATA +0 -67
  164. langchain_core-1.0.0a6.dist-info/RECORD +0 -181
  165. langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
@@ -5,7 +5,6 @@ Some examples of what you can do with these functions include:
5
5
  * Convert messages to strings (serialization)
6
6
  * Convert messages from dicts to Message objects (deserialization)
7
7
  * Filter messages from a list of messages based on name, type or id etc.
8
-
9
8
  """
10
9
 
11
10
  from __future__ import annotations
@@ -15,16 +14,13 @@ import inspect
15
14
  import json
16
15
  import logging
17
16
  import math
18
- from collections.abc import Iterable, Sequence
17
+ from collections.abc import Callable, Iterable, Sequence
19
18
  from functools import partial
20
19
  from typing import (
21
20
  TYPE_CHECKING,
22
21
  Annotated,
23
22
  Any,
24
- Callable,
25
23
  Literal,
26
- Optional,
27
- Union,
28
24
  cast,
29
25
  overload,
30
26
  )
@@ -76,22 +72,21 @@ def _get_type(v: Any) -> str:
76
72
 
77
73
 
78
74
  AnyMessage = Annotated[
79
- Union[
80
- Annotated[AIMessage, Tag(tag="ai")],
81
- Annotated[HumanMessage, Tag(tag="human")],
82
- Annotated[ChatMessage, Tag(tag="chat")],
83
- Annotated[SystemMessage, Tag(tag="system")],
84
- Annotated[FunctionMessage, Tag(tag="function")],
85
- Annotated[ToolMessage, Tag(tag="tool")],
86
- Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")],
87
- Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")],
88
- Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")],
89
- Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")],
90
- Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")],
91
- Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
92
- ],
75
+ Annotated[AIMessage, Tag(tag="ai")]
76
+ | Annotated[HumanMessage, Tag(tag="human")]
77
+ | Annotated[ChatMessage, Tag(tag="chat")]
78
+ | Annotated[SystemMessage, Tag(tag="system")]
79
+ | Annotated[FunctionMessage, Tag(tag="function")]
80
+ | Annotated[ToolMessage, Tag(tag="tool")]
81
+ | Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")]
82
+ | Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")]
83
+ | Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")]
84
+ | Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")]
85
+ | Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")]
86
+ | Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
93
87
  Field(discriminator=Discriminator(_get_type)),
94
88
  ]
89
+ """A type representing any defined `Message` or `MessageChunk` type."""
95
90
 
96
91
 
97
92
  def get_buffer_string(
@@ -101,10 +96,8 @@ def get_buffer_string(
101
96
 
102
97
  Args:
103
98
  messages: Messages to be converted to strings.
104
- human_prefix: The prefix to prepend to contents of ``HumanMessage``s.
105
- Default is ``'Human'``.
106
- ai_prefix: The prefix to prepend to contents of ``AIMessage``. Default is
107
- ``'AI'``.
99
+ human_prefix: The prefix to prepend to contents of `HumanMessage`s.
100
+ ai_prefix: The prefix to prepend to contents of `AIMessage`.
108
101
 
109
102
  Returns:
110
103
  A single string concatenation of all input messages.
@@ -113,17 +106,16 @@ def get_buffer_string(
113
106
  ValueError: If an unsupported message type is encountered.
114
107
 
115
108
  Example:
116
- .. code-block:: python
117
-
118
- from langchain_core import AIMessage, HumanMessage
119
-
120
- messages = [
121
- HumanMessage(content="Hi, how are you?"),
122
- AIMessage(content="Good, how are you?"),
123
- ]
124
- get_buffer_string(messages)
125
- # -> "Human: Hi, how are you?\nAI: Good, how are you?"
126
-
109
+ ```python
110
+ from langchain_core.messages import AIMessage, HumanMessage
111
+
112
+ messages = [
113
+ HumanMessage(content="Hi, how are you?"),
114
+ AIMessage(content="Good, how are you?"),
115
+ ]
116
+ get_buffer_string(messages)
117
+ # -> "Human: Hi, how are you?\nAI: Good, how are you?"
118
+ ```
127
119
  """
128
120
  string_messages = []
129
121
  for m in messages:
@@ -183,7 +175,7 @@ def _message_from_dict(message: dict) -> BaseMessage:
183
175
 
184
176
 
185
177
  def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
186
- """Convert a sequence of messages from dicts to ``Message`` objects.
178
+ """Convert a sequence of messages from dicts to `Message` objects.
187
179
 
188
180
  Args:
189
181
  messages: Sequence of messages (as dicts) to convert.
@@ -196,7 +188,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
196
188
 
197
189
 
198
190
  def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
199
- """Convert a message chunk to a ``Message``.
191
+ """Convert a message chunk to a `Message`.
200
192
 
201
193
  Args:
202
194
  chunk: Message chunk to convert.
@@ -215,38 +207,39 @@ def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
215
207
  )
216
208
 
217
209
 
218
- MessageLikeRepresentation = Union[
219
- BaseMessage, list[str], tuple[str, str], str, dict[str, Any]
220
- ]
210
+ MessageLikeRepresentation = (
211
+ BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]
212
+ )
213
+ """A type representing the various ways a message can be represented."""
221
214
 
222
215
 
223
216
  def _create_message_from_message_type(
224
217
  message_type: str,
225
218
  content: str,
226
- name: Optional[str] = None,
227
- tool_call_id: Optional[str] = None,
228
- tool_calls: Optional[list[dict[str, Any]]] = None,
229
- id: Optional[str] = None,
219
+ name: str | None = None,
220
+ tool_call_id: str | None = None,
221
+ tool_calls: list[dict[str, Any]] | None = None,
222
+ id: str | None = None,
230
223
  **additional_kwargs: Any,
231
224
  ) -> BaseMessage:
232
- """Create a message from a ``Message`` type and content string.
225
+ """Create a message from a `Message` type and content string.
233
226
 
234
227
  Args:
235
- message_type: (str) the type of the message (e.g., ``'human'``, ``'ai'``, etc.).
228
+ message_type: (str) the type of the message (e.g., `'human'`, `'ai'`, etc.).
236
229
  content: (str) the content string.
237
- name: (str) the name of the message. Default is None.
238
- tool_call_id: (str) the tool call id. Default is None.
239
- tool_calls: (list[dict[str, Any]]) the tool calls. Default is None.
240
- id: (str) the id of the message. Default is None.
230
+ name: (str) the name of the message.
231
+ tool_call_id: (str) the tool call id.
232
+ tool_calls: (list[dict[str, Any]]) the tool calls.
233
+ id: (str) the id of the message.
241
234
  additional_kwargs: (dict[str, Any]) additional keyword arguments.
242
235
 
243
236
  Returns:
244
237
  a message of the appropriate type.
245
238
 
246
239
  Raises:
247
- ValueError: if the message type is not one of ``'human'``, ``'user'``, ``'ai'``,
248
- ``'assistant'``, ``'function'``, ``'tool'``, ``'system'``, or
249
- ``'developer'``.
240
+ ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
241
+ `'assistant'`, `'function'`, `'tool'`, `'system'`, or
242
+ `'developer'`.
250
243
  """
251
244
  kwargs: dict[str, Any] = {}
252
245
  if name is not None:
@@ -312,21 +305,21 @@ def _create_message_from_message_type(
312
305
 
313
306
 
314
307
  def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
315
- """Instantiate a ``Message`` from a variety of message formats.
308
+ """Instantiate a `Message` from a variety of message formats.
316
309
 
317
310
  The message format can be one of the following:
318
311
 
319
- - ``BaseMessagePromptTemplate``
320
- - ``BaseMessage``
321
- - 2-tuple of (role string, template); e.g., (``'human'``, ``'{user_input}'``)
312
+ - `BaseMessagePromptTemplate`
313
+ - `BaseMessage`
314
+ - 2-tuple of (role string, template); e.g., (`'human'`, `'{user_input}'`)
322
315
  - dict: a message dict with role and content keys
323
- - string: shorthand for (``'human'``, template); e.g., ``'{user_input}'``
316
+ - string: shorthand for (`'human'`, template); e.g., `'{user_input}'`
324
317
 
325
318
  Args:
326
319
  message: a representation of a message in one of the supported formats.
327
320
 
328
321
  Returns:
329
- an instance of a message or a message template.
322
+ An instance of a message or a message template.
330
323
 
331
324
  Raises:
332
325
  NotImplementedError: if the message type is not supported.
@@ -368,7 +361,7 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
368
361
 
369
362
 
370
363
  def convert_to_messages(
371
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
364
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
372
365
  ) -> list[BaseMessage]:
373
366
  """Convert a sequence of messages to a list of messages.
374
367
 
@@ -399,12 +392,12 @@ def _runnable_support(func: Callable) -> Callable:
399
392
  ) -> list[BaseMessage]: ...
400
393
 
401
394
  def wrapped(
402
- messages: Union[Sequence[MessageLikeRepresentation], None] = None,
395
+ messages: Sequence[MessageLikeRepresentation] | None = None,
403
396
  **kwargs: Any,
404
- ) -> Union[
405
- list[BaseMessage],
406
- Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]],
407
- ]:
397
+ ) -> (
398
+ list[BaseMessage]
399
+ | Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
400
+ ):
408
401
  # Import locally to prevent circular import.
409
402
  from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
410
403
 
@@ -418,89 +411,88 @@ def _runnable_support(func: Callable) -> Callable:
418
411
 
419
412
  @_runnable_support
420
413
  def filter_messages(
421
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
414
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
422
415
  *,
423
- include_names: Optional[Sequence[str]] = None,
424
- exclude_names: Optional[Sequence[str]] = None,
425
- include_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
426
- exclude_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
427
- include_ids: Optional[Sequence[str]] = None,
428
- exclude_ids: Optional[Sequence[str]] = None,
429
- exclude_tool_calls: Optional[Sequence[str] | bool] = None,
416
+ include_names: Sequence[str] | None = None,
417
+ exclude_names: Sequence[str] | None = None,
418
+ include_types: Sequence[str | type[BaseMessage]] | None = None,
419
+ exclude_types: Sequence[str | type[BaseMessage]] | None = None,
420
+ include_ids: Sequence[str] | None = None,
421
+ exclude_ids: Sequence[str] | None = None,
422
+ exclude_tool_calls: Sequence[str] | bool | None = None,
430
423
  ) -> list[BaseMessage]:
431
- """Filter messages based on ``name``, ``type`` or ``id``.
424
+ """Filter messages based on `name`, `type` or `id`.
432
425
 
433
426
  Args:
434
427
  messages: Sequence of Message-like objects to filter.
435
- include_names: Message names to include. Default is None.
436
- exclude_names: Messages names to exclude. Default is None.
428
+ include_names: Message names to include.
429
+ exclude_names: Message names to exclude.
437
430
  include_types: Message types to include. Can be specified as string names
438
- (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
439
- classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
440
- Default is None.
431
+ (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
432
+ classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
433
+
441
434
  exclude_types: Message types to exclude. Can be specified as string names
442
- (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
443
- classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
444
- Default is None.
445
- include_ids: Message IDs to include. Default is None.
446
- exclude_ids: Message IDs to exclude. Default is None.
447
- exclude_tool_calls: Tool call IDs to exclude. Default is None.
435
+ (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
436
+ classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
437
+
438
+ include_ids: Message IDs to include.
439
+ exclude_ids: Message IDs to exclude.
440
+ exclude_tool_calls: Tool call IDs to exclude.
448
441
  Can be one of the following:
449
- - ``True``: all ``AIMessage``s with tool calls and all
450
- ``ToolMessage``s will be excluded.
442
+ - `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
443
+ objects will be excluded.
451
444
  - a sequence of tool call IDs to exclude:
452
- - ``ToolMessage``s with the corresponding tool call ID will be
453
- excluded.
454
- - The ``tool_calls`` in the AIMessage will be updated to exclude
455
- matching tool calls. If all ``tool_calls`` are filtered from an
456
- AIMessage, the whole message is excluded.
445
+ - `ToolMessage` objects with the corresponding tool call ID will be
446
+ excluded.
447
+ - The `tool_calls` in the AIMessage will be updated to exclude
448
+ matching tool calls. If all `tool_calls` are filtered from an
449
+ AIMessage, the whole message is excluded.
457
450
 
458
451
  Returns:
459
- A list of Messages that meets at least one of the ``incl_*`` conditions and none
460
- of the ``excl_*`` conditions. If not ``incl_*`` conditions are specified then
452
+ A list of Messages that meets at least one of the `include_*` conditions and none
453
+ of the `exclude_*` conditions. If no `include_*` conditions are specified then
461
454
  anything that is not explicitly excluded will be included.
462
455
 
463
456
  Raises:
464
- ValueError if two incompatible arguments are provided.
457
+ ValueError: If two incompatible arguments are provided.
465
458
 
466
459
  Example:
467
- .. code-block:: python
468
-
469
- from langchain_core.messages import (
470
- filter_messages,
471
- AIMessage,
472
- HumanMessage,
473
- SystemMessage,
474
- )
475
-
476
- messages = [
477
- SystemMessage("you're a good assistant."),
478
- HumanMessage("what's your name", id="foo", name="example_user"),
479
- AIMessage("steve-o", id="bar", name="example_assistant"),
480
- HumanMessage(
481
- "what's your favorite color",
482
- id="baz",
483
- ),
484
- AIMessage(
485
- "silicon blue",
486
- id="blah",
487
- ),
488
- ]
489
-
490
- filter_messages(
491
- messages,
492
- incl_names=("example_user", "example_assistant"),
493
- incl_types=("system",),
494
- excl_ids=("bar",),
495
- )
496
-
497
- .. code-block:: python
498
-
499
- [
500
- SystemMessage("you're a good assistant."),
501
- HumanMessage("what's your name", id="foo", name="example_user"),
502
- ]
460
+ ```python
461
+ from langchain_core.messages import (
462
+ filter_messages,
463
+ AIMessage,
464
+ HumanMessage,
465
+ SystemMessage,
466
+ )
503
467
 
468
+ messages = [
469
+ SystemMessage("you're a good assistant."),
470
+ HumanMessage("what's your name", id="foo", name="example_user"),
471
+ AIMessage("steve-o", id="bar", name="example_assistant"),
472
+ HumanMessage(
473
+ "what's your favorite color",
474
+ id="baz",
475
+ ),
476
+ AIMessage(
477
+ "silicon blue",
478
+ id="blah",
479
+ ),
480
+ ]
481
+
482
+ filter_messages(
483
+ messages,
484
+ include_names=("example_user", "example_assistant"),
485
+ include_types=("system",),
486
+ exclude_ids=("bar",),
487
+ )
488
+ ```
489
+
490
+ ```python
491
+ [
492
+ SystemMessage("you're a good assistant."),
493
+ HumanMessage("what's your name", id="foo", name="example_user"),
494
+ ]
495
+ ```
504
496
  """
505
497
  messages = convert_to_messages(messages)
506
498
  filtered: list[BaseMessage] = []
@@ -563,20 +555,19 @@ def filter_messages(
563
555
 
564
556
  @_runnable_support
565
557
  def merge_message_runs(
566
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
558
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
567
559
  *,
568
560
  chunk_separator: str = "\n",
569
561
  ) -> list[BaseMessage]:
570
562
  r"""Merge consecutive Messages of the same type.
571
563
 
572
- .. note::
573
- ToolMessages are not merged, as each has a distinct tool call id that can't be
574
- merged.
564
+ !!! note
565
+ `ToolMessage` objects are not merged, as each has a distinct tool call id that
566
+ can't be merged.
575
567
 
576
568
  Args:
577
569
  messages: Sequence of Message-like objects to merge.
578
570
  chunk_separator: Specify the string to be inserted between message chunks.
579
- Default is ``'\n'``.
580
571
 
581
572
  Returns:
582
573
  list of BaseMessages with consecutive runs of message types merged into single
@@ -584,87 +575,86 @@ def merge_message_runs(
584
575
  the merged content is a concatenation of the two strings with a new-line
585
576
  separator.
586
577
  The separator inserted between message chunks can be controlled by specifying
587
- any string with ``chunk_separator``. If at least one of the messages has a list
578
+ any string with `chunk_separator`. If at least one of the messages has a list
588
579
  of content blocks, the merged content is a list of content blocks.
589
580
 
590
581
  Example:
582
+ ```python
583
+ from langchain_core.messages import (
584
+ merge_message_runs,
585
+ AIMessage,
586
+ HumanMessage,
587
+ SystemMessage,
588
+ ToolCall,
589
+ )
591
590
 
592
- .. code-block:: python
593
-
594
- from langchain_core.messages import (
595
- merge_message_runs,
596
- AIMessage,
597
- HumanMessage,
598
- SystemMessage,
599
- ToolCall,
600
- )
601
-
602
- messages = [
603
- SystemMessage("you're a good assistant."),
604
- HumanMessage(
605
- "what's your favorite color",
606
- id="foo",
607
- ),
608
- HumanMessage(
609
- "wait your favorite food",
610
- id="bar",
611
- ),
612
- AIMessage(
591
+ messages = [
592
+ SystemMessage("you're a good assistant."),
593
+ HumanMessage(
594
+ "what's your favorite color",
595
+ id="foo",
596
+ ),
597
+ HumanMessage(
598
+ "wait your favorite food",
599
+ id="bar",
600
+ ),
601
+ AIMessage(
602
+ "my favorite colo",
603
+ tool_calls=[
604
+ ToolCall(
605
+ name="blah_tool", args={"x": 2}, id="123", type="tool_call"
606
+ )
607
+ ],
608
+ id="baz",
609
+ ),
610
+ AIMessage(
611
+ [{"type": "text", "text": "my favorite dish is lasagna"}],
612
+ tool_calls=[
613
+ ToolCall(
614
+ name="blah_tool",
615
+ args={"x": -10},
616
+ id="456",
617
+ type="tool_call",
618
+ )
619
+ ],
620
+ id="blur",
621
+ ),
622
+ ]
623
+
624
+ merge_message_runs(messages)
625
+ ```
626
+
627
+ ```python
628
+ [
629
+ SystemMessage("you're a good assistant."),
630
+ HumanMessage(
631
+ "what's your favorite color\\n"
632
+ "wait your favorite food", id="foo",
633
+ ),
634
+ AIMessage(
635
+ [
613
636
  "my favorite colo",
614
- tool_calls=[
615
- ToolCall(
616
- name="blah_tool", args={"x": 2}, id="123", type="tool_call"
617
- )
618
- ],
619
- id="baz",
620
- ),
621
- AIMessage(
622
- [{"type": "text", "text": "my favorite dish is lasagna"}],
623
- tool_calls=[
624
- ToolCall(
625
- name="blah_tool",
626
- args={"x": -10},
627
- id="456",
628
- type="tool_call",
629
- )
630
- ],
631
- id="blur",
632
- ),
633
- ]
634
-
635
- merge_message_runs(messages)
636
-
637
- .. code-block:: python
638
-
639
- [
640
- SystemMessage("you're a good assistant."),
641
- HumanMessage(
642
- "what's your favorite color\\n"
643
- "wait your favorite food", id="foo",
644
- ),
645
- AIMessage(
646
- [
647
- "my favorite colo",
648
- {"type": "text", "text": "my favorite dish is lasagna"}
649
- ],
650
- tool_calls=[
651
- ToolCall({
652
- "name": "blah_tool",
653
- "args": {"x": 2},
654
- "id": "123",
655
- "type": "tool_call"
656
- }),
657
- ToolCall({
658
- "name": "blah_tool",
659
- "args": {"x": -10},
660
- "id": "456",
661
- "type": "tool_call"
662
- })
663
- ]
664
- id="baz"
665
- ),
666
- ]
637
+ {"type": "text", "text": "my favorite dish is lasagna"}
638
+ ],
639
+ tool_calls=[
640
+ ToolCall({
641
+ "name": "blah_tool",
642
+ "args": {"x": 2},
643
+ "id": "123",
644
+ "type": "tool_call"
645
+ }),
646
+ ToolCall({
647
+ "name": "blah_tool",
648
+ "args": {"x": -10},
649
+ "id": "456",
650
+ "type": "tool_call"
651
+ })
652
+ ]
653
+ id="baz"
654
+ ),
655
+ ]
667
656
 
657
+ ```
668
658
  """
669
659
  if not messages:
670
660
  return []
@@ -696,174 +686,161 @@ def merge_message_runs(
696
686
  # init not at runtime.
697
687
  @_runnable_support
698
688
  def trim_messages(
699
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
689
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
700
690
  *,
701
691
  max_tokens: int,
702
- token_counter: Union[
703
- Callable[[list[BaseMessage]], int],
704
- Callable[[BaseMessage], int],
705
- BaseLanguageModel,
706
- ],
692
+ token_counter: Callable[[list[BaseMessage]], int]
693
+ | Callable[[BaseMessage], int]
694
+ | BaseLanguageModel,
707
695
  strategy: Literal["first", "last"] = "last",
708
696
  allow_partial: bool = False,
709
- end_on: Optional[
710
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
711
- ] = None,
712
- start_on: Optional[
713
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
714
- ] = None,
697
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
698
+ start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
715
699
  include_system: bool = False,
716
- text_splitter: Optional[Union[Callable[[str], list[str]], TextSplitter]] = None,
700
+ text_splitter: Callable[[str], list[str]] | TextSplitter | None = None,
717
701
  ) -> list[BaseMessage]:
718
702
  r"""Trim messages to be below a token count.
719
703
 
720
- ``trim_messages`` can be used to reduce the size of a chat history to a specified
721
- token count or specified message count.
704
+ `trim_messages` can be used to reduce the size of a chat history to a specified
705
+ token or message count.
722
706
 
723
707
  In either case, if passing the trimmed chat history back into a chat model
724
708
  directly, the resulting chat history should usually satisfy the following
725
709
  properties:
726
710
 
727
711
  1. The resulting chat history should be valid. Most chat models expect that chat
728
- history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
729
- followed by a ``HumanMessage``. To achieve this, set ``start_on='human'``.
730
- In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
731
- that involved a tool call.
732
- Please see the following link for more information about messages:
733
- https://python.langchain.com/docs/concepts/#messages
712
+ history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`
713
+ followed by a `HumanMessage`. To achieve this, set `start_on='human'`.
714
+ In addition, generally a `ToolMessage` can only appear after an `AIMessage`
715
+ that involved a tool call.
734
716
  2. It includes recent messages and drops old messages in the chat history.
735
- To achieve this set the ``strategy='last'``.
736
- 3. Usually, the new chat history should include the ``SystemMessage`` if it
737
- was present in the original chat history since the ``SystemMessage`` includes
738
- special instructions to the chat model. The ``SystemMessage`` is almost always
739
- the first message in the history if present. To achieve this set the
740
- ``include_system=True``.
741
-
742
- .. note::
743
- The examples below show how to configure ``trim_messages`` to achieve a behavior
717
+ To achieve this, set `strategy='last'`.
718
+ 3. Usually, the new chat history should include the `SystemMessage` if it
719
+ was present in the original chat history since the `SystemMessage` includes
720
+ special instructions to the chat model. The `SystemMessage` is almost always
721
+ the first message in the history if present. To achieve this set the
722
+ `include_system=True`.
723
+
724
+ !!! note
725
+ The examples below show how to configure `trim_messages` to achieve a behavior
744
726
  consistent with the above properties.
745
727
 
746
728
  Args:
747
729
  messages: Sequence of Message-like objects to trim.
748
730
  max_tokens: Max token count of trimmed messages.
749
- token_counter: Function or llm for counting tokens in a ``BaseMessage`` or a
750
- list of ``BaseMessage``. If a ``BaseLanguageModel`` is passed in then
751
- ``BaseLanguageModel.get_num_tokens_from_messages()`` will be used.
752
- Set to ``len`` to count the number of **messages** in the chat history.
731
+ token_counter: Function or LLM for counting tokens in a `BaseMessage` or a
732
+ list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
733
+ `BaseLanguageModel.get_num_tokens_from_messages()` will be used.
734
+ Set to `len` to count the number of **messages** in the chat history.
753
735
 
754
- .. note::
755
- Use ``count_tokens_approximately`` to get fast, approximate token
736
+ !!! note
737
+ Use `count_tokens_approximately` to get fast, approximate token
756
738
  counts.
757
- This is recommended for using ``trim_messages`` on the hot path, where
739
+ This is recommended for using `trim_messages` on the hot path, where
758
740
  exact token counting is not necessary.
759
741
 
760
742
  strategy: Strategy for trimming.
761
- - ``'first'``: Keep the first ``<= n_count`` tokens of the messages.
762
- - ``'last'``: Keep the last ``<= n_count`` tokens of the messages.
763
- Default is ``'last'``.
743
+ - `'first'`: Keep the first `<= n_count` tokens of the messages.
744
+ - `'last'`: Keep the last `<= n_count` tokens of the messages.
764
745
  allow_partial: Whether to split a message if only part of the message can be
765
- included. If ``strategy='last'`` then the last partial contents of a message
766
- are included. If ``strategy='first'`` then the first partial contents of a
746
+ included. If `strategy='last'` then the last partial contents of a message
747
+ are included. If `strategy='first'` then the first partial contents of a
767
748
  message are included.
768
- Default is False.
769
749
  end_on: The message type to end on. If specified then every message after the
770
- last occurrence of this type is ignored. If ``strategy='last'`` then this
771
- is done before we attempt to get the last ``max_tokens``. If
772
- ``strategy='first'`` then this is done after we get the first
773
- ``max_tokens``. Can be specified as string names (e.g. ``'system'``,
774
- ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g.
775
- ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Can be a single
750
+ last occurrence of this type is ignored. If `strategy='last'` then this
751
+ is done before we attempt to get the last `max_tokens`. If
752
+ `strategy='first'` then this is done after we get the first
753
+ `max_tokens`. Can be specified as string names (e.g. `'system'`,
754
+ `'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
755
+ `SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
776
756
  type or a list of types.
777
- Default is None.
757
+
778
758
  start_on: The message type to start on. Should only be specified if
779
- ``strategy='last'``. If specified then every message before
759
+ `strategy='last'`. If specified then every message before
780
760
  the first occurrence of this type is ignored. This is done after we trim
781
- the initial messages to the last ``max_tokens``. Does not
782
- apply to a ``SystemMessage`` at index 0 if ``include_system=True``. Can be
783
- specified as string names (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or
784
- as ``BaseMessage`` classes (e.g. ``SystemMessage``, ``HumanMessage``,
785
- ``AIMessage``, ...). Can be a single type or a list of types.
786
- Default is None.
787
- include_system: Whether to keep the SystemMessage if there is one at index 0.
788
- Should only be specified if ``strategy="last"``.
789
- Default is False.
790
- text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for
761
+ the initial messages to the last `max_tokens`. Does not
762
+ apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
763
+ specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
764
+ as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
765
+ `AIMessage`, ...). Can be a single type or a list of types.
766
+
767
+ include_system: Whether to keep the `SystemMessage` if there is one at index
768
+ `0`. Should only be specified if `strategy="last"`.
769
+ text_splitter: Function or `langchain_text_splitters.TextSplitter` for
791
770
  splitting the string contents of a message. Only used if
792
- ``allow_partial=True``. If ``strategy='last'`` then the last split tokens
793
- from a partial message will be included. if ``strategy='first'`` then the
771
+ `allow_partial=True`. If `strategy='last'` then the last split tokens
772
+ from a partial message will be included. If `strategy='first'` then the
794
773
  first split tokens from a partial message will be included. Token splitter
795
774
  assumes that separators are kept, so that split contents can be directly
796
775
  concatenated to recreate the original text. Defaults to splitting on
797
776
  newlines.
798
777
 
799
778
  Returns:
800
- list of trimmed ``BaseMessage``.
779
+ List of trimmed `BaseMessage`.
801
780
 
802
781
  Raises:
803
782
  ValueError: if two incompatible arguments are specified or an unrecognized
804
- ``strategy`` is specified.
783
+ `strategy` is specified.
805
784
 
806
785
  Example:
807
- Trim chat history based on token count, keeping the ``SystemMessage`` if
808
- present, and ensuring that the chat history starts with a ``HumanMessage`` (
809
- or a ``SystemMessage`` followed by a ``HumanMessage``).
810
-
811
- .. code-block:: python
812
-
813
- from langchain_core.messages import (
814
- AIMessage,
815
- HumanMessage,
816
- BaseMessage,
817
- SystemMessage,
818
- trim_messages,
819
- )
820
-
821
- messages = [
822
- SystemMessage(
823
- "you're a good assistant, you always respond with a joke."
824
- ),
825
- HumanMessage("i wonder why it's called langchain"),
826
- AIMessage(
827
- 'Well, I guess they thought "WordRope" and "SentenceString" just '
828
- "didn't have the same ring to it!"
829
- ),
830
- HumanMessage("and who is harrison chasing anyways"),
831
- AIMessage(
832
- "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
833
- "cup of coffee in the office!"
834
- ),
835
- HumanMessage("what do you call a speechless parrot"),
836
- ]
837
-
838
-
839
- trim_messages(
840
- messages,
841
- max_tokens=45,
842
- strategy="last",
843
- token_counter=ChatOpenAI(model="gpt-4o"),
844
- # Most chat models expect that chat history starts with either:
845
- # (1) a HumanMessage or
846
- # (2) a SystemMessage followed by a HumanMessage
847
- start_on="human",
848
- # Usually, we want to keep the SystemMessage
849
- # if it's present in the original history.
850
- # The SystemMessage has special instructions for the model.
851
- include_system=True,
852
- allow_partial=False,
853
- )
786
+ Trim chat history based on token count, keeping the `SystemMessage` if
787
+ present, and ensuring that the chat history starts with a `HumanMessage` (
788
+ or a `SystemMessage` followed by a `HumanMessage`).
789
+
790
+ ```python
791
+ from langchain_core.messages import (
792
+ AIMessage,
793
+ HumanMessage,
794
+ BaseMessage,
795
+ SystemMessage,
796
+ trim_messages,
797
+ )
854
798
 
855
- .. code-block:: python
799
+ messages = [
800
+ SystemMessage("you're a good assistant, you always respond with a joke."),
801
+ HumanMessage("i wonder why it's called langchain"),
802
+ AIMessage(
803
+ 'Well, I guess they thought "WordRope" and "SentenceString" just '
804
+ "didn't have the same ring to it!"
805
+ ),
806
+ HumanMessage("and who is harrison chasing anyways"),
807
+ AIMessage(
808
+ "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
809
+ "cup of coffee in the office!"
810
+ ),
811
+ HumanMessage("what do you call a speechless parrot"),
812
+ ]
813
+
814
+
815
+ trim_messages(
816
+ messages,
817
+ max_tokens=45,
818
+ strategy="last",
819
+ token_counter=ChatOpenAI(model="gpt-4o"),
820
+ # Most chat models expect that chat history starts with either:
821
+ # (1) a HumanMessage or
822
+ # (2) a SystemMessage followed by a HumanMessage
823
+ start_on="human",
824
+ # Usually, we want to keep the SystemMessage
825
+ # if it's present in the original history.
826
+ # The SystemMessage has special instructions for the model.
827
+ include_system=True,
828
+ allow_partial=False,
829
+ )
830
+ ```
856
831
 
857
- [
858
- SystemMessage(
859
- content="you're a good assistant, you always respond with a joke."
860
- ),
861
- HumanMessage(content="what do you call a speechless parrot"),
862
- ]
832
+ ```python
833
+ [
834
+ SystemMessage(
835
+ content="you're a good assistant, you always respond with a joke."
836
+ ),
837
+ HumanMessage(content="what do you call a speechless parrot"),
838
+ ]
839
+ ```
863
840
 
864
- Trim chat history based on the message count, keeping the ``SystemMessage`` if
865
- present, and ensuring that the chat history starts with a ``HumanMessage`` (
866
- or a ``SystemMessage`` followed by a ``HumanMessage``).
841
+ Trim chat history based on the message count, keeping the `SystemMessage` if
842
+ present, and ensuring that the chat history starts with a `HumanMessage` (
843
+ or a `SystemMessage` followed by a `HumanMessage`).
867
844
 
868
845
  trim_messages(
869
846
  messages,
@@ -885,100 +862,95 @@ def trim_messages(
885
862
  allow_partial=False,
886
863
  )
887
864
 
888
- .. code-block:: python
889
-
890
- [
891
- SystemMessage(
892
- content="you're a good assistant, you always respond with a joke."
893
- ),
894
- HumanMessage(content="and who is harrison chasing anyways"),
895
- AIMessage(
896
- content="Hmmm let me think.\n\nWhy, he's probably chasing after "
897
- "the last cup of coffee in the office!"
898
- ),
899
- HumanMessage(content="what do you call a speechless parrot"),
900
- ]
901
-
902
-
865
+ ```python
866
+ [
867
+ SystemMessage(
868
+ content="you're a good assistant, you always respond with a joke."
869
+ ),
870
+ HumanMessage(content="and who is harrison chasing anyways"),
871
+ AIMessage(
872
+ content="Hmmm let me think.\n\nWhy, he's probably chasing after "
873
+ "the last cup of coffee in the office!"
874
+ ),
875
+ HumanMessage(content="what do you call a speechless parrot"),
876
+ ]
877
+ ```
903
878
  Trim chat history using a custom token counter function that counts the
904
879
  number of tokens in each message.
905
880
 
906
- .. code-block:: python
907
-
908
- messages = [
909
- SystemMessage("This is a 4 token text. The full message is 10 tokens."),
910
- HumanMessage(
911
- "This is a 4 token text. The full message is 10 tokens.", id="first"
912
- ),
913
- AIMessage(
914
- [
915
- {"type": "text", "text": "This is the FIRST 4 token block."},
916
- {"type": "text", "text": "This is the SECOND 4 token block."},
917
- ],
918
- id="second",
919
- ),
920
- HumanMessage(
921
- "This is a 4 token text. The full message is 10 tokens.", id="third"
922
- ),
923
- AIMessage(
924
- "This is a 4 token text. The full message is 10 tokens.",
925
- id="fourth",
926
- ),
927
- ]
928
-
929
-
930
- def dummy_token_counter(messages: list[BaseMessage]) -> int:
931
- # treat each message like it adds 3 default tokens at the beginning
932
- # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
933
- # per message.
934
-
935
- default_content_len = 4
936
- default_msg_prefix_len = 3
937
- default_msg_suffix_len = 3
938
-
939
- count = 0
940
- for msg in messages:
941
- if isinstance(msg.content, str):
942
- count += (
943
- default_msg_prefix_len
944
- + default_content_len
945
- + default_msg_suffix_len
946
- )
947
- if isinstance(msg.content, list):
948
- count += (
949
- default_msg_prefix_len
950
- + len(msg.content) * default_content_len
951
- + default_msg_suffix_len
952
- )
953
- return count
954
-
955
- First 30 tokens, allowing partial messages:
956
- .. code-block:: python
957
-
958
- trim_messages(
959
- messages,
960
- max_tokens=30,
961
- token_counter=dummy_token_counter,
962
- strategy="first",
963
- allow_partial=True,
964
- )
965
-
966
- .. code-block:: python
967
-
881
+ ```python
882
+ messages = [
883
+ SystemMessage("This is a 4 token text. The full message is 10 tokens."),
884
+ HumanMessage(
885
+ "This is a 4 token text. The full message is 10 tokens.", id="first"
886
+ ),
887
+ AIMessage(
968
888
  [
969
- SystemMessage(
970
- "This is a 4 token text. The full message is 10 tokens."
971
- ),
972
- HumanMessage(
973
- "This is a 4 token text. The full message is 10 tokens.",
974
- id="first",
975
- ),
976
- AIMessage(
977
- [{"type": "text", "text": "This is the FIRST 4 token block."}],
978
- id="second",
979
- ),
980
- ]
889
+ {"type": "text", "text": "This is the FIRST 4 token block."},
890
+ {"type": "text", "text": "This is the SECOND 4 token block."},
891
+ ],
892
+ id="second",
893
+ ),
894
+ HumanMessage(
895
+ "This is a 4 token text. The full message is 10 tokens.", id="third"
896
+ ),
897
+ AIMessage(
898
+ "This is a 4 token text. The full message is 10 tokens.",
899
+ id="fourth",
900
+ ),
901
+ ]
902
+
903
+
904
+ def dummy_token_counter(messages: list[BaseMessage]) -> int:
905
+ # treat each message like it adds 3 default tokens at the beginning
906
+ # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
907
+ # per message.
908
+
909
+ default_content_len = 4
910
+ default_msg_prefix_len = 3
911
+ default_msg_suffix_len = 3
912
+
913
+ count = 0
914
+ for msg in messages:
915
+ if isinstance(msg.content, str):
916
+ count += (
917
+ default_msg_prefix_len
918
+ + default_content_len
919
+ + default_msg_suffix_len
920
+ )
921
+ if isinstance(msg.content, list):
922
+ count += (
923
+ default_msg_prefix_len
924
+ + len(msg.content) * default_content_len
925
+ + default_msg_suffix_len
926
+ )
927
+ return count
928
+ ```
981
929
 
930
+ First 30 tokens, allowing partial messages:
931
+ ```python
932
+ trim_messages(
933
+ messages,
934
+ max_tokens=30,
935
+ token_counter=dummy_token_counter,
936
+ strategy="first",
937
+ allow_partial=True,
938
+ )
939
+ ```
940
+
941
+ ```python
942
+ [
943
+ SystemMessage("This is a 4 token text. The full message is 10 tokens."),
944
+ HumanMessage(
945
+ "This is a 4 token text. The full message is 10 tokens.",
946
+ id="first",
947
+ ),
948
+ AIMessage(
949
+ [{"type": "text", "text": "This is the FIRST 4 token block."}],
950
+ id="second",
951
+ ),
952
+ ]
953
+ ```
982
954
  """
983
955
  # Validate arguments
984
956
  if start_on and strategy == "first":
@@ -1042,88 +1014,90 @@ def trim_messages(
1042
1014
 
1043
1015
 
1044
1016
  def convert_to_openai_messages(
1045
- messages: Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
1017
+ messages: MessageLikeRepresentation | Sequence[MessageLikeRepresentation],
1046
1018
  *,
1047
1019
  text_format: Literal["string", "block"] = "string",
1048
- ) -> Union[dict, list[dict]]:
1020
+ include_id: bool = False,
1021
+ ) -> dict | list[dict]:
1049
1022
  """Convert LangChain messages into OpenAI message dicts.
1050
1023
 
1051
1024
  Args:
1052
1025
  messages: Message-like object or iterable of objects whose contents are
1053
1026
  in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
1054
1027
  text_format: How to format string or text block contents:
1055
- - ``'string'``:
1056
- If a message has a string content, this is left as a string. If
1057
- a message has content blocks that are all of type ``'text'``, these
1058
- are joined with a newline to make a single string. If a message has
1059
- content blocks and at least one isn't of type ``'text'``, then
1060
- all blocks are left as dicts.
1061
- - ``'block'``:
1062
- If a message has a string content, this is turned into a list
1063
- with a single content block of type ``'text'``. If a message has
1064
- content blocks these are left as is.
1028
+ - `'string'`:
1029
+ If a message has a string content, this is left as a string. If
1030
+ a message has content blocks that are all of type `'text'`, these
1031
+ are joined with a newline to make a single string. If a message has
1032
+ content blocks and at least one isn't of type `'text'`, then
1033
+ all blocks are left as dicts.
1034
+ - `'block'`:
1035
+ If a message has a string content, this is turned into a list
1036
+ with a single content block of type `'text'`. If a message has
1037
+ content blocks these are left as is.
1038
+ include_id: Whether to include message IDs in the openai messages, if they
1039
+ are present in the source messages.
1065
1040
 
1066
1041
  Raises:
1067
- ValueError: if an unrecognized ``text_format`` is specified, or if a message
1042
+ ValueError: if an unrecognized `text_format` is specified, or if a message
1068
1043
  content block is missing expected keys.
1069
1044
 
1070
1045
  Returns:
1071
1046
  The return type depends on the input type:
1072
1047
 
1073
1048
  - dict:
1074
- If a single message-like object is passed in, a single OpenAI message
1075
- dict is returned.
1049
+ If a single message-like object is passed in, a single OpenAI message
1050
+ dict is returned.
1076
1051
  - list[dict]:
1077
- If a sequence of message-like objects are passed in, a list of OpenAI
1078
- message dicts is returned.
1052
+ If a sequence of message-like objects is passed in, a list of OpenAI
1053
+ message dicts is returned.
1079
1054
 
1080
1055
  Example:
1056
+ ```python
1057
+ from langchain_core.messages import (
1058
+ convert_to_openai_messages,
1059
+ AIMessage,
1060
+ SystemMessage,
1061
+ ToolMessage,
1062
+ )
1081
1063
 
1082
- .. code-block:: python
1083
-
1084
- from langchain_core.messages import (
1085
- convert_to_openai_messages,
1086
- AIMessage,
1087
- SystemMessage,
1088
- ToolMessage,
1089
- )
1090
-
1091
- messages = [
1092
- SystemMessage([{"type": "text", "text": "foo"}]),
1093
- {
1094
- "role": "user",
1095
- "content": [
1096
- {"type": "text", "text": "whats in this"},
1097
- {
1098
- "type": "image_url",
1099
- "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
1100
- },
1101
- ],
1102
- },
1103
- AIMessage(
1104
- "",
1105
- tool_calls=[
1106
- {
1107
- "name": "analyze",
1108
- "args": {"baz": "buz"},
1109
- "id": "1",
1110
- "type": "tool_call",
1111
- }
1112
- ],
1113
- ),
1114
- ToolMessage("foobar", tool_call_id="1", name="bar"),
1115
- {"role": "assistant", "content": "thats nice"},
1116
- ]
1117
- oai_messages = convert_to_openai_messages(messages)
1118
- # -> [
1119
- # {'role': 'system', 'content': 'foo'},
1120
- # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
1121
- # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
1122
- # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
1123
- # {'role': 'assistant', 'content': 'thats nice'}
1124
- # ]
1125
-
1126
- .. versionadded:: 0.3.11
1064
+ messages = [
1065
+ SystemMessage([{"type": "text", "text": "foo"}]),
1066
+ {
1067
+ "role": "user",
1068
+ "content": [
1069
+ {"type": "text", "text": "whats in this"},
1070
+ {
1071
+ "type": "image_url",
1072
+ "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
1073
+ },
1074
+ ],
1075
+ },
1076
+ AIMessage(
1077
+ "",
1078
+ tool_calls=[
1079
+ {
1080
+ "name": "analyze",
1081
+ "args": {"baz": "buz"},
1082
+ "id": "1",
1083
+ "type": "tool_call",
1084
+ }
1085
+ ],
1086
+ ),
1087
+ ToolMessage("foobar", tool_call_id="1", name="bar"),
1088
+ {"role": "assistant", "content": "thats nice"},
1089
+ ]
1090
+ oai_messages = convert_to_openai_messages(messages)
1091
+ # -> [
1092
+ # {'role': 'system', 'content': 'foo'},
1093
+ # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
1094
+ # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
1095
+ # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
1096
+ # {'role': 'assistant', 'content': 'thats nice'}
1097
+ # ]
1098
+ ```
1099
+
1100
+ !!! version-added "Added in version 0.3.11"
1127
1101
 
1128
1102
  """ # noqa: E501
1129
1103
  if text_format not in {"string", "block"}:
@@ -1140,7 +1114,7 @@ def convert_to_openai_messages(
1140
1114
  for i, message in enumerate(messages):
1141
1115
  oai_msg: dict = {"role": _get_message_openai_role(message)}
1142
1116
  tool_messages: list = []
1143
- content: Union[str, list[dict]]
1117
+ content: str | list[dict]
1144
1118
 
1145
1119
  if message.name:
1146
1120
  oai_msg["name"] = message.name
@@ -1150,6 +1124,8 @@ def convert_to_openai_messages(
1150
1124
  oai_msg["refusal"] = message.additional_kwargs["refusal"]
1151
1125
  if isinstance(message, ToolMessage):
1152
1126
  oai_msg["tool_call_id"] = message.tool_call_id
1127
+ if include_id and message.id:
1128
+ oai_msg["id"] = message.id
1153
1129
 
1154
1130
  if not message.content:
1155
1131
  content = "" if text_format == "string" else []
@@ -1421,10 +1397,8 @@ def _first_max_tokens(
1421
1397
  max_tokens: int,
1422
1398
  token_counter: Callable[[list[BaseMessage]], int],
1423
1399
  text_splitter: Callable[[str], list[str]],
1424
- partial_strategy: Optional[Literal["first", "last"]] = None,
1425
- end_on: Optional[
1426
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1427
- ] = None,
1400
+ partial_strategy: Literal["first", "last"] | None = None,
1401
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1428
1402
  ) -> list[BaseMessage]:
1429
1403
  messages = list(messages)
1430
1404
  if not messages:
@@ -1541,12 +1515,8 @@ def _last_max_tokens(
1541
1515
  text_splitter: Callable[[str], list[str]],
1542
1516
  allow_partial: bool = False,
1543
1517
  include_system: bool = False,
1544
- start_on: Optional[
1545
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1546
- ] = None,
1547
- end_on: Optional[
1548
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1549
- ] = None,
1518
+ start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1519
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1550
1520
  ) -> list[BaseMessage]:
1551
1521
  messages = list(messages)
1552
1522
  if len(messages) == 0:
@@ -1647,7 +1617,7 @@ def _default_text_splitter(text: str) -> list[str]:
1647
1617
 
1648
1618
  def _is_message_type(
1649
1619
  message: BaseMessage,
1650
- type_: Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]],
1620
+ type_: str | type[BaseMessage] | Sequence[str | type[BaseMessage]],
1651
1621
  ) -> bool:
1652
1622
  types = [type_] if isinstance(type_, (str, type)) else type_
1653
1623
  types_str = [t for t in types if isinstance(t, str)]
@@ -1707,27 +1677,27 @@ def count_tokens_approximately(
1707
1677
  Args:
1708
1678
  messages: List of messages to count tokens for.
1709
1679
  chars_per_token: Number of characters per token to use for the approximation.
1710
- Default is 4 (one token corresponds to ~4 chars for common English text).
1711
- You can also specify float values for more fine-grained control.
1712
- `See more here. <https://platform.openai.com/tokenizer>`__
1713
- extra_tokens_per_message: Number of extra tokens to add per message.
1714
- Default is 3 (special tokens, including beginning/end of message).
1715
- You can also specify float values for more fine-grained control.
1716
- `See more here. <https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb>`__
1680
+ One token corresponds to ~4 chars for common English text.
1681
+ You can also specify `float` values for more fine-grained control.
1682
+ [See more here](https://platform.openai.com/tokenizer).
1683
+ extra_tokens_per_message: Number of extra tokens to add per message, e.g.
1684
+ special tokens, including beginning/end of message.
1685
+ You can also specify `float` values for more fine-grained control.
1686
+ [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
1717
1687
  count_name: Whether to include message names in the count.
1718
1688
  Enabled by default.
1719
1689
 
1720
1690
  Returns:
1721
1691
  Approximate number of tokens in the messages.
1722
1692
 
1723
- .. note::
1693
+ !!! note
1724
1694
  This is a simple approximation that may not match the exact token count used by
1725
1695
  specific models. For accurate counts, use model-specific tokenizers.
1726
1696
 
1727
1697
  Warning:
1728
1698
  This function does not currently support counting image tokens.
1729
1699
 
1730
- .. versionadded:: 0.3.46
1700
+ !!! version-added "Added in version 0.3.46"
1731
1701
 
1732
1702
  """
1733
1703
  token_count = 0.0