langchain-core 1.0.0a6__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165):
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +51 -64
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +55 -48
  8. langchain_core/caches.py +65 -66
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +321 -336
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +454 -514
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +53 -68
  17. langchain_core/document_loaders/base.py +27 -25
  18. langchain_core/document_loaders/blob_loaders.py +1 -1
  19. langchain_core/document_loaders/langsmith.py +44 -48
  20. langchain_core/documents/__init__.py +23 -3
  21. langchain_core/documents/base.py +102 -94
  22. langchain_core/documents/compressor.py +10 -10
  23. langchain_core/documents/transformers.py +34 -35
  24. langchain_core/embeddings/fake.py +50 -54
  25. langchain_core/example_selectors/length_based.py +2 -2
  26. langchain_core/example_selectors/semantic_similarity.py +28 -32
  27. langchain_core/exceptions.py +21 -20
  28. langchain_core/globals.py +3 -151
  29. langchain_core/indexing/__init__.py +1 -1
  30. langchain_core/indexing/api.py +121 -126
  31. langchain_core/indexing/base.py +73 -75
  32. langchain_core/indexing/in_memory.py +4 -6
  33. langchain_core/language_models/__init__.py +14 -29
  34. langchain_core/language_models/_utils.py +58 -61
  35. langchain_core/language_models/base.py +82 -172
  36. langchain_core/language_models/chat_models.py +329 -402
  37. langchain_core/language_models/fake.py +11 -11
  38. langchain_core/language_models/fake_chat_models.py +42 -36
  39. langchain_core/language_models/llms.py +189 -269
  40. langchain_core/load/dump.py +9 -12
  41. langchain_core/load/load.py +18 -28
  42. langchain_core/load/mapping.py +2 -4
  43. langchain_core/load/serializable.py +42 -40
  44. langchain_core/messages/__init__.py +10 -16
  45. langchain_core/messages/ai.py +148 -148
  46. langchain_core/messages/base.py +53 -51
  47. langchain_core/messages/block_translators/__init__.py +19 -22
  48. langchain_core/messages/block_translators/anthropic.py +6 -6
  49. langchain_core/messages/block_translators/bedrock_converse.py +5 -5
  50. langchain_core/messages/block_translators/google_genai.py +10 -7
  51. langchain_core/messages/block_translators/google_vertexai.py +4 -32
  52. langchain_core/messages/block_translators/groq.py +117 -21
  53. langchain_core/messages/block_translators/langchain_v0.py +5 -5
  54. langchain_core/messages/block_translators/openai.py +11 -11
  55. langchain_core/messages/chat.py +2 -6
  56. langchain_core/messages/content.py +339 -330
  57. langchain_core/messages/function.py +6 -10
  58. langchain_core/messages/human.py +24 -31
  59. langchain_core/messages/modifier.py +2 -2
  60. langchain_core/messages/system.py +19 -29
  61. langchain_core/messages/tool.py +74 -90
  62. langchain_core/messages/utils.py +484 -510
  63. langchain_core/output_parsers/__init__.py +13 -10
  64. langchain_core/output_parsers/base.py +61 -61
  65. langchain_core/output_parsers/format_instructions.py +9 -4
  66. langchain_core/output_parsers/json.py +12 -10
  67. langchain_core/output_parsers/list.py +21 -23
  68. langchain_core/output_parsers/openai_functions.py +49 -47
  69. langchain_core/output_parsers/openai_tools.py +30 -23
  70. langchain_core/output_parsers/pydantic.py +13 -14
  71. langchain_core/output_parsers/string.py +5 -5
  72. langchain_core/output_parsers/transform.py +15 -17
  73. langchain_core/output_parsers/xml.py +35 -34
  74. langchain_core/outputs/__init__.py +1 -1
  75. langchain_core/outputs/chat_generation.py +18 -18
  76. langchain_core/outputs/chat_result.py +1 -3
  77. langchain_core/outputs/generation.py +16 -16
  78. langchain_core/outputs/llm_result.py +10 -10
  79. langchain_core/prompt_values.py +13 -19
  80. langchain_core/prompts/__init__.py +3 -27
  81. langchain_core/prompts/base.py +81 -86
  82. langchain_core/prompts/chat.py +308 -351
  83. langchain_core/prompts/dict.py +6 -6
  84. langchain_core/prompts/few_shot.py +81 -88
  85. langchain_core/prompts/few_shot_with_templates.py +11 -13
  86. langchain_core/prompts/image.py +12 -14
  87. langchain_core/prompts/loading.py +4 -6
  88. langchain_core/prompts/message.py +7 -7
  89. langchain_core/prompts/prompt.py +24 -39
  90. langchain_core/prompts/string.py +26 -10
  91. langchain_core/prompts/structured.py +49 -53
  92. langchain_core/rate_limiters.py +51 -60
  93. langchain_core/retrievers.py +61 -198
  94. langchain_core/runnables/base.py +1551 -1656
  95. langchain_core/runnables/branch.py +68 -70
  96. langchain_core/runnables/config.py +72 -89
  97. langchain_core/runnables/configurable.py +145 -161
  98. langchain_core/runnables/fallbacks.py +102 -96
  99. langchain_core/runnables/graph.py +91 -97
  100. langchain_core/runnables/graph_ascii.py +27 -28
  101. langchain_core/runnables/graph_mermaid.py +42 -51
  102. langchain_core/runnables/graph_png.py +43 -16
  103. langchain_core/runnables/history.py +175 -177
  104. langchain_core/runnables/passthrough.py +151 -167
  105. langchain_core/runnables/retry.py +46 -51
  106. langchain_core/runnables/router.py +30 -35
  107. langchain_core/runnables/schema.py +75 -80
  108. langchain_core/runnables/utils.py +60 -67
  109. langchain_core/stores.py +85 -121
  110. langchain_core/structured_query.py +8 -8
  111. langchain_core/sys_info.py +29 -29
  112. langchain_core/tools/__init__.py +1 -14
  113. langchain_core/tools/base.py +306 -245
  114. langchain_core/tools/convert.py +160 -155
  115. langchain_core/tools/render.py +10 -10
  116. langchain_core/tools/retriever.py +12 -11
  117. langchain_core/tools/simple.py +19 -24
  118. langchain_core/tools/structured.py +32 -39
  119. langchain_core/tracers/__init__.py +1 -9
  120. langchain_core/tracers/base.py +97 -99
  121. langchain_core/tracers/context.py +29 -52
  122. langchain_core/tracers/core.py +49 -53
  123. langchain_core/tracers/evaluation.py +11 -11
  124. langchain_core/tracers/event_stream.py +65 -64
  125. langchain_core/tracers/langchain.py +21 -21
  126. langchain_core/tracers/log_stream.py +45 -45
  127. langchain_core/tracers/memory_stream.py +3 -3
  128. langchain_core/tracers/root_listeners.py +16 -16
  129. langchain_core/tracers/run_collector.py +2 -4
  130. langchain_core/tracers/schemas.py +0 -129
  131. langchain_core/tracers/stdout.py +3 -3
  132. langchain_core/utils/__init__.py +1 -4
  133. langchain_core/utils/_merge.py +2 -2
  134. langchain_core/utils/aiter.py +57 -61
  135. langchain_core/utils/env.py +9 -9
  136. langchain_core/utils/function_calling.py +94 -188
  137. langchain_core/utils/html.py +7 -8
  138. langchain_core/utils/input.py +9 -6
  139. langchain_core/utils/interactive_env.py +1 -1
  140. langchain_core/utils/iter.py +36 -40
  141. langchain_core/utils/json.py +4 -3
  142. langchain_core/utils/json_schema.py +9 -9
  143. langchain_core/utils/mustache.py +8 -10
  144. langchain_core/utils/pydantic.py +35 -37
  145. langchain_core/utils/strings.py +6 -9
  146. langchain_core/utils/usage.py +1 -1
  147. langchain_core/utils/utils.py +66 -62
  148. langchain_core/vectorstores/base.py +182 -216
  149. langchain_core/vectorstores/in_memory.py +101 -176
  150. langchain_core/vectorstores/utils.py +5 -5
  151. langchain_core/version.py +1 -1
  152. langchain_core-1.0.4.dist-info/METADATA +69 -0
  153. langchain_core-1.0.4.dist-info/RECORD +172 -0
  154. {langchain_core-1.0.0a6.dist-info → langchain_core-1.0.4.dist-info}/WHEEL +1 -1
  155. langchain_core/memory.py +0 -120
  156. langchain_core/messages/block_translators/ollama.py +0 -47
  157. langchain_core/prompts/pipeline.py +0 -138
  158. langchain_core/pydantic_v1/__init__.py +0 -30
  159. langchain_core/pydantic_v1/dataclasses.py +0 -23
  160. langchain_core/pydantic_v1/main.py +0 -23
  161. langchain_core/tracers/langchain_v1.py +0 -31
  162. langchain_core/utils/loading.py +0 -35
  163. langchain_core-1.0.0a6.dist-info/METADATA +0 -67
  164. langchain_core-1.0.0a6.dist-info/RECORD +0 -181
  165. langchain_core-1.0.0a6.dist-info/entry_points.txt +0 -4
@@ -5,7 +5,6 @@ Some examples of what you can do with these functions include:
5
5
  * Convert messages to strings (serialization)
6
6
  * Convert messages from dicts to Message objects (deserialization)
7
7
  * Filter messages from a list of messages based on name, type or id etc.
8
-
9
8
  """
10
9
 
11
10
  from __future__ import annotations
@@ -15,16 +14,13 @@ import inspect
15
14
  import json
16
15
  import logging
17
16
  import math
18
- from collections.abc import Iterable, Sequence
17
+ from collections.abc import Callable, Iterable, Sequence
19
18
  from functools import partial
20
19
  from typing import (
21
20
  TYPE_CHECKING,
22
21
  Annotated,
23
22
  Any,
24
- Callable,
25
23
  Literal,
26
- Optional,
27
- Union,
28
24
  cast,
29
25
  overload,
30
26
  )
@@ -76,22 +72,21 @@ def _get_type(v: Any) -> str:
76
72
 
77
73
 
78
74
  AnyMessage = Annotated[
79
- Union[
80
- Annotated[AIMessage, Tag(tag="ai")],
81
- Annotated[HumanMessage, Tag(tag="human")],
82
- Annotated[ChatMessage, Tag(tag="chat")],
83
- Annotated[SystemMessage, Tag(tag="system")],
84
- Annotated[FunctionMessage, Tag(tag="function")],
85
- Annotated[ToolMessage, Tag(tag="tool")],
86
- Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")],
87
- Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")],
88
- Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")],
89
- Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")],
90
- Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")],
91
- Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
92
- ],
75
+ Annotated[AIMessage, Tag(tag="ai")]
76
+ | Annotated[HumanMessage, Tag(tag="human")]
77
+ | Annotated[ChatMessage, Tag(tag="chat")]
78
+ | Annotated[SystemMessage, Tag(tag="system")]
79
+ | Annotated[FunctionMessage, Tag(tag="function")]
80
+ | Annotated[ToolMessage, Tag(tag="tool")]
81
+ | Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")]
82
+ | Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")]
83
+ | Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")]
84
+ | Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")]
85
+ | Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")]
86
+ | Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
93
87
  Field(discriminator=Discriminator(_get_type)),
94
88
  ]
89
+ """A type representing any defined `Message` or `MessageChunk` type."""
95
90
 
96
91
 
97
92
  def get_buffer_string(
@@ -101,10 +96,8 @@ def get_buffer_string(
101
96
 
102
97
  Args:
103
98
  messages: Messages to be converted to strings.
104
- human_prefix: The prefix to prepend to contents of ``HumanMessage``s.
105
- Default is ``'Human'``.
106
- ai_prefix: The prefix to prepend to contents of ``AIMessage``. Default is
107
- ``'AI'``.
99
+ human_prefix: The prefix to prepend to contents of `HumanMessage`s.
100
+ ai_prefix: The prefix to prepend to contents of `AIMessage`.
108
101
 
109
102
  Returns:
110
103
  A single string concatenation of all input messages.
@@ -113,17 +106,16 @@ def get_buffer_string(
113
106
  ValueError: If an unsupported message type is encountered.
114
107
 
115
108
  Example:
116
- .. code-block:: python
117
-
118
- from langchain_core import AIMessage, HumanMessage
119
-
120
- messages = [
121
- HumanMessage(content="Hi, how are you?"),
122
- AIMessage(content="Good, how are you?"),
123
- ]
124
- get_buffer_string(messages)
125
- # -> "Human: Hi, how are you?\nAI: Good, how are you?"
126
-
109
+ ```python
110
+ from langchain_core import AIMessage, HumanMessage
111
+
112
+ messages = [
113
+ HumanMessage(content="Hi, how are you?"),
114
+ AIMessage(content="Good, how are you?"),
115
+ ]
116
+ get_buffer_string(messages)
117
+ # -> "Human: Hi, how are you?\nAI: Good, how are you?"
118
+ ```
127
119
  """
128
120
  string_messages = []
129
121
  for m in messages:
@@ -183,7 +175,7 @@ def _message_from_dict(message: dict) -> BaseMessage:
183
175
 
184
176
 
185
177
  def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
186
- """Convert a sequence of messages from dicts to ``Message`` objects.
178
+ """Convert a sequence of messages from dicts to `Message` objects.
187
179
 
188
180
  Args:
189
181
  messages: Sequence of messages (as dicts) to convert.
@@ -196,7 +188,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
196
188
 
197
189
 
198
190
  def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
199
- """Convert a message chunk to a ``Message``.
191
+ """Convert a message chunk to a `Message`.
200
192
 
201
193
  Args:
202
194
  chunk: Message chunk to convert.
@@ -215,38 +207,39 @@ def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
215
207
  )
216
208
 
217
209
 
218
- MessageLikeRepresentation = Union[
219
- BaseMessage, list[str], tuple[str, str], str, dict[str, Any]
220
- ]
210
+ MessageLikeRepresentation = (
211
+ BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]
212
+ )
213
+ """A type representing the various ways a message can be represented."""
221
214
 
222
215
 
223
216
  def _create_message_from_message_type(
224
217
  message_type: str,
225
218
  content: str,
226
- name: Optional[str] = None,
227
- tool_call_id: Optional[str] = None,
228
- tool_calls: Optional[list[dict[str, Any]]] = None,
229
- id: Optional[str] = None,
219
+ name: str | None = None,
220
+ tool_call_id: str | None = None,
221
+ tool_calls: list[dict[str, Any]] | None = None,
222
+ id: str | None = None,
230
223
  **additional_kwargs: Any,
231
224
  ) -> BaseMessage:
232
- """Create a message from a ``Message`` type and content string.
225
+ """Create a message from a `Message` type and content string.
233
226
 
234
227
  Args:
235
- message_type: (str) the type of the message (e.g., ``'human'``, ``'ai'``, etc.).
228
+ message_type: (str) the type of the message (e.g., `'human'`, `'ai'`, etc.).
236
229
  content: (str) the content string.
237
- name: (str) the name of the message. Default is None.
238
- tool_call_id: (str) the tool call id. Default is None.
239
- tool_calls: (list[dict[str, Any]]) the tool calls. Default is None.
240
- id: (str) the id of the message. Default is None.
230
+ name: (str) the name of the message.
231
+ tool_call_id: (str) the tool call id.
232
+ tool_calls: (list[dict[str, Any]]) the tool calls.
233
+ id: (str) the id of the message.
241
234
  additional_kwargs: (dict[str, Any]) additional keyword arguments.
242
235
 
243
236
  Returns:
244
237
  a message of the appropriate type.
245
238
 
246
239
  Raises:
247
- ValueError: if the message type is not one of ``'human'``, ``'user'``, ``'ai'``,
248
- ``'assistant'``, ``'function'``, ``'tool'``, ``'system'``, or
249
- ``'developer'``.
240
+ ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
241
+ `'assistant'`, `'function'`, `'tool'`, `'system'`, or
242
+ `'developer'`.
250
243
  """
251
244
  kwargs: dict[str, Any] = {}
252
245
  if name is not None:
@@ -312,21 +305,21 @@ def _create_message_from_message_type(
312
305
 
313
306
 
314
307
  def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
315
- """Instantiate a ``Message`` from a variety of message formats.
308
+ """Instantiate a `Message` from a variety of message formats.
316
309
 
317
310
  The message format can be one of the following:
318
311
 
319
- - ``BaseMessagePromptTemplate``
320
- - ``BaseMessage``
321
- - 2-tuple of (role string, template); e.g., (``'human'``, ``'{user_input}'``)
312
+ - `BaseMessagePromptTemplate`
313
+ - `BaseMessage`
314
+ - 2-tuple of (role string, template); e.g., (`'human'`, `'{user_input}'`)
322
315
  - dict: a message dict with role and content keys
323
- - string: shorthand for (``'human'``, template); e.g., ``'{user_input}'``
316
+ - string: shorthand for (`'human'`, template); e.g., `'{user_input}'`
324
317
 
325
318
  Args:
326
319
  message: a representation of a message in one of the supported formats.
327
320
 
328
321
  Returns:
329
- an instance of a message or a message template.
322
+ An instance of a message or a message template.
330
323
 
331
324
  Raises:
332
325
  NotImplementedError: if the message type is not supported.
@@ -335,12 +328,16 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
335
328
  """
336
329
  if isinstance(message, BaseMessage):
337
330
  message_ = message
338
- elif isinstance(message, str):
339
- message_ = _create_message_from_message_type("human", message)
340
- elif isinstance(message, Sequence) and len(message) == 2:
341
- # mypy doesn't realise this can't be a string given the previous branch
342
- message_type_str, template = message # type: ignore[misc]
343
- message_ = _create_message_from_message_type(message_type_str, template)
331
+ elif isinstance(message, Sequence):
332
+ if isinstance(message, str):
333
+ message_ = _create_message_from_message_type("human", message)
334
+ else:
335
+ try:
336
+ message_type_str, template = message
337
+ except ValueError as e:
338
+ msg = "Message as a sequence must be (role string, template)"
339
+ raise NotImplementedError(msg) from e
340
+ message_ = _create_message_from_message_type(message_type_str, template)
344
341
  elif isinstance(message, dict):
345
342
  msg_kwargs = message.copy()
346
343
  try:
@@ -368,7 +365,7 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
368
365
 
369
366
 
370
367
  def convert_to_messages(
371
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
368
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
372
369
  ) -> list[BaseMessage]:
373
370
  """Convert a sequence of messages to a list of messages.
374
371
 
@@ -399,12 +396,12 @@ def _runnable_support(func: Callable) -> Callable:
399
396
  ) -> list[BaseMessage]: ...
400
397
 
401
398
  def wrapped(
402
- messages: Union[Sequence[MessageLikeRepresentation], None] = None,
399
+ messages: Sequence[MessageLikeRepresentation] | None = None,
403
400
  **kwargs: Any,
404
- ) -> Union[
405
- list[BaseMessage],
406
- Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]],
407
- ]:
401
+ ) -> (
402
+ list[BaseMessage]
403
+ | Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
404
+ ):
408
405
  # Import locally to prevent circular import.
409
406
  from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
410
407
 
@@ -418,89 +415,88 @@ def _runnable_support(func: Callable) -> Callable:
418
415
 
419
416
  @_runnable_support
420
417
  def filter_messages(
421
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
418
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
422
419
  *,
423
- include_names: Optional[Sequence[str]] = None,
424
- exclude_names: Optional[Sequence[str]] = None,
425
- include_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
426
- exclude_types: Optional[Sequence[Union[str, type[BaseMessage]]]] = None,
427
- include_ids: Optional[Sequence[str]] = None,
428
- exclude_ids: Optional[Sequence[str]] = None,
429
- exclude_tool_calls: Optional[Sequence[str] | bool] = None,
420
+ include_names: Sequence[str] | None = None,
421
+ exclude_names: Sequence[str] | None = None,
422
+ include_types: Sequence[str | type[BaseMessage]] | None = None,
423
+ exclude_types: Sequence[str | type[BaseMessage]] | None = None,
424
+ include_ids: Sequence[str] | None = None,
425
+ exclude_ids: Sequence[str] | None = None,
426
+ exclude_tool_calls: Sequence[str] | bool | None = None,
430
427
  ) -> list[BaseMessage]:
431
- """Filter messages based on ``name``, ``type`` or ``id``.
428
+ """Filter messages based on `name`, `type` or `id`.
432
429
 
433
430
  Args:
434
431
  messages: Sequence Message-like objects to filter.
435
- include_names: Message names to include. Default is None.
436
- exclude_names: Messages names to exclude. Default is None.
432
+ include_names: Message names to include.
433
+ exclude_names: Messages names to exclude.
437
434
  include_types: Message types to include. Can be specified as string names
438
- (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
439
- classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
440
- Default is None.
435
+ (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
436
+ classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
437
+
441
438
  exclude_types: Message types to exclude. Can be specified as string names
442
- (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
443
- classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
444
- Default is None.
445
- include_ids: Message IDs to include. Default is None.
446
- exclude_ids: Message IDs to exclude. Default is None.
447
- exclude_tool_calls: Tool call IDs to exclude. Default is None.
439
+ (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
440
+ classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
441
+
442
+ include_ids: Message IDs to include.
443
+ exclude_ids: Message IDs to exclude.
444
+ exclude_tool_calls: Tool call IDs to exclude.
448
445
  Can be one of the following:
449
- - ``True``: all ``AIMessage``s with tool calls and all
450
- ``ToolMessage``s will be excluded.
446
+ - `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
447
+ objects will be excluded.
451
448
  - a sequence of tool call IDs to exclude:
452
- - ``ToolMessage``s with the corresponding tool call ID will be
453
- excluded.
454
- - The ``tool_calls`` in the AIMessage will be updated to exclude
455
- matching tool calls. If all ``tool_calls`` are filtered from an
456
- AIMessage, the whole message is excluded.
449
+ - `ToolMessage` objects with the corresponding tool call ID will be
450
+ excluded.
451
+ - The `tool_calls` in the AIMessage will be updated to exclude
452
+ matching tool calls. If all `tool_calls` are filtered from an
453
+ AIMessage, the whole message is excluded.
457
454
 
458
455
  Returns:
459
- A list of Messages that meets at least one of the ``incl_*`` conditions and none
460
- of the ``excl_*`` conditions. If not ``incl_*`` conditions are specified then
456
+ A list of Messages that meets at least one of the `incl_*` conditions and none
457
+ of the `excl_*` conditions. If not `incl_*` conditions are specified then
461
458
  anything that is not explicitly excluded will be included.
462
459
 
463
460
  Raises:
464
- ValueError if two incompatible arguments are provided.
461
+ ValueError: If two incompatible arguments are provided.
465
462
 
466
463
  Example:
467
- .. code-block:: python
468
-
469
- from langchain_core.messages import (
470
- filter_messages,
471
- AIMessage,
472
- HumanMessage,
473
- SystemMessage,
474
- )
475
-
476
- messages = [
477
- SystemMessage("you're a good assistant."),
478
- HumanMessage("what's your name", id="foo", name="example_user"),
479
- AIMessage("steve-o", id="bar", name="example_assistant"),
480
- HumanMessage(
481
- "what's your favorite color",
482
- id="baz",
483
- ),
484
- AIMessage(
485
- "silicon blue",
486
- id="blah",
487
- ),
488
- ]
489
-
490
- filter_messages(
491
- messages,
492
- incl_names=("example_user", "example_assistant"),
493
- incl_types=("system",),
494
- excl_ids=("bar",),
495
- )
496
-
497
- .. code-block:: python
498
-
499
- [
500
- SystemMessage("you're a good assistant."),
501
- HumanMessage("what's your name", id="foo", name="example_user"),
502
- ]
464
+ ```python
465
+ from langchain_core.messages import (
466
+ filter_messages,
467
+ AIMessage,
468
+ HumanMessage,
469
+ SystemMessage,
470
+ )
503
471
 
472
+ messages = [
473
+ SystemMessage("you're a good assistant."),
474
+ HumanMessage("what's your name", id="foo", name="example_user"),
475
+ AIMessage("steve-o", id="bar", name="example_assistant"),
476
+ HumanMessage(
477
+ "what's your favorite color",
478
+ id="baz",
479
+ ),
480
+ AIMessage(
481
+ "silicon blue",
482
+ id="blah",
483
+ ),
484
+ ]
485
+
486
+ filter_messages(
487
+ messages,
488
+ incl_names=("example_user", "example_assistant"),
489
+ incl_types=("system",),
490
+ excl_ids=("bar",),
491
+ )
492
+ ```
493
+
494
+ ```python
495
+ [
496
+ SystemMessage("you're a good assistant."),
497
+ HumanMessage("what's your name", id="foo", name="example_user"),
498
+ ]
499
+ ```
504
500
  """
505
501
  messages = convert_to_messages(messages)
506
502
  filtered: list[BaseMessage] = []
@@ -563,20 +559,19 @@ def filter_messages(
563
559
 
564
560
  @_runnable_support
565
561
  def merge_message_runs(
566
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
562
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
567
563
  *,
568
564
  chunk_separator: str = "\n",
569
565
  ) -> list[BaseMessage]:
570
566
  r"""Merge consecutive Messages of the same type.
571
567
 
572
- .. note::
573
- ToolMessages are not merged, as each has a distinct tool call id that can't be
574
- merged.
568
+ !!! note
569
+ `ToolMessage` objects are not merged, as each has a distinct tool call id that
570
+ can't be merged.
575
571
 
576
572
  Args:
577
573
  messages: Sequence Message-like objects to merge.
578
574
  chunk_separator: Specify the string to be inserted between message chunks.
579
- Default is ``'\n'``.
580
575
 
581
576
  Returns:
582
577
  list of BaseMessages with consecutive runs of message types merged into single
@@ -584,87 +579,86 @@ def merge_message_runs(
584
579
  the merged content is a concatenation of the two strings with a new-line
585
580
  separator.
586
581
  The separator inserted between message chunks can be controlled by specifying
587
- any string with ``chunk_separator``. If at least one of the messages has a list
582
+ any string with `chunk_separator`. If at least one of the messages has a list
588
583
  of content blocks, the merged content is a list of content blocks.
589
584
 
590
585
  Example:
586
+ ```python
587
+ from langchain_core.messages import (
588
+ merge_message_runs,
589
+ AIMessage,
590
+ HumanMessage,
591
+ SystemMessage,
592
+ ToolCall,
593
+ )
591
594
 
592
- .. code-block:: python
593
-
594
- from langchain_core.messages import (
595
- merge_message_runs,
596
- AIMessage,
597
- HumanMessage,
598
- SystemMessage,
599
- ToolCall,
600
- )
601
-
602
- messages = [
603
- SystemMessage("you're a good assistant."),
604
- HumanMessage(
605
- "what's your favorite color",
606
- id="foo",
607
- ),
608
- HumanMessage(
609
- "wait your favorite food",
610
- id="bar",
611
- ),
612
- AIMessage(
595
+ messages = [
596
+ SystemMessage("you're a good assistant."),
597
+ HumanMessage(
598
+ "what's your favorite color",
599
+ id="foo",
600
+ ),
601
+ HumanMessage(
602
+ "wait your favorite food",
603
+ id="bar",
604
+ ),
605
+ AIMessage(
606
+ "my favorite colo",
607
+ tool_calls=[
608
+ ToolCall(
609
+ name="blah_tool", args={"x": 2}, id="123", type="tool_call"
610
+ )
611
+ ],
612
+ id="baz",
613
+ ),
614
+ AIMessage(
615
+ [{"type": "text", "text": "my favorite dish is lasagna"}],
616
+ tool_calls=[
617
+ ToolCall(
618
+ name="blah_tool",
619
+ args={"x": -10},
620
+ id="456",
621
+ type="tool_call",
622
+ )
623
+ ],
624
+ id="blur",
625
+ ),
626
+ ]
627
+
628
+ merge_message_runs(messages)
629
+ ```
630
+
631
+ ```python
632
+ [
633
+ SystemMessage("you're a good assistant."),
634
+ HumanMessage(
635
+ "what's your favorite color\\n"
636
+ "wait your favorite food", id="foo",
637
+ ),
638
+ AIMessage(
639
+ [
613
640
  "my favorite colo",
614
- tool_calls=[
615
- ToolCall(
616
- name="blah_tool", args={"x": 2}, id="123", type="tool_call"
617
- )
618
- ],
619
- id="baz",
620
- ),
621
- AIMessage(
622
- [{"type": "text", "text": "my favorite dish is lasagna"}],
623
- tool_calls=[
624
- ToolCall(
625
- name="blah_tool",
626
- args={"x": -10},
627
- id="456",
628
- type="tool_call",
629
- )
630
- ],
631
- id="blur",
632
- ),
633
- ]
634
-
635
- merge_message_runs(messages)
636
-
637
- .. code-block:: python
638
-
639
- [
640
- SystemMessage("you're a good assistant."),
641
- HumanMessage(
642
- "what's your favorite color\\n"
643
- "wait your favorite food", id="foo",
644
- ),
645
- AIMessage(
646
- [
647
- "my favorite colo",
648
- {"type": "text", "text": "my favorite dish is lasagna"}
649
- ],
650
- tool_calls=[
651
- ToolCall({
652
- "name": "blah_tool",
653
- "args": {"x": 2},
654
- "id": "123",
655
- "type": "tool_call"
656
- }),
657
- ToolCall({
658
- "name": "blah_tool",
659
- "args": {"x": -10},
660
- "id": "456",
661
- "type": "tool_call"
662
- })
663
- ]
664
- id="baz"
665
- ),
666
- ]
641
+ {"type": "text", "text": "my favorite dish is lasagna"}
642
+ ],
643
+ tool_calls=[
644
+ ToolCall({
645
+ "name": "blah_tool",
646
+ "args": {"x": 2},
647
+ "id": "123",
648
+ "type": "tool_call"
649
+ }),
650
+ ToolCall({
651
+ "name": "blah_tool",
652
+ "args": {"x": -10},
653
+ "id": "456",
654
+ "type": "tool_call"
655
+ })
656
+ ]
657
+ id="baz"
658
+ ),
659
+ ]
667
660
 
661
+ ```
668
662
  """
669
663
  if not messages:
670
664
  return []
@@ -696,174 +690,161 @@ def merge_message_runs(
696
690
  # init not at runtime.
697
691
  @_runnable_support
698
692
  def trim_messages(
699
- messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
693
+ messages: Iterable[MessageLikeRepresentation] | PromptValue,
700
694
  *,
701
695
  max_tokens: int,
702
- token_counter: Union[
703
- Callable[[list[BaseMessage]], int],
704
- Callable[[BaseMessage], int],
705
- BaseLanguageModel,
706
- ],
696
+ token_counter: Callable[[list[BaseMessage]], int]
697
+ | Callable[[BaseMessage], int]
698
+ | BaseLanguageModel,
707
699
  strategy: Literal["first", "last"] = "last",
708
700
  allow_partial: bool = False,
709
- end_on: Optional[
710
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
711
- ] = None,
712
- start_on: Optional[
713
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
714
- ] = None,
701
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
702
+ start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
715
703
  include_system: bool = False,
716
- text_splitter: Optional[Union[Callable[[str], list[str]], TextSplitter]] = None,
704
+ text_splitter: Callable[[str], list[str]] | TextSplitter | None = None,
717
705
  ) -> list[BaseMessage]:
718
706
  r"""Trim messages to be below a token count.
719
707
 
720
- ``trim_messages`` can be used to reduce the size of a chat history to a specified
721
- token count or specified message count.
708
+ `trim_messages` can be used to reduce the size of a chat history to a specified
709
+ token or message count.
722
710
 
723
711
  In either case, if passing the trimmed chat history back into a chat model
724
712
  directly, the resulting chat history should usually satisfy the following
725
713
  properties:
726
714
 
727
715
  1. The resulting chat history should be valid. Most chat models expect that chat
728
- history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
729
- followed by a ``HumanMessage``. To achieve this, set ``start_on='human'``.
730
- In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
731
- that involved a tool call.
732
- Please see the following link for more information about messages:
733
- https://python.langchain.com/docs/concepts/#messages
716
+ history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`
717
+ followed by a `HumanMessage`. To achieve this, set `start_on='human'`.
718
+ In addition, generally a `ToolMessage` can only appear after an `AIMessage`
719
+ that involved a tool call.
734
720
  2. It includes recent messages and drops old messages in the chat history.
735
- To achieve this set the ``strategy='last'``.
736
- 3. Usually, the new chat history should include the ``SystemMessage`` if it
737
- was present in the original chat history since the ``SystemMessage`` includes
738
- special instructions to the chat model. The ``SystemMessage`` is almost always
739
- the first message in the history if present. To achieve this set the
740
- ``include_system=True``.
741
-
742
- .. note::
743
- The examples below show how to configure ``trim_messages`` to achieve a behavior
721
+ To achieve this, set `strategy='last'`.
722
+ 3. Usually, the new chat history should include the `SystemMessage` if it
723
+ was present in the original chat history since the `SystemMessage` includes
724
+ special instructions to the chat model. The `SystemMessage` is almost always
725
+ the first message in the history if present. To achieve this, set
726
+ `include_system=True`.
727
+
728
+ !!! note
729
+ The examples below show how to configure `trim_messages` to achieve a behavior
744
730
  consistent with the above properties.
745
731
 
746
732
  Args:
747
733
  messages: Sequence of Message-like objects to trim.
748
734
  max_tokens: Max token count of trimmed messages.
749
- token_counter: Function or llm for counting tokens in a ``BaseMessage`` or a
750
- list of ``BaseMessage``. If a ``BaseLanguageModel`` is passed in then
751
- ``BaseLanguageModel.get_num_tokens_from_messages()`` will be used.
752
- Set to ``len`` to count the number of **messages** in the chat history.
735
+ token_counter: Function or LLM for counting tokens in a `BaseMessage` or a
736
+ list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
737
+ `BaseLanguageModel.get_num_tokens_from_messages()` will be used.
738
+ Set to `len` to count the number of **messages** in the chat history.
753
739
 
754
- .. note::
755
- Use ``count_tokens_approximately`` to get fast, approximate token
740
+ !!! note
741
+ Use `count_tokens_approximately` to get fast, approximate token
756
742
  counts.
757
- This is recommended for using ``trim_messages`` on the hot path, where
743
+ This is recommended for using `trim_messages` on the hot path, where
758
744
  exact token counting is not necessary.
759
745
 
760
746
  strategy: Strategy for trimming.
761
- - ``'first'``: Keep the first ``<= n_count`` tokens of the messages.
762
- - ``'last'``: Keep the last ``<= n_count`` tokens of the messages.
763
- Default is ``'last'``.
747
+ - `'first'`: Keep the first `<= n_count` tokens of the messages.
748
+ - `'last'`: Keep the last `<= n_count` tokens of the messages.
764
749
  allow_partial: Whether to split a message if only part of the message can be
765
- included. If ``strategy='last'`` then the last partial contents of a message
766
- are included. If ``strategy='first'`` then the first partial contents of a
750
+ included. If `strategy='last'` then the last partial contents of a message
751
+ are included. If `strategy='first'` then the first partial contents of a
767
752
  message are included.
768
- Default is False.
769
753
  end_on: The message type to end on. If specified then every message after the
770
- last occurrence of this type is ignored. If ``strategy='last'`` then this
771
- is done before we attempt to get the last ``max_tokens``. If
772
- ``strategy='first'`` then this is done after we get the first
773
- ``max_tokens``. Can be specified as string names (e.g. ``'system'``,
774
- ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g.
775
- ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Can be a single
754
+ last occurrence of this type is ignored. If `strategy='last'` then this
755
+ is done before we attempt to get the last `max_tokens`. If
756
+ `strategy='first'` then this is done after we get the first
757
+ `max_tokens`. Can be specified as string names (e.g. `'system'`,
758
+ `'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
759
+ `SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
776
760
  type or a list of types.
777
- Default is None.
761
+
778
762
  start_on: The message type to start on. Should only be specified if
779
- ``strategy='last'``. If specified then every message before
763
+ `strategy='last'`. If specified then every message before
780
764
  the first occurrence of this type is ignored. This is done after we trim
781
- the initial messages to the last ``max_tokens``. Does not
782
- apply to a ``SystemMessage`` at index 0 if ``include_system=True``. Can be
783
- specified as string names (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or
784
- as ``BaseMessage`` classes (e.g. ``SystemMessage``, ``HumanMessage``,
785
- ``AIMessage``, ...). Can be a single type or a list of types.
786
- Default is None.
787
- include_system: Whether to keep the SystemMessage if there is one at index 0.
788
- Should only be specified if ``strategy="last"``.
789
- Default is False.
790
- text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for
765
+ the initial messages to the last `max_tokens`. Does not
766
+ apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
767
+ specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
768
+ as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
769
+ `AIMessage`, ...). Can be a single type or a list of types.
770
+
771
+ include_system: Whether to keep the `SystemMessage` if there is one at index
772
+ `0`. Should only be specified if `strategy="last"`.
773
+ text_splitter: Function or `langchain_text_splitters.TextSplitter` for
791
774
  splitting the string contents of a message. Only used if
792
- ``allow_partial=True``. If ``strategy='last'`` then the last split tokens
793
- from a partial message will be included. if ``strategy='first'`` then the
775
+ `allow_partial=True`. If `strategy='last'` then the last split tokens
776
+ from a partial message will be included. If `strategy='first'` then the
794
777
  first split tokens from a partial message will be included. Token splitter
795
778
  assumes that separators are kept, so that split contents can be directly
796
779
  concatenated to recreate the original text. Defaults to splitting on
797
780
  newlines.
798
781
 
799
782
  Returns:
800
- list of trimmed ``BaseMessage``.
783
+ List of trimmed `BaseMessage`.
801
784
 
802
785
  Raises:
803
786
  ValueError: if two incompatible arguments are specified or an unrecognized
804
- ``strategy`` is specified.
787
+ `strategy` is specified.
805
788
 
806
789
  Example:
807
- Trim chat history based on token count, keeping the ``SystemMessage`` if
808
- present, and ensuring that the chat history starts with a ``HumanMessage`` (
809
- or a ``SystemMessage`` followed by a ``HumanMessage``).
810
-
811
- .. code-block:: python
812
-
813
- from langchain_core.messages import (
814
- AIMessage,
815
- HumanMessage,
816
- BaseMessage,
817
- SystemMessage,
818
- trim_messages,
819
- )
820
-
821
- messages = [
822
- SystemMessage(
823
- "you're a good assistant, you always respond with a joke."
824
- ),
825
- HumanMessage("i wonder why it's called langchain"),
826
- AIMessage(
827
- 'Well, I guess they thought "WordRope" and "SentenceString" just '
828
- "didn't have the same ring to it!"
829
- ),
830
- HumanMessage("and who is harrison chasing anyways"),
831
- AIMessage(
832
- "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
833
- "cup of coffee in the office!"
834
- ),
835
- HumanMessage("what do you call a speechless parrot"),
836
- ]
837
-
838
-
839
- trim_messages(
840
- messages,
841
- max_tokens=45,
842
- strategy="last",
843
- token_counter=ChatOpenAI(model="gpt-4o"),
844
- # Most chat models expect that chat history starts with either:
845
- # (1) a HumanMessage or
846
- # (2) a SystemMessage followed by a HumanMessage
847
- start_on="human",
848
- # Usually, we want to keep the SystemMessage
849
- # if it's present in the original history.
850
- # The SystemMessage has special instructions for the model.
851
- include_system=True,
852
- allow_partial=False,
853
- )
790
+ Trim chat history based on token count, keeping the `SystemMessage` if
791
+ present, and ensuring that the chat history starts with a `HumanMessage` (
792
+ or a `SystemMessage` followed by a `HumanMessage`).
793
+
794
+ ```python
795
+ from langchain_core.messages import (
796
+ AIMessage,
797
+ HumanMessage,
798
+ BaseMessage,
799
+ SystemMessage,
800
+ trim_messages,
801
+ )
854
802
 
855
- .. code-block:: python
803
+ messages = [
804
+ SystemMessage("you're a good assistant, you always respond with a joke."),
805
+ HumanMessage("i wonder why it's called langchain"),
806
+ AIMessage(
807
+ 'Well, I guess they thought "WordRope" and "SentenceString" just '
808
+ "didn't have the same ring to it!"
809
+ ),
810
+ HumanMessage("and who is harrison chasing anyways"),
811
+ AIMessage(
812
+ "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
813
+ "cup of coffee in the office!"
814
+ ),
815
+ HumanMessage("what do you call a speechless parrot"),
816
+ ]
817
+
818
+
819
+ trim_messages(
820
+ messages,
821
+ max_tokens=45,
822
+ strategy="last",
823
+ token_counter=ChatOpenAI(model="gpt-4o"),
824
+ # Most chat models expect that chat history starts with either:
825
+ # (1) a HumanMessage or
826
+ # (2) a SystemMessage followed by a HumanMessage
827
+ start_on="human",
828
+ # Usually, we want to keep the SystemMessage
829
+ # if it's present in the original history.
830
+ # The SystemMessage has special instructions for the model.
831
+ include_system=True,
832
+ allow_partial=False,
833
+ )
834
+ ```
856
835
 
857
- [
858
- SystemMessage(
859
- content="you're a good assistant, you always respond with a joke."
860
- ),
861
- HumanMessage(content="what do you call a speechless parrot"),
862
- ]
836
+ ```python
837
+ [
838
+ SystemMessage(
839
+ content="you're a good assistant, you always respond with a joke."
840
+ ),
841
+ HumanMessage(content="what do you call a speechless parrot"),
842
+ ]
843
+ ```
863
844
 
864
- Trim chat history based on the message count, keeping the ``SystemMessage`` if
865
- present, and ensuring that the chat history starts with a ``HumanMessage`` (
866
- or a ``SystemMessage`` followed by a ``HumanMessage``).
845
+ Trim chat history based on the message count, keeping the `SystemMessage` if
846
+ present, and ensuring that the chat history starts with a `HumanMessage` (
847
+ or a `SystemMessage` followed by a `HumanMessage`).
867
848
 
868
849
  trim_messages(
869
850
  messages,
@@ -885,100 +866,95 @@ def trim_messages(
885
866
  allow_partial=False,
886
867
  )
887
868
 
888
- .. code-block:: python
889
-
890
- [
891
- SystemMessage(
892
- content="you're a good assistant, you always respond with a joke."
893
- ),
894
- HumanMessage(content="and who is harrison chasing anyways"),
895
- AIMessage(
896
- content="Hmmm let me think.\n\nWhy, he's probably chasing after "
897
- "the last cup of coffee in the office!"
898
- ),
899
- HumanMessage(content="what do you call a speechless parrot"),
900
- ]
901
-
902
-
869
+ ```python
870
+ [
871
+ SystemMessage(
872
+ content="you're a good assistant, you always respond with a joke."
873
+ ),
874
+ HumanMessage(content="and who is harrison chasing anyways"),
875
+ AIMessage(
876
+ content="Hmmm let me think.\n\nWhy, he's probably chasing after "
877
+ "the last cup of coffee in the office!"
878
+ ),
879
+ HumanMessage(content="what do you call a speechless parrot"),
880
+ ]
881
+ ```
903
882
  Trim chat history using a custom token counter function that counts the
904
883
  number of tokens in each message.
905
884
 
906
- .. code-block:: python
907
-
908
- messages = [
909
- SystemMessage("This is a 4 token text. The full message is 10 tokens."),
910
- HumanMessage(
911
- "This is a 4 token text. The full message is 10 tokens.", id="first"
912
- ),
913
- AIMessage(
914
- [
915
- {"type": "text", "text": "This is the FIRST 4 token block."},
916
- {"type": "text", "text": "This is the SECOND 4 token block."},
917
- ],
918
- id="second",
919
- ),
920
- HumanMessage(
921
- "This is a 4 token text. The full message is 10 tokens.", id="third"
922
- ),
923
- AIMessage(
924
- "This is a 4 token text. The full message is 10 tokens.",
925
- id="fourth",
926
- ),
927
- ]
928
-
929
-
930
- def dummy_token_counter(messages: list[BaseMessage]) -> int:
931
- # treat each message like it adds 3 default tokens at the beginning
932
- # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
933
- # per message.
934
-
935
- default_content_len = 4
936
- default_msg_prefix_len = 3
937
- default_msg_suffix_len = 3
938
-
939
- count = 0
940
- for msg in messages:
941
- if isinstance(msg.content, str):
942
- count += (
943
- default_msg_prefix_len
944
- + default_content_len
945
- + default_msg_suffix_len
946
- )
947
- if isinstance(msg.content, list):
948
- count += (
949
- default_msg_prefix_len
950
- + len(msg.content) * default_content_len
951
- + default_msg_suffix_len
952
- )
953
- return count
954
-
955
- First 30 tokens, allowing partial messages:
956
- .. code-block:: python
957
-
958
- trim_messages(
959
- messages,
960
- max_tokens=30,
961
- token_counter=dummy_token_counter,
962
- strategy="first",
963
- allow_partial=True,
964
- )
965
-
966
- .. code-block:: python
967
-
885
+ ```python
886
+ messages = [
887
+ SystemMessage("This is a 4 token text. The full message is 10 tokens."),
888
+ HumanMessage(
889
+ "This is a 4 token text. The full message is 10 tokens.", id="first"
890
+ ),
891
+ AIMessage(
968
892
  [
969
- SystemMessage(
970
- "This is a 4 token text. The full message is 10 tokens."
971
- ),
972
- HumanMessage(
973
- "This is a 4 token text. The full message is 10 tokens.",
974
- id="first",
975
- ),
976
- AIMessage(
977
- [{"type": "text", "text": "This is the FIRST 4 token block."}],
978
- id="second",
979
- ),
980
- ]
893
+ {"type": "text", "text": "This is the FIRST 4 token block."},
894
+ {"type": "text", "text": "This is the SECOND 4 token block."},
895
+ ],
896
+ id="second",
897
+ ),
898
+ HumanMessage(
899
+ "This is a 4 token text. The full message is 10 tokens.", id="third"
900
+ ),
901
+ AIMessage(
902
+ "This is a 4 token text. The full message is 10 tokens.",
903
+ id="fourth",
904
+ ),
905
+ ]
906
+
907
+
908
+ def dummy_token_counter(messages: list[BaseMessage]) -> int:
909
+ # treat each message like it adds 3 default tokens at the beginning
910
+ # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
911
+ # per message.
912
+
913
+ default_content_len = 4
914
+ default_msg_prefix_len = 3
915
+ default_msg_suffix_len = 3
916
+
917
+ count = 0
918
+ for msg in messages:
919
+ if isinstance(msg.content, str):
920
+ count += (
921
+ default_msg_prefix_len
922
+ + default_content_len
923
+ + default_msg_suffix_len
924
+ )
925
+ if isinstance(msg.content, list):
926
+ count += (
927
+ default_msg_prefix_len
928
+ + len(msg.content) * default_content_len
929
+ + default_msg_suffix_len
930
+ )
931
+ return count
932
+ ```
981
933
 
934
+ First 30 tokens, allowing partial messages:
935
+ ```python
936
+ trim_messages(
937
+ messages,
938
+ max_tokens=30,
939
+ token_counter=dummy_token_counter,
940
+ strategy="first",
941
+ allow_partial=True,
942
+ )
943
+ ```
944
+
945
+ ```python
946
+ [
947
+ SystemMessage("This is a 4 token text. The full message is 10 tokens."),
948
+ HumanMessage(
949
+ "This is a 4 token text. The full message is 10 tokens.",
950
+ id="first",
951
+ ),
952
+ AIMessage(
953
+ [{"type": "text", "text": "This is the FIRST 4 token block."}],
954
+ id="second",
955
+ ),
956
+ ]
957
+ ```
982
958
  """
983
959
  # Validate arguments
984
960
  if start_on and strategy == "first":
@@ -1042,88 +1018,90 @@ def trim_messages(
1042
1018
 
1043
1019
 
1044
1020
  def convert_to_openai_messages(
1045
- messages: Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
1021
+ messages: MessageLikeRepresentation | Sequence[MessageLikeRepresentation],
1046
1022
  *,
1047
1023
  text_format: Literal["string", "block"] = "string",
1048
- ) -> Union[dict, list[dict]]:
1024
+ include_id: bool = False,
1025
+ ) -> dict | list[dict]:
1049
1026
  """Convert LangChain messages into OpenAI message dicts.
1050
1027
 
1051
1028
  Args:
1052
1029
  messages: Message-like object or iterable of objects whose contents are
1053
1030
  in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
1054
1031
  text_format: How to format string or text block contents:
1055
- - ``'string'``:
1056
- If a message has a string content, this is left as a string. If
1057
- a message has content blocks that are all of type ``'text'``, these
1058
- are joined with a newline to make a single string. If a message has
1059
- content blocks and at least one isn't of type ``'text'``, then
1060
- all blocks are left as dicts.
1061
- - ``'block'``:
1062
- If a message has a string content, this is turned into a list
1063
- with a single content block of type ``'text'``. If a message has
1064
- content blocks these are left as is.
1032
+ - `'string'`:
1033
+ If a message has a string content, this is left as a string. If
1034
+ a message has content blocks that are all of type `'text'`, these
1035
+ are joined with a newline to make a single string. If a message has
1036
+ content blocks and at least one isn't of type `'text'`, then
1037
+ all blocks are left as dicts.
1038
+ - `'block'`:
1039
+ If a message has a string content, this is turned into a list
1040
+ with a single content block of type `'text'`. If a message has
1041
+ content blocks these are left as is.
1042
+ include_id: Whether to include message IDs in the OpenAI messages, if they
1043
+ are present in the source messages.
1065
1044
 
1066
1045
  Raises:
1067
- ValueError: if an unrecognized ``text_format`` is specified, or if a message
1046
+ ValueError: if an unrecognized `text_format` is specified, or if a message
1068
1047
  content block is missing expected keys.
1069
1048
 
1070
1049
  Returns:
1071
1050
  The return type depends on the input type:
1072
1051
 
1073
1052
  - dict:
1074
- If a single message-like object is passed in, a single OpenAI message
1075
- dict is returned.
1053
+ If a single message-like object is passed in, a single OpenAI message
1054
+ dict is returned.
1076
1055
  - list[dict]:
1077
- If a sequence of message-like objects are passed in, a list of OpenAI
1078
- message dicts is returned.
1056
+ If a sequence of message-like objects is passed in, a list of OpenAI
1057
+ message dicts is returned.
1079
1058
 
1080
1059
  Example:
1060
+ ```python
1061
+ from langchain_core.messages import (
1062
+ convert_to_openai_messages,
1063
+ AIMessage,
1064
+ SystemMessage,
1065
+ ToolMessage,
1066
+ )
1081
1067
 
1082
- .. code-block:: python
1083
-
1084
- from langchain_core.messages import (
1085
- convert_to_openai_messages,
1086
- AIMessage,
1087
- SystemMessage,
1088
- ToolMessage,
1089
- )
1090
-
1091
- messages = [
1092
- SystemMessage([{"type": "text", "text": "foo"}]),
1093
- {
1094
- "role": "user",
1095
- "content": [
1096
- {"type": "text", "text": "whats in this"},
1097
- {
1098
- "type": "image_url",
1099
- "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
1100
- },
1101
- ],
1102
- },
1103
- AIMessage(
1104
- "",
1105
- tool_calls=[
1106
- {
1107
- "name": "analyze",
1108
- "args": {"baz": "buz"},
1109
- "id": "1",
1110
- "type": "tool_call",
1111
- }
1112
- ],
1113
- ),
1114
- ToolMessage("foobar", tool_call_id="1", name="bar"),
1115
- {"role": "assistant", "content": "thats nice"},
1116
- ]
1117
- oai_messages = convert_to_openai_messages(messages)
1118
- # -> [
1119
- # {'role': 'system', 'content': 'foo'},
1120
- # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
1121
- # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
1122
- # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
1123
- # {'role': 'assistant', 'content': 'thats nice'}
1124
- # ]
1125
-
1126
- .. versionadded:: 0.3.11
1068
+ messages = [
1069
+ SystemMessage([{"type": "text", "text": "foo"}]),
1070
+ {
1071
+ "role": "user",
1072
+ "content": [
1073
+ {"type": "text", "text": "whats in this"},
1074
+ {
1075
+ "type": "image_url",
1076
+ "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
1077
+ },
1078
+ ],
1079
+ },
1080
+ AIMessage(
1081
+ "",
1082
+ tool_calls=[
1083
+ {
1084
+ "name": "analyze",
1085
+ "args": {"baz": "buz"},
1086
+ "id": "1",
1087
+ "type": "tool_call",
1088
+ }
1089
+ ],
1090
+ ),
1091
+ ToolMessage("foobar", tool_call_id="1", name="bar"),
1092
+ {"role": "assistant", "content": "thats nice"},
1093
+ ]
1094
+ oai_messages = convert_to_openai_messages(messages)
1095
+ # -> [
1096
+ # {'role': 'system', 'content': 'foo'},
1097
+ # {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
1098
+ # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
1099
+ # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
1100
+ # {'role': 'assistant', 'content': 'thats nice'}
1101
+ # ]
1102
+ ```
1103
+
1104
+ !!! version-added "Added in `langchain-core` 0.3.11"
1127
1105
 
1128
1106
  """ # noqa: E501
1129
1107
  if text_format not in {"string", "block"}:
@@ -1140,7 +1118,7 @@ def convert_to_openai_messages(
1140
1118
  for i, message in enumerate(messages):
1141
1119
  oai_msg: dict = {"role": _get_message_openai_role(message)}
1142
1120
  tool_messages: list = []
1143
- content: Union[str, list[dict]]
1121
+ content: str | list[dict]
1144
1122
 
1145
1123
  if message.name:
1146
1124
  oai_msg["name"] = message.name
@@ -1150,6 +1128,8 @@ def convert_to_openai_messages(
1150
1128
  oai_msg["refusal"] = message.additional_kwargs["refusal"]
1151
1129
  if isinstance(message, ToolMessage):
1152
1130
  oai_msg["tool_call_id"] = message.tool_call_id
1131
+ if include_id and message.id:
1132
+ oai_msg["id"] = message.id
1153
1133
 
1154
1134
  if not message.content:
1155
1135
  content = "" if text_format == "string" else []
@@ -1421,10 +1401,8 @@ def _first_max_tokens(
1421
1401
  max_tokens: int,
1422
1402
  token_counter: Callable[[list[BaseMessage]], int],
1423
1403
  text_splitter: Callable[[str], list[str]],
1424
- partial_strategy: Optional[Literal["first", "last"]] = None,
1425
- end_on: Optional[
1426
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1427
- ] = None,
1404
+ partial_strategy: Literal["first", "last"] | None = None,
1405
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1428
1406
  ) -> list[BaseMessage]:
1429
1407
  messages = list(messages)
1430
1408
  if not messages:
@@ -1541,12 +1519,8 @@ def _last_max_tokens(
1541
1519
  text_splitter: Callable[[str], list[str]],
1542
1520
  allow_partial: bool = False,
1543
1521
  include_system: bool = False,
1544
- start_on: Optional[
1545
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1546
- ] = None,
1547
- end_on: Optional[
1548
- Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]]
1549
- ] = None,
1522
+ start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1523
+ end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
1550
1524
  ) -> list[BaseMessage]:
1551
1525
  messages = list(messages)
1552
1526
  if len(messages) == 0:
@@ -1647,7 +1621,7 @@ def _default_text_splitter(text: str) -> list[str]:
1647
1621
 
1648
1622
  def _is_message_type(
1649
1623
  message: BaseMessage,
1650
- type_: Union[str, type[BaseMessage], Sequence[Union[str, type[BaseMessage]]]],
1624
+ type_: str | type[BaseMessage] | Sequence[str | type[BaseMessage]],
1651
1625
  ) -> bool:
1652
1626
  types = [type_] if isinstance(type_, (str, type)) else type_
1653
1627
  types_str = [t for t in types if isinstance(t, str)]
@@ -1707,27 +1681,27 @@ def count_tokens_approximately(
1707
1681
  Args:
1708
1682
  messages: List of messages to count tokens for.
1709
1683
  chars_per_token: Number of characters per token to use for the approximation.
1710
- Default is 4 (one token corresponds to ~4 chars for common English text).
1711
- You can also specify float values for more fine-grained control.
1712
- `See more here. <https://platform.openai.com/tokenizer>`__
1713
- extra_tokens_per_message: Number of extra tokens to add per message.
1714
- Default is 3 (special tokens, including beginning/end of message).
1715
- You can also specify float values for more fine-grained control.
1716
- `See more here. <https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb>`__
1684
+ One token corresponds to ~4 chars for common English text.
1685
+ You can also specify `float` values for more fine-grained control.
1686
+ [See more here](https://platform.openai.com/tokenizer).
1687
+ extra_tokens_per_message: Number of extra tokens to add per message, e.g.
1688
+ special tokens, including beginning/end of message.
1689
+ You can also specify `float` values for more fine-grained control.
1690
+ [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
1717
1691
  count_name: Whether to include message names in the count.
1718
1692
  Enabled by default.
1719
1693
 
1720
1694
  Returns:
1721
1695
  Approximate number of tokens in the messages.
1722
1696
 
1723
- .. note::
1697
+ !!! note
1724
1698
  This is a simple approximation that may not match the exact token count used by
1725
1699
  specific models. For accurate counts, use model-specific tokenizers.
1726
1700
 
1727
1701
  Warning:
1728
1702
  This function does not currently support counting image tokens.
1729
1703
 
1730
- .. versionadded:: 0.3.46
1704
+ !!! version-added "Added in `langchain-core` 0.3.46"
1731
1705
 
1732
1706
  """
1733
1707
  token_count = 0.0