langchain-core 1.0.0a8__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (135):
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +0 -1
  3. langchain_core/_api/beta_decorator.py +17 -20
  4. langchain_core/_api/deprecation.py +30 -35
  5. langchain_core/_import_utils.py +1 -1
  6. langchain_core/agents.py +7 -6
  7. langchain_core/caches.py +4 -10
  8. langchain_core/callbacks/__init__.py +1 -8
  9. langchain_core/callbacks/base.py +232 -243
  10. langchain_core/callbacks/file.py +33 -33
  11. langchain_core/callbacks/manager.py +353 -416
  12. langchain_core/callbacks/stdout.py +21 -22
  13. langchain_core/callbacks/streaming_stdout.py +32 -32
  14. langchain_core/callbacks/usage.py +54 -51
  15. langchain_core/chat_history.py +42 -57
  16. langchain_core/document_loaders/langsmith.py +21 -21
  17. langchain_core/documents/__init__.py +0 -1
  18. langchain_core/documents/base.py +37 -40
  19. langchain_core/documents/transformers.py +28 -29
  20. langchain_core/embeddings/fake.py +46 -52
  21. langchain_core/exceptions.py +5 -5
  22. langchain_core/indexing/api.py +11 -11
  23. langchain_core/indexing/base.py +24 -24
  24. langchain_core/language_models/__init__.py +0 -2
  25. langchain_core/language_models/_utils.py +51 -53
  26. langchain_core/language_models/base.py +23 -24
  27. langchain_core/language_models/chat_models.py +121 -144
  28. langchain_core/language_models/fake_chat_models.py +5 -5
  29. langchain_core/language_models/llms.py +10 -12
  30. langchain_core/load/dump.py +1 -1
  31. langchain_core/load/load.py +16 -16
  32. langchain_core/load/serializable.py +35 -34
  33. langchain_core/messages/__init__.py +1 -16
  34. langchain_core/messages/ai.py +105 -104
  35. langchain_core/messages/base.py +26 -26
  36. langchain_core/messages/block_translators/__init__.py +17 -17
  37. langchain_core/messages/block_translators/anthropic.py +2 -2
  38. langchain_core/messages/block_translators/bedrock_converse.py +2 -2
  39. langchain_core/messages/block_translators/google_genai.py +2 -2
  40. langchain_core/messages/block_translators/groq.py +117 -21
  41. langchain_core/messages/block_translators/langchain_v0.py +2 -2
  42. langchain_core/messages/block_translators/openai.py +4 -4
  43. langchain_core/messages/chat.py +1 -1
  44. langchain_core/messages/content.py +189 -193
  45. langchain_core/messages/function.py +5 -5
  46. langchain_core/messages/human.py +15 -17
  47. langchain_core/messages/modifier.py +1 -1
  48. langchain_core/messages/system.py +12 -14
  49. langchain_core/messages/tool.py +45 -49
  50. langchain_core/messages/utils.py +384 -396
  51. langchain_core/output_parsers/__init__.py +1 -14
  52. langchain_core/output_parsers/base.py +22 -23
  53. langchain_core/output_parsers/json.py +3 -3
  54. langchain_core/output_parsers/list.py +1 -1
  55. langchain_core/output_parsers/openai_functions.py +46 -44
  56. langchain_core/output_parsers/openai_tools.py +7 -7
  57. langchain_core/output_parsers/pydantic.py +10 -11
  58. langchain_core/output_parsers/string.py +1 -1
  59. langchain_core/output_parsers/transform.py +2 -2
  60. langchain_core/output_parsers/xml.py +1 -1
  61. langchain_core/outputs/__init__.py +1 -1
  62. langchain_core/outputs/chat_generation.py +14 -14
  63. langchain_core/outputs/generation.py +5 -5
  64. langchain_core/outputs/llm_result.py +5 -5
  65. langchain_core/prompt_values.py +5 -5
  66. langchain_core/prompts/__init__.py +3 -23
  67. langchain_core/prompts/base.py +32 -37
  68. langchain_core/prompts/chat.py +216 -222
  69. langchain_core/prompts/dict.py +2 -2
  70. langchain_core/prompts/few_shot.py +76 -83
  71. langchain_core/prompts/few_shot_with_templates.py +6 -8
  72. langchain_core/prompts/image.py +11 -13
  73. langchain_core/prompts/loading.py +1 -1
  74. langchain_core/prompts/message.py +2 -2
  75. langchain_core/prompts/prompt.py +14 -16
  76. langchain_core/prompts/string.py +19 -7
  77. langchain_core/prompts/structured.py +24 -25
  78. langchain_core/rate_limiters.py +36 -38
  79. langchain_core/retrievers.py +41 -182
  80. langchain_core/runnables/base.py +565 -590
  81. langchain_core/runnables/branch.py +7 -7
  82. langchain_core/runnables/config.py +37 -44
  83. langchain_core/runnables/configurable.py +8 -9
  84. langchain_core/runnables/fallbacks.py +8 -8
  85. langchain_core/runnables/graph.py +28 -27
  86. langchain_core/runnables/graph_ascii.py +19 -18
  87. langchain_core/runnables/graph_mermaid.py +20 -31
  88. langchain_core/runnables/graph_png.py +7 -7
  89. langchain_core/runnables/history.py +20 -20
  90. langchain_core/runnables/passthrough.py +8 -8
  91. langchain_core/runnables/retry.py +3 -3
  92. langchain_core/runnables/router.py +1 -1
  93. langchain_core/runnables/schema.py +33 -33
  94. langchain_core/runnables/utils.py +30 -34
  95. langchain_core/stores.py +72 -102
  96. langchain_core/sys_info.py +27 -29
  97. langchain_core/tools/__init__.py +1 -14
  98. langchain_core/tools/base.py +63 -63
  99. langchain_core/tools/convert.py +92 -92
  100. langchain_core/tools/render.py +9 -9
  101. langchain_core/tools/retriever.py +1 -1
  102. langchain_core/tools/simple.py +6 -7
  103. langchain_core/tools/structured.py +17 -18
  104. langchain_core/tracers/__init__.py +1 -9
  105. langchain_core/tracers/base.py +35 -35
  106. langchain_core/tracers/context.py +12 -17
  107. langchain_core/tracers/event_stream.py +3 -3
  108. langchain_core/tracers/langchain.py +8 -8
  109. langchain_core/tracers/log_stream.py +17 -18
  110. langchain_core/tracers/memory_stream.py +2 -2
  111. langchain_core/tracers/schemas.py +0 -129
  112. langchain_core/utils/aiter.py +31 -31
  113. langchain_core/utils/env.py +5 -5
  114. langchain_core/utils/function_calling.py +48 -120
  115. langchain_core/utils/html.py +4 -4
  116. langchain_core/utils/input.py +2 -2
  117. langchain_core/utils/interactive_env.py +1 -1
  118. langchain_core/utils/iter.py +19 -19
  119. langchain_core/utils/json.py +1 -1
  120. langchain_core/utils/json_schema.py +2 -2
  121. langchain_core/utils/mustache.py +5 -5
  122. langchain_core/utils/pydantic.py +17 -17
  123. langchain_core/utils/strings.py +4 -4
  124. langchain_core/utils/utils.py +25 -28
  125. langchain_core/vectorstores/base.py +43 -64
  126. langchain_core/vectorstores/in_memory.py +83 -85
  127. langchain_core/version.py +1 -1
  128. {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc1.dist-info}/METADATA +23 -11
  129. langchain_core-1.0.0rc1.dist-info/RECORD +172 -0
  130. langchain_core/memory.py +0 -120
  131. langchain_core/pydantic_v1/__init__.py +0 -30
  132. langchain_core/pydantic_v1/dataclasses.py +0 -23
  133. langchain_core/pydantic_v1/main.py +0 -23
  134. langchain_core-1.0.0a8.dist-info/RECORD +0 -176
  135. {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc1.dist-info}/WHEEL +0 -0
langchain_core/messages/utils.py

@@ -5,7 +5,6 @@ Some examples of what you can do with these functions include:
 * Convert messages to strings (serialization)
 * Convert messages from dicts to Message objects (deserialization)
 * Filter messages from a list of messages based on name, type or id etc.
-
 """
 
 from __future__ import annotations
@@ -96,10 +95,10 @@ def get_buffer_string(
 
     Args:
         messages: Messages to be converted to strings.
-        human_prefix: The prefix to prepend to contents of ``HumanMessage``s.
-            Default is ``'Human'``.
-        ai_prefix: The prefix to prepend to contents of ``AIMessage``. Default is
-            ``'AI'``.
+        human_prefix: The prefix to prepend to contents of `HumanMessage`s.
+            Default is `'Human'`.
+        ai_prefix: The prefix to prepend to contents of `AIMessage`. Default is
+            `'AI'`.
 
     Returns:
         A single string concatenation of all input messages.
@@ -108,17 +107,16 @@ def get_buffer_string(
         ValueError: If an unsupported message type is encountered.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core import AIMessage, HumanMessage
-
-            messages = [
-                HumanMessage(content="Hi, how are you?"),
-                AIMessage(content="Good, how are you?"),
-            ]
-            get_buffer_string(messages)
-            # -> "Human: Hi, how are you?\nAI: Good, how are you?"
-
+        ```python
+        from langchain_core import AIMessage, HumanMessage
+
+        messages = [
+            HumanMessage(content="Hi, how are you?"),
+            AIMessage(content="Good, how are you?"),
+        ]
+        get_buffer_string(messages)
+        # -> "Human: Hi, how are you?\nAI: Good, how are you?"
+        ```
     """
     string_messages = []
     for m in messages:
@@ -178,7 +176,7 @@ def _message_from_dict(message: dict) -> BaseMessage:
 
 
 def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
-    """Convert a sequence of messages from dicts to ``Message`` objects.
+    """Convert a sequence of messages from dicts to `Message` objects.
 
     Args:
         messages: Sequence of messages (as dicts) to convert.
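For context on the dict format `messages_from_dict` consumes, here is a minimal round-trip sketch (illustrative only, not part of the diff; it assumes the `type`/`data` layout produced by `message_to_dict`):

```python
from langchain_core.messages import HumanMessage, message_to_dict, messages_from_dict

# Serialize one message to its dict form, then deserialize it back.
payload = message_to_dict(HumanMessage(content="hi"))
# payload looks like {"type": "human", "data": {"content": "hi", ...}}

restored = messages_from_dict([payload])
assert restored[0].content == "hi"
```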
@@ -191,7 +189,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
 
 
 def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
-    """Convert a message chunk to a ``Message``.
+    """Convert a message chunk to a `Message`.
 
     Args:
         chunk: Message chunk to convert.
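A similarly small sketch of the chunk-to-message conversion described above (illustrative, not part of the diff; chunk concatenation with `+` is the standard streaming behavior):

```python
from langchain_core.messages import AIMessageChunk, message_chunk_to_message

# Streamed chunks accumulate with `+` and stay chunk-typed until converted.
chunk = AIMessageChunk(content="Hello,") + AIMessageChunk(content=" world")
message = message_chunk_to_message(chunk)
# -> AIMessage(content="Hello, world")
```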
@@ -224,10 +222,10 @@ def _create_message_from_message_type(
     id: str | None = None,
     **additional_kwargs: Any,
 ) -> BaseMessage:
-    """Create a message from a ``Message`` type and content string.
+    """Create a message from a `Message` type and content string.
 
     Args:
-        message_type: (str) the type of the message (e.g., ``'human'``, ``'ai'``, etc.).
+        message_type: (str) the type of the message (e.g., `'human'`, `'ai'`, etc.).
         content: (str) the content string.
         name: (str) the name of the message. Default is None.
         tool_call_id: (str) the tool call id. Default is None.
@@ -239,9 +237,9 @@ def _create_message_from_message_type(
         a message of the appropriate type.
 
     Raises:
-        ValueError: if the message type is not one of ``'human'``, ``'user'``, ``'ai'``,
-            ``'assistant'``, ``'function'``, ``'tool'``, ``'system'``, or
-            ``'developer'``.
+        ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
+            `'assistant'`, `'function'`, `'tool'`, `'system'`, or
+            `'developer'`.
     """
     kwargs: dict[str, Any] = {}
     if name is not None:
@@ -307,15 +305,15 @@ def _create_message_from_message_type(
 
 
 def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
-    """Instantiate a ``Message`` from a variety of message formats.
+    """Instantiate a `Message` from a variety of message formats.
 
     The message format can be one of the following:
 
-    - ``BaseMessagePromptTemplate``
-    - ``BaseMessage``
-    - 2-tuple of (role string, template); e.g., (``'human'``, ``'{user_input}'``)
+    - `BaseMessagePromptTemplate`
+    - `BaseMessage`
+    - 2-tuple of (role string, template); e.g., (`'human'`, `'{user_input}'`)
     - dict: a message dict with role and content keys
-    - string: shorthand for (``'human'``, template); e.g., ``'{user_input}'``
+    - string: shorthand for (`'human'`, template); e.g., `'{user_input}'`
 
     Args:
         message: a representation of a message in one of the supported formats.
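The public counterpart of `_convert_to_message` is `convert_to_messages`, which accepts the same formats listed above; a quick sketch (illustrative, not part of the diff):

```python
from langchain_core.messages import convert_to_messages

messages = convert_to_messages(
    [
        ("system", "you are a helpful assistant"),  # 2-tuple of (role, template)
        {"role": "user", "content": "hello"},  # dict with role and content keys
        "how are you?",  # string shorthand for a human message
    ]
)
# -> [SystemMessage(...), HumanMessage(...), HumanMessage(...)]
```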
@@ -423,79 +421,78 @@ def filter_messages(
     exclude_ids: Sequence[str] | None = None,
     exclude_tool_calls: Sequence[str] | bool | None = None,
 ) -> list[BaseMessage]:
-    """Filter messages based on ``name``, ``type`` or ``id``.
+    """Filter messages based on `name`, `type` or `id`.
 
     Args:
         messages: Sequence Message-like objects to filter.
         include_names: Message names to include. Default is None.
         exclude_names: Messages names to exclude. Default is None.
         include_types: Message types to include. Can be specified as string names
-            (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
-            classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
+            (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
+            classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
             Default is None.
         exclude_types: Message types to exclude. Can be specified as string names
-            (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or as ``BaseMessage``
-            classes (e.g. ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...).
+            (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
+            classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
             Default is None.
         include_ids: Message IDs to include. Default is None.
         exclude_ids: Message IDs to exclude. Default is None.
         exclude_tool_calls: Tool call IDs to exclude. Default is None.
            Can be one of the following:
-            - ``True``: all ``AIMessage``s with tool calls and all
-              ``ToolMessage``s will be excluded.
+            - `True`: all `AIMessage`s with tool calls and all
+              `ToolMessage` objects will be excluded.
            - a sequence of tool call IDs to exclude:
-                - ``ToolMessage``s with the corresponding tool call ID will be
-                  excluded.
-                - The ``tool_calls`` in the AIMessage will be updated to exclude
-                  matching tool calls. If all ``tool_calls`` are filtered from an
-                  AIMessage, the whole message is excluded.
+                - `ToolMessage` objects with the corresponding tool call ID will be
+                  excluded.
+                - The `tool_calls` in the AIMessage will be updated to exclude
+                  matching tool calls. If all `tool_calls` are filtered from an
+                  AIMessage, the whole message is excluded.
 
     Returns:
-        A list of Messages that meets at least one of the ``incl_*`` conditions and none
-        of the ``excl_*`` conditions. If not ``incl_*`` conditions are specified then
+        A list of Messages that meets at least one of the `incl_*` conditions and none
+        of the `excl_*` conditions. If not `incl_*` conditions are specified then
         anything that is not explicitly excluded will be included.
 
     Raises:
         ValueError: If two incompatible arguments are provided.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                filter_messages,
-                AIMessage,
-                HumanMessage,
-                SystemMessage,
-            )
-
-            messages = [
-                SystemMessage("you're a good assistant."),
-                HumanMessage("what's your name", id="foo", name="example_user"),
-                AIMessage("steve-o", id="bar", name="example_assistant"),
-                HumanMessage(
-                    "what's your favorite color",
-                    id="baz",
-                ),
-                AIMessage(
-                    "silicon blue",
-                    id="blah",
-                ),
-            ]
-
-            filter_messages(
-                messages,
-                incl_names=("example_user", "example_assistant"),
-                incl_types=("system",),
-                excl_ids=("bar",),
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage("you're a good assistant."),
-                HumanMessage("what's your name", id="foo", name="example_user"),
-            ]
+        ```python
+        from langchain_core.messages import (
+            filter_messages,
+            AIMessage,
+            HumanMessage,
+            SystemMessage,
+        )
 
+        messages = [
+            SystemMessage("you're a good assistant."),
+            HumanMessage("what's your name", id="foo", name="example_user"),
+            AIMessage("steve-o", id="bar", name="example_assistant"),
+            HumanMessage(
+                "what's your favorite color",
+                id="baz",
+            ),
+            AIMessage(
+                "silicon blue",
+                id="blah",
+            ),
+        ]
+
+        filter_messages(
+            messages,
+            incl_names=("example_user", "example_assistant"),
+            incl_types=("system",),
+            excl_ids=("bar",),
+        )
+        ```
+
+        ```python
+        [
+            SystemMessage("you're a good assistant."),
+            HumanMessage("what's your name", id="foo", name="example_user"),
+        ]
+        ```
     """
     messages = convert_to_messages(messages)
     filtered: list[BaseMessage] = []
@@ -565,13 +562,13 @@ def merge_message_runs(
     r"""Merge consecutive Messages of the same type.
 
     !!! note
-        ToolMessages are not merged, as each has a distinct tool call id that can't be
-        merged.
+        `ToolMessage` objects are not merged, as each has a distinct tool call id that
+        can't be merged.
 
     Args:
         messages: Sequence Message-like objects to merge.
         chunk_separator: Specify the string to be inserted between message chunks.
-            Defaults to ``'\n'``.
+            Defaults to `'\n'`.
 
     Returns:
         list of BaseMessages with consecutive runs of message types merged into single
@@ -579,87 +576,86 @@ def merge_message_runs(
     the merged content is a concatenation of the two strings with a new-line
     separator.
     The separator inserted between message chunks can be controlled by specifying
-    any string with ``chunk_separator``. If at least one of the messages has a list
+    any string with `chunk_separator`. If at least one of the messages has a list
     of content blocks, the merged content is a list of content blocks.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                merge_message_runs,
-                AIMessage,
-                HumanMessage,
-                SystemMessage,
-                ToolCall,
-            )
-
-            messages = [
-                SystemMessage("you're a good assistant."),
-                HumanMessage(
-                    "what's your favorite color",
-                    id="foo",
-                ),
-                HumanMessage(
-                    "wait your favorite food",
-                    id="bar",
-                ),
-                AIMessage(
-                    "my favorite colo",
-                    tool_calls=[
-                        ToolCall(
-                            name="blah_tool", args={"x": 2}, id="123", type="tool_call"
-                        )
-                    ],
-                    id="baz",
-                ),
-                AIMessage(
-                    [{"type": "text", "text": "my favorite dish is lasagna"}],
-                    tool_calls=[
-                        ToolCall(
-                            name="blah_tool",
-                            args={"x": -10},
-                            id="456",
-                            type="tool_call",
-                        )
-                    ],
-                    id="blur",
-                ),
-            ]
-
-            merge_message_runs(messages)
-
-        .. code-block:: python
-
-            [
-                SystemMessage("you're a good assistant."),
-                HumanMessage(
-                    "what's your favorite color\\n"
-                    "wait your favorite food", id="foo",
-                ),
-                AIMessage(
-                    [
-                        "my favorite colo",
-                        {"type": "text", "text": "my favorite dish is lasagna"}
-                    ],
-                    tool_calls=[
-                        ToolCall({
-                            "name": "blah_tool",
-                            "args": {"x": 2},
-                            "id": "123",
-                            "type": "tool_call"
-                        }),
-                        ToolCall({
-                            "name": "blah_tool",
-                            "args": {"x": -10},
-                            "id": "456",
-                            "type": "tool_call"
-                        })
-                    ]
-                    id="baz"
-                ),
-            ]
+        ```python
+        from langchain_core.messages import (
+            merge_message_runs,
+            AIMessage,
+            HumanMessage,
+            SystemMessage,
+            ToolCall,
+        )
 
+        messages = [
+            SystemMessage("you're a good assistant."),
+            HumanMessage(
+                "what's your favorite color",
+                id="foo",
+            ),
+            HumanMessage(
+                "wait your favorite food",
+                id="bar",
+            ),
+            AIMessage(
+                "my favorite colo",
+                tool_calls=[
+                    ToolCall(
+                        name="blah_tool", args={"x": 2}, id="123", type="tool_call"
+                    )
+                ],
+                id="baz",
+            ),
+            AIMessage(
+                [{"type": "text", "text": "my favorite dish is lasagna"}],
+                tool_calls=[
+                    ToolCall(
+                        name="blah_tool",
+                        args={"x": -10},
+                        id="456",
+                        type="tool_call",
+                    )
+                ],
+                id="blur",
+            ),
+        ]
+
+        merge_message_runs(messages)
+        ```
+
+        ```python
+        [
+            SystemMessage("you're a good assistant."),
+            HumanMessage(
+                "what's your favorite color\\n"
+                "wait your favorite food", id="foo",
+            ),
+            AIMessage(
+                [
+                    "my favorite colo",
+                    {"type": "text", "text": "my favorite dish is lasagna"}
+                ],
+                tool_calls=[
+                    ToolCall({
+                        "name": "blah_tool",
+                        "args": {"x": 2},
+                        "id": "123",
+                        "type": "tool_call"
+                    }),
+                    ToolCall({
+                        "name": "blah_tool",
+                        "args": {"x": -10},
+                        "id": "456",
+                        "type": "tool_call"
+                    })
+                ]
+                id="baz"
+            ),
+        ]
 
+        ```
     """
     if not messages:
         return []
@@ -706,7 +702,7 @@ def trim_messages(
 ) -> list[BaseMessage]:
     r"""Trim messages to be below a token count.
 
-    ``trim_messages`` can be used to reduce the size of a chat history to a specified
+    `trim_messages` can be used to reduce the size of a chat history to a specified
     token count or specified message count.
 
     In either case, if passing the trimmed chat history back into a chat model
@@ -714,145 +710,143 @@ def trim_messages(
     properties:
 
     1. The resulting chat history should be valid. Most chat models expect that chat
-       history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
-       followed by a ``HumanMessage``. To achieve this, set ``start_on='human'``.
-       In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
-       that involved a tool call.
-       Please see the following link for more information about messages:
-       https://python.langchain.com/docs/concepts/#messages
+       history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`
+       followed by a `HumanMessage`. To achieve this, set `start_on='human'`.
+       In addition, generally a `ToolMessage` can only appear after an `AIMessage`
+       that involved a tool call.
+       Please see the following link for more information about messages:
+       https://python.langchain.com/docs/concepts/#messages
     2. It includes recent messages and drops old messages in the chat history.
-       To achieve this set the ``strategy='last'``.
-    3. Usually, the new chat history should include the ``SystemMessage`` if it
-       was present in the original chat history since the ``SystemMessage`` includes
-       special instructions to the chat model. The ``SystemMessage`` is almost always
-       the first message in the history if present. To achieve this set the
-       ``include_system=True``.
+       To achieve this set the `strategy='last'`.
+    3. Usually, the new chat history should include the `SystemMessage` if it
+       was present in the original chat history since the `SystemMessage` includes
+       special instructions to the chat model. The `SystemMessage` is almost always
+       the first message in the history if present. To achieve this set the
+       `include_system=True`.
 
     !!! note
-        The examples below show how to configure ``trim_messages`` to achieve a behavior
+        The examples below show how to configure `trim_messages` to achieve a behavior
         consistent with the above properties.
 
     Args:
         messages: Sequence of Message-like objects to trim.
         max_tokens: Max token count of trimmed messages.
-        token_counter: Function or llm for counting tokens in a ``BaseMessage`` or a
-            list of ``BaseMessage``. If a ``BaseLanguageModel`` is passed in then
-            ``BaseLanguageModel.get_num_tokens_from_messages()`` will be used.
-            Set to ``len`` to count the number of **messages** in the chat history.
+        token_counter: Function or llm for counting tokens in a `BaseMessage` or a
+            list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
+            `BaseLanguageModel.get_num_tokens_from_messages()` will be used.
+            Set to `len` to count the number of **messages** in the chat history.
 
             !!! note
-                Use ``count_tokens_approximately`` to get fast, approximate token
                counts.
-                This is recommended for using ``trim_messages`` on the hot path, where
+                Use `count_tokens_approximately` to get fast, approximate token
+                counts.
+                This is recommended for using `trim_messages` on the hot path, where
                 exact token counting is not necessary.
 
         strategy: Strategy for trimming.
-            - ``'first'``: Keep the first ``<= n_count`` tokens of the messages.
-            - ``'last'``: Keep the last ``<= n_count`` tokens of the messages.
-            Default is ``'last'``.
+            - `'first'`: Keep the first `<= n_count` tokens of the messages.
+            - `'last'`: Keep the last `<= n_count` tokens of the messages.
+            Default is `'last'`.
         allow_partial: Whether to split a message if only part of the message can be
-            included. If ``strategy='last'`` then the last partial contents of a message
-            are included. If ``strategy='first'`` then the first partial contents of a
+            included. If `strategy='last'` then the last partial contents of a message
+            are included. If `strategy='first'` then the first partial contents of a
            message are included.
            Default is False.
        end_on: The message type to end on. If specified then every message after the
-            last occurrence of this type is ignored. If ``strategy='last'`` then this
-            is done before we attempt to get the last ``max_tokens``. If
-            ``strategy='first'`` then this is done after we get the first
-            ``max_tokens``. Can be specified as string names (e.g. ``'system'``,
-            ``'human'``, ``'ai'``, ...) or as ``BaseMessage`` classes (e.g.
-            ``SystemMessage``, ``HumanMessage``, ``AIMessage``, ...). Can be a single
+            last occurrence of this type is ignored. If `strategy='last'` then this
+            is done before we attempt to get the last `max_tokens`. If
+            `strategy='first'` then this is done after we get the first
+            `max_tokens`. Can be specified as string names (e.g. `'system'`,
+            `'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
+            `SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
            type or a list of types.
            Default is None.
        start_on: The message type to start on. Should only be specified if
-            ``strategy='last'``. If specified then every message before
+            `strategy='last'`. If specified then every message before
            the first occurrence of this type is ignored. This is done after we trim
-            the initial messages to the last ``max_tokens``. Does not
-            apply to a ``SystemMessage`` at index 0 if ``include_system=True``. Can be
-            specified as string names (e.g. ``'system'``, ``'human'``, ``'ai'``, ...) or
-            as ``BaseMessage`` classes (e.g. ``SystemMessage``, ``HumanMessage``,
-            ``AIMessage``, ...). Can be a single type or a list of types.
+            the initial messages to the last `max_tokens`. Does not
+            apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
+            specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
+            as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
+            `AIMessage`, ...). Can be a single type or a list of types.
            Default is None.
        include_system: Whether to keep the SystemMessage if there is one at index 0.
-            Should only be specified if ``strategy="last"``.
+            Should only be specified if `strategy="last"`.
            Default is False.
-        text_splitter: Function or ``langchain_text_splitters.TextSplitter`` for
+        text_splitter: Function or `langchain_text_splitters.TextSplitter` for
            splitting the string contents of a message. Only used if
-            ``allow_partial=True``. If ``strategy='last'`` then the last split tokens
-            from a partial message will be included. if ``strategy='first'`` then the
+            `allow_partial=True`. If `strategy='last'` then the last split tokens
+            from a partial message will be included. if `strategy='first'` then the
            first split tokens from a partial message will be included. Token splitter
            assumes that separators are kept, so that split contents can be directly
            concatenated to recreate the original text. Defaults to splitting on
            newlines.
 
     Returns:
-        list of trimmed ``BaseMessage``.
+        list of trimmed `BaseMessage`.
 
     Raises:
        ValueError: if two incompatible arguments are specified or an unrecognized
-            ``strategy`` is specified.
+            `strategy` is specified.
 
     Example:
-        Trim chat history based on token count, keeping the ``SystemMessage`` if
-        present, and ensuring that the chat history starts with a ``HumanMessage`` (
-        or a ``SystemMessage`` followed by a ``HumanMessage``).
-
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                AIMessage,
-                HumanMessage,
-                BaseMessage,
-                SystemMessage,
-                trim_messages,
-            )
-
-            messages = [
-                SystemMessage(
-                    "you're a good assistant, you always respond with a joke."
-                ),
-                HumanMessage("i wonder why it's called langchain"),
-                AIMessage(
-                    'Well, I guess they thought "WordRope" and "SentenceString" just '
-                    "didn't have the same ring to it!"
-                ),
-                HumanMessage("and who is harrison chasing anyways"),
-                AIMessage(
-                    "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
-                    "cup of coffee in the office!"
-                ),
-                HumanMessage("what do you call a speechless parrot"),
-            ]
-
-
-            trim_messages(
-                messages,
-                max_tokens=45,
-                strategy="last",
-                token_counter=ChatOpenAI(model="gpt-4o"),
-                # Most chat models expect that chat history starts with either:
-                # (1) a HumanMessage or
-                # (2) a SystemMessage followed by a HumanMessage
-                start_on="human",
-                # Usually, we want to keep the SystemMessage
-                # if it's present in the original history.
-                # The SystemMessage has special instructions for the model.
-                include_system=True,
-                allow_partial=False,
-            )
+        Trim chat history based on token count, keeping the `SystemMessage` if
+        present, and ensuring that the chat history starts with a `HumanMessage` (
+        or a `SystemMessage` followed by a `HumanMessage`).
+
+        ```python
+        from langchain_core.messages import (
+            AIMessage,
+            HumanMessage,
+            BaseMessage,
+            SystemMessage,
+            trim_messages,
+        )
 
-        .. code-block:: python
+        messages = [
+            SystemMessage("you're a good assistant, you always respond with a joke."),
+            HumanMessage("i wonder why it's called langchain"),
+            AIMessage(
+                'Well, I guess they thought "WordRope" and "SentenceString" just '
+                "didn't have the same ring to it!"
+            ),
+            HumanMessage("and who is harrison chasing anyways"),
+            AIMessage(
+                "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
+                "cup of coffee in the office!"
+            ),
+            HumanMessage("what do you call a speechless parrot"),
+        ]
+
+
+        trim_messages(
+            messages,
+            max_tokens=45,
+            strategy="last",
+            token_counter=ChatOpenAI(model="gpt-4o"),
+            # Most chat models expect that chat history starts with either:
+            # (1) a HumanMessage or
+            # (2) a SystemMessage followed by a HumanMessage
+            start_on="human",
+            # Usually, we want to keep the SystemMessage
+            # if it's present in the original history.
+            # The SystemMessage has special instructions for the model.
+            include_system=True,
+            allow_partial=False,
+        )
+        ```
 
-        [
-            SystemMessage(
-                content="you're a good assistant, you always respond with a joke."
-            ),
-            HumanMessage(content="what do you call a speechless parrot"),
-        ]
+        ```python
+        [
+            SystemMessage(
+                content="you're a good assistant, you always respond with a joke."
+            ),
+            HumanMessage(content="what do you call a speechless parrot"),
+        ]
+        ```
 
-        Trim chat history based on the message count, keeping the ``SystemMessage`` if
-        present, and ensuring that the chat history starts with a ``HumanMessage`` (
-        or a ``SystemMessage`` followed by a ``HumanMessage``).
+        Trim chat history based on the message count, keeping the `SystemMessage` if
+        present, and ensuring that the chat history starts with a `HumanMessage` (
+        or a `SystemMessage` followed by a `HumanMessage`).
 
         trim_messages(
             messages,
@@ -874,100 +868,95 @@ def trim_messages(
             allow_partial=False,
         )
 
-        .. code-block:: python
-
-        [
-            SystemMessage(
-                content="you're a good assistant, you always respond with a joke."
-            ),
-            HumanMessage(content="and who is harrison chasing anyways"),
-            AIMessage(
-                content="Hmmm let me think.\n\nWhy, he's probably chasing after "
-                "the last cup of coffee in the office!"
-            ),
-            HumanMessage(content="what do you call a speechless parrot"),
-        ]
-
-
+        ```python
+        [
+            SystemMessage(
+                content="you're a good assistant, you always respond with a joke."
+            ),
+            HumanMessage(content="and who is harrison chasing anyways"),
+            AIMessage(
+                content="Hmmm let me think.\n\nWhy, he's probably chasing after "
+                "the last cup of coffee in the office!"
+            ),
+            HumanMessage(content="what do you call a speechless parrot"),
+        ]
+        ```
        Trim chat history using a custom token counter function that counts the
        number of tokens in each message.
 
-        .. code-block:: python
-
-            messages = [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.", id="first"
-                ),
-                AIMessage(
-                    [
-                        {"type": "text", "text": "This is the FIRST 4 token block."},
-                        {"type": "text", "text": "This is the SECOND 4 token block."},
-                    ],
-                    id="second",
-                ),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.", id="third"
-                ),
-                AIMessage(
-                    "This is a 4 token text. The full message is 10 tokens.",
-                    id="fourth",
-                ),
-            ]
-
-
-            def dummy_token_counter(messages: list[BaseMessage]) -> int:
-                # treat each message like it adds 3 default tokens at the beginning
-                # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
-                # per message.
-
-                default_content_len = 4
-                default_msg_prefix_len = 3
-                default_msg_suffix_len = 3
-
-                count = 0
-                for msg in messages:
-                    if isinstance(msg.content, str):
-                        count += (
-                            default_msg_prefix_len
-                            + default_content_len
-                            + default_msg_suffix_len
-                        )
-                    if isinstance(msg.content, list):
-                        count += (
-                            default_msg_prefix_len
-                            + len(msg.content) * default_content_len
-                            + default_msg_suffix_len
-                        )
-                return count
-
-        First 30 tokens, allowing partial messages:
-        .. code-block:: python
-
-            trim_messages(
-                messages,
-                max_tokens=30,
-                token_counter=dummy_token_counter,
-                strategy="first",
-                allow_partial=True,
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage(
-                    "This is a 4 token text. The full message is 10 tokens."
-                ),
-                HumanMessage(
-                    "This is a 4 token text. The full message is 10 tokens.",
-                    id="first",
-                ),
-                AIMessage(
-                    [{"type": "text", "text": "This is the FIRST 4 token block."}],
-                    id="second",
-                ),
-            ]
+        ```python
+        messages = [
+            SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.", id="first"
+            ),
+            AIMessage(
+                [
+                    {"type": "text", "text": "This is the FIRST 4 token block."},
+                    {"type": "text", "text": "This is the SECOND 4 token block."},
+                ],
+                id="second",
+            ),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.", id="third"
+            ),
+            AIMessage(
+                "This is a 4 token text. The full message is 10 tokens.",
+                id="fourth",
+            ),
+        ]
+
+
+        def dummy_token_counter(messages: list[BaseMessage]) -> int:
+            # treat each message like it adds 3 default tokens at the beginning
+            # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
+            # per message.
+
+            default_content_len = 4
+            default_msg_prefix_len = 3
+            default_msg_suffix_len = 3
+
+            count = 0
+            for msg in messages:
+                if isinstance(msg.content, str):
+                    count += (
+                        default_msg_prefix_len
+                        + default_content_len
+                        + default_msg_suffix_len
+                    )
+                if isinstance(msg.content, list):
+                    count += (
+                        default_msg_prefix_len
+                        + len(msg.content) * default_content_len
+                        + default_msg_suffix_len
+                    )
+            return count
+        ```
 
+        First 30 tokens, allowing partial messages:
+        ```python
+        trim_messages(
+            messages,
+            max_tokens=30,
+            token_counter=dummy_token_counter,
+            strategy="first",
+            allow_partial=True,
+        )
+        ```
+
+        ```python
+        [
+            SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+            HumanMessage(
+                "This is a 4 token text. The full message is 10 tokens.",
+                id="first",
+            ),
+            AIMessage(
+                [{"type": "text", "text": "This is the FIRST 4 token block."}],
+                id="second",
+            ),
+        ]
+        ```
     """
     # Validate arguments
     if start_on and strategy == "first":
@@ -1042,21 +1031,21 @@ def convert_to_openai_messages(
     messages: Message-like object or iterable of objects whose contents are
         in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
     text_format: How to format string or text block contents:
-            - ``'string'``:
+            - `'string'`:
                 If a message has a string content, this is left as a string. If
-                a message has content blocks that are all of type ``'text'``, these
+                a message has content blocks that are all of type `'text'`, these
                 are joined with a newline to make a single string. If a message has
-                content blocks and at least one isn't of type ``'text'``, then
+                content blocks and at least one isn't of type `'text'`, then
                 all blocks are left as dicts.
-            - ``'block'``:
+            - `'block'`:
                 If a message has a string content, this is turned into a list
-                with a single content block of type ``'text'``. If a message has
+                with a single content block of type `'text'`. If a message has
                 content blocks these are left as is.
        include_id: Whether to include message ids in the openai messages, if they
            are present in the source messages.
 
     Raises:
-        ValueError: if an unrecognized ``text_format`` is specified, or if a message
+        ValueError: if an unrecognized `text_format` is specified, or if a message
            content block is missing expected keys.
 
     Returns:
@@ -1070,50 +1059,49 @@ def convert_to_openai_messages(
        message dicts is returned.
 
     Example:
-        .. code-block:: python
-
-            from langchain_core.messages import (
-                convert_to_openai_messages,
-                AIMessage,
-                SystemMessage,
-                ToolMessage,
-            )
-
-            messages = [
-                SystemMessage([{"type": "text", "text": "foo"}]),
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "whats in this"},
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
-                        },
-                    ],
-                },
-                AIMessage(
-                    "",
-                    tool_calls=[
-                        {
-                            "name": "analyze",
-                            "args": {"baz": "buz"},
-                            "id": "1",
-                            "type": "tool_call",
-                        }
-                    ],
-                ),
-                ToolMessage("foobar", tool_call_id="1", name="bar"),
-                {"role": "assistant", "content": "thats nice"},
-            ]
-            oai_messages = convert_to_openai_messages(messages)
-            # -> [
-            #     {'role': 'system', 'content': 'foo'},
-            #     {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
-            #     {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
-            #     {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
-            #     {'role': 'assistant', 'content': 'thats nice'}
-            # ]
+        ```python
+        from langchain_core.messages import (
+            convert_to_openai_messages,
+            AIMessage,
+            SystemMessage,
+            ToolMessage,
+        )
 
+        messages = [
+            SystemMessage([{"type": "text", "text": "foo"}]),
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "whats in this"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
+                    },
+                ],
+            },
+            AIMessage(
+                "",
+                tool_calls=[
+                    {
+                        "name": "analyze",
+                        "args": {"baz": "buz"},
+                        "id": "1",
+                        "type": "tool_call",
+                    }
+                ],
+            ),
+            ToolMessage("foobar", tool_call_id="1", name="bar"),
+            {"role": "assistant", "content": "thats nice"},
+        ]
+        oai_messages = convert_to_openai_messages(messages)
+        # -> [
+        #     {'role': 'system', 'content': 'foo'},
+        #     {'role': 'user', 'content': [{'type': 'text', 'text': 'whats in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},
+        #     {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},
+        #     {'role': 'tool', 'name': 'bar', 'content': 'foobar'},
+        #     {'role': 'assistant', 'content': 'thats nice'}
+        # ]
+        ```
 
     !!! version-added "Added in version 0.3.11"
 
@@ -1697,11 +1685,11 @@ def count_tokens_approximately(
        chars_per_token: Number of characters per token to use for the approximation.
            Default is 4 (one token corresponds to ~4 chars for common English text).
            You can also specify float values for more fine-grained control.
-            `See more here. <https://platform.openai.com/tokenizer>`__
+            [See more here](https://platform.openai.com/tokenizer).
        extra_tokens_per_message: Number of extra tokens to add per message.
            Default is 3 (special tokens, including beginning/end of message).
            You can also specify float values for more fine-grained control.
-            `See more here. <https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb>`__
+            [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
        count_name: Whether to include message names in the count.
            Enabled by default.
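For intuition, the approximation these two parameters control can be sketched in plain Python (a simplification, not part of the diff; the real `count_tokens_approximately` also accounts for list content, tool calls, and optional message names):

```python
import math


def approx_tokens(
    texts: list[str],
    chars_per_token: float = 4.0,
    extra_tokens_per_message: float = 3.0,
) -> int:
    # Each message contributes ~len(text) / chars_per_token content tokens
    # plus a fixed per-message overhead for special begin/end tokens.
    total = sum(len(t) / chars_per_token + extra_tokens_per_message for t in texts)
    return math.ceil(total)


# Two messages of 13 and 26 characters:
# (13 / 4 + 3) + (26 / 4 + 3) = 15.75 -> 16
print(approx_tokens(["Hello, world!", "What do you call a parrot?"]))
```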