langchain-core 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/_api/beta_decorator.py +18 -41
- langchain_core/_api/deprecation.py +20 -7
- langchain_core/_api/path.py +19 -2
- langchain_core/_import_utils.py +7 -0
- langchain_core/agents.py +10 -6
- langchain_core/beta/runnables/context.py +2 -3
- langchain_core/callbacks/base.py +11 -4
- langchain_core/callbacks/file.py +13 -2
- langchain_core/callbacks/manager.py +129 -78
- langchain_core/callbacks/usage.py +4 -2
- langchain_core/chat_history.py +10 -12
- langchain_core/document_loaders/base.py +34 -9
- langchain_core/document_loaders/langsmith.py +3 -0
- langchain_core/documents/base.py +36 -11
- langchain_core/documents/compressor.py +9 -6
- langchain_core/documents/transformers.py +4 -2
- langchain_core/embeddings/fake.py +8 -5
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/exceptions.py +7 -0
- langchain_core/globals.py +17 -28
- langchain_core/indexing/api.py +56 -44
- langchain_core/indexing/base.py +7 -10
- langchain_core/indexing/in_memory.py +23 -3
- langchain_core/language_models/__init__.py +3 -2
- langchain_core/language_models/base.py +64 -39
- langchain_core/language_models/chat_models.py +130 -42
- langchain_core/language_models/fake_chat_models.py +10 -11
- langchain_core/language_models/llms.py +49 -17
- langchain_core/load/dump.py +5 -7
- langchain_core/load/load.py +15 -1
- langchain_core/load/serializable.py +38 -43
- langchain_core/memory.py +7 -3
- langchain_core/messages/ai.py +36 -16
- langchain_core/messages/base.py +13 -6
- langchain_core/messages/content_blocks.py +23 -2
- langchain_core/messages/human.py +2 -6
- langchain_core/messages/modifier.py +1 -1
- langchain_core/messages/system.py +2 -6
- langchain_core/messages/tool.py +36 -16
- langchain_core/messages/utils.py +198 -87
- langchain_core/output_parsers/base.py +5 -2
- langchain_core/output_parsers/json.py +4 -4
- langchain_core/output_parsers/list.py +7 -22
- langchain_core/output_parsers/openai_functions.py +3 -0
- langchain_core/output_parsers/openai_tools.py +8 -1
- langchain_core/output_parsers/pydantic.py +4 -0
- langchain_core/output_parsers/string.py +5 -1
- langchain_core/output_parsers/transform.py +2 -2
- langchain_core/output_parsers/xml.py +23 -22
- langchain_core/outputs/chat_generation.py +18 -7
- langchain_core/outputs/generation.py +14 -3
- langchain_core/outputs/llm_result.py +8 -1
- langchain_core/prompt_values.py +10 -4
- langchain_core/prompts/base.py +4 -9
- langchain_core/prompts/chat.py +88 -61
- langchain_core/prompts/dict.py +16 -8
- langchain_core/prompts/few_shot.py +9 -11
- langchain_core/prompts/few_shot_with_templates.py +5 -1
- langchain_core/prompts/image.py +12 -5
- langchain_core/prompts/message.py +5 -6
- langchain_core/prompts/pipeline.py +13 -8
- langchain_core/prompts/prompt.py +22 -8
- langchain_core/prompts/string.py +18 -10
- langchain_core/prompts/structured.py +7 -2
- langchain_core/rate_limiters.py +2 -2
- langchain_core/retrievers.py +7 -6
- langchain_core/runnables/base.py +842 -567
- langchain_core/runnables/branch.py +15 -20
- langchain_core/runnables/config.py +11 -17
- langchain_core/runnables/configurable.py +34 -19
- langchain_core/runnables/fallbacks.py +24 -17
- langchain_core/runnables/graph.py +47 -40
- langchain_core/runnables/graph_ascii.py +40 -17
- langchain_core/runnables/graph_mermaid.py +27 -15
- langchain_core/runnables/graph_png.py +27 -31
- langchain_core/runnables/history.py +56 -59
- langchain_core/runnables/passthrough.py +47 -24
- langchain_core/runnables/retry.py +10 -6
- langchain_core/runnables/router.py +10 -9
- langchain_core/runnables/schema.py +2 -0
- langchain_core/runnables/utils.py +51 -89
- langchain_core/stores.py +13 -25
- langchain_core/structured_query.py +3 -7
- langchain_core/sys_info.py +9 -8
- langchain_core/tools/base.py +30 -23
- langchain_core/tools/convert.py +24 -13
- langchain_core/tools/simple.py +35 -3
- langchain_core/tools/structured.py +26 -3
- langchain_core/tracers/_streaming.py +6 -7
- langchain_core/tracers/base.py +2 -2
- langchain_core/tracers/context.py +5 -1
- langchain_core/tracers/core.py +109 -39
- langchain_core/tracers/evaluation.py +22 -26
- langchain_core/tracers/event_stream.py +41 -28
- langchain_core/tracers/langchain.py +12 -3
- langchain_core/tracers/langchain_v1.py +10 -2
- langchain_core/tracers/log_stream.py +57 -18
- langchain_core/tracers/root_listeners.py +4 -20
- langchain_core/tracers/run_collector.py +6 -16
- langchain_core/tracers/schemas.py +5 -1
- langchain_core/utils/aiter.py +14 -6
- langchain_core/utils/env.py +3 -0
- langchain_core/utils/function_calling.py +49 -30
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +11 -3
- langchain_core/utils/json.py +5 -2
- langchain_core/utils/json_schema.py +15 -5
- langchain_core/utils/loading.py +5 -1
- langchain_core/utils/mustache.py +24 -15
- langchain_core/utils/pydantic.py +32 -4
- langchain_core/utils/utils.py +24 -8
- langchain_core/vectorstores/base.py +7 -20
- langchain_core/vectorstores/in_memory.py +18 -12
- langchain_core/vectorstores/utils.py +18 -12
- langchain_core/version.py +1 -1
- langchain_core-0.3.76.dist-info/METADATA +77 -0
- langchain_core-0.3.76.dist-info/RECORD +174 -0
- langchain_core-0.3.74.dist-info/METADATA +0 -108
- langchain_core-0.3.74.dist-info/RECORD +0 -174
- {langchain_core-0.3.74.dist-info → langchain_core-0.3.76.dist-info}/WHEEL +0 -0
- {langchain_core-0.3.74.dist-info → langchain_core-0.3.76.dist-info}/entry_points.txt +0 -0
langchain_core/messages/utils.py
CHANGED
```diff
@@ -42,12 +42,17 @@ from langchain_core.messages.system import SystemMessage, SystemMessageChunk
 from langchain_core.messages.tool import ToolCall, ToolMessage, ToolMessageChunk
 
 if TYPE_CHECKING:
-    from langchain_text_splitters import TextSplitter
-
     from langchain_core.language_models import BaseLanguageModel
     from langchain_core.prompt_values import PromptValue
     from langchain_core.runnables.base import Runnable
 
+try:
+    from langchain_text_splitters import TextSplitter
+
+    _HAS_LANGCHAIN_TEXT_SPLITTERS = True
+except ImportError:
+    _HAS_LANGCHAIN_TEXT_SPLITTERS = False
+
 logger = logging.getLogger(__name__)
 
 
```
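The 0.3.76 side replaces the `TYPE_CHECKING`-only import of `TextSplitter` with a runtime probe, recording availability in `_HAS_LANGCHAIN_TEXT_SPLITTERS` so later code can branch on the flag instead of re-importing. A minimal sketch of the same optional-dependency pattern (the `split` helper is illustrative, not part of the package):

```python
# Optional-dependency probe: import once at module load, record availability
# in a module-level flag, and branch on the flag at call time.
try:
    from langchain_text_splitters import TextSplitter

    _HAS_LANGCHAIN_TEXT_SPLITTERS = True
except ImportError:
    _HAS_LANGCHAIN_TEXT_SPLITTERS = False


def split(text: str, splitter: object = None) -> list[str]:
    # Hypothetical helper: the isinstance() check is only reachable when the
    # import above succeeded, so the TextSplitter name is guaranteed to be bound.
    if _HAS_LANGCHAIN_TEXT_SPLITTERS and isinstance(splitter, TextSplitter):
        return splitter.split_text(text)
    return text.splitlines()
```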
```diff
@@ -182,7 +187,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
     return [_message_from_dict(m) for m in messages]
 
 
-def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage:
+def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
     """Convert a message chunk to a message.
 
     Args:
@@ -213,7 +218,7 @@ def _create_message_from_message_type(
     name: Optional[str] = None,
     tool_call_id: Optional[str] = None,
     tool_calls: Optional[list[dict[str, Any]]] = None,
-    id: Optional[str] = None,
+    id: Optional[str] = None,
     **additional_kwargs: Any,
 ) -> BaseMessage:
     """Create a message from a message type and content string.
```
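The annotation of `message_chunk_to_message` widens from chunk-only input to any `BaseMessage`, so callers no longer need a cast when a value may already be a full message. A short usage sketch, assuming the helper is importable from `langchain_core.messages`:

```python
from langchain_core.messages import AIMessage, AIMessageChunk, message_chunk_to_message

# Chunks accumulated during streaming are converted to a plain message...
chunk = AIMessageChunk(content="Hello, ") + AIMessageChunk(content="world")
msg = message_chunk_to_message(chunk)
assert isinstance(msg, AIMessage)

# ...and under the 0.3.76 annotation a non-chunk message is also a valid
# argument; it is returned unchanged.
same = message_chunk_to_message(AIMessage(content="already a message"))
```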
```diff
@@ -361,7 +366,7 @@ def convert_to_messages(
         list of messages (BaseMessages).
     """
     # Import here to avoid circular imports
-    from langchain_core.prompt_values import PromptValue
+    from langchain_core.prompt_values import PromptValue  # noqa: PLC0415
 
     if isinstance(messages, PromptValue):
         return messages.to_messages()
@@ -386,7 +391,8 @@ def _runnable_support(func: Callable) -> Callable:
         list[BaseMessage],
         Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]],
     ]:
-        from langchain_core.runnables.base import RunnableLambda
+        # Import locally to prevent circular import.
+        from langchain_core.runnables.base import RunnableLambda  # noqa: PLC0415
 
         if messages is not None:
             return func(messages, **kwargs)
```
```diff
@@ -424,11 +430,16 @@ def filter_messages(
         exclude_ids: Message IDs to exclude. Default is None.
         exclude_tool_calls: Tool call IDs to exclude. Default is None.
             Can be one of the following:
-            - ``True``: all AIMessages with tool calls and all ToolMessages will be excluded.
+
+            - ``True``: Each ``AIMessages`` with tool calls and all ``ToolMessages``
+              will be excluded.
             - a sequence of tool call IDs to exclude:
+
                 - ToolMessages with the corresponding tool call ID will be excluded.
-                - The ``tool_calls`` in the AIMessage will be updated to exclude matching tool calls.
-                  If all tool_calls are filtered from an AIMessage, the whole message is excluded.
+                - The ``tool_calls`` in the AIMessage will be updated to exclude matching
+                  tool calls.
+                  If all tool_calls are filtered from an AIMessage,
+                  the whole message is excluded.
 
     Returns:
         A list of Messages that meets at least one of the incl_* conditions and none
@@ -441,14 +452,25 @@ def filter_messages(
     Example:
        .. code-block:: python
 
-            from langchain_core.messages import filter_messages, AIMessage, HumanMessage, SystemMessage
+            from langchain_core.messages import (
+                filter_messages,
+                AIMessage,
+                HumanMessage,
+                SystemMessage,
+            )
 
            messages = [
                SystemMessage("you're a good assistant."),
                HumanMessage("what's your name", id="foo", name="example_user"),
                AIMessage("steve-o", id="bar", name="example_assistant"),
-                HumanMessage("what's your favorite color", id="baz",),
-                AIMessage("silicon blue", id="blah",),
+                HumanMessage(
+                    "what's your favorite color",
+                    id="baz",
+                ),
+                AIMessage(
+                    "silicon blue",
+                    id="blah",
+                ),
            ]
 
            filter_messages(
@@ -465,7 +487,7 @@ def filter_messages(
                HumanMessage("what's your name", id="foo", name="example_user"),
            ]
 
-    """
+    """
    messages = convert_to_messages(messages)
    filtered: list[BaseMessage] = []
    for msg in messages:
```
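The new `exclude_tool_calls=True` wording above behaves as follows; a small sketch with illustrative message contents:

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, filter_messages

messages = [
    HumanMessage("what is 2 + 2?", id="q1"),
    AIMessage(
        "",
        tool_calls=[
            {"name": "add", "args": {"x": 2, "y": 2}, "id": "call1", "type": "tool_call"}
        ],
        id="a1",
    ),
    ToolMessage("4", tool_call_id="call1", id="t1"),
    AIMessage("2 + 2 = 4", id="a2"),
]

# True removes the tool-calling AIMessage together with every ToolMessage,
# leaving only plain conversation turns.
filtered = filter_messages(messages, exclude_tool_calls=True)
# -> [HumanMessage("what is 2 + 2?"), AIMessage("2 + 2 = 4")]
```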
```diff
@@ -544,12 +566,14 @@ def merge_message_runs(
     Returns:
         list of BaseMessages with consecutive runs of message types merged into single
         messages. By default, if two messages being merged both have string contents,
-        the merged content is a concatenation of the two strings with a new-line
+        the merged content is a concatenation of the two strings with a new-line
+        separator.
         The separator inserted between message chunks can be controlled by specifying
-        any string with ``chunk_separator``. If at least one of the messages has a list
-        content blocks, the merged content is a list of content blocks.
+        any string with ``chunk_separator``. If at least one of the messages has a list
+        of content blocks, the merged content is a list of content blocks.
 
     Example:
+
         .. code-block:: python
 
             from langchain_core.messages import (
@@ -562,16 +586,33 @@ def merge_message_runs(
 
             messages = [
                 SystemMessage("you're a good assistant."),
-                HumanMessage("what's your favorite color", id="foo",),
-                HumanMessage("wait your favorite food", id="bar",),
+                HumanMessage(
+                    "what's your favorite color",
+                    id="foo",
+                ),
+                HumanMessage(
+                    "wait your favorite food",
+                    id="bar",
+                ),
                 AIMessage(
                     "my favorite colo",
-                    tool_calls=[ToolCall(name="blah_tool", args={"x": 2}, id="123", type="tool_call")],
+                    tool_calls=[
+                        ToolCall(
+                            name="blah_tool", args={"x": 2}, id="123", type="tool_call"
+                        )
+                    ],
                     id="baz",
                 ),
                 AIMessage(
                     [{"type": "text", "text": "my favorite dish is lasagna"}],
-                    tool_calls=[ToolCall(name="blah_tool", args={"x": -10}, id="456", type="tool_call")],
+                    tool_calls=[
+                        ToolCall(
+                            name="blah_tool",
+                            args={"x": -10},
+                            id="456",
+                            type="tool_call",
+                        )
+                    ],
                     id="blur",
                 ),
             ]
@@ -582,21 +623,34 @@ def merge_message_runs(
 
             [
                 SystemMessage("you're a good assistant."),
-                HumanMessage("what's your favorite color\\nwait your favorite food", id="foo",),
+                HumanMessage(
+                    "what's your favorite color\\n"
+                    "wait your favorite food", id="foo",
+                ),
                 AIMessage(
                     [
                         "my favorite colo",
                         {"type": "text", "text": "my favorite dish is lasagna"}
                     ],
                     tool_calls=[
-                        ToolCall({"name": "blah_tool", "args": {"x": 2}, "id": "123", "type": "tool_call"}),
-                        ToolCall({"name": "blah_tool", "args": {"x": -10}, "id": "456", "type": "tool_call"})
+                        ToolCall({
+                            "name": "blah_tool",
+                            "args": {"x": 2},
+                            "id": "123",
+                            "type": "tool_call"
+                        }),
+                        ToolCall({
+                            "name": "blah_tool",
+                            "args": {"x": -10},
+                            "id": "456",
+                            "type": "tool_call"
+                        })
                     ]
                     id="baz"
                 ),
             ]
 
-    """
+    """
     if not messages:
         return []
     messages = convert_to_messages(messages)
```
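A compact sketch of the separator behavior described above: by default consecutive string contents join with a newline, and `chunk_separator` overrides it:

```python
from langchain_core.messages import HumanMessage, merge_message_runs

messages = [
    HumanMessage("what's your favorite color"),
    HumanMessage("wait your favorite food"),
]

# Default separator is a newline between the merged string contents.
merged = merge_message_runs(messages)
assert merged[0].content == "what's your favorite color\nwait your favorite food"

# Any string can be used instead via chunk_separator.
merged = merge_message_runs(messages, chunk_separator=" -- ")
assert merged[0].content == "what's your favorite color -- wait your favorite food"
```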
```diff
@@ -656,22 +710,23 @@ def trim_messages(
     properties:
 
     1. The resulting chat history should be valid. Most chat models expect that chat
-       history starts with either (1) a `HumanMessage` or (2) a `SystemMessage` followed
-       by a `HumanMessage`. To achieve this, set `start_on="human"`.
-       In addition, generally a `ToolMessage` can only appear after an `AIMessage`
+       history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
+       followed by a ``HumanMessage``. To achieve this, set ``start_on="human"``.
+       In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
        that involved a tool call.
        Please see the following link for more information about messages:
        https://python.langchain.com/docs/concepts/#messages
    2. It includes recent messages and drops old messages in the chat history.
-       To achieve this set the `strategy="last"`.
-    3. Usually, the new chat history should include the `SystemMessage` if it
-       was present in the original chat history since the `SystemMessage` includes
-       special instructions to the chat model. The `SystemMessage` is almost always
+       To achieve this set the ``strategy="last"``.
+    3. Usually, the new chat history should include the ``SystemMessage`` if it
+       was present in the original chat history since the ``SystemMessage`` includes
+       special instructions to the chat model. The ``SystemMessage`` is almost always
        the first message in the history if present. To achieve this set the
-       `include_system=True`.
+       ``include_system=True``.
 
-    Note that the examples below show how to configure `trim_messages` to achieve
-    a behavior consistent with the above properties.
+    .. note::
+        The examples below show how to configure ``trim_messages`` to achieve a behavior
+        consistent with the above properties.
 
     Args:
         messages: Sequence of Message-like objects to trim.
@@ -687,9 +742,11 @@ def trim_messages(
             exact token counting is not necessary.
 
         strategy: Strategy for trimming.
+
            - "first": Keep the first <= n_count tokens of the messages.
            - "last": Keep the last <= n_count tokens of the messages.
-
+
+            Default is ``'last'``.
         allow_partial: Whether to split a message if only part of the message can be
            included. If ``strategy="last"`` then the last partial contents of a message
            are included. If ``strategy="first"`` then the first partial contents of a
```
```diff
@@ -747,14 +804,18 @@ def trim_messages(
             )
 
             messages = [
-                SystemMessage("you're a good assistant, you always respond with a joke."),
+                SystemMessage(
+                    "you're a good assistant, you always respond with a joke."
+                ),
                 HumanMessage("i wonder why it's called langchain"),
                 AIMessage(
-                    'Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'
+                    'Well, I guess they thought "WordRope" and "SentenceString" just '
+                    "didn't have the same ring to it!"
                 ),
                 HumanMessage("and who is harrison chasing anyways"),
                 AIMessage(
-                    "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"
+                    "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
+                    "cup of coffee in the office!"
                 ),
                 HumanMessage("what do you call a speechless parrot"),
             ]
@@ -779,8 +840,10 @@ def trim_messages(
         .. code-block:: python
 
             [
-                SystemMessage(content="you're a good assistant, you always respond with a joke."),
-                HumanMessage(content="what do you call a speechless parrot"),
+                SystemMessage(
+                    content="you're a good assistant, you always respond with a joke."
+                ),
+                HumanMessage(content="what do you call a speechless parrot"),
             ]
 
     Trim chat history based on the message count, keeping the SystemMessage if
@@ -810,10 +873,15 @@ def trim_messages(
         .. code-block:: python
 
             [
-                SystemMessage(content="you're a good assistant, you always respond with a joke."),
-                HumanMessage(content="and who is harrison chasing anyways"),
-                AIMessage(content="Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
-                HumanMessage(content="what do you call a speechless parrot"),
+                SystemMessage(
+                    content="you're a good assistant, you always respond with a joke."
+                ),
+                HumanMessage(content="and who is harrison chasing anyways"),
+                AIMessage(
+                    content="Hmmm let me think.\n\nWhy, he's probably chasing after "
+                    "the last cup of coffee in the office!"
+                ),
+                HumanMessage(content="what do you call a speechless parrot"),
             ]
 
 
@@ -824,7 +892,9 @@ def trim_messages(
 
             messages = [
                 SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
+                HumanMessage(
+                    "This is a 4 token text. The full message is 10 tokens.", id="first"
+                ),
                 AIMessage(
                     [
                         {"type": "text", "text": "This is the FIRST 4 token block."},
@@ -832,10 +902,16 @@ def trim_messages(
                     ],
                     id="second",
                 ),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
-                AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
+                HumanMessage(
+                    "This is a 4 token text. The full message is 10 tokens.", id="third"
+                ),
+                AIMessage(
+                    "This is a 4 token text. The full message is 10 tokens.",
+                    id="fourth",
+                ),
             ]
 
+
             def dummy_token_counter(messages: list[BaseMessage]) -> int:
                 # treat each message like it adds 3 default tokens at the beginning
                 # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
@@ -848,9 +924,17 @@ def trim_messages(
                 count = 0
                 for msg in messages:
                     if isinstance(msg.content, str):
-                        count += default_msg_prefix_len + default_content_len + default_msg_suffix_len
+                        count += (
+                            default_msg_prefix_len
+                            + default_content_len
+                            + default_msg_suffix_len
+                        )
                     if isinstance(msg.content, list):
-                        count += default_msg_prefix_len + len(msg.content) * default_content_len + default_msg_suffix_len
+                        count += (
+                            default_msg_prefix_len
+                            + len(msg.content) * default_content_len
+                            + default_msg_suffix_len
+                        )
                 return count
 
     First 30 tokens, allowing partial messages:
@@ -867,12 +951,20 @@ def trim_messages(
         .. code-block:: python
 
             [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
-                AIMessage([{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"),
+                SystemMessage(
+                    "This is a 4 token text. The full message is 10 tokens."
+                ),
+                HumanMessage(
+                    "This is a 4 token text. The full message is 10 tokens.",
+                    id="first",
+                ),
+                AIMessage(
+                    [{"type": "text", "text": "This is the FIRST 4 token block."}],
+                    id="second",
+                ),
             ]
 
-    """
+    """
     # Validate arguments
     if start_on and strategy == "first":
         msg = "start_on parameter is only valid with strategy='last'"
```
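The docstring's `dummy_token_counter` prices every message at a flat rate, which makes the trimming arithmetic easy to follow. A runnable condensation of that idea (messages and counter are illustrative):

```python
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, trim_messages


def dummy_token_counter(messages: list[BaseMessage]) -> int:
    # Illustrative counter: every message costs a flat 10 tokens.
    return 10 * len(messages)


messages = [
    HumanMessage("first", id="first"),
    AIMessage("second", id="second"),
    HumanMessage("third", id="third"),
    AIMessage("fourth", id="fourth"),
]

# strategy="last" keeps the most recent messages that fit the budget; the last
# three messages cost 30 tokens, then start_on="human" drops the leading
# AIMessage so the history starts on a human turn.
trimmed = trim_messages(
    messages,
    max_tokens=30,
    token_counter=dummy_token_counter,
    strategy="last",
    start_on="human",
)
# -> [HumanMessage("third"), AIMessage("fourth")]
```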
```diff
@@ -903,17 +995,12 @@ def trim_messages(
         )
         raise ValueError(msg)
 
-    try:
-        from langchain_text_splitters import TextSplitter
-    except ImportError:
-        text_splitter_fn: Optional[Callable] = cast("Optional[Callable]", text_splitter)
+    if _HAS_LANGCHAIN_TEXT_SPLITTERS and isinstance(text_splitter, TextSplitter):
+        text_splitter_fn = text_splitter.split_text
+    elif text_splitter:
+        text_splitter_fn = cast("Callable", text_splitter)
     else:
-        if isinstance(text_splitter, TextSplitter):
-            text_splitter_fn = text_splitter.split_text
-        else:
-            text_splitter_fn = text_splitter
-
-    text_splitter_fn = text_splitter_fn or _default_text_splitter
+        text_splitter_fn = _default_text_splitter
 
     if strategy == "first":
         return _first_max_tokens(
```
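The rewritten dispatch accepts either a `TextSplitter` instance (only special-cased when `langchain-text-splitters` is installed) or any plain `str -> list[str]` callable, falling back to the default splitter otherwise. A hedged sketch passing a custom callable (the splitter and counter are illustrative):

```python
from langchain_core.messages import HumanMessage, trim_messages


def sentence_splitter(text: str) -> list[str]:
    # Any plain callable taking a string and returning string parts is accepted.
    return [s for s in text.split(". ") if s]


trimmed = trim_messages(
    [HumanMessage("First sentence. Second sentence. Third sentence.")],
    max_tokens=8,
    token_counter=lambda msgs: sum(len(m.content) // 4 for m in msgs),
    strategy="first",
    allow_partial=True,           # permit keeping only part of a message
    text_splitter=sentence_splitter,
)
```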
```diff
@@ -951,25 +1038,30 @@ def convert_to_openai_messages(
             in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
         text_format: How to format string or text block contents:
 
-            - "string":
-              If a message has a string content, this is left as a string. If
-              a message has content blocks that are all of type 'text', these are
-              joined with a newline to make a single string. If a message has
-              content blocks and at least one isn't of type 'text', then
-              all blocks are left as dicts.
-            - "block":
-              If a message has a string content, this is turned into a list
-              with a single content block of type 'text'. If a message has content
-              blocks these are left as is.
+            - ``'string'``:
+              If a message has a string content, this is left as a string. If
+              a message has content blocks that are all of type 'text', these are
+              joined with a newline to make a single string. If a message has
+              content blocks and at least one isn't of type 'text', then
+              all blocks are left as dicts.
+            - ``'block'``:
+              If a message has a string content, this is turned into a list
+              with a single content block of type 'text'. If a message has content
+              blocks these are left as is.
+
+    Raises:
+        ValueError: if an unrecognized ``text_format`` is specified, or if a message
+            content block is missing expected keys.
 
     Returns:
         The return type depends on the input type:
-            - dict:
-              If a single message-like object is passed in, a single OpenAI message
-              dict is returned.
-            - list[dict]:
-              If a sequence of message-like objects are passed in, a list of OpenAI
-              message dicts is returned.
+
+        - dict:
+          If a single message-like object is passed in, a single OpenAI message
+          dict is returned.
+        - list[dict]:
+          If a sequence of message-like objects are passed in, a list of OpenAI
+          message dicts is returned.
 
     Example:
 
@@ -984,8 +1076,27 @@ def convert_to_openai_messages(
 
             messages = [
                 SystemMessage([{"type": "text", "text": "foo"}]),
-                {"role": "user", "content": [{"type": "text", "text": "whats in this"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"}}]},
-                AIMessage("", tool_calls=[{"name": "analyze", "args": {"baz": "buz"}, "id": "1", "type": "tool_call"}]),
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "whats in this"},
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
+                        },
+                    ],
+                },
+                AIMessage(
+                    "",
+                    tool_calls=[
+                        {
+                            "name": "analyze",
+                            "args": {"baz": "buz"},
+                            "id": "1",
+                            "type": "tool_call",
+                        }
+                    ],
+                ),
                 ToolMessage("foobar", tool_call_id="1", name="bar"),
                 {"role": "assistant", "content": "thats nice"},
             ]
```
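A short sketch of the two `text_format` modes documented above, with outputs abbreviated to the documented shapes:

```python
from langchain_core.messages import AIMessage, convert_to_openai_messages

msg = AIMessage([{"type": "text", "text": "hello"}, {"type": "text", "text": "world"}])

# 'string': an all-text block list is joined with a newline into one string.
convert_to_openai_messages(msg, text_format="string")
# -> {'role': 'assistant', 'content': 'hello\nworld'}

# 'block': string contents become a single text block; block lists stay lists.
convert_to_openai_messages(msg, text_format="block")
# -> {'role': 'assistant', 'content': [{'type': 'text', 'text': 'hello'},
#                                      {'type': 'text', 'text': 'world'}]}
```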
```diff
@@ -1580,26 +1691,26 @@ def count_tokens_approximately(
         chars_per_token: Number of characters per token to use for the approximation.
             Default is 4 (one token corresponds to ~4 chars for common English text).
             You can also specify float values for more fine-grained control.
-            See more here: https://platform.openai.com/tokenizer
+            `See more here. <https://platform.openai.com/tokenizer>`__
         extra_tokens_per_message: Number of extra tokens to add per message.
             Default is 3 (special tokens, including beginning/end of message).
             You can also specify float values for more fine-grained control.
-            See more here:
-            https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+            `See more here. <https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb>`__
         count_name: Whether to include message names in the count.
             Enabled by default.
 
     Returns:
         Approximate number of tokens in the messages.
 
-    Note:
-        This is a simple approximation that may not match the exact token count
-        used by specific models. For accurate counts, use model-specific tokenizers.
+    .. note::
+        This is a simple approximation that may not match the exact token count used by
+        specific models. For accurate counts, use model-specific tokenizers.
 
     Warning:
         This function does not currently support counting image tokens.
 
     .. versionadded:: 0.3.46
+
     """
     token_count = 0.0
     for message in convert_to_messages(messages):
```
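Under the documented defaults (4 characters per token, 3 extra tokens per message) the approximation works out roughly as in this sketch:

```python
from langchain_core.messages.utils import count_tokens_approximately
from langchain_core.messages import AIMessage, HumanMessage

messages = [
    HumanMessage("what do you call a speechless parrot"),  # 36 chars
    AIMessage("A polygon!"),  # 10 chars
]

# With the defaults this is roughly
# ceil(36 / 4) + ceil(10 / 4) + 2 * 3 tokens;
# the exact rounding is an implementation detail.
approx = count_tokens_approximately(messages)
```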
langchain_core/output_parsers/base.py
CHANGED
```diff
@@ -144,7 +144,10 @@ class BaseOutputParser(
 
                 def parse(self, text: str) -> bool:
                     cleaned_text = text.strip().upper()
-                    if cleaned_text not in (self.true_val.upper(), self.false_val.upper()):
+                    if cleaned_text not in (
+                        self.true_val.upper(),
+                        self.false_val.upper(),
+                    ):
                         raise OutputParserException(
                             f"BooleanOutputParser expected output value to either be "
                             f"{self.true_val} or {self.false_val} (case-insensitive). "
@@ -156,7 +159,7 @@ class BaseOutputParser(
                 def _type(self) -> str:
                     return "boolean_output_parser"
 
-    """
+    """
 
     @property
     @override
```
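The reflowed membership test keeps the same semantics. As a runnable version of the docstring's example parser (the `BooleanOutputParser` class exists only in that docstring, not in the package API):

```python
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseOutputParser


class BooleanOutputParser(BaseOutputParser[bool]):
    true_val: str = "YES"
    false_val: str = "NO"

    def parse(self, text: str) -> bool:
        cleaned_text = text.strip().upper()
        if cleaned_text not in (
            self.true_val.upper(),
            self.false_val.upper(),
        ):
            msg = (
                f"BooleanOutputParser expected output value to either be "
                f"{self.true_val} or {self.false_val} (case-insensitive)."
            )
            raise OutputParserException(msg)
        return cleaned_text == self.true_val.upper()

    @property
    def _type(self) -> str:
        return "boolean_output_parser"


assert BooleanOutputParser().parse(" yes ") is True
```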
langchain_core/output_parsers/json.py
CHANGED
```diff
@@ -46,13 +46,13 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
     def _diff(self, prev: Optional[Any], next: Any) -> Any:
         return jsonpatch.make_patch(prev, next).patch
 
-    def _get_schema(self, pydantic_object: type[TBaseModel]) -> dict[str, Any]:
+    @staticmethod
+    def _get_schema(pydantic_object: type[TBaseModel]) -> dict[str, Any]:
         if issubclass(pydantic_object, pydantic.BaseModel):
             return pydantic_object.model_json_schema()
-        elif issubclass(pydantic_object, pydantic.v1.BaseModel):
-            return pydantic_object.schema()
-        return None
+        return pydantic_object.schema()
 
+    @override
     def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any:
         """Parse the result of an LLM call to a JSON object.
 
```
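Making `_get_schema` a `@staticmethod` is an internal change, but its effect is visible through the public parser, which embeds the schema of `pydantic_object` in its format instructions. A hedged sketch:

```python
from pydantic import BaseModel
from langchain_core.output_parsers import JsonOutputParser


class Joke(BaseModel):
    setup: str
    punchline: str


parser = JsonOutputParser(pydantic_object=Joke)

# The format instructions embed Joke's JSON schema (model_json_schema() for
# pydantic v2 models, .schema() for v1-style models).
instructions = parser.get_format_instructions()
assert "punchline" in instructions
```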
langchain_core/output_parsers/list.py
CHANGED
```diff
@@ -143,10 +143,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
 
     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """
-
-        Returns True.
-        """
+        """Return True as this class is serializable."""
         return True
 
     @classmethod
@@ -154,11 +151,11 @@ class CommaSeparatedListOutputParser(ListOutputParser):
         """Get the namespace of the langchain object.
 
         Returns:
-            The namespace of the langchain object.
-            Default is ["langchain", "output_parsers", "list"].
+            ``["langchain", "output_parsers", "list"]``
         """
         return ["langchain", "output_parsers", "list"]
 
+    @override
     def get_format_instructions(self) -> str:
         """Return the format instructions for the comma-separated list output."""
         return (
@@ -166,6 +163,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
             "eg: `foo, bar, baz` or `foo,bar,baz`"
         )
 
+    @override
     def parse(self, text: str) -> list[str]:
         """Parse the output of an LLM call.
 
@@ -213,15 +211,8 @@ class NumberedListOutputParser(ListOutputParser):
         """
         return re.findall(self.pattern, text)
 
+    @override
     def parse_iter(self, text: str) -> Iterator[re.Match]:
-        """Parse the output of an LLM call.
-
-        Args:
-            text: The output of an LLM call.
-
-        Yields:
-            A match object for each part of the output.
-        """
         return re.finditer(self.pattern, text)
 
     @property
@@ -235,6 +226,7 @@ class MarkdownListOutputParser(ListOutputParser):
     pattern: str = r"^\s*[-*]\s([^\n]+)$"
     """The pattern to match a Markdown list item."""
 
+    @override
     def get_format_instructions(self) -> str:
         """Return the format instructions for the Markdown list output."""
         return "Your response should be a markdown list, eg: `- foo\n- bar\n- baz`"
@@ -250,15 +242,8 @@ class MarkdownListOutputParser(ListOutputParser):
         """
         return re.findall(self.pattern, text, re.MULTILINE)
 
+    @override
     def parse_iter(self, text: str) -> Iterator[re.Match]:
-        """Parse the output of an LLM call.
-
-        Args:
-            text: The output of an LLM call.
-
-        Yields:
-            A match object for each part of the output.
-        """
         return re.finditer(self.pattern, text, re.MULTILINE)
 
     @property
```
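The `parse_iter` methods that lost their per-method docstrings keep their behavior: they yield one regex match per list item, which is what lets these parsers stream items incrementally. For example:

```python
from langchain_core.output_parsers import MarkdownListOutputParser

parser = MarkdownListOutputParser()
text = "- foo\n- bar\n- baz"

# parse() returns all items at once...
assert parser.parse(text) == ["foo", "bar", "baz"]

# ...while parse_iter() yields a match object per item, enabling streaming.
items = [m.group(1) for m in parser.parse_iter(text)]
assert items == ["foo", "bar", "baz"]
```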
langchain_core/output_parsers/openai_functions.py
CHANGED
```diff
@@ -261,6 +261,9 @@ class PydanticOutputFunctionsParser(OutputFunctionsParser):
             result: The result of the LLM call.
             partial: Whether to parse partial JSON objects. Default is False.
 
+        Raises:
+            ValueError: If the pydantic schema is not valid.
+
         Returns:
             The parsed JSON object.
         """
```