langchain-core 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-core might be problematic; see the registry's advisory page for more details.

Files changed (122)
  1. langchain_core/_api/beta_decorator.py +18 -41
  2. langchain_core/_api/deprecation.py +20 -7
  3. langchain_core/_api/path.py +19 -2
  4. langchain_core/_import_utils.py +7 -0
  5. langchain_core/agents.py +10 -6
  6. langchain_core/beta/runnables/context.py +2 -3
  7. langchain_core/callbacks/base.py +11 -4
  8. langchain_core/callbacks/file.py +13 -2
  9. langchain_core/callbacks/manager.py +129 -78
  10. langchain_core/callbacks/usage.py +4 -2
  11. langchain_core/chat_history.py +10 -12
  12. langchain_core/document_loaders/base.py +34 -9
  13. langchain_core/document_loaders/langsmith.py +3 -0
  14. langchain_core/documents/base.py +36 -11
  15. langchain_core/documents/compressor.py +9 -6
  16. langchain_core/documents/transformers.py +4 -2
  17. langchain_core/embeddings/fake.py +8 -5
  18. langchain_core/env.py +2 -3
  19. langchain_core/example_selectors/base.py +12 -0
  20. langchain_core/exceptions.py +7 -0
  21. langchain_core/globals.py +17 -28
  22. langchain_core/indexing/api.py +56 -44
  23. langchain_core/indexing/base.py +7 -10
  24. langchain_core/indexing/in_memory.py +23 -3
  25. langchain_core/language_models/__init__.py +3 -2
  26. langchain_core/language_models/base.py +64 -39
  27. langchain_core/language_models/chat_models.py +130 -42
  28. langchain_core/language_models/fake_chat_models.py +10 -11
  29. langchain_core/language_models/llms.py +49 -17
  30. langchain_core/load/dump.py +5 -7
  31. langchain_core/load/load.py +15 -1
  32. langchain_core/load/serializable.py +38 -43
  33. langchain_core/memory.py +7 -3
  34. langchain_core/messages/ai.py +36 -16
  35. langchain_core/messages/base.py +13 -6
  36. langchain_core/messages/content_blocks.py +23 -2
  37. langchain_core/messages/human.py +2 -6
  38. langchain_core/messages/modifier.py +1 -1
  39. langchain_core/messages/system.py +2 -6
  40. langchain_core/messages/tool.py +36 -16
  41. langchain_core/messages/utils.py +198 -87
  42. langchain_core/output_parsers/base.py +5 -2
  43. langchain_core/output_parsers/json.py +4 -4
  44. langchain_core/output_parsers/list.py +7 -22
  45. langchain_core/output_parsers/openai_functions.py +3 -0
  46. langchain_core/output_parsers/openai_tools.py +8 -1
  47. langchain_core/output_parsers/pydantic.py +4 -0
  48. langchain_core/output_parsers/string.py +5 -1
  49. langchain_core/output_parsers/transform.py +2 -2
  50. langchain_core/output_parsers/xml.py +23 -22
  51. langchain_core/outputs/chat_generation.py +18 -7
  52. langchain_core/outputs/generation.py +14 -3
  53. langchain_core/outputs/llm_result.py +8 -1
  54. langchain_core/prompt_values.py +10 -4
  55. langchain_core/prompts/base.py +4 -9
  56. langchain_core/prompts/chat.py +88 -61
  57. langchain_core/prompts/dict.py +16 -8
  58. langchain_core/prompts/few_shot.py +9 -11
  59. langchain_core/prompts/few_shot_with_templates.py +5 -1
  60. langchain_core/prompts/image.py +12 -5
  61. langchain_core/prompts/message.py +5 -6
  62. langchain_core/prompts/pipeline.py +13 -8
  63. langchain_core/prompts/prompt.py +22 -8
  64. langchain_core/prompts/string.py +18 -10
  65. langchain_core/prompts/structured.py +7 -2
  66. langchain_core/rate_limiters.py +2 -2
  67. langchain_core/retrievers.py +7 -6
  68. langchain_core/runnables/base.py +842 -567
  69. langchain_core/runnables/branch.py +15 -20
  70. langchain_core/runnables/config.py +11 -17
  71. langchain_core/runnables/configurable.py +34 -19
  72. langchain_core/runnables/fallbacks.py +24 -17
  73. langchain_core/runnables/graph.py +47 -40
  74. langchain_core/runnables/graph_ascii.py +40 -17
  75. langchain_core/runnables/graph_mermaid.py +27 -15
  76. langchain_core/runnables/graph_png.py +27 -31
  77. langchain_core/runnables/history.py +56 -59
  78. langchain_core/runnables/passthrough.py +47 -24
  79. langchain_core/runnables/retry.py +10 -6
  80. langchain_core/runnables/router.py +10 -9
  81. langchain_core/runnables/schema.py +2 -0
  82. langchain_core/runnables/utils.py +51 -89
  83. langchain_core/stores.py +13 -25
  84. langchain_core/structured_query.py +3 -7
  85. langchain_core/sys_info.py +9 -8
  86. langchain_core/tools/base.py +30 -23
  87. langchain_core/tools/convert.py +24 -13
  88. langchain_core/tools/simple.py +35 -3
  89. langchain_core/tools/structured.py +26 -3
  90. langchain_core/tracers/_streaming.py +6 -7
  91. langchain_core/tracers/base.py +2 -2
  92. langchain_core/tracers/context.py +5 -1
  93. langchain_core/tracers/core.py +109 -39
  94. langchain_core/tracers/evaluation.py +22 -26
  95. langchain_core/tracers/event_stream.py +41 -28
  96. langchain_core/tracers/langchain.py +12 -3
  97. langchain_core/tracers/langchain_v1.py +10 -2
  98. langchain_core/tracers/log_stream.py +57 -18
  99. langchain_core/tracers/root_listeners.py +4 -20
  100. langchain_core/tracers/run_collector.py +6 -16
  101. langchain_core/tracers/schemas.py +5 -1
  102. langchain_core/utils/aiter.py +14 -6
  103. langchain_core/utils/env.py +3 -0
  104. langchain_core/utils/function_calling.py +49 -30
  105. langchain_core/utils/interactive_env.py +6 -2
  106. langchain_core/utils/iter.py +11 -3
  107. langchain_core/utils/json.py +5 -2
  108. langchain_core/utils/json_schema.py +15 -5
  109. langchain_core/utils/loading.py +5 -1
  110. langchain_core/utils/mustache.py +24 -15
  111. langchain_core/utils/pydantic.py +32 -4
  112. langchain_core/utils/utils.py +24 -8
  113. langchain_core/vectorstores/base.py +7 -20
  114. langchain_core/vectorstores/in_memory.py +18 -12
  115. langchain_core/vectorstores/utils.py +18 -12
  116. langchain_core/version.py +1 -1
  117. langchain_core-0.3.76.dist-info/METADATA +77 -0
  118. langchain_core-0.3.76.dist-info/RECORD +174 -0
  119. langchain_core-0.3.74.dist-info/METADATA +0 -108
  120. langchain_core-0.3.74.dist-info/RECORD +0 -174
  121. {langchain_core-0.3.74.dist-info → langchain_core-0.3.76.dist-info}/WHEEL +0 -0
  122. {langchain_core-0.3.74.dist-info → langchain_core-0.3.76.dist-info}/entry_points.txt +0 -0
@@ -42,12 +42,17 @@ from langchain_core.messages.system import SystemMessage, SystemMessageChunk
42
42
  from langchain_core.messages.tool import ToolCall, ToolMessage, ToolMessageChunk
43
43
 
44
44
  if TYPE_CHECKING:
45
- from langchain_text_splitters import TextSplitter
46
-
47
45
  from langchain_core.language_models import BaseLanguageModel
48
46
  from langchain_core.prompt_values import PromptValue
49
47
  from langchain_core.runnables.base import Runnable
50
48
 
49
+ try:
50
+ from langchain_text_splitters import TextSplitter
51
+
52
+ _HAS_LANGCHAIN_TEXT_SPLITTERS = True
53
+ except ImportError:
54
+ _HAS_LANGCHAIN_TEXT_SPLITTERS = False
55
+
51
56
  logger = logging.getLogger(__name__)
52
57
 
53
58
 
@@ -182,7 +187,7 @@ def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
182
187
  return [_message_from_dict(m) for m in messages]
183
188
 
184
189
 
185
- def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage:
190
+ def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
186
191
  """Convert a message chunk to a message.
187
192
 
188
193
  Args:
@@ -213,7 +218,7 @@ def _create_message_from_message_type(
213
218
  name: Optional[str] = None,
214
219
  tool_call_id: Optional[str] = None,
215
220
  tool_calls: Optional[list[dict[str, Any]]] = None,
216
- id: Optional[str] = None, # noqa: A002
221
+ id: Optional[str] = None,
217
222
  **additional_kwargs: Any,
218
223
  ) -> BaseMessage:
219
224
  """Create a message from a message type and content string.
@@ -361,7 +366,7 @@ def convert_to_messages(
361
366
  list of messages (BaseMessages).
362
367
  """
363
368
  # Import here to avoid circular imports
364
- from langchain_core.prompt_values import PromptValue
369
+ from langchain_core.prompt_values import PromptValue # noqa: PLC0415
365
370
 
366
371
  if isinstance(messages, PromptValue):
367
372
  return messages.to_messages()
@@ -386,7 +391,8 @@ def _runnable_support(func: Callable) -> Callable:
386
391
  list[BaseMessage],
387
392
  Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]],
388
393
  ]:
389
- from langchain_core.runnables.base import RunnableLambda
394
+ # Import locally to prevent circular import.
395
+ from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
390
396
 
391
397
  if messages is not None:
392
398
  return func(messages, **kwargs)
@@ -424,11 +430,16 @@ def filter_messages(
424
430
  exclude_ids: Message IDs to exclude. Default is None.
425
431
  exclude_tool_calls: Tool call IDs to exclude. Default is None.
426
432
  Can be one of the following:
427
- - `True`: all AIMessages with tool calls and all ToolMessages will be excluded.
433
+
434
+ - ``True``: Each ``AIMessages`` with tool calls and all ``ToolMessages``
435
+ will be excluded.
428
436
  - a sequence of tool call IDs to exclude:
437
+
429
438
  - ToolMessages with the corresponding tool call ID will be excluded.
430
- - The `tool_calls` in the AIMessage will be updated to exclude matching tool calls.
431
- If all tool_calls are filtered from an AIMessage, the whole message is excluded.
439
+ - The ``tool_calls`` in the AIMessage will be updated to exclude matching
440
+ tool calls.
441
+ If all tool_calls are filtered from an AIMessage,
442
+ the whole message is excluded.
432
443
 
433
444
  Returns:
434
445
  A list of Messages that meets at least one of the incl_* conditions and none
@@ -441,14 +452,25 @@ def filter_messages(
441
452
  Example:
442
453
  .. code-block:: python
443
454
 
444
- from langchain_core.messages import filter_messages, AIMessage, HumanMessage, SystemMessage
455
+ from langchain_core.messages import (
456
+ filter_messages,
457
+ AIMessage,
458
+ HumanMessage,
459
+ SystemMessage,
460
+ )
445
461
 
446
462
  messages = [
447
463
  SystemMessage("you're a good assistant."),
448
464
  HumanMessage("what's your name", id="foo", name="example_user"),
449
465
  AIMessage("steve-o", id="bar", name="example_assistant"),
450
- HumanMessage("what's your favorite color", id="baz",),
451
- AIMessage("silicon blue", id="blah",),
466
+ HumanMessage(
467
+ "what's your favorite color",
468
+ id="baz",
469
+ ),
470
+ AIMessage(
471
+ "silicon blue",
472
+ id="blah",
473
+ ),
452
474
  ]
453
475
 
454
476
  filter_messages(
@@ -465,7 +487,7 @@ def filter_messages(
465
487
  HumanMessage("what's your name", id="foo", name="example_user"),
466
488
  ]
467
489
 
468
- """ # noqa: E501
490
+ """
469
491
  messages = convert_to_messages(messages)
470
492
  filtered: list[BaseMessage] = []
471
493
  for msg in messages:
@@ -544,12 +566,14 @@ def merge_message_runs(
544
566
  Returns:
545
567
  list of BaseMessages with consecutive runs of message types merged into single
546
568
  messages. By default, if two messages being merged both have string contents,
547
- the merged content is a concatenation of the two strings with a new-line separator.
569
+ the merged content is a concatenation of the two strings with a new-line
570
+ separator.
548
571
  The separator inserted between message chunks can be controlled by specifying
549
- any string with ``chunk_separator``. If at least one of the messages has a list of
550
- content blocks, the merged content is a list of content blocks.
572
+ any string with ``chunk_separator``. If at least one of the messages has a list
573
+ of content blocks, the merged content is a list of content blocks.
551
574
 
552
575
  Example:
576
+
553
577
  .. code-block:: python
554
578
 
555
579
  from langchain_core.messages import (
@@ -562,16 +586,33 @@ def merge_message_runs(
562
586
 
563
587
  messages = [
564
588
  SystemMessage("you're a good assistant."),
565
- HumanMessage("what's your favorite color", id="foo",),
566
- HumanMessage("wait your favorite food", id="bar",),
589
+ HumanMessage(
590
+ "what's your favorite color",
591
+ id="foo",
592
+ ),
593
+ HumanMessage(
594
+ "wait your favorite food",
595
+ id="bar",
596
+ ),
567
597
  AIMessage(
568
598
  "my favorite colo",
569
- tool_calls=[ToolCall(name="blah_tool", args={"x": 2}, id="123", type="tool_call")],
599
+ tool_calls=[
600
+ ToolCall(
601
+ name="blah_tool", args={"x": 2}, id="123", type="tool_call"
602
+ )
603
+ ],
570
604
  id="baz",
571
605
  ),
572
606
  AIMessage(
573
607
  [{"type": "text", "text": "my favorite dish is lasagna"}],
574
- tool_calls=[ToolCall(name="blah_tool", args={"x": -10}, id="456", type="tool_call")],
608
+ tool_calls=[
609
+ ToolCall(
610
+ name="blah_tool",
611
+ args={"x": -10},
612
+ id="456",
613
+ type="tool_call",
614
+ )
615
+ ],
575
616
  id="blur",
576
617
  ),
577
618
  ]
@@ -582,21 +623,34 @@ def merge_message_runs(
582
623
 
583
624
  [
584
625
  SystemMessage("you're a good assistant."),
585
- HumanMessage("what's your favorite color\\nwait your favorite food", id="foo",),
626
+ HumanMessage(
627
+ "what's your favorite color\\n"
628
+ "wait your favorite food", id="foo",
629
+ ),
586
630
  AIMessage(
587
631
  [
588
632
  "my favorite colo",
589
633
  {"type": "text", "text": "my favorite dish is lasagna"}
590
634
  ],
591
635
  tool_calls=[
592
- ToolCall({"name": "blah_tool", "args": {"x": 2}, "id": "123", "type": "tool_call"}),
593
- ToolCall({"name": "blah_tool", "args": {"x": -10}, "id": "456", "type": "tool_call"})
636
+ ToolCall({
637
+ "name": "blah_tool",
638
+ "args": {"x": 2},
639
+ "id": "123",
640
+ "type": "tool_call"
641
+ }),
642
+ ToolCall({
643
+ "name": "blah_tool",
644
+ "args": {"x": -10},
645
+ "id": "456",
646
+ "type": "tool_call"
647
+ })
594
648
  ]
595
649
  id="baz"
596
650
  ),
597
651
  ]
598
652
 
599
- """ # noqa: E501
653
+ """
600
654
  if not messages:
601
655
  return []
602
656
  messages = convert_to_messages(messages)
@@ -656,22 +710,23 @@ def trim_messages(
656
710
  properties:
657
711
 
658
712
  1. The resulting chat history should be valid. Most chat models expect that chat
659
- history starts with either (1) a `HumanMessage` or (2) a `SystemMessage` followed
660
- by a `HumanMessage`. To achieve this, set `start_on="human"`.
661
- In addition, generally a `ToolMessage` can only appear after an `AIMessage`
713
+ history starts with either (1) a ``HumanMessage`` or (2) a ``SystemMessage``
714
+ followed by a ``HumanMessage``. To achieve this, set ``start_on="human"``.
715
+ In addition, generally a ``ToolMessage`` can only appear after an ``AIMessage``
662
716
  that involved a tool call.
663
717
  Please see the following link for more information about messages:
664
718
  https://python.langchain.com/docs/concepts/#messages
665
719
  2. It includes recent messages and drops old messages in the chat history.
666
- To achieve this set the `strategy="last"`.
667
- 3. Usually, the new chat history should include the `SystemMessage` if it
668
- was present in the original chat history since the `SystemMessage` includes
669
- special instructions to the chat model. The `SystemMessage` is almost always
720
+ To achieve this set the ``strategy="last"``.
721
+ 3. Usually, the new chat history should include the ``SystemMessage`` if it
722
+ was present in the original chat history since the ``SystemMessage`` includes
723
+ special instructions to the chat model. The ``SystemMessage`` is almost always
670
724
  the first message in the history if present. To achieve this set the
671
- `include_system=True`.
725
+ ``include_system=True``.
672
726
 
673
- **Note** The examples below show how to configure `trim_messages` to achieve
674
- a behavior consistent with the above properties.
727
+ .. note::
728
+ The examples below show how to configure ``trim_messages`` to achieve a behavior
729
+ consistent with the above properties.
675
730
 
676
731
  Args:
677
732
  messages: Sequence of Message-like objects to trim.
@@ -687,9 +742,11 @@ def trim_messages(
687
742
  exact token counting is not necessary.
688
743
 
689
744
  strategy: Strategy for trimming.
745
+
690
746
  - "first": Keep the first <= n_count tokens of the messages.
691
747
  - "last": Keep the last <= n_count tokens of the messages.
692
- Default is "last".
748
+
749
+ Default is ``'last'``.
693
750
  allow_partial: Whether to split a message if only part of the message can be
694
751
  included. If ``strategy="last"`` then the last partial contents of a message
695
752
  are included. If ``strategy="first"`` then the first partial contents of a
@@ -747,14 +804,18 @@ def trim_messages(
747
804
  )
748
805
 
749
806
  messages = [
750
- SystemMessage("you're a good assistant, you always respond with a joke."),
807
+ SystemMessage(
808
+ "you're a good assistant, you always respond with a joke."
809
+ ),
751
810
  HumanMessage("i wonder why it's called langchain"),
752
811
  AIMessage(
753
- 'Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'
812
+ 'Well, I guess they thought "WordRope" and "SentenceString" just '
813
+ "didn't have the same ring to it!"
754
814
  ),
755
815
  HumanMessage("and who is harrison chasing anyways"),
756
816
  AIMessage(
757
- "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"
817
+ "Hmmm let me think.\n\nWhy, he's probably chasing after the last "
818
+ "cup of coffee in the office!"
758
819
  ),
759
820
  HumanMessage("what do you call a speechless parrot"),
760
821
  ]
@@ -779,8 +840,10 @@ def trim_messages(
779
840
  .. code-block:: python
780
841
 
781
842
  [
782
- SystemMessage(content="you're a good assistant, you always respond with a joke."),
783
- HumanMessage(content='what do you call a speechless parrot'),
843
+ SystemMessage(
844
+ content="you're a good assistant, you always respond with a joke."
845
+ ),
846
+ HumanMessage(content="what do you call a speechless parrot"),
784
847
  ]
785
848
 
786
849
  Trim chat history based on the message count, keeping the SystemMessage if
@@ -810,10 +873,15 @@ def trim_messages(
810
873
  .. code-block:: python
811
874
 
812
875
  [
813
- SystemMessage(content="you're a good assistant, you always respond with a joke."),
814
- HumanMessage(content='and who is harrison chasing anyways'),
815
- AIMessage(content="Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
816
- HumanMessage(content='what do you call a speechless parrot'),
876
+ SystemMessage(
877
+ content="you're a good assistant, you always respond with a joke."
878
+ ),
879
+ HumanMessage(content="and who is harrison chasing anyways"),
880
+ AIMessage(
881
+ content="Hmmm let me think.\n\nWhy, he's probably chasing after "
882
+ "the last cup of coffee in the office!"
883
+ ),
884
+ HumanMessage(content="what do you call a speechless parrot"),
817
885
  ]
818
886
 
819
887
 
@@ -824,7 +892,9 @@ def trim_messages(
824
892
 
825
893
  messages = [
826
894
  SystemMessage("This is a 4 token text. The full message is 10 tokens."),
827
- HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
895
+ HumanMessage(
896
+ "This is a 4 token text. The full message is 10 tokens.", id="first"
897
+ ),
828
898
  AIMessage(
829
899
  [
830
900
  {"type": "text", "text": "This is the FIRST 4 token block."},
@@ -832,10 +902,16 @@ def trim_messages(
832
902
  ],
833
903
  id="second",
834
904
  ),
835
- HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
836
- AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
905
+ HumanMessage(
906
+ "This is a 4 token text. The full message is 10 tokens.", id="third"
907
+ ),
908
+ AIMessage(
909
+ "This is a 4 token text. The full message is 10 tokens.",
910
+ id="fourth",
911
+ ),
837
912
  ]
838
913
 
914
+
839
915
  def dummy_token_counter(messages: list[BaseMessage]) -> int:
840
916
  # treat each message like it adds 3 default tokens at the beginning
841
917
  # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
@@ -848,9 +924,17 @@ def trim_messages(
848
924
  count = 0
849
925
  for msg in messages:
850
926
  if isinstance(msg.content, str):
851
- count += default_msg_prefix_len + default_content_len + default_msg_suffix_len
927
+ count += (
928
+ default_msg_prefix_len
929
+ + default_content_len
930
+ + default_msg_suffix_len
931
+ )
852
932
  if isinstance(msg.content, list):
853
- count += default_msg_prefix_len + len(msg.content) * default_content_len + default_msg_suffix_len
933
+ count += (
934
+ default_msg_prefix_len
935
+ + len(msg.content) * default_content_len
936
+ + default_msg_suffix_len
937
+ )
854
938
  return count
855
939
 
856
940
  First 30 tokens, allowing partial messages:
@@ -867,12 +951,20 @@ def trim_messages(
867
951
  .. code-block:: python
868
952
 
869
953
  [
870
- SystemMessage("This is a 4 token text. The full message is 10 tokens."),
871
- HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
872
- AIMessage( [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"),
954
+ SystemMessage(
955
+ "This is a 4 token text. The full message is 10 tokens."
956
+ ),
957
+ HumanMessage(
958
+ "This is a 4 token text. The full message is 10 tokens.",
959
+ id="first",
960
+ ),
961
+ AIMessage(
962
+ [{"type": "text", "text": "This is the FIRST 4 token block."}],
963
+ id="second",
964
+ ),
873
965
  ]
874
966
 
875
- """ # noqa: E501
967
+ """
876
968
  # Validate arguments
877
969
  if start_on and strategy == "first":
878
970
  msg = "start_on parameter is only valid with strategy='last'"
@@ -903,17 +995,12 @@ def trim_messages(
903
995
  )
904
996
  raise ValueError(msg)
905
997
 
906
- try:
907
- from langchain_text_splitters import TextSplitter
908
- except ImportError:
909
- text_splitter_fn: Optional[Callable] = cast("Optional[Callable]", text_splitter)
998
+ if _HAS_LANGCHAIN_TEXT_SPLITTERS and isinstance(text_splitter, TextSplitter):
999
+ text_splitter_fn = text_splitter.split_text
1000
+ elif text_splitter:
1001
+ text_splitter_fn = cast("Callable", text_splitter)
910
1002
  else:
911
- if isinstance(text_splitter, TextSplitter):
912
- text_splitter_fn = text_splitter.split_text
913
- else:
914
- text_splitter_fn = text_splitter
915
-
916
- text_splitter_fn = text_splitter_fn or _default_text_splitter
1003
+ text_splitter_fn = _default_text_splitter
917
1004
 
918
1005
  if strategy == "first":
919
1006
  return _first_max_tokens(
@@ -951,25 +1038,30 @@ def convert_to_openai_messages(
951
1038
  in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
952
1039
  text_format: How to format string or text block contents:
953
1040
 
954
- - "string":
955
- If a message has a string content, this is left as a string. If
956
- a message has content blocks that are all of type 'text', these are
957
- joined with a newline to make a single string. If a message has
958
- content blocks and at least one isn't of type 'text', then
959
- all blocks are left as dicts.
960
- - "block":
961
- If a message has a string content, this is turned into a list
962
- with a single content block of type 'text'. If a message has content
963
- blocks these are left as is.
1041
+ - ``'string'``:
1042
+ If a message has a string content, this is left as a string. If
1043
+ a message has content blocks that are all of type 'text', these are
1044
+ joined with a newline to make a single string. If a message has
1045
+ content blocks and at least one isn't of type 'text', then
1046
+ all blocks are left as dicts.
1047
+ - ``'block'``:
1048
+ If a message has a string content, this is turned into a list
1049
+ with a single content block of type 'text'. If a message has content
1050
+ blocks these are left as is.
1051
+
1052
+ Raises:
1053
+ ValueError: if an unrecognized ``text_format`` is specified, or if a message
1054
+ content block is missing expected keys.
964
1055
 
965
1056
  Returns:
966
1057
  The return type depends on the input type:
967
- - dict:
968
- If a single message-like object is passed in, a single OpenAI message
969
- dict is returned.
970
- - list[dict]:
971
- If a sequence of message-like objects are passed in, a list of OpenAI
972
- message dicts is returned.
1058
+
1059
+ - dict:
1060
+ If a single message-like object is passed in, a single OpenAI message
1061
+ dict is returned.
1062
+ - list[dict]:
1063
+ If a sequence of message-like objects are passed in, a list of OpenAI
1064
+ message dicts is returned.
973
1065
 
974
1066
  Example:
975
1067
 
@@ -984,8 +1076,27 @@ def convert_to_openai_messages(
984
1076
 
985
1077
  messages = [
986
1078
  SystemMessage([{"type": "text", "text": "foo"}]),
987
- {"role": "user", "content": [{"type": "text", "text": "whats in this"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"}}]},
988
- AIMessage("", tool_calls=[{"name": "analyze", "args": {"baz": "buz"}, "id": "1", "type": "tool_call"}]),
1079
+ {
1080
+ "role": "user",
1081
+ "content": [
1082
+ {"type": "text", "text": "whats in this"},
1083
+ {
1084
+ "type": "image_url",
1085
+ "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},
1086
+ },
1087
+ ],
1088
+ },
1089
+ AIMessage(
1090
+ "",
1091
+ tool_calls=[
1092
+ {
1093
+ "name": "analyze",
1094
+ "args": {"baz": "buz"},
1095
+ "id": "1",
1096
+ "type": "tool_call",
1097
+ }
1098
+ ],
1099
+ ),
989
1100
  ToolMessage("foobar", tool_call_id="1", name="bar"),
990
1101
  {"role": "assistant", "content": "thats nice"},
991
1102
  ]
@@ -1580,26 +1691,26 @@ def count_tokens_approximately(
1580
1691
  chars_per_token: Number of characters per token to use for the approximation.
1581
1692
  Default is 4 (one token corresponds to ~4 chars for common English text).
1582
1693
  You can also specify float values for more fine-grained control.
1583
- See more here: https://platform.openai.com/tokenizer
1694
+ `See more here. <https://platform.openai.com/tokenizer>`__
1584
1695
  extra_tokens_per_message: Number of extra tokens to add per message.
1585
1696
  Default is 3 (special tokens, including beginning/end of message).
1586
1697
  You can also specify float values for more fine-grained control.
1587
- See more here:
1588
- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
1698
+ `See more here. <https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb>`__
1589
1699
  count_name: Whether to include message names in the count.
1590
1700
  Enabled by default.
1591
1701
 
1592
1702
  Returns:
1593
1703
  Approximate number of tokens in the messages.
1594
1704
 
1595
- Note:
1596
- This is a simple approximation that may not match the exact token count
1597
- used by specific models. For accurate counts, use model-specific tokenizers.
1705
+ .. note::
1706
+ This is a simple approximation that may not match the exact token count used by
1707
+ specific models. For accurate counts, use model-specific tokenizers.
1598
1708
 
1599
1709
  Warning:
1600
1710
  This function does not currently support counting image tokens.
1601
1711
 
1602
1712
  .. versionadded:: 0.3.46
1713
+
1603
1714
  """
1604
1715
  token_count = 0.0
1605
1716
  for message in convert_to_messages(messages):
@@ -144,7 +144,10 @@ class BaseOutputParser(
144
144
 
145
145
  def parse(self, text: str) -> bool:
146
146
  cleaned_text = text.strip().upper()
147
- if cleaned_text not in (self.true_val.upper(), self.false_val.upper()):
147
+ if cleaned_text not in (
148
+ self.true_val.upper(),
149
+ self.false_val.upper(),
150
+ ):
148
151
  raise OutputParserException(
149
152
  f"BooleanOutputParser expected output value to either be "
150
153
  f"{self.true_val} or {self.false_val} (case-insensitive). "
@@ -156,7 +159,7 @@ class BaseOutputParser(
156
159
  def _type(self) -> str:
157
160
  return "boolean_output_parser"
158
161
 
159
- """ # noqa: E501
162
+ """
160
163
 
161
164
  @property
162
165
  @override
@@ -46,13 +46,13 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
46
46
  def _diff(self, prev: Optional[Any], next: Any) -> Any:
47
47
  return jsonpatch.make_patch(prev, next).patch
48
48
 
49
- def _get_schema(self, pydantic_object: type[TBaseModel]) -> dict[str, Any]:
49
+ @staticmethod
50
+ def _get_schema(pydantic_object: type[TBaseModel]) -> dict[str, Any]:
50
51
  if issubclass(pydantic_object, pydantic.BaseModel):
51
52
  return pydantic_object.model_json_schema()
52
- if issubclass(pydantic_object, pydantic.v1.BaseModel):
53
- return pydantic_object.schema()
54
- return None
53
+ return pydantic_object.schema()
55
54
 
55
+ @override
56
56
  def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any:
57
57
  """Parse the result of an LLM call to a JSON object.
58
58
 
@@ -143,10 +143,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
143
143
 
144
144
  @classmethod
145
145
  def is_lc_serializable(cls) -> bool:
146
- """Check if the langchain object is serializable.
147
-
148
- Returns True.
149
- """
146
+ """Return True as this class is serializable."""
150
147
  return True
151
148
 
152
149
  @classmethod
@@ -154,11 +151,11 @@ class CommaSeparatedListOutputParser(ListOutputParser):
154
151
  """Get the namespace of the langchain object.
155
152
 
156
153
  Returns:
157
- A list of strings.
158
- Default is ["langchain", "output_parsers", "list"].
154
+ ``["langchain", "output_parsers", "list"]``
159
155
  """
160
156
  return ["langchain", "output_parsers", "list"]
161
157
 
158
+ @override
162
159
  def get_format_instructions(self) -> str:
163
160
  """Return the format instructions for the comma-separated list output."""
164
161
  return (
@@ -166,6 +163,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
166
163
  "eg: `foo, bar, baz` or `foo,bar,baz`"
167
164
  )
168
165
 
166
+ @override
169
167
  def parse(self, text: str) -> list[str]:
170
168
  """Parse the output of an LLM call.
171
169
 
@@ -213,15 +211,8 @@ class NumberedListOutputParser(ListOutputParser):
213
211
  """
214
212
  return re.findall(self.pattern, text)
215
213
 
214
+ @override
216
215
  def parse_iter(self, text: str) -> Iterator[re.Match]:
217
- """Parse the output of an LLM call.
218
-
219
- Args:
220
- text: The output of an LLM call.
221
-
222
- Yields:
223
- A match object for each part of the output.
224
- """
225
216
  return re.finditer(self.pattern, text)
226
217
 
227
218
  @property
@@ -235,6 +226,7 @@ class MarkdownListOutputParser(ListOutputParser):
235
226
  pattern: str = r"^\s*[-*]\s([^\n]+)$"
236
227
  """The pattern to match a Markdown list item."""
237
228
 
229
+ @override
238
230
  def get_format_instructions(self) -> str:
239
231
  """Return the format instructions for the Markdown list output."""
240
232
  return "Your response should be a markdown list, eg: `- foo\n- bar\n- baz`"
@@ -250,15 +242,8 @@ class MarkdownListOutputParser(ListOutputParser):
250
242
  """
251
243
  return re.findall(self.pattern, text, re.MULTILINE)
252
244
 
245
+ @override
253
246
  def parse_iter(self, text: str) -> Iterator[re.Match]:
254
- """Parse the output of an LLM call.
255
-
256
- Args:
257
- text: The output of an LLM call.
258
-
259
- Yields:
260
- A match object for each part of the output.
261
- """
262
247
  return re.finditer(self.pattern, text, re.MULTILINE)
263
248
 
264
249
  @property
@@ -261,6 +261,9 @@ class PydanticOutputFunctionsParser(OutputFunctionsParser):
261
261
  result: The result of the LLM call.
262
262
  partial: Whether to parse partial JSON objects. Default is False.
263
263
 
264
+ Raises:
265
+ ValueError: If the pydantic schema is not valid.
266
+
264
267
  Returns:
265
268
  The parsed JSON object.
266
269
  """