langchain-core 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (60)
  1. langchain_core/agents.py +2 -4
  2. langchain_core/caches.py +13 -6
  3. langchain_core/chat_history.py +5 -5
  4. langchain_core/document_loaders/base.py +6 -4
  5. langchain_core/document_loaders/blob_loaders.py +1 -1
  6. langchain_core/document_loaders/langsmith.py +9 -10
  7. langchain_core/documents/__init__.py +24 -3
  8. langchain_core/documents/base.py +72 -59
  9. langchain_core/documents/compressor.py +6 -6
  10. langchain_core/documents/transformers.py +2 -2
  11. langchain_core/embeddings/fake.py +2 -2
  12. langchain_core/example_selectors/semantic_similarity.py +7 -7
  13. langchain_core/exceptions.py +2 -2
  14. langchain_core/indexing/__init__.py +1 -1
  15. langchain_core/indexing/api.py +62 -62
  16. langchain_core/indexing/base.py +16 -16
  17. langchain_core/indexing/in_memory.py +2 -2
  18. langchain_core/language_models/__init__.py +6 -5
  19. langchain_core/language_models/base.py +2 -2
  20. langchain_core/language_models/fake_chat_models.py +1 -1
  21. langchain_core/language_models/llms.py +4 -6
  22. langchain_core/load/dump.py +1 -1
  23. langchain_core/load/serializable.py +4 -1
  24. langchain_core/messages/__init__.py +9 -0
  25. langchain_core/messages/ai.py +11 -7
  26. langchain_core/messages/base.py +4 -0
  27. langchain_core/messages/block_translators/google_genai.py +4 -2
  28. langchain_core/messages/content.py +4 -4
  29. langchain_core/messages/utils.py +13 -13
  30. langchain_core/output_parsers/__init__.py +17 -1
  31. langchain_core/output_parsers/base.py +3 -0
  32. langchain_core/output_parsers/format_instructions.py +9 -4
  33. langchain_core/output_parsers/json.py +5 -2
  34. langchain_core/output_parsers/list.py +16 -16
  35. langchain_core/output_parsers/openai_tools.py +2 -2
  36. langchain_core/output_parsers/pydantic.py +1 -1
  37. langchain_core/output_parsers/string.py +3 -3
  38. langchain_core/output_parsers/xml.py +28 -25
  39. langchain_core/outputs/generation.py +2 -3
  40. langchain_core/prompt_values.py +0 -6
  41. langchain_core/prompts/base.py +5 -3
  42. langchain_core/prompts/chat.py +60 -52
  43. langchain_core/prompts/structured.py +12 -8
  44. langchain_core/retrievers.py +41 -37
  45. langchain_core/runnables/base.py +14 -14
  46. langchain_core/runnables/configurable.py +3 -3
  47. langchain_core/runnables/graph.py +7 -3
  48. langchain_core/tools/base.py +66 -12
  49. langchain_core/tools/convert.py +8 -5
  50. langchain_core/tools/retriever.py +6 -5
  51. langchain_core/tools/structured.py +7 -5
  52. langchain_core/tracers/log_stream.py +2 -2
  53. langchain_core/utils/strings.py +1 -4
  54. langchain_core/utils/utils.py +12 -5
  55. langchain_core/vectorstores/base.py +73 -69
  56. langchain_core/vectorstores/in_memory.py +2 -2
  57. langchain_core/version.py +1 -1
  58. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/METADATA +1 -1
  59. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/RECORD +60 -60
  60. {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
@@ -304,42 +304,42 @@ def index(
  !!! warning

  * In full mode, the loader should be returning
- the entire dataset, and not just a subset of the dataset.
- Otherwise, the auto_cleanup will remove documents that it is not
- supposed to.
+ the entire dataset, and not just a subset of the dataset.
+ Otherwise, the auto_cleanup will remove documents that it is not
+ supposed to.
  * In incremental mode, if documents associated with a particular
- source id appear across different batches, the indexing API
- will do some redundant work. This will still result in the
- correct end state of the index, but will unfortunately not be
- 100% efficient. For example, if a given document is split into 15
- chunks, and we index them using a batch size of 5, we'll have 3 batches
- all with the same source id. In general, to avoid doing too much
- redundant work select as big a batch size as possible.
+ source id appear across different batches, the indexing API
+ will do some redundant work. This will still result in the
+ correct end state of the index, but will unfortunately not be
+ 100% efficient. For example, if a given document is split into 15
+ chunks, and we index them using a batch size of 5, we'll have 3 batches
+ all with the same source id. In general, to avoid doing too much
+ redundant work select as big a batch size as possible.
  * The `scoped_full` mode is suitable if determining an appropriate batch size
- is challenging or if your data loader cannot return the entire dataset at
- once. This mode keeps track of source IDs in memory, which should be fine
- for most use cases. If your dataset is large (10M+ docs), you will likely
- need to parallelize the indexing process regardless.
+ is challenging or if your data loader cannot return the entire dataset at
+ once. This mode keeps track of source IDs in memory, which should be fine
+ for most use cases. If your dataset is large (10M+ docs), you will likely
+ need to parallelize the indexing process regardless.

  Args:
  docs_source: Data loader or iterable of documents to index.
  record_manager: Timestamped set to keep track of which documents were
  updated.
- vector_store: VectorStore or DocumentIndex to index the documents into.
+ vector_store: `VectorStore` or DocumentIndex to index the documents into.
  batch_size: Batch size to use when indexing.
  cleanup: How to handle clean up of documents.

  - incremental: Cleans up all documents that haven't been updated AND
- that are associated with source ids that were seen during indexing.
- Clean up is done continuously during indexing helping to minimize the
- probability of users seeing duplicated content.
+ that are associated with source IDs that were seen during indexing.
+ Clean up is done continuously during indexing helping to minimize the
+ probability of users seeing duplicated content.
  - full: Delete all documents that have not been returned by the loader
- during this run of indexing.
- Clean up runs after all documents have been indexed.
- This means that users may see duplicated content during indexing.
+ during this run of indexing.
+ Clean up runs after all documents have been indexed.
+ This means that users may see duplicated content during indexing.
  - scoped_full: Similar to Full, but only deletes all documents
- that haven't been updated AND that are associated with
- source ids that were seen during indexing.
+ that haven't been updated AND that are associated with
+ source IDs that were seen during indexing.
  - None: Do not delete any documents.
  source_id_key: Optional key that helps identify the original source
  of the document.
@@ -363,7 +363,7 @@ def index(
  When changing the key encoder, you must change the
  index as well to avoid duplicated documents in the cache.
  upsert_kwargs: Additional keyword arguments to pass to the add_documents
- method of the VectorStore or the upsert method of the DocumentIndex.
+ method of the `VectorStore` or the upsert method of the DocumentIndex.
  For example, you can use this to specify a custom vector_field:
  upsert_kwargs={"vector_field": "embedding"}
  !!! version-added "Added in version 0.3.10"
@@ -375,10 +375,10 @@ def index(
  Raises:
  ValueError: If cleanup mode is not one of 'incremental', 'full' or None
  ValueError: If cleanup mode is incremental and source_id_key is None.
- ValueError: If vectorstore does not have
+ ValueError: If `VectorStore` does not have
  "delete" and "add_documents" required methods.
  ValueError: If source_id_key is not None, but is not a string or callable.
- TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+ TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
  AssertionError: If `source_id` is None when cleanup mode is incremental.
  (should be unreachable code).
  """
@@ -415,7 +415,7 @@ def index(
  raise ValueError(msg)

  if type(destination).delete == VectorStore.delete:
- # Checking if the vectorstore has overridden the default delete method
+ # Checking if the VectorStore has overridden the default delete method
  # implementation which just raises a NotImplementedError
  msg = "Vectorstore has not implemented the delete method"
  raise ValueError(msg)
@@ -466,11 +466,11 @@ def index(
  ]

  if cleanup in {"incremental", "scoped_full"}:
- # source ids are required.
+ # Source IDs are required.
  for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
  if source_id is None:
  msg = (
- f"Source ids are required when cleanup mode is "
+ f"Source IDs are required when cleanup mode is "
  f"incremental or scoped_full. "
  f"Document that starts with "
  f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +479,7 @@ def index(
  raise ValueError(msg)
  if cleanup == "scoped_full":
  scoped_full_cleanup_source_ids.add(source_id)
- # source ids cannot be None after for loop above.
+ # Source IDs cannot be None after for loop above.
  source_ids = cast("Sequence[str]", source_ids)

  exists_batch = record_manager.exists(
@@ -538,7 +538,7 @@ def index(
  # If source IDs are provided, we can do the deletion incrementally!
  if cleanup == "incremental":
  # Get the uids of the documents that were not returned by the loader.
- # mypy isn't good enough to determine that source ids cannot be None
+ # mypy isn't good enough to determine that source IDs cannot be None
  # here due to a check that's happening above, so we check again.
  for source_id in source_ids:
  if source_id is None:
@@ -642,42 +642,42 @@ async def aindex(
  !!! warning

  * In full mode, the loader should be returning
- the entire dataset, and not just a subset of the dataset.
- Otherwise, the auto_cleanup will remove documents that it is not
- supposed to.
+ the entire dataset, and not just a subset of the dataset.
+ Otherwise, the auto_cleanup will remove documents that it is not
+ supposed to.
  * In incremental mode, if documents associated with a particular
- source id appear across different batches, the indexing API
- will do some redundant work. This will still result in the
- correct end state of the index, but will unfortunately not be
- 100% efficient. For example, if a given document is split into 15
- chunks, and we index them using a batch size of 5, we'll have 3 batches
- all with the same source id. In general, to avoid doing too much
- redundant work select as big a batch size as possible.
+ source id appear across different batches, the indexing API
+ will do some redundant work. This will still result in the
+ correct end state of the index, but will unfortunately not be
+ 100% efficient. For example, if a given document is split into 15
+ chunks, and we index them using a batch size of 5, we'll have 3 batches
+ all with the same source id. In general, to avoid doing too much
+ redundant work select as big a batch size as possible.
  * The `scoped_full` mode is suitable if determining an appropriate batch size
- is challenging or if your data loader cannot return the entire dataset at
- once. This mode keeps track of source IDs in memory, which should be fine
- for most use cases. If your dataset is large (10M+ docs), you will likely
- need to parallelize the indexing process regardless.
+ is challenging or if your data loader cannot return the entire dataset at
+ once. This mode keeps track of source IDs in memory, which should be fine
+ for most use cases. If your dataset is large (10M+ docs), you will likely
+ need to parallelize the indexing process regardless.

  Args:
  docs_source: Data loader or iterable of documents to index.
  record_manager: Timestamped set to keep track of which documents were
  updated.
- vector_store: VectorStore or DocumentIndex to index the documents into.
+ vector_store: `VectorStore` or DocumentIndex to index the documents into.
  batch_size: Batch size to use when indexing.
  cleanup: How to handle clean up of documents.

  - incremental: Cleans up all documents that haven't been updated AND
- that are associated with source ids that were seen during indexing.
- Clean up is done continuously during indexing helping to minimize the
- probability of users seeing duplicated content.
+ that are associated with source IDs that were seen during indexing.
+ Clean up is done continuously during indexing helping to minimize the
+ probability of users seeing duplicated content.
  - full: Delete all documents that have not been returned by the loader
- during this run of indexing.
- Clean up runs after all documents have been indexed.
- This means that users may see duplicated content during indexing.
+ during this run of indexing.
+ Clean up runs after all documents have been indexed.
+ This means that users may see duplicated content during indexing.
  - scoped_full: Similar to Full, but only deletes all documents
- that haven't been updated AND that are associated with
- source ids that were seen during indexing.
+ that haven't been updated AND that are associated with
+ source IDs that were seen during indexing.
  - None: Do not delete any documents.
  source_id_key: Optional key that helps identify the original source
  of the document.
@@ -701,7 +701,7 @@ async def aindex(
  When changing the key encoder, you must change the
  index as well to avoid duplicated documents in the cache.
  upsert_kwargs: Additional keyword arguments to pass to the add_documents
- method of the VectorStore or the upsert method of the DocumentIndex.
+ method of the `VectorStore` or the upsert method of the DocumentIndex.
  For example, you can use this to specify a custom vector_field:
  upsert_kwargs={"vector_field": "embedding"}
  !!! version-added "Added in version 0.3.10"
@@ -713,10 +713,10 @@ async def aindex(
  Raises:
  ValueError: If cleanup mode is not one of 'incremental', 'full' or None
  ValueError: If cleanup mode is incremental and source_id_key is None.
- ValueError: If vectorstore does not have
+ ValueError: If `VectorStore` does not have
  "adelete" and "aadd_documents" required methods.
  ValueError: If source_id_key is not None, but is not a string or callable.
- TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+ TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
  AssertionError: If `source_id_key` is None when cleanup mode is
  incremental or `scoped_full` (should be unreachable).
  """
@@ -757,7 +757,7 @@ async def aindex(
  type(destination).adelete == VectorStore.adelete
  and type(destination).delete == VectorStore.delete
  ):
- # Checking if the vectorstore has overridden the default adelete or delete
+ # Checking if the VectorStore has overridden the default adelete or delete
  # methods implementation which just raises a NotImplementedError
  msg = "Vectorstore has not implemented the adelete or delete method"
  raise ValueError(msg)
@@ -815,11 +815,11 @@ async def aindex(
  ]

  if cleanup in {"incremental", "scoped_full"}:
- # If the cleanup mode is incremental, source ids are required.
+ # If the cleanup mode is incremental, source IDs are required.
  for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
  if source_id is None:
  msg = (
- f"Source ids are required when cleanup mode is "
+ f"Source IDs are required when cleanup mode is "
  f"incremental or scoped_full. "
  f"Document that starts with "
  f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +828,7 @@ async def aindex(
  raise ValueError(msg)
  if cleanup == "scoped_full":
  scoped_full_cleanup_source_ids.add(source_id)
- # source ids cannot be None after for loop above.
+ # Source IDs cannot be None after for loop above.
  source_ids = cast("Sequence[str]", source_ids)

  exists_batch = await record_manager.aexists(
@@ -888,7 +888,7 @@ async def aindex(
  if cleanup == "incremental":
  # Get the uids of the documents that were not returned by the loader.

- # mypy isn't good enough to determine that source ids cannot be None
+ # mypy isn't good enough to determine that source IDs cannot be None
  # here due to a check that's happening above, so we check again.
  for source_id in source_ids:
  if source_id is None:
@@ -25,7 +25,7 @@ class RecordManager(ABC):
  The record manager abstraction is used by the langchain indexing API.

  The record manager keeps track of which documents have been
- written into a vectorstore and when they were written.
+ written into a `VectorStore` and when they were written.

  The indexing API computes hashes for each document and stores the hash
  together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
  already been indexed, and to only index new documents.

  The main benefit of this abstraction is that it works across many vectorstores.
- To be supported, a vectorstore needs to only support the ability to add and
+ To be supported, a `VectorStore` needs to only support the ability to add and
  delete documents by ID. Using the record manager, the indexing API will
  be able to delete outdated documents and avoid redundant indexing of documents
  that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
  The main constraints of this abstraction are:

  1. It relies on the time-stamps to determine which documents have been
- indexed and which have not. This means that the time-stamps must be
- monotonically increasing. The timestamp should be the timestamp
- as measured by the server to minimize issues.
+ indexed and which have not. This means that the time-stamps must be
+ monotonically increasing. The timestamp should be the timestamp
+ as measured by the server to minimize issues.
  2. The record manager is currently implemented separately from the
- vectorstore, which means that the overall system becomes distributed
- and may create issues with consistency. For example, writing to
- record manager succeeds, but corresponding writing to vectorstore fails.
+ vectorstore, which means that the overall system becomes distributed
+ and may create issues with consistency. For example, writing to
+ record manager succeeds, but corresponding writing to `VectorStore` fails.
  """

  def __init__(
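To make the constraints above concrete, this is roughly the surface the indexing API relies on, shown against the in-memory implementation. Method names are taken from the `RecordManager` ABC; treat the sketch as illustrative rather than canonical.

```python
from langchain_core.indexing import InMemoryRecordManager

rm = InMemoryRecordManager(namespace="demo")
rm.create_schema()

# One key per document hash, stamped with a server-side time and an
# optional group (source) ID.
rm.update(["hash-1", "hash-2"], group_ids=["kitty.txt", "doggy.txt"])

print(rm.exists(["hash-1", "missing"]))        # [True, False]
stale = rm.list_keys(group_ids=["kitty.txt"])  # keys recorded for one source
rm.delete_keys(stale)
```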
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
  class DeleteResponse(TypedDict, total=False):
  """A generic response for delete operation.

- The fields in this response are optional and whether the vectorstore
+ The fields in this response are optional and whether the `VectorStore`
  returns them or not is up to the implementation.
  """

@@ -518,7 +518,7 @@ class DocumentIndex(BaseRetriever):
  if it is provided. If the ID is not provided, the upsert method is free
  to generate an ID for the content.

- When an ID is specified and the content already exists in the vectorstore,
+ When an ID is specified and the content already exists in the `VectorStore`,
  the upsert method should update the content with the new data. If the content
  does not exist, the upsert method should add the item to the `VectorStore`.

@@ -528,20 +528,20 @@ class DocumentIndex(BaseRetriever):

  Returns:
  A response object that contains the list of IDs that were
- successfully added or updated in the vectorstore and the list of IDs that
+ successfully added or updated in the `VectorStore` and the list of IDs that
  failed to be added or updated.
  """

  async def aupsert(
  self, items: Sequence[Document], /, **kwargs: Any
  ) -> UpsertResponse:
- """Add or update documents in the vectorstore. Async version of upsert.
+ """Add or update documents in the `VectorStore`. Async version of `upsert`.

  The upsert functionality should utilize the ID field of the item
  if it is provided. If the ID is not provided, the upsert method is free
  to generate an ID for the item.

- When an ID is specified and the item already exists in the vectorstore,
+ When an ID is specified and the item already exists in the `VectorStore`,
  the upsert method should update the item with the new data. If the item
  does not exist, the upsert method should add the item to the `VectorStore`.

@@ -551,7 +551,7 @@ class DocumentIndex(BaseRetriever):

  Returns:
  A response object that contains the list of IDs that were
- successfully added or updated in the vectorstore and the list of IDs that
+ successfully added or updated in the `VectorStore` and the list of IDs that
  failed to be added or updated.
  """
  return await run_in_executor(
@@ -568,7 +568,7 @@ class DocumentIndex(BaseRetriever):
  Calling delete without any input parameters should raise a ValueError!

  Args:
- ids: List of ids to delete.
+ ids: List of IDs to delete.
  **kwargs: Additional keyword arguments. This is up to the implementation.
  For example, can include an option to delete the entire index,
  or else issue a non-blocking delete etc.
@@ -586,7 +586,7 @@ class DocumentIndex(BaseRetriever):
  Calling adelete without any input parameters should raise a ValueError!

  Args:
- ids: List of ids to delete.
+ ids: List of IDs to delete.
  **kwargs: Additional keyword arguments. This is up to the implementation.
  For example, can include an option to delete the entire index.

@@ -62,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
  """Delete by IDs.

  Args:
- ids: List of ids to delete.
+ ids: List of IDs to delete.

  Raises:
- ValueError: If ids is None.
+ ValueError: If IDs is None.

  Returns:
  A response object that contains the list of IDs that were successfully
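A small sketch of the upsert/delete contract these docstrings describe, using `InMemoryDocumentIndex`. The class lives in `langchain_core.indexing.in_memory`; the top-level import path is an assumption.

```python
from langchain_core.documents import Document
from langchain_core.indexing import InMemoryDocumentIndex

doc_index = InMemoryDocumentIndex()

# Upsert: existing IDs are updated in place, new IDs are added.
response = doc_index.upsert(
    [
        Document(id="1", page_content="kitty"),
        Document(id="2", page_content="doggy"),
    ]
)
print(response["succeeded"])  # ['1', '2']

# Delete by IDs; which response fields come back is implementation-specific.
print(doc_index.delete(["2"]))
```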
@@ -6,12 +6,13 @@ LangChain has two main classes to work with language models: chat models and
  **Chat models**

  Language models that use a sequence of messages as inputs and return chat messages
- as outputs (as opposed to using plain text). Chat models support the assignment of
- distinct roles to conversation messages, helping to distinguish messages from the AI,
- users, and instructions such as system messages.
+ as outputs (as opposed to using plain text).

- The key abstraction for chat models is `BaseChatModel`. Implementations
- should inherit from this class.
+ Chat models support the assignment of distinct roles to conversation messages, helping
+ to distinguish messages from the AI, users, and instructions such as system messages.
+
+ The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
+ from this class.

  See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).

@@ -262,13 +262,13 @@ class BaseLanguageModel(
  return self.lc_attributes

  def get_token_ids(self, text: str) -> list[int]:
- """Return the ordered ids of the tokens in a text.
+ """Return the ordered IDs of the tokens in a text.

  Args:
  text: The string input to tokenize.

  Returns:
- A list of ids corresponding to the tokens in the text, in order they occur
+ A list of IDs corresponding to the tokens in the text, in order they occur
  in the text.
  """
  if self.custom_get_token_ids is not None:
@@ -1,4 +1,4 @@
- """Fake chat model for testing purposes."""
+ """Fake chat models for testing purposes."""

  import asyncio
  import re
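The fake chat models touched here are convenient for illustrating the messages-in/message-out contract of `BaseChatModel` described in the `language_models` docstring above. This assumes `FakeListChatModel` is exported from `langchain_core.language_models`.

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.messages import HumanMessage, SystemMessage

# Any BaseChatModel consumes role-tagged messages and returns an AIMessage.
model = FakeListChatModel(responses=["Hello there!"])

result = model.invoke(
    [
        SystemMessage("You are terse."),
        HumanMessage("Say hi."),
    ]
)
print(type(result).__name__, result.content)  # AIMessage Hello there!
```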
@@ -1,4 +1,7 @@
- """Base interface for large language models to expose."""
+ """Base interface for traditional large language models (LLMs) to expose.
+
+ These are traditionally older models (newer models generally are chat models).
+ """

  from __future__ import annotations

@@ -1391,11 +1394,6 @@ class LLM(BaseLLM):
  `astream` will use `_astream` if provided, otherwise it will implement
  a fallback behavior that will use `_stream` if `_stream` is implemented,
  and use `_acall` if `_stream` is not implemented.
-
- Please see the following guide for more information on how to
- implement a custom LLM:
-
- https://python.langchain.com/docs/how_to/custom_llm/
  """

  @abstractmethod
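The removed link pointed at the custom-LLM guide; the fallback behavior described above boils down to implementing `_call` (and optionally `_stream`). A minimal sketch of a toy subclass, not an official recipe:

```python
from typing import Any, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class EchoLLM(LLM):
    """Toy string-in/string-out model that only implements the required _call."""

    @property
    def _llm_type(self) -> str:
        return "echo"

    def _call(
        self,
        prompt: str,
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # invoke/stream/astream all fall back to this when no _stream or
        # _acall override is provided.
        return prompt[::-1]


print(EchoLLM().invoke("hello"))  # 'olleh'
```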
@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
  obj: The object to serialize to json if it is a Serializable object.

  Returns:
- A json serializable object or a SerializedNotImplemented object.
+ A JSON serializable object or a SerializedNotImplemented object.
  """
  if isinstance(obj, Serializable):
  return obj.to_json()
@@ -97,11 +97,14 @@ class Serializable(BaseModel, ABC):
  by default. This is to prevent accidental serialization of objects that should
  not be serialized.
  - `get_lc_namespace`: Get the namespace of the LangChain object.
+
  During deserialization, this namespace is used to identify
  the correct class to instantiate.
+
  Please see the `Reviver` class in `langchain_core.load.load` for more details.
  During deserialization an additional mapping is handle classes that have moved
  or been renamed across package versions.
+
  - `lc_secrets`: A map of constructor argument names to secret ids.
  - `lc_attributes`: List of additional attribute names that should be included
  as part of the serialized representation.
@@ -194,7 +197,7 @@ class Serializable(BaseModel, ABC):
  ValueError: If the class has deprecated attributes.

  Returns:
- A json serializable object or a `SerializedNotImplemented` object.
+ A JSON serializable object or a `SerializedNotImplemented` object.
  """
  if not self.is_lc_serializable():
  return self.to_json_not_implemented()
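For context, `to_json`/`default` feed the high-level `dumps`/`loads` helpers. A small round-trip sketch, assuming the `langchain_core.load` import path:

```python
from langchain_core.load import dumps, loads
from langchain_core.messages import AIMessage

msg = AIMessage("hi", id="msg-1")

serialized = dumps(msg, pretty=True)  # JSON string built via Serializable.to_json()
restored = loads(serialized)          # the Reviver maps the lc namespace back to a class

assert restored == msg
```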
@@ -9,6 +9,9 @@ if TYPE_CHECKING:
  from langchain_core.messages.ai import (
  AIMessage,
  AIMessageChunk,
+ InputTokenDetails,
+ OutputTokenDetails,
+ UsageMetadata,
  )
  from langchain_core.messages.base import (
  BaseMessage,
@@ -87,10 +90,12 @@ __all__ = (
  "HumanMessage",
  "HumanMessageChunk",
  "ImageContentBlock",
+ "InputTokenDetails",
  "InvalidToolCall",
  "MessageLikeRepresentation",
  "NonStandardAnnotation",
  "NonStandardContentBlock",
+ "OutputTokenDetails",
  "PlainTextContentBlock",
  "ReasoningContentBlock",
  "RemoveMessage",
@@ -104,6 +109,7 @@ __all__ = (
  "ToolCallChunk",
  "ToolMessage",
  "ToolMessageChunk",
+ "UsageMetadata",
  "VideoContentBlock",
  "_message_from_dict",
  "convert_to_messages",
@@ -145,6 +151,7 @@ _dynamic_imports = {
  "HumanMessageChunk": "human",
  "NonStandardAnnotation": "content",
  "NonStandardContentBlock": "content",
+ "OutputTokenDetails": "ai",
  "PlainTextContentBlock": "content",
  "ReasoningContentBlock": "content",
  "RemoveMessage": "modifier",
@@ -154,12 +161,14 @@ _dynamic_imports = {
  "SystemMessage": "system",
  "SystemMessageChunk": "system",
  "ImageContentBlock": "content",
+ "InputTokenDetails": "ai",
  "InvalidToolCall": "tool",
  "TextContentBlock": "content",
  "ToolCall": "tool",
  "ToolCallChunk": "tool",
  "ToolMessage": "tool",
  "ToolMessageChunk": "tool",
+ "UsageMetadata": "ai",
  "VideoContentBlock": "content",
  "AnyMessage": "utils",
  "MessageLikeRepresentation": "utils",
@@ -48,10 +48,10 @@ class InputTokenDetails(TypedDict, total=False):
  }
  ```

- !!! version-added "Added in version 0.3.9"
-
  May also hold extra provider-specific keys.

+ !!! version-added "Added in version 0.3.9"
+
  """

  audio: int
@@ -83,6 +83,8 @@ class OutputTokenDetails(TypedDict, total=False):
  }
  ```

+ May also hold extra provider-specific keys.
+
  !!! version-added "Added in version 0.3.9"

  """
@@ -124,6 +126,10 @@ class UsageMetadata(TypedDict):
  !!! warning "Behavior changed in 0.3.9"
  Added `input_token_details` and `output_token_details`.

+ !!! note "LangSmith SDK"
+ The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
+ LangSmith's `UsageMetadata` has additional fields to capture cost information
+ used by the LangSmith platform.
  """

  input_tokens: int
@@ -131,7 +137,7 @@ class UsageMetadata(TypedDict):
  output_tokens: int
  """Count of output (or completion) tokens. Sum of all output token types."""
  total_tokens: int
- """Total token count. Sum of input_tokens + output_tokens."""
+ """Total token count. Sum of `input_tokens` + `output_tokens`."""
  input_token_details: NotRequired[InputTokenDetails]
  """Breakdown of input token counts.

@@ -141,7 +147,6 @@ class UsageMetadata(TypedDict):
  """Breakdown of output token counts.

  Does *not* need to sum to full output token count. Does *not* need to have all keys.
-
  """

@@ -153,7 +158,6 @@ class AIMessage(BaseMessage):
  This message represents the output of the model and consists of both
  the raw output as returned by the model and standardized fields
  (e.g., tool calls, usage metadata) added by the LangChain framework.
-
  """

  tool_calls: list[ToolCall] = []
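Since this release re-exports `UsageMetadata`, `InputTokenDetails`, and `OutputTokenDetails` from `langchain_core.messages`, here is a quick sketch of how the fields documented above relate; the numbers are made up.

```python
from langchain_core.messages import AIMessage, UsageMetadata

usage: UsageMetadata = {
    "input_tokens": 350,
    "output_tokens": 240,
    "total_tokens": 590,  # input_tokens + output_tokens
    "input_token_details": {"cache_read": 200},   # need not sum to input_tokens
    "output_token_details": {"reasoning": 150},   # need not sum to output_tokens
}

msg = AIMessage("answer", usage_metadata=usage)
print(msg.usage_metadata["total_tokens"])  # 590
```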
@@ -651,13 +655,13 @@ def add_ai_message_chunks(
  chunk_id = id_
  break
  else:
- # second pass: prefer lc_run-* ids over lc_* ids
+ # second pass: prefer lc_run-* IDs over lc_* IDs
  for id_ in candidates:
  if id_ and id_.startswith(LC_ID_PREFIX):
  chunk_id = id_
  break
  else:
- # third pass: take any remaining id (auto-generated lc_* ids)
+ # third pass: take any remaining ID (auto-generated lc_* IDs)
  for id_ in candidates:
  if id_:
  chunk_id = id_
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
  """Base abstract message class.

  Messages are the inputs and outputs of a chat model.
+
+ Examples include [`HumanMessage`][langchain.messages.HumanMessage],
+ [`AIMessage`][langchain.messages.AIMessage], and
+ [`SystemMessage`][langchain.messages.SystemMessage].
  """

  content: str | list[str | dict]
@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
  else:
  # Assume it's raw base64 without data URI
  try:
- # Validate base64 and decode for mime type detection
+ # Validate base64 and decode for MIME type detection
  decoded_bytes = base64.b64decode(url, validate=True)

  image_url_b64_block = {
@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
  try:
  import filetype  # type: ignore[import-not-found] # noqa: PLC0415

- # Guess mime type based on file bytes
+ # Guess MIME type based on file bytes
  mime_type = None
  kind = filetype.guess(decoded_bytes)
  if kind:
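The base64/MIME handling in this translator follows a common pattern; a standalone sketch with a hypothetical helper name (`sniff_mime` is not part of langchain-core):

```python
import base64


def sniff_mime(raw_b64: str) -> str | None:
    """Decode raw base64 (no data URI) and guess the MIME type from the bytes."""
    decoded = base64.b64decode(raw_b64, validate=True)  # raises binascii.Error if invalid
    try:
        import filetype  # optional dependency, used purely for MIME sniffing
    except ImportError:
        return None
    kind = filetype.guess(decoded)
    return kind.mime if kind else None
```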
@@ -458,6 +458,8 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
  if outcome is not None:
  server_tool_result_block["extras"]["outcome"] = outcome
  converted_blocks.append(server_tool_result_block)
+ elif item_type == "text":
+ converted_blocks.append(cast("types.TextContentBlock", item))
  else:
  # Unknown type, preserve as non-standard
  converted_blocks.append({"type": "non_standard", "value": item})