langchain-core 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/agents.py +2 -4
- langchain_core/caches.py +13 -6
- langchain_core/chat_history.py +5 -5
- langchain_core/document_loaders/base.py +6 -4
- langchain_core/document_loaders/blob_loaders.py +1 -1
- langchain_core/document_loaders/langsmith.py +9 -10
- langchain_core/documents/__init__.py +24 -3
- langchain_core/documents/base.py +72 -59
- langchain_core/documents/compressor.py +6 -6
- langchain_core/documents/transformers.py +2 -2
- langchain_core/embeddings/fake.py +2 -2
- langchain_core/example_selectors/semantic_similarity.py +7 -7
- langchain_core/exceptions.py +2 -2
- langchain_core/indexing/__init__.py +1 -1
- langchain_core/indexing/api.py +62 -62
- langchain_core/indexing/base.py +16 -16
- langchain_core/indexing/in_memory.py +2 -2
- langchain_core/language_models/__init__.py +6 -5
- langchain_core/language_models/base.py +2 -2
- langchain_core/language_models/fake_chat_models.py +1 -1
- langchain_core/language_models/llms.py +4 -6
- langchain_core/load/dump.py +1 -1
- langchain_core/load/serializable.py +4 -1
- langchain_core/messages/__init__.py +9 -0
- langchain_core/messages/ai.py +11 -7
- langchain_core/messages/base.py +4 -0
- langchain_core/messages/block_translators/google_genai.py +4 -2
- langchain_core/messages/content.py +4 -4
- langchain_core/messages/utils.py +13 -13
- langchain_core/output_parsers/__init__.py +17 -1
- langchain_core/output_parsers/base.py +3 -0
- langchain_core/output_parsers/format_instructions.py +9 -4
- langchain_core/output_parsers/json.py +5 -2
- langchain_core/output_parsers/list.py +16 -16
- langchain_core/output_parsers/openai_tools.py +2 -2
- langchain_core/output_parsers/pydantic.py +1 -1
- langchain_core/output_parsers/string.py +3 -3
- langchain_core/output_parsers/xml.py +28 -25
- langchain_core/outputs/generation.py +2 -3
- langchain_core/prompt_values.py +0 -6
- langchain_core/prompts/base.py +5 -3
- langchain_core/prompts/chat.py +60 -52
- langchain_core/prompts/structured.py +12 -8
- langchain_core/retrievers.py +41 -37
- langchain_core/runnables/base.py +14 -14
- langchain_core/runnables/configurable.py +3 -3
- langchain_core/runnables/graph.py +7 -3
- langchain_core/tools/base.py +66 -12
- langchain_core/tools/convert.py +8 -5
- langchain_core/tools/retriever.py +6 -5
- langchain_core/tools/structured.py +7 -5
- langchain_core/tracers/log_stream.py +2 -2
- langchain_core/utils/strings.py +1 -4
- langchain_core/utils/utils.py +12 -5
- langchain_core/vectorstores/base.py +73 -69
- langchain_core/vectorstores/in_memory.py +2 -2
- langchain_core/version.py +1 -1
- {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/METADATA +1 -1
- {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/RECORD +60 -60
- {langchain_core-1.0.1.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
langchain_core/indexing/api.py
CHANGED

@@ -304,42 +304,42 @@ def index(
     !!! warning

         * In full mode, the loader should be returning
-
-
-
+          the entire dataset, and not just a subset of the dataset.
+          Otherwise, the auto_cleanup will remove documents that it is not
+          supposed to.
         * In incremental mode, if documents associated with a particular
-
-
-
-
-
-
-
+          source id appear across different batches, the indexing API
+          will do some redundant work. This will still result in the
+          correct end state of the index, but will unfortunately not be
+          100% efficient. For example, if a given document is split into 15
+          chunks, and we index them using a batch size of 5, we'll have 3 batches
+          all with the same source id. In general, to avoid doing too much
+          redundant work select as big a batch size as possible.
         * The `scoped_full` mode is suitable if determining an appropriate batch size
-
-
-
-
+          is challenging or if your data loader cannot return the entire dataset at
+          once. This mode keeps track of source IDs in memory, which should be fine
+          for most use cases. If your dataset is large (10M+ docs), you will likely
+          need to parallelize the indexing process regardless.

     Args:
         docs_source: Data loader or iterable of documents to index.
         record_manager: Timestamped set to keep track of which documents were
             updated.
-        vector_store: VectorStore or DocumentIndex to index the documents into.
+        vector_store: `VectorStore` or DocumentIndex to index the documents into.
         batch_size: Batch size to use when indexing.
         cleanup: How to handle clean up of documents.

             - incremental: Cleans up all documents that haven't been updated AND
-
-
-
+               that are associated with source IDs that were seen during indexing.
+               Clean up is done continuously during indexing helping to minimize the
+               probability of users seeing duplicated content.
             - full: Delete all documents that have not been returned by the loader
-
-
-
+               during this run of indexing.
+               Clean up runs after all documents have been indexed.
+               This means that users may see duplicated content during indexing.
             - scoped_full: Similar to Full, but only deletes all documents
-
-
+               that haven't been updated AND that are associated with
+               source IDs that were seen during indexing.
             - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
             of the document.

@@ -363,7 +363,7 @@ def index(
            When changing the key encoder, you must change the
            index as well to avoid duplicated documents in the cache.
        upsert_kwargs: Additional keyword arguments to pass to the add_documents
-            method of the VectorStore or the upsert method of the DocumentIndex.
+            method of the `VectorStore` or the upsert method of the DocumentIndex.
            For example, you can use this to specify a custom vector_field:
            upsert_kwargs={"vector_field": "embedding"}
            !!! version-added "Added in version 0.3.10"

@@ -375,10 +375,10 @@ def index(
    Raises:
        ValueError: If cleanup mode is not one of 'incremental', 'full' or None
        ValueError: If cleanup mode is incremental and source_id_key is None.
-        ValueError: If
+        ValueError: If `VectorStore` does not have
            "delete" and "add_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+        TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
        AssertionError: If `source_id` is None when cleanup mode is incremental.
            (should be unreachable code).
    """

@@ -415,7 +415,7 @@ def index(
        raise ValueError(msg)

    if type(destination).delete == VectorStore.delete:
-        # Checking if the
+        # Checking if the VectorStore has overridden the default delete method
        # implementation which just raises a NotImplementedError
        msg = "Vectorstore has not implemented the delete method"
        raise ValueError(msg)

@@ -466,11 +466,11 @@ def index(
        ]

        if cleanup in {"incremental", "scoped_full"}:
-            #
+            # Source IDs are required.
            for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                if source_id is None:
                    msg = (
-                        f"Source
+                        f"Source IDs are required when cleanup mode is "
                        f"incremental or scoped_full. "
                        f"Document that starts with "
                        f"content: {hashed_doc.page_content[:100]} "

@@ -479,7 +479,7 @@ def index(
                    raise ValueError(msg)
                if cleanup == "scoped_full":
                    scoped_full_cleanup_source_ids.add(source_id)
-            #
+            # Source IDs cannot be None after for loop above.
            source_ids = cast("Sequence[str]", source_ids)

        exists_batch = record_manager.exists(

@@ -538,7 +538,7 @@ def index(
    # If source IDs are provided, we can do the deletion incrementally!
    if cleanup == "incremental":
        # Get the uids of the documents that were not returned by the loader.
-        # mypy isn't good enough to determine that source
+        # mypy isn't good enough to determine that source IDs cannot be None
        # here due to a check that's happening above, so we check again.
        for source_id in source_ids:
            if source_id is None:

@@ -642,42 +642,42 @@ async def aindex(
     !!! warning

         * In full mode, the loader should be returning
-
-
-
+          the entire dataset, and not just a subset of the dataset.
+          Otherwise, the auto_cleanup will remove documents that it is not
+          supposed to.
         * In incremental mode, if documents associated with a particular
-
-
-
-
-
-
-
+          source id appear across different batches, the indexing API
+          will do some redundant work. This will still result in the
+          correct end state of the index, but will unfortunately not be
+          100% efficient. For example, if a given document is split into 15
+          chunks, and we index them using a batch size of 5, we'll have 3 batches
+          all with the same source id. In general, to avoid doing too much
+          redundant work select as big a batch size as possible.
         * The `scoped_full` mode is suitable if determining an appropriate batch size
-
-
-
-
+          is challenging or if your data loader cannot return the entire dataset at
+          once. This mode keeps track of source IDs in memory, which should be fine
+          for most use cases. If your dataset is large (10M+ docs), you will likely
+          need to parallelize the indexing process regardless.

     Args:
         docs_source: Data loader or iterable of documents to index.
         record_manager: Timestamped set to keep track of which documents were
             updated.
-        vector_store: VectorStore or DocumentIndex to index the documents into.
+        vector_store: `VectorStore` or DocumentIndex to index the documents into.
         batch_size: Batch size to use when indexing.
         cleanup: How to handle clean up of documents.

             - incremental: Cleans up all documents that haven't been updated AND
-
-
-
+               that are associated with source IDs that were seen during indexing.
+               Clean up is done continuously during indexing helping to minimize the
+               probability of users seeing duplicated content.
             - full: Delete all documents that have not been returned by the loader
-
-
-
+               during this run of indexing.
+               Clean up runs after all documents have been indexed.
+               This means that users may see duplicated content during indexing.
             - scoped_full: Similar to Full, but only deletes all documents
-
-
+               that haven't been updated AND that are associated with
+               source IDs that were seen during indexing.
             - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
             of the document.

@@ -701,7 +701,7 @@ async def aindex(
            When changing the key encoder, you must change the
            index as well to avoid duplicated documents in the cache.
        upsert_kwargs: Additional keyword arguments to pass to the add_documents
-            method of the VectorStore or the upsert method of the DocumentIndex.
+            method of the `VectorStore` or the upsert method of the DocumentIndex.
            For example, you can use this to specify a custom vector_field:
            upsert_kwargs={"vector_field": "embedding"}
            !!! version-added "Added in version 0.3.10"

@@ -713,10 +713,10 @@ async def aindex(
    Raises:
        ValueError: If cleanup mode is not one of 'incremental', 'full' or None
        ValueError: If cleanup mode is incremental and source_id_key is None.
-        ValueError: If
+        ValueError: If `VectorStore` does not have
            "adelete" and "aadd_documents" required methods.
        ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+        TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
        AssertionError: If `source_id_key` is None when cleanup mode is
            incremental or `scoped_full` (should be unreachable).
    """

@@ -757,7 +757,7 @@ async def aindex(
        type(destination).adelete == VectorStore.adelete
        and type(destination).delete == VectorStore.delete
    ):
-        # Checking if the
+        # Checking if the VectorStore has overridden the default adelete or delete
        # methods implementation which just raises a NotImplementedError
        msg = "Vectorstore has not implemented the adelete or delete method"
        raise ValueError(msg)

@@ -815,11 +815,11 @@ async def aindex(
        ]

        if cleanup in {"incremental", "scoped_full"}:
-            # If the cleanup mode is incremental, source
+            # If the cleanup mode is incremental, source IDs are required.
            for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                if source_id is None:
                    msg = (
-                        f"Source
+                        f"Source IDs are required when cleanup mode is "
                        f"incremental or scoped_full. "
                        f"Document that starts with "
                        f"content: {hashed_doc.page_content[:100]} "

@@ -828,7 +828,7 @@ async def aindex(
                    raise ValueError(msg)
                if cleanup == "scoped_full":
                    scoped_full_cleanup_source_ids.add(source_id)
-            #
+            # Source IDs cannot be None after for loop above.
            source_ids = cast("Sequence[str]", source_ids)

        exists_batch = await record_manager.aexists(

@@ -888,7 +888,7 @@ async def aindex(
    if cleanup == "incremental":
        # Get the uids of the documents that were not returned by the loader.

-        # mypy isn't good enough to determine that source
+        # mypy isn't good enough to determine that source IDs cannot be None
        # here due to a check that's happening above, so we check again.
        for source_id in source_ids:
            if source_id is None:
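For orientation, here is a minimal sketch of the `index()` call these docstrings describe, built from the in-memory components that ship with `langchain_core` (the document contents, namespace, and printed counts are illustrative assumptions, not part of this release):

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory components keep the sketch self-contained.
record_manager = InMemoryRecordManager(namespace="example")
record_manager.create_schema()
vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))

docs = [
    Document(page_content="kitty", metadata={"source": "kitty.txt"}),
    Document(page_content="doggy", metadata={"source": "doggy.txt"}),
]

# Incremental cleanup requires a source_id_key, per the Raises section above.
result = index(
    docs,
    record_manager,
    vector_store,
    cleanup="incremental",
    source_id_key="source",
)
print(result)  # e.g. {'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

# A second run over unchanged documents is skipped thanks to the record manager.
print(index(docs, record_manager, vector_store, cleanup="incremental", source_id_key="source"))
```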
langchain_core/indexing/base.py
CHANGED

@@ -25,7 +25,7 @@ class RecordManager(ABC):
    The record manager abstraction is used by the langchain indexing API.

    The record manager keeps track of which documents have been
-    written into a
+    written into a `VectorStore` and when they were written.

    The indexing API computes hashes for each document and stores the hash
    together with the write time and the source id in the record manager.

@@ -37,7 +37,7 @@ class RecordManager(ABC):
    already been indexed, and to only index new documents.

    The main benefit of this abstraction is that it works across many vectorstores.
-    To be supported, a
+    To be supported, a `VectorStore` needs to only support the ability to add and
    delete documents by ID. Using the record manager, the indexing API will
    be able to delete outdated documents and avoid redundant indexing of documents
    that have already been indexed.

@@ -45,13 +45,13 @@ class RecordManager(ABC):
    The main constraints of this abstraction are:

    1. It relies on the time-stamps to determine which documents have been
-
-
-
+       indexed and which have not. This means that the time-stamps must be
+       monotonically increasing. The timestamp should be the timestamp
+       as measured by the server to minimize issues.
    2. The record manager is currently implemented separately from the
-
-
-
+       vectorstore, which means that the overall system becomes distributed
+       and may create issues with consistency. For example, writing to
+       record manager succeeds, but corresponding writing to `VectorStore` fails.
    """

    def __init__(

@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
class DeleteResponse(TypedDict, total=False):
    """A generic response for delete operation.

-    The fields in this response are optional and whether the
+    The fields in this response are optional and whether the `VectorStore`
    returns them or not is up to the implementation.
    """


@@ -518,7 +518,7 @@ class DocumentIndex(BaseRetriever):
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the content.

-        When an ID is specified and the content already exists in the
+        When an ID is specified and the content already exists in the `VectorStore`,
        the upsert method should update the content with the new data. If the content
        does not exist, the upsert method should add the item to the `VectorStore`.

@@ -528,20 +528,20 @@ class DocumentIndex(BaseRetriever):

        Returns:
            A response object that contains the list of IDs that were
-                successfully added or updated in the
+                successfully added or updated in the `VectorStore` and the list of IDs that
                failed to be added or updated.
        """

    async def aupsert(
        self, items: Sequence[Document], /, **kwargs: Any
    ) -> UpsertResponse:
-        """Add or update documents in the
+        """Add or update documents in the `VectorStore`. Async version of `upsert`.

        The upsert functionality should utilize the ID field of the item
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the item.

-        When an ID is specified and the item already exists in the
+        When an ID is specified and the item already exists in the `VectorStore`,
        the upsert method should update the item with the new data. If the item
        does not exist, the upsert method should add the item to the `VectorStore`.

@@ -551,7 +551,7 @@ class DocumentIndex(BaseRetriever):

        Returns:
            A response object that contains the list of IDs that were
-                successfully added or updated in the
+                successfully added or updated in the `VectorStore` and the list of IDs that
                failed to be added or updated.
        """
        return await run_in_executor(

@@ -568,7 +568,7 @@ class DocumentIndex(BaseRetriever):
        Calling delete without any input parameters should raise a ValueError!

        Args:
-            ids: List of
+            ids: List of IDs to delete.
            **kwargs: Additional keyword arguments. This is up to the implementation.
                For example, can include an option to delete the entire index,
                or else issue a non-blocking delete etc.

@@ -586,7 +586,7 @@ class DocumentIndex(BaseRetriever):
        Calling adelete without any input parameters should raise a ValueError!

        Args:
-            ids: List of
+            ids: List of IDs to delete.
            **kwargs: Additional keyword arguments. This is up to the implementation.
                For example, can include an option to delete the entire index.

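The `RecordManager` contract described above can be exercised directly with the built-in `InMemoryRecordManager`; a small sketch (the keys and group IDs are made up for illustration):

```python
from langchain_core.indexing import InMemoryRecordManager

record_manager = InMemoryRecordManager(namespace="example")
record_manager.create_schema()

# Record two document hashes, both attributed to the same source (group) ID.
record_manager.update(["doc-1", "doc-2"], group_ids=["source-a", "source-a"])

print(record_manager.exists(["doc-1", "doc-3"]))         # [True, False]
print(record_manager.list_keys(group_ids=["source-a"]))  # ['doc-1', 'doc-2']

# Outdated keys can then be removed, which is what the cleanup modes rely on.
record_manager.delete_keys(["doc-2"])
```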
langchain_core/indexing/in_memory.py
CHANGED

@@ -62,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
        """Delete by IDs.

        Args:
-            ids: List of
+            ids: List of IDs to delete.

        Raises:
-            ValueError: If
+            ValueError: If IDs is None.

        Returns:
            A response object that contains the list of IDs that were successfully
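To see the upsert and delete semantics from these docstrings in action, a rough sketch using the reference `InMemoryDocumentIndex` (the IDs are illustrative, and the exact response keys are whatever the `UpsertResponse`/`DeleteResponse` TypedDicts above allow):

```python
from langchain_core.documents import Document
from langchain_core.indexing.in_memory import InMemoryDocumentIndex

doc_index = InMemoryDocumentIndex()

# Upsert keys off the document's `id`: a second call with the same ID updates in place.
print(doc_index.upsert([Document(id="doc-1", page_content="v1")]))  # {'succeeded': ['doc-1'], 'failed': []}
doc_index.upsert([Document(id="doc-1", page_content="v2")])

# Delete by IDs; per the docstring, calling delete() with no IDs should raise.
print(doc_index.delete(["doc-1"]))
```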
langchain_core/language_models/__init__.py
CHANGED

@@ -6,12 +6,13 @@ LangChain has two main classes to work with language models: chat models and
**Chat models**

Language models that use a sequence of messages as inputs and return chat messages
-as outputs (as opposed to using plain text).
-distinct roles to conversation messages, helping to distinguish messages from the AI,
-users, and instructions such as system messages.
+as outputs (as opposed to using plain text).

-
-
+Chat models support the assignment of distinct roles to conversation messages, helping
+to distinguish messages from the AI, users, and instructions such as system messages.
+
+The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
+from this class.

See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).

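As a concrete illustration of the messages-in, chat-message-out contract (using one of the fake chat models bundled with `langchain_core` for testing; the canned response is arbitrary):

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.messages import HumanMessage, SystemMessage

# A canned BaseChatModel implementation; real integrations subclass BaseChatModel.
model = FakeListChatModel(responses=["Paris."])

reply = model.invoke(
    [
        SystemMessage(content="You are a terse assistant."),
        HumanMessage(content="What is the capital of France?"),
    ]
)
print(type(reply).__name__, reply.content)  # AIMessage Paris.
```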
langchain_core/language_models/base.py
CHANGED

@@ -262,13 +262,13 @@ class BaseLanguageModel(
        return self.lc_attributes

    def get_token_ids(self, text: str) -> list[int]:
-        """Return the ordered
+        """Return the ordered IDs of the tokens in a text.

        Args:
            text: The string input to tokenize.

        Returns:
-            A list of
+            A list of IDs corresponding to the tokens in the text, in order they occur
                in the text.
        """
        if self.custom_get_token_ids is not None:

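A small sketch of the `get_token_ids` contract, assuming a hand-rolled byte-level tokenizer supplied through the `custom_get_token_ids` hook so the example stays self-contained:

```python
from langchain_core.language_models import FakeListChatModel

# Supply a custom tokenizer so get_token_ids doesn't hit the default fallback.
model = FakeListChatModel(
    responses=["ok"],
    custom_get_token_ids=lambda text: list(text.encode("utf-8")),
)

print(model.get_token_ids("abc"))   # [97, 98, 99], the ordered token IDs
print(model.get_num_tokens("abc"))  # 3
```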
langchain_core/language_models/llms.py
CHANGED

@@ -1,4 +1,7 @@
-"""Base interface for large language models to expose.
+"""Base interface for traditional large language models (LLMs) to expose.
+
+These are traditionally older models (newer models generally are chat models).
+"""

from __future__ import annotations


@@ -1391,11 +1394,6 @@ class LLM(BaseLLM):
    `astream` will use `_astream` if provided, otherwise it will implement
    a fallback behavior that will use `_stream` if `_stream` is implemented,
    and use `_acall` if `_stream` is not implemented.
-
-    Please see the following guide for more information on how to
-    implement a custom LLM:
-
-    https://python.langchain.com/docs/how_to/custom_llm/
    """

    @abstractmethod

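Even with the how-to link dropped from the docstring, the usual pattern for a custom `LLM` is unchanged: implement `_call` (and optionally `_stream`/`_astream`) and the base class supplies `invoke`, batching, and the async streaming fallback described above. A toy sketch, with an invented `EchoLLM`:

```python
from typing import Any, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class EchoLLM(LLM):
    """Toy string-in/string-out model; a real integration would call an API here."""

    @property
    def _llm_type(self) -> str:
        return "echo"

    def _call(
        self,
        prompt: str,
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        return prompt.upper()


print(EchoLLM().invoke("hello"))  # 'HELLO'
```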
langchain_core/load/dump.py
CHANGED

@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
        obj: The object to serialize to json if it is a Serializable object.

    Returns:
-        A
+        A JSON serializable object or a SerializedNotImplemented object.
    """
    if isinstance(obj, Serializable):
        return obj.to_json()

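In practice `default` is the hook that gets passed to `json.dumps`; a minimal sketch:

```python
import json

from langchain_core.load.dump import default
from langchain_core.messages import HumanMessage

# Serializable objects become their to_json() dict; everything else becomes a
# "not_implemented" stub instead of raising.
print(json.dumps(HumanMessage(content="hi"), default=default, indent=2))
```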
langchain_core/load/serializable.py
CHANGED

@@ -97,11 +97,14 @@ class Serializable(BaseModel, ABC):
        by default. This is to prevent accidental serialization of objects that should
        not be serialized.
    - `get_lc_namespace`: Get the namespace of the LangChain object.
+
        During deserialization, this namespace is used to identify
        the correct class to instantiate.
+
        Please see the `Reviver` class in `langchain_core.load.load` for more details.
        During deserialization an additional mapping is handle classes that have moved
        or been renamed across package versions.
+
    - `lc_secrets`: A map of constructor argument names to secret ids.
    - `lc_attributes`: List of additional attribute names that should be included
        as part of the serialized representation.

@@ -194,7 +197,7 @@ class Serializable(BaseModel, ABC):
            ValueError: If the class has deprecated attributes.

        Returns:
-            A
+            A JSON serializable object or a `SerializedNotImplemented` object.
        """
        if not self.is_lc_serializable():
            return self.to_json_not_implemented()

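Putting the serialization pieces above together, a round-trip sketch with `dumpd` and `load` (the message is arbitrary, and `load` may emit a beta warning):

```python
from langchain_core.load import dumpd, load
from langchain_core.messages import AIMessage

msg = AIMessage(content="hello")

data = dumpd(msg)
# The recorded namespace is what get_lc_namespace / the Reviver use to find the class.
print(data["type"], data["id"])  # constructor ['langchain', 'schema', 'messages', 'AIMessage']

restored = load(data)
assert restored.content == msg.content
```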
langchain_core/messages/__init__.py
CHANGED

@@ -9,6 +9,9 @@ if TYPE_CHECKING:
    from langchain_core.messages.ai import (
        AIMessage,
        AIMessageChunk,
+        InputTokenDetails,
+        OutputTokenDetails,
+        UsageMetadata,
    )
    from langchain_core.messages.base import (
        BaseMessage,

@@ -87,10 +90,12 @@ __all__ = (
    "HumanMessage",
    "HumanMessageChunk",
    "ImageContentBlock",
+    "InputTokenDetails",
    "InvalidToolCall",
    "MessageLikeRepresentation",
    "NonStandardAnnotation",
    "NonStandardContentBlock",
+    "OutputTokenDetails",
    "PlainTextContentBlock",
    "ReasoningContentBlock",
    "RemoveMessage",

@@ -104,6 +109,7 @@ __all__ = (
    "ToolCallChunk",
    "ToolMessage",
    "ToolMessageChunk",
+    "UsageMetadata",
    "VideoContentBlock",
    "_message_from_dict",
    "convert_to_messages",

@@ -145,6 +151,7 @@ _dynamic_imports = {
    "HumanMessageChunk": "human",
    "NonStandardAnnotation": "content",
    "NonStandardContentBlock": "content",
+    "OutputTokenDetails": "ai",
    "PlainTextContentBlock": "content",
    "ReasoningContentBlock": "content",
    "RemoveMessage": "modifier",

@@ -154,12 +161,14 @@ _dynamic_imports = {
    "SystemMessage": "system",
    "SystemMessageChunk": "system",
    "ImageContentBlock": "content",
+    "InputTokenDetails": "ai",
    "InvalidToolCall": "tool",
    "TextContentBlock": "content",
    "ToolCall": "tool",
    "ToolCallChunk": "tool",
    "ToolMessage": "tool",
    "ToolMessageChunk": "tool",
+    "UsageMetadata": "ai",
    "VideoContentBlock": "content",
    "AnyMessage": "utils",
    "MessageLikeRepresentation": "utils",

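The practical effect of these re-exports is that the token-accounting types now resolve from the package root alongside the message classes:

```python
# With this change, these names import directly from langchain_core.messages.
from langchain_core.messages import InputTokenDetails, OutputTokenDetails, UsageMetadata

usage: UsageMetadata = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
```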
langchain_core/messages/ai.py
CHANGED

@@ -48,10 +48,10 @@ class InputTokenDetails(TypedDict, total=False):
    }
    ```

-    !!! version-added "Added in version 0.3.9"
-
    May also hold extra provider-specific keys.

+    !!! version-added "Added in version 0.3.9"
+
    """

    audio: int

@@ -83,6 +83,8 @@ class OutputTokenDetails(TypedDict, total=False):
    }
    ```

+    May also hold extra provider-specific keys.
+
    !!! version-added "Added in version 0.3.9"

    """

@@ -124,6 +126,10 @@ class UsageMetadata(TypedDict):
    !!! warning "Behavior changed in 0.3.9"
        Added `input_token_details` and `output_token_details`.

+    !!! note "LangSmith SDK"
+        The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
+        LangSmith's `UsageMetadata` has additional fields to capture cost information
+        used by the LangSmith platform.
    """

    input_tokens: int

@@ -131,7 +137,7 @@ class UsageMetadata(TypedDict):
    output_tokens: int
    """Count of output (or completion) tokens. Sum of all output token types."""
    total_tokens: int
-    """Total token count. Sum of input_tokens + output_tokens
+    """Total token count. Sum of `input_tokens` + `output_tokens`."""
    input_token_details: NotRequired[InputTokenDetails]
    """Breakdown of input token counts.


@@ -141,7 +147,6 @@ class UsageMetadata(TypedDict):
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.
-
    """


@@ -153,7 +158,6 @@ class AIMessage(BaseMessage):
    This message represents the output of the model and consists of both
    the raw output as returned by the model and standardized fields
    (e.g., tool calls, usage metadata) added by the LangChain framework.
-
    """

    tool_calls: list[ToolCall] = []

@@ -651,13 +655,13 @@ def add_ai_message_chunks(
                chunk_id = id_
                break
        else:
-            # second pass: prefer lc_run-*
+            # second pass: prefer lc_run-* IDs over lc_* IDs
            for id_ in candidates:
                if id_ and id_.startswith(LC_ID_PREFIX):
                    chunk_id = id_
                    break
            else:
-                # third pass: take any remaining
+                # third pass: take any remaining ID (auto-generated lc_* IDs)
                for id_ in candidates:
                    if id_:
                        chunk_id = id_
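A sketch of how the usage-metadata types attach to an `AIMessage` (the token counts mirror the docstring examples and are otherwise arbitrary):

```python
from langchain_core.messages import AIMessage
from langchain_core.messages.ai import InputTokenDetails, OutputTokenDetails, UsageMetadata

usage = UsageMetadata(
    input_tokens=350,
    output_tokens=240,
    total_tokens=590,  # input_tokens + output_tokens
    input_token_details=InputTokenDetails(audio=10, cache_creation=200, cache_read=100),
    output_token_details=OutputTokenDetails(audio=10, reasoning=200),
)

msg = AIMessage(content="The answer is 42.", usage_metadata=usage)
print(msg.usage_metadata["total_tokens"])  # 590
```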
langchain_core/messages/base.py
CHANGED

@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
    """Base abstract message class.

    Messages are the inputs and outputs of a chat model.
+
+    Examples include [`HumanMessage`][langchain.messages.HumanMessage],
+    [`AIMessage`][langchain.messages.AIMessage], and
+    [`SystemMessage`][langchain.messages.SystemMessage].
    """

    content: str | list[str | dict]

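A tiny illustration of the message classes now cross-referenced in the docstring:

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

for msg in (SystemMessage("Be brief."), HumanMessage("hi"), AIMessage("hello")):
    print(msg.type, "->", msg.content)
# system -> Be brief.
# human -> hi
# ai -> hello
```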
langchain_core/messages/block_translators/google_genai.py
CHANGED

@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
        else:
            # Assume it's raw base64 without data URI
            try:
-                # Validate base64 and decode for
+                # Validate base64 and decode for MIME type detection
                decoded_bytes = base64.b64decode(url, validate=True)

                image_url_b64_block = {

@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
            try:
                import filetype  # type: ignore[import-not-found]  # noqa: PLC0415

-                # Guess
+                # Guess MIME type based on file bytes
                mime_type = None
                kind = filetype.guess(decoded_bytes)
                if kind:

@@ -458,6 +458,8 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
        if outcome is not None:
            server_tool_result_block["extras"]["outcome"] = outcome
        converted_blocks.append(server_tool_result_block)
+    elif item_type == "text":
+        converted_blocks.append(cast("types.TextContentBlock", item))
    else:
        # Unknown type, preserve as non-standard
        converted_blocks.append({"type": "non_standard", "value": item})
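For reference, the MIME-detection fallback these comments describe boils down to roughly the following standalone sketch (`filetype` is an optional dependency, and the payload here is a fabricated PNG header rather than real model output):

```python
import base64

import filetype  # optional dependency; the translator guards this import with try/except

# Stand-in for a raw base64 image payload without a data URI prefix.
some_b64_payload = base64.b64encode(b"\x89PNG\r\n\x1a\n" + b"\x00" * 32).decode()

# Validate the base64 payload, then sniff the MIME type from the decoded bytes.
decoded_bytes = base64.b64decode(some_b64_payload, validate=True)
kind = filetype.guess(decoded_bytes)
mime_type = kind.mime if kind else None
print(mime_type)  # likely 'image/png' for this minimal header, else None
```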