langchain-core 1.0.0rc3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langchain-core might be problematic.

Files changed (76)
  1. langchain_core/agents.py +2 -4
  2. langchain_core/caches.py +16 -7
  3. langchain_core/callbacks/base.py +0 -4
  4. langchain_core/callbacks/manager.py +0 -11
  5. langchain_core/chat_history.py +5 -5
  6. langchain_core/document_loaders/base.py +6 -4
  7. langchain_core/document_loaders/blob_loaders.py +1 -1
  8. langchain_core/document_loaders/langsmith.py +9 -13
  9. langchain_core/documents/__init__.py +24 -3
  10. langchain_core/documents/base.py +72 -61
  11. langchain_core/documents/compressor.py +6 -6
  12. langchain_core/documents/transformers.py +6 -6
  13. langchain_core/embeddings/fake.py +2 -2
  14. langchain_core/example_selectors/semantic_similarity.py +7 -7
  15. langchain_core/exceptions.py +2 -2
  16. langchain_core/indexing/__init__.py +1 -1
  17. langchain_core/indexing/api.py +62 -62
  18. langchain_core/indexing/base.py +20 -22
  19. langchain_core/indexing/in_memory.py +2 -4
  20. langchain_core/language_models/__init__.py +6 -5
  21. langchain_core/language_models/base.py +7 -8
  22. langchain_core/language_models/chat_models.py +84 -78
  23. langchain_core/language_models/fake_chat_models.py +1 -1
  24. langchain_core/language_models/llms.py +20 -18
  25. langchain_core/load/dump.py +6 -8
  26. langchain_core/load/serializable.py +4 -1
  27. langchain_core/messages/__init__.py +9 -0
  28. langchain_core/messages/ai.py +11 -7
  29. langchain_core/messages/base.py +4 -0
  30. langchain_core/messages/block_translators/google_genai.py +5 -3
  31. langchain_core/messages/content.py +4 -4
  32. langchain_core/messages/utils.py +17 -17
  33. langchain_core/output_parsers/__init__.py +17 -1
  34. langchain_core/output_parsers/base.py +3 -0
  35. langchain_core/output_parsers/format_instructions.py +9 -4
  36. langchain_core/output_parsers/json.py +5 -2
  37. langchain_core/output_parsers/list.py +16 -16
  38. langchain_core/output_parsers/openai_tools.py +2 -2
  39. langchain_core/output_parsers/pydantic.py +1 -1
  40. langchain_core/output_parsers/string.py +3 -3
  41. langchain_core/output_parsers/xml.py +28 -25
  42. langchain_core/outputs/generation.py +2 -3
  43. langchain_core/prompt_values.py +0 -6
  44. langchain_core/prompts/base.py +5 -3
  45. langchain_core/prompts/chat.py +60 -52
  46. langchain_core/prompts/string.py +5 -2
  47. langchain_core/prompts/structured.py +12 -8
  48. langchain_core/rate_limiters.py +1 -3
  49. langchain_core/retrievers.py +41 -37
  50. langchain_core/runnables/base.py +25 -29
  51. langchain_core/runnables/branch.py +9 -9
  52. langchain_core/runnables/config.py +2 -4
  53. langchain_core/runnables/configurable.py +3 -3
  54. langchain_core/runnables/fallbacks.py +1 -1
  55. langchain_core/runnables/graph.py +7 -3
  56. langchain_core/runnables/retry.py +1 -1
  57. langchain_core/runnables/schema.py +2 -5
  58. langchain_core/runnables/utils.py +3 -3
  59. langchain_core/stores.py +4 -6
  60. langchain_core/tools/base.py +68 -14
  61. langchain_core/tools/convert.py +8 -7
  62. langchain_core/tools/retriever.py +6 -5
  63. langchain_core/tools/structured.py +7 -5
  64. langchain_core/tracers/event_stream.py +4 -1
  65. langchain_core/tracers/log_stream.py +6 -3
  66. langchain_core/utils/function_calling.py +8 -0
  67. langchain_core/utils/json_schema.py +1 -1
  68. langchain_core/utils/strings.py +1 -4
  69. langchain_core/utils/utils.py +12 -5
  70. langchain_core/vectorstores/base.py +130 -130
  71. langchain_core/vectorstores/in_memory.py +4 -4
  72. langchain_core/vectorstores/utils.py +1 -1
  73. langchain_core/version.py +1 -1
  74. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/METADATA +8 -7
  75. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/RECORD +76 -76
  76. {langchain_core-1.0.0rc3.dist-info → langchain_core-1.0.2.dist-info}/WHEEL +0 -0
langchain_core/example_selectors/semantic_similarity.py

@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
     """Optional keys to filter input to. If provided, the search is based on
     the input variables instead of all variables."""
     vectorstore_kwargs: dict[str, Any] | None = None
-    """Extra arguments passed to similarity_search function of the vectorstore."""
+    """Extra arguments passed to similarity_search function of the `VectorStore`."""
 
     model_config = ConfigDict(
         arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
             vectorstore_kwargs: Extra arguments passed to similarity_search function
-                of the vectorstore.
+                of the `VectorStore`.
             vectorstore_cls_kwargs: optional kwargs containing url for vector store
 
         Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
             vectorstore_kwargs: Extra arguments passed to similarity_search function
-                of the vectorstore.
+                of the `VectorStore`.
             vectorstore_cls_kwargs: optional kwargs containing url for vector store
 
         Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
             k: Number of examples to select.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
             vectorstore_kwargs: Extra arguments passed to similarity_search function
-                of the vectorstore.
+                of the `VectorStore`.
             vectorstore_cls_kwargs: optional kwargs containing url for vector store
 
         Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
             embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
             vectorstore_cls: A vector store DB interface class, e.g. FAISS.
             k: Number of examples to select.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
             input_keys: If provided, the search is based on the input variables
                 instead of all variables.
             example_keys: If provided, keys to filter examples to.
             vectorstore_kwargs: Extra arguments passed to similarity_search function
-                of the vectorstore.
+                of the `VectorStore`.
             vectorstore_cls_kwargs: optional kwargs containing url for vector store
 
         Returns:
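The renamed `vectorstore_kwargs` field above is simply forwarded to the vector store's `similarity_search` call. A minimal sketch of that flow, using the fake embedding and in-memory vector store that ship with langchain-core (the example data is made up for illustration):

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.vectorstores import InMemoryVectorStore

selector = SemanticSimilarityExampleSelector.from_examples(
    examples=[
        {"input": "happy", "output": "sad"},
        {"input": "tall", "output": "short"},
    ],
    embeddings=DeterministicFakeEmbedding(size=8),
    vectorstore_cls=InMemoryVectorStore,
    k=1,
    # Forwarded verbatim to InMemoryVectorStore.similarity_search(...)
    vectorstore_kwargs={},
)
print(selector.select_examples({"input": "cheerful"}))  # one nearest example
```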
langchain_core/exceptions.py

@@ -86,6 +86,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
     """
     return (
         f"{message}\n"
-        "For troubleshooting, visit: https://python.langchain.com/docs/"
-        f"troubleshooting/errors/{error_code.value} "
+        "For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
+        f"/errors/{error_code.value} "
     )
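The hunk above only swaps the troubleshooting URL; the helper's behavior is unchanged. A quick sketch of the resulting message, assuming `OUTPUT_PARSING_FAILURE` is a valid `ErrorCode` member:

```python
from langchain_core.exceptions import ErrorCode, create_message

print(
    create_message(
        message="Failed to parse model output.",
        error_code=ErrorCode.OUTPUT_PARSING_FAILURE,
    )
)
# Failed to parse model output.
# For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE
```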
langchain_core/indexing/__init__.py

@@ -1,7 +1,7 @@
 """Code to help indexing data into a vectorstore.
 
 This package contains helper logic to help deal with indexing data into
-a vectorstore while avoiding duplicated content and over-writing content
+a `VectorStore` while avoiding duplicated content and over-writing content
 if it's unchanged.
 """
 
langchain_core/indexing/api.py

@@ -304,42 +304,42 @@ def index(
     !!! warning
 
         * In full mode, the loader should be returning
-          the entire dataset, and not just a subset of the dataset.
-          Otherwise, the auto_cleanup will remove documents that it is not
-          supposed to.
+            the entire dataset, and not just a subset of the dataset.
+            Otherwise, the auto_cleanup will remove documents that it is not
+            supposed to.
         * In incremental mode, if documents associated with a particular
-          source id appear across different batches, the indexing API
-          will do some redundant work. This will still result in the
-          correct end state of the index, but will unfortunately not be
-          100% efficient. For example, if a given document is split into 15
-          chunks, and we index them using a batch size of 5, we'll have 3 batches
-          all with the same source id. In general, to avoid doing too much
-          redundant work select as big a batch size as possible.
+            source id appear across different batches, the indexing API
+            will do some redundant work. This will still result in the
+            correct end state of the index, but will unfortunately not be
+            100% efficient. For example, if a given document is split into 15
+            chunks, and we index them using a batch size of 5, we'll have 3 batches
+            all with the same source id. In general, to avoid doing too much
+            redundant work select as big a batch size as possible.
         * The `scoped_full` mode is suitable if determining an appropriate batch size
-          is challenging or if your data loader cannot return the entire dataset at
-          once. This mode keeps track of source IDs in memory, which should be fine
-          for most use cases. If your dataset is large (10M+ docs), you will likely
-          need to parallelize the indexing process regardless.
+            is challenging or if your data loader cannot return the entire dataset at
+            once. This mode keeps track of source IDs in memory, which should be fine
+            for most use cases. If your dataset is large (10M+ docs), you will likely
+            need to parallelize the indexing process regardless.
 
     Args:
         docs_source: Data loader or iterable of documents to index.
         record_manager: Timestamped set to keep track of which documents were
             updated.
-        vector_store: VectorStore or DocumentIndex to index the documents into.
+        vector_store: `VectorStore` or DocumentIndex to index the documents into.
         batch_size: Batch size to use when indexing.
         cleanup: How to handle clean up of documents.
 
            - incremental: Cleans up all documents that haven't been updated AND
-              that are associated with source ids that were seen during indexing.
-              Clean up is done continuously during indexing helping to minimize the
-              probability of users seeing duplicated content.
+                that are associated with source IDs that were seen during indexing.
+                Clean up is done continuously during indexing helping to minimize the
+                probability of users seeing duplicated content.
            - full: Delete all documents that have not been returned by the loader
-              during this run of indexing.
-              Clean up runs after all documents have been indexed.
-              This means that users may see duplicated content during indexing.
+                during this run of indexing.
+                Clean up runs after all documents have been indexed.
+                This means that users may see duplicated content during indexing.
            - scoped_full: Similar to Full, but only deletes all documents
-              that haven't been updated AND that are associated with
-              source ids that were seen during indexing.
+                that haven't been updated AND that are associated with
+                source IDs that were seen during indexing.
            - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
             of the document.
@@ -363,7 +363,7 @@ def index(
             When changing the key encoder, you must change the
             index as well to avoid duplicated documents in the cache.
         upsert_kwargs: Additional keyword arguments to pass to the add_documents
-            method of the VectorStore or the upsert method of the DocumentIndex.
+            method of the `VectorStore` or the upsert method of the DocumentIndex.
             For example, you can use this to specify a custom vector_field:
             upsert_kwargs={"vector_field": "embedding"}
             !!! version-added "Added in version 0.3.10"
@@ -375,10 +375,10 @@
     Raises:
         ValueError: If cleanup mode is not one of 'incremental', 'full' or None
         ValueError: If cleanup mode is incremental and source_id_key is None.
-        ValueError: If vectorstore does not have
+        ValueError: If `VectorStore` does not have
            "delete" and "add_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+        TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
         AssertionError: If `source_id` is None when cleanup mode is incremental.
             (should be unreachable code).
     """
@@ -415,7 +415,7 @@ def index(
         raise ValueError(msg)
 
     if type(destination).delete == VectorStore.delete:
-        # Checking if the vectorstore has overridden the default delete method
+        # Checking if the VectorStore has overridden the default delete method
         # implementation which just raises a NotImplementedError
         msg = "Vectorstore has not implemented the delete method"
         raise ValueError(msg)
@@ -466,11 +466,11 @@ def index(
         ]
 
         if cleanup in {"incremental", "scoped_full"}:
-            # source ids are required.
+            # Source IDs are required.
             for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                 if source_id is None:
                     msg = (
-                        f"Source ids are required when cleanup mode is "
+                        f"Source IDs are required when cleanup mode is "
                         f"incremental or scoped_full. "
                         f"Document that starts with "
                         f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +479,7 @@
                     raise ValueError(msg)
                 if cleanup == "scoped_full":
                     scoped_full_cleanup_source_ids.add(source_id)
-            # source ids cannot be None after for loop above.
+            # Source IDs cannot be None after for loop above.
             source_ids = cast("Sequence[str]", source_ids)
 
         exists_batch = record_manager.exists(
@@ -538,7 +538,7 @@
         # If source IDs are provided, we can do the deletion incrementally!
         if cleanup == "incremental":
             # Get the uids of the documents that were not returned by the loader.
-            # mypy isn't good enough to determine that source ids cannot be None
+            # mypy isn't good enough to determine that source IDs cannot be None
             # here due to a check that's happening above, so we check again.
             for source_id in source_ids:
                 if source_id is None:
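The docstring changes above are cosmetic, but the cleanup semantics they describe are the heart of this API. A runnable sketch of `index()` with `incremental` cleanup, using the in-memory record manager and vector store bundled with langchain-core (document contents and source names are illustrative):

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

record_manager = InMemoryRecordManager(namespace="demo")
record_manager.create_schema()
vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))

docs = [
    Document(page_content="hello", metadata={"source": "a.txt"}),
    Document(page_content="world", metadata={"source": "b.txt"}),
]

# First run adds both documents; rerunning with identical docs skips them,
# and docs missing from a later run of the same source are cleaned up.
result = index(
    docs,
    record_manager,
    vector_store,
    cleanup="incremental",
    source_id_key="source",
)
print(result)  # e.g. {'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}
```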
@@ -642,42 +642,42 @@ async def aindex(
     !!! warning
 
         * In full mode, the loader should be returning
-          the entire dataset, and not just a subset of the dataset.
-          Otherwise, the auto_cleanup will remove documents that it is not
-          supposed to.
+            the entire dataset, and not just a subset of the dataset.
+            Otherwise, the auto_cleanup will remove documents that it is not
+            supposed to.
         * In incremental mode, if documents associated with a particular
-          source id appear across different batches, the indexing API
-          will do some redundant work. This will still result in the
-          correct end state of the index, but will unfortunately not be
-          100% efficient. For example, if a given document is split into 15
-          chunks, and we index them using a batch size of 5, we'll have 3 batches
-          all with the same source id. In general, to avoid doing too much
-          redundant work select as big a batch size as possible.
+            source id appear across different batches, the indexing API
+            will do some redundant work. This will still result in the
+            correct end state of the index, but will unfortunately not be
+            100% efficient. For example, if a given document is split into 15
+            chunks, and we index them using a batch size of 5, we'll have 3 batches
+            all with the same source id. In general, to avoid doing too much
+            redundant work select as big a batch size as possible.
         * The `scoped_full` mode is suitable if determining an appropriate batch size
-          is challenging or if your data loader cannot return the entire dataset at
-          once. This mode keeps track of source IDs in memory, which should be fine
-          for most use cases. If your dataset is large (10M+ docs), you will likely
-          need to parallelize the indexing process regardless.
+            is challenging or if your data loader cannot return the entire dataset at
+            once. This mode keeps track of source IDs in memory, which should be fine
+            for most use cases. If your dataset is large (10M+ docs), you will likely
+            need to parallelize the indexing process regardless.
 
     Args:
         docs_source: Data loader or iterable of documents to index.
         record_manager: Timestamped set to keep track of which documents were
             updated.
-        vector_store: VectorStore or DocumentIndex to index the documents into.
+        vector_store: `VectorStore` or DocumentIndex to index the documents into.
         batch_size: Batch size to use when indexing.
         cleanup: How to handle clean up of documents.
 
            - incremental: Cleans up all documents that haven't been updated AND
-              that are associated with source ids that were seen during indexing.
-              Clean up is done continuously during indexing helping to minimize the
-              probability of users seeing duplicated content.
+                that are associated with source IDs that were seen during indexing.
+                Clean up is done continuously during indexing helping to minimize the
+                probability of users seeing duplicated content.
            - full: Delete all documents that have not been returned by the loader
-              during this run of indexing.
-              Clean up runs after all documents have been indexed.
-              This means that users may see duplicated content during indexing.
+                during this run of indexing.
+                Clean up runs after all documents have been indexed.
+                This means that users may see duplicated content during indexing.
            - scoped_full: Similar to Full, but only deletes all documents
-              that haven't been updated AND that are associated with
-              source ids that were seen during indexing.
+                that haven't been updated AND that are associated with
+                source IDs that were seen during indexing.
            - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
             of the document.
@@ -701,7 +701,7 @@ async def aindex(
             When changing the key encoder, you must change the
             index as well to avoid duplicated documents in the cache.
         upsert_kwargs: Additional keyword arguments to pass to the add_documents
-            method of the VectorStore or the upsert method of the DocumentIndex.
+            method of the `VectorStore` or the upsert method of the DocumentIndex.
             For example, you can use this to specify a custom vector_field:
             upsert_kwargs={"vector_field": "embedding"}
             !!! version-added "Added in version 0.3.10"
@@ -713,10 +713,10 @@
     Raises:
         ValueError: If cleanup mode is not one of 'incremental', 'full' or None
         ValueError: If cleanup mode is incremental and source_id_key is None.
-        ValueError: If vectorstore does not have
+        ValueError: If `VectorStore` does not have
            "adelete" and "aadd_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+        TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
         AssertionError: If `source_id_key` is None when cleanup mode is
             incremental or `scoped_full` (should be unreachable).
     """
@@ -757,7 +757,7 @@ async def aindex(
         type(destination).adelete == VectorStore.adelete
         and type(destination).delete == VectorStore.delete
     ):
-        # Checking if the vectorstore has overridden the default adelete or delete
+        # Checking if the VectorStore has overridden the default adelete or delete
         # methods implementation which just raises a NotImplementedError
         msg = "Vectorstore has not implemented the adelete or delete method"
         raise ValueError(msg)
@@ -815,11 +815,11 @@ async def aindex(
         ]
 
         if cleanup in {"incremental", "scoped_full"}:
-            # If the cleanup mode is incremental, source ids are required.
+            # If the cleanup mode is incremental, source IDs are required.
             for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                 if source_id is None:
                     msg = (
-                        f"Source ids are required when cleanup mode is "
+                        f"Source IDs are required when cleanup mode is "
                         f"incremental or scoped_full. "
                         f"Document that starts with "
                         f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +828,7 @@
                     raise ValueError(msg)
                 if cleanup == "scoped_full":
                     scoped_full_cleanup_source_ids.add(source_id)
-            # source ids cannot be None after for loop above.
+            # Source IDs cannot be None after for loop above.
             source_ids = cast("Sequence[str]", source_ids)
 
         exists_batch = await record_manager.aexists(
@@ -888,7 +888,7 @@ async def aindex(
         if cleanup == "incremental":
             # Get the uids of the documents that were not returned by the loader.
 
-            # mypy isn't good enough to determine that source ids cannot be None
+            # mypy isn't good enough to determine that source IDs cannot be None
             # here due to a check that's happening above, so we check again.
             for source_id in source_ids:
                 if source_id is None:
langchain_core/indexing/base.py

@@ -25,7 +25,7 @@ class RecordManager(ABC):
     The record manager abstraction is used by the langchain indexing API.
 
     The record manager keeps track of which documents have been
-    written into a vectorstore and when they were written.
+    written into a `VectorStore` and when they were written.
 
     The indexing API computes hashes for each document and stores the hash
     together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
     already been indexed, and to only index new documents.
 
     The main benefit of this abstraction is that it works across many vectorstores.
-    To be supported, a vectorstore needs to only support the ability to add and
+    To be supported, a `VectorStore` needs to only support the ability to add and
     delete documents by ID. Using the record manager, the indexing API will
     be able to delete outdated documents and avoid redundant indexing of documents
     that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
     The main constraints of this abstraction are:
 
     1. It relies on the time-stamps to determine which documents have been
-       indexed and which have not. This means that the time-stamps must be
-       monotonically increasing. The timestamp should be the timestamp
-       as measured by the server to minimize issues.
+        indexed and which have not. This means that the time-stamps must be
+        monotonically increasing. The timestamp should be the timestamp
+        as measured by the server to minimize issues.
     2. The record manager is currently implemented separately from the
-       vectorstore, which means that the overall system becomes distributed
-       and may create issues with consistency. For example, writing to
-       record manager succeeds, but corresponding writing to vectorstore fails.
+        vectorstore, which means that the overall system becomes distributed
+        and may create issues with consistency. For example, writing to
+        record manager succeeds, but corresponding writing to `VectorStore` fails.
     """
 
     def __init__(
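For reference, the contract these docstrings describe can be exercised with the bundled in-memory implementation; the keys and group IDs below are made up:

```python
from langchain_core.indexing import InMemoryRecordManager

manager = InMemoryRecordManager(namespace="demo")
manager.create_schema()

# Record writes; group_ids let the indexing API scope cleanup to a source.
manager.update(["doc-1", "doc-2"], group_ids=["src-a", "src-a"])
print(manager.exists(["doc-1", "doc-3"]))      # [True, False]
print(manager.list_keys(group_ids=["src-a"]))  # ['doc-1', 'doc-2']
```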
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
 class DeleteResponse(TypedDict, total=False):
     """A generic response for delete operation.
 
-    The fields in this response are optional and whether the vectorstore
+    The fields in this response are optional and whether the `VectorStore`
     returns them or not is up to the implementation.
     """
 
@@ -508,8 +508,6 @@ class DocumentIndex(BaseRetriever):
     1. Storing document in the index.
     2. Fetching document by ID.
     3. Searching for document using a query.
-
-    !!! version-added "Added in version 0.2.29"
     """
 
     @abc.abstractmethod
@@ -520,40 +518,40 @@ class DocumentIndex(BaseRetriever):
         if it is provided. If the ID is not provided, the upsert method is free
         to generate an ID for the content.
 
-        When an ID is specified and the content already exists in the vectorstore,
+        When an ID is specified and the content already exists in the `VectorStore`,
         the upsert method should update the content with the new data. If the content
-        does not exist, the upsert method should add the item to the vectorstore.
+        does not exist, the upsert method should add the item to the `VectorStore`.
 
         Args:
-            items: Sequence of documents to add to the vectorstore.
+            items: Sequence of documents to add to the `VectorStore`.
             **kwargs: Additional keyword arguments.
 
         Returns:
             A response object that contains the list of IDs that were
-            successfully added or updated in the vectorstore and the list of IDs that
+            successfully added or updated in the `VectorStore` and the list of IDs that
             failed to be added or updated.
         """
 
     async def aupsert(
         self, items: Sequence[Document], /, **kwargs: Any
     ) -> UpsertResponse:
-        """Add or update documents in the vectorstore. Async version of upsert.
+        """Add or update documents in the `VectorStore`. Async version of `upsert`.
 
         The upsert functionality should utilize the ID field of the item
         if it is provided. If the ID is not provided, the upsert method is free
         to generate an ID for the item.
 
-        When an ID is specified and the item already exists in the vectorstore,
+        When an ID is specified and the item already exists in the `VectorStore`,
         the upsert method should update the item with the new data. If the item
-        does not exist, the upsert method should add the item to the vectorstore.
+        does not exist, the upsert method should add the item to the `VectorStore`.
 
         Args:
-            items: Sequence of documents to add to the vectorstore.
+            items: Sequence of documents to add to the `VectorStore`.
             **kwargs: Additional keyword arguments.
 
         Returns:
             A response object that contains the list of IDs that were
-            successfully added or updated in the vectorstore and the list of IDs that
+            successfully added or updated in the `VectorStore` and the list of IDs that
             failed to be added or updated.
         """
         return await run_in_executor(
@@ -570,7 +568,7 @@ class DocumentIndex(BaseRetriever):
         Calling delete without any input parameters should raise a ValueError!
 
         Args:
-            ids: List of ids to delete.
+            ids: List of IDs to delete.
             **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index,
                 or else issue a non-blocking delete etc.
@@ -588,7 +586,7 @@ class DocumentIndex(BaseRetriever):
         Calling adelete without any input parameters should raise a ValueError!
 
         Args:
-            ids: List of ids to delete.
+            ids: List of IDs to delete.
             **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index.
 
langchain_core/indexing/in_memory.py

@@ -23,8 +23,6 @@ class InMemoryDocumentIndex(DocumentIndex):
 
     It provides a simple search API that returns documents by the number of
     counts the given query appears in the document.
-
-    !!! version-added "Added in version 0.2.29"
     """
 
     store: dict[str, Document] = Field(default_factory=dict)
@@ -64,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
         """Delete by IDs.
 
         Args:
-            ids: List of ids to delete.
+            ids: List of IDs to delete.
 
         Raises:
-            ValueError: If ids is None.
+            ValueError: If IDs is None.
 
         Returns:
             A response object that contains the list of IDs that were successfully
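A short illustration of the `upsert`/`delete` surface touched in this file, using `InMemoryDocumentIndex` directly (the document content is a placeholder, and the exact `DeleteResponse` fields are implementation-defined, per the docstring above):

```python
from langchain_core.documents import Document
from langchain_core.indexing.in_memory import InMemoryDocumentIndex

idx = InMemoryDocumentIndex()
response = idx.upsert([Document(id="1", page_content="hello world")])
print(response["succeeded"])  # ['1']
print(idx.delete(["1"]))      # DeleteResponse reporting '1' as deleted
```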
langchain_core/language_models/__init__.py

@@ -6,12 +6,13 @@ LangChain has two main classes to work with language models: chat models and
 **Chat models**
 
 Language models that use a sequence of messages as inputs and return chat messages
-as outputs (as opposed to using plain text). Chat models support the assignment of
-distinct roles to conversation messages, helping to distinguish messages from the AI,
-users, and instructions such as system messages.
+as outputs (as opposed to using plain text).
 
-The key abstraction for chat models is `BaseChatModel`. Implementations
-should inherit from this class.
+Chat models support the assignment of distinct roles to conversation messages, helping
+to distinguish messages from the AI, users, and instructions such as system messages.
+
+The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
+from this class.
 
 See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
 
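The reflowed docstring above describes the message-in, message-out contract of chat models. A hedged illustration of that contract, using the fake chat model langchain-core ships for testing:

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.messages import HumanMessage

model = FakeListChatModel(responses=["Hello!"])
reply = model.invoke([HumanMessage(content="Hi there")])
print(type(reply).__name__, reply.content)  # AIMessage Hello!
```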
langchain_core/language_models/base.py

@@ -200,14 +200,14 @@ class BaseLanguageModel(
                 pure text generation models and `BaseMessage` objects for chat models).
             stop: Stop words to use when generating. Model output is cut off at the
                 first occurrence of any of these substrings.
-            callbacks: Callbacks to pass through. Used for executing additional
+            callbacks: `Callbacks` to pass through. Used for executing additional
                 functionality, such as logging or streaming, throughout generation.
             **kwargs: Arbitrary additional keyword arguments. These are usually passed
                 to the model provider API call.
 
         Returns:
             An `LLMResult`, which contains a list of candidate `Generation` objects for
-            each input prompt and additional model provider-specific output.
+                each input prompt and additional model provider-specific output.
 
         """
 
@@ -237,14 +237,14 @@ class BaseLanguageModel(
                 pure text generation models and `BaseMessage` objects for chat models).
             stop: Stop words to use when generating. Model output is cut off at the
                 first occurrence of any of these substrings.
-            callbacks: Callbacks to pass through. Used for executing additional
+            callbacks: `Callbacks` to pass through. Used for executing additional
                 functionality, such as logging or streaming, throughout generation.
             **kwargs: Arbitrary additional keyword arguments. These are usually passed
                 to the model provider API call.
 
         Returns:
             An `LLMResult`, which contains a list of candidate `Generation` objects for
-            each input prompt and additional model provider-specific output.
+                each input prompt and additional model provider-specific output.
 
         """
 
@@ -262,15 +262,14 @@
         return self.lc_attributes
 
     def get_token_ids(self, text: str) -> list[int]:
-        """Return the ordered ids of the tokens in a text.
+        """Return the ordered IDs of the tokens in a text.
 
         Args:
             text: The string input to tokenize.
 
         Returns:
-            A list of ids corresponding to the tokens in the text, in order they occur
-            in the text.
-
+            A list of IDs corresponding to the tokens in the text, in order they occur
+                in the text.
         """
         if self.custom_get_token_ids is not None:
             return self.custom_get_token_ids(text)
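The snippet above shows `get_token_ids` deferring to `custom_get_token_ids` when one is set. A sketch of that hook, using a made-up whitespace "tokenizer" purely for illustration:

```python
from langchain_core.language_models import FakeListLLM


def whitespace_token_ids(text: str) -> list[int]:
    # Stand-in tokenizer: arbitrary fake IDs, in the order tokens occur.
    return [hash(token) % 50_000 for token in text.split()]


llm = FakeListLLM(responses=["ok"], custom_get_token_ids=whitespace_token_ids)
print(llm.get_token_ids("the quick brown fox"))  # four IDs, one per word
```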