langchain-core 0.3.79__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (165)
  1. langchain_core/__init__.py +1 -1
  2. langchain_core/_api/__init__.py +3 -4
  3. langchain_core/_api/beta_decorator.py +23 -26
  4. langchain_core/_api/deprecation.py +52 -65
  5. langchain_core/_api/path.py +3 -6
  6. langchain_core/_import_utils.py +3 -4
  7. langchain_core/agents.py +19 -19
  8. langchain_core/caches.py +53 -63
  9. langchain_core/callbacks/__init__.py +1 -8
  10. langchain_core/callbacks/base.py +323 -334
  11. langchain_core/callbacks/file.py +44 -44
  12. langchain_core/callbacks/manager.py +441 -507
  13. langchain_core/callbacks/stdout.py +29 -30
  14. langchain_core/callbacks/streaming_stdout.py +32 -32
  15. langchain_core/callbacks/usage.py +60 -57
  16. langchain_core/chat_history.py +48 -63
  17. langchain_core/document_loaders/base.py +23 -23
  18. langchain_core/document_loaders/langsmith.py +37 -37
  19. langchain_core/documents/__init__.py +0 -1
  20. langchain_core/documents/base.py +62 -65
  21. langchain_core/documents/compressor.py +4 -4
  22. langchain_core/documents/transformers.py +28 -29
  23. langchain_core/embeddings/fake.py +50 -54
  24. langchain_core/example_selectors/length_based.py +1 -1
  25. langchain_core/example_selectors/semantic_similarity.py +21 -25
  26. langchain_core/exceptions.py +10 -11
  27. langchain_core/globals.py +3 -151
  28. langchain_core/indexing/api.py +61 -66
  29. langchain_core/indexing/base.py +58 -58
  30. langchain_core/indexing/in_memory.py +3 -3
  31. langchain_core/language_models/__init__.py +14 -27
  32. langchain_core/language_models/_utils.py +270 -84
  33. langchain_core/language_models/base.py +55 -162
  34. langchain_core/language_models/chat_models.py +442 -402
  35. langchain_core/language_models/fake.py +11 -11
  36. langchain_core/language_models/fake_chat_models.py +61 -39
  37. langchain_core/language_models/llms.py +123 -231
  38. langchain_core/load/dump.py +4 -5
  39. langchain_core/load/load.py +18 -28
  40. langchain_core/load/mapping.py +2 -4
  41. langchain_core/load/serializable.py +39 -40
  42. langchain_core/messages/__init__.py +61 -22
  43. langchain_core/messages/ai.py +368 -163
  44. langchain_core/messages/base.py +214 -43
  45. langchain_core/messages/block_translators/__init__.py +111 -0
  46. langchain_core/messages/block_translators/anthropic.py +470 -0
  47. langchain_core/messages/block_translators/bedrock.py +94 -0
  48. langchain_core/messages/block_translators/bedrock_converse.py +297 -0
  49. langchain_core/messages/block_translators/google_genai.py +530 -0
  50. langchain_core/messages/block_translators/google_vertexai.py +21 -0
  51. langchain_core/messages/block_translators/groq.py +143 -0
  52. langchain_core/messages/block_translators/langchain_v0.py +301 -0
  53. langchain_core/messages/block_translators/openai.py +1010 -0
  54. langchain_core/messages/chat.py +2 -6
  55. langchain_core/messages/content.py +1423 -0
  56. langchain_core/messages/function.py +6 -10
  57. langchain_core/messages/human.py +41 -38
  58. langchain_core/messages/modifier.py +2 -2
  59. langchain_core/messages/system.py +38 -28
  60. langchain_core/messages/tool.py +96 -103
  61. langchain_core/messages/utils.py +478 -504
  62. langchain_core/output_parsers/__init__.py +1 -14
  63. langchain_core/output_parsers/base.py +58 -61
  64. langchain_core/output_parsers/json.py +7 -8
  65. langchain_core/output_parsers/list.py +5 -7
  66. langchain_core/output_parsers/openai_functions.py +49 -47
  67. langchain_core/output_parsers/openai_tools.py +14 -19
  68. langchain_core/output_parsers/pydantic.py +12 -13
  69. langchain_core/output_parsers/string.py +2 -2
  70. langchain_core/output_parsers/transform.py +15 -17
  71. langchain_core/output_parsers/xml.py +8 -10
  72. langchain_core/outputs/__init__.py +1 -1
  73. langchain_core/outputs/chat_generation.py +18 -18
  74. langchain_core/outputs/chat_result.py +1 -3
  75. langchain_core/outputs/generation.py +8 -8
  76. langchain_core/outputs/llm_result.py +10 -10
  77. langchain_core/prompt_values.py +12 -12
  78. langchain_core/prompts/__init__.py +3 -27
  79. langchain_core/prompts/base.py +45 -55
  80. langchain_core/prompts/chat.py +254 -313
  81. langchain_core/prompts/dict.py +5 -5
  82. langchain_core/prompts/few_shot.py +81 -88
  83. langchain_core/prompts/few_shot_with_templates.py +11 -13
  84. langchain_core/prompts/image.py +12 -14
  85. langchain_core/prompts/loading.py +6 -8
  86. langchain_core/prompts/message.py +3 -3
  87. langchain_core/prompts/prompt.py +24 -39
  88. langchain_core/prompts/string.py +4 -4
  89. langchain_core/prompts/structured.py +42 -50
  90. langchain_core/rate_limiters.py +51 -60
  91. langchain_core/retrievers.py +49 -190
  92. langchain_core/runnables/base.py +1484 -1709
  93. langchain_core/runnables/branch.py +45 -61
  94. langchain_core/runnables/config.py +80 -88
  95. langchain_core/runnables/configurable.py +117 -134
  96. langchain_core/runnables/fallbacks.py +83 -79
  97. langchain_core/runnables/graph.py +85 -95
  98. langchain_core/runnables/graph_ascii.py +27 -28
  99. langchain_core/runnables/graph_mermaid.py +38 -50
  100. langchain_core/runnables/graph_png.py +15 -16
  101. langchain_core/runnables/history.py +135 -148
  102. langchain_core/runnables/passthrough.py +124 -150
  103. langchain_core/runnables/retry.py +46 -51
  104. langchain_core/runnables/router.py +25 -30
  105. langchain_core/runnables/schema.py +79 -74
  106. langchain_core/runnables/utils.py +62 -68
  107. langchain_core/stores.py +81 -115
  108. langchain_core/structured_query.py +8 -8
  109. langchain_core/sys_info.py +27 -29
  110. langchain_core/tools/__init__.py +1 -14
  111. langchain_core/tools/base.py +179 -187
  112. langchain_core/tools/convert.py +131 -139
  113. langchain_core/tools/render.py +10 -10
  114. langchain_core/tools/retriever.py +11 -11
  115. langchain_core/tools/simple.py +19 -24
  116. langchain_core/tools/structured.py +30 -39
  117. langchain_core/tracers/__init__.py +1 -9
  118. langchain_core/tracers/base.py +97 -99
  119. langchain_core/tracers/context.py +29 -52
  120. langchain_core/tracers/core.py +50 -60
  121. langchain_core/tracers/evaluation.py +11 -11
  122. langchain_core/tracers/event_stream.py +115 -70
  123. langchain_core/tracers/langchain.py +21 -21
  124. langchain_core/tracers/log_stream.py +43 -43
  125. langchain_core/tracers/memory_stream.py +3 -3
  126. langchain_core/tracers/root_listeners.py +16 -16
  127. langchain_core/tracers/run_collector.py +2 -4
  128. langchain_core/tracers/schemas.py +0 -129
  129. langchain_core/tracers/stdout.py +3 -3
  130. langchain_core/utils/__init__.py +1 -4
  131. langchain_core/utils/_merge.py +46 -8
  132. langchain_core/utils/aiter.py +57 -61
  133. langchain_core/utils/env.py +9 -9
  134. langchain_core/utils/function_calling.py +89 -191
  135. langchain_core/utils/html.py +7 -8
  136. langchain_core/utils/input.py +6 -6
  137. langchain_core/utils/interactive_env.py +1 -1
  138. langchain_core/utils/iter.py +37 -42
  139. langchain_core/utils/json.py +4 -3
  140. langchain_core/utils/json_schema.py +8 -8
  141. langchain_core/utils/mustache.py +9 -11
  142. langchain_core/utils/pydantic.py +33 -35
  143. langchain_core/utils/strings.py +5 -5
  144. langchain_core/utils/usage.py +1 -1
  145. langchain_core/utils/utils.py +80 -54
  146. langchain_core/vectorstores/base.py +129 -164
  147. langchain_core/vectorstores/in_memory.py +99 -174
  148. langchain_core/vectorstores/utils.py +5 -5
  149. langchain_core/version.py +1 -1
  150. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/METADATA +28 -27
  151. langchain_core-1.0.0.dist-info/RECORD +172 -0
  152. {langchain_core-0.3.79.dist-info → langchain_core-1.0.0.dist-info}/WHEEL +1 -1
  153. langchain_core/beta/__init__.py +0 -1
  154. langchain_core/beta/runnables/__init__.py +0 -1
  155. langchain_core/beta/runnables/context.py +0 -447
  156. langchain_core/memory.py +0 -120
  157. langchain_core/messages/content_blocks.py +0 -176
  158. langchain_core/prompts/pipeline.py +0 -138
  159. langchain_core/pydantic_v1/__init__.py +0 -30
  160. langchain_core/pydantic_v1/dataclasses.py +0 -23
  161. langchain_core/pydantic_v1/main.py +0 -23
  162. langchain_core/tracers/langchain_v1.py +0 -31
  163. langchain_core/utils/loading.py +0 -35
  164. langchain_core-0.3.79.dist-info/RECORD +0 -174
  165. langchain_core-0.3.79.dist-info/entry_points.txt +0 -4
langchain_core/indexing/api.py

@@ -6,16 +6,20 @@ import hashlib
 import json
 import uuid
 import warnings
-from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator, Sequence
+from collections.abc import (
+    AsyncIterable,
+    AsyncIterator,
+    Callable,
+    Iterable,
+    Iterator,
+    Sequence,
+)
 from itertools import islice
 from typing import (
     Any,
-    Callable,
     Literal,
-    Optional,
     TypedDict,
     TypeVar,
-    Union,
     cast,
 )

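This import hunk sets the pattern for the rest of the diff: `typing.Union`, `typing.Optional`, and `typing.Callable` give way to PEP 604 unions (`X | Y`, `X | None`) and `collections.abc.Callable`, both standard as of Python 3.10. A minimal sketch of the equivalence (the function below is illustrative, not from the package):

    from collections.abc import Callable

    # Pre-1.0 spelling:
    #     def assign(key: Union[str, Callable[[str], str], None]) -> Optional[str]: ...
    # 1.0 spelling, identical at runtime on Python 3.10+:
    def assign(key: str | Callable[[str], str] | None) -> str | None:
        # Return string keys unchanged; anything else has no fixed id here.
        return key if isinstance(key, str) else None

    assert assign("source") == "source"
    assert assign(None) is None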
@@ -107,8 +111,8 @@ async def _abatch(size: int, iterable: AsyncIterable[T]) -> AsyncIterator[list[T


 def _get_source_id_assigner(
-    source_id_key: Union[str, Callable[[Document], str], None],
-) -> Callable[[Document], Union[str, None]]:
+    source_id_key: str | Callable[[Document], str] | None,
+) -> Callable[[Document], str | None]:
     """Get the source id from the document."""
     if source_id_key is None:
         return lambda _doc: None
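The helper's dispatch is worth spelling out, since `source_id_key` drives the cleanup modes documented further down. A paraphrase of the logic (not the exact library code):

    from collections.abc import Callable
    from langchain_core.documents import Document

    def source_id_assigner(
        source_id_key: str | Callable[[Document], str] | None,
    ) -> Callable[[Document], str | None]:
        # None -> no source id; str -> metadata key lookup; callable -> used as-is.
        if source_id_key is None:
            return lambda _doc: None
        if isinstance(source_id_key, str):
            return lambda doc: doc.metadata[source_id_key]
        return source_id_key

    doc = Document(page_content="hello", metadata={"source": "docs/a.txt"})
    assert source_id_assigner("source")(doc) == "docs/a.txt"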
@@ -162,9 +166,8 @@ def _calculate_hash(
 def _get_document_with_hash(
     document: Document,
     *,
-    key_encoder: Union[
-        Callable[[Document], str], Literal["sha1", "sha256", "sha512", "blake2b"]
-    ],
+    key_encoder: Callable[[Document], str]
+    | Literal["sha1", "sha256", "sha512", "blake2b"],
 ) -> Document:
     """Calculate a hash of the document, and assign it to the uid.

@@ -233,7 +236,7 @@ class _HashedDocument:


 def _delete(
-    vector_store: Union[VectorStore, DocumentIndex],
+    vector_store: VectorStore | DocumentIndex,
     ids: list[str],
 ) -> None:
     if isinstance(vector_store, VectorStore):
@@ -271,19 +274,18 @@ class IndexingResult(TypedDict):


 def index(
-    docs_source: Union[BaseLoader, Iterable[Document]],
+    docs_source: BaseLoader | Iterable[Document],
     record_manager: RecordManager,
-    vector_store: Union[VectorStore, DocumentIndex],
+    vector_store: VectorStore | DocumentIndex,
     *,
     batch_size: int = 100,
-    cleanup: Optional[Literal["incremental", "full", "scoped_full"]] = None,
-    source_id_key: Union[str, Callable[[Document], str], None] = None,
+    cleanup: Literal["incremental", "full", "scoped_full"] | None = None,
+    source_id_key: str | Callable[[Document], str] | None = None,
     cleanup_batch_size: int = 1_000,
     force_update: bool = False,
-    key_encoder: Union[
-        Literal["sha1", "sha256", "sha512", "blake2b"], Callable[[Document], str]
-    ] = "sha1",
-    upsert_kwargs: Optional[dict[str, Any]] = None,
+    key_encoder: Literal["sha1", "sha256", "sha512", "blake2b"]
+    | Callable[[Document], str] = "sha1",
+    upsert_kwargs: dict[str, Any] | None = None,
 ) -> IndexingResult:
     """Index data from the loader into the vector store.

@@ -296,10 +298,10 @@ def index(
     For the time being, documents are indexed using their hashes, and users
     are not able to specify the uid of the document.

-    .. versionchanged:: 0.3.25
-        Added ``scoped_full`` cleanup mode.
+    !!! warning "Behavior changed in 0.3.25"
+        Added `scoped_full` cleanup mode.

-    .. important::
+    !!! warning

       * In full mode, the loader should be returning
         the entire dataset, and not just a subset of the dataset.
@@ -313,7 +315,7 @@ def index(
        chunks, and we index them using a batch size of 5, we'll have 3 batches
        all with the same source id. In general, to avoid doing too much
        redundant work select as big a batch size as possible.
-      * The ``scoped_full`` mode is suitable if determining an appropriate batch size
+      * The `scoped_full` mode is suitable if determining an appropriate batch size
        is challenging or if your data loader cannot return the entire dataset at
        once. This mode keeps track of source IDs in memory, which should be fine
        for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -324,8 +326,8 @@ def index(
         record_manager: Timestamped set to keep track of which documents were
             updated.
         vector_store: VectorStore or DocumentIndex to index the documents into.
-        batch_size: Batch size to use when indexing. Default is 100.
-        cleanup: How to handle clean up of documents. Default is None.
+        batch_size: Batch size to use when indexing.
+        cleanup: How to handle clean up of documents.

            - incremental: Cleans up all documents that haven't been updated AND
              that are associated with source ids that were seen during indexing.
@@ -340,17 +342,14 @@ def index(
              source ids that were seen during indexing.
            - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
-            of the document. Default is None.
+            of the document.
         cleanup_batch_size: Batch size to use when cleaning up documents.
-            Default is 1_000.
         force_update: Force update documents even if they are present in the
             record manager. Useful if you are re-indexing with updated embeddings.
-            Default is False.
         key_encoder: Hashing algorithm to use for hashing the document content and
-            metadata. Default is "sha1".
-            Other options include "blake2b", "sha256", and "sha512".
+            metadata. Options include "blake2b", "sha256", and "sha512".

-            .. versionadded:: 0.3.66
+            !!! version-added "Added in version 0.3.66"

         key_encoder: Hashing algorithm to use for hashing the document.
             If not provided, a default encoder using SHA-1 will be used.
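A callable `key_encoder` receives each `Document` and must return a stable string key. A sketch of a custom policy, assuming you want metadata-only edits to not trigger re-indexing (the helper name is hypothetical):

    import hashlib
    from langchain_core.documents import Document

    def content_only_key(doc: Document) -> str:
        # Hash only page_content, deliberately ignoring metadata.
        # (Illustrative policy; the built-in encoders hash content and metadata.)
        return hashlib.sha256(doc.page_content.encode("utf-8")).hexdigest()

    # Passed as: index(docs, record_manager, vector_store, key_encoder=content_only_key)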
@@ -367,7 +366,7 @@ def index(
             method of the VectorStore or the upsert method of the DocumentIndex.
             For example, you can use this to specify a custom vector_field:
             upsert_kwargs={"vector_field": "embedding"}
-            .. versionadded:: 0.3.10
+            !!! version-added "Added in version 0.3.10"

     Returns:
         Indexing result which contains information about how many documents
@@ -379,8 +378,8 @@ def index(
         ValueError: If vectorstore does not have
             "delete" and "add_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If ``vectorstore`` is not a VectorStore or a DocumentIndex.
-        AssertionError: If ``source_id`` is None when cleanup mode is incremental.
+        TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+        AssertionError: If `source_id` is None when cleanup mode is incremental.
             (should be unreachable code).
     """
     # Behavior is deprecated, but we keep it for backwards compatibility.
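For orientation, a minimal sketch of driving `index()` end to end with the in-memory components that ship with langchain-core (exact import paths are assumptions against the 1.0 layout):

    from langchain_core.documents import Document
    from langchain_core.embeddings import DeterministicFakeEmbedding
    from langchain_core.indexing import InMemoryRecordManager, index
    from langchain_core.vectorstores import InMemoryVectorStore

    record_manager = InMemoryRecordManager(namespace="demo")
    record_manager.create_schema()
    vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))

    docs = [
        Document(page_content="hello", metadata={"source": "a.txt"}),
        Document(page_content="world", metadata={"source": "b.txt"}),
    ]

    result = index(
        docs,
        record_manager,
        vector_store,
        cleanup="incremental",       # delete stale docs per source id
        source_id_key="source",      # metadata key identifying the source
        key_encoder="sha256",        # optional; the default stays "sha1" per the signature above
    )
    # Expected shape of the IndexingResult TypedDict on a fresh store:
    # {"num_added": 2, "num_updated": 0, "num_skipped": 0, "num_deleted": 0}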
@@ -462,13 +461,13 @@ def index(
         # Count documents removed by within-batch deduplication
         num_skipped += original_batch_size - len(hashed_docs)

-        source_ids: Sequence[Optional[str]] = [
+        source_ids: Sequence[str | None] = [
             source_id_assigner(hashed_doc) for hashed_doc in hashed_docs
         ]

         if cleanup in {"incremental", "scoped_full"}:
             # source ids are required.
-            for source_id, hashed_doc in zip(source_ids, hashed_docs):
+            for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                 if source_id is None:
                     msg = (
                         f"Source ids are required when cleanup mode is "
@@ -492,7 +491,7 @@ def index(
         docs_to_index = []
         uids_to_refresh = []
         seen_docs: set[str] = set()
-        for hashed_doc, doc_exists in zip(hashed_docs, exists_batch):
+        for hashed_doc, doc_exists in zip(hashed_docs, exists_batch, strict=False):
             hashed_id = cast("str", hashed_doc.id)
             if doc_exists:
                 if force_update:
@@ -563,7 +562,7 @@ def index(
     if cleanup == "full" or (
         cleanup == "scoped_full" and scoped_full_cleanup_source_ids
     ):
-        delete_group_ids: Optional[Sequence[str]] = None
+        delete_group_ids: Sequence[str] | None = None
         if cleanup == "scoped_full":
             delete_group_ids = list(scoped_full_cleanup_source_ids)
         while uids_to_delete := record_manager.list_keys(
@@ -591,7 +590,7 @@ async def _to_async_iterator(iterator: Iterable[T]) -> AsyncIterator[T]:


 async def _adelete(
-    vector_store: Union[VectorStore, DocumentIndex],
+    vector_store: VectorStore | DocumentIndex,
     ids: list[str],
 ) -> None:
     if isinstance(vector_store, VectorStore):
613
612
 
614
613
 
615
614
  async def aindex(
616
- docs_source: Union[BaseLoader, Iterable[Document], AsyncIterator[Document]],
615
+ docs_source: BaseLoader | Iterable[Document] | AsyncIterator[Document],
617
616
  record_manager: RecordManager,
618
- vector_store: Union[VectorStore, DocumentIndex],
617
+ vector_store: VectorStore | DocumentIndex,
619
618
  *,
620
619
  batch_size: int = 100,
621
- cleanup: Optional[Literal["incremental", "full", "scoped_full"]] = None,
622
- source_id_key: Union[str, Callable[[Document], str], None] = None,
620
+ cleanup: Literal["incremental", "full", "scoped_full"] | None = None,
621
+ source_id_key: str | Callable[[Document], str] | None = None,
623
622
  cleanup_batch_size: int = 1_000,
624
623
  force_update: bool = False,
625
- key_encoder: Union[
626
- Literal["sha1", "sha256", "sha512", "blake2b"], Callable[[Document], str]
627
- ] = "sha1",
628
- upsert_kwargs: Optional[dict[str, Any]] = None,
624
+ key_encoder: Literal["sha1", "sha256", "sha512", "blake2b"]
625
+ | Callable[[Document], str] = "sha1",
626
+ upsert_kwargs: dict[str, Any] | None = None,
629
627
  ) -> IndexingResult:
630
628
  """Async index data from the loader into the vector store.
631
629
 
@@ -638,10 +636,10 @@ async def aindex(
     For the time being, documents are indexed using their hashes, and users
     are not able to specify the uid of the document.

-    .. versionchanged:: 0.3.25
-        Added ``scoped_full`` cleanup mode.
+    !!! warning "Behavior changed in 0.3.25"
+        Added `scoped_full` cleanup mode.

-    .. important::
+    !!! warning

       * In full mode, the loader should be returning
         the entire dataset, and not just a subset of the dataset.
@@ -655,7 +653,7 @@ async def aindex(
        chunks, and we index them using a batch size of 5, we'll have 3 batches
        all with the same source id. In general, to avoid doing too much
        redundant work select as big a batch size as possible.
-      * The ``scoped_full`` mode is suitable if determining an appropriate batch size
+      * The `scoped_full` mode is suitable if determining an appropriate batch size
        is challenging or if your data loader cannot return the entire dataset at
        once. This mode keeps track of source IDs in memory, which should be fine
        for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -666,8 +664,8 @@ async def aindex(
         record_manager: Timestamped set to keep track of which documents were
             updated.
         vector_store: VectorStore or DocumentIndex to index the documents into.
-        batch_size: Batch size to use when indexing. Default is 100.
-        cleanup: How to handle clean up of documents. Default is None.
+        batch_size: Batch size to use when indexing.
+        cleanup: How to handle clean up of documents.

            - incremental: Cleans up all documents that haven't been updated AND
              that are associated with source ids that were seen during indexing.
@@ -682,17 +680,14 @@ async def aindex(
              source ids that were seen during indexing.
            - None: Do not delete any documents.
         source_id_key: Optional key that helps identify the original source
-            of the document. Default is None.
+            of the document.
         cleanup_batch_size: Batch size to use when cleaning up documents.
-            Default is 1_000.
         force_update: Force update documents even if they are present in the
             record manager. Useful if you are re-indexing with updated embeddings.
-            Default is False.
         key_encoder: Hashing algorithm to use for hashing the document content and
-            metadata. Default is "sha1".
-            Other options include "blake2b", "sha256", and "sha512".
+            metadata. Options include "blake2b", "sha256", and "sha512".

-            .. versionadded:: 0.3.66
+            !!! version-added "Added in version 0.3.66"

         key_encoder: Hashing algorithm to use for hashing the document.
             If not provided, a default encoder using SHA-1 will be used.
@@ -709,7 +704,7 @@ async def aindex(
             method of the VectorStore or the upsert method of the DocumentIndex.
             For example, you can use this to specify a custom vector_field:
             upsert_kwargs={"vector_field": "embedding"}
-            .. versionadded:: 0.3.10
+            !!! version-added "Added in version 0.3.10"

     Returns:
         Indexing result which contains information about how many documents
@@ -721,9 +716,9 @@ async def aindex(
         ValueError: If vectorstore does not have
             "adelete" and "aadd_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If ``vector_store`` is not a VectorStore or DocumentIndex.
-        AssertionError: If ``source_id_key`` is None when cleanup mode is
-            incremental or ``scoped_full`` (should be unreachable).
+        TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+        AssertionError: If `source_id_key` is None when cleanup mode is
+            incremental or `scoped_full` (should be unreachable).
     """
     # Behavior is deprecated, but we keep it for backwards compatibility.
     # # Warn only once per process.
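`aindex` mirrors the sync API and additionally accepts an `AsyncIterator[Document]` source, per the signature above. A minimal sketch using the same in-memory components (import paths assumed):

    import asyncio
    from collections.abc import AsyncIterator

    from langchain_core.documents import Document
    from langchain_core.embeddings import DeterministicFakeEmbedding
    from langchain_core.indexing import InMemoryRecordManager, aindex
    from langchain_core.vectorstores import InMemoryVectorStore

    async def doc_stream() -> AsyncIterator[Document]:
        # An async generator is a valid docs_source for aindex.
        for name in ("a.txt", "b.txt"):
            yield Document(page_content=f"contents of {name}", metadata={"source": name})

    async def main() -> None:
        record_manager = InMemoryRecordManager(namespace="demo-async")
        await record_manager.acreate_schema()
        vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
        result = await aindex(
            doc_stream(),
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
        )
        print(result)  # e.g. {"num_added": 2, "num_updated": 0, ...}

    asyncio.run(main())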
@@ -815,13 +810,13 @@ async def aindex(
         # Count documents removed by within-batch deduplication
         num_skipped += original_batch_size - len(hashed_docs)

-        source_ids: Sequence[Optional[str]] = [
+        source_ids: Sequence[str | None] = [
             source_id_assigner(doc) for doc in hashed_docs
         ]

         if cleanup in {"incremental", "scoped_full"}:
             # If the cleanup mode is incremental, source ids are required.
-            for source_id, hashed_doc in zip(source_ids, hashed_docs):
+            for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
                 if source_id is None:
                     msg = (
                         f"Source ids are required when cleanup mode is "
@@ -845,7 +840,7 @@ async def aindex(
         docs_to_index: list[Document] = []
         uids_to_refresh = []
         seen_docs: set[str] = set()
-        for hashed_doc, doc_exists in zip(hashed_docs, exists_batch):
+        for hashed_doc, doc_exists in zip(hashed_docs, exists_batch, strict=False):
             hashed_id = cast("str", hashed_doc.id)
             if doc_exists:
                 if force_update:
@@ -917,7 +912,7 @@ async def aindex(
     if cleanup == "full" or (
         cleanup == "scoped_full" and scoped_full_cleanup_source_ids
     ):
-        delete_group_ids: Optional[Sequence[str]] = None
+        delete_group_ids: Sequence[str] | None = None
         if cleanup == "scoped_full":
             delete_group_ids = list(scoped_full_cleanup_source_ids)
         while uids_to_delete := await record_manager.alist_keys(
langchain_core/indexing/base.py

@@ -5,7 +5,7 @@ from __future__ import annotations
 import abc
 import time
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Optional, TypedDict
+from typing import TYPE_CHECKING, Any, TypedDict

 from typing_extensions import override

@@ -61,7 +61,7 @@ class RecordManager(ABC):
         """Initialize the record manager.

         Args:
-            namespace (str): The namespace for the record manager.
+            namespace: The namespace for the record manager.
         """
         self.namespace = namespace

@@ -100,8 +100,8 @@ class RecordManager(ABC):
         self,
         keys: Sequence[str],
         *,
-        group_ids: Optional[Sequence[Optional[str]]] = None,
-        time_at_least: Optional[float] = None,
+        group_ids: Sequence[str | None] | None = None,
+        time_at_least: float | None = None,
     ) -> None:
         """Upsert records into the database.

@@ -128,8 +128,8 @@ class RecordManager(ABC):
         self,
         keys: Sequence[str],
         *,
-        group_ids: Optional[Sequence[Optional[str]]] = None,
-        time_at_least: Optional[float] = None,
+        group_ids: Sequence[str | None] | None = None,
+        time_at_least: float | None = None,
     ) -> None:
         """Asynchronously upsert records into the database.

@@ -177,10 +177,10 @@ class RecordManager(ABC):
     def list_keys(
         self,
         *,
-        before: Optional[float] = None,
-        after: Optional[float] = None,
-        group_ids: Optional[Sequence[str]] = None,
-        limit: Optional[int] = None,
+        before: float | None = None,
+        after: float | None = None,
+        group_ids: Sequence[str] | None = None,
+        limit: int | None = None,
     ) -> list[str]:
         """List records in the database based on the provided filters.

@@ -198,10 +198,10 @@ class RecordManager(ABC):
     async def alist_keys(
         self,
         *,
-        before: Optional[float] = None,
-        after: Optional[float] = None,
-        group_ids: Optional[Sequence[str]] = None,
-        limit: Optional[int] = None,
+        before: float | None = None,
+        after: float | None = None,
+        group_ids: Sequence[str] | None = None,
+        limit: int | None = None,
     ) -> list[str]:
         """Asynchronously list records in the database based on the provided filters.

@@ -233,7 +233,7 @@ class RecordManager(ABC):


 class _Record(TypedDict):
-    group_id: Optional[str]
+    group_id: str | None
     updated_at: float


@@ -244,7 +244,7 @@ class InMemoryRecordManager(RecordManager):
         """Initialize the in-memory record manager.

         Args:
-            namespace (str): The namespace for the record manager.
+            namespace: The namespace for the record manager.
         """
         super().__init__(namespace)
         # Each key points to a dictionary
@@ -270,18 +270,18 @@ class InMemoryRecordManager(RecordManager):
         self,
         keys: Sequence[str],
         *,
-        group_ids: Optional[Sequence[Optional[str]]] = None,
-        time_at_least: Optional[float] = None,
+        group_ids: Sequence[str | None] | None = None,
+        time_at_least: float | None = None,
     ) -> None:
         """Upsert records into the database.

         Args:
             keys: A list of record keys to upsert.
             group_ids: A list of group IDs corresponding to the keys.
-                Defaults to None.
+
             time_at_least: Optional timestamp. Implementation can use this
                 to optionally verify that the timestamp IS at least this time
-                in the system that stores. Defaults to None.
+                in the system that stores.
                 E.g., use to validate that the time in the postgres database
                 is equal to or larger than the given timestamp, if not
                 raise an error.
@@ -307,18 +307,18 @@ class InMemoryRecordManager(RecordManager):
         self,
         keys: Sequence[str],
         *,
-        group_ids: Optional[Sequence[Optional[str]]] = None,
-        time_at_least: Optional[float] = None,
+        group_ids: Sequence[str | None] | None = None,
+        time_at_least: float | None = None,
     ) -> None:
         """Async upsert records into the database.

         Args:
             keys: A list of record keys to upsert.
             group_ids: A list of group IDs corresponding to the keys.
-                Defaults to None.
+
             time_at_least: Optional timestamp. Implementation can use this
                 to optionally verify that the timestamp IS at least this time
-                in the system that stores. Defaults to None.
+                in the system that stores.
                 E.g., use to validate that the time in the postgres database
                 is equal to or larger than the given timestamp, if not
                 raise an error.
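A rough usage sketch for `InMemoryRecordManager`. The upsert method shown in these hunks is named `update` in langchain_core.indexing.base; `exists` and `delete_keys` are assumed from the same class:

    from langchain_core.indexing import InMemoryRecordManager

    manager = InMemoryRecordManager(namespace="demo")
    manager.create_schema()

    # Upsert keys, optionally tagging each with a group id (e.g. a source id).
    manager.update(["doc-1", "doc-2"], group_ids=["a.txt", "a.txt"])

    print(manager.exists(["doc-1", "doc-3"]))      # [True, False]
    print(manager.list_keys(group_ids=["a.txt"]))  # ["doc-1", "doc-2"]

    manager.delete_keys(["doc-1"])
    print(manager.list_keys())                     # ["doc-2"]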
@@ -352,22 +352,22 @@ class InMemoryRecordManager(RecordManager):
     def list_keys(
         self,
         *,
-        before: Optional[float] = None,
-        after: Optional[float] = None,
-        group_ids: Optional[Sequence[str]] = None,
-        limit: Optional[int] = None,
+        before: float | None = None,
+        after: float | None = None,
+        group_ids: Sequence[str] | None = None,
+        limit: int | None = None,
     ) -> list[str]:
         """List records in the database based on the provided filters.

         Args:
             before: Filter to list records updated before this time.
-                Defaults to None.
+
             after: Filter to list records updated after this time.
-                Defaults to None.
+
             group_ids: Filter to list records with specific group IDs.
-                Defaults to None.
+
             limit: optional limit on the number of records to return.
-                Defaults to None.
+

         Returns:
             A list of keys for the matching records.
@@ -388,22 +388,22 @@ class InMemoryRecordManager(RecordManager):
     async def alist_keys(
         self,
         *,
-        before: Optional[float] = None,
-        after: Optional[float] = None,
-        group_ids: Optional[Sequence[str]] = None,
-        limit: Optional[int] = None,
+        before: float | None = None,
+        after: float | None = None,
+        group_ids: Sequence[str] | None = None,
+        limit: int | None = None,
     ) -> list[str]:
         """Async list records in the database based on the provided filters.

         Args:
             before: Filter to list records updated before this time.
-                Defaults to None.
+
             after: Filter to list records updated after this time.
-                Defaults to None.
+
             group_ids: Filter to list records with specific group IDs.
-                Defaults to None.
+
             limit: optional limit on the number of records to return.
-                Defaults to None.
+

         Returns:
             A list of keys for the matching records.
@@ -485,7 +485,7 @@ class DeleteResponse(TypedDict, total=False):
     failed: Sequence[str]
     """The IDs that failed to be deleted.

-    .. warning::
+    !!! warning
        Deleting an ID that does not exist is **NOT** considered a failure.
     """

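Since `DeleteResponse` is declared with `total=False`, implementations may populate any subset of its fields; the field names beyond `failed` below are taken from the class and should be treated as assumptions if your version differs:

    from langchain_core.indexing.base import DeleteResponse

    # All keys are optional (total=False); an implementation might return:
    response: DeleteResponse = {
        "num_deleted": 2,
        "succeeded": ["id-1", "id-2"],
        "failed": [],  # deleting a nonexistent ID is NOT counted as a failure
    }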
@@ -509,7 +509,7 @@ class DocumentIndex(BaseRetriever):
     2. Fetching document by ID.
     3. Searching for document using a query.

-    .. versionadded:: 0.2.29
+    !!! version-added "Added in version 0.2.29"
     """

     @abc.abstractmethod
@@ -522,14 +522,14 @@ class DocumentIndex(BaseRetriever):

         When an ID is specified and the content already exists in the vectorstore,
         the upsert method should update the content with the new data. If the content
-        does not exist, the upsert method should add the item to the vectorstore.
+        does not exist, the upsert method should add the item to the `VectorStore`.

         Args:
-            items: Sequence of documents to add to the vectorstore.
+            items: Sequence of documents to add to the `VectorStore`.
             **kwargs: Additional keyword arguments.

         Returns:
-            UpsertResponse: A response object that contains the list of IDs that were
+            A response object that contains the list of IDs that were
             successfully added or updated in the vectorstore and the list of IDs that
             failed to be added or updated.
         """
@@ -545,14 +545,14 @@ class DocumentIndex(BaseRetriever):

         When an ID is specified and the item already exists in the vectorstore,
         the upsert method should update the item with the new data. If the item
-        does not exist, the upsert method should add the item to the vectorstore.
+        does not exist, the upsert method should add the item to the `VectorStore`.

         Args:
-            items: Sequence of documents to add to the vectorstore.
+            items: Sequence of documents to add to the `VectorStore`.
             **kwargs: Additional keyword arguments.

         Returns:
-            UpsertResponse: A response object that contains the list of IDs that were
+            A response object that contains the list of IDs that were
             successfully added or updated in the vectorstore and the list of IDs that
             failed to be added or updated.
         """
@@ -564,24 +564,24 @@ class DocumentIndex(BaseRetriever):
     )

     @abc.abstractmethod
-    def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> DeleteResponse:
+    def delete(self, ids: list[str] | None = None, **kwargs: Any) -> DeleteResponse:
         """Delete by IDs or other criteria.

         Calling delete without any input parameters should raise a ValueError!

         Args:
             ids: List of ids to delete.
-            kwargs: Additional keyword arguments. This is up to the implementation.
+            **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index,
                 or else issue a non-blocking delete etc.

         Returns:
-            DeleteResponse: A response object that contains the list of IDs that were
+            A response object that contains the list of IDs that were
             successfully deleted and the list of IDs that failed to be deleted.
         """

     async def adelete(
-        self, ids: Optional[list[str]] = None, **kwargs: Any
+        self, ids: list[str] | None = None, **kwargs: Any
     ) -> DeleteResponse:
         """Delete by IDs or other criteria. Async variant.

@@ -589,11 +589,11 @@ class DocumentIndex(BaseRetriever):

         Args:
             ids: List of ids to delete.
-            kwargs: Additional keyword arguments. This is up to the implementation.
+            **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index.

         Returns:
-            DeleteResponse: A response object that contains the list of IDs that were
+            A response object that contains the list of IDs that were
             successfully deleted and the list of IDs that failed to be deleted.
         """
         return await run_in_executor(
@@ -624,10 +624,10 @@ class DocumentIndex(BaseRetriever):

         Args:
             ids: List of IDs to get.
-            kwargs: Additional keyword arguments. These are up to the implementation.
+            **kwargs: Additional keyword arguments. These are up to the implementation.

         Returns:
-            list[Document]: List of documents that were found.
+            List of documents that were found.
         """

     async def aget(
@@ -650,10 +650,10 @@ class DocumentIndex(BaseRetriever):

         Args:
             ids: List of IDs to get.
-            kwargs: Additional keyword arguments. These are up to the implementation.
+            **kwargs: Additional keyword arguments. These are up to the implementation.

         Returns:
-            list[Document]: List of documents that were found.
+            List of documents that were found.
         """
         return await run_in_executor(
             None,
langchain_core/indexing/in_memory.py

@@ -3,7 +3,7 @@
 import operator
 import uuid
 from collections.abc import Sequence
-from typing import Any, Optional, cast
+from typing import Any, cast

 from pydantic import Field
 from typing_extensions import override
@@ -24,7 +24,7 @@ class InMemoryDocumentIndex(DocumentIndex):
     It provides a simple search API that returns documents by the number of
     counts the given query appears in the document.

-    .. versionadded:: 0.2.29
+    !!! version-added "Added in version 0.2.29"
     """

     store: dict[str, Document] = Field(default_factory=dict)
@@ -60,7 +60,7 @@ class InMemoryDocumentIndex(DocumentIndex):
         return UpsertResponse(succeeded=ok_ids, failed=[])

     @override
-    def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> DeleteResponse:
+    def delete(self, ids: list[str] | None = None, **kwargs: Any) -> DeleteResponse:
         """Delete by IDs.

         Args:
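To close the loop, a quick sketch of exercising `InMemoryDocumentIndex` as both an index and a retriever (count-based search per the class docstring above; import path and exact call shapes are assumptions):

    from langchain_core.documents import Document
    from langchain_core.indexing.in_memory import InMemoryDocumentIndex

    doc_index = InMemoryDocumentIndex()
    doc_index.upsert(
        [
            Document(id="1", page_content="cats cats cats"),
            Document(id="2", page_content="cats and dogs"),
        ]
    )

    print(doc_index.get(["1"]))      # [Document(id="1", ...)]

    # DocumentIndex subclasses BaseRetriever, so it is also a Runnable:
    print(doc_index.invoke("cats"))  # doc "1" should rank first (highest query count)

    response = doc_index.delete(["1"])
    print(response["succeeded"])     # ["1"]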