langchain-core 1.0.0a8__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +0 -1
- langchain_core/_api/beta_decorator.py +17 -20
- langchain_core/_api/deprecation.py +30 -35
- langchain_core/_import_utils.py +1 -1
- langchain_core/agents.py +7 -6
- langchain_core/caches.py +4 -10
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +232 -243
- langchain_core/callbacks/file.py +33 -33
- langchain_core/callbacks/manager.py +353 -416
- langchain_core/callbacks/stdout.py +21 -22
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +54 -51
- langchain_core/chat_history.py +42 -57
- langchain_core/document_loaders/langsmith.py +21 -21
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +37 -40
- langchain_core/documents/transformers.py +28 -29
- langchain_core/embeddings/fake.py +46 -52
- langchain_core/exceptions.py +5 -5
- langchain_core/indexing/api.py +11 -11
- langchain_core/indexing/base.py +24 -24
- langchain_core/language_models/__init__.py +0 -2
- langchain_core/language_models/_utils.py +51 -53
- langchain_core/language_models/base.py +23 -24
- langchain_core/language_models/chat_models.py +121 -144
- langchain_core/language_models/fake_chat_models.py +5 -5
- langchain_core/language_models/llms.py +10 -12
- langchain_core/load/dump.py +1 -1
- langchain_core/load/load.py +16 -16
- langchain_core/load/serializable.py +35 -34
- langchain_core/messages/__init__.py +1 -16
- langchain_core/messages/ai.py +105 -104
- langchain_core/messages/base.py +26 -26
- langchain_core/messages/block_translators/__init__.py +17 -17
- langchain_core/messages/block_translators/anthropic.py +2 -2
- langchain_core/messages/block_translators/bedrock_converse.py +2 -2
- langchain_core/messages/block_translators/google_genai.py +2 -2
- langchain_core/messages/block_translators/groq.py +117 -21
- langchain_core/messages/block_translators/langchain_v0.py +2 -2
- langchain_core/messages/block_translators/openai.py +4 -4
- langchain_core/messages/chat.py +1 -1
- langchain_core/messages/content.py +189 -193
- langchain_core/messages/function.py +5 -5
- langchain_core/messages/human.py +15 -17
- langchain_core/messages/modifier.py +1 -1
- langchain_core/messages/system.py +12 -14
- langchain_core/messages/tool.py +45 -49
- langchain_core/messages/utils.py +384 -396
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +22 -23
- langchain_core/output_parsers/json.py +3 -3
- langchain_core/output_parsers/list.py +1 -1
- langchain_core/output_parsers/openai_functions.py +46 -44
- langchain_core/output_parsers/openai_tools.py +7 -7
- langchain_core/output_parsers/pydantic.py +10 -11
- langchain_core/output_parsers/string.py +1 -1
- langchain_core/output_parsers/transform.py +2 -2
- langchain_core/output_parsers/xml.py +1 -1
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +14 -14
- langchain_core/outputs/generation.py +5 -5
- langchain_core/outputs/llm_result.py +5 -5
- langchain_core/prompt_values.py +5 -5
- langchain_core/prompts/__init__.py +3 -23
- langchain_core/prompts/base.py +32 -37
- langchain_core/prompts/chat.py +216 -222
- langchain_core/prompts/dict.py +2 -2
- langchain_core/prompts/few_shot.py +76 -83
- langchain_core/prompts/few_shot_with_templates.py +6 -8
- langchain_core/prompts/image.py +11 -13
- langchain_core/prompts/loading.py +1 -1
- langchain_core/prompts/message.py +2 -2
- langchain_core/prompts/prompt.py +14 -16
- langchain_core/prompts/string.py +19 -7
- langchain_core/prompts/structured.py +24 -25
- langchain_core/rate_limiters.py +36 -38
- langchain_core/retrievers.py +41 -182
- langchain_core/runnables/base.py +565 -590
- langchain_core/runnables/branch.py +7 -7
- langchain_core/runnables/config.py +37 -44
- langchain_core/runnables/configurable.py +8 -9
- langchain_core/runnables/fallbacks.py +8 -8
- langchain_core/runnables/graph.py +28 -27
- langchain_core/runnables/graph_ascii.py +19 -18
- langchain_core/runnables/graph_mermaid.py +20 -31
- langchain_core/runnables/graph_png.py +7 -7
- langchain_core/runnables/history.py +20 -20
- langchain_core/runnables/passthrough.py +8 -8
- langchain_core/runnables/retry.py +3 -3
- langchain_core/runnables/router.py +1 -1
- langchain_core/runnables/schema.py +33 -33
- langchain_core/runnables/utils.py +30 -34
- langchain_core/stores.py +72 -102
- langchain_core/sys_info.py +27 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +63 -63
- langchain_core/tools/convert.py +92 -92
- langchain_core/tools/render.py +9 -9
- langchain_core/tools/retriever.py +1 -1
- langchain_core/tools/simple.py +6 -7
- langchain_core/tools/structured.py +17 -18
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +35 -35
- langchain_core/tracers/context.py +12 -17
- langchain_core/tracers/event_stream.py +3 -3
- langchain_core/tracers/langchain.py +8 -8
- langchain_core/tracers/log_stream.py +17 -18
- langchain_core/tracers/memory_stream.py +2 -2
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/utils/aiter.py +31 -31
- langchain_core/utils/env.py +5 -5
- langchain_core/utils/function_calling.py +48 -120
- langchain_core/utils/html.py +4 -4
- langchain_core/utils/input.py +2 -2
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +19 -19
- langchain_core/utils/json.py +1 -1
- langchain_core/utils/json_schema.py +2 -2
- langchain_core/utils/mustache.py +5 -5
- langchain_core/utils/pydantic.py +17 -17
- langchain_core/utils/strings.py +4 -4
- langchain_core/utils/utils.py +25 -28
- langchain_core/vectorstores/base.py +43 -64
- langchain_core/vectorstores/in_memory.py +83 -85
- langchain_core/version.py +1 -1
- {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc1.dist-info}/METADATA +23 -11
- langchain_core-1.0.0rc1.dist-info/RECORD +172 -0
- langchain_core/memory.py +0 -120
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core-1.0.0a8.dist-info/RECORD +0 -176
- {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc1.dist-info}/WHEEL +0 -0
langchain_core/documents/base.py
CHANGED

@@ -57,52 +57,51 @@ class Blob(BaseMedia):
 
     Example: Initialize a blob from in-memory data
 
-
+        ```python
+        from langchain_core.documents import Blob
 
-
+        blob = Blob.from_data("Hello, world!")
 
-
+        # Read the blob as a string
+        print(blob.as_string())
 
-
-
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
-
-
-
-
-            with blob.as_bytes_io() as f:
-                print(f.read())
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
 
     Example: Load from memory and specify mime-type and metadata
 
-
-
-            from langchain_core.documents import Blob
+        ```python
+        from langchain_core.documents import Blob
 
-
-
-
-
-
+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```
 
     Example: Load the blob from a file
 
-
-
-            from langchain_core.documents import Blob
-
-            blob = Blob.from_path("path/to/file.txt")
+        ```python
+        from langchain_core.documents import Blob
 
-
-            print(blob.as_string())
+        blob = Blob.from_path("path/to/file.txt")
 
-
-
+        # Read the blob as a string
+        print(blob.as_string())
 
-
-
-            print(f.read())
+        # Read the blob as bytes
+        print(blob.as_bytes())
 
+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """
 
     data: bytes | str | None = None
@@ -215,7 +214,7 @@ class Blob(BaseMedia):
         path: path like object to file to be read
         encoding: Encoding to use if decoding the bytes into a string
         mime_type: if provided, will be set as the mime-type of the data
-        guess_type: If True
+        guess_type: If `True`, the mimetype will be guessed from the file extension,
             if a mime-type was not provided
         metadata: Metadata to associate with the blob
 
@@ -278,15 +277,13 @@ class Document(BaseMedia):
     """Class for storing a piece of text and associated metadata.
 
     Example:
+        ```python
+        from langchain_core.documents import Document
 
-
-
-
-
-            document = Document(
-                page_content="Hello, world!", metadata={"source": "https://example.com"}
-            )
-
+        document = Document(
+            page_content="Hello, world!", metadata={"source": "https://example.com"}
+        )
+        ```
     """
 
     page_content: str
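Taken together, the hunks above pin down the `Blob` loading behavior (notably `guess_type`) and the `Document` constructor. A minimal runnable sketch of that usage; the file path and metadata here are illustrative, not from the diff:

```python
from langchain_core.documents import Blob, Document

# guess_type=True (the default): when no explicit mime_type is passed,
# the mimetype is guessed from the file extension, per the fixed docstring.
blob = Blob.from_path("notes.txt", guess_type=True)
print(blob.mimetype)  # "text/plain", guessed from the ".txt" extension

# A Document pairs page content with arbitrary metadata.
doc = Document(page_content=blob.as_string(), metadata={"source": blob.source})
```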
langchain_core/documents/transformers.py
CHANGED

@@ -20,35 +20,34 @@ class BaseDocumentTransformer(ABC):
     sequence of transformed Documents.
 
     Example:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        ```python
+        class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
+            embeddings: Embeddings
+            similarity_fn: Callable = cosine_similarity
+            similarity_threshold: float = 0.95
+
+            class Config:
+                arbitrary_types_allowed = True
+
+            def transform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                stateful_documents = get_stateful_documents(documents)
+                embedded_documents = _get_embeddings_from_stateful_docs(
+                    self.embeddings, stateful_documents
+                )
+                included_idxs = _filter_similar_embeddings(
+                    embedded_documents,
+                    self.similarity_fn,
+                    self.similarity_threshold,
+                )
+                return [stateful_documents[i] for i in sorted(included_idxs)]
+
+            async def atransform_documents(
+                self, documents: Sequence[Document], **kwargs: Any
+            ) -> Sequence[Document]:
+                raise NotImplementedError
+        ```
     """
 
     @abstractmethod
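The new docstring example leans on helpers (`get_stateful_documents`, `_get_embeddings_from_stateful_docs`, `_filter_similar_embeddings`) that are not defined in the snippet, so it is not runnable on its own. A self-contained toy transformer sketched against the same interface; the `LowercaseTransformer` name and behavior are ours, not from the diff:

```python
from collections.abc import Sequence
from typing import Any

from langchain_core.documents import BaseDocumentTransformer, Document


class LowercaseTransformer(BaseDocumentTransformer):
    """Toy transformer: lowercase page content, keep metadata untouched."""

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        return [
            Document(page_content=doc.page_content.lower(), metadata=doc.metadata)
            for doc in documents
        ]

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        # Delegate to the sync path; fine for a toy example.
        return self.transform_documents(documents, **kwargs)


docs = [Document(page_content="Hello, World!")]
print(LowercaseTransformer().transform_documents(docs)[0].page_content)
# -> "hello, world!"
```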
langchain_core/embeddings/fake.py
CHANGED

@@ -21,37 +21,34 @@ class FakeEmbeddings(Embeddings, BaseModel):
     Do not use this outside of testing, as it is not a real embedding model.
 
     Instantiate:
-
+        ```python
+        from langchain_core.embeddings import FakeEmbeddings
 
-
-
-        embed = FakeEmbeddings(size=100)
+        embed = FakeEmbeddings(size=100)
+        ```
 
     Embed single text:
-
-
-
-
-
-
-
-
-        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```python
+        input_text = "The meaning of life is 42"
+        vector = embed.embed_query(input_text)
+        print(vector[:3])
+        ```
+        ```python
+        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```
 
     Embed multiple texts:
-
-
-
-
-
-
-
-
-
-
-
-        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
-
+        ```python
+        input_texts = ["Document 1...", "Document 2..."]
+        vectors = embed.embed_documents(input_texts)
+        print(len(vectors))
+        # The first 3 coordinates for the first vector
+        print(vectors[0][:3])
+        ```
+        ```python
+        2
+        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
+        ```
     """
 
     size: int
@@ -78,37 +75,34 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
     Do not use this outside of testing, as it is not a real embedding model.
 
     Instantiate:
-
+        ```python
+        from langchain_core.embeddings import DeterministicFakeEmbedding
 
-
-
-        embed = DeterministicFakeEmbedding(size=100)
+        embed = DeterministicFakeEmbedding(size=100)
+        ```
 
     Embed single text:
-
-
-
-
-
-
-
-
-        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```python
+        input_text = "The meaning of life is 42"
+        vector = embed.embed_query(input_text)
+        print(vector[:3])
+        ```
+        ```python
+        [-0.700234640213188, -0.581266257710429, -1.1328482266445354]
+        ```
 
     Embed multiple texts:
-
-
-
-
-
-
-
-
-
-
-
-        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
-
+        ```python
+        input_texts = ["Document 1...", "Document 2..."]
+        vectors = embed.embed_documents(input_texts)
+        print(len(vectors))
+        # The first 3 coordinates for the first vector
+        print(vectors[0][:3])
+        ```
+        ```python
+        2
+        [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
+        ```
     """
 
     size: int
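One property the docstrings above do not spell out: `FakeEmbeddings` draws a fresh random vector on every call, while `DeterministicFakeEmbedding` seeds its generator from the input text. A quick sketch of the difference; the printed booleans are what we expect, not output captured from the diff:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding, FakeEmbeddings

text = "The meaning of life is 42"

# FakeEmbeddings: a new random vector each call, so repeated calls differ.
fake = FakeEmbeddings(size=4)
print(fake.embed_query(text) == fake.embed_query(text))  # False

# DeterministicFakeEmbedding: the same text always maps to the same vector.
det = DeterministicFakeEmbedding(size=4)
print(det.embed_query(text) == det.embed_query(text))  # True
```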
langchain_core/exceptions.py
CHANGED

@@ -33,19 +33,19 @@ class OutputParserException(ValueError, LangChainException):  # noqa: N818
     Args:
         error: The error that's being re-raised or an error message.
         observation: String explanation of error which can be passed to a
-          model to try and remediate the issue.
+            model to try and remediate the issue.
         llm_output: String model output which is error-ing.
-
+
         send_to_llm: Whether to send the observation and llm_output back to an Agent
             after an OutputParserException has been raised.
             This gives the underlying model driving the agent the context that the
             previous output was improperly structured, in the hopes that it will
             update the output to the correct format.
-            Defaults to False
+            Defaults to `False`.
 
     Raises:
-        ValueError: If
-
+        ValueError: If `send_to_llm` is True but either observation or
+            `llm_output` are not provided.
     """
     if isinstance(error, str):
         error = create_message(
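A short sketch of the contract the fixed `Raises:` clause describes; the error strings below are illustrative:

```python
from langchain_core.exceptions import OutputParserException

# Per the Raises clause: send_to_llm=True without observation/llm_output
# fails at construction time with a ValueError.
try:
    OutputParserException("bad output", send_to_llm=True)
except ValueError as e:
    print(e)

# With both provided, the parse failure can be routed back to the model.
exc = OutputParserException(
    "Could not parse tool call",
    observation="Output was not valid JSON; emit a single JSON object.",
    llm_output='{"answer": 42',
    send_to_llm=True,
)
```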
langchain_core/indexing/api.py
CHANGED

@@ -299,9 +299,9 @@ def index(
         are not able to specify the uid of the document.
 
     !!! warning "Behavior changed in 0.3.25"
-        Added
+        Added `scoped_full` cleanup mode.
 
-    !!!
+    !!! warning
 
         * In full mode, the loader should be returning
          the entire dataset, and not just a subset of the dataset.
@@ -315,7 +315,7 @@ def index(
          chunks, and we index them using a batch size of 5, we'll have 3 batches
          all with the same source id. In general, to avoid doing too much
          redundant work select as big a batch size as possible.
-        * The
+        * The `scoped_full` mode is suitable if determining an appropriate batch size
          is challenging or if your data loader cannot return the entire dataset at
          once. This mode keeps track of source IDs in memory, which should be fine
          for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -381,8 +381,8 @@ def index(
         ValueError: If vectorstore does not have
            "delete" and "add_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If
-        AssertionError: If
+        TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
+        AssertionError: If `source_id` is None when cleanup mode is incremental.
            (should be unreachable code).
     """
     # Behavior is deprecated, but we keep it for backwards compatibility.
@@ -640,9 +640,9 @@ async def aindex(
         are not able to specify the uid of the document.
 
     !!! warning "Behavior changed in 0.3.25"
-        Added
+        Added `scoped_full` cleanup mode.
 
-    !!!
+    !!! warning
 
         * In full mode, the loader should be returning
          the entire dataset, and not just a subset of the dataset.
@@ -656,7 +656,7 @@ async def aindex(
          chunks, and we index them using a batch size of 5, we'll have 3 batches
          all with the same source id. In general, to avoid doing too much
          redundant work select as big a batch size as possible.
-        * The
+        * The `scoped_full` mode is suitable if determining an appropriate batch size
          is challenging or if your data loader cannot return the entire dataset at
          once. This mode keeps track of source IDs in memory, which should be fine
          for most use cases. If your dataset is large (10M+ docs), you will likely
@@ -722,9 +722,9 @@ async def aindex(
         ValueError: If vectorstore does not have
            "adelete" and "aadd_documents" required methods.
         ValueError: If source_id_key is not None, but is not a string or callable.
-        TypeError: If
-        AssertionError: If
-        incremental or
+        TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
+        AssertionError: If `source_id_key` is None when cleanup mode is
+            incremental or `scoped_full` (should be unreachable).
     """
     # Behavior is deprecated, but we keep it for backwards compatibility.
     # # Warn only once per process.
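A runnable sketch of the `scoped_full` mode documented above, using the in-memory implementations that ship with langchain-core; the namespace, documents, and printed counts are illustrative:

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

record_manager = InMemoryRecordManager(namespace="demo")
vector_store = InMemoryVectorStore(DeterministicFakeEmbedding(size=8))

docs = [
    Document(page_content="chunk one", metadata={"source": "a.txt"}),
    Document(page_content="chunk two", metadata={"source": "a.txt"}),
]

# scoped_full only cleans up stale documents whose source ids were seen in
# this run, so the loader does not have to return the entire dataset.
result = index(
    docs,
    record_manager,
    vector_store,
    cleanup="scoped_full",
    source_id_key="source",
)
print(result)
# e.g. {'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}
```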
langchain_core/indexing/base.py
CHANGED

@@ -61,7 +61,7 @@ class RecordManager(ABC):
         """Initialize the record manager.
 
         Args:
-            namespace
+            namespace: The namespace for the record manager.
         """
         self.namespace = namespace
 
@@ -244,7 +244,7 @@ class InMemoryRecordManager(RecordManager):
         """Initialize the in-memory record manager.
 
         Args:
-            namespace
+            namespace: The namespace for the record manager.
         """
         super().__init__(namespace)
         # Each key points to a dictionary
@@ -278,10 +278,10 @@ class InMemoryRecordManager(RecordManager):
         Args:
             keys: A list of record keys to upsert.
             group_ids: A list of group IDs corresponding to the keys.
-
+
             time_at_least: Optional timestamp. Implementation can use this
                 to optionally verify that the timestamp IS at least this time
-               in the system that stores.
+                in the system that stores.
                 E.g., use to validate that the time in the postgres database
                 is equal to or larger than the given timestamp, if not
                 raise an error.
@@ -315,10 +315,10 @@ class InMemoryRecordManager(RecordManager):
         Args:
             keys: A list of record keys to upsert.
             group_ids: A list of group IDs corresponding to the keys.
-
+
             time_at_least: Optional timestamp. Implementation can use this
                 to optionally verify that the timestamp IS at least this time
-               in the system that stores.
+                in the system that stores.
                 E.g., use to validate that the time in the postgres database
                 is equal to or larger than the given timestamp, if not
                 raise an error.
@@ -361,13 +361,13 @@ class InMemoryRecordManager(RecordManager):
 
         Args:
             before: Filter to list records updated before this time.
-
+
             after: Filter to list records updated after this time.
-
+
             group_ids: Filter to list records with specific group IDs.
-
+
             limit: optional limit on the number of records to return.
-
+
 
         Returns:
             A list of keys for the matching records.
@@ -397,13 +397,13 @@ class InMemoryRecordManager(RecordManager):
 
         Args:
             before: Filter to list records updated before this time.
-
+
             after: Filter to list records updated after this time.
-
+
             group_ids: Filter to list records with specific group IDs.
-
+
             limit: optional limit on the number of records to return.
-
+
 
         Returns:
             A list of keys for the matching records.
@@ -529,7 +529,7 @@ class DocumentIndex(BaseRetriever):
             **kwargs: Additional keyword arguments.
 
         Returns:
-
+            A response object that contains the list of IDs that were
             successfully added or updated in the vectorstore and the list of IDs that
             failed to be added or updated.
         """
@@ -552,7 +552,7 @@ class DocumentIndex(BaseRetriever):
             **kwargs: Additional keyword arguments.
 
         Returns:
-
+            A response object that contains the list of IDs that were
             successfully added or updated in the vectorstore and the list of IDs that
             failed to be added or updated.
         """
@@ -571,12 +571,12 @@ class DocumentIndex(BaseRetriever):
 
         Args:
             ids: List of ids to delete.
-            kwargs: Additional keyword arguments. This is up to the implementation.
+            **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index,
                 or else issue a non-blocking delete etc.
 
         Returns:
-
+            A response object that contains the list of IDs that were
             successfully deleted and the list of IDs that failed to be deleted.
         """
 
@@ -589,11 +589,11 @@ class DocumentIndex(BaseRetriever):
 
         Args:
             ids: List of ids to delete.
-            kwargs: Additional keyword arguments. This is up to the implementation.
+            **kwargs: Additional keyword arguments. This is up to the implementation.
                 For example, can include an option to delete the entire index.
 
         Returns:
-
+            A response object that contains the list of IDs that were
             successfully deleted and the list of IDs that failed to be deleted.
         """
         return await run_in_executor(
@@ -624,10 +624,10 @@ class DocumentIndex(BaseRetriever):
 
         Args:
             ids: List of IDs to get.
-            kwargs: Additional keyword arguments. These are up to the implementation.
+            **kwargs: Additional keyword arguments. These are up to the implementation.
 
         Returns:
-
+            List of documents that were found.
         """
 
     async def aget(
@@ -650,10 +650,10 @@ class DocumentIndex(BaseRetriever):
 
         Args:
             ids: List of IDs to get.
-            kwargs: Additional keyword arguments. These are up to the implementation.
+            **kwargs: Additional keyword arguments. These are up to the implementation.
 
         Returns:
-
+            List of documents that were found.
         """
         return await run_in_executor(
             None,