langchain-core 1.0.0a2__py3-none-any.whl → 1.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/_api/beta_decorator.py +17 -40
- langchain_core/_api/deprecation.py +20 -7
- langchain_core/_api/path.py +19 -2
- langchain_core/_import_utils.py +7 -0
- langchain_core/agents.py +10 -6
- langchain_core/callbacks/base.py +28 -15
- langchain_core/callbacks/manager.py +81 -69
- langchain_core/callbacks/usage.py +4 -2
- langchain_core/chat_history.py +29 -21
- langchain_core/document_loaders/base.py +34 -9
- langchain_core/document_loaders/langsmith.py +3 -0
- langchain_core/documents/base.py +35 -10
- langchain_core/documents/transformers.py +4 -2
- langchain_core/embeddings/fake.py +8 -5
- langchain_core/env.py +2 -3
- langchain_core/example_selectors/base.py +12 -0
- langchain_core/exceptions.py +7 -0
- langchain_core/globals.py +17 -28
- langchain_core/indexing/api.py +57 -45
- langchain_core/indexing/base.py +5 -8
- langchain_core/indexing/in_memory.py +23 -3
- langchain_core/language_models/__init__.py +6 -2
- langchain_core/language_models/_utils.py +27 -5
- langchain_core/language_models/base.py +33 -21
- langchain_core/language_models/chat_models.py +104 -31
- langchain_core/language_models/fake_chat_models.py +5 -7
- langchain_core/language_models/llms.py +54 -20
- langchain_core/load/dump.py +2 -3
- langchain_core/load/load.py +15 -1
- langchain_core/load/serializable.py +38 -43
- langchain_core/memory.py +7 -3
- langchain_core/messages/__init__.py +1 -1
- langchain_core/messages/ai.py +41 -34
- langchain_core/messages/base.py +20 -7
- langchain_core/messages/block_translators/__init__.py +10 -8
- langchain_core/messages/block_translators/anthropic.py +11 -7
- langchain_core/messages/block_translators/bedrock.py +76 -27
- langchain_core/messages/block_translators/bedrock_converse.py +259 -23
- langchain_core/messages/block_translators/google_genai.py +3 -1
- langchain_core/messages/block_translators/google_vertexai.py +3 -1
- langchain_core/messages/block_translators/groq.py +3 -1
- langchain_core/messages/block_translators/ollama.py +3 -1
- langchain_core/messages/block_translators/openai.py +50 -20
- langchain_core/messages/content.py +23 -13
- langchain_core/messages/human.py +2 -13
- langchain_core/messages/system.py +2 -6
- langchain_core/messages/tool.py +34 -14
- langchain_core/messages/utils.py +186 -73
- langchain_core/output_parsers/base.py +5 -2
- langchain_core/output_parsers/json.py +4 -4
- langchain_core/output_parsers/list.py +7 -22
- langchain_core/output_parsers/openai_functions.py +3 -0
- langchain_core/output_parsers/openai_tools.py +6 -1
- langchain_core/output_parsers/pydantic.py +4 -0
- langchain_core/output_parsers/string.py +5 -1
- langchain_core/output_parsers/xml.py +19 -19
- langchain_core/outputs/chat_generation.py +18 -7
- langchain_core/outputs/generation.py +14 -3
- langchain_core/outputs/llm_result.py +8 -1
- langchain_core/prompt_values.py +10 -4
- langchain_core/prompts/base.py +6 -11
- langchain_core/prompts/chat.py +88 -60
- langchain_core/prompts/dict.py +16 -8
- langchain_core/prompts/few_shot.py +9 -11
- langchain_core/prompts/few_shot_with_templates.py +5 -1
- langchain_core/prompts/image.py +12 -5
- langchain_core/prompts/loading.py +2 -2
- langchain_core/prompts/message.py +5 -6
- langchain_core/prompts/pipeline.py +13 -8
- langchain_core/prompts/prompt.py +22 -8
- langchain_core/prompts/string.py +18 -10
- langchain_core/prompts/structured.py +7 -2
- langchain_core/rate_limiters.py +2 -2
- langchain_core/retrievers.py +7 -6
- langchain_core/runnables/base.py +387 -246
- langchain_core/runnables/branch.py +11 -28
- langchain_core/runnables/config.py +20 -17
- langchain_core/runnables/configurable.py +34 -19
- langchain_core/runnables/fallbacks.py +20 -13
- langchain_core/runnables/graph.py +48 -38
- langchain_core/runnables/graph_ascii.py +40 -17
- langchain_core/runnables/graph_mermaid.py +54 -25
- langchain_core/runnables/graph_png.py +27 -31
- langchain_core/runnables/history.py +55 -58
- langchain_core/runnables/passthrough.py +44 -21
- langchain_core/runnables/retry.py +44 -23
- langchain_core/runnables/router.py +9 -8
- langchain_core/runnables/schema.py +9 -0
- langchain_core/runnables/utils.py +53 -90
- langchain_core/stores.py +19 -31
- langchain_core/sys_info.py +9 -8
- langchain_core/tools/base.py +36 -27
- langchain_core/tools/convert.py +25 -14
- langchain_core/tools/simple.py +36 -8
- langchain_core/tools/structured.py +25 -12
- langchain_core/tracers/base.py +2 -2
- langchain_core/tracers/context.py +5 -1
- langchain_core/tracers/core.py +110 -46
- langchain_core/tracers/evaluation.py +22 -26
- langchain_core/tracers/event_stream.py +97 -42
- langchain_core/tracers/langchain.py +12 -3
- langchain_core/tracers/langchain_v1.py +10 -2
- langchain_core/tracers/log_stream.py +56 -17
- langchain_core/tracers/root_listeners.py +4 -20
- langchain_core/tracers/run_collector.py +6 -16
- langchain_core/tracers/schemas.py +5 -1
- langchain_core/utils/aiter.py +14 -6
- langchain_core/utils/env.py +3 -0
- langchain_core/utils/function_calling.py +46 -20
- langchain_core/utils/interactive_env.py +6 -2
- langchain_core/utils/iter.py +12 -5
- langchain_core/utils/json.py +12 -3
- langchain_core/utils/json_schema.py +156 -40
- langchain_core/utils/loading.py +5 -1
- langchain_core/utils/mustache.py +25 -16
- langchain_core/utils/pydantic.py +38 -9
- langchain_core/utils/utils.py +25 -9
- langchain_core/vectorstores/base.py +7 -20
- langchain_core/vectorstores/in_memory.py +20 -14
- langchain_core/vectorstores/utils.py +18 -12
- langchain_core/version.py +1 -1
- langchain_core-1.0.0a4.dist-info/METADATA +77 -0
- langchain_core-1.0.0a4.dist-info/RECORD +181 -0
- langchain_core/beta/__init__.py +0 -1
- langchain_core/beta/runnables/__init__.py +0 -1
- langchain_core/beta/runnables/context.py +0 -448
- langchain_core-1.0.0a2.dist-info/METADATA +0 -106
- langchain_core-1.0.0a2.dist-info/RECORD +0 -184
- {langchain_core-1.0.0a2.dist-info → langchain_core-1.0.0a4.dist-info}/WHEEL +0 -0
- {langchain_core-1.0.0a2.dist-info → langchain_core-1.0.0a4.dist-info}/entry_points.txt +0 -0
langchain_core/documents/base.py
CHANGED
|
@@ -82,7 +82,7 @@ class Blob(BaseMedia):
|
|
|
82
82
|
blob = Blob.from_data(
|
|
83
83
|
data="Hello, world!",
|
|
84
84
|
mime_type="text/plain",
|
|
85
|
-
metadata={"source": "https://example.com"}
|
|
85
|
+
metadata={"source": "https://example.com"},
|
|
86
86
|
)
|
|
87
87
|
|
|
88
88
|
Example: Load the blob from a file
|
|
@@ -145,7 +145,14 @@ class Blob(BaseMedia):
|
|
|
145
145
|
return values
|
|
146
146
|
|
|
147
147
|
def as_string(self) -> str:
|
|
148
|
-
"""Read data as a string.
|
|
148
|
+
"""Read data as a string.
|
|
149
|
+
|
|
150
|
+
Raises:
|
|
151
|
+
ValueError: If the blob cannot be represented as a string.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
The data as a string.
|
|
155
|
+
"""
|
|
149
156
|
if self.data is None and self.path:
|
|
150
157
|
return Path(self.path).read_text(encoding=self.encoding)
|
|
151
158
|
if isinstance(self.data, bytes):
|
|
@@ -156,7 +163,14 @@ class Blob(BaseMedia):
|
|
|
156
163
|
raise ValueError(msg)
|
|
157
164
|
|
|
158
165
|
def as_bytes(self) -> bytes:
|
|
159
|
-
"""Read data as bytes.
|
|
166
|
+
"""Read data as bytes.
|
|
167
|
+
|
|
168
|
+
Raises:
|
|
169
|
+
ValueError: If the blob cannot be represented as bytes.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
The data as bytes.
|
|
173
|
+
"""
|
|
160
174
|
if isinstance(self.data, bytes):
|
|
161
175
|
return self.data
|
|
162
176
|
if isinstance(self.data, str):
|
|
@@ -168,7 +182,14 @@ class Blob(BaseMedia):
|
|
|
168
182
|
|
|
169
183
|
@contextlib.contextmanager
|
|
170
184
|
def as_bytes_io(self) -> Generator[Union[BytesIO, BufferedReader], None, None]:
|
|
171
|
-
"""Read data as a byte stream.
|
|
185
|
+
"""Read data as a byte stream.
|
|
186
|
+
|
|
187
|
+
Raises:
|
|
188
|
+
NotImplementedError: If the blob cannot be represented as a byte stream.
|
|
189
|
+
|
|
190
|
+
Yields:
|
|
191
|
+
The data as a byte stream.
|
|
192
|
+
"""
|
|
172
193
|
if isinstance(self.data, bytes):
|
|
173
194
|
yield BytesIO(self.data)
|
|
174
195
|
elif self.data is None and self.path:
|
|
@@ -246,7 +267,7 @@ class Blob(BaseMedia):
|
|
|
246
267
|
)
|
|
247
268
|
|
|
248
269
|
def __repr__(self) -> str:
|
|
249
|
-
"""
|
|
270
|
+
"""Return the blob representation."""
|
|
250
271
|
str_repr = f"Blob {id(self)}"
|
|
251
272
|
if self.source:
|
|
252
273
|
str_repr += f" {self.source}"
|
|
@@ -263,8 +284,7 @@ class Document(BaseMedia):
|
|
|
263
284
|
from langchain_core.documents import Document
|
|
264
285
|
|
|
265
286
|
document = Document(
|
|
266
|
-
page_content="Hello, world!",
|
|
267
|
-
metadata={"source": "https://example.com"}
|
|
287
|
+
page_content="Hello, world!", metadata={"source": "https://example.com"}
|
|
268
288
|
)
|
|
269
289
|
|
|
270
290
|
"""
|
|
@@ -281,19 +301,24 @@ class Document(BaseMedia):
|
|
|
281
301
|
|
|
282
302
|
@classmethod
|
|
283
303
|
def is_lc_serializable(cls) -> bool:
|
|
284
|
-
"""Return
|
|
304
|
+
"""Return True as this class is serializable."""
|
|
285
305
|
return True
|
|
286
306
|
|
|
287
307
|
@classmethod
|
|
288
308
|
def get_lc_namespace(cls) -> list[str]:
|
|
289
309
|
"""Get the namespace of the langchain object.
|
|
290
310
|
|
|
291
|
-
|
|
311
|
+
Returns:
|
|
312
|
+
["langchain", "schema", "document"]
|
|
292
313
|
"""
|
|
293
314
|
return ["langchain", "schema", "document"]
|
|
294
315
|
|
|
295
316
|
def __str__(self) -> str:
|
|
296
|
-
"""Override __str__ to restrict it to page_content and metadata.
|
|
317
|
+
"""Override __str__ to restrict it to page_content and metadata.
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
A string representation of the Document.
|
|
321
|
+
"""
|
|
297
322
|
# The format matches pydantic format for __str__.
|
|
298
323
|
#
|
|
299
324
|
# The purpose of this change is to make sure that user code that
|
langchain_core/documents/transformers.py
CHANGED
|
@@ -38,7 +38,9 @@ class BaseDocumentTransformer(ABC):
|
|
|
38
38
|
self.embeddings, stateful_documents
|
|
39
39
|
)
|
|
40
40
|
included_idxs = _filter_similar_embeddings(
|
|
41
|
-
embedded_documents,
|
|
41
|
+
embedded_documents,
|
|
42
|
+
self.similarity_fn,
|
|
43
|
+
self.similarity_threshold,
|
|
42
44
|
)
|
|
43
45
|
return [stateful_documents[i] for i in sorted(included_idxs)]
|
|
44
46
|
|
|
@@ -47,7 +49,7 @@ class BaseDocumentTransformer(ABC):
|
|
|
47
49
|
) -> Sequence[Document]:
|
|
48
50
|
raise NotImplementedError
|
|
49
51
|
|
|
50
|
-
"""
|
|
52
|
+
"""
|
|
51
53
|
|
|
52
54
|
@abstractmethod
|
|
53
55
|
def transform_documents(
|
langchain_core/embeddings/fake.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Module contains a few fake embedding models for testing purposes."""
|
|
2
2
|
|
|
3
3
|
# Please do not add additional fake embedding model implementations here.
|
|
4
|
+
import contextlib
|
|
4
5
|
import hashlib
|
|
5
6
|
|
|
6
7
|
from pydantic import BaseModel
|
|
@@ -8,6 +9,9 @@ from typing_extensions import override
|
|
|
8
9
|
|
|
9
10
|
from langchain_core.embeddings import Embeddings
|
|
10
11
|
|
|
12
|
+
with contextlib.suppress(ImportError):
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
11
15
|
|
|
12
16
|
class FakeEmbeddings(Embeddings, BaseModel):
|
|
13
17
|
"""Fake embedding model for unit testing purposes.
|
|
@@ -20,6 +24,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
|
|
|
20
24
|
.. code-block:: python
|
|
21
25
|
|
|
22
26
|
from langchain_core.embeddings import FakeEmbeddings
|
|
27
|
+
|
|
23
28
|
embed = FakeEmbeddings(size=100)
|
|
24
29
|
|
|
25
30
|
Embed single text:
|
|
@@ -53,8 +58,6 @@ class FakeEmbeddings(Embeddings, BaseModel):
|
|
|
53
58
|
"""The size of the embedding vector."""
|
|
54
59
|
|
|
55
60
|
def _get_embedding(self) -> list[float]:
|
|
56
|
-
import numpy as np
|
|
57
|
-
|
|
58
61
|
return list(np.random.default_rng().normal(size=self.size))
|
|
59
62
|
|
|
60
63
|
@override
|
|
@@ -78,6 +81,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
|
|
|
78
81
|
.. code-block:: python
|
|
79
82
|
|
|
80
83
|
from langchain_core.embeddings import DeterministicFakeEmbedding
|
|
84
|
+
|
|
81
85
|
embed = DeterministicFakeEmbedding(size=100)
|
|
82
86
|
|
|
83
87
|
Embed single text:
|
|
@@ -111,13 +115,12 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
|
|
|
111
115
|
"""The size of the embedding vector."""
|
|
112
116
|
|
|
113
117
|
def _get_embedding(self, seed: int) -> list[float]:
|
|
114
|
-
import numpy as np
|
|
115
|
-
|
|
116
118
|
# set the seed for the random generator
|
|
117
119
|
rng = np.random.default_rng(seed)
|
|
118
120
|
return list(rng.normal(size=self.size))
|
|
119
121
|
|
|
120
|
-
|
|
122
|
+
@staticmethod
|
|
123
|
+
def _get_seed(text: str) -> int:
|
|
121
124
|
"""Get a seed for the random generator, using the hash of the text."""
|
|
122
125
|
return int(hashlib.sha256(text.encode("utf-8")).hexdigest(), 16) % 10**8
|
|
123
126
|
|
langchain_core/env.py
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
import platform
|
|
4
4
|
from functools import lru_cache
|
|
5
5
|
|
|
6
|
+
from langchain_core import __version__
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
@lru_cache(maxsize=1)
|
|
8
10
|
def get_runtime_environment() -> dict:
|
|
@@ -11,9 +13,6 @@ def get_runtime_environment() -> dict:
|
|
|
11
13
|
Returns:
|
|
12
14
|
A dictionary with information about the runtime environment.
|
|
13
15
|
"""
|
|
14
|
-
# Lazy import to avoid circular imports
|
|
15
|
-
from langchain_core import __version__
|
|
16
|
-
|
|
17
16
|
return {
|
|
18
17
|
"library_version": __version__,
|
|
19
18
|
"library": "langchain-core",
|
langchain_core/example_selectors/base.py
CHANGED
|
@@ -16,6 +16,9 @@ class BaseExampleSelector(ABC):
|
|
|
16
16
|
Args:
|
|
17
17
|
example: A dictionary with keys as input variables
|
|
18
18
|
and values as their values.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
Any return value.
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
24
|
async def aadd_example(self, example: dict[str, str]) -> Any:
|
|
@@ -24,6 +27,9 @@ class BaseExampleSelector(ABC):
|
|
|
24
27
|
Args:
|
|
25
28
|
example: A dictionary with keys as input variables
|
|
26
29
|
and values as their values.
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
Any return value.
|
|
27
33
|
"""
|
|
28
34
|
return await run_in_executor(None, self.add_example, example)
|
|
29
35
|
|
|
@@ -34,6 +40,9 @@ class BaseExampleSelector(ABC):
|
|
|
34
40
|
Args:
|
|
35
41
|
input_variables: A dictionary with keys as input variables
|
|
36
42
|
and values as their values.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
A list of examples.
|
|
37
46
|
"""
|
|
38
47
|
|
|
39
48
|
async def aselect_examples(self, input_variables: dict[str, str]) -> list[dict]:
|
|
@@ -42,5 +51,8 @@ class BaseExampleSelector(ABC):
|
|
|
42
51
|
Args:
|
|
43
52
|
input_variables: A dictionary with keys as input variables
|
|
44
53
|
and values as their values.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
A list of examples.
|
|
45
57
|
"""
|
|
46
58
|
return await run_in_executor(None, self.select_examples, input_variables)
|
langchain_core/exceptions.py
CHANGED
|
@@ -42,6 +42,10 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
|
|
|
42
42
|
previous output was improperly structured, in the hopes that it will
|
|
43
43
|
update the output to the correct format.
|
|
44
44
|
Defaults to False.
|
|
45
|
+
|
|
46
|
+
Raises:
|
|
47
|
+
ValueError: If ``send_to_llm`` is True but either observation or
|
|
48
|
+
``llm_output`` are not provided.
|
|
45
49
|
"""
|
|
46
50
|
if isinstance(error, str):
|
|
47
51
|
error = create_message(
|
|
@@ -77,6 +81,9 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
|
|
|
77
81
|
Args:
|
|
78
82
|
message: The message to display.
|
|
79
83
|
error_code: The error code to display.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
The full message with the troubleshooting link.
|
|
80
87
|
"""
|
|
81
88
|
return (
|
|
82
89
|
f"{message}\n"
|
langchain_core/globals.py
CHANGED
|
@@ -6,6 +6,13 @@ from typing import TYPE_CHECKING, Optional
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
7
|
from langchain_core.caches import BaseCache
|
|
8
8
|
|
|
9
|
+
try:
|
|
10
|
+
import langchain # type: ignore[import-not-found]
|
|
11
|
+
|
|
12
|
+
_HAS_LANGCHAIN = True
|
|
13
|
+
except ImportError:
|
|
14
|
+
_HAS_LANGCHAIN = False
|
|
15
|
+
|
|
9
16
|
|
|
10
17
|
# DO NOT USE THESE VALUES DIRECTLY!
|
|
11
18
|
# Use them only via `get_<X>()` and `set_<X>()` below,
|
|
@@ -22,9 +29,7 @@ def set_verbose(value: bool) -> None: # noqa: FBT001
|
|
|
22
29
|
Args:
|
|
23
30
|
value: The new value for the `verbose` global setting.
|
|
24
31
|
"""
|
|
25
|
-
|
|
26
|
-
import langchain # type: ignore[import-not-found]
|
|
27
|
-
|
|
32
|
+
if _HAS_LANGCHAIN:
|
|
28
33
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
29
34
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
30
35
|
# warnings.
|
|
@@ -43,8 +48,6 @@ def set_verbose(value: bool) -> None: # noqa: FBT001
|
|
|
43
48
|
# Remove it once `langchain.verbose` is no longer supported, and once all
|
|
44
49
|
# users have migrated to using `set_verbose()` here.
|
|
45
50
|
langchain.verbose = value
|
|
46
|
-
except ImportError:
|
|
47
|
-
pass
|
|
48
51
|
|
|
49
52
|
global _verbose # noqa: PLW0603
|
|
50
53
|
_verbose = value
|
|
@@ -56,9 +59,7 @@ def get_verbose() -> bool:
|
|
|
56
59
|
Returns:
|
|
57
60
|
The value of the `verbose` global setting.
|
|
58
61
|
"""
|
|
59
|
-
|
|
60
|
-
import langchain
|
|
61
|
-
|
|
62
|
+
if _HAS_LANGCHAIN:
|
|
62
63
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
63
64
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
64
65
|
# warnings.
|
|
@@ -83,7 +84,7 @@ def get_verbose() -> bool:
|
|
|
83
84
|
# deprecation warnings directing them to use `set_verbose()` when they
|
|
84
85
|
# import `langchain.verbose`.
|
|
85
86
|
old_verbose = langchain.verbose
|
|
86
|
-
|
|
87
|
+
else:
|
|
87
88
|
old_verbose = False
|
|
88
89
|
|
|
89
90
|
return _verbose or old_verbose
|
|
@@ -95,9 +96,7 @@ def set_debug(value: bool) -> None: # noqa: FBT001
|
|
|
95
96
|
Args:
|
|
96
97
|
value: The new value for the `debug` global setting.
|
|
97
98
|
"""
|
|
98
|
-
|
|
99
|
-
import langchain
|
|
100
|
-
|
|
99
|
+
if _HAS_LANGCHAIN:
|
|
101
100
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
102
101
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
103
102
|
# warnings.
|
|
@@ -114,8 +113,6 @@ def set_debug(value: bool) -> None: # noqa: FBT001
|
|
|
114
113
|
# Remove it once `langchain.debug` is no longer supported, and once all
|
|
115
114
|
# users have migrated to using `set_debug()` here.
|
|
116
115
|
langchain.debug = value
|
|
117
|
-
except ImportError:
|
|
118
|
-
pass
|
|
119
116
|
|
|
120
117
|
global _debug # noqa: PLW0603
|
|
121
118
|
_debug = value
|
|
@@ -127,9 +124,7 @@ def get_debug() -> bool:
|
|
|
127
124
|
Returns:
|
|
128
125
|
The value of the `debug` global setting.
|
|
129
126
|
"""
|
|
130
|
-
|
|
131
|
-
import langchain
|
|
132
|
-
|
|
127
|
+
if _HAS_LANGCHAIN:
|
|
133
128
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
134
129
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
135
130
|
# warnings.
|
|
@@ -151,7 +146,7 @@ def get_debug() -> bool:
|
|
|
151
146
|
# to using `set_debug()` yet. Those users are getting deprecation warnings
|
|
152
147
|
# directing them to use `set_debug()` when they import `langchain.debug`.
|
|
153
148
|
old_debug = langchain.debug
|
|
154
|
-
|
|
149
|
+
else:
|
|
155
150
|
old_debug = False
|
|
156
151
|
|
|
157
152
|
return _debug or old_debug
|
|
@@ -163,9 +158,7 @@ def set_llm_cache(value: Optional["BaseCache"]) -> None:
|
|
|
163
158
|
Args:
|
|
164
159
|
value: The new LLM cache to use. If `None`, the LLM cache is disabled.
|
|
165
160
|
"""
|
|
166
|
-
|
|
167
|
-
import langchain
|
|
168
|
-
|
|
161
|
+
if _HAS_LANGCHAIN:
|
|
169
162
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
170
163
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
171
164
|
# warnings.
|
|
@@ -184,22 +177,18 @@ def set_llm_cache(value: Optional["BaseCache"]) -> None:
|
|
|
184
177
|
# Remove it once `langchain.llm_cache` is no longer supported, and
|
|
185
178
|
# once all users have migrated to using `set_llm_cache()` here.
|
|
186
179
|
langchain.llm_cache = value
|
|
187
|
-
except ImportError:
|
|
188
|
-
pass
|
|
189
180
|
|
|
190
181
|
global _llm_cache # noqa: PLW0603
|
|
191
182
|
_llm_cache = value
|
|
192
183
|
|
|
193
184
|
|
|
194
|
-
def get_llm_cache() -> "BaseCache":
|
|
185
|
+
def get_llm_cache() -> Optional["BaseCache"]:
|
|
195
186
|
"""Get the value of the `llm_cache` global setting.
|
|
196
187
|
|
|
197
188
|
Returns:
|
|
198
189
|
The value of the `llm_cache` global setting.
|
|
199
190
|
"""
|
|
200
|
-
|
|
201
|
-
import langchain
|
|
202
|
-
|
|
191
|
+
if _HAS_LANGCHAIN:
|
|
203
192
|
# We're about to run some deprecated code, don't report warnings from it.
|
|
204
193
|
# The user called the correct (non-deprecated) code path and shouldn't get
|
|
205
194
|
# warnings.
|
|
@@ -225,7 +214,7 @@ def get_llm_cache() -> "BaseCache":
|
|
|
225
214
|
# Those users are getting deprecation warnings directing them
|
|
226
215
|
# to use `set_llm_cache()` when they import `langchain.llm_cache`.
|
|
227
216
|
old_llm_cache = langchain.llm_cache
|
|
228
|
-
|
|
217
|
+
else:
|
|
229
218
|
old_llm_cache = None
|
|
230
219
|
|
|
231
220
|
return _llm_cache or old_llm_cache
|
langchain_core/indexing/api.py
CHANGED
|
@@ -56,7 +56,7 @@ def _warn_about_sha1() -> None:
|
|
|
56
56
|
"that map to the same fingerprint. If this matters in your "
|
|
57
57
|
"threat model, switch to a stronger algorithm such "
|
|
58
58
|
"as 'blake2b', 'sha256', or 'sha512' by specifying "
|
|
59
|
-
" `key_encoder` parameter in the
|
|
59
|
+
" `key_encoder` parameter in the `index` or `aindex` function. ",
|
|
60
60
|
category=UserWarning,
|
|
61
61
|
stacklevel=2,
|
|
62
62
|
)
|
|
@@ -185,6 +185,9 @@ def _get_document_with_hash(
|
|
|
185
185
|
When changing the key encoder, you must change the
|
|
186
186
|
index as well to avoid duplicated documents in the cache.
|
|
187
187
|
|
|
188
|
+
Raises:
|
|
189
|
+
ValueError: If the metadata cannot be serialized using json.
|
|
190
|
+
|
|
188
191
|
Returns:
|
|
189
192
|
Document with a unique identifier based on the hash of the content and metadata.
|
|
190
193
|
"""
|
|
@@ -291,21 +294,21 @@ def index(
|
|
|
291
294
|
documents were deleted, which documents should be skipped.
|
|
292
295
|
|
|
293
296
|
For the time being, documents are indexed using their hashes, and users
|
|
294
|
-
|
|
297
|
+
are not able to specify the uid of the document.
|
|
295
298
|
|
|
296
299
|
Important:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
300
|
+
* In full mode, the loader should be returning
|
|
301
|
+
the entire dataset, and not just a subset of the dataset.
|
|
302
|
+
Otherwise, the auto_cleanup will remove documents that it is not
|
|
303
|
+
supposed to.
|
|
304
|
+
* In incremental mode, if documents associated with a particular
|
|
305
|
+
source id appear across different batches, the indexing API
|
|
306
|
+
will do some redundant work. This will still result in the
|
|
307
|
+
correct end state of the index, but will unfortunately not be
|
|
308
|
+
100% efficient. For example, if a given document is split into 15
|
|
309
|
+
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
|
310
|
+
all with the same source id. In general, to avoid doing too much
|
|
311
|
+
redundant work select as big a batch size as possible.
|
|
309
312
|
* The `scoped_full` mode is suitable if determining an appropriate batch size
|
|
310
313
|
is challenging or if your data loader cannot return the entire dataset at
|
|
311
314
|
once. This mode keeps track of source IDs in memory, which should be fine
|
|
@@ -315,23 +318,22 @@ def index(
|
|
|
315
318
|
Args:
|
|
316
319
|
docs_source: Data loader or iterable of documents to index.
|
|
317
320
|
record_manager: Timestamped set to keep track of which documents were
|
|
318
|
-
|
|
321
|
+
updated.
|
|
319
322
|
vector_store: VectorStore or DocumentIndex to index the documents into.
|
|
320
323
|
batch_size: Batch size to use when indexing. Default is 100.
|
|
321
324
|
cleanup: How to handle clean up of documents. Default is None.
|
|
325
|
+
|
|
322
326
|
- incremental: Cleans up all documents that haven't been updated AND
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
to minimize the probability of users seeing duplicated
|
|
327
|
-
content.
|
|
327
|
+
that are associated with source ids that were seen during indexing.
|
|
328
|
+
Clean up is done continuously during indexing helping to minimize the
|
|
329
|
+
probability of users seeing duplicated content.
|
|
328
330
|
- full: Delete all documents that have not been returned by the loader
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
331
|
+
during this run of indexing.
|
|
332
|
+
Clean up runs after all documents have been indexed.
|
|
333
|
+
This means that users may see duplicated content during indexing.
|
|
332
334
|
- scoped_full: Similar to Full, but only deletes all documents
|
|
333
|
-
|
|
334
|
-
|
|
335
|
+
that haven't been updated AND that are associated with
|
|
336
|
+
source ids that were seen during indexing.
|
|
335
337
|
- None: Do not delete any documents.
|
|
336
338
|
source_id_key: Optional key that helps identify the original source
|
|
337
339
|
of the document. Default is None.
|
|
@@ -358,10 +360,9 @@ def index(
|
|
|
358
360
|
When changing the key encoder, you must change the
|
|
359
361
|
index as well to avoid duplicated documents in the cache.
|
|
360
362
|
upsert_kwargs: Additional keyword arguments to pass to the add_documents
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
upsert_kwargs={"vector_field": "embedding"}
|
|
363
|
+
method of the VectorStore or the upsert method of the DocumentIndex.
|
|
364
|
+
For example, you can use this to specify a custom vector_field:
|
|
365
|
+
upsert_kwargs={"vector_field": "embedding"}
|
|
365
366
|
.. versionadded:: 0.3.10
|
|
366
367
|
|
|
367
368
|
Returns:
|
|
@@ -374,6 +375,9 @@ def index(
|
|
|
374
375
|
ValueError: If vectorstore does not have
|
|
375
376
|
"delete" and "add_documents" required methods.
|
|
376
377
|
ValueError: If source_id_key is not None, but is not a string or callable.
|
|
378
|
+
TypeError: If ``vectorstore`` is not a VectorStore or a DocumentIndex.
|
|
379
|
+
AssertionError: If ``source_id`` is None when cleanup mode is incremental.
|
|
380
|
+
(should be unreachable code).
|
|
377
381
|
|
|
378
382
|
.. version_modified:: 0.3.25
|
|
379
383
|
|
|
@@ -656,22 +660,22 @@ async def aindex(
|
|
|
656
660
|
Args:
|
|
657
661
|
docs_source: Data loader or iterable of documents to index.
|
|
658
662
|
record_manager: Timestamped set to keep track of which documents were
|
|
659
|
-
|
|
663
|
+
updated.
|
|
660
664
|
vector_store: VectorStore or DocumentIndex to index the documents into.
|
|
661
665
|
batch_size: Batch size to use when indexing. Default is 100.
|
|
662
666
|
cleanup: How to handle clean up of documents. Default is None.
|
|
667
|
+
|
|
663
668
|
- incremental: Cleans up all documents that haven't been updated AND
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
This means that users may see duplicated content during indexing.
|
|
669
|
+
that are associated with source ids that were seen during indexing.
|
|
670
|
+
Clean up is done continuously during indexing helping to minimize the
|
|
671
|
+
probability of users seeing duplicated content.
|
|
672
|
+
- full: Delete all documents that have not been returned by the loader
|
|
673
|
+
during this run of indexing.
|
|
674
|
+
Clean up runs after all documents have been indexed.
|
|
675
|
+
This means that users may see duplicated content during indexing.
|
|
672
676
|
- scoped_full: Similar to Full, but only deletes all documents
|
|
673
|
-
|
|
674
|
-
|
|
677
|
+
that haven't been updated AND that are associated with
|
|
678
|
+
source ids that were seen during indexing.
|
|
675
679
|
- None: Do not delete any documents.
|
|
676
680
|
source_id_key: Optional key that helps identify the original source
|
|
677
681
|
of the document. Default is None.
|
|
@@ -680,6 +684,12 @@ async def aindex(
|
|
|
680
684
|
force_update: Force update documents even if they are present in the
|
|
681
685
|
record manager. Useful if you are re-indexing with updated embeddings.
|
|
682
686
|
Default is False.
|
|
687
|
+
key_encoder: Hashing algorithm to use for hashing the document content and
|
|
688
|
+
metadata. Default is "sha1".
|
|
689
|
+
Other options include "blake2b", "sha256", and "sha512".
|
|
690
|
+
|
|
691
|
+
.. versionadded:: 0.3.66
|
|
692
|
+
|
|
683
693
|
key_encoder: Hashing algorithm to use for hashing the document.
|
|
684
694
|
If not provided, a default encoder using SHA-1 will be used.
|
|
685
695
|
SHA-1 is not collision-resistant, and a motivated attacker
|
|
@@ -691,11 +701,10 @@ async def aindex(
|
|
|
691
701
|
|
|
692
702
|
When changing the key encoder, you must change the
|
|
693
703
|
index as well to avoid duplicated documents in the cache.
|
|
694
|
-
upsert_kwargs: Additional keyword arguments to pass to the
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
upsert_kwargs={"vector_field": "embedding"}
|
|
704
|
+
upsert_kwargs: Additional keyword arguments to pass to the add_documents
|
|
705
|
+
method of the VectorStore or the upsert method of the DocumentIndex.
|
|
706
|
+
For example, you can use this to specify a custom vector_field:
|
|
707
|
+
upsert_kwargs={"vector_field": "embedding"}
|
|
699
708
|
.. versionadded:: 0.3.10
|
|
700
709
|
|
|
701
710
|
Returns:
|
|
@@ -708,6 +717,9 @@ async def aindex(
|
|
|
708
717
|
ValueError: If vectorstore does not have
|
|
709
718
|
"adelete" and "aadd_documents" required methods.
|
|
710
719
|
ValueError: If source_id_key is not None, but is not a string or callable.
|
|
720
|
+
TypeError: If ``vector_store`` is not a VectorStore or DocumentIndex.
|
|
721
|
+
AssertionError: If ``source_id_key`` is None when cleanup mode is
|
|
722
|
+
incremental or ``scoped_full`` (should be unreachable).
|
|
711
723
|
|
|
712
724
|
.. version_modified:: 0.3.25
|
|
713
725
|
|
langchain_core/indexing/base.py
CHANGED
|
@@ -7,6 +7,8 @@ import time
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Optional, TypedDict
|
|
9
9
|
|
|
10
|
+
from typing_extensions import override
|
|
11
|
+
|
|
10
12
|
from langchain_core._api import beta
|
|
11
13
|
from langchain_core.retrievers import BaseRetriever
|
|
12
14
|
from langchain_core.runnables import run_in_executor
|
|
@@ -254,14 +256,14 @@ class InMemoryRecordManager(RecordManager):
|
|
|
254
256
|
"""In-memory schema creation is simply ensuring the structure is initialized."""
|
|
255
257
|
|
|
256
258
|
async def acreate_schema(self) -> None:
|
|
257
|
-
"""
|
|
259
|
+
"""In-memory schema creation is simply ensuring the structure is initialized."""
|
|
258
260
|
|
|
261
|
+
@override
|
|
259
262
|
def get_time(self) -> float:
|
|
260
|
-
"""Get the current server time as a high resolution timestamp!"""
|
|
261
263
|
return time.time()
|
|
262
264
|
|
|
265
|
+
@override
|
|
263
266
|
async def aget_time(self) -> float:
|
|
264
|
-
"""Async get the current server time as a high resolution timestamp!"""
|
|
265
267
|
return self.get_time()
|
|
266
268
|
|
|
267
269
|
def update(
|
|
@@ -322,11 +324,6 @@ class InMemoryRecordManager(RecordManager):
|
|
|
322
324
|
raise an error.
|
|
323
325
|
This is meant to help prevent time-drift issues since
|
|
324
326
|
time may not be monotonically increasing!
|
|
325
|
-
|
|
326
|
-
Raises:
|
|
327
|
-
ValueError: If the length of keys doesn't match the length of group
|
|
328
|
-
ids.
|
|
329
|
-
ValueError: If time_at_least is in the future.
|
|
330
327
|
"""
|
|
331
328
|
self.update(keys, group_ids=group_ids, time_at_least=time_at_least)
|
|
332
329
|
|
langchain_core/indexing/in_memory.py
CHANGED
|
@@ -32,7 +32,17 @@ class InMemoryDocumentIndex(DocumentIndex):
|
|
|
32
32
|
|
|
33
33
|
@override
|
|
34
34
|
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
|
|
35
|
-
"""Upsert
|
|
35
|
+
"""Upsert documents into the index.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
items: Sequence of documents to add to the index.
|
|
39
|
+
**kwargs: Additional keyword arguments.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
A response object that contains the list of IDs that were
|
|
43
|
+
successfully added or updated in the index and the list of IDs that
|
|
44
|
+
failed to be added or updated.
|
|
45
|
+
"""
|
|
36
46
|
ok_ids = []
|
|
37
47
|
|
|
38
48
|
for item in items:
|
|
@@ -51,7 +61,18 @@ class InMemoryDocumentIndex(DocumentIndex):
|
|
|
51
61
|
|
|
52
62
|
@override
|
|
53
63
|
def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> DeleteResponse:
|
|
54
|
-
"""Delete by
|
|
64
|
+
"""Delete by IDs.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
ids: List of ids to delete.
|
|
68
|
+
|
|
69
|
+
Raises:
|
|
70
|
+
ValueError: If ids is None.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
A response object that contains the list of IDs that were successfully
|
|
74
|
+
deleted and the list of IDs that failed to be deleted.
|
|
75
|
+
"""
|
|
55
76
|
if ids is None:
|
|
56
77
|
msg = "IDs must be provided for deletion"
|
|
57
78
|
raise ValueError(msg)
|
|
@@ -69,7 +90,6 @@ class InMemoryDocumentIndex(DocumentIndex):
|
|
|
69
90
|
|
|
70
91
|
@override
|
|
71
92
|
def get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]:
|
|
72
|
-
"""Get by ids."""
|
|
73
93
|
return [self.store[id_] for id_ in ids if id_ in self.store]
|
|
74
94
|
|
|
75
95
|
@override
|