langchain-core: langchain_core-1.0.0a8-py3-none-any.whl → langchain_core-1.0.0rc2-py3-none-any.whl
This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain-core might be problematic. Click here for more details.
- langchain_core/__init__.py +1 -1
- langchain_core/_api/__init__.py +0 -1
- langchain_core/_api/beta_decorator.py +17 -20
- langchain_core/_api/deprecation.py +30 -35
- langchain_core/_import_utils.py +1 -1
- langchain_core/agents.py +10 -9
- langchain_core/caches.py +46 -56
- langchain_core/callbacks/__init__.py +1 -8
- langchain_core/callbacks/base.py +232 -243
- langchain_core/callbacks/file.py +33 -33
- langchain_core/callbacks/manager.py +353 -416
- langchain_core/callbacks/stdout.py +21 -22
- langchain_core/callbacks/streaming_stdout.py +32 -32
- langchain_core/callbacks/usage.py +54 -51
- langchain_core/chat_history.py +43 -58
- langchain_core/document_loaders/base.py +21 -21
- langchain_core/document_loaders/langsmith.py +22 -22
- langchain_core/documents/__init__.py +0 -1
- langchain_core/documents/base.py +46 -49
- langchain_core/documents/transformers.py +28 -29
- langchain_core/embeddings/fake.py +50 -54
- langchain_core/example_selectors/semantic_similarity.py +4 -6
- langchain_core/exceptions.py +7 -8
- langchain_core/indexing/api.py +19 -25
- langchain_core/indexing/base.py +24 -24
- langchain_core/language_models/__init__.py +11 -27
- langchain_core/language_models/_utils.py +53 -54
- langchain_core/language_models/base.py +30 -24
- langchain_core/language_models/chat_models.py +123 -148
- langchain_core/language_models/fake_chat_models.py +7 -7
- langchain_core/language_models/llms.py +14 -16
- langchain_core/load/dump.py +3 -4
- langchain_core/load/load.py +7 -16
- langchain_core/load/serializable.py +37 -36
- langchain_core/messages/__init__.py +1 -16
- langchain_core/messages/ai.py +122 -123
- langchain_core/messages/base.py +31 -31
- langchain_core/messages/block_translators/__init__.py +17 -17
- langchain_core/messages/block_translators/anthropic.py +3 -3
- langchain_core/messages/block_translators/bedrock_converse.py +3 -3
- langchain_core/messages/block_translators/google_genai.py +5 -4
- langchain_core/messages/block_translators/google_vertexai.py +4 -32
- langchain_core/messages/block_translators/groq.py +117 -21
- langchain_core/messages/block_translators/langchain_v0.py +3 -3
- langchain_core/messages/block_translators/openai.py +5 -5
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +222 -209
- langchain_core/messages/function.py +6 -10
- langchain_core/messages/human.py +17 -24
- langchain_core/messages/modifier.py +2 -2
- langchain_core/messages/system.py +12 -22
- langchain_core/messages/tool.py +53 -69
- langchain_core/messages/utils.py +399 -417
- langchain_core/output_parsers/__init__.py +1 -14
- langchain_core/output_parsers/base.py +46 -47
- langchain_core/output_parsers/json.py +3 -4
- langchain_core/output_parsers/list.py +2 -2
- langchain_core/output_parsers/openai_functions.py +46 -44
- langchain_core/output_parsers/openai_tools.py +11 -16
- langchain_core/output_parsers/pydantic.py +10 -11
- langchain_core/output_parsers/string.py +2 -2
- langchain_core/output_parsers/transform.py +2 -2
- langchain_core/output_parsers/xml.py +1 -1
- langchain_core/outputs/__init__.py +1 -1
- langchain_core/outputs/chat_generation.py +14 -14
- langchain_core/outputs/generation.py +6 -6
- langchain_core/outputs/llm_result.py +5 -5
- langchain_core/prompt_values.py +11 -11
- langchain_core/prompts/__init__.py +3 -23
- langchain_core/prompts/base.py +33 -38
- langchain_core/prompts/chat.py +222 -229
- langchain_core/prompts/dict.py +3 -3
- langchain_core/prompts/few_shot.py +76 -83
- langchain_core/prompts/few_shot_with_templates.py +7 -9
- langchain_core/prompts/image.py +12 -14
- langchain_core/prompts/loading.py +1 -1
- langchain_core/prompts/message.py +3 -3
- langchain_core/prompts/prompt.py +20 -23
- langchain_core/prompts/string.py +20 -8
- langchain_core/prompts/structured.py +26 -27
- langchain_core/rate_limiters.py +50 -58
- langchain_core/retrievers.py +41 -182
- langchain_core/runnables/base.py +565 -597
- langchain_core/runnables/branch.py +8 -8
- langchain_core/runnables/config.py +37 -44
- langchain_core/runnables/configurable.py +9 -10
- langchain_core/runnables/fallbacks.py +9 -9
- langchain_core/runnables/graph.py +46 -50
- langchain_core/runnables/graph_ascii.py +19 -18
- langchain_core/runnables/graph_mermaid.py +20 -31
- langchain_core/runnables/graph_png.py +7 -7
- langchain_core/runnables/history.py +22 -22
- langchain_core/runnables/passthrough.py +11 -11
- langchain_core/runnables/retry.py +3 -3
- langchain_core/runnables/router.py +2 -2
- langchain_core/runnables/schema.py +33 -33
- langchain_core/runnables/utils.py +30 -34
- langchain_core/stores.py +72 -102
- langchain_core/sys_info.py +27 -29
- langchain_core/tools/__init__.py +1 -14
- langchain_core/tools/base.py +70 -71
- langchain_core/tools/convert.py +100 -104
- langchain_core/tools/render.py +9 -9
- langchain_core/tools/retriever.py +7 -7
- langchain_core/tools/simple.py +6 -7
- langchain_core/tools/structured.py +18 -24
- langchain_core/tracers/__init__.py +1 -9
- langchain_core/tracers/base.py +35 -35
- langchain_core/tracers/context.py +12 -17
- langchain_core/tracers/event_stream.py +3 -3
- langchain_core/tracers/langchain.py +8 -8
- langchain_core/tracers/log_stream.py +17 -18
- langchain_core/tracers/memory_stream.py +3 -3
- langchain_core/tracers/root_listeners.py +2 -2
- langchain_core/tracers/schemas.py +0 -129
- langchain_core/tracers/stdout.py +1 -2
- langchain_core/utils/__init__.py +1 -1
- langchain_core/utils/aiter.py +32 -32
- langchain_core/utils/env.py +5 -5
- langchain_core/utils/function_calling.py +59 -154
- langchain_core/utils/html.py +4 -4
- langchain_core/utils/input.py +3 -3
- langchain_core/utils/interactive_env.py +1 -1
- langchain_core/utils/iter.py +20 -20
- langchain_core/utils/json.py +1 -1
- langchain_core/utils/json_schema.py +2 -2
- langchain_core/utils/mustache.py +5 -5
- langchain_core/utils/pydantic.py +17 -17
- langchain_core/utils/strings.py +5 -5
- langchain_core/utils/utils.py +25 -28
- langchain_core/vectorstores/base.py +55 -87
- langchain_core/vectorstores/in_memory.py +83 -85
- langchain_core/vectorstores/utils.py +2 -2
- langchain_core/version.py +1 -1
- {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc2.dist-info}/METADATA +23 -11
- langchain_core-1.0.0rc2.dist-info/RECORD +172 -0
- langchain_core/memory.py +0 -120
- langchain_core/pydantic_v1/__init__.py +0 -30
- langchain_core/pydantic_v1/dataclasses.py +0 -23
- langchain_core/pydantic_v1/main.py +0 -23
- langchain_core-1.0.0a8.dist-info/RECORD +0 -176
- {langchain_core-1.0.0a8.dist-info → langchain_core-1.0.0rc2.dist-info}/WHEEL +0 -0
|
langchain_core/document_loaders/langsmith.py
CHANGED
|
@@ -22,22 +22,22 @@ class LangSmithLoader(BaseLoader):
|
|
|
22
22
|
|
|
23
23
|
??? note "Lazy load"
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
```python
|
|
26
|
+
from langchain_core.document_loaders import LangSmithLoader
|
|
26
27
|
|
|
27
|
-
|
|
28
|
+
loader = LangSmithLoader(dataset_id="...", limit=100)
|
|
29
|
+
docs = []
|
|
30
|
+
for doc in loader.lazy_load():
|
|
31
|
+
docs.append(doc)
|
|
32
|
+
```
|
|
28
33
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
docs.append(doc)
|
|
33
|
-
|
|
34
|
-
.. code-block:: python
|
|
35
|
-
|
|
36
|
-
# -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
|
|
34
|
+
```python
|
|
35
|
+
# -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
|
|
36
|
+
```
|
|
37
37
|
|
|
38
38
|
!!! version-added "Added in version 0.2.34"
|
|
39
39
|
|
|
40
|
-
"""
|
|
40
|
+
"""
|
|
41
41
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
@@ -60,15 +60,15 @@ class LangSmithLoader(BaseLoader):
|
|
|
60
60
|
"""Create a LangSmith loader.
|
|
61
61
|
|
|
62
62
|
Args:
|
|
63
|
-
dataset_id: The ID of the dataset to filter by.
|
|
64
|
-
dataset_name: The name of the dataset to filter by.
|
|
65
|
-
content_key: The inputs key to set as Document page content.
|
|
66
|
-
are interpreted as nested keys. E.g.
|
|
63
|
+
dataset_id: The ID of the dataset to filter by.
|
|
64
|
+
dataset_name: The name of the dataset to filter by.
|
|
65
|
+
content_key: The inputs key to set as Document page content. `'.'` characters
|
|
66
|
+
are interpreted as nested keys. E.g. `content_key="first.second"` will
|
|
67
67
|
result in
|
|
68
|
-
|
|
68
|
+
`Document(page_content=format_content(example.inputs["first"]["second"]))`
|
|
69
69
|
format_content: Function for converting the content extracted from the example
|
|
70
70
|
inputs into a string. Defaults to JSON-encoding the contents.
|
|
71
|
-
example_ids: The IDs of the examples to filter by.
|
|
71
|
+
example_ids: The IDs of the examples to filter by.
|
|
72
72
|
as_of: The dataset version tag OR
|
|
73
73
|
timestamp to retrieve the examples as of.
|
|
74
74
|
Response examples will only be those that were present at the time
|
|
@@ -76,17 +76,17 @@ class LangSmithLoader(BaseLoader):
|
|
|
76
76
|
splits: A list of dataset splits, which are
|
|
77
77
|
divisions of your dataset such as 'train', 'test', or 'validation'.
|
|
78
78
|
Returns examples only from the specified splits.
|
|
79
|
-
inline_s3_urls: Whether to inline S3 URLs.
|
|
80
|
-
offset: The offset to start from.
|
|
79
|
+
inline_s3_urls: Whether to inline S3 URLs.
|
|
80
|
+
offset: The offset to start from.
|
|
81
81
|
limit: The maximum number of examples to return.
|
|
82
|
-
metadata: Metadata to filter by.
|
|
82
|
+
metadata: Metadata to filter by.
|
|
83
83
|
filter: A structured filter string to apply to the examples.
|
|
84
84
|
client: LangSmith Client. If not provided will be initialized from below args.
|
|
85
85
|
client_kwargs: Keyword args to pass to LangSmith client init. Should only be
|
|
86
|
-
specified if
|
|
86
|
+
specified if `client` isn't.
|
|
87
87
|
|
|
88
88
|
Raises:
|
|
89
|
-
ValueError: If both
|
|
89
|
+
ValueError: If both `client` and `client_kwargs` are provided.
|
|
90
90
|
""" # noqa: E501
|
|
91
91
|
if client and client_kwargs:
|
|
92
92
|
raise ValueError
|
langchain_core/documents/base.py
CHANGED
|
@@ -57,52 +57,51 @@ class Blob(BaseMedia):
|
|
|
57
57
|
|
|
58
58
|
Example: Initialize a blob from in-memory data
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
```python
|
|
61
|
+
from langchain_core.documents import Blob
|
|
61
62
|
|
|
62
|
-
|
|
63
|
+
blob = Blob.from_data("Hello, world!")
|
|
63
64
|
|
|
64
|
-
|
|
65
|
+
# Read the blob as a string
|
|
66
|
+
print(blob.as_string())
|
|
65
67
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
# Read the blob as bytes
|
|
69
|
+
print(blob.as_bytes())
|
|
68
70
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
with blob.as_bytes_io() as f:
|
|
74
|
-
print(f.read())
|
|
71
|
+
# Read the blob as a byte stream
|
|
72
|
+
with blob.as_bytes_io() as f:
|
|
73
|
+
print(f.read())
|
|
74
|
+
```
|
|
75
75
|
|
|
76
76
|
Example: Load from memory and specify mime-type and metadata
|
|
77
77
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
from langchain_core.documents import Blob
|
|
78
|
+
```python
|
|
79
|
+
from langchain_core.documents import Blob
|
|
81
80
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
81
|
+
blob = Blob.from_data(
|
|
82
|
+
data="Hello, world!",
|
|
83
|
+
mime_type="text/plain",
|
|
84
|
+
metadata={"source": "https://example.com"},
|
|
85
|
+
)
|
|
86
|
+
```
|
|
87
87
|
|
|
88
88
|
Example: Load the blob from a file
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
from langchain_core.documents import Blob
|
|
93
|
-
|
|
94
|
-
blob = Blob.from_path("path/to/file.txt")
|
|
90
|
+
```python
|
|
91
|
+
from langchain_core.documents import Blob
|
|
95
92
|
|
|
96
|
-
|
|
97
|
-
print(blob.as_string())
|
|
93
|
+
blob = Blob.from_path("path/to/file.txt")
|
|
98
94
|
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
# Read the blob as a string
|
|
96
|
+
print(blob.as_string())
|
|
101
97
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
print(f.read())
|
|
98
|
+
# Read the blob as bytes
|
|
99
|
+
print(blob.as_bytes())
|
|
105
100
|
|
|
101
|
+
# Read the blob as a byte stream
|
|
102
|
+
with blob.as_bytes_io() as f:
|
|
103
|
+
print(f.read())
|
|
104
|
+
```
|
|
106
105
|
"""
|
|
107
106
|
|
|
108
107
|
data: bytes | str | None = None
|
|
@@ -112,7 +111,7 @@ class Blob(BaseMedia):
|
|
|
112
111
|
encoding: str = "utf-8"
|
|
113
112
|
"""Encoding to use if decoding the bytes into a string.
|
|
114
113
|
|
|
115
|
-
Use utf-8 as default encoding, if decoding to string.
|
|
114
|
+
Use `utf-8` as default encoding, if decoding to string.
|
|
116
115
|
"""
|
|
117
116
|
path: PathLike | None = None
|
|
118
117
|
"""Location where the original content was found."""
|
|
@@ -128,7 +127,7 @@ class Blob(BaseMedia):
|
|
|
128
127
|
|
|
129
128
|
If a path is associated with the blob, it will default to the path location.
|
|
130
129
|
|
|
131
|
-
Unless explicitly set via a metadata field called "source"
|
|
130
|
+
Unless explicitly set via a metadata field called `"source"`, in which
|
|
132
131
|
case that value will be used instead.
|
|
133
132
|
"""
|
|
134
133
|
if self.metadata and "source" in self.metadata:
|
|
@@ -212,11 +211,11 @@ class Blob(BaseMedia):
|
|
|
212
211
|
"""Load the blob from a path like object.
|
|
213
212
|
|
|
214
213
|
Args:
|
|
215
|
-
path:
|
|
214
|
+
path: Path-like object to file to be read
|
|
216
215
|
encoding: Encoding to use if decoding the bytes into a string
|
|
217
|
-
mime_type:
|
|
218
|
-
guess_type: If True
|
|
219
|
-
|
|
216
|
+
mime_type: If provided, will be set as the mime-type of the data
|
|
217
|
+
guess_type: If `True`, the mimetype will be guessed from the file extension,
|
|
218
|
+
if a mime-type was not provided
|
|
220
219
|
metadata: Metadata to associate with the blob
|
|
221
220
|
|
|
222
221
|
Returns:
|
|
@@ -249,10 +248,10 @@ class Blob(BaseMedia):
|
|
|
249
248
|
"""Initialize the blob from in-memory data.
|
|
250
249
|
|
|
251
250
|
Args:
|
|
252
|
-
data:
|
|
251
|
+
data: The in-memory data associated with the blob
|
|
253
252
|
encoding: Encoding to use if decoding the bytes into a string
|
|
254
|
-
mime_type:
|
|
255
|
-
path:
|
|
253
|
+
mime_type: If provided, will be set as the mime-type of the data
|
|
254
|
+
path: If provided, will be set as the source from which the data came
|
|
256
255
|
metadata: Metadata to associate with the blob
|
|
257
256
|
|
|
258
257
|
Returns:
|
|
@@ -278,15 +277,13 @@ class Document(BaseMedia):
|
|
|
278
277
|
"""Class for storing a piece of text and associated metadata.
|
|
279
278
|
|
|
280
279
|
Example:
|
|
280
|
+
```python
|
|
281
|
+
from langchain_core.documents import Document
|
|
281
282
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
document = Document(
|
|
287
|
-
page_content="Hello, world!", metadata={"source": "https://example.com"}
|
|
288
|
-
)
|
|
289
|
-
|
|
283
|
+
document = Document(
|
|
284
|
+
page_content="Hello, world!", metadata={"source": "https://example.com"}
|
|
285
|
+
)
|
|
286
|
+
```
|
|
290
287
|
"""
|
|
291
288
|
|
|
292
289
|
page_content: str
|
|
@@ -306,7 +303,7 @@ class Document(BaseMedia):
|
|
|
306
303
|
|
|
307
304
|
@classmethod
|
|
308
305
|
def get_lc_namespace(cls) -> list[str]:
|
|
309
|
-
"""Get the namespace of the
|
|
306
|
+
"""Get the namespace of the LangChain object.
|
|
310
307
|
|
|
311
308
|
Returns:
|
|
312
309
|
["langchain", "schema", "document"]
|
|
langchain_core/documents/transformers.py
CHANGED
|
@@ -20,35 +20,34 @@ class BaseDocumentTransformer(ABC):
|
|
|
20
20
|
sequence of transformed Documents.
|
|
21
21
|
|
|
22
22
|
Example:
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
23
|
+
```python
|
|
24
|
+
class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
|
|
25
|
+
embeddings: Embeddings
|
|
26
|
+
similarity_fn: Callable = cosine_similarity
|
|
27
|
+
similarity_threshold: float = 0.95
|
|
28
|
+
|
|
29
|
+
class Config:
|
|
30
|
+
arbitrary_types_allowed = True
|
|
31
|
+
|
|
32
|
+
def transform_documents(
|
|
33
|
+
self, documents: Sequence[Document], **kwargs: Any
|
|
34
|
+
) -> Sequence[Document]:
|
|
35
|
+
stateful_documents = get_stateful_documents(documents)
|
|
36
|
+
embedded_documents = _get_embeddings_from_stateful_docs(
|
|
37
|
+
self.embeddings, stateful_documents
|
|
38
|
+
)
|
|
39
|
+
included_idxs = _filter_similar_embeddings(
|
|
40
|
+
embedded_documents,
|
|
41
|
+
self.similarity_fn,
|
|
42
|
+
self.similarity_threshold,
|
|
43
|
+
)
|
|
44
|
+
return [stateful_documents[i] for i in sorted(included_idxs)]
|
|
45
|
+
|
|
46
|
+
async def atransform_documents(
|
|
47
|
+
self, documents: Sequence[Document], **kwargs: Any
|
|
48
|
+
) -> Sequence[Document]:
|
|
49
|
+
raise NotImplementedError
|
|
50
|
+
```
|
|
52
51
|
"""
|
|
53
52
|
|
|
54
53
|
@abstractmethod
|
|
langchain_core/embeddings/fake.py
CHANGED
|
@@ -18,40 +18,38 @@ class FakeEmbeddings(Embeddings, BaseModel):
|
|
|
18
18
|
|
|
19
19
|
This embedding model creates embeddings by sampling from a normal distribution.
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
!!! warning
|
|
22
|
+
Do not use this outside of testing, as it is not a real embedding model.
|
|
22
23
|
|
|
23
24
|
Instantiate:
|
|
24
|
-
|
|
25
|
+
```python
|
|
26
|
+
from langchain_core.embeddings import FakeEmbeddings
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
embed = FakeEmbeddings(size=100)
|
|
28
|
+
embed = FakeEmbeddings(size=100)
|
|
29
|
+
```
|
|
29
30
|
|
|
30
31
|
Embed single text:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
[-0.700234640213188, -0.581266257710429, -1.1328482266445354]
|
|
32
|
+
```python
|
|
33
|
+
input_text = "The meaning of life is 42"
|
|
34
|
+
vector = embed.embed_query(input_text)
|
|
35
|
+
print(vector[:3])
|
|
36
|
+
```
|
|
37
|
+
```python
|
|
38
|
+
[-0.700234640213188, -0.581266257710429, -1.1328482266445354]
|
|
39
|
+
```
|
|
40
40
|
|
|
41
41
|
Embed multiple texts:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
[-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
|
|
54
|
-
|
|
42
|
+
```python
|
|
43
|
+
input_texts = ["Document 1...", "Document 2..."]
|
|
44
|
+
vectors = embed.embed_documents(input_texts)
|
|
45
|
+
print(len(vectors))
|
|
46
|
+
# The first 3 coordinates for the first vector
|
|
47
|
+
print(vectors[0][:3])
|
|
48
|
+
```
|
|
49
|
+
```python
|
|
50
|
+
2
|
|
51
|
+
[-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
|
|
52
|
+
```
|
|
55
53
|
"""
|
|
56
54
|
|
|
57
55
|
size: int
|
|
@@ -75,40 +73,38 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
|
|
|
75
73
|
This embedding model creates embeddings by sampling from a normal distribution
|
|
76
74
|
with a seed based on the hash of the text.
|
|
77
75
|
|
|
78
|
-
|
|
76
|
+
!!! warning
|
|
77
|
+
Do not use this outside of testing, as it is not a real embedding model.
|
|
79
78
|
|
|
80
79
|
Instantiate:
|
|
81
|
-
|
|
80
|
+
```python
|
|
81
|
+
from langchain_core.embeddings import DeterministicFakeEmbedding
|
|
82
82
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
embed = DeterministicFakeEmbedding(size=100)
|
|
83
|
+
embed = DeterministicFakeEmbedding(size=100)
|
|
84
|
+
```
|
|
86
85
|
|
|
87
86
|
Embed single text:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
[-0.700234640213188, -0.581266257710429, -1.1328482266445354]
|
|
87
|
+
```python
|
|
88
|
+
input_text = "The meaning of life is 42"
|
|
89
|
+
vector = embed.embed_query(input_text)
|
|
90
|
+
print(vector[:3])
|
|
91
|
+
```
|
|
92
|
+
```python
|
|
93
|
+
[-0.700234640213188, -0.581266257710429, -1.1328482266445354]
|
|
94
|
+
```
|
|
97
95
|
|
|
98
96
|
Embed multiple texts:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
[-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
|
|
111
|
-
|
|
97
|
+
```python
|
|
98
|
+
input_texts = ["Document 1...", "Document 2..."]
|
|
99
|
+
vectors = embed.embed_documents(input_texts)
|
|
100
|
+
print(len(vectors))
|
|
101
|
+
# The first 3 coordinates for the first vector
|
|
102
|
+
print(vectors[0][:3])
|
|
103
|
+
```
|
|
104
|
+
```python
|
|
105
|
+
2
|
|
106
|
+
[-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
|
|
107
|
+
```
|
|
112
108
|
"""
|
|
113
109
|
|
|
114
110
|
size: int
|
|
langchain_core/example_selectors/semantic_similarity.py
CHANGED
|
@@ -154,7 +154,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
|
|
|
154
154
|
examples: List of examples to use in the prompt.
|
|
155
155
|
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
|
156
156
|
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
|
157
|
-
k: Number of examples to select.
|
|
157
|
+
k: Number of examples to select.
|
|
158
158
|
input_keys: If provided, the search is based on the input variables
|
|
159
159
|
instead of all variables.
|
|
160
160
|
example_keys: If provided, keys to filter examples to.
|
|
@@ -198,7 +198,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
|
|
|
198
198
|
examples: List of examples to use in the prompt.
|
|
199
199
|
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
|
200
200
|
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
|
201
|
-
k: Number of examples to select.
|
|
201
|
+
k: Number of examples to select.
|
|
202
202
|
input_keys: If provided, the search is based on the input variables
|
|
203
203
|
instead of all variables.
|
|
204
204
|
example_keys: If provided, keys to filter examples to.
|
|
@@ -285,9 +285,8 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
|
|
|
285
285
|
examples: List of examples to use in the prompt.
|
|
286
286
|
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
|
287
287
|
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
|
288
|
-
k: Number of examples to select.
|
|
288
|
+
k: Number of examples to select.
|
|
289
289
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
290
|
-
Default is 20.
|
|
291
290
|
input_keys: If provided, the search is based on the input variables
|
|
292
291
|
instead of all variables.
|
|
293
292
|
example_keys: If provided, keys to filter examples to.
|
|
@@ -333,9 +332,8 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
|
|
|
333
332
|
examples: List of examples to use in the prompt.
|
|
334
333
|
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
|
335
334
|
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
|
336
|
-
k: Number of examples to select.
|
|
335
|
+
k: Number of examples to select.
|
|
337
336
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
338
|
-
Default is 20.
|
|
339
337
|
input_keys: If provided, the search is based on the input variables
|
|
340
338
|
instead of all variables.
|
|
341
339
|
example_keys: If provided, keys to filter examples to.
|
langchain_core/exceptions.py
CHANGED
|
@@ -16,7 +16,7 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
|
|
|
16
16
|
"""Exception that output parsers should raise to signify a parsing error.
|
|
17
17
|
|
|
18
18
|
This exists to differentiate parsing errors from other code or execution errors
|
|
19
|
-
that also may arise inside the output parser.
|
|
19
|
+
that also may arise inside the output parser. `OutputParserException` will be
|
|
20
20
|
available to catch and handle in ways to fix the parsing error, while other
|
|
21
21
|
errors will be raised.
|
|
22
22
|
"""
|
|
@@ -28,24 +28,23 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
|
|
|
28
28
|
llm_output: str | None = None,
|
|
29
29
|
send_to_llm: bool = False, # noqa: FBT001,FBT002
|
|
30
30
|
):
|
|
31
|
-
"""Create an OutputParserException
|
|
31
|
+
"""Create an `OutputParserException`.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
34
|
error: The error that's being re-raised or an error message.
|
|
35
35
|
observation: String explanation of error which can be passed to a
|
|
36
|
-
model to try and remediate the issue.
|
|
36
|
+
model to try and remediate the issue.
|
|
37
37
|
llm_output: String model output which is error-ing.
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
send_to_llm: Whether to send the observation and llm_output back to an Agent
|
|
40
|
-
after an OutputParserException has been raised.
|
|
40
|
+
after an `OutputParserException` has been raised.
|
|
41
41
|
This gives the underlying model driving the agent the context that the
|
|
42
42
|
previous output was improperly structured, in the hopes that it will
|
|
43
43
|
update the output to the correct format.
|
|
44
|
-
Defaults to False.
|
|
45
44
|
|
|
46
45
|
Raises:
|
|
47
|
-
ValueError: If
|
|
48
|
-
|
|
46
|
+
ValueError: If `send_to_llm` is True but either observation or
|
|
47
|
+
`llm_output` are not provided.
|
|
49
48
|
"""
|
|
50
49
|
if isinstance(error, str):
|
|
51
50
|
error = create_message(
|
langchain_core/indexing/api.py
CHANGED
|
@@ -299,9 +299,9 @@ def index(
|
|
|
299
299
|
are not able to specify the uid of the document.
|
|
300
300
|
|
|
301
301
|
!!! warning "Behavior changed in 0.3.25"
|
|
302
|
-
Added
|
|
302
|
+
Added `scoped_full` cleanup mode.
|
|
303
303
|
|
|
304
|
-
!!!
|
|
304
|
+
!!! warning
|
|
305
305
|
|
|
306
306
|
* In full mode, the loader should be returning
|
|
307
307
|
the entire dataset, and not just a subset of the dataset.
|
|
@@ -315,7 +315,7 @@ def index(
|
|
|
315
315
|
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
|
316
316
|
all with the same source id. In general, to avoid doing too much
|
|
317
317
|
redundant work select as big a batch size as possible.
|
|
318
|
-
* The
|
|
318
|
+
* The `scoped_full` mode is suitable if determining an appropriate batch size
|
|
319
319
|
is challenging or if your data loader cannot return the entire dataset at
|
|
320
320
|
once. This mode keeps track of source IDs in memory, which should be fine
|
|
321
321
|
for most use cases. If your dataset is large (10M+ docs), you will likely
|
|
@@ -326,8 +326,8 @@ def index(
|
|
|
326
326
|
record_manager: Timestamped set to keep track of which documents were
|
|
327
327
|
updated.
|
|
328
328
|
vector_store: VectorStore or DocumentIndex to index the documents into.
|
|
329
|
-
batch_size: Batch size to use when indexing.
|
|
330
|
-
cleanup: How to handle clean up of documents.
|
|
329
|
+
batch_size: Batch size to use when indexing.
|
|
330
|
+
cleanup: How to handle clean up of documents.
|
|
331
331
|
|
|
332
332
|
- incremental: Cleans up all documents that haven't been updated AND
|
|
333
333
|
that are associated with source ids that were seen during indexing.
|
|
@@ -342,15 +342,12 @@ def index(
|
|
|
342
342
|
source ids that were seen during indexing.
|
|
343
343
|
- None: Do not delete any documents.
|
|
344
344
|
source_id_key: Optional key that helps identify the original source
|
|
345
|
-
of the document.
|
|
345
|
+
of the document.
|
|
346
346
|
cleanup_batch_size: Batch size to use when cleaning up documents.
|
|
347
|
-
Default is 1_000.
|
|
348
347
|
force_update: Force update documents even if they are present in the
|
|
349
348
|
record manager. Useful if you are re-indexing with updated embeddings.
|
|
350
|
-
Default is False.
|
|
351
349
|
key_encoder: Hashing algorithm to use for hashing the document content and
|
|
352
|
-
metadata.
|
|
353
|
-
Other options include "blake2b", "sha256", and "sha512".
|
|
350
|
+
metadata. Options include "blake2b", "sha256", and "sha512".
|
|
354
351
|
|
|
355
352
|
!!! version-added "Added in version 0.3.66"
|
|
356
353
|
|
|
@@ -381,8 +378,8 @@ def index(
|
|
|
381
378
|
ValueError: If vectorstore does not have
|
|
382
379
|
"delete" and "add_documents" required methods.
|
|
383
380
|
ValueError: If source_id_key is not None, but is not a string or callable.
|
|
384
|
-
TypeError: If
|
|
385
|
-
AssertionError: If
|
|
381
|
+
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
|
|
382
|
+
AssertionError: If `source_id` is None when cleanup mode is incremental.
|
|
386
383
|
(should be unreachable code).
|
|
387
384
|
"""
|
|
388
385
|
# Behavior is deprecated, but we keep it for backwards compatibility.
|
|
@@ -640,9 +637,9 @@ async def aindex(
|
|
|
640
637
|
are not able to specify the uid of the document.
|
|
641
638
|
|
|
642
639
|
!!! warning "Behavior changed in 0.3.25"
|
|
643
|
-
Added
|
|
640
|
+
Added `scoped_full` cleanup mode.
|
|
644
641
|
|
|
645
|
-
!!!
|
|
642
|
+
!!! warning
|
|
646
643
|
|
|
647
644
|
* In full mode, the loader should be returning
|
|
648
645
|
the entire dataset, and not just a subset of the dataset.
|
|
@@ -656,7 +653,7 @@ async def aindex(
|
|
|
656
653
|
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
|
657
654
|
all with the same source id. In general, to avoid doing too much
|
|
658
655
|
redundant work select as big a batch size as possible.
|
|
659
|
-
* The
|
|
656
|
+
* The `scoped_full` mode is suitable if determining an appropriate batch size
|
|
660
657
|
is challenging or if your data loader cannot return the entire dataset at
|
|
661
658
|
once. This mode keeps track of source IDs in memory, which should be fine
|
|
662
659
|
for most use cases. If your dataset is large (10M+ docs), you will likely
|
|
@@ -667,8 +664,8 @@ async def aindex(
|
|
|
667
664
|
record_manager: Timestamped set to keep track of which documents were
|
|
668
665
|
updated.
|
|
669
666
|
vector_store: VectorStore or DocumentIndex to index the documents into.
|
|
670
|
-
batch_size: Batch size to use when indexing.
|
|
671
|
-
cleanup: How to handle clean up of documents.
|
|
667
|
+
batch_size: Batch size to use when indexing.
|
|
668
|
+
cleanup: How to handle clean up of documents.
|
|
672
669
|
|
|
673
670
|
- incremental: Cleans up all documents that haven't been updated AND
|
|
674
671
|
that are associated with source ids that were seen during indexing.
|
|
@@ -683,15 +680,12 @@ async def aindex(
|
|
|
683
680
|
source ids that were seen during indexing.
|
|
684
681
|
- None: Do not delete any documents.
|
|
685
682
|
source_id_key: Optional key that helps identify the original source
|
|
686
|
-
of the document.
|
|
683
|
+
of the document.
|
|
687
684
|
cleanup_batch_size: Batch size to use when cleaning up documents.
|
|
688
|
-
Default is 1_000.
|
|
689
685
|
force_update: Force update documents even if they are present in the
|
|
690
686
|
record manager. Useful if you are re-indexing with updated embeddings.
|
|
691
|
-
Default is False.
|
|
692
687
|
key_encoder: Hashing algorithm to use for hashing the document content and
|
|
693
|
-
metadata.
|
|
694
|
-
Other options include "blake2b", "sha256", and "sha512".
|
|
688
|
+
metadata. Options include "blake2b", "sha256", and "sha512".
|
|
695
689
|
|
|
696
690
|
!!! version-added "Added in version 0.3.66"
|
|
697
691
|
|
|
@@ -722,9 +716,9 @@ async def aindex(
|
|
|
722
716
|
ValueError: If vectorstore does not have
|
|
723
717
|
"adelete" and "aadd_documents" required methods.
|
|
724
718
|
ValueError: If source_id_key is not None, but is not a string or callable.
|
|
725
|
-
TypeError: If
|
|
726
|
-
AssertionError: If
|
|
727
|
-
incremental or
|
|
719
|
+
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
|
|
720
|
+
AssertionError: If `source_id_key` is None when cleanup mode is
|
|
721
|
+
incremental or `scoped_full` (should be unreachable).
|
|
728
722
|
"""
|
|
729
723
|
# Behavior is deprecated, but we keep it for backwards compatibility.
|
|
730
724
|
# # Warn only once per process.
|