langchain-core 1.0.0rc1-py3-none-any.whl → 1.0.0rc3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_core/agents.py +3 -3
- langchain_core/caches.py +44 -48
- langchain_core/callbacks/base.py +5 -5
- langchain_core/callbacks/file.py +2 -2
- langchain_core/callbacks/stdout.py +1 -1
- langchain_core/chat_history.py +1 -1
- langchain_core/document_loaders/base.py +21 -21
- langchain_core/document_loaders/langsmith.py +2 -2
- langchain_core/documents/base.py +39 -39
- langchain_core/embeddings/fake.py +4 -2
- langchain_core/example_selectors/semantic_similarity.py +4 -6
- langchain_core/exceptions.py +3 -4
- langchain_core/indexing/api.py +8 -14
- langchain_core/language_models/__init__.py +11 -25
- langchain_core/language_models/_utils.py +2 -1
- langchain_core/language_models/base.py +7 -0
- langchain_core/language_models/chat_models.py +24 -25
- langchain_core/language_models/fake_chat_models.py +3 -3
- langchain_core/language_models/llms.py +4 -4
- langchain_core/load/dump.py +3 -4
- langchain_core/load/load.py +0 -9
- langchain_core/load/serializable.py +3 -3
- langchain_core/messages/ai.py +20 -22
- langchain_core/messages/base.py +8 -8
- langchain_core/messages/block_translators/__init__.py +1 -1
- langchain_core/messages/block_translators/anthropic.py +1 -1
- langchain_core/messages/block_translators/bedrock_converse.py +1 -1
- langchain_core/messages/block_translators/google_genai.py +3 -2
- langchain_core/messages/block_translators/google_vertexai.py +4 -32
- langchain_core/messages/block_translators/langchain_v0.py +1 -1
- langchain_core/messages/block_translators/openai.py +1 -1
- langchain_core/messages/chat.py +2 -6
- langchain_core/messages/content.py +34 -17
- langchain_core/messages/function.py +3 -7
- langchain_core/messages/human.py +4 -9
- langchain_core/messages/modifier.py +1 -1
- langchain_core/messages/system.py +2 -10
- langchain_core/messages/tool.py +30 -42
- langchain_core/messages/utils.py +24 -30
- langchain_core/output_parsers/base.py +24 -24
- langchain_core/output_parsers/json.py +0 -1
- langchain_core/output_parsers/list.py +1 -1
- langchain_core/output_parsers/openai_functions.py +2 -2
- langchain_core/output_parsers/openai_tools.py +4 -9
- langchain_core/output_parsers/string.py +1 -1
- langchain_core/outputs/generation.py +1 -1
- langchain_core/prompt_values.py +7 -7
- langchain_core/prompts/base.py +1 -1
- langchain_core/prompts/chat.py +12 -13
- langchain_core/prompts/dict.py +2 -2
- langchain_core/prompts/few_shot_with_templates.py +1 -1
- langchain_core/prompts/image.py +1 -1
- langchain_core/prompts/message.py +2 -2
- langchain_core/prompts/prompt.py +7 -8
- langchain_core/prompts/string.py +1 -1
- langchain_core/prompts/structured.py +2 -2
- langchain_core/rate_limiters.py +23 -29
- langchain_core/retrievers.py +29 -29
- langchain_core/runnables/base.py +15 -22
- langchain_core/runnables/branch.py +1 -1
- langchain_core/runnables/config.py +7 -7
- langchain_core/runnables/configurable.py +2 -2
- langchain_core/runnables/fallbacks.py +1 -1
- langchain_core/runnables/graph.py +23 -28
- langchain_core/runnables/graph_mermaid.py +9 -9
- langchain_core/runnables/graph_png.py +1 -1
- langchain_core/runnables/history.py +2 -2
- langchain_core/runnables/passthrough.py +3 -3
- langchain_core/runnables/router.py +1 -1
- langchain_core/runnables/utils.py +5 -5
- langchain_core/tools/base.py +56 -11
- langchain_core/tools/convert.py +13 -17
- langchain_core/tools/retriever.py +6 -6
- langchain_core/tools/simple.py +1 -1
- langchain_core/tools/structured.py +5 -10
- langchain_core/tracers/memory_stream.py +1 -1
- langchain_core/tracers/root_listeners.py +2 -2
- langchain_core/tracers/stdout.py +1 -2
- langchain_core/utils/__init__.py +1 -1
- langchain_core/utils/aiter.py +1 -1
- langchain_core/utils/function_calling.py +15 -38
- langchain_core/utils/input.py +1 -1
- langchain_core/utils/iter.py +1 -1
- langchain_core/utils/json.py +1 -1
- langchain_core/utils/strings.py +1 -1
- langchain_core/vectorstores/base.py +14 -25
- langchain_core/vectorstores/utils.py +2 -2
- langchain_core/version.py +1 -1
- {langchain_core-1.0.0rc1.dist-info → langchain_core-1.0.0rc3.dist-info}/METADATA +1 -1
- langchain_core-1.0.0rc3.dist-info/RECORD +172 -0
- langchain_core-1.0.0rc1.dist-info/RECORD +0 -172
- {langchain_core-1.0.0rc1.dist-info → langchain_core-1.0.0rc3.dist-info}/WHEEL +0 -0
langchain_core/agents.py
CHANGED

@@ -84,7 +84,7 @@ class AgentAction(Serializable):

     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the
+        """Get the namespace of the LangChain object.

         Returns:
             `["langchain", "schema", "agent"]`

@@ -112,7 +112,7 @@ class AgentActionMessageLog(AgentAction):
     if (tool, tool_input) cannot be used to fully recreate the LLM
     prediction, and you need that LLM prediction (for future agent iteration).
     Compared to `log`, this is useful when the underlying LLM is a
+    chat model (and therefore returns messages rather than a string)."""
     # Ignoring type because we're overriding the type from AgentAction.
     # And this is the correct thing to do in this case.
     # The type literal is used for serialization purposes.

@@ -161,7 +161,7 @@ class AgentFinish(Serializable):

     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the
+        """Get the namespace of the LangChain object.

         Returns:
             `["langchain", "schema", "agent"]`
langchain_core/caches.py
CHANGED

@@ -1,18 +1,15 @@
-"""
+"""`caches` provides an optional caching layer for language models.

 !!! warning
+    This is a beta feature! Please be wary of deploying experimental code to production
+    unless you've taken appropriate precautions.

+A cache is useful for two reasons:

-- It can save you money by reducing the number of API calls you make to the LLM
+1. It can save you money by reducing the number of API calls you make to the LLM
    provider if you're often requesting the same completion multiple times.
-Cache directly competes with Memory. See documentation for Pros and Cons.
+2. It can speed up your application by reducing the number of API calls you make to the
+   LLM provider.
 """

 from __future__ import annotations

@@ -34,8 +31,8 @@ class BaseCache(ABC):

     The cache interface consists of the following methods:

-    - lookup: Look up a value based on a prompt and llm_string
-    - update: Update the cache based on a prompt and llm_string
+    - lookup: Look up a value based on a prompt and `llm_string`.
+    - update: Update the cache based on a prompt and `llm_string`.
     - clear: Clear the cache.

     In addition, the cache interface provides an async version of each method.

@@ -47,14 +44,14 @@ class BaseCache(ABC):

     @abstractmethod
     def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Look up based on prompt and llm_string
+        """Look up based on `prompt` and `llm_string`.

         A cache implementation is expected to generate a key from the 2-tuple
         of prompt and llm_string (e.g., by concatenating them with a delimiter).

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM

@@ -63,27 +60,27 @@
                 representation.

         Returns:
-            On a cache miss, return None
-            The cached value is a list of
+            On a cache miss, return `None`. On a cache hit, return the cached value.
+            The cached value is a list of `Generation` (or subclasses).
         """

     @abstractmethod
     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
-        """Update cache based on prompt and llm_string
+        """Update cache based on `prompt` and `llm_string`.

         The prompt and llm_string are used to generate a key for the cache.
         The key should match that of the lookup method.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
                 These invocation parameters are serialized into a string
                 representation.
-            return_val: The value to be cached. The value is a list of
+            return_val: The value to be cached. The value is a list of `Generation`
                 (or subclasses).
         """

@@ -92,14 +89,14 @@
         """Clear cache that can take additional keyword arguments."""

     async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Async look up based on prompt and llm_string
+        """Async look up based on `prompt` and `llm_string`.

         A cache implementation is expected to generate a key from the 2-tuple
         of prompt and llm_string (e.g., by concatenating them with a delimiter).

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM

@@ -108,29 +105,29 @@
                 representation.

         Returns:
-            On a cache miss, return None
-            The cached value is a list of
+            On a cache miss, return `None`. On a cache hit, return the cached value.
+            The cached value is a list of `Generation` (or subclasses).
         """
         return await run_in_executor(None, self.lookup, prompt, llm_string)

     async def aupdate(
         self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
     ) -> None:
-        """Async update cache based on prompt and llm_string
+        """Async update cache based on `prompt` and `llm_string`.

         The prompt and llm_string are used to generate a key for the cache.
         The key should match that of the look up method.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
                 This is used to capture the invocation parameters of the LLM
                 (e.g., model name, temperature, stop tokens, max tokens, etc.).
                 These invocation parameters are serialized into a string
                 representation.
-            return_val: The value to be cached. The value is a list of
+            return_val: The value to be cached. The value is a list of `Generation`
                 (or subclasses).
         """
         return await run_in_executor(None, self.update, prompt, llm_string, return_val)

@@ -150,10 +147,9 @@ class InMemoryCache(BaseCache):
             maxsize: The maximum number of items to store in the cache.
                 If `None`, the cache has no maximum size.
                 If the cache exceeds the maximum size, the oldest items are removed.
-                Default is None.

         Raises:
-            ValueError: If maxsize is less than or equal to 0
+            ValueError: If `maxsize` is less than or equal to `0`.
         """
         self._cache: dict[tuple[str, str], RETURN_VAL_TYPE] = {}
         if maxsize is not None and maxsize <= 0:

@@ -162,28 +158,28 @@ class InMemoryCache(BaseCache):
         self._maxsize = maxsize

     def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Look up based on prompt and llm_string
+        """Look up based on `prompt` and `llm_string`.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.

         Returns:
-            On a cache miss, return None
+            On a cache miss, return `None`. On a cache hit, return the cached value.
         """
         return self._cache.get((prompt, llm_string), None)

     def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
-        """Update cache based on prompt and llm_string
+        """Update cache based on `prompt` and `llm_string`.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
-            return_val: The value to be cached. The value is a list of
+            return_val: The value to be cached. The value is a list of `Generation`
                 (or subclasses).
         """
         if self._maxsize is not None and len(self._cache) == self._maxsize:

@@ -196,30 +192,30 @@ class InMemoryCache(BaseCache):
         self._cache = {}

     async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
-        """Async look up based on prompt and llm_string
+        """Async look up based on `prompt` and `llm_string`.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.

         Returns:
-            On a cache miss, return None
+            On a cache miss, return `None`. On a cache hit, return the cached value.
         """
         return self.lookup(prompt, llm_string)

     async def aupdate(
         self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
     ) -> None:
-        """Async update cache based on prompt and llm_string
+        """Async update cache based on `prompt` and `llm_string`.

         Args:
-            prompt:
-                In the case of a
+            prompt: A string representation of the prompt.
+                In the case of a chat model, the prompt is a non-trivial
                 serialization of the prompt into the language model.
             llm_string: A string representation of the LLM configuration.
-            return_val: The value to be cached. The value is a list of
+            return_val: The value to be cached. The value is a list of `Generation`
                 (or subclasses).
         """
         self.update(prompt, llm_string, return_val)
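The hunks above tighten the `(prompt, llm_string)` keying contract shared by `lookup` and `update`. A minimal sketch of that contract using the bundled `InMemoryCache`; the `prompt` and `llm_string` values below are illustrative placeholders, not the output of a real model:

```python
from langchain_core.caches import InMemoryCache
from langchain_core.outputs import Generation

cache = InMemoryCache(maxsize=100)

# Illustrative key parts; a real llm_string serializes the model's invocation params.
prompt = "Tell me a joke"
llm_string = "model=example-model temperature=0"

assert cache.lookup(prompt, llm_string) is None  # cache miss returns None

# Store a list of Generation objects under the (prompt, llm_string) key.
cache.update(prompt, llm_string, [Generation(text="Why did the chicken cross the road?")])

cached = cache.lookup(prompt, llm_string)  # cache hit returns the stored list
print(cached[0].text)
```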
langchain_core/callbacks/base.py
CHANGED

@@ -1001,7 +1001,7 @@ class BaseCallbackManager(CallbackManagerMixin):

         Args:
             handler: The handler to add.
-            inherit: Whether to inherit the handler.
+            inherit: Whether to inherit the handler.
         """
         if handler not in self.handlers:
             self.handlers.append(handler)

@@ -1028,7 +1028,7 @@ class BaseCallbackManager(CallbackManagerMixin):

         Args:
             handlers: The handlers to set.
-            inherit: Whether to inherit the handlers.
+            inherit: Whether to inherit the handlers.
         """
         self.handlers = []
         self.inheritable_handlers = []

@@ -1044,7 +1044,7 @@ class BaseCallbackManager(CallbackManagerMixin):

         Args:
             handler: The handler to set.
-            inherit: Whether to inherit the handler.
+            inherit: Whether to inherit the handler.
         """
         self.set_handlers([handler], inherit=inherit)

@@ -1057,7 +1057,7 @@ class BaseCallbackManager(CallbackManagerMixin):

         Args:
             tags: The tags to add.
-            inherit: Whether to inherit the tags.
+            inherit: Whether to inherit the tags.
         """
         for tag in tags:
             if tag in self.tags:

@@ -1087,7 +1087,7 @@ class BaseCallbackManager(CallbackManagerMixin):

         Args:
             metadata: The metadata to add.
-            inherit: Whether to inherit the metadata.
+            inherit: Whether to inherit the metadata.
         """
         self.metadata.update(metadata)
         if inherit:
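The `inherit` flag documented in these hunks controls whether handlers, tags, and metadata are also recorded as inheritable, so that callback managers created for child runs receive them. A hedged sketch of how that might be exercised, assuming the public `CallbackManager` and `StdOutCallbackHandler` exports:

```python
from langchain_core.callbacks import CallbackManager, StdOutCallbackHandler

manager = CallbackManager(handlers=[])

# With inherit=True, the handler/tags/metadata also land in the inheritable_* fields,
# which are propagated to callback managers created for child runs.
manager.add_handler(StdOutCallbackHandler(), inherit=True)
manager.add_tags(["example-tag"], inherit=True)
manager.add_metadata({"run_kind": "demo"}, inherit=True)

print(manager.inheritable_handlers, manager.inheritable_tags, manager.inheritable_metadata)
```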
langchain_core/callbacks/file.py
CHANGED

@@ -132,7 +132,7 @@ class FileCallbackHandler(BaseCallbackHandler):
         Args:
             text: The text to write to the file.
             color: Optional color for the text. Defaults to `self.color`.
-            end: String appended after the text.
+            end: String appended after the text.
             file: Optional file to write to. Defaults to `self.file`.

         Raises:

@@ -239,7 +239,7 @@ class FileCallbackHandler(BaseCallbackHandler):
             text: The text to write.
             color: Color override for this specific output. If `None`, uses
                 `self.color`.
-            end: String appended after the text.
+            end: String appended after the text.
             **kwargs: Additional keyword arguments.

         """

langchain_core/callbacks/stdout.py
CHANGED

@@ -104,7 +104,7 @@ class StdOutCallbackHandler(BaseCallbackHandler):
         Args:
             text: The text to print.
             color: The color to use for the text.
-            end: The end character to use.
+            end: The end character to use.
             **kwargs: Additional keyword arguments.
         """
         print_text(text, color=color or self.color, end=end)
langchain_core/chat_history.py
CHANGED

@@ -153,7 +153,7 @@ class BaseChatMessageHistory(ABC):

         Raises:
             NotImplementedError: If the sub-class has not implemented an efficient
-                add_messages method.
+                `add_messages` method.
         """
         if type(self).add_messages != BaseChatMessageHistory.add_messages:
             # This means that the sub-class has implemented an efficient add_messages
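The `add_messages` hunk above refers to the bulk-write hook that subclasses are expected to provide; per the surrounding code, `add_message` delegates to it when it is overridden and raises `NotImplementedError` otherwise. A minimal sketch of a conforming subclass; `ListChatHistory` is a hypothetical example class, not part of the package:

```python
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage, HumanMessage


class ListChatHistory(BaseChatMessageHistory):  # hypothetical example subclass
    """Keep messages in a plain list and implement the bulk add_messages hook."""

    def __init__(self) -> None:
        self.messages: list[BaseMessage] = []

    def add_messages(self, messages: list[BaseMessage]) -> None:
        self.messages.extend(messages)

    def clear(self) -> None:
        self.messages = []


history = ListChatHistory()
history.add_message(HumanMessage(content="hi"))  # routed through add_messages
print(history.messages)
```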
langchain_core/document_loaders/base.py
CHANGED

@@ -35,38 +35,38 @@ class BaseLoader(ABC): # noqa: B024
     # Sub-classes should not implement this method directly. Instead, they
     # should implement the lazy load method.
     def load(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.

         Returns:
+            The documents.
         """
         return list(self.lazy_load())

     async def aload(self) -> list[Document]:
-        """Load data into Document objects.
+        """Load data into `Document` objects.

         Returns:
+            The documents.
         """
         return [document async for document in self.alazy_load()]

     def load_and_split(
         self, text_splitter: TextSplitter | None = None
     ) -> list[Document]:
-        """Load Documents and split into chunks. Chunks are returned as
+        """Load Documents and split into chunks. Chunks are returned as `Document`.

         Do not override this method. It should be considered to be deprecated!

         Args:
-            text_splitter: TextSplitter instance to use for splitting documents.
-                Defaults to RecursiveCharacterTextSplitter
+            text_splitter: `TextSplitter` instance to use for splitting documents.
+                Defaults to `RecursiveCharacterTextSplitter`.

         Raises:
-            ImportError: If langchain-text-splitters is not installed
-                and no text_splitter is provided.
+            ImportError: If `langchain-text-splitters` is not installed
+                and no `text_splitter` is provided.

         Returns:
-            List of
+            List of `Document`.
         """
         if text_splitter is None:
             if not _HAS_TEXT_SPLITTERS:

@@ -86,10 +86,10 @@ class BaseLoader(ABC): # noqa: B024
     # Attention: This method will be upgraded into an abstractmethod once it's
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.

         Yields:
+            The `Document` objects.
         """
         if type(self).load != BaseLoader.load:
             return iter(self.load())

@@ -97,10 +97,10 @@ class BaseLoader(ABC): # noqa: B024
         raise NotImplementedError(msg)

     async def alazy_load(self) -> AsyncIterator[Document]:
-        """A lazy loader for
+        """A lazy loader for `Document`.

         Yields:
+            The `Document` objects.
         """
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()

@@ -115,7 +115,7 @@ class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.

     A blob parser provides a way to parse raw data stored in a blob into one
-    or more
+    or more `Document` objects.

     The parser can be composed with blob loaders, making it easy to reuse
     a parser independent of how the blob was originally loaded.

@@ -128,25 +128,25 @@
         Subclasses are required to implement this method.

         Args:
-            blob: Blob instance
+            blob: `Blob` instance

         Returns:
-            Generator of
+            Generator of `Document` objects
         """

     def parse(self, blob: Blob) -> list[Document]:
-        """Eagerly parse the blob into a
+        """Eagerly parse the blob into a `Document` or `Document` objects.

         This is a convenience method for interactive development environment.

-        Production applications should favor the lazy_parse method instead.
+        Production applications should favor the `lazy_parse` method instead.

         Subclasses should generally not over-ride this parse method.

         Args:
-            blob: Blob instance
+            blob: `Blob` instance

         Returns:
-            List of
+            List of `Document` objects
         """
         return list(self.lazy_parse(blob))
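As the `load`/`lazy_load` docstrings above suggest, a loader typically only implements `lazy_load`; `load()` and `aload()` are derived from it. A hedged sketch of a minimal subclass; `LinesLoader` and `example.txt` are made-up names for illustration:

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LinesLoader(BaseLoader):  # hypothetical example loader
    """Yield one Document per line of a text file."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        with open(self.path, encoding="utf-8") as f:
            for i, line in enumerate(f):
                yield Document(
                    page_content=line.rstrip("\n"),
                    metadata={"source": self.path, "line": i},
                )


docs = LinesLoader("example.txt").load()  # load() is list(lazy_load()) under the hood
```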
langchain_core/document_loaders/langsmith.py
CHANGED

@@ -76,8 +76,8 @@ class LangSmithLoader(BaseLoader):
             splits: A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls: Whether to inline S3 URLs.
-            offset: The offset to start from.
+            inline_s3_urls: Whether to inline S3 URLs.
+            offset: The offset to start from.
             limit: The maximum number of examples to return.
             metadata: Metadata to filter by.
             filter: A structured filter string to apply to the examples.
langchain_core/documents/base.py
CHANGED

@@ -57,51 +57,51 @@ class Blob(BaseMedia):

     Example: Initialize a blob from in-memory data

+        ```python
+        from langchain_core.documents import Blob

+        blob = Blob.from_data("Hello, world!")

+        # Read the blob as a string
+        print(blob.as_string())

+        # Read the blob as bytes
+        print(blob.as_bytes())

+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```

     Example: Load from memory and specify mime-type and metadata

+        ```python
+        from langchain_core.documents import Blob

+        blob = Blob.from_data(
+            data="Hello, world!",
+            mime_type="text/plain",
+            metadata={"source": "https://example.com"},
+        )
+        ```

     Example: Load the blob from a file

+        ```python
+        from langchain_core.documents import Blob

+        blob = Blob.from_path("path/to/file.txt")

+        # Read the blob as a string
+        print(blob.as_string())

+        # Read the blob as bytes
+        print(blob.as_bytes())

+        # Read the blob as a byte stream
+        with blob.as_bytes_io() as f:
+            print(f.read())
+        ```
     """

     data: bytes | str | None = None

@@ -111,7 +111,7 @@ class Blob(BaseMedia):
     encoding: str = "utf-8"
     """Encoding to use if decoding the bytes into a string.

-    Use utf-8 as default encoding, if decoding to string.
+    Use `utf-8` as default encoding, if decoding to string.
     """
     path: PathLike | None = None
     """Location where the original content was found."""

@@ -127,7 +127,7 @@ class Blob(BaseMedia):

         If a path is associated with the blob, it will default to the path location.

-        Unless explicitly set via a metadata field called "source"
+        Unless explicitly set via a metadata field called `"source"`, in which
         case that value will be used instead.
         """
         if self.metadata and "source" in self.metadata:

@@ -211,11 +211,11 @@ class Blob(BaseMedia):
         """Load the blob from a path like object.

         Args:
-            path:
+            path: Path-like object to file to be read
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
+            mime_type: If provided, will be set as the mime-type of the data
             guess_type: If `True`, the mimetype will be guessed from the file extension,
+                if a mime-type was not provided
             metadata: Metadata to associate with the blob

         Returns:

@@ -248,10 +248,10 @@ class Blob(BaseMedia):
         """Initialize the blob from in-memory data.

         Args:
-            data:
+            data: The in-memory data associated with the blob
             encoding: Encoding to use if decoding the bytes into a string
-            mime_type:
-            path:
+            mime_type: If provided, will be set as the mime-type of the data
+            path: If provided, will be set as the source from which the data came
             metadata: Metadata to associate with the blob

         Returns:

@@ -303,7 +303,7 @@ class Document(BaseMedia):

     @classmethod
     def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the
+        """Get the namespace of the LangChain object.

         Returns:
             ["langchain", "schema", "document"]
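The `source` hunk above describes the resolution order for a blob's origin: an explicit `metadata["source"]` wins, otherwise a path-backed blob falls back to its path. A short sketch of that behavior; the file path is illustrative, and `from_path` records the path lazily without reading the file:

```python
from langchain_core.documents import Blob

path_blob = Blob.from_path("path/to/file.txt")
print(path_blob.source)  # falls back to the path: "path/to/file.txt"

data_blob = Blob.from_data(
    data="Hello, world!",
    metadata={"source": "https://example.com"},
)
print(data_blob.source)  # explicit metadata "source" wins: "https://example.com"
```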
langchain_core/embeddings/fake.py
CHANGED

@@ -18,7 +18,8 @@ class FakeEmbeddings(Embeddings, BaseModel):

     This embedding model creates embeddings by sampling from a normal distribution.

+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.

     Instantiate:
         ```python

@@ -72,7 +73,8 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
     This embedding model creates embeddings by sampling from a normal distribution
     with a seed based on the hash of the text.

+    !!! warning
+        Do not use this outside of testing, as it is not a real embedding model.

     Instantiate:
         ```python