langchain 1.0.0a12__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +1 -1
- langchain/agents/__init__.py +7 -1
- langchain/agents/factory.py +722 -226
- langchain/agents/middleware/__init__.py +36 -9
- langchain/agents/middleware/_execution.py +388 -0
- langchain/agents/middleware/_redaction.py +350 -0
- langchain/agents/middleware/context_editing.py +46 -17
- langchain/agents/middleware/file_search.py +382 -0
- langchain/agents/middleware/human_in_the_loop.py +220 -173
- langchain/agents/middleware/model_call_limit.py +43 -10
- langchain/agents/middleware/model_fallback.py +79 -36
- langchain/agents/middleware/pii.py +68 -504
- langchain/agents/middleware/shell_tool.py +718 -0
- langchain/agents/middleware/summarization.py +2 -2
- langchain/agents/middleware/{planning.py → todo.py} +35 -16
- langchain/agents/middleware/tool_call_limit.py +308 -114
- langchain/agents/middleware/tool_emulator.py +200 -0
- langchain/agents/middleware/tool_retry.py +384 -0
- langchain/agents/middleware/tool_selection.py +25 -21
- langchain/agents/middleware/types.py +714 -257
- langchain/agents/structured_output.py +37 -27
- langchain/chat_models/__init__.py +7 -1
- langchain/chat_models/base.py +192 -190
- langchain/embeddings/__init__.py +13 -3
- langchain/embeddings/base.py +49 -29
- langchain/messages/__init__.py +50 -1
- langchain/tools/__init__.py +9 -7
- langchain/tools/tool_node.py +16 -1174
- langchain-1.0.4.dist-info/METADATA +92 -0
- langchain-1.0.4.dist-info/RECORD +34 -0
- langchain/_internal/__init__.py +0 -0
- langchain/_internal/_documents.py +0 -35
- langchain/_internal/_lazy_import.py +0 -35
- langchain/_internal/_prompts.py +0 -158
- langchain/_internal/_typing.py +0 -70
- langchain/_internal/_utils.py +0 -7
- langchain/agents/_internal/__init__.py +0 -1
- langchain/agents/_internal/_typing.py +0 -13
- langchain/agents/middleware/prompt_caching.py +0 -86
- langchain/documents/__init__.py +0 -7
- langchain/embeddings/cache.py +0 -361
- langchain/storage/__init__.py +0 -22
- langchain/storage/encoder_backed.py +0 -123
- langchain/storage/exceptions.py +0 -5
- langchain/storage/in_memory.py +0 -13
- langchain-1.0.0a12.dist-info/METADATA +0 -122
- langchain-1.0.0a12.dist-info/RECORD +0 -43
- {langchain-1.0.0a12.dist-info → langchain-1.0.4.dist-info}/WHEEL +0 -0
- {langchain-1.0.0a12.dist-info → langchain-1.0.4.dist-info}/licenses/LICENSE +0 -0
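The 1.0.4 wheel drops the langchain.storage and langchain.embeddings.cache modules entirely. As the deleted sources below show, langchain.storage was largely a re-export of primitives from langchain_core.stores, so a minimal migration sketch (an assumption on my part, not official guidance; it relies only on langchain-core being installed) is to import those primitives directly:

from langchain_core.stores import InMemoryStore

# InMemoryStore was one of the names re-exported by the removed
# langchain.storage package (see its deleted __init__.py below).
store = InMemoryStore()
store.mset([("alpha", [1.0]), ("beta", [2.0])])  # batch write
assert store.mget(["alpha", "missing"]) == [[1.0], None]  # misses come back as None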
langchain/embeddings/cache.py
DELETED
@@ -1,361 +0,0 @@
"""Module contains code for a cache backed embedder.

The cache backed embedder is a wrapper around an embedder that caches
embeddings in a key-value store. The cache is used to avoid recomputing
embeddings for the same text.

The text is hashed and the hash is used as the key in the cache.
"""

from __future__ import annotations

import hashlib
import json
import uuid
import warnings
from typing import TYPE_CHECKING, Literal, cast

from langchain_core.embeddings import Embeddings
from langchain_core.utils.iter import batch_iterate

from langchain.storage.encoder_backed import EncoderBackedStore

if TYPE_CHECKING:
    from collections.abc import Callable, Sequence

    from langchain_core.stores import BaseStore, ByteStore

NAMESPACE_UUID = uuid.UUID(int=1985)


def _sha1_hash_to_uuid(text: str) -> uuid.UUID:
    """Return a UUID derived from *text* using SHA-1 (deterministic).

    Deterministic and fast, **but not collision-resistant**.

    A malicious attacker could try to create two different texts that hash to the same
    UUID. This may not necessarily be an issue in the context of caching embeddings,
    but new applications should swap this out for a stronger hash function like
    xxHash, BLAKE2 or SHA-256, which are collision-resistant.
    """
    sha1_hex = hashlib.sha1(text.encode("utf-8"), usedforsecurity=False).hexdigest()
    # Embed the hex string in `uuid5` to obtain a valid UUID.
    return uuid.uuid5(NAMESPACE_UUID, sha1_hex)


def _make_default_key_encoder(namespace: str, algorithm: str) -> Callable[[str], str]:
    """Create a default key encoder function.

    Args:
        namespace: Prefix that segregates keys from different embedding models.
        algorithm:
            * ``'sha1'`` - fast but not collision-resistant
            * ``'blake2b'`` - cryptographically strong, faster than SHA-1
            * ``'sha256'`` - cryptographically strong, slower than SHA-1
            * ``'sha512'`` - cryptographically strong, slower than SHA-1

    Returns:
        A function that encodes a key using the specified algorithm.
    """
    if algorithm == "sha1":
        _warn_about_sha1_encoder()

    def _key_encoder(key: str) -> str:
        """Encode a key using the specified algorithm."""
        if algorithm == "sha1":
            return f"{namespace}{_sha1_hash_to_uuid(key)}"
        if algorithm == "blake2b":
            return f"{namespace}{hashlib.blake2b(key.encode('utf-8')).hexdigest()}"
        if algorithm == "sha256":
            return f"{namespace}{hashlib.sha256(key.encode('utf-8')).hexdigest()}"
        if algorithm == "sha512":
            return f"{namespace}{hashlib.sha512(key.encode('utf-8')).hexdigest()}"
        msg = f"Unsupported algorithm: {algorithm}"
        raise ValueError(msg)

    return _key_encoder


def _value_serializer(value: Sequence[float]) -> bytes:
    """Serialize a value."""
    return json.dumps(value).encode()


def _value_deserializer(serialized_value: bytes) -> list[float]:
    """Deserialize a value."""
    return cast("list[float]", json.loads(serialized_value.decode()))


# The warning is global; track emission, so it appears only once.
_warned_about_sha1: bool = False


def _warn_about_sha1_encoder() -> None:
    """Emit a one-time warning about SHA-1 collision weaknesses."""
    global _warned_about_sha1  # noqa: PLW0603
    if not _warned_about_sha1:
        warnings.warn(
            "Using default key encoder: SHA-1 is *not* collision-resistant. "
            "While acceptable for most cache scenarios, a motivated attacker "
            "can craft two different payloads that map to the same cache key. "
            "If that risk matters in your environment, supply a stronger "
            "encoder (e.g. SHA-256 or BLAKE2) via the `key_encoder` argument. "
            "If you change the key encoder, consider also creating a new cache, "
            "to avoid (the potential for) collisions with existing keys.",
            category=UserWarning,
            stacklevel=2,
        )
        _warned_about_sha1 = True


class CacheBackedEmbeddings(Embeddings):
    """Interface for caching results from embedding models.

    The interface works with any store that implements the abstract store
    interface accepting keys of type str and values of list of floats.

    If need be, the interface can be extended to accept other implementations
    of the value serializer and deserializer, as well as the key encoder.

    Note that by default only document embeddings are cached. To cache query
    embeddings too, pass in a query_embedding_store to the constructor.

    Examples:
        .. code-block:: python

            from langchain.embeddings import CacheBackedEmbeddings
            from langchain.storage import LocalFileStore
            from langchain_community.embeddings import OpenAIEmbeddings

            store = LocalFileStore('./my_cache')

            underlying_embedder = OpenAIEmbeddings()
            embedder = CacheBackedEmbeddings.from_bytes_store(
                underlying_embedder, store, namespace=underlying_embedder.model
            )

            # Embedding is computed and cached
            embeddings = embedder.embed_documents(["hello", "goodbye"])

            # Embeddings are retrieved from the cache, no computation is done
            embeddings = embedder.embed_documents(["hello", "goodbye"])
    """

    def __init__(
        self,
        underlying_embeddings: Embeddings,
        document_embedding_store: BaseStore[str, list[float]],
        *,
        batch_size: int | None = None,
        query_embedding_store: BaseStore[str, list[float]] | None = None,
    ) -> None:
        """Initialize the embedder.

        Args:
            underlying_embeddings: the embedder to use for computing embeddings.
            document_embedding_store: The store to use for caching document embeddings.
            batch_size: The number of documents to embed between store updates.
            query_embedding_store: The store to use for caching query embeddings.
                If ``None``, query embeddings are not cached.
        """
        super().__init__()
        self.document_embedding_store = document_embedding_store
        self.query_embedding_store = query_embedding_store
        self.underlying_embeddings = underlying_embeddings
        self.batch_size = batch_size

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of texts.

        The method first checks the cache for the embeddings.
        If the embeddings are not found, the method uses the underlying embedder
        to embed the documents and stores the results in the cache.

        Args:
            texts: A list of texts to embed.

        Returns:
            A list of embeddings for the given texts.
        """
        vectors: list[list[float] | None] = self.document_embedding_store.mget(
            texts,
        )
        all_missing_indices: list[int] = [i for i, vector in enumerate(vectors) if vector is None]

        for missing_indices in batch_iterate(self.batch_size, all_missing_indices):
            missing_texts = [texts[i] for i in missing_indices]
            missing_vectors = self.underlying_embeddings.embed_documents(missing_texts)
            self.document_embedding_store.mset(
                list(zip(missing_texts, missing_vectors, strict=False)),
            )
            for index, updated_vector in zip(missing_indices, missing_vectors, strict=False):
                vectors[index] = updated_vector

        return cast(
            "list[list[float]]",
            vectors,
        )  # Nones should have been resolved by now

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of texts.

        The method first checks the cache for the embeddings.
        If the embeddings are not found, the method uses the underlying embedder
        to embed the documents and stores the results in the cache.

        Args:
            texts: A list of texts to embed.

        Returns:
            A list of embeddings for the given texts.
        """
        vectors: list[list[float] | None] = await self.document_embedding_store.amget(texts)
        all_missing_indices: list[int] = [i for i, vector in enumerate(vectors) if vector is None]

        # batch_iterate supports None batch_size which returns all elements at once
        # as a single batch.
        for missing_indices in batch_iterate(self.batch_size, all_missing_indices):
            missing_texts = [texts[i] for i in missing_indices]
            missing_vectors = await self.underlying_embeddings.aembed_documents(
                missing_texts,
            )
            await self.document_embedding_store.amset(
                list(zip(missing_texts, missing_vectors, strict=False)),
            )
            for index, updated_vector in zip(missing_indices, missing_vectors, strict=False):
                vectors[index] = updated_vector

        return cast(
            "list[list[float]]",
            vectors,
        )  # Nones should have been resolved by now

    def embed_query(self, text: str) -> list[float]:
        """Embed query text.

        By default, this method does not cache queries. To enable caching, pass a
        ``query_embedding_store`` to the constructor (or set ``query_embedding_cache``
        in ``from_bytes_store``).

        Args:
            text: The text to embed.

        Returns:
            The embedding for the given text.
        """
        if not self.query_embedding_store:
            return self.underlying_embeddings.embed_query(text)

        (cached,) = self.query_embedding_store.mget([text])
        if cached is not None:
            return cached

        vector = self.underlying_embeddings.embed_query(text)
        self.query_embedding_store.mset([(text, vector)])
        return vector

    async def aembed_query(self, text: str) -> list[float]:
        """Embed query text.

        By default, this method does not cache queries. To enable caching, pass a
        ``query_embedding_store`` to the constructor (or set ``query_embedding_cache``
        in ``from_bytes_store``).

        Args:
            text: The text to embed.

        Returns:
            The embedding for the given text.
        """
        if not self.query_embedding_store:
            return await self.underlying_embeddings.aembed_query(text)

        (cached,) = await self.query_embedding_store.amget([text])
        if cached is not None:
            return cached

        vector = await self.underlying_embeddings.aembed_query(text)
        await self.query_embedding_store.amset([(text, vector)])
        return vector

    @classmethod
    def from_bytes_store(
        cls,
        underlying_embeddings: Embeddings,
        document_embedding_cache: ByteStore,
        *,
        namespace: str = "",
        batch_size: int | None = None,
        query_embedding_cache: bool | ByteStore = False,
        key_encoder: Callable[[str], str] | Literal["sha1", "blake2b", "sha256", "sha512"] = "sha1",
    ) -> CacheBackedEmbeddings:
        """On-ramp that adds the necessary serialization and encoding to the store.

        Args:
            underlying_embeddings: The embedder to use for embedding.
            document_embedding_cache: The cache to use for storing document embeddings.
            namespace: The namespace to use for document cache.
                This namespace is used to avoid collisions with other caches.
                For example, set it to the name of the embedding model used.
            batch_size: The number of documents to embed between store updates.
            query_embedding_cache: The cache to use for storing query embeddings.
                True to use the same cache as document embeddings.
                False to not cache query embeddings.
            key_encoder: Optional callable to encode keys. If not provided,
                a default encoder using SHA-1 will be used. SHA-1 is not
                collision-resistant, and a motivated attacker could craft two
                different texts that hash to the same cache key.

                New applications should use one of the alternative encoders
                or provide a custom and strong key encoder function to avoid this risk.

                If you change a key encoder in an existing cache, consider
                just creating a new cache, to avoid (the potential for)
                collisions with existing keys or having duplicate keys
                for the same text in the cache.

        Returns:
            An instance of CacheBackedEmbeddings that uses the provided cache.
        """
        if isinstance(key_encoder, str):
            key_encoder = _make_default_key_encoder(namespace, key_encoder)
        elif callable(key_encoder):
            # If a custom key encoder is provided, it should not be used with a
            # namespace.
            # A user can handle namespacing directly in their custom key encoder.
            if namespace:
                msg = (
                    "Do not supply `namespace` when using a custom key_encoder; "
                    "add any prefixing inside the encoder itself."
                )
                raise ValueError(msg)
        else:
            msg = (
                "key_encoder must be either 'blake2b', 'sha1', 'sha256', 'sha512' "
                "or a callable that encodes keys."
            )
            raise ValueError(msg)  # noqa: TRY004

        document_embedding_store = EncoderBackedStore[str, list[float]](
            document_embedding_cache,
            key_encoder,
            _value_serializer,
            _value_deserializer,
        )
        if query_embedding_cache is True:
            query_embedding_store = document_embedding_store
        elif query_embedding_cache is False:
            query_embedding_store = None
        else:
            query_embedding_store = EncoderBackedStore[str, list[float]](
                query_embedding_cache,
                key_encoder,
                _value_serializer,
                _value_deserializer,
            )

        return cls(
            underlying_embeddings,
            document_embedding_store,
            batch_size=batch_size,
            query_embedding_store=query_embedding_store,
        )
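A short usage sketch of the deleted class above, assuming langchain_core's InMemoryByteStore and its FakeEmbeddings test embedder (any real Embeddings implementation slots in the same way):

from langchain_core.embeddings import FakeEmbeddings
from langchain_core.stores import InMemoryByteStore

embedder = CacheBackedEmbeddings.from_bytes_store(
    FakeEmbeddings(size=8),       # stand-in for a real embedding model
    InMemoryByteStore(),          # document-embedding cache
    namespace="fake-model-8",     # keeps different models from sharing cache keys
    key_encoder="sha256",         # opts out of the SHA-1 default (and its warning)
    query_embedding_cache=True,   # reuse the same store for embed_query results
)
vectors = embedder.embed_documents(["hello", "goodbye"])  # computed, then cached
vectors = embedder.embed_documents(["hello", "goodbye"])  # served from the cache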
langchain/storage/__init__.py
DELETED
@@ -1,22 +0,0 @@
"""Implementations of key-value stores and storage helpers.

Module provides implementations of various key-value stores that conform
to a simple key-value interface.

The primary goal of these storages is to support implementation of caching.
"""

from langchain_core.stores import (
    InMemoryByteStore,
    InMemoryStore,
    InvalidKeyException,
)

from langchain.storage.encoder_backed import EncoderBackedStore

__all__ = [
    "EncoderBackedStore",
    "InMemoryByteStore",
    "InMemoryStore",
    "InvalidKeyException",
]
langchain/storage/encoder_backed.py
DELETED
@@ -1,123 +0,0 @@
"""Encoder-backed store implementation."""

from collections.abc import AsyncIterator, Callable, Iterator, Sequence
from typing import (
    Any,
    TypeVar,
)

from langchain_core.stores import BaseStore

K = TypeVar("K")
V = TypeVar("V")


class EncoderBackedStore(BaseStore[K, V]):
    """Wraps a store with key and value encoders/decoders.

    Example that uses JSON for encoding/decoding:

    .. code-block:: python

        import json


        def key_encoder(key: int) -> str:
            return json.dumps(key)


        def value_serializer(value: float) -> str:
            return json.dumps(value)


        def value_deserializer(serialized_value: str) -> float:
            return json.loads(serialized_value)


        # Create an instance of the abstract store
        abstract_store = MyCustomStore()

        # Create an instance of the encoder-backed store
        store = EncoderBackedStore(
            store=abstract_store,
            key_encoder=key_encoder,
            value_serializer=value_serializer,
            value_deserializer=value_deserializer,
        )

        # Use the encoder-backed store methods
        store.mset([(1, 3.14), (2, 2.718)])
        values = store.mget([1, 2])  # Retrieves [3.14, 2.718]
        store.mdelete([1, 2])  # Deletes the keys 1 and 2

    """

    def __init__(
        self,
        store: BaseStore[str, Any],
        key_encoder: Callable[[K], str],
        value_serializer: Callable[[V], bytes],
        value_deserializer: Callable[[Any], V],
    ) -> None:
        """Initialize an EncodedStore."""
        self.store = store
        self.key_encoder = key_encoder
        self.value_serializer = value_serializer
        self.value_deserializer = value_deserializer

    def mget(self, keys: Sequence[K]) -> list[V | None]:
        """Get the values associated with the given keys."""
        encoded_keys: list[str] = [self.key_encoder(key) for key in keys]
        values = self.store.mget(encoded_keys)
        return [self.value_deserializer(value) if value is not None else value for value in values]

    async def amget(self, keys: Sequence[K]) -> list[V | None]:
        """Get the values associated with the given keys."""
        encoded_keys: list[str] = [self.key_encoder(key) for key in keys]
        values = await self.store.amget(encoded_keys)
        return [self.value_deserializer(value) if value is not None else value for value in values]

    def mset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
        """Set the values for the given keys."""
        encoded_pairs = [
            (self.key_encoder(key), self.value_serializer(value)) for key, value in key_value_pairs
        ]
        self.store.mset(encoded_pairs)

    async def amset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
        """Set the values for the given keys."""
        encoded_pairs = [
            (self.key_encoder(key), self.value_serializer(value)) for key, value in key_value_pairs
        ]
        await self.store.amset(encoded_pairs)

    def mdelete(self, keys: Sequence[K]) -> None:
        """Delete the given keys and their associated values."""
        encoded_keys = [self.key_encoder(key) for key in keys]
        self.store.mdelete(encoded_keys)

    async def amdelete(self, keys: Sequence[K]) -> None:
        """Delete the given keys and their associated values."""
        encoded_keys = [self.key_encoder(key) for key in keys]
        await self.store.amdelete(encoded_keys)

    def yield_keys(
        self,
        *,
        prefix: str | None = None,
    ) -> Iterator[K] | Iterator[str]:
        """Get an iterator over keys that match the given prefix."""
        # For the time being this does not return K, but str
        # it's for debugging purposes. Should fix this.
        yield from self.store.yield_keys(prefix=prefix)

    async def ayield_keys(
        self,
        *,
        prefix: str | None = None,
    ) -> AsyncIterator[K] | AsyncIterator[str]:
        """Get an iterator over keys that match the given prefix."""
        # For the time being this does not return K, but str
        # it's for debugging purposes. Should fix this.
        async for key in self.store.ayield_keys(prefix=prefix):
            yield key
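The docstring example above leaves the backing store abstract (MyCustomStore); a minimal runnable variant, assuming langchain_core's InMemoryStore as the str-keyed backing store:

import json

from langchain_core.stores import InMemoryStore

# InMemoryStore accepts arbitrary values, so it can serve as the
# BaseStore[str, Any] that EncoderBackedStore expects.
store = EncoderBackedStore(
    store=InMemoryStore(),
    key_encoder=lambda key: json.dumps(key),                    # int -> str
    value_serializer=lambda value: json.dumps(value).encode(),  # float -> bytes
    value_deserializer=lambda data: json.loads(data),           # bytes -> float
)
store.mset([(1, 3.14), (2, 2.718)])
assert store.mget([1, 2]) == [3.14, 2.718]
store.mdelete([1, 2])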
langchain/storage/exceptions.py
DELETED
langchain/storage/in_memory.py
DELETED
@@ -1,13 +0,0 @@
"""In memory store that is not thread safe and has no eviction policy.

This is a simple implementation of the BaseStore using a dictionary that is useful
primarily for unit testing purposes.
"""

from langchain_core.stores import InMemoryBaseStore, InMemoryByteStore, InMemoryStore

__all__ = [
    "InMemoryBaseStore",
    "InMemoryByteStore",
    "InMemoryStore",
]
langchain-1.0.0a12.dist-info/METADATA
DELETED
@@ -1,122 +0,0 @@
Metadata-Version: 2.4
Name: langchain
Version: 1.0.0a12
Summary: Building applications with LLMs through composability
Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/langchain
Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain%3D%3D0%22&expanded=true
Project-URL: repository, https://github.com/langchain-ai/langchain
License: MIT
License-File: LICENSE
Requires-Python: <4.0.0,>=3.10.0
Requires-Dist: langchain-core<2.0.0,>=1.0.0a6
Requires-Dist: langgraph<2.0.0,>=1.0.0a4
Requires-Dist: pydantic<3.0.0,>=2.7.4
Provides-Extra: anthropic
Requires-Dist: langchain-anthropic; extra == 'anthropic'
Provides-Extra: aws
Requires-Dist: langchain-aws; extra == 'aws'
Provides-Extra: community
Requires-Dist: langchain-community; extra == 'community'
Provides-Extra: deepseek
Requires-Dist: langchain-deepseek; extra == 'deepseek'
Provides-Extra: fireworks
Requires-Dist: langchain-fireworks; extra == 'fireworks'
Provides-Extra: google-genai
Requires-Dist: langchain-google-genai; extra == 'google-genai'
Provides-Extra: google-vertexai
Requires-Dist: langchain-google-vertexai; extra == 'google-vertexai'
Provides-Extra: groq
Requires-Dist: langchain-groq; extra == 'groq'
Provides-Extra: mistralai
Requires-Dist: langchain-mistralai; extra == 'mistralai'
Provides-Extra: ollama
Requires-Dist: langchain-ollama; extra == 'ollama'
Provides-Extra: openai
Requires-Dist: langchain-openai; extra == 'openai'
Provides-Extra: perplexity
Requires-Dist: langchain-perplexity; extra == 'perplexity'
Provides-Extra: together
Requires-Dist: langchain-together; extra == 'together'
Provides-Extra: xai
Requires-Dist: langchain-xai; extra == 'xai'
Description-Content-Type: text/markdown

# 🦜️🔗 LangChain

⚡ Building applications with LLMs through composability ⚡

[](https://opensource.org/licenses/MIT)
[](https://pypistats.org/packages/langchain)
[](https://twitter.com/langchainai)

Looking for the JS/TS version? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).

To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.

## Quick Install

`pip install langchain`

## 🤔 What is this?

Large language models (LLMs) are emerging as a transformative technology, enabling developers to build applications that they previously could not. However, using these LLMs in isolation is often insufficient for creating a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.

This library aims to assist in the development of those types of applications. Common examples of these applications include:

**❓ Question answering with RAG**

- [Documentation](https://python.langchain.com/docs/tutorials/rag/)
- End-to-end Example: [Chat LangChain](https://chat.langchain.com) and [repo](https://github.com/langchain-ai/chat-langchain)

**🧱 Extracting structured output**

- [Documentation](https://python.langchain.com/docs/tutorials/extraction/)
- End-to-end Example: [SQL Llama2 Template](https://github.com/langchain-ai/langchain-extract/)

**🤖 Chatbots**

- [Documentation](https://python.langchain.com/docs/tutorials/chatbot/)
- End-to-end Example: [Web LangChain (web researcher chatbot)](https://weblangchain.vercel.app) and [repo](https://github.com/langchain-ai/weblangchain)

## 📖 Documentation

Please see [our full documentation](https://python.langchain.com) on:

- Getting started (installation, setting up the environment, simple examples)
- How-To examples (demos, integrations, helper functions)
- Reference (full API docs)
- Resources (high-level explanation of core concepts)

## 🚀 What can this help with?

There are five main areas that LangChain is designed to help with.
These are, in increasing order of complexity:

**🤖 Agents:**

Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end-to-end agents.

**📚 Retrieval Augmented Generation:**

Retrieval Augmented Generation involves specific types of chains that first interact with an external data source to fetch data for use in the generation step. Examples include summarization of long pieces of text and question/answering over specific data sources.

**🧐 Evaluation:**

Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.

**📃 Models and Prompts:**

This includes prompt management, prompt optimization, a generic interface for all LLMs, and common utilities for working with chat models and LLMs.

**🔗 Chains:**

Chains go beyond a single LLM call and involve sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.

For more information on these concepts, please see our [full documentation](https://python.langchain.com).

## 💁 Contributing

As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.

For detailed information on how to contribute, see the [Contributing Guide](https://python.langchain.com/docs/contributing/).