langchain-postgres 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/PKG-INFO +3 -3
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/_utils.py +2 -4
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/vectorstores.py +189 -90
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/pyproject.toml +6 -3
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/LICENSE +0 -0
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/README.md +0 -0
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/__init__.py +0 -0
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/chat_message_histories.py +0 -0
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/py.typed +0 -0
- {langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/translator.py +0 -0
@@ -1,17 +1,17 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: langchain-postgres
|
3
|
-
Version: 0.0.9
|
3
|
+
Version: 0.0.11
|
4
4
|
Summary: An integration package connecting Postgres and LangChain
|
5
5
|
Home-page: https://github.com/langchain-ai/langchain-postgres
|
6
6
|
License: MIT
|
7
|
-
Requires-Python: >=3.
|
7
|
+
Requires-Python: >=3.9,<4.0
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: Programming Language :: Python :: 3.9
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
14
|
-
Requires-Dist: langchain-core (>=0.
|
14
|
+
Requires-Dist: langchain-core (>=0.2.13,<0.4.0)
|
15
15
|
Requires-Dist: numpy (>=1,<2)
|
16
16
|
Requires-Dist: pgvector (>=0.2.5,<0.3.0)
|
17
17
|
Requires-Dist: psycopg (>=3,<4)
|
@@ -30,10 +30,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
|
30
30
|
|
31
31
|
X = np.array(X, dtype=np.float32)
|
32
32
|
Y = np.array(Y, dtype=np.float32)
|
33
|
-
Z = 1 - simd.cdist(X, Y, metric="cosine")
|
34
|
-
|
35
|
-
return np.array([Z])
|
36
|
-
return np.array(Z)
|
33
|
+
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
|
34
|
+
return Z
|
37
35
|
except ImportError:
|
38
36
|
logger.debug(
|
39
37
|
"Unable to import simsimd, defaulting to NumPy implementation. If you want "
|
@@ -246,98 +246,130 @@ DBConnection = Union[sqlalchemy.engine.Engine, str]
|
|
246
246
|
|
247
247
|
|
248
248
|
class PGVector(VectorStore):
|
249
|
-
"""
|
249
|
+
"""Postgres vector store integration.
|
250
250
|
|
251
|
-
|
251
|
+
Setup:
|
252
|
+
Install ``langchain_postgres`` and run the docker container.
|
252
253
|
|
253
|
-
|
254
|
-
the tables and re-add the documents.
|
254
|
+
.. code-block:: bash
|
255
255
|
|
256
|
-
|
257
|
-
|
256
|
+
pip install -qU langchain-postgres
|
257
|
+
docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16
|
258
258
|
|
259
|
-
|
260
|
-
|
261
|
-
|
259
|
+
Key init args — indexing params:
|
260
|
+
collection_name: str
|
261
|
+
Name of the collection.
|
262
|
+
embeddings: Embeddings
|
263
|
+
Embedding function to use.
|
262
264
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
```
|
265
|
+
Key init args — client params:
|
266
|
+
connection: Union[None, DBConnection, Engine, AsyncEngine, str]
|
267
|
+
Connection string or engine.
|
267
268
|
|
268
|
-
|
269
|
+
Instantiate:
|
269
270
|
.. code-block:: python
|
270
271
|
|
272
|
+
from langchain_postgres import PGVector
|
271
273
|
from langchain_postgres.vectorstores import PGVector
|
272
|
-
from langchain_openai
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
connection=connection_string,
|
274
|
+
from langchain_openai import OpenAIEmbeddings
|
275
|
+
|
276
|
+
# See docker command above to launch a postgres instance with pgvector enabled.
|
277
|
+
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" # Uses psycopg3!
|
278
|
+
collection_name = "my_docs"
|
279
|
+
|
280
|
+
vector_store = PGVector(
|
281
|
+
embeddings=OpenAIEmbeddings(model="text-embedding-3-large"),
|
281
282
|
collection_name=collection_name,
|
283
|
+
connection=connection,
|
282
284
|
use_jsonb=True,
|
283
|
-
async_mode=False,
|
284
285
|
)
|
285
286
|
|
287
|
+
Add Documents:
|
288
|
+
.. code-block:: python
|
289
|
+
|
290
|
+
from langchain_core.documents import Document
|
291
|
+
|
292
|
+
document_1 = Document(page_content="foo", metadata={"baz": "bar"})
|
293
|
+
document_2 = Document(page_content="thud", metadata={"bar": "baz"})
|
294
|
+
document_3 = Document(page_content="i will be deleted :(")
|
295
|
+
|
296
|
+
documents = [document_1, document_2, document_3]
|
297
|
+
ids = ["1", "2", "3"]
|
298
|
+
vector_store.add_documents(documents=documents, ids=ids)
|
299
|
+
|
300
|
+
Delete Documents:
|
301
|
+
.. code-block:: python
|
302
|
+
|
303
|
+
vector_store.delete(ids=["3"])
|
304
|
+
|
305
|
+
Search:
|
306
|
+
.. code-block:: python
|
307
|
+
|
308
|
+
results = vector_store.similarity_search(query="thud",k=1)
|
309
|
+
for doc in results:
|
310
|
+
print(f"* {doc.page_content} [{doc.metadata}]")
|
311
|
+
|
312
|
+
.. code-block:: python
|
313
|
+
|
314
|
+
* thud [{'bar': 'baz'}]
|
315
|
+
|
316
|
+
Search with filter:
|
317
|
+
.. code-block:: python
|
318
|
+
|
319
|
+
results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
|
320
|
+
for doc in results:
|
321
|
+
print(f"* {doc.page_content} [{doc.metadata}]")
|
322
|
+
|
323
|
+
.. code-block:: python
|
324
|
+
|
325
|
+
* thud [{'bar': 'baz'}]
|
326
|
+
|
327
|
+
Search with score:
|
328
|
+
.. code-block:: python
|
329
|
+
|
330
|
+
results = vector_store.similarity_search_with_score(query="qux",k=1)
|
331
|
+
for doc, score in results:
|
332
|
+
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
333
|
+
|
334
|
+
.. code-block:: python
|
335
|
+
|
336
|
+
* [SIM=0.499243] foo [{'baz': 'bar'}]
|
337
|
+
|
338
|
+
Async:
|
339
|
+
.. code-block:: python
|
340
|
+
|
341
|
+
# add documents
|
342
|
+
# await vector_store.aadd_documents(documents=documents, ids=ids)
|
343
|
+
|
344
|
+
# delete documents
|
345
|
+
# await vector_store.adelete(ids=["3"])
|
346
|
+
|
347
|
+
# search
|
348
|
+
# results = vector_store.asimilarity_search(query="thud",k=1)
|
349
|
+
|
350
|
+
# search with score
|
351
|
+
results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
|
352
|
+
for doc,score in results:
|
353
|
+
print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
|
354
|
+
|
355
|
+
.. code-block:: python
|
356
|
+
|
357
|
+
* [SIM=0.499243] foo [{'baz': 'bar'}]
|
358
|
+
|
359
|
+
Use as Retriever:
|
360
|
+
.. code-block:: python
|
361
|
+
|
362
|
+
retriever = vector_store.as_retriever(
|
363
|
+
search_type="mmr",
|
364
|
+
search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
|
365
|
+
)
|
366
|
+
retriever.invoke("thud")
|
367
|
+
|
368
|
+
.. code-block:: python
|
369
|
+
|
370
|
+
[Document(metadata={'bar': 'baz'}, page_content='thud')]
|
286
371
|
|
287
|
-
|
288
|
-
to allow users to easily transition from langchain_community to langchain_postgres.
|
289
|
-
|
290
|
-
Some changes had to be made to address issues with the community implementation:
|
291
|
-
* langchain_postgres now works with psycopg3. Please update your
|
292
|
-
connection strings from `postgresql+psycopg2://...` to
|
293
|
-
`postgresql+psycopg://langchain:langchain@...`
|
294
|
-
(yes, the driver name is `psycopg` not `psycopg3`)
|
295
|
-
* The schema of the embedding store and collection have been changed to make
|
296
|
-
add_documents work correctly with user specified ids, specifically
|
297
|
-
when overwriting existing documents.
|
298
|
-
You will need to recreate the tables if you are using an existing database.
|
299
|
-
* A Connection object has to be provided explicitly. Connections will not be
|
300
|
-
picked up automatically based on env variables.
|
301
|
-
* langchain_postgres now accept async connections. If you want to use the async
|
302
|
-
version, you need to set `async_mode=True` when initializing the store or
|
303
|
-
use an async engine.
|
304
|
-
|
305
|
-
Supported filter operators:
|
306
|
-
|
307
|
-
* $eq: Equality operator
|
308
|
-
* $ne: Not equal operator
|
309
|
-
* $lt: Less than operator
|
310
|
-
* $lte: Less than or equal operator
|
311
|
-
* $gt: Greater than operator
|
312
|
-
* $gte: Greater than or equal operator
|
313
|
-
* $in: In operator
|
314
|
-
* $nin: Not in operator
|
315
|
-
* $between: Between operator
|
316
|
-
* $exists: Exists operator
|
317
|
-
* $like: Like operator
|
318
|
-
* $ilike: Case insensitive like operator
|
319
|
-
* $and: Logical AND operator
|
320
|
-
* $or: Logical OR operator
|
321
|
-
* $not: Logical NOT operator
|
322
|
-
|
323
|
-
Example:
|
324
|
-
|
325
|
-
.. code-block:: python
|
326
|
-
|
327
|
-
vectorstore.similarity_search('kitty', k=10, filter={
|
328
|
-
'id': {'$in': [1, 5, 2, 9]}
|
329
|
-
})
|
330
|
-
#%% md
|
331
|
-
|
332
|
-
If you provide a dict with multiple fields, but no operators,
|
333
|
-
the top level will be interpreted as a logical **AND** filter
|
334
|
-
|
335
|
-
vectorstore.similarity_search('ducks', k=10, filter={
|
336
|
-
'id': {'$in': [1, 5, 2, 9]},
|
337
|
-
'location': {'$in': ["pond", "market"]}
|
338
|
-
})
|
339
|
-
|
340
|
-
"""
|
372
|
+
""" # noqa: E501
|
341
373
|
|
342
374
|
def __init__(
|
343
375
|
self,
|
@@ -714,7 +746,7 @@ class PGVector(VectorStore):
|
|
714
746
|
|
715
747
|
def add_embeddings(
|
716
748
|
self,
|
717
|
-
texts:
|
749
|
+
texts: Sequence[str],
|
718
750
|
embeddings: List[List[float]],
|
719
751
|
metadatas: Optional[List[dict]] = None,
|
720
752
|
ids: Optional[List[str]] = None,
|
@@ -732,7 +764,9 @@ class PGVector(VectorStore):
|
|
732
764
|
"""
|
733
765
|
assert not self._async_engine, "This method must be called with sync_mode"
|
734
766
|
if ids is None:
|
735
|
-
|
767
|
+
ids_ = [str(uuid.uuid4()) for _ in texts]
|
768
|
+
else:
|
769
|
+
ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
|
736
770
|
|
737
771
|
if not metadatas:
|
738
772
|
metadatas = [{} for _ in texts]
|
@@ -750,7 +784,7 @@ class PGVector(VectorStore):
|
|
750
784
|
"cmetadata": metadata or {},
|
751
785
|
}
|
752
786
|
for text, metadata, embedding, id in zip(
|
753
|
-
texts, metadatas, embeddings,
|
787
|
+
texts, metadatas, embeddings, ids_
|
754
788
|
)
|
755
789
|
]
|
756
790
|
stmt = insert(self.EmbeddingStore).values(data)
|
@@ -766,11 +800,11 @@ class PGVector(VectorStore):
|
|
766
800
|
session.execute(on_conflict_stmt)
|
767
801
|
session.commit()
|
768
802
|
|
769
|
-
return
|
803
|
+
return ids_
|
770
804
|
|
771
805
|
async def aadd_embeddings(
|
772
806
|
self,
|
773
|
-
texts:
|
807
|
+
texts: Sequence[str],
|
774
808
|
embeddings: List[List[float]],
|
775
809
|
metadatas: Optional[List[dict]] = None,
|
776
810
|
ids: Optional[List[str]] = None,
|
@@ -787,8 +821,11 @@ class PGVector(VectorStore):
|
|
787
821
|
kwargs: vectorstore specific parameters
|
788
822
|
"""
|
789
823
|
await self.__apost_init__() # Lazy async init
|
824
|
+
|
790
825
|
if ids is None:
|
791
|
-
|
826
|
+
ids_ = [str(uuid.uuid4()) for _ in texts]
|
827
|
+
else:
|
828
|
+
ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
|
792
829
|
|
793
830
|
if not metadatas:
|
794
831
|
metadatas = [{} for _ in texts]
|
@@ -806,7 +843,7 @@ class PGVector(VectorStore):
|
|
806
843
|
"cmetadata": metadata or {},
|
807
844
|
}
|
808
845
|
for text, metadata, embedding, id in zip(
|
809
|
-
texts, metadatas, embeddings,
|
846
|
+
texts, metadatas, embeddings, ids_
|
810
847
|
)
|
811
848
|
]
|
812
849
|
stmt = insert(self.EmbeddingStore).values(data)
|
@@ -822,7 +859,7 @@ class PGVector(VectorStore):
|
|
822
859
|
await session.execute(on_conflict_stmt)
|
823
860
|
await session.commit()
|
824
861
|
|
825
|
-
return
|
862
|
+
return ids_
|
826
863
|
|
827
864
|
def add_texts(
|
828
865
|
self,
|
@@ -844,9 +881,14 @@ class PGVector(VectorStore):
|
|
844
881
|
List of ids from adding the texts into the vectorstore.
|
845
882
|
"""
|
846
883
|
assert not self._async_engine, "This method must be called without async_mode"
|
847
|
-
|
884
|
+
texts_ = list(texts)
|
885
|
+
embeddings = self.embedding_function.embed_documents(texts_)
|
848
886
|
return self.add_embeddings(
|
849
|
-
texts=
|
887
|
+
texts=texts_,
|
888
|
+
embeddings=list(embeddings),
|
889
|
+
metadatas=list(metadatas) if metadatas else None,
|
890
|
+
ids=list(ids) if ids else None,
|
891
|
+
**kwargs,
|
850
892
|
)
|
851
893
|
|
852
894
|
async def aadd_texts(
|
@@ -869,9 +911,14 @@ class PGVector(VectorStore):
|
|
869
911
|
List of ids from adding the texts into the vectorstore.
|
870
912
|
"""
|
871
913
|
await self.__apost_init__() # Lazy async init
|
872
|
-
|
914
|
+
texts_ = list(texts)
|
915
|
+
embeddings = await self.embedding_function.aembed_documents(texts_)
|
873
916
|
return await self.aadd_embeddings(
|
874
|
-
texts=
|
917
|
+
texts=texts_,
|
918
|
+
embeddings=list(embeddings),
|
919
|
+
metadatas=list(metadatas) if metadatas else None,
|
920
|
+
ids=list(ids) if ids else None,
|
921
|
+
**kwargs,
|
875
922
|
)
|
876
923
|
|
877
924
|
def similarity_search(
|
@@ -1014,6 +1061,7 @@ class PGVector(VectorStore):
|
|
1014
1061
|
docs = [
|
1015
1062
|
(
|
1016
1063
|
Document(
|
1064
|
+
id=str(result.EmbeddingStore.id),
|
1017
1065
|
page_content=result.EmbeddingStore.document,
|
1018
1066
|
metadata=result.EmbeddingStore.cmetadata,
|
1019
1067
|
),
|
@@ -2178,3 +2226,54 @@ class PGVector(VectorStore):
|
|
2178
2226
|
)
|
2179
2227
|
async with self.session_maker() as session:
|
2180
2228
|
yield typing_cast(AsyncSession, session)
|
2229
|
+
|
2230
|
+
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
2231
|
+
"""Get documents by ids."""
|
2232
|
+
documents = []
|
2233
|
+
with self._make_sync_session() as session:
|
2234
|
+
collection = self.get_collection(session)
|
2235
|
+
filter_by = [self.EmbeddingStore.collection_id == collection.uuid]
|
2236
|
+
stmt = (
|
2237
|
+
select(
|
2238
|
+
self.EmbeddingStore,
|
2239
|
+
)
|
2240
|
+
.where(self.EmbeddingStore.id.in_(ids))
|
2241
|
+
.filter(*filter_by)
|
2242
|
+
)
|
2243
|
+
|
2244
|
+
for result in session.execute(stmt).scalars().all():
|
2245
|
+
documents.append(
|
2246
|
+
Document(
|
2247
|
+
id=result.id,
|
2248
|
+
page_content=result.document,
|
2249
|
+
metadata=result.cmetadata,
|
2250
|
+
)
|
2251
|
+
)
|
2252
|
+
return documents
|
2253
|
+
|
2254
|
+
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
2255
|
+
"""Get documents by ids."""
|
2256
|
+
documents = []
|
2257
|
+
async with self._make_async_session() as session:
|
2258
|
+
collection = await self.aget_collection(session)
|
2259
|
+
filter_by = [self.EmbeddingStore.collection_id == collection.uuid]
|
2260
|
+
|
2261
|
+
stmt = (
|
2262
|
+
select(
|
2263
|
+
self.EmbeddingStore,
|
2264
|
+
)
|
2265
|
+
.where(self.EmbeddingStore.id.in_(ids))
|
2266
|
+
.filter(*filter_by)
|
2267
|
+
)
|
2268
|
+
|
2269
|
+
results: Sequence[Any] = (await session.execute(stmt)).scalars().all()
|
2270
|
+
|
2271
|
+
for result in results:
|
2272
|
+
documents.append(
|
2273
|
+
Document(
|
2274
|
+
id=str(result.id),
|
2275
|
+
page_content=result.document,
|
2276
|
+
metadata=result.cmetadata,
|
2277
|
+
)
|
2278
|
+
)
|
2279
|
+
return documents
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "langchain-postgres"
|
3
|
-
version = "0.0.9"
|
3
|
+
version = "0.0.11"
|
4
4
|
description = "An integration package connecting Postgres and LangChain"
|
5
5
|
authors = []
|
6
6
|
readme = "README.md"
|
@@ -11,8 +11,8 @@ license = "MIT"
|
|
11
11
|
"Source Code" = "https://github.com/langchain-ai/langchain-postgres/tree/master/langchain_postgres"
|
12
12
|
|
13
13
|
[tool.poetry.dependencies]
|
14
|
-
python = "^3.
|
15
|
-
langchain-core = ">=0.
|
14
|
+
python = "^3.9"
|
15
|
+
langchain-core = ">=0.2.13,<0.4.0"
|
16
16
|
psycopg = "^3"
|
17
17
|
psycopg-pool = "^3.2.1"
|
18
18
|
sqlalchemy = "^2"
|
@@ -24,6 +24,7 @@ numpy = "^1"
|
|
24
24
|
[tool.poetry.group.dev.dependencies]
|
25
25
|
jupyterlab = "^3.6.1"
|
26
26
|
|
27
|
+
|
27
28
|
[tool.poetry.group.test]
|
28
29
|
optional = true
|
29
30
|
|
@@ -33,6 +34,8 @@ pytest-asyncio = "^0.23.2"
|
|
33
34
|
pytest-socket = "^0.7.0"
|
34
35
|
pytest-cov = "^5.0.0"
|
35
36
|
pytest-timeout = "^2.3.1"
|
37
|
+
langchain-core = {git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core/"}
|
38
|
+
langchain-standard-tests = {git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/standard-tests/"}
|
36
39
|
|
37
40
|
[tool.poetry.group.codespell]
|
38
41
|
optional = true
|
File without changes
|
File without changes
|
File without changes
|
{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/chat_message_histories.py
RENAMED
File without changes
|
File without changes
|
File without changes
|