langchain-postgres 0.0.13__py3-none-any.whl → 0.0.14rc1__py3-none-any.whl
This diff reflects the content of publicly available package versions as released to their public registries and is provided for informational purposes only.
- langchain_postgres/__init__.py +6 -0
- langchain_postgres/chat_message_histories.py +7 -1
- langchain_postgres/utils/pgvector_migrator.py +321 -0
- langchain_postgres/v2/__init__.py +0 -0
- langchain_postgres/v2/async_vectorstore.py +1268 -0
- langchain_postgres/v2/engine.py +351 -0
- langchain_postgres/v2/indexes.py +155 -0
- langchain_postgres/v2/vectorstores.py +842 -0
- langchain_postgres/vectorstores.py +11 -4
- langchain_postgres-0.0.14rc1.dist-info/METADATA +170 -0
- langchain_postgres-0.0.14rc1.dist-info/RECORD +16 -0
- langchain_postgres-0.0.13.dist-info/METADATA +0 -109
- langchain_postgres-0.0.13.dist-info/RECORD +0 -10
- {langchain_postgres-0.0.13.dist-info → langchain_postgres-0.0.14rc1.dist-info}/LICENSE +0 -0
- {langchain_postgres-0.0.13.dist-info → langchain_postgres-0.0.14rc1.dist-info}/WHEEL +0 -0
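
The bulk of this release is the new `v2` API (`PGEngine`, `PGVectorStore`, `AsyncPGVectorStore`) plus a `pgvector_migrator` utility. For orientation, a minimal synchronous usage sketch based only on the `PGVectorStore` surface shown in the hunk below; the `PGEngine` constructor and any table-creation helper live in `v2/engine.py` (not shown in this diff), so `PGEngine.from_connection_string` and the pre-existing table are assumptions:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding

from langchain_postgres.v2.engine import PGEngine
from langchain_postgres.v2.vectorstores import PGVectorStore

# Assumption: engine.py exposes a from_connection_string constructor.
engine = PGEngine.from_connection_string(
    "postgresql+asyncpg://user:password@localhost:5432/vectordb"
)

# create_sync expects an existing table ("Name of an existing table" in the
# docstring), so the table must have been created beforehand, e.g. via a
# helper in engine.py (not shown here).
store = PGVectorStore.create_sync(
    engine,
    embedding_service=DeterministicFakeEmbedding(size=768),
    table_name="my_documents",
)

store.add_texts(["pgvector stores embeddings"], metadatas=[{"source": "demo"}])
docs = store.similarity_search("embeddings", k=1)
```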
langchain_postgres/v2/vectorstores.py @@ -0,0 +1,842 @@
# TODO: Remove below import when minimum supported Python version is 3.10
from __future__ import annotations

from typing import Any, Callable, Iterable, Optional, Sequence

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

from .async_vectorstore import AsyncPGVectorStore
from .engine import PGEngine
from .indexes import (
    DEFAULT_DISTANCE_STRATEGY,
    BaseIndex,
    DistanceStrategy,
    QueryOptions,
)


class PGVectorStore(VectorStore):
    """Postgres Vector Store class"""

    __create_key = object()

    def __init__(self, key: object, engine: PGEngine, vs: AsyncPGVectorStore):
        """PGVectorStore constructor.

        Args:
            key (object): Prevent direct constructor usage.
            engine (PGEngine): Connection pool engine for managing connections to Postgres database.
            vs (AsyncPGVectorStore): The async only VectorStore implementation

        Raises:
            Exception: If called directly by user.
        """
        if key != PGVectorStore.__create_key:
            raise Exception(
                "Only create class through 'create' or 'create_sync' methods!"
            )

        self._engine = engine
        self.__vs = vs

    @classmethod
    async def create(
        cls: type[PGVectorStore],
        engine: PGEngine,
        embedding_service: Embeddings,
        table_name: str,
        schema_name: str = "public",
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance.

        Args:
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            embedding_service (Embeddings): Text embedding model to use.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            content_column (str): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str]): Column(s) that represent a document's metadata.
            ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.

        Returns:
            PGVectorStore
        """
        coro = AsyncPGVectorStore.create(
            engine,
            embedding_service,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs = await engine._run_as_async(coro)
        return cls(cls.__create_key, engine, vs)

    @classmethod
    def create_sync(
        cls,
        engine: PGEngine,
        embedding_service: Embeddings,
        table_name: str,
        schema_name: str = "public",
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: str = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance.

        Args:
            key (object): Prevent direct constructor usage.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            embedding_service (Embeddings): Text embedding model to use.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str, optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy, optional): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int, optional): Number of Documents to return from search. Defaults to 4.
            fetch_k (int, optional): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float, optional): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (Optional[QueryOptions], optional): Index query option. Defaults to None.

        Returns:
            PGVectorStore
        """
        coro = AsyncPGVectorStore.create(
            engine,
            embedding_service,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs = engine._run_as_sync(coro)
        return cls(cls.__create_key, engine, vs)

    @property
    def embeddings(self) -> Embeddings:
        return self.__vs.embedding_service

    async def aadd_embeddings(
        self,
        texts: Iterable[str],
        embeddings: list[list[float]],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Add data along with embeddings to the table."""
        return await self._engine._run_as_async(
            self.__vs.aadd_embeddings(texts, embeddings, metadatas, ids, **kwargs)
        )

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed texts and add to the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(
            self.__vs.aadd_texts(texts, metadatas, ids, **kwargs)
        )

    async def aadd_documents(
        self,
        documents: list[Document],
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed documents and add to the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(
            self.__vs.aadd_documents(documents, ids, **kwargs)
        )

    def add_embeddings(
        self,
        texts: Iterable[str],
        embeddings: list[list[float]],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Add data along with embeddings to the table."""
        return self._engine._run_as_sync(
            self.__vs.aadd_embeddings(texts, embeddings, metadatas, ids, **kwargs)
        )

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed texts and add to the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(
            self.__vs.aadd_texts(texts, metadatas, ids, **kwargs)
        )

    def add_documents(
        self,
        documents: list[Document],
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed documents and add to the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(
            self.__vs.aadd_documents(documents, ids, **kwargs)
        )

    async def adelete(
        self,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete records from the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(self.__vs.adelete(ids, **kwargs))

    def delete(
        self,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete records from the table.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(self.__vs.adelete(ids, **kwargs))

    @classmethod
    async def afrom_texts(  # type: ignore[override]
        cls: type[PGVectorStore],
        texts: list[str],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: str = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance from texts.

        Args:
            texts (list[str]): Texts to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            metadatas (Optional[list[dict]], optional): List of metadatas to add to table records. Defaults to None.
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str], optional): Column(s) that represent a document's metadata. Defaults to an empty list.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str, optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        vs = await cls.create(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        await vs.aadd_texts(texts, metadatas=metadatas, ids=ids)
        return vs

    @classmethod
    async def afrom_documents(  # type: ignore[override]
        cls: type[PGVectorStore],
        documents: list[Document],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: str = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance from documents.

        Args:
            documents (list[Document]): Documents to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str], optional): Column(s) that represent a document's metadata. Defaults to an empty list.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str, optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """

        vs = await cls.create(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        await vs.aadd_documents(documents, ids=ids)
        return vs

    @classmethod
    def from_texts(  # type: ignore[override]
        cls: type[PGVectorStore],
        texts: list[str],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: str = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance from texts.

        Args:
            texts (list[str]): Texts to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            metadatas (Optional[list[dict]], optional): List of metadatas to add to table records. Defaults to None.
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str], optional): Column(s) that represent a document's metadata. Defaults to empty list.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str, optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        vs = cls.create_sync(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
            **kwargs,
        )
        vs.add_texts(texts, metadatas=metadatas, ids=ids)
        return vs

    @classmethod
    def from_documents(  # type: ignore[override]
        cls: type[PGVectorStore],
        documents: list[Document],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: str = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create an PGVectorStore instance from documents.

        Args:
            documents (list[Document]): Documents to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (list[str], optional): Column(s) that represent a document's metadata. Defaults to an empty list.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (str, optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        vs = cls.create_sync(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
            **kwargs,
        )
        vs.add_documents(documents, ids=ids)
        return vs

    def similarity_search(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by similarity search on query."""
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search(query, k, filter, **kwargs)
        )

    async def asimilarity_search(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by similarity search on query."""
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search(query, k, filter, **kwargs)
        )

    # Required for (a)similarity_search_with_relevance_scores
    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """Select a relevance function based on distance strategy."""
        # Calculate distance strategy provided in vectorstore constructor
        if self.__vs.distance_strategy == DistanceStrategy.COSINE_DISTANCE:
            return self._cosine_relevance_score_fn
        if self.__vs.distance_strategy == DistanceStrategy.INNER_PRODUCT:
            return self._max_inner_product_relevance_score_fn
        elif self.__vs.distance_strategy == DistanceStrategy.EUCLIDEAN:
            return self._euclidean_relevance_score_fn

    async def asimilarity_search_with_score(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on query."""
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs)
        )

    async def asimilarity_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by vector similarity search."""
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs)
        )

    async def asimilarity_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by vector similarity search."""
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_with_score_by_vector(
                embedding, k, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search(
        self,
        query: str,
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance."""
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search(
                query, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance."""
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected using the maximal marginal relevance."""
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search_with_score_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def similarity_search_with_score(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on query."""
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs)
        )

    def similarity_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by vector similarity search."""
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs)
        )

    def similarity_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on vector."""
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_with_score_by_vector(
                embedding, k, filter, **kwargs
            )
        )

    def max_marginal_relevance_search(
        self,
        query: str,
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance."""
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search(
                query, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance."""
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def max_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected using the maximal marginal relevance."""
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search_with_score_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def aapply_vector_index(
        self,
        index: BaseIndex,
        name: Optional[str] = None,
        concurrently: bool = False,
    ) -> None:
        """Create an index on the vector store table."""
        return await self._engine._run_as_async(
            self.__vs.aapply_vector_index(index, name, concurrently=concurrently)
        )

    def apply_vector_index(
        self,
        index: BaseIndex,
        name: Optional[str] = None,
        concurrently: bool = False,
    ) -> None:
        """Create an index on the vector store table."""
        return self._engine._run_as_sync(
            self.__vs.aapply_vector_index(index, name, concurrently=concurrently)
        )

    async def areindex(self, index_name: Optional[str] = None) -> None:
        """Re-index the vector store table."""
        return await self._engine._run_as_async(self.__vs.areindex(index_name))

    def reindex(self, index_name: Optional[str] = None) -> None:
        """Re-index the vector store table."""
        return self._engine._run_as_sync(self.__vs.areindex(index_name))

    async def adrop_vector_index(
        self,
        index_name: Optional[str] = None,
    ) -> None:
        """Drop the vector index."""
        return await self._engine._run_as_async(
            self.__vs.adrop_vector_index(index_name)
        )

    def drop_vector_index(
        self,
        index_name: Optional[str] = None,
    ) -> None:
        """Drop the vector index."""
        return self._engine._run_as_sync(self.__vs.adrop_vector_index(index_name))

    async def ais_valid_index(
        self,
        index_name: Optional[str] = None,
    ) -> bool:
        """Check if index exists in the table."""
        return await self._engine._run_as_async(self.__vs.is_valid_index(index_name))

    def is_valid_index(
        self,
        index_name: Optional[str] = None,
    ) -> bool:
        """Check if index exists in the table."""
        return self._engine._run_as_sync(self.__vs.is_valid_index(index_name))

    async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]:
        """Get documents by ids."""
        return await self._engine._run_as_async(self.__vs.aget_by_ids(ids=ids))

    def get_by_ids(self, ids: Sequence[str]) -> list[Document]:
        """Get documents by ids."""
        return self._engine._run_as_sync(self.__vs.aget_by_ids(ids=ids))

    def get_table_name(self) -> str:
        return self.__vs.table_name