langchain-postgres 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,842 @@
1
+ # TODO: Remove below import when minimum supported Python version is 3.10
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, Callable, Iterable, Optional, Sequence
5
+
6
+ from langchain_core.documents import Document
7
+ from langchain_core.embeddings import Embeddings
8
+ from langchain_core.vectorstores import VectorStore
9
+
10
+ from .async_vectorstore import AsyncPGVectorStore
11
+ from .engine import PGEngine
12
+ from .indexes import (
13
+ DEFAULT_DISTANCE_STRATEGY,
14
+ BaseIndex,
15
+ DistanceStrategy,
16
+ QueryOptions,
17
+ )
18
+
19
+
20
class PGVectorStore(VectorStore):
    """Postgres vector store exposing both blocking and async methods.

    Every operation is delegated to a wrapped ``AsyncPGVectorStore``. The
    coroutine produced by the wrapped store is executed through the engine's
    ``_run_as_async`` (async methods) or ``_run_as_sync`` (blocking methods).

    Instances must be built with :meth:`create` or :meth:`create_sync` (or one
    of the ``from_*`` / ``afrom_*`` factories); calling the constructor
    directly raises.
    """

    # Private sentinel handed to __init__ by the factory classmethods so that
    # direct construction by users can be detected and rejected.
    __create_key = object()

    def __init__(self, key: object, engine: PGEngine, vs: AsyncPGVectorStore):
        """PGVectorStore constructor.

        Args:
            key (object): Prevent direct constructor usage.
            engine (PGEngine): Connection pool engine for managing connections to Postgres database.
            vs (AsyncPGVectorStore): The async only VectorStore implementation

        Raises:
            Exception: If called directly by user.
        """
        # Identity check: the sentinel is a unique object, so an argument with
        # an overloaded __eq__/__ne__ cannot slip past the guard.
        if key is not PGVectorStore.__create_key:
            raise Exception(
                "Only create class through 'create' or 'create_sync' methods!"
            )

        self._engine = engine
        self.__vs = vs

    @classmethod
    async def create(
        cls: type[PGVectorStore],
        engine: PGEngine,
        embedding_service: Embeddings,
        table_name: str,
        schema_name: str = "public",
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance.

        Args:
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            embedding_service (Embeddings): Text embedding model to use.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy, optional): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int, optional): Number of Documents to return from search. Defaults to 4.
            fetch_k (int, optional): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float, optional): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (Optional[QueryOptions], optional): Index query option. Defaults to None.

        Returns:
            PGVectorStore
        """
        coro = AsyncPGVectorStore.create(
            engine,
            embedding_service,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs = await engine._run_as_async(coro)
        return cls(cls.__create_key, engine, vs)

    @classmethod
    def create_sync(
        cls,
        engine: PGEngine,
        embedding_service: Embeddings,
        table_name: str,
        schema_name: str = "public",
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance (blocking counterpart of ``create``).

        Args:
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            embedding_service (Embeddings): Text embedding model to use.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy, optional): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int, optional): Number of Documents to return from search. Defaults to 4.
            fetch_k (int, optional): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float, optional): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (Optional[QueryOptions], optional): Index query option. Defaults to None.

        Returns:
            PGVectorStore
        """
        coro = AsyncPGVectorStore.create(
            engine,
            embedding_service,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs = engine._run_as_sync(coro)
        return cls(cls.__create_key, engine, vs)

    @property
    def embeddings(self) -> Embeddings:
        """Return the wrapped async store's ``embedding_service``."""
        return self.__vs.embedding_service

    async def aadd_embeddings(
        self,
        texts: Iterable[str],
        embeddings: list[list[float]],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Add data along with embeddings to the table.

        Delegates to the wrapped store's ``aadd_embeddings`` via the engine's
        ``_run_as_async`` and returns its result.
        """
        return await self._engine._run_as_async(
            self.__vs.aadd_embeddings(texts, embeddings, metadatas, ids, **kwargs)
        )

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed texts and add to the table.

        Delegates to the wrapped store's ``aadd_texts`` via the engine's
        ``_run_as_async`` and returns its result.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(
            self.__vs.aadd_texts(texts, metadatas, ids, **kwargs)
        )

    async def aadd_documents(
        self,
        documents: list[Document],
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed documents and add to the table.

        Delegates to the wrapped store's ``aadd_documents`` via the engine's
        ``_run_as_async`` and returns its result.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(
            self.__vs.aadd_documents(documents, ids, **kwargs)
        )

    def add_embeddings(
        self,
        texts: Iterable[str],
        embeddings: list[list[float]],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Add data along with embeddings to the table.

        Blocking counterpart of ``aadd_embeddings``: runs the wrapped store's
        ``aadd_embeddings`` via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.aadd_embeddings(texts, embeddings, metadatas, ids, **kwargs)
        )

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed texts and add to the table.

        Blocking counterpart of ``aadd_texts``: runs the wrapped store's
        ``aadd_texts`` via the engine's ``_run_as_sync``.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(
            self.__vs.aadd_texts(texts, metadatas, ids, **kwargs)
        )

    def add_documents(
        self,
        documents: list[Document],
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Embed documents and add to the table.

        Blocking counterpart of ``aadd_documents``: runs the wrapped store's
        ``aadd_documents`` via the engine's ``_run_as_sync``.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(
            self.__vs.aadd_documents(documents, ids, **kwargs)
        )

    async def adelete(
        self,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete records from the table.

        Delegates to the wrapped store's ``adelete`` via the engine's
        ``_run_as_async`` and returns its result.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return await self._engine._run_as_async(self.__vs.adelete(ids, **kwargs))

    def delete(
        self,
        ids: Optional[list] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete records from the table.

        Blocking counterpart of ``adelete``: runs the wrapped store's
        ``adelete`` via the engine's ``_run_as_sync``.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
        """
        return self._engine._run_as_sync(self.__vs.adelete(ids, **kwargs))

    @classmethod
    async def afrom_texts(  # type: ignore[override]
        cls: type[PGVectorStore],
        texts: list[str],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance from texts.

        Builds the store with ``create`` and then adds ``texts`` with
        ``aadd_texts``.

        Args:
            texts (list[str]): Texts to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            metadatas (Optional[list[dict]], optional): List of metadatas to add to table records. Defaults to None.
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.
            **kwargs (Any): Extra keyword arguments; accepted but not used.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        vs = await cls.create(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        await vs.aadd_texts(texts, metadatas=metadatas, ids=ids)
        return vs

    @classmethod
    async def afrom_documents(  # type: ignore[override]
        cls: type[PGVectorStore],
        documents: list[Document],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance from documents.

        Builds the store with ``create`` and then adds ``documents`` with
        ``aadd_documents``.

        Args:
            documents (list[Document]): Documents to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.
            **kwargs (Any): Extra keyword arguments; accepted but not used.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        vs = await cls.create(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        await vs.aadd_documents(documents, ids=ids)
        return vs

    @classmethod
    def from_texts(  # type: ignore[override]
        cls: type[PGVectorStore],
        texts: list[str],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance from texts.

        Builds the store with ``create_sync`` and then adds ``texts`` with
        ``add_texts``.

        Args:
            texts (list[str]): Texts to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            metadatas (Optional[list[dict]], optional): List of metadatas to add to table records. Defaults to None.
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.
            **kwargs (Any): Extra keyword arguments; accepted but not used, matching ``afrom_texts``.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        # NOTE: **kwargs is deliberately not forwarded. create_sync declares no
        # **kwargs parameter, so forwarding any extra keyword raised TypeError.
        vs = cls.create_sync(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs.add_texts(texts, metadatas=metadatas, ids=ids)
        return vs

    @classmethod
    def from_documents(  # type: ignore[override]
        cls: type[PGVectorStore],
        documents: list[Document],
        embedding: Embeddings,
        engine: PGEngine,
        table_name: str,
        schema_name: str = "public",
        ids: Optional[list] = None,
        content_column: str = "content",
        embedding_column: str = "embedding",
        metadata_columns: Optional[list[str]] = None,
        ignore_metadata_columns: Optional[list[str]] = None,
        id_column: str = "langchain_id",
        metadata_json_column: Optional[str] = "langchain_metadata",
        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        index_query_options: Optional[QueryOptions] = None,
        **kwargs: Any,
    ) -> PGVectorStore:
        """Create a PGVectorStore instance from documents.

        Builds the store with ``create_sync`` and then adds ``documents`` with
        ``add_documents``.

        Args:
            documents (list[Document]): Documents to add to the vector store.
            embedding (Embeddings): Text embedding model to use.
            engine (PGEngine): Connection pool engine for managing connections to postgres database.
            table_name (str): Name of an existing table.
            schema_name (str, optional): Name of the database schema. Defaults to "public".
            ids: (Optional[list]): List of IDs to add to table records. Defaults to None.
            content_column (str, optional): Column that represent a Document's page_content. Defaults to "content".
            embedding_column (str, optional): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding".
            metadata_columns (Optional[list[str]], optional): Column(s) that represent a document's metadata. Defaults to None.
            ignore_metadata_columns (Optional[list[str]], optional): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None.
            id_column (str, optional): Column that represents the Document's id. Defaults to "langchain_id".
            metadata_json_column (Optional[str], optional): Column to store metadata as JSON. Defaults to "langchain_metadata".
            distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE.
            k (int): Number of Documents to return from search. Defaults to 4.
            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
            lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
            index_query_options (QueryOptions): Index query option.
            **kwargs (Any): Extra keyword arguments; accepted but not used, matching ``afrom_documents``.

        Raises:
            :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

        Returns:
            PGVectorStore
        """
        # NOTE: **kwargs is deliberately not forwarded. create_sync declares no
        # **kwargs parameter, so forwarding any extra keyword raised TypeError.
        vs = cls.create_sync(
            engine,
            embedding,
            table_name,
            schema_name=schema_name,
            content_column=content_column,
            embedding_column=embedding_column,
            metadata_columns=metadata_columns,
            ignore_metadata_columns=ignore_metadata_columns,
            metadata_json_column=metadata_json_column,
            id_column=id_column,
            distance_strategy=distance_strategy,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            index_query_options=index_query_options,
        )
        vs.add_documents(documents, ids=ids)
        return vs

    def similarity_search(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by similarity search on query.

        Blocking counterpart of ``asimilarity_search``: runs the wrapped
        store's ``asimilarity_search`` via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search(query, k, filter, **kwargs)
        )

    async def asimilarity_search(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by similarity search on query.

        Delegates to the wrapped store's ``asimilarity_search`` via the
        engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search(query, k, filter, **kwargs)
        )

    # Required for (a)similarity_search_with_relevance_scores
    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """Select a relevance function based on distance strategy.

        Returns:
            The relevance-score function matching the wrapped store's
            ``distance_strategy``.

        Raises:
            ValueError: If the distance strategy is not one of
                COSINE_DISTANCE, INNER_PRODUCT or EUCLIDEAN.
        """
        # Distance strategy configured on the wrapped async store.
        strategy = self.__vs.distance_strategy
        if strategy == DistanceStrategy.COSINE_DISTANCE:
            return self._cosine_relevance_score_fn
        if strategy == DistanceStrategy.INNER_PRODUCT:
            return self._max_inner_product_relevance_score_fn
        if strategy == DistanceStrategy.EUCLIDEAN:
            return self._euclidean_relevance_score_fn
        # Fail loudly instead of implicitly returning None, which would only
        # surface later when the caller tries to call the result.
        raise ValueError(f"Unsupported distance strategy: {strategy!r}")

    async def asimilarity_search_with_score(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on query.

        Delegates to the wrapped store's ``asimilarity_search_with_score`` via
        the engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs)
        )

    async def asimilarity_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by vector similarity search.

        Delegates to the wrapped store's ``asimilarity_search_by_vector`` via
        the engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs)
        )

    async def asimilarity_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by vector similarity search.

        Delegates to the wrapped store's
        ``asimilarity_search_with_score_by_vector`` via the engine's
        ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.asimilarity_search_with_score_by_vector(
                embedding, k, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search(
        self,
        query: str,
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance.

        Delegates to the wrapped store's ``amax_marginal_relevance_search``
        via the engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search(
                query, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance.

        Delegates to the wrapped store's
        ``amax_marginal_relevance_search_by_vector`` via the engine's
        ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def amax_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected using the maximal marginal relevance.

        Delegates to the wrapped store's
        ``amax_marginal_relevance_search_with_score_by_vector`` via the
        engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.amax_marginal_relevance_search_with_score_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def similarity_search_with_score(
        self,
        query: str,
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on query.

        Blocking counterpart of ``asimilarity_search_with_score``; runs the
        same wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs)
        )

    def similarity_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected by vector similarity search.

        Blocking counterpart of ``asimilarity_search_by_vector``; runs the
        same wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs)
        )

    def similarity_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected by similarity search on vector.

        Blocking counterpart of ``asimilarity_search_with_score_by_vector``;
        runs the same wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.asimilarity_search_with_score_by_vector(
                embedding, k, filter, **kwargs
            )
        )

    def max_marginal_relevance_search(
        self,
        query: str,
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance.

        Blocking counterpart of ``amax_marginal_relevance_search``; runs the
        same wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search(
                query, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Return docs selected using the maximal marginal relevance.

        Blocking counterpart of ``amax_marginal_relevance_search_by_vector``;
        runs the same wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    def max_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: list[float],
        k: Optional[int] = None,
        fetch_k: Optional[int] = None,
        lambda_mult: Optional[float] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> list[tuple[Document, float]]:
        """Return docs and distance scores selected using the maximal marginal relevance.

        Blocking counterpart of
        ``amax_marginal_relevance_search_with_score_by_vector``; runs the same
        wrapped coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.amax_marginal_relevance_search_with_score_by_vector(
                embedding, k, fetch_k, lambda_mult, filter, **kwargs
            )
        )

    async def aapply_vector_index(
        self,
        index: BaseIndex,
        name: Optional[str] = None,
        concurrently: bool = False,
    ) -> None:
        """Create an index on the vector store table.

        Delegates to the wrapped store's ``aapply_vector_index`` via the
        engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.aapply_vector_index(index, name, concurrently=concurrently)
        )

    def apply_vector_index(
        self,
        index: BaseIndex,
        name: Optional[str] = None,
        concurrently: bool = False,
    ) -> None:
        """Create an index on the vector store table.

        Blocking counterpart of ``aapply_vector_index``; runs the same wrapped
        coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(
            self.__vs.aapply_vector_index(index, name, concurrently=concurrently)
        )

    async def areindex(self, index_name: Optional[str] = None) -> None:
        """Re-index the vector store table.

        Delegates to the wrapped store's ``areindex`` via the engine's
        ``_run_as_async``.
        """
        return await self._engine._run_as_async(self.__vs.areindex(index_name))

    def reindex(self, index_name: Optional[str] = None) -> None:
        """Re-index the vector store table.

        Blocking counterpart of ``areindex``; runs the same wrapped coroutine
        via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(self.__vs.areindex(index_name))

    async def adrop_vector_index(
        self,
        index_name: Optional[str] = None,
    ) -> None:
        """Drop the vector index.

        Delegates to the wrapped store's ``adrop_vector_index`` via the
        engine's ``_run_as_async``.
        """
        return await self._engine._run_as_async(
            self.__vs.adrop_vector_index(index_name)
        )

    def drop_vector_index(
        self,
        index_name: Optional[str] = None,
    ) -> None:
        """Drop the vector index.

        Blocking counterpart of ``adrop_vector_index``; runs the same wrapped
        coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(self.__vs.adrop_vector_index(index_name))

    async def ais_valid_index(
        self,
        index_name: Optional[str] = None,
    ) -> bool:
        """Check if index exists in the table.

        Delegates to the wrapped store's ``is_valid_index`` via the engine's
        ``_run_as_async``.
        """
        return await self._engine._run_as_async(self.__vs.is_valid_index(index_name))

    def is_valid_index(
        self,
        index_name: Optional[str] = None,
    ) -> bool:
        """Check if index exists in the table.

        Blocking counterpart of ``ais_valid_index``; runs the wrapped store's
        ``is_valid_index`` via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(self.__vs.is_valid_index(index_name))

    async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]:
        """Get documents by ids.

        Delegates to the wrapped store's ``aget_by_ids`` via the engine's
        ``_run_as_async``.
        """
        return await self._engine._run_as_async(self.__vs.aget_by_ids(ids=ids))

    def get_by_ids(self, ids: Sequence[str]) -> list[Document]:
        """Get documents by ids.

        Blocking counterpart of ``aget_by_ids``; runs the same wrapped
        coroutine via the engine's ``_run_as_sync``.
        """
        return self._engine._run_as_sync(self.__vs.aget_by_ids(ids=ids))

    def get_table_name(self) -> str:
        """Return the wrapped async store's ``table_name``."""
        return self.__vs.table_name