langchain-postgres 0.0.14rc1__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_postgres/__init__.py +1 -1
- langchain_postgres/chat_message_histories.py +1 -0
- langchain_postgres/utils/pgvector_migrator.py +1 -1
- langchain_postgres/v2/async_vectorstore.py +190 -47
- langchain_postgres/v2/engine.py +63 -4
- langchain_postgres/v2/hybrid_search_config.py +149 -0
- langchain_postgres/v2/vectorstores.py +19 -0
- langchain_postgres/vectorstores.py +1 -8
- {langchain_postgres-0.0.14rc1.dist-info → langchain_postgres-0.0.15.dist-info}/METADATA +22 -29
- langchain_postgres-0.0.15.dist-info/RECORD +17 -0
- {langchain_postgres-0.0.14rc1.dist-info → langchain_postgres-0.0.15.dist-info}/WHEEL +1 -1
- langchain_postgres-0.0.14rc1.dist-info/RECORD +0 -16
- {langchain_postgres-0.0.14rc1.dist-info → langchain_postgres-0.0.15.dist-info/licenses}/LICENSE +0 -0
langchain_postgres/__init__.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from importlib import metadata
|
2
2
|
|
3
3
|
from langchain_postgres.chat_message_histories import PostgresChatMessageHistory
|
4
|
-
from langchain_postgres.v2.engine import Column, PGEngine, ColumnDict
|
5
4
|
from langchain_postgres.translator import PGVectorTranslator
|
5
|
+
from langchain_postgres.v2.engine import Column, ColumnDict, PGEngine
|
6
6
|
from langchain_postgres.v2.vectorstores import PGVectorStore
|
7
7
|
from langchain_postgres.vectorstores import PGVector
|
8
8
|
|
@@ -68,7 +68,7 @@ async def __aextract_pgvector_collection(
|
|
68
68
|
if not rows:
|
69
69
|
break
|
70
70
|
yield [row._mapping for row in rows]
|
71
|
-
except ValueError
|
71
|
+
except ValueError:
|
72
72
|
raise ValueError(f"Collection, {collection_name} does not exist.")
|
73
73
|
except SQLAlchemyError as e:
|
74
74
|
raise ProgrammingError(
|
@@ -14,6 +14,7 @@ from sqlalchemy import RowMapping, text
|
|
14
14
|
from sqlalchemy.ext.asyncio import AsyncEngine
|
15
15
|
|
16
16
|
from .engine import PGEngine
|
17
|
+
from .hybrid_search_config import HybridSearchConfig
|
17
18
|
from .indexes import (
|
18
19
|
DEFAULT_DISTANCE_STRATEGY,
|
19
20
|
DEFAULT_INDEX_NAME_SUFFIX,
|
@@ -77,6 +78,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
77
78
|
fetch_k: int = 20,
|
78
79
|
lambda_mult: float = 0.5,
|
79
80
|
index_query_options: Optional[QueryOptions] = None,
|
81
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
80
82
|
):
|
81
83
|
"""AsyncPGVectorStore constructor.
|
82
84
|
Args:
|
@@ -95,6 +97,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
95
97
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
96
98
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
97
99
|
index_query_options (QueryOptions): Index query option.
|
100
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
98
101
|
|
99
102
|
|
100
103
|
Raises:
|
@@ -119,6 +122,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
119
122
|
self.fetch_k = fetch_k
|
120
123
|
self.lambda_mult = lambda_mult
|
121
124
|
self.index_query_options = index_query_options
|
125
|
+
self.hybrid_search_config = hybrid_search_config
|
122
126
|
|
123
127
|
@classmethod
|
124
128
|
async def create(
|
@@ -139,6 +143,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
139
143
|
fetch_k: int = 20,
|
140
144
|
lambda_mult: float = 0.5,
|
141
145
|
index_query_options: Optional[QueryOptions] = None,
|
146
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
142
147
|
) -> AsyncPGVectorStore:
|
143
148
|
"""Create an AsyncPGVectorStore instance.
|
144
149
|
|
@@ -158,6 +163,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
158
163
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
159
164
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
160
165
|
index_query_options (QueryOptions): Index query option.
|
166
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
161
167
|
|
162
168
|
Returns:
|
163
169
|
AsyncPGVectorStore
|
@@ -193,6 +199,15 @@ class AsyncPGVectorStore(VectorStore):
|
|
193
199
|
raise ValueError(
|
194
200
|
f"Content column, {content_column}, is type, {content_type}. It must be a type of character string."
|
195
201
|
)
|
202
|
+
if hybrid_search_config:
|
203
|
+
tsv_column_name = (
|
204
|
+
hybrid_search_config.tsv_column
|
205
|
+
if hybrid_search_config.tsv_column
|
206
|
+
else content_column + "_tsv"
|
207
|
+
)
|
208
|
+
if tsv_column_name not in columns or columns[tsv_column_name] != "tsvector":
|
209
|
+
# mark tsv_column as empty because there is no TSV column in table
|
210
|
+
hybrid_search_config.tsv_column = ""
|
196
211
|
if embedding_column not in columns:
|
197
212
|
raise ValueError(f"Embedding column, {embedding_column}, does not exist.")
|
198
213
|
if columns[embedding_column] != "USER-DEFINED":
|
@@ -236,6 +251,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
236
251
|
fetch_k=fetch_k,
|
237
252
|
lambda_mult=lambda_mult,
|
238
253
|
index_query_options=index_query_options,
|
254
|
+
hybrid_search_config=hybrid_search_config,
|
239
255
|
)
|
240
256
|
|
241
257
|
@property
|
@@ -273,17 +289,30 @@ class AsyncPGVectorStore(VectorStore):
|
|
273
289
|
if len(self.metadata_columns) > 0
|
274
290
|
else ""
|
275
291
|
)
|
276
|
-
|
292
|
+
hybrid_search_column = (
|
293
|
+
f', "{self.hybrid_search_config.tsv_column}"'
|
294
|
+
if self.hybrid_search_config and self.hybrid_search_config.tsv_column
|
295
|
+
else ""
|
296
|
+
)
|
297
|
+
insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{hybrid_search_column}{metadata_col_names}'
|
277
298
|
values = {
|
278
|
-
"
|
299
|
+
"langchain_id": id,
|
279
300
|
"content": content,
|
280
301
|
"embedding": str([float(dimension) for dimension in embedding]),
|
281
302
|
}
|
282
|
-
values_stmt = "VALUES (:
|
303
|
+
values_stmt = "VALUES (:langchain_id, :content, :embedding"
|
283
304
|
|
284
305
|
if not embedding and can_inline_embed:
|
285
|
-
values_stmt = f"VALUES (:
|
306
|
+
values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore
|
286
307
|
|
308
|
+
if self.hybrid_search_config and self.hybrid_search_config.tsv_column:
|
309
|
+
lang = (
|
310
|
+
f"'{self.hybrid_search_config.tsv_lang}',"
|
311
|
+
if self.hybrid_search_config.tsv_lang
|
312
|
+
else ""
|
313
|
+
)
|
314
|
+
values_stmt += f", to_tsvector({lang} :tsv_content)"
|
315
|
+
values["tsv_content"] = content
|
287
316
|
# Add metadata
|
288
317
|
extra = copy.deepcopy(metadata)
|
289
318
|
for metadata_column in self.metadata_columns:
|
@@ -308,6 +337,9 @@ class AsyncPGVectorStore(VectorStore):
|
|
308
337
|
|
309
338
|
upsert_stmt = f' ON CONFLICT ("{self.id_column}") DO UPDATE SET "{self.content_column}" = EXCLUDED."{self.content_column}", "{self.embedding_column}" = EXCLUDED."{self.embedding_column}"'
|
310
339
|
|
340
|
+
if self.hybrid_search_config and self.hybrid_search_config.tsv_column:
|
341
|
+
upsert_stmt += f', "{self.hybrid_search_config.tsv_column}" = EXCLUDED."{self.hybrid_search_config.tsv_column}"'
|
342
|
+
|
311
343
|
if self.metadata_json_column:
|
312
344
|
upsert_stmt += f', "{self.metadata_json_column}" = EXCLUDED."{self.metadata_json_column}"'
|
313
345
|
|
@@ -408,6 +440,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
408
440
|
fetch_k: int = 20,
|
409
441
|
lambda_mult: float = 0.5,
|
410
442
|
index_query_options: Optional[QueryOptions] = None,
|
443
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
411
444
|
**kwargs: Any,
|
412
445
|
) -> AsyncPGVectorStore:
|
413
446
|
"""Create an AsyncPGVectorStore instance from texts.
|
@@ -453,6 +486,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
453
486
|
fetch_k=fetch_k,
|
454
487
|
lambda_mult=lambda_mult,
|
455
488
|
index_query_options=index_query_options,
|
489
|
+
hybrid_search_config=hybrid_search_config,
|
456
490
|
)
|
457
491
|
await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs)
|
458
492
|
return vs
|
@@ -478,6 +512,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
478
512
|
fetch_k: int = 20,
|
479
513
|
lambda_mult: float = 0.5,
|
480
514
|
index_query_options: Optional[QueryOptions] = None,
|
515
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
481
516
|
**kwargs: Any,
|
482
517
|
) -> AsyncPGVectorStore:
|
483
518
|
"""Create an AsyncPGVectorStore instance from documents.
|
@@ -524,6 +559,7 @@ class AsyncPGVectorStore(VectorStore):
|
|
524
559
|
fetch_k=fetch_k,
|
525
560
|
lambda_mult=lambda_mult,
|
526
561
|
index_query_options=index_query_options,
|
562
|
+
hybrid_search_config=hybrid_search_config,
|
527
563
|
)
|
528
564
|
texts = [doc.page_content for doc in documents]
|
529
565
|
metadatas = [doc.metadata for doc in documents]
|
@@ -538,16 +574,30 @@ class AsyncPGVectorStore(VectorStore):
|
|
538
574
|
filter: Optional[dict] = None,
|
539
575
|
**kwargs: Any,
|
540
576
|
) -> Sequence[RowMapping]:
|
541
|
-
"""
|
542
|
-
|
577
|
+
"""
|
578
|
+
Perform similarity search (or hybrid search) query on database.
|
579
|
+
Queries might be slow if the hybrid search column does not exist.
|
580
|
+
For best hybrid search performance, consider creating a TSV column
|
581
|
+
and adding GIN index.
|
582
|
+
"""
|
583
|
+
if not k:
|
584
|
+
k = (
|
585
|
+
max(
|
586
|
+
self.k,
|
587
|
+
self.hybrid_search_config.primary_top_k,
|
588
|
+
self.hybrid_search_config.secondary_top_k,
|
589
|
+
)
|
590
|
+
if self.hybrid_search_config
|
591
|
+
else self.k
|
592
|
+
)
|
543
593
|
operator = self.distance_strategy.operator
|
544
594
|
search_function = self.distance_strategy.search_function
|
545
595
|
|
546
|
-
columns =
|
596
|
+
columns = [
|
547
597
|
self.id_column,
|
548
598
|
self.content_column,
|
549
599
|
self.embedding_column,
|
550
|
-
]
|
600
|
+
] + self.metadata_columns
|
551
601
|
if self.metadata_json_column:
|
552
602
|
columns.append(self.metadata_json_column)
|
553
603
|
|
@@ -557,14 +607,17 @@ class AsyncPGVectorStore(VectorStore):
|
|
557
607
|
filter_dict = None
|
558
608
|
if filter and isinstance(filter, dict):
|
559
609
|
safe_filter, filter_dict = self._create_filter_clause(filter)
|
560
|
-
|
610
|
+
|
561
611
|
inline_embed_func = getattr(self.embedding_service, "embed_query_inline", None)
|
562
612
|
if not embedding and callable(inline_embed_func) and "query" in kwargs:
|
563
613
|
query_embedding = self.embedding_service.embed_query_inline(kwargs["query"]) # type: ignore
|
614
|
+
embedding_data_string = f"{query_embedding}"
|
564
615
|
else:
|
565
616
|
query_embedding = f"{[float(dimension) for dimension in embedding]}"
|
566
|
-
|
567
|
-
|
617
|
+
embedding_data_string = ":query_embedding"
|
618
|
+
where_filters = f"WHERE {safe_filter}" if safe_filter else ""
|
619
|
+
dense_query_stmt = f"""SELECT {column_names}, {search_function}("{self.embedding_column}", {embedding_data_string}) as distance
|
620
|
+
FROM "{self.schema_name}"."{self.table_name}" {where_filters} ORDER BY "{self.embedding_column}" {operator} {embedding_data_string} LIMIT :k;
|
568
621
|
"""
|
569
622
|
param_dict = {"query_embedding": query_embedding, "k": k}
|
570
623
|
if filter_dict:
|
@@ -575,15 +628,51 @@ class AsyncPGVectorStore(VectorStore):
|
|
575
628
|
for query_option in self.index_query_options.to_parameter():
|
576
629
|
query_options_stmt = f"SET LOCAL {query_option};"
|
577
630
|
await conn.execute(text(query_options_stmt))
|
578
|
-
result = await conn.execute(text(
|
631
|
+
result = await conn.execute(text(dense_query_stmt), param_dict)
|
579
632
|
result_map = result.mappings()
|
580
|
-
|
633
|
+
dense_results = result_map.fetchall()
|
581
634
|
else:
|
582
635
|
async with self.engine.connect() as conn:
|
583
|
-
result = await conn.execute(text(
|
636
|
+
result = await conn.execute(text(dense_query_stmt), param_dict)
|
637
|
+
result_map = result.mappings()
|
638
|
+
dense_results = result_map.fetchall()
|
639
|
+
|
640
|
+
hybrid_search_config = kwargs.get(
|
641
|
+
"hybrid_search_config", self.hybrid_search_config
|
642
|
+
)
|
643
|
+
fts_query = (
|
644
|
+
hybrid_search_config.fts_query
|
645
|
+
if hybrid_search_config and hybrid_search_config.fts_query
|
646
|
+
else kwargs.get("fts_query", "")
|
647
|
+
)
|
648
|
+
if hybrid_search_config and fts_query:
|
649
|
+
hybrid_search_config.fusion_function_parameters["fetch_top_k"] = k
|
650
|
+
# do the sparse query
|
651
|
+
lang = (
|
652
|
+
f"'{hybrid_search_config.tsv_lang}',"
|
653
|
+
if hybrid_search_config.tsv_lang
|
654
|
+
else ""
|
655
|
+
)
|
656
|
+
query_tsv = f"plainto_tsquery({lang} :fts_query)"
|
657
|
+
param_dict["fts_query"] = fts_query
|
658
|
+
if hybrid_search_config.tsv_column:
|
659
|
+
content_tsv = f'"{hybrid_search_config.tsv_column}"'
|
660
|
+
else:
|
661
|
+
content_tsv = f'to_tsvector({lang} "{self.content_column}")'
|
662
|
+
and_filters = f"AND ({safe_filter})" if safe_filter else ""
|
663
|
+
sparse_query_stmt = f'SELECT {column_names}, ts_rank_cd({content_tsv}, {query_tsv}) as distance FROM "{self.schema_name}"."{self.table_name}" WHERE {content_tsv} @@ {query_tsv} {and_filters} ORDER BY distance desc LIMIT {hybrid_search_config.secondary_top_k};'
|
664
|
+
async with self.engine.connect() as conn:
|
665
|
+
result = await conn.execute(text(sparse_query_stmt), param_dict)
|
584
666
|
result_map = result.mappings()
|
585
|
-
|
586
|
-
|
667
|
+
sparse_results = result_map.fetchall()
|
668
|
+
|
669
|
+
combined_results = hybrid_search_config.fusion_function(
|
670
|
+
dense_results,
|
671
|
+
sparse_results,
|
672
|
+
**hybrid_search_config.fusion_function_parameters,
|
673
|
+
)
|
674
|
+
return combined_results
|
675
|
+
return dense_results
|
587
676
|
|
588
677
|
async def asimilarity_search(
|
589
678
|
self,
|
@@ -601,6 +690,14 @@ class AsyncPGVectorStore(VectorStore):
|
|
601
690
|
)
|
602
691
|
kwargs["query"] = query
|
603
692
|
|
693
|
+
# add fts_query to hybrid_search_config
|
694
|
+
hybrid_search_config = kwargs.get(
|
695
|
+
"hybrid_search_config", self.hybrid_search_config
|
696
|
+
)
|
697
|
+
if hybrid_search_config and not hybrid_search_config.fts_query:
|
698
|
+
hybrid_search_config.fts_query = query
|
699
|
+
kwargs["hybrid_search_config"] = hybrid_search_config
|
700
|
+
|
604
701
|
return await self.asimilarity_search_by_vector(
|
605
702
|
embedding=embedding, k=k, filter=filter, **kwargs
|
606
703
|
)
|
@@ -632,6 +729,14 @@ class AsyncPGVectorStore(VectorStore):
|
|
632
729
|
)
|
633
730
|
kwargs["query"] = query
|
634
731
|
|
732
|
+
# add fts_query to hybrid_search_config
|
733
|
+
hybrid_search_config = kwargs.get(
|
734
|
+
"hybrid_search_config", self.hybrid_search_config
|
735
|
+
)
|
736
|
+
if hybrid_search_config and not hybrid_search_config.fts_query:
|
737
|
+
hybrid_search_config.fts_query = query
|
738
|
+
kwargs["hybrid_search_config"] = hybrid_search_config
|
739
|
+
|
635
740
|
docs = await self.asimilarity_search_with_score_by_vector(
|
636
741
|
embedding=embedding, k=k, filter=filter, **kwargs
|
637
742
|
)
|
@@ -776,6 +881,41 @@ class AsyncPGVectorStore(VectorStore):
|
|
776
881
|
|
777
882
|
return [r for i, r in enumerate(documents_with_scores) if i in mmr_selected]
|
778
883
|
|
884
|
+
async def aapply_hybrid_search_index(
    self,
    concurrently: bool = False,
) -> None:
    """Create the full-text (TSV) index described by the hybrid search config.

    The index is built on the configured ``tsv_column`` when one is set;
    otherwise it is an expression index over
    ``to_tsvector(<tsv_lang>, "<content_column>")``.

    Args:
        concurrently (bool): When True, issue ``CREATE INDEX CONCURRENTLY``
            on a connection switched to AUTOCOMMIT isolation. Defaults to
            False, in which case the statement is executed and committed
            explicitly.

    Raises:
        ValueError: If no hybrid search config is set, or its ``index_type``
            or ``index_name`` is empty.
    """
    config = self.hybrid_search_config
    if not config or not config.index_type or not config.index_name:
        # no index needs to be created
        raise ValueError("Hybrid Search Config cannot create index.")

    lang = f"'{config.tsv_lang}'," if config.tsv_lang else ""

    # Quote the column identifiers the same way the INSERT and search
    # statements do, so mixed-case or otherwise non-trivial column names
    # resolve to the same column here.
    if config.tsv_column:
        index_target = f'"{config.tsv_column}"'
    else:
        index_target = f'to_tsvector({lang} "{self.content_column}")'

    tsv_index_query = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} {config.index_name} ON "{self.schema_name}"."{self.table_name}" USING {config.index_type}({index_target});'

    async with self.engine.connect() as conn:
        if concurrently:
            autocommit_conn = await conn.execution_options(
                isolation_level="AUTOCOMMIT"
            )
            await autocommit_conn.execute(text(tsv_index_query))
        else:
            await conn.execute(text(tsv_index_query))
            await conn.commit()
|
918
|
+
|
779
919
|
async def aapply_vector_index(
|
780
920
|
self,
|
781
921
|
index: BaseIndex,
|
@@ -800,10 +940,11 @@ class AsyncPGVectorStore(VectorStore):
|
|
800
940
|
filter = f"WHERE ({index.partial_indexes})" if index.partial_indexes else ""
|
801
941
|
params = "WITH " + index.index_options()
|
802
942
|
if name is None:
|
803
|
-
if index.name
|
943
|
+
if index.name is None:
|
804
944
|
index.name = self.table_name + DEFAULT_INDEX_NAME_SUFFIX
|
805
945
|
name = index.name
|
806
946
|
stmt = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} "{name}" ON "{self.schema_name}"."{self.table_name}" USING {index.index_type} ({self.embedding_column} {function}) {params} {filter};'
|
947
|
+
|
807
948
|
if concurrently:
|
808
949
|
async with self.engine.connect() as conn:
|
809
950
|
autocommit_conn = await conn.execution_options(
|
@@ -954,46 +1095,48 @@ class AsyncPGVectorStore(VectorStore):
|
|
954
1095
|
operator = "$eq"
|
955
1096
|
filter_value = value
|
956
1097
|
|
1098
|
+
suffix_id = str(uuid.uuid4()).split("-")[0]
|
957
1099
|
if operator in COMPARISONS_TO_NATIVE:
|
958
1100
|
# Then we implement an equality filter
|
959
1101
|
# native is trusted input
|
960
1102
|
native = COMPARISONS_TO_NATIVE[operator]
|
961
|
-
|
962
|
-
return f"{field} {native} :{
|
1103
|
+
param_name = f"{field}_{suffix_id}"
|
1104
|
+
return f"{field} {native} :{param_name}", {f"{param_name}": filter_value}
|
963
1105
|
elif operator == "$between":
|
964
1106
|
# Use AND with two comparisons
|
965
1107
|
low, high = filter_value
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
f"{
|
1108
|
+
low_param_name = f"{field}_low_{suffix_id}"
|
1109
|
+
high_param_name = f"{field}_high_{suffix_id}"
|
1110
|
+
return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", {
|
1111
|
+
f"{low_param_name}": low,
|
1112
|
+
f"{high_param_name}": high,
|
970
1113
|
}
|
971
|
-
elif operator in {"$in", "$nin"
|
1114
|
+
elif operator in {"$in", "$nin"}:
|
972
1115
|
# We'll do force coercion to text
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
elif operator in {"$like"}:
|
990
|
-
return f"({field} LIKE :{field}_like)", {f"{field}_like": filter_value}
|
991
|
-
elif operator in {"$ilike"}:
|
992
|
-
return f"({field} ILIKE :{field}_ilike)", {
|
993
|
-
f"{field}_ilike": filter_value
|
1116
|
+
for val in filter_value:
|
1117
|
+
if not isinstance(val, (str, int, float)):
|
1118
|
+
raise NotImplementedError(
|
1119
|
+
f"Unsupported type: {type(val)} for value: {val}"
|
1120
|
+
)
|
1121
|
+
|
1122
|
+
if isinstance(val, bool): # b/c bool is an instance of int
|
1123
|
+
raise NotImplementedError(
|
1124
|
+
f"Unsupported type: {type(val)} for value: {val}"
|
1125
|
+
)
|
1126
|
+
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
|
1127
|
+
if operator == "$in":
|
1128
|
+
return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value}
|
1129
|
+
else: # i.e. $nin
|
1130
|
+
return f"{field} <> ALL (:{param_name})", {
|
1131
|
+
f"{param_name}": filter_value
|
994
1132
|
}
|
995
|
-
|
996
|
-
|
1133
|
+
|
1134
|
+
elif operator in {"$like", "$ilike"}:
|
1135
|
+
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
|
1136
|
+
if operator == "$like":
|
1137
|
+
return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value}
|
1138
|
+
else: # i.e. $ilike
|
1139
|
+
return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value}
|
997
1140
|
elif operator == "$exists":
|
998
1141
|
if not isinstance(filter_value, bool):
|
999
1142
|
raise ValueError(
|
langchain_postgres/v2/engine.py
CHANGED
@@ -3,14 +3,13 @@ from __future__ import annotations
|
|
3
3
|
import asyncio
|
4
4
|
from dataclasses import dataclass
|
5
5
|
from threading import Thread
|
6
|
-
from typing import
|
6
|
+
from typing import Any, Awaitable, Optional, TypedDict, TypeVar, Union
|
7
7
|
|
8
8
|
from sqlalchemy import text
|
9
9
|
from sqlalchemy.engine import URL
|
10
10
|
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
11
11
|
|
12
|
-
|
13
|
-
import asyncpg # type: ignore
|
12
|
+
from .hybrid_search_config import HybridSearchConfig
|
14
13
|
|
15
14
|
T = TypeVar("T")
|
16
15
|
|
@@ -159,6 +158,7 @@ class PGEngine:
|
|
159
158
|
id_column: Union[str, Column, ColumnDict] = "langchain_id",
|
160
159
|
overwrite_existing: bool = False,
|
161
160
|
store_metadata: bool = True,
|
161
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
162
162
|
) -> None:
|
163
163
|
"""
|
164
164
|
Create a table for saving of vectors to be used with PGVectorStore.
|
@@ -181,6 +181,8 @@ class PGEngine:
|
|
181
181
|
overwrite_existing (bool): Whether to drop existing table. Default: False.
|
182
182
|
store_metadata (bool): Whether to store metadata in the table.
|
183
183
|
Default: True.
|
184
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
|
185
|
+
Default: None.
|
184
186
|
|
185
187
|
Raises:
|
186
188
|
:class:`DuplicateTableError <asyncpg.exceptions.DuplicateTableError>`: if table already exists.
|
@@ -189,6 +191,7 @@ class PGEngine:
|
|
189
191
|
|
190
192
|
schema_name = self._escape_postgres_identifier(schema_name)
|
191
193
|
table_name = self._escape_postgres_identifier(table_name)
|
194
|
+
hybrid_search_default_column_name = content_column + "_tsv"
|
192
195
|
content_column = self._escape_postgres_identifier(content_column)
|
193
196
|
embedding_column = self._escape_postgres_identifier(embedding_column)
|
194
197
|
if metadata_columns is None:
|
@@ -229,10 +232,22 @@ class PGEngine:
|
|
229
232
|
id_data_type = id_column["data_type"]
|
230
233
|
id_column_name = id_column["name"]
|
231
234
|
|
235
|
+
hybrid_search_column = "" # Default is no TSV column for hybrid search
|
236
|
+
if hybrid_search_config:
|
237
|
+
hybrid_search_column_name = (
|
238
|
+
hybrid_search_config.tsv_column or hybrid_search_default_column_name
|
239
|
+
)
|
240
|
+
hybrid_search_column_name = self._escape_postgres_identifier(
|
241
|
+
hybrid_search_column_name
|
242
|
+
)
|
243
|
+
hybrid_search_config.tsv_column = hybrid_search_column_name
|
244
|
+
hybrid_search_column = f',"{self._escape_postgres_identifier(hybrid_search_column_name)}" TSVECTOR NOT NULL'
|
245
|
+
|
232
246
|
query = f"""CREATE TABLE "{schema_name}"."{table_name}"(
|
233
247
|
"{id_column_name}" {id_data_type} PRIMARY KEY,
|
234
248
|
"{content_column}" TEXT NOT NULL,
|
235
|
-
"{embedding_column}" vector({vector_size}) NOT NULL
|
249
|
+
"{embedding_column}" vector({vector_size}) NOT NULL
|
250
|
+
{hybrid_search_column}"""
|
236
251
|
for column in metadata_columns:
|
237
252
|
if isinstance(column, Column):
|
238
253
|
nullable = "NOT NULL" if not column.nullable else ""
|
@@ -261,6 +276,7 @@ class PGEngine:
|
|
261
276
|
id_column: Union[str, Column, ColumnDict] = "langchain_id",
|
262
277
|
overwrite_existing: bool = False,
|
263
278
|
store_metadata: bool = True,
|
279
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
264
280
|
) -> None:
|
265
281
|
"""
|
266
282
|
Create a table for saving of vectors to be used with PGVectorStore.
|
@@ -283,6 +299,10 @@ class PGEngine:
|
|
283
299
|
overwrite_existing (bool): Whether to drop existing table. Default: False.
|
284
300
|
store_metadata (bool): Whether to store metadata in the table.
|
285
301
|
Default: True.
|
302
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
|
303
|
+
Note that queries might be slow if the hybrid search column does not exist.
|
304
|
+
For best hybrid search performance, consider creating a TSV column and adding GIN index.
|
305
|
+
Default: None.
|
286
306
|
"""
|
287
307
|
await self._run_as_async(
|
288
308
|
self._ainit_vectorstore_table(
|
@@ -296,6 +316,7 @@ class PGEngine:
|
|
296
316
|
id_column=id_column,
|
297
317
|
overwrite_existing=overwrite_existing,
|
298
318
|
store_metadata=store_metadata,
|
319
|
+
hybrid_search_config=hybrid_search_config,
|
299
320
|
)
|
300
321
|
)
|
301
322
|
|
@@ -312,6 +333,7 @@ class PGEngine:
|
|
312
333
|
id_column: Union[str, Column, ColumnDict] = "langchain_id",
|
313
334
|
overwrite_existing: bool = False,
|
314
335
|
store_metadata: bool = True,
|
336
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
315
337
|
) -> None:
|
316
338
|
"""
|
317
339
|
Create a table for saving of vectors to be used with PGVectorStore.
|
@@ -334,6 +356,10 @@ class PGEngine:
|
|
334
356
|
overwrite_existing (bool): Whether to drop existing table. Default: False.
|
335
357
|
store_metadata (bool): Whether to store metadata in the table.
|
336
358
|
Default: True.
|
359
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
|
360
|
+
Note that queries might be slow if the hybrid search column does not exist.
|
361
|
+
For best hybrid search performance, consider creating a TSV column and adding GIN index.
|
362
|
+
Default: None.
|
337
363
|
"""
|
338
364
|
self._run_as_sync(
|
339
365
|
self._ainit_vectorstore_table(
|
@@ -347,5 +373,38 @@ class PGEngine:
|
|
347
373
|
id_column=id_column,
|
348
374
|
overwrite_existing=overwrite_existing,
|
349
375
|
store_metadata=store_metadata,
|
376
|
+
hybrid_search_config=hybrid_search_config,
|
350
377
|
)
|
351
378
|
)
|
379
|
+
|
380
|
+
async def _adrop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table if it exists.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    # Escape both identifiers before embedding them in double quotes, the
    # same way _ainit_vectorstore_table treats its schema and table names,
    # so a name containing a quote cannot terminate the identifier early.
    schema_name = self._escape_postgres_identifier(schema_name)
    table_name = self._escape_postgres_identifier(table_name)
    query = f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}";'
    async with self._pool.connect() as conn:
        await conn.execute(text(query))
        await conn.commit()
|
391
|
+
|
392
|
+
async def adrop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table (async entry point).

    Builds the private ``_adrop_table`` coroutine and awaits it through
    ``self._run_as_async``.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    drop_coro = self._adrop_table(table_name=table_name, schema_name=schema_name)
    await self._run_as_async(drop_coro)
|
401
|
+
|
402
|
+
def drop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table (sync entry point).

    Builds the private ``_adrop_table`` coroutine and hands it to
    ``self._run_as_sync``.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    drop_coro = self._adrop_table(table_name=table_name, schema_name=schema_name)
    self._run_as_sync(drop_coro)
|
@@ -0,0 +1,149 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
from dataclasses import dataclass, field
|
3
|
+
from typing import Any, Callable, Optional, Sequence
|
4
|
+
|
5
|
+
from sqlalchemy import RowMapping
|
6
|
+
|
7
|
+
|
8
|
+
def weighted_sum_ranking(
    primary_search_results: Sequence[RowMapping],
    secondary_search_results: Sequence[RowMapping],
    primary_results_weight: float = 0.5,
    secondary_results_weight: float = 0.5,
    fetch_top_k: int = 4,
) -> Sequence[dict[str, Any]]:
    """
    Ranks documents using a weighted sum of scores from two sources.

    Every row is read positionally: its first value is taken as the document
    id and its last value as the score. The merged results are ordered with
    the largest combined score first, so both sources are expected to report
    scores for which a larger value means a better match.

    Args:
        primary_search_results: Row mappings from the primary search.
        secondary_search_results: Row mappings from the secondary search.
        primary_results_weight: The weight for the primary source's scores.
            Defaults to 0.5.
        secondary_results_weight: The weight for the secondary source's scores.
            Defaults to 0.5.
        fetch_top_k: The maximum number of documents to return after merging
            the results. Values below zero are treated as zero.
            Defaults to 4.

    Returns:
        A list of at most ``fetch_top_k`` dicts, each a copy of an input row
        whose ``"distance"`` entry holds the combined weighted score, sorted
        by that score in descending order. When a document appears in both
        sources, the columns of the secondary row are kept.
    """
    # Maps document id -> row copy whose "distance" is the combined score.
    weighted_scores: dict[str, dict[str, Any]] = {}

    # Process results from primary source
    for row in primary_search_results:
        values = list(row.values())
        doc_id = str(values[0])  # first value is doc_id
        distance = float(values[-1])  # type: ignore # last value is distance
        scored_row = dict(row)  # copy so the caller's rows are never mutated
        scored_row["distance"] = primary_results_weight * distance
        weighted_scores[doc_id] = scored_row

    # Process results from secondary source,
    # adding to existing scores or creating new ones
    for row in secondary_search_results:
        values = list(row.values())
        doc_id = str(values[0])  # first value is doc_id
        distance = float(values[-1])  # type: ignore # last value is distance
        existing = weighted_scores.get(doc_id)
        carried_score = existing["distance"] if existing is not None else 0.0
        scored_row = dict(row)
        scored_row["distance"] = distance * secondary_results_weight + carried_score
        weighted_scores[doc_id] = scored_row

    # Sort the results by weighted score in descending order
    ranked_results = sorted(
        weighted_scores.values(), key=lambda item: item["distance"], reverse=True
    )
    # Clamp so a negative limit yields no rows instead of "all but the last N".
    return ranked_results[: max(fetch_top_k, 0)]
|
65
|
+
|
66
|
+
|
67
|
+
def reciprocal_rank_fusion(
    primary_search_results: Sequence[RowMapping],
    secondary_search_results: Sequence[RowMapping],
    rrf_k: float = 60,
    fetch_top_k: int = 4,
) -> Sequence[dict[str, Any]]:
    """
    Ranks documents using Reciprocal Rank Fusion (RRF) of scores from two sources.

    Each source is first sorted by its ``"distance"`` value in descending
    order, so the row with the largest ``"distance"`` receives rank 0. A row
    at rank ``r`` contributes ``1 / (r + rrf_k)`` to its document's fused
    score. The document id is the first value of each row.

    Args:
        primary_search_results: Row mappings from the primary search; each
            must have a ``"distance"`` key.
        secondary_search_results: Row mappings from the secondary search;
            each must have a ``"distance"`` key.
        rrf_k: The RRF parameter k.
            Defaults to 60.
        fetch_top_k: The maximum number of documents to return after merging
            the results. Values below zero are treated as zero.
            Defaults to 4.

    Returns:
        A list of at most ``fetch_top_k`` dicts, each a copy of an input row
        whose ``"distance"`` entry holds the fused RRF score, sorted by that
        score in descending order. When a document appears in both sources,
        the columns of the secondary row are kept.
    """
    # Maps document id -> row copy whose "distance" is the accumulated RRF score.
    rrf_scores: dict[str, dict[str, Any]] = {}

    # Both sources are folded in identically: rank within the source, then
    # add the reciprocal-rank contribution to whatever the document has so far.
    for source_results in (primary_search_results, secondary_search_results):
        ranked_source = sorted(
            source_results, key=lambda item: item["distance"], reverse=True
        )
        for rank, row in enumerate(ranked_source):
            doc_id = str(list(row.values())[0])  # first value is doc_id
            previous = rrf_scores.get(doc_id)
            accumulated = previous["distance"] if previous is not None else 0.0
            fused_row = dict(row)  # copy so the caller's rows are never mutated
            fused_row["distance"] = accumulated + 1.0 / (rank + rrf_k)
            rrf_scores[doc_id] = fused_row

    # Sort the results by rrf score in descending order
    ranked_results = sorted(
        rrf_scores.values(), key=lambda item: item["distance"], reverse=True
    )
    # Clamp so a negative limit yields no rows instead of "all but the last N".
    return ranked_results[: max(fetch_top_k, 0)]
|
127
|
+
|
128
|
+
|
129
|
+
@dataclass
|
130
|
+
class HybridSearchConfig(ABC):
|
131
|
+
"""
|
132
|
+
PGVectorStore Hybrid Search Config.
|
133
|
+
|
134
|
+
Queries might be slow if the hybrid search column does not exist.
|
135
|
+
For best hybrid search performance, consider creating a TSV column
|
136
|
+
and adding a GIN index.
|
137
|
+
"""
|
138
|
+
|
139
|
+
tsv_column: Optional[str] = ""
|
140
|
+
tsv_lang: Optional[str] = "pg_catalog.english"
|
141
|
+
fts_query: Optional[str] = ""
|
142
|
+
fusion_function: Callable[
|
143
|
+
[Sequence[RowMapping], Sequence[RowMapping], Any], Sequence[Any]
|
144
|
+
] = weighted_sum_ranking # Updated default
|
145
|
+
fusion_function_parameters: dict[str, Any] = field(default_factory=dict)
|
146
|
+
primary_top_k: int = 4
|
147
|
+
secondary_top_k: int = 4
|
148
|
+
index_name: str = "langchain_tsv_index"
|
149
|
+
index_type: str = "GIN"
|
@@ -9,6 +9,7 @@ from langchain_core.vectorstores import VectorStore
|
|
9
9
|
|
10
10
|
from .async_vectorstore import AsyncPGVectorStore
|
11
11
|
from .engine import PGEngine
|
12
|
+
from .hybrid_search_config import HybridSearchConfig
|
12
13
|
from .indexes import (
|
13
14
|
DEFAULT_DISTANCE_STRATEGY,
|
14
15
|
BaseIndex,
|
@@ -59,6 +60,7 @@ class PGVectorStore(VectorStore):
|
|
59
60
|
fetch_k: int = 20,
|
60
61
|
lambda_mult: float = 0.5,
|
61
62
|
index_query_options: Optional[QueryOptions] = None,
|
63
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
62
64
|
) -> PGVectorStore:
|
63
65
|
"""Create an PGVectorStore instance.
|
64
66
|
|
@@ -78,6 +80,7 @@ class PGVectorStore(VectorStore):
|
|
78
80
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
79
81
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
80
82
|
index_query_options (QueryOptions): Index query option.
|
83
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
81
84
|
|
82
85
|
Returns:
|
83
86
|
PGVectorStore
|
@@ -98,6 +101,7 @@ class PGVectorStore(VectorStore):
|
|
98
101
|
fetch_k=fetch_k,
|
99
102
|
lambda_mult=lambda_mult,
|
100
103
|
index_query_options=index_query_options,
|
104
|
+
hybrid_search_config=hybrid_search_config,
|
101
105
|
)
|
102
106
|
vs = await engine._run_as_async(coro)
|
103
107
|
return cls(cls.__create_key, engine, vs)
|
@@ -120,6 +124,7 @@ class PGVectorStore(VectorStore):
|
|
120
124
|
fetch_k: int = 20,
|
121
125
|
lambda_mult: float = 0.5,
|
122
126
|
index_query_options: Optional[QueryOptions] = None,
|
127
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
123
128
|
) -> PGVectorStore:
|
124
129
|
"""Create an PGVectorStore instance.
|
125
130
|
|
@@ -140,6 +145,7 @@ class PGVectorStore(VectorStore):
|
|
140
145
|
fetch_k (int, optional): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
|
141
146
|
lambda_mult (float, optional): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
142
147
|
index_query_options (Optional[QueryOptions], optional): Index query option. Defaults to None.
|
148
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
143
149
|
|
144
150
|
Returns:
|
145
151
|
PGVectorStore
|
@@ -160,6 +166,7 @@ class PGVectorStore(VectorStore):
|
|
160
166
|
fetch_k=fetch_k,
|
161
167
|
lambda_mult=lambda_mult,
|
162
168
|
index_query_options=index_query_options,
|
169
|
+
hybrid_search_config=hybrid_search_config,
|
163
170
|
)
|
164
171
|
vs = engine._run_as_sync(coro)
|
165
172
|
return cls(cls.__create_key, engine, vs)
|
@@ -301,6 +308,7 @@ class PGVectorStore(VectorStore):
|
|
301
308
|
fetch_k: int = 20,
|
302
309
|
lambda_mult: float = 0.5,
|
303
310
|
index_query_options: Optional[QueryOptions] = None,
|
311
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
304
312
|
**kwargs: Any,
|
305
313
|
) -> PGVectorStore:
|
306
314
|
"""Create an PGVectorStore instance from texts.
|
@@ -324,6 +332,7 @@ class PGVectorStore(VectorStore):
|
|
324
332
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
325
333
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
326
334
|
index_query_options (QueryOptions): Index query option.
|
335
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
327
336
|
|
328
337
|
Raises:
|
329
338
|
:class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
|
@@ -347,6 +356,7 @@ class PGVectorStore(VectorStore):
|
|
347
356
|
fetch_k=fetch_k,
|
348
357
|
lambda_mult=lambda_mult,
|
349
358
|
index_query_options=index_query_options,
|
359
|
+
hybrid_search_config=hybrid_search_config,
|
350
360
|
)
|
351
361
|
await vs.aadd_texts(texts, metadatas=metadatas, ids=ids)
|
352
362
|
return vs
|
@@ -371,6 +381,7 @@ class PGVectorStore(VectorStore):
|
|
371
381
|
fetch_k: int = 20,
|
372
382
|
lambda_mult: float = 0.5,
|
373
383
|
index_query_options: Optional[QueryOptions] = None,
|
384
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
374
385
|
**kwargs: Any,
|
375
386
|
) -> PGVectorStore:
|
376
387
|
"""Create an PGVectorStore instance from documents.
|
@@ -393,6 +404,7 @@ class PGVectorStore(VectorStore):
|
|
393
404
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
394
405
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
395
406
|
index_query_options (QueryOptions): Index query option.
|
407
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
396
408
|
|
397
409
|
Raises:
|
398
410
|
:class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
|
@@ -417,6 +429,7 @@ class PGVectorStore(VectorStore):
|
|
417
429
|
fetch_k=fetch_k,
|
418
430
|
lambda_mult=lambda_mult,
|
419
431
|
index_query_options=index_query_options,
|
432
|
+
hybrid_search_config=hybrid_search_config,
|
420
433
|
)
|
421
434
|
await vs.aadd_documents(documents, ids=ids)
|
422
435
|
return vs
|
@@ -442,6 +455,7 @@ class PGVectorStore(VectorStore):
|
|
442
455
|
fetch_k: int = 20,
|
443
456
|
lambda_mult: float = 0.5,
|
444
457
|
index_query_options: Optional[QueryOptions] = None,
|
458
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
445
459
|
**kwargs: Any,
|
446
460
|
) -> PGVectorStore:
|
447
461
|
"""Create an PGVectorStore instance from texts.
|
@@ -465,6 +479,7 @@ class PGVectorStore(VectorStore):
|
|
465
479
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
466
480
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
467
481
|
index_query_options (QueryOptions): Index query option.
|
482
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
468
483
|
|
469
484
|
Raises:
|
470
485
|
:class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
|
@@ -488,6 +503,7 @@ class PGVectorStore(VectorStore):
|
|
488
503
|
fetch_k=fetch_k,
|
489
504
|
lambda_mult=lambda_mult,
|
490
505
|
index_query_options=index_query_options,
|
506
|
+
hybrid_search_config=hybrid_search_config,
|
491
507
|
**kwargs,
|
492
508
|
)
|
493
509
|
vs.add_texts(texts, metadatas=metadatas, ids=ids)
|
@@ -513,6 +529,7 @@ class PGVectorStore(VectorStore):
|
|
513
529
|
fetch_k: int = 20,
|
514
530
|
lambda_mult: float = 0.5,
|
515
531
|
index_query_options: Optional[QueryOptions] = None,
|
532
|
+
hybrid_search_config: Optional[HybridSearchConfig] = None,
|
516
533
|
**kwargs: Any,
|
517
534
|
) -> PGVectorStore:
|
518
535
|
"""Create an PGVectorStore instance from documents.
|
@@ -535,6 +552,7 @@ class PGVectorStore(VectorStore):
|
|
535
552
|
fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
|
536
553
|
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
|
537
554
|
index_query_options (QueryOptions): Index query option.
|
555
|
+
hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
|
538
556
|
|
539
557
|
Raises:
|
540
558
|
:class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
|
@@ -558,6 +576,7 @@ class PGVectorStore(VectorStore):
|
|
558
576
|
fetch_k=fetch_k,
|
559
577
|
lambda_mult=lambda_mult,
|
560
578
|
index_query_options=index_query_options,
|
579
|
+
hybrid_search_config=hybrid_search_config,
|
561
580
|
**kwargs,
|
562
581
|
)
|
563
582
|
vs.add_documents(documents, ids=ids)
|
@@ -5,6 +5,7 @@ import contextlib
|
|
5
5
|
import enum
|
6
6
|
import logging
|
7
7
|
import uuid
|
8
|
+
import warnings
|
8
9
|
from typing import (
|
9
10
|
Any,
|
10
11
|
AsyncGenerator,
|
@@ -19,7 +20,6 @@ from typing import (
|
|
19
20
|
Type,
|
20
21
|
Union,
|
21
22
|
)
|
22
|
-
import warnings
|
23
23
|
from typing import (
|
24
24
|
cast as typing_cast,
|
25
25
|
)
|
@@ -429,13 +429,6 @@ class PGVector(VectorStore):
|
|
429
429
|
self._async_engine: Optional[AsyncEngine] = None
|
430
430
|
self._async_init = False
|
431
431
|
|
432
|
-
warnings.warn(
|
433
|
-
"PGVector is being deprecated and will be removed in the future. "
|
434
|
-
"Please migrate to PGVectorStore. "
|
435
|
-
"Refer to the migration guide at [https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.md] for details.",
|
436
|
-
PendingDeprecationWarning,
|
437
|
-
)
|
438
|
-
|
439
432
|
if isinstance(connection, str):
|
440
433
|
if async_mode:
|
441
434
|
self._async_engine = create_async_engine(
|
@@ -1,25 +1,17 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: langchain-postgres
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.15
|
4
4
|
Summary: An integration package connecting Postgres and LangChain
|
5
|
-
|
6
|
-
License:
|
7
|
-
Requires-Python: >=3.9
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
Requires-Dist:
|
15
|
-
Requires-Dist: langchain-core (>=0.2.13,<0.4.0)
|
16
|
-
Requires-Dist: numpy (>=1.21,<2.0)
|
17
|
-
Requires-Dist: pgvector (>=0.2.5,<0.4)
|
18
|
-
Requires-Dist: psycopg (>=3,<4)
|
19
|
-
Requires-Dist: psycopg-pool (>=3.2.1,<4.0.0)
|
20
|
-
Requires-Dist: sqlalchemy (>=2,<3)
|
21
|
-
Project-URL: Repository, https://github.com/langchain-ai/langchain-postgres
|
22
|
-
Project-URL: Source Code, https://github.com/langchain-ai/langchain-postgres/tree/master/langchain_postgres
|
5
|
+
License-Expression: MIT
|
6
|
+
License-File: LICENSE
|
7
|
+
Requires-Python: >=3.9
|
8
|
+
Requires-Dist: asyncpg>=0.30.0
|
9
|
+
Requires-Dist: langchain-core<0.4.0,>=0.2.13
|
10
|
+
Requires-Dist: numpy<3,>=1.21
|
11
|
+
Requires-Dist: pgvector<0.4,>=0.2.5
|
12
|
+
Requires-Dist: psycopg-pool<4,>=3.2.1
|
13
|
+
Requires-Dist: psycopg<4,>=3
|
14
|
+
Requires-Dist: sqlalchemy<3,>=2
|
23
15
|
Description-Content-Type: text/markdown
|
24
16
|
|
25
17
|
# langchain-postgres
|
@@ -39,7 +31,7 @@ Feel free to use the abstraction as provided or else modify them / extend them a
|
|
39
31
|
|
40
32
|
## Requirements
|
41
33
|
|
42
|
-
The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [
|
34
|
+
The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [psycopg3](https://www.psycopg.org/psycopg3/) drivers.
|
43
35
|
|
44
36
|
## Installation
|
45
37
|
|
@@ -47,17 +39,19 @@ The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [p
|
|
47
39
|
pip install -U langchain-postgres
|
48
40
|
```
|
49
41
|
|
50
|
-
##
|
51
|
-
|
52
|
-
### Vectorstore
|
42
|
+
## Vectorstore
|
53
43
|
|
54
44
|
> [!WARNING]
|
55
45
|
> In v0.0.14+, `PGVector` is deprecated. Please migrate to `PGVectorStore`
|
56
|
-
> Version 0.0.14+ has not been released yet, but you can test version of the vectorstore on the main branch. Until official release do not use in production.
|
57
46
|
> for improved performance and manageability.
|
58
|
-
> See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.
|
47
|
+
> See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.ipynb) for details on how to migrate from `PGVector` to `PGVectorStore`.
|
48
|
+
|
49
|
+
### Documentation
|
59
50
|
|
60
|
-
|
51
|
+
* [Quickstart](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore.ipynb)
|
52
|
+
* [How-to](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore_how_to.ipynb)
|
53
|
+
|
54
|
+
### Example
|
61
55
|
|
62
56
|
```python
|
63
57
|
from langchain_core.documents import Document
|
@@ -101,7 +95,7 @@ print(docs)
|
|
101
95
|
> [!TIP]
|
102
96
|
> All synchronous functions have corresponding asynchronous functions
|
103
97
|
|
104
|
-
|
98
|
+
## ChatMessageHistory
|
105
99
|
|
106
100
|
The chat message history abstraction helps to persist chat message history
|
107
101
|
in a postgres table.
|
@@ -167,4 +161,3 @@ Using the Google Cloud integrations provides the following benefits:
|
|
167
161
|
| Google AlloyDB | ✓ | ✓ | ✓ | ✓ | ✗ |
|
168
162
|
| Google Cloud SQL Postgres| ✓ | ✓ | ✓ | ✓ | ✗ |
|
169
163
|
|
170
|
-
|
@@ -0,0 +1,17 @@
|
|
1
|
+
langchain_postgres/__init__.py,sha256=-ovoLrNuzL-kMUV-RrIxoEI8wmgOAg4vfE8xevYSA3Q,702
|
2
|
+
langchain_postgres/_utils.py,sha256=N_OBzYFCb_bsHOnZ-YRg6izhmuudorQhupgeG-rSKUc,2848
|
3
|
+
langchain_postgres/chat_message_histories.py,sha256=Hq_0nGX1BoBxq5jg0LwfQg7iXm6B4izYVr6iLkMGoEY,14214
|
4
|
+
langchain_postgres/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
langchain_postgres/translator.py,sha256=6cTS2RJUodMUdsurJM-f-vgPXl6Ad6bfMo8ECuh5Jr4,1524
|
6
|
+
langchain_postgres/vectorstores.py,sha256=vzRbPwU1Rn-pOsnTsz1u72cSYD7H8jMlW4N7A58QIt4,83826
|
7
|
+
langchain_postgres/utils/pgvector_migrator.py,sha256=OxW2_FxaomZw5kqPAz-3lmZ5t2hSXU4ZW3xK6O62MH4,11771
|
8
|
+
langchain_postgres/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
langchain_postgres/v2/async_vectorstore.py,sha256=WJaFs38fZiNJ6ZM2fhz7u6oJZhfig4fP-OKFuyB7MIQ,58739
|
10
|
+
langchain_postgres/v2/engine.py,sha256=BZJHWzS7SqMWs1-7ZHKkRAIu5PuO98zqg5aWf0EXkDM,16850
|
11
|
+
langchain_postgres/v2/hybrid_search_config.py,sha256=zDVMscaV0n92BkgGd2J77Y675z9xWS-U6jTmkqHJtGI,5490
|
12
|
+
langchain_postgres/v2/indexes.py,sha256=aLCFGYiIbLBUr88drMLD6l41MPRI7lv0ALMVRWfqdq4,4888
|
13
|
+
langchain_postgres/v2/vectorstores.py,sha256=Lo3IQKjQ6AQlyNP8ILGeyCk6ZyKANcvebpRT5tHCT78,38595
|
14
|
+
langchain_postgres-0.0.15.dist-info/METADATA,sha256=RzNeUX4gFCBEQ7u7qQHWOC6LsVPxl6xMoOQMLAXtkkU,6556
|
15
|
+
langchain_postgres-0.0.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
langchain_postgres-0.0.15.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
|
17
|
+
langchain_postgres-0.0.15.dist-info/RECORD,,
|
@@ -1,16 +0,0 @@
|
|
1
|
-
langchain_postgres/__init__.py,sha256=UxIanyWPeUVtWFKCT-sWGXbWUO5I76akABXhXolY9bM,702
|
2
|
-
langchain_postgres/_utils.py,sha256=N_OBzYFCb_bsHOnZ-YRg6izhmuudorQhupgeG-rSKUc,2848
|
3
|
-
langchain_postgres/chat_message_histories.py,sha256=Et5AgXSRBCghLC5sn6EEUDd1xupaiPv-A5IyNBjpaTc,14213
|
4
|
-
langchain_postgres/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
langchain_postgres/translator.py,sha256=6cTS2RJUodMUdsurJM-f-vgPXl6Ad6bfMo8ECuh5Jr4,1524
|
6
|
-
langchain_postgres/utils/pgvector_migrator.py,sha256=OIclFsCKWQAtJ1JyFQsVQoWZSrEJg67GVnY84aBlucE,11776
|
7
|
-
langchain_postgres/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
langchain_postgres/v2/async_vectorstore.py,sha256=FMV-IwH7cJ2VuxnrMCy0X0WWG65oHNXfKAwsdf0Tp20,51786
|
9
|
-
langchain_postgres/v2/engine.py,sha256=8XD6ta2HzuYtHnxhvY-I_vMYqZd33yj2y9ZqQFbEz1g,14266
|
10
|
-
langchain_postgres/v2/indexes.py,sha256=aLCFGYiIbLBUr88drMLD6l41MPRI7lv0ALMVRWfqdq4,4888
|
11
|
-
langchain_postgres/v2/vectorstores.py,sha256=R17q1KIEZPBwEHgE6JYiRSiN8rZXzVPCmBoJobiyjM8,37198
|
12
|
-
langchain_postgres/vectorstores.py,sha256=Xjyqxa_nL7Xvq6dwqWUu4VdNZ5z6ypjFoSU9wj6Ad5c,84195
|
13
|
-
langchain_postgres-0.0.14rc1.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
|
14
|
-
langchain_postgres-0.0.14rc1.dist-info/METADATA,sha256=ZOG0qTuKUt4_uz2VUAy4Cj4A-DtsfouNBB6ITk7bihk,7179
|
15
|
-
langchain_postgres-0.0.14rc1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
16
|
-
langchain_postgres-0.0.14rc1.dist-info/RECORD,,
|
{langchain_postgres-0.0.14rc1.dist-info → langchain_postgres-0.0.15.dist-info/licenses}/LICENSE
RENAMED
File without changes
|