langchain-postgres 0.0.14rc1__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  from importlib import metadata
2
2
 
3
3
  from langchain_postgres.chat_message_histories import PostgresChatMessageHistory
4
- from langchain_postgres.v2.engine import Column, PGEngine, ColumnDict
5
4
  from langchain_postgres.translator import PGVectorTranslator
5
+ from langchain_postgres.v2.engine import Column, ColumnDict, PGEngine
6
6
  from langchain_postgres.v2.vectorstores import PGVectorStore
7
7
  from langchain_postgres.vectorstores import PGVector
8
8
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  This client provides support for both sync and async via psycopg 3.
4
4
  """
5
+
5
6
  from __future__ import annotations
6
7
 
7
8
  import json
@@ -68,7 +68,7 @@ async def __aextract_pgvector_collection(
68
68
  if not rows:
69
69
  break
70
70
  yield [row._mapping for row in rows]
71
- except ValueError as e:
71
+ except ValueError:
72
72
  raise ValueError(f"Collection, {collection_name} does not exist.")
73
73
  except SQLAlchemyError as e:
74
74
  raise ProgrammingError(
@@ -14,6 +14,7 @@ from sqlalchemy import RowMapping, text
14
14
  from sqlalchemy.ext.asyncio import AsyncEngine
15
15
 
16
16
  from .engine import PGEngine
17
+ from .hybrid_search_config import HybridSearchConfig
17
18
  from .indexes import (
18
19
  DEFAULT_DISTANCE_STRATEGY,
19
20
  DEFAULT_INDEX_NAME_SUFFIX,
@@ -77,6 +78,7 @@ class AsyncPGVectorStore(VectorStore):
77
78
  fetch_k: int = 20,
78
79
  lambda_mult: float = 0.5,
79
80
  index_query_options: Optional[QueryOptions] = None,
81
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
80
82
  ):
81
83
  """AsyncPGVectorStore constructor.
82
84
  Args:
@@ -95,6 +97,7 @@ class AsyncPGVectorStore(VectorStore):
95
97
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
96
98
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
97
99
  index_query_options (QueryOptions): Index query option.
100
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
98
101
 
99
102
 
100
103
  Raises:
@@ -119,6 +122,7 @@ class AsyncPGVectorStore(VectorStore):
119
122
  self.fetch_k = fetch_k
120
123
  self.lambda_mult = lambda_mult
121
124
  self.index_query_options = index_query_options
125
+ self.hybrid_search_config = hybrid_search_config
122
126
 
123
127
  @classmethod
124
128
  async def create(
@@ -139,6 +143,7 @@ class AsyncPGVectorStore(VectorStore):
139
143
  fetch_k: int = 20,
140
144
  lambda_mult: float = 0.5,
141
145
  index_query_options: Optional[QueryOptions] = None,
146
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
142
147
  ) -> AsyncPGVectorStore:
143
148
  """Create an AsyncPGVectorStore instance.
144
149
 
@@ -158,6 +163,7 @@ class AsyncPGVectorStore(VectorStore):
158
163
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
159
164
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
160
165
  index_query_options (QueryOptions): Index query option.
166
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
161
167
 
162
168
  Returns:
163
169
  AsyncPGVectorStore
@@ -193,9 +199,18 @@ class AsyncPGVectorStore(VectorStore):
193
199
  raise ValueError(
194
200
  f"Content column, {content_column}, is type, {content_type}. It must be a type of character string."
195
201
  )
202
+ if hybrid_search_config:
203
+ tsv_column_name = (
204
+ hybrid_search_config.tsv_column
205
+ if hybrid_search_config.tsv_column
206
+ else content_column + "_tsv"
207
+ )
208
+ if tsv_column_name not in columns or columns[tsv_column_name] != "tsvector":
209
+ # mark tsv_column as empty because there is no TSV column in table
210
+ hybrid_search_config.tsv_column = ""
196
211
  if embedding_column not in columns:
197
212
  raise ValueError(f"Embedding column, {embedding_column}, does not exist.")
198
- if columns[embedding_column] != "USER-DEFINED":
213
+ if columns[embedding_column] not in ["USER-DEFINED", "vector"]:
199
214
  raise ValueError(
200
215
  f"Embedding column, {embedding_column}, is not type Vector."
201
216
  )
@@ -236,6 +251,7 @@ class AsyncPGVectorStore(VectorStore):
236
251
  fetch_k=fetch_k,
237
252
  lambda_mult=lambda_mult,
238
253
  index_query_options=index_query_options,
254
+ hybrid_search_config=hybrid_search_config,
239
255
  )
240
256
 
241
257
  @property
@@ -273,17 +289,30 @@ class AsyncPGVectorStore(VectorStore):
273
289
  if len(self.metadata_columns) > 0
274
290
  else ""
275
291
  )
276
- insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{metadata_col_names}'
292
+ hybrid_search_column = (
293
+ f', "{self.hybrid_search_config.tsv_column}"'
294
+ if self.hybrid_search_config and self.hybrid_search_config.tsv_column
295
+ else ""
296
+ )
297
+ insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{hybrid_search_column}{metadata_col_names}'
277
298
  values = {
278
- "id": id,
299
+ "langchain_id": id,
279
300
  "content": content,
280
301
  "embedding": str([float(dimension) for dimension in embedding]),
281
302
  }
282
- values_stmt = "VALUES (:id, :content, :embedding"
303
+ values_stmt = "VALUES (:langchain_id, :content, :embedding"
283
304
 
284
305
  if not embedding and can_inline_embed:
285
- values_stmt = f"VALUES (:id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore
306
+ values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore
286
307
 
308
+ if self.hybrid_search_config and self.hybrid_search_config.tsv_column:
309
+ lang = (
310
+ f"'{self.hybrid_search_config.tsv_lang}',"
311
+ if self.hybrid_search_config.tsv_lang
312
+ else ""
313
+ )
314
+ values_stmt += f", to_tsvector({lang} :tsv_content)"
315
+ values["tsv_content"] = content
287
316
  # Add metadata
288
317
  extra = copy.deepcopy(metadata)
289
318
  for metadata_column in self.metadata_columns:
@@ -308,6 +337,9 @@ class AsyncPGVectorStore(VectorStore):
308
337
 
309
338
  upsert_stmt = f' ON CONFLICT ("{self.id_column}") DO UPDATE SET "{self.content_column}" = EXCLUDED."{self.content_column}", "{self.embedding_column}" = EXCLUDED."{self.embedding_column}"'
310
339
 
340
+ if self.hybrid_search_config and self.hybrid_search_config.tsv_column:
341
+ upsert_stmt += f', "{self.hybrid_search_config.tsv_column}" = EXCLUDED."{self.hybrid_search_config.tsv_column}"'
342
+
311
343
  if self.metadata_json_column:
312
344
  upsert_stmt += f', "{self.metadata_json_column}" = EXCLUDED."{self.metadata_json_column}"'
313
345
 
@@ -408,6 +440,7 @@ class AsyncPGVectorStore(VectorStore):
408
440
  fetch_k: int = 20,
409
441
  lambda_mult: float = 0.5,
410
442
  index_query_options: Optional[QueryOptions] = None,
443
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
411
444
  **kwargs: Any,
412
445
  ) -> AsyncPGVectorStore:
413
446
  """Create an AsyncPGVectorStore instance from texts.
@@ -453,6 +486,7 @@ class AsyncPGVectorStore(VectorStore):
453
486
  fetch_k=fetch_k,
454
487
  lambda_mult=lambda_mult,
455
488
  index_query_options=index_query_options,
489
+ hybrid_search_config=hybrid_search_config,
456
490
  )
457
491
  await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs)
458
492
  return vs
@@ -478,6 +512,7 @@ class AsyncPGVectorStore(VectorStore):
478
512
  fetch_k: int = 20,
479
513
  lambda_mult: float = 0.5,
480
514
  index_query_options: Optional[QueryOptions] = None,
515
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
481
516
  **kwargs: Any,
482
517
  ) -> AsyncPGVectorStore:
483
518
  """Create an AsyncPGVectorStore instance from documents.
@@ -524,6 +559,7 @@ class AsyncPGVectorStore(VectorStore):
524
559
  fetch_k=fetch_k,
525
560
  lambda_mult=lambda_mult,
526
561
  index_query_options=index_query_options,
562
+ hybrid_search_config=hybrid_search_config,
527
563
  )
528
564
  texts = [doc.page_content for doc in documents]
529
565
  metadatas = [doc.metadata for doc in documents]
@@ -538,16 +574,30 @@ class AsyncPGVectorStore(VectorStore):
538
574
  filter: Optional[dict] = None,
539
575
  **kwargs: Any,
540
576
  ) -> Sequence[RowMapping]:
541
- """Perform similarity search query on database."""
542
- k = k if k else self.k
577
+ """
578
+ Perform similarity search (or hybrid search) query on database.
579
+ Queries might be slow if the hybrid search column does not exist.
580
+ For best hybrid search performance, consider creating a TSV column
581
+ and adding GIN index.
582
+ """
583
+ hybrid_search_config = kwargs.get(
584
+ "hybrid_search_config", self.hybrid_search_config
585
+ )
586
+
587
+ final_k = k if k is not None else self.k
588
+
589
+ dense_limit = final_k
590
+ if hybrid_search_config:
591
+ dense_limit = hybrid_search_config.primary_top_k
592
+
543
593
  operator = self.distance_strategy.operator
544
594
  search_function = self.distance_strategy.search_function
545
595
 
546
- columns = self.metadata_columns + [
596
+ columns = [
547
597
  self.id_column,
548
598
  self.content_column,
549
599
  self.embedding_column,
550
- ]
600
+ ] + self.metadata_columns
551
601
  if self.metadata_json_column:
552
602
  columns.append(self.metadata_json_column)
553
603
 
@@ -557,16 +607,19 @@ class AsyncPGVectorStore(VectorStore):
557
607
  filter_dict = None
558
608
  if filter and isinstance(filter, dict):
559
609
  safe_filter, filter_dict = self._create_filter_clause(filter)
560
- param_filter = f"WHERE {safe_filter}" if safe_filter else ""
610
+
561
611
  inline_embed_func = getattr(self.embedding_service, "embed_query_inline", None)
562
612
  if not embedding and callable(inline_embed_func) and "query" in kwargs:
563
613
  query_embedding = self.embedding_service.embed_query_inline(kwargs["query"]) # type: ignore
614
+ embedding_data_string = f"{query_embedding}"
564
615
  else:
565
616
  query_embedding = f"{[float(dimension) for dimension in embedding]}"
566
- stmt = f"""SELECT {column_names}, {search_function}("{self.embedding_column}", :query_embedding) as distance
567
- FROM "{self.schema_name}"."{self.table_name}" {param_filter} ORDER BY "{self.embedding_column}" {operator} :query_embedding LIMIT :k;
617
+ embedding_data_string = ":query_embedding"
618
+ where_filters = f"WHERE {safe_filter}" if safe_filter else ""
619
+ dense_query_stmt = f"""SELECT {column_names}, {search_function}("{self.embedding_column}", {embedding_data_string}) as distance
620
+ FROM "{self.schema_name}"."{self.table_name}" {where_filters} ORDER BY "{self.embedding_column}" {operator} {embedding_data_string} LIMIT :dense_limit;
568
621
  """
569
- param_dict = {"query_embedding": query_embedding, "k": k}
622
+ param_dict = {"query_embedding": query_embedding, "dense_limit": dense_limit}
570
623
  if filter_dict:
571
624
  param_dict.update(filter_dict)
572
625
  if self.index_query_options:
@@ -575,15 +628,49 @@ class AsyncPGVectorStore(VectorStore):
575
628
  for query_option in self.index_query_options.to_parameter():
576
629
  query_options_stmt = f"SET LOCAL {query_option};"
577
630
  await conn.execute(text(query_options_stmt))
578
- result = await conn.execute(text(stmt), param_dict)
631
+ result = await conn.execute(text(dense_query_stmt), param_dict)
579
632
  result_map = result.mappings()
580
- results = result_map.fetchall()
633
+ dense_results = result_map.fetchall()
581
634
  else:
582
635
  async with self.engine.connect() as conn:
583
- result = await conn.execute(text(stmt), param_dict)
636
+ result = await conn.execute(text(dense_query_stmt), param_dict)
637
+ result_map = result.mappings()
638
+ dense_results = result_map.fetchall()
639
+
640
+ fts_query = (
641
+ hybrid_search_config.fts_query
642
+ if hybrid_search_config and hybrid_search_config.fts_query
643
+ else kwargs.get("fts_query", "")
644
+ )
645
+ if hybrid_search_config and fts_query:
646
+ hybrid_search_config.fusion_function_parameters["fetch_top_k"] = final_k
647
+ # do the sparse query
648
+ lang = (
649
+ f"'{hybrid_search_config.tsv_lang}',"
650
+ if hybrid_search_config.tsv_lang
651
+ else ""
652
+ )
653
+ query_tsv = f"plainto_tsquery({lang} :fts_query)"
654
+ param_dict["fts_query"] = fts_query
655
+ if hybrid_search_config.tsv_column:
656
+ content_tsv = f'"{hybrid_search_config.tsv_column}"'
657
+ else:
658
+ content_tsv = f'to_tsvector({lang} "{self.content_column}")'
659
+ and_filters = f"AND ({safe_filter})" if safe_filter else ""
660
+ sparse_query_stmt = f'SELECT {column_names}, ts_rank_cd({content_tsv}, {query_tsv}) as distance FROM "{self.schema_name}"."{self.table_name}" WHERE {content_tsv} @@ {query_tsv} {and_filters} ORDER BY distance desc LIMIT {hybrid_search_config.secondary_top_k};'
661
+ async with self.engine.connect() as conn:
662
+ result = await conn.execute(text(sparse_query_stmt), param_dict)
584
663
  result_map = result.mappings()
585
- results = result_map.fetchall()
586
- return results
664
+ sparse_results = result_map.fetchall()
665
+
666
+ combined_results = hybrid_search_config.fusion_function(
667
+ dense_results,
668
+ sparse_results,
669
+ **hybrid_search_config.fusion_function_parameters,
670
+ distance_strategy=self.distance_strategy,
671
+ )
672
+ return combined_results
673
+ return dense_results
587
674
 
588
675
  async def asimilarity_search(
589
676
  self,
@@ -601,6 +688,14 @@ class AsyncPGVectorStore(VectorStore):
601
688
  )
602
689
  kwargs["query"] = query
603
690
 
691
+ # add fts_query to hybrid_search_config
692
+ hybrid_search_config = kwargs.get(
693
+ "hybrid_search_config", self.hybrid_search_config
694
+ )
695
+ if hybrid_search_config and not hybrid_search_config.fts_query:
696
+ hybrid_search_config.fts_query = query
697
+ kwargs["hybrid_search_config"] = hybrid_search_config
698
+
604
699
  return await self.asimilarity_search_by_vector(
605
700
  embedding=embedding, k=k, filter=filter, **kwargs
606
701
  )
@@ -632,6 +727,14 @@ class AsyncPGVectorStore(VectorStore):
632
727
  )
633
728
  kwargs["query"] = query
634
729
 
730
+ # add fts_query to hybrid_search_config
731
+ hybrid_search_config = kwargs.get(
732
+ "hybrid_search_config", self.hybrid_search_config
733
+ )
734
+ if hybrid_search_config and not hybrid_search_config.fts_query:
735
+ hybrid_search_config.fts_query = query
736
+ kwargs["hybrid_search_config"] = hybrid_search_config
737
+
635
738
  docs = await self.asimilarity_search_with_score_by_vector(
636
739
  embedding=embedding, k=k, filter=filter, **kwargs
637
740
  )
@@ -776,6 +879,41 @@ class AsyncPGVectorStore(VectorStore):
776
879
 
777
880
  return [r for i, r in enumerate(documents_with_scores) if i in mmr_selected]
778
881
 
882
async def aapply_hybrid_search_index(
    self,
    concurrently: bool = False,
) -> None:
    """Create the full-text-search index described by ``hybrid_search_config``.

    The index is built on the configured TSV column when one is set;
    otherwise it is an expression index over
    ``to_tsvector(<tsv_lang>, <content column>)``.

    Args:
        concurrently (bool): Build the index with ``CREATE INDEX CONCURRENTLY``.
            The statement is then executed on an AUTOCOMMIT connection.
            Defaults to False.

    Raises:
        ValueError: If no hybrid search config is set, or its ``index_type``
            or ``index_name`` is empty.
    """
    config = self.hybrid_search_config
    if not config or not config.index_type or not config.index_name:
        # no index can be derived from the configuration
        raise ValueError("Hybrid Search Config cannot create index.")

    # Column identifiers are double-quoted, matching how the INSERT, upsert and
    # search statements of this class reference the same columns. Left
    # unquoted, a mixed-case or reserved-word column name would either fail
    # or resolve to a different identifier than the one the queries use.
    if config.tsv_column:
        index_target = f'"{config.tsv_column}"'
    else:
        lang = f"'{config.tsv_lang}'," if config.tsv_lang else ""
        index_target = f'to_tsvector({lang} "{self.content_column}")'

    concurrently_kw = "CONCURRENTLY" if concurrently else ""
    tsv_index_query = (
        f"CREATE INDEX {concurrently_kw} {config.index_name} "
        f'ON "{self.schema_name}"."{self.table_name}" '
        f"USING {config.index_type}({index_target});"
    )

    async with self.engine.connect() as conn:
        if concurrently:
            # The concurrent build is issued on an AUTOCOMMIT connection,
            # so no explicit commit follows.
            autocommit_conn = await conn.execution_options(
                isolation_level="AUTOCOMMIT"
            )
            await autocommit_conn.execute(text(tsv_index_query))
        else:
            await conn.execute(text(tsv_index_query))
            await conn.commit()
916
+
779
917
  async def aapply_vector_index(
780
918
  self,
781
919
  index: BaseIndex,
@@ -800,10 +938,11 @@ class AsyncPGVectorStore(VectorStore):
800
938
  filter = f"WHERE ({index.partial_indexes})" if index.partial_indexes else ""
801
939
  params = "WITH " + index.index_options()
802
940
  if name is None:
803
- if index.name == None:
941
+ if index.name is None:
804
942
  index.name = self.table_name + DEFAULT_INDEX_NAME_SUFFIX
805
943
  name = index.name
806
944
  stmt = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} "{name}" ON "{self.schema_name}"."{self.table_name}" USING {index.index_type} ({self.embedding_column} {function}) {params} {filter};'
945
+
807
946
  if concurrently:
808
947
  async with self.engine.connect() as conn:
809
948
  autocommit_conn = await conn.execution_options(
@@ -954,46 +1093,48 @@ class AsyncPGVectorStore(VectorStore):
954
1093
  operator = "$eq"
955
1094
  filter_value = value
956
1095
 
1096
+ suffix_id = str(uuid.uuid4()).split("-")[0]
957
1097
  if operator in COMPARISONS_TO_NATIVE:
958
1098
  # Then we implement an equality filter
959
1099
  # native is trusted input
960
1100
  native = COMPARISONS_TO_NATIVE[operator]
961
- id = str(uuid.uuid4()).split("-")[0]
962
- return f"{field} {native} :{field}_{id}", {f"{field}_{id}": filter_value}
1101
+ param_name = f"{field}_{suffix_id}"
1102
+ return f"{field} {native} :{param_name}", {f"{param_name}": filter_value}
963
1103
  elif operator == "$between":
964
1104
  # Use AND with two comparisons
965
1105
  low, high = filter_value
966
-
967
- return f"({field} BETWEEN :{field}_low AND :{field}_high)", {
968
- f"{field}_low": low,
969
- f"{field}_high": high,
1106
+ low_param_name = f"{field}_low_{suffix_id}"
1107
+ high_param_name = f"{field}_high_{suffix_id}"
1108
+ return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", {
1109
+ f"{low_param_name}": low,
1110
+ f"{high_param_name}": high,
970
1111
  }
971
- elif operator in {"$in", "$nin", "$like", "$ilike"}:
1112
+ elif operator in {"$in", "$nin"}:
972
1113
  # We'll do force coercion to text
973
- if operator in {"$in", "$nin"}:
974
- for val in filter_value:
975
- if not isinstance(val, (str, int, float)):
976
- raise NotImplementedError(
977
- f"Unsupported type: {type(val)} for value: {val}"
978
- )
979
-
980
- if isinstance(val, bool): # b/c bool is an instance of int
981
- raise NotImplementedError(
982
- f"Unsupported type: {type(val)} for value: {val}"
983
- )
984
-
985
- if operator in {"$in"}:
986
- return f"{field} = ANY(:{field}_in)", {f"{field}_in": filter_value}
987
- elif operator in {"$nin"}:
988
- return f"{field} <> ALL (:{field}_nin)", {f"{field}_nin": filter_value}
989
- elif operator in {"$like"}:
990
- return f"({field} LIKE :{field}_like)", {f"{field}_like": filter_value}
991
- elif operator in {"$ilike"}:
992
- return f"({field} ILIKE :{field}_ilike)", {
993
- f"{field}_ilike": filter_value
1114
+ for val in filter_value:
1115
+ if not isinstance(val, (str, int, float)):
1116
+ raise NotImplementedError(
1117
+ f"Unsupported type: {type(val)} for value: {val}"
1118
+ )
1119
+
1120
+ if isinstance(val, bool): # b/c bool is an instance of int
1121
+ raise NotImplementedError(
1122
+ f"Unsupported type: {type(val)} for value: {val}"
1123
+ )
1124
+ param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
1125
+ if operator == "$in":
1126
+ return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value}
1127
+ else: # i.e. $nin
1128
+ return f"{field} <> ALL (:{param_name})", {
1129
+ f"{param_name}": filter_value
994
1130
  }
995
- else:
996
- raise NotImplementedError()
1131
+
1132
+ elif operator in {"$like", "$ilike"}:
1133
+ param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
1134
+ if operator == "$like":
1135
+ return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value}
1136
+ else: # i.e. $ilike
1137
+ return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value}
997
1138
  elif operator == "$exists":
998
1139
  if not isinstance(filter_value, bool):
999
1140
  raise ValueError(
@@ -3,14 +3,13 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  from dataclasses import dataclass
5
5
  from threading import Thread
6
- from typing import TYPE_CHECKING, Any, Awaitable, Optional, TypeVar, TypedDict, Union
6
+ from typing import Any, Awaitable, Optional, TypedDict, TypeVar, Union
7
7
 
8
8
  from sqlalchemy import text
9
9
  from sqlalchemy.engine import URL
10
10
  from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
11
11
 
12
- if TYPE_CHECKING:
13
- import asyncpg # type: ignore
12
+ from .hybrid_search_config import HybridSearchConfig
14
13
 
15
14
  T = TypeVar("T")
16
15
 
@@ -120,7 +119,7 @@ class PGEngine:
120
119
  return await coro
121
120
  # Otherwise, run in the background thread
122
121
  return await asyncio.wrap_future(
123
- asyncio.run_coroutine_threadsafe(coro, self._loop)
122
+ asyncio.run_coroutine_threadsafe(coro, self._loop) # type: ignore[arg-type]
124
123
  )
125
124
 
126
125
  def _run_as_sync(self, coro: Awaitable[T]) -> T:
@@ -129,7 +128,7 @@ class PGEngine:
129
128
  raise Exception(
130
129
  "Engine was initialized without a background loop and cannot call sync methods."
131
130
  )
132
- return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
131
+ return asyncio.run_coroutine_threadsafe(coro, self._loop).result() # type: ignore[arg-type]
133
132
 
134
133
  async def close(self) -> None:
135
134
  """Dispose of connection pool"""
@@ -159,6 +158,7 @@ class PGEngine:
159
158
  id_column: Union[str, Column, ColumnDict] = "langchain_id",
160
159
  overwrite_existing: bool = False,
161
160
  store_metadata: bool = True,
161
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
162
162
  ) -> None:
163
163
  """
164
164
  Create a table for saving of vectors to be used with PGVectorStore.
@@ -181,6 +181,8 @@ class PGEngine:
181
181
  overwrite_existing (bool): Whether to drop existing table. Default: False.
182
182
  store_metadata (bool): Whether to store metadata in the table.
183
183
  Default: True.
184
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
185
+ Default: None.
184
186
 
185
187
  Raises:
186
188
  :class:`DuplicateTableError <asyncpg.exceptions.DuplicateTableError>`: if table already exists.
@@ -189,6 +191,7 @@ class PGEngine:
189
191
 
190
192
  schema_name = self._escape_postgres_identifier(schema_name)
191
193
  table_name = self._escape_postgres_identifier(table_name)
194
+ hybrid_search_default_column_name = content_column + "_tsv"
192
195
  content_column = self._escape_postgres_identifier(content_column)
193
196
  embedding_column = self._escape_postgres_identifier(embedding_column)
194
197
  if metadata_columns is None:
@@ -229,10 +232,22 @@ class PGEngine:
229
232
  id_data_type = id_column["data_type"]
230
233
  id_column_name = id_column["name"]
231
234
 
235
+ hybrid_search_column = "" # Default is no TSV column for hybrid search
236
+ if hybrid_search_config:
237
+ hybrid_search_column_name = (
238
+ hybrid_search_config.tsv_column or hybrid_search_default_column_name
239
+ )
240
+ hybrid_search_column_name = self._escape_postgres_identifier(
241
+ hybrid_search_column_name
242
+ )
243
+ hybrid_search_config.tsv_column = hybrid_search_column_name
244
+ hybrid_search_column = f',"{self._escape_postgres_identifier(hybrid_search_column_name)}" TSVECTOR NOT NULL'
245
+
232
246
  query = f"""CREATE TABLE "{schema_name}"."{table_name}"(
233
247
  "{id_column_name}" {id_data_type} PRIMARY KEY,
234
248
  "{content_column}" TEXT NOT NULL,
235
- "{embedding_column}" vector({vector_size}) NOT NULL"""
249
+ "{embedding_column}" vector({vector_size}) NOT NULL
250
+ {hybrid_search_column}"""
236
251
  for column in metadata_columns:
237
252
  if isinstance(column, Column):
238
253
  nullable = "NOT NULL" if not column.nullable else ""
@@ -261,6 +276,7 @@ class PGEngine:
261
276
  id_column: Union[str, Column, ColumnDict] = "langchain_id",
262
277
  overwrite_existing: bool = False,
263
278
  store_metadata: bool = True,
279
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
264
280
  ) -> None:
265
281
  """
266
282
  Create a table for saving of vectors to be used with PGVectorStore.
@@ -283,6 +299,10 @@ class PGEngine:
283
299
  overwrite_existing (bool): Whether to drop existing table. Default: False.
284
300
  store_metadata (bool): Whether to store metadata in the table.
285
301
  Default: True.
302
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
303
+ Note that queries might be slow if the hybrid search column does not exist.
304
+ For best hybrid search performance, consider creating a TSV column and adding GIN index.
305
+ Default: None.
286
306
  """
287
307
  await self._run_as_async(
288
308
  self._ainit_vectorstore_table(
@@ -296,6 +316,7 @@ class PGEngine:
296
316
  id_column=id_column,
297
317
  overwrite_existing=overwrite_existing,
298
318
  store_metadata=store_metadata,
319
+ hybrid_search_config=hybrid_search_config,
299
320
  )
300
321
  )
301
322
 
@@ -312,6 +333,7 @@ class PGEngine:
312
333
  id_column: Union[str, Column, ColumnDict] = "langchain_id",
313
334
  overwrite_existing: bool = False,
314
335
  store_metadata: bool = True,
336
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
315
337
  ) -> None:
316
338
  """
317
339
  Create a table for saving of vectors to be used with PGVectorStore.
@@ -334,6 +356,10 @@ class PGEngine:
334
356
  overwrite_existing (bool): Whether to drop existing table. Default: False.
335
357
  store_metadata (bool): Whether to store metadata in the table.
336
358
  Default: True.
359
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration.
360
+ Note that queries might be slow if the hybrid search column does not exist.
361
+ For best hybrid search performance, consider creating a TSV column and adding GIN index.
362
+ Default: None.
337
363
  """
338
364
  self._run_as_sync(
339
365
  self._ainit_vectorstore_table(
@@ -347,5 +373,38 @@ class PGEngine:
347
373
  id_column=id_column,
348
374
  overwrite_existing=overwrite_existing,
349
375
  store_metadata=store_metadata,
376
+ hybrid_search_config=hybrid_search_config,
350
377
  )
351
378
  )
379
+
380
async def _adrop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table if it exists.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    # Run both names through the same identifier escaping that
    # _ainit_vectorstore_table applies before it interpolates them inside
    # double quotes, so create and drop treat a given name identically.
    schema_name = self._escape_postgres_identifier(schema_name)
    table_name = self._escape_postgres_identifier(table_name)
    query = f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}";'
    async with self._pool.connect() as conn:
        await conn.execute(text(query))
        await conn.commit()
391
+
392
async def adrop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table (async entry point).

    Delegates to ``_adrop_table`` through ``_run_as_async``.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    drop_coro = self._adrop_table(table_name=table_name, schema_name=schema_name)
    await self._run_as_async(drop_coro)
401
+
402
def drop_table(
    self,
    table_name: str,
    *,
    schema_name: str = "public",
) -> None:
    """Drop the vector store table (sync entry point).

    Delegates to ``_adrop_table`` through ``_run_as_sync``.

    Args:
        table_name (str): Name of the table to drop.
        schema_name (str): Schema that contains the table. Default: "public".
    """
    drop_coro = self._adrop_table(table_name=table_name, schema_name=schema_name)
    self._run_as_sync(drop_coro)
@@ -0,0 +1,212 @@
1
+ from abc import ABC
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Callable, Optional, Sequence
4
+
5
+ from sqlalchemy import RowMapping
6
+
7
+ from .indexes import DistanceStrategy
8
+
9
+
10
def _normalize_scores(
    results: Sequence[dict[str, Any]], is_distance_metric: bool
) -> Sequence[dict[str, Any]]:
    """Min-max normalize scores to a 0-1 scale, where 1 is best.

    The raw score of each row is the LAST value of its dict. Every row is
    annotated in place with a ``"normalized_score"`` key.

    Args:
        results: Rows to normalize; each dict is mutated.
        is_distance_metric: True when a lower raw score is better, in which
            case the normalized value is inverted.

    Returns:
        A new list holding the same (now annotated) dict objects. An empty
        input yields an empty list. When every row has the same score, all
        rows receive 1.0.
    """
    if not results:
        return []

    # Coerce once and reuse below. The per-row arithmetic must run on these
    # floats, not on the raw values: mixing e.g. a Decimal raw score with the
    # float minimum would raise TypeError.
    scores = [float(list(item.values())[-1]) for item in results]
    min_score = min(scores)
    score_range = max(scores) - min_score

    for item, score in zip(results, scores):
        if score_range == 0:
            # All rows tie, so treat every one of them as best quality.
            item["normalized_score"] = 1.0
            continue
        normalized = (score - min_score) / score_range
        # For distances a lower score is better, so invert the result.
        item["normalized_score"] = (
            1.0 - normalized if is_distance_metric else normalized
        )

    return list(results)
40
+
41
+
42
def weighted_sum_ranking(
    primary_search_results: Sequence[RowMapping],
    secondary_search_results: Sequence[RowMapping],
    primary_results_weight: float = 0.5,
    secondary_results_weight: float = 0.5,
    fetch_top_k: int = 4,
    **kwargs: Any,
) -> Sequence[dict[str, Any]]:
    """Rank rows by a weighted sum of normalized scores from two searches.

    Each result set is min-max normalized to 0-1 (1 is best) and multiplied
    by its weight. Rows are matched across the two sets by the string form of
    their FIRST column value, and matching rows have their weighted scores
    added together.

    Args:
        primary_search_results: Row mappings from the primary search.
        secondary_search_results: Row mappings from the secondary search,
            scored as "higher is better".
        primary_results_weight: The weight for the primary source's scores.
            Defaults to 0.5.
        secondary_results_weight: The weight for the secondary source's
            scores. Defaults to 0.5.
        fetch_top_k: The number of rows to return after merging.
            Defaults to 4.
        **kwargs: ``distance_strategy`` selects how primary scores are read;
            anything other than ``DistanceStrategy.INNER_PRODUCT`` is treated
            as a distance (lower is better). Defaults to COSINE_DISTANCE.

    Returns:
        Up to ``fetch_top_k`` row dicts whose ``"distance"`` key holds the
        combined weighted score, sorted by that score in descending order.
    """
    strategy = kwargs.get("distance_strategy", DistanceStrategy.COSINE_DISTANCE)

    # Bring both result sets onto the same 0-1 "higher is better" scale.
    primary_rows = _normalize_scores(
        [dict(row) for row in primary_search_results],
        is_distance_metric=strategy != DistanceStrategy.INNER_PRODUCT,
    )
    secondary_rows = _normalize_scores(
        [dict(row) for row in secondary_search_results], is_distance_metric=False
    )

    # Combined rows keyed by document id (string form of the first column).
    merged: dict[str, dict[str, Any]] = {}

    for entry in primary_rows:
        key = str(next(iter(entry.values())))
        entry["distance"] = entry["normalized_score"] * primary_results_weight
        merged[key] = entry

    for entry in secondary_rows:
        key = str(next(iter(entry.values())))
        contribution = entry["normalized_score"] * secondary_results_weight
        existing = merged.get(key)
        if existing is None:
            # Row was only found by the secondary search.
            entry["distance"] = contribution
            merged[key] = entry
        else:
            existing["distance"] += contribution

    ordered = sorted(merged.values(), key=lambda row: row["distance"], reverse=True)

    # The helper key is internal and must not leak to callers.
    for row in ordered:
        row.pop("normalized_score", None)

    return ordered[:fetch_top_k]
117
+
118
+
119
def reciprocal_rank_fusion(
    primary_search_results: Sequence[RowMapping],
    secondary_search_results: Sequence[RowMapping],
    rrf_k: float = 60,
    fetch_top_k: int = 4,
    **kwargs: Any,
) -> Sequence[dict[str, Any]]:
    """Rank rows with Reciprocal Rank Fusion (RRF) across two searches.

    Each result set is ordered best-first by its ``"distance"`` value, and
    every row contributes ``1 / (rank + rrf_k)`` to its document's fused
    score. Documents are identified by the string form of their FIRST column
    value.

    Args:
        primary_search_results: Row mappings from the primary search.
        secondary_search_results: Row mappings from the secondary search,
            scored as "higher is better".
        rrf_k: The RRF parameter k. Defaults to 60.
        fetch_top_k: The number of rows to return after merging.
            Defaults to 4.
        **kwargs: ``distance_strategy`` decides the primary sort order:
            descending for ``DistanceStrategy.INNER_PRODUCT``, ascending
            otherwise. Defaults to COSINE_DISTANCE.

    Returns:
        Up to ``fetch_top_k`` row dicts whose ``"distance"`` key holds the
        fused RRF score, sorted by that score in descending order.
    """
    strategy = kwargs.get("distance_strategy", DistanceStrategy.COSINE_DISTANCE)
    fused: dict[str, dict[str, Any]] = {}

    def _accumulate(rows: Sequence[RowMapping], descending: bool) -> None:
        """Order rows best-first and add their reciprocal-rank scores."""
        ordered = sorted(rows, key=lambda row: row["distance"], reverse=descending)
        # NOTE: rank is zero-based, so the best row contributes 1 / rrf_k
        # (and rrf_k == 0 would divide by zero).
        for position, row in enumerate(ordered):
            key = str(next(iter(row.values())))
            entry = fused.get(key)
            if entry is None:
                entry = dict(row)
                entry["distance"] = 0.0
                fused[key] = entry
            entry["distance"] += 1.0 / (position + rrf_k)

    # Inner product is a similarity (higher is better); the other strategies
    # are distances (lower is better).
    _accumulate(
        primary_search_results,
        descending=strategy == DistanceStrategy.INNER_PRODUCT,
    )
    # Keyword search relevance is always "higher is better".
    _accumulate(secondary_search_results, descending=True)

    ranked = sorted(fused.values(), key=lambda row: row["distance"], reverse=True)
    return ranked[:fetch_top_k]
190
+
191
+
192
@dataclass
class HybridSearchConfig(ABC):
    """
    PGVectorStore Hybrid Search Config.

    Configures the full-text ("sparse") search that is combined with the
    vector ("dense") search, and how the two result sets are fused.

    Queries might be slow if the hybrid search column does not exist.
    For best hybrid search performance, consider creating a TSV column
    and adding GIN index.
    """

    # Name of the tsvector column. When empty, the vector store computes
    # to_tsvector(...) over the content column at query time instead.
    tsv_column: Optional[str] = ""
    # Text search configuration passed to to_tsvector / plainto_tsquery.
    tsv_lang: Optional[str] = "pg_catalog.english"
    # Full-text query. When empty, the vector store's similarity search
    # methods fill it with the search query text.
    fts_query: Optional[str] = ""
    # Merges the dense and sparse result sets. It is called with both result
    # sequences plus ``fusion_function_parameters`` and ``distance_strategy``
    # as keyword arguments.
    fusion_function: Callable[
        [Sequence[RowMapping], Sequence[RowMapping], Any], Sequence[Any]
    ] = weighted_sum_ranking
    # Extra keyword arguments for ``fusion_function``. The vector store sets
    # the "fetch_top_k" entry itself on each hybrid query.
    fusion_function_parameters: dict[str, Any] = field(default_factory=dict)
    # Row limit of the dense (vector) query when hybrid search is enabled.
    primary_top_k: int = 4
    # Row limit of the sparse (full-text) query.
    secondary_top_k: int = 4
    # Name and access method of the index built by the vector store's
    # hybrid search index helper.
    index_name: str = "langchain_tsv_index"
    index_type: str = "GIN"
@@ -9,6 +9,7 @@ from langchain_core.vectorstores import VectorStore
9
9
 
10
10
  from .async_vectorstore import AsyncPGVectorStore
11
11
  from .engine import PGEngine
12
+ from .hybrid_search_config import HybridSearchConfig
12
13
  from .indexes import (
13
14
  DEFAULT_DISTANCE_STRATEGY,
14
15
  BaseIndex,
@@ -59,6 +60,7 @@ class PGVectorStore(VectorStore):
59
60
  fetch_k: int = 20,
60
61
  lambda_mult: float = 0.5,
61
62
  index_query_options: Optional[QueryOptions] = None,
63
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
62
64
  ) -> PGVectorStore:
63
65
  """Create an PGVectorStore instance.
64
66
 
@@ -78,6 +80,7 @@ class PGVectorStore(VectorStore):
78
80
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
79
81
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
80
82
  index_query_options (QueryOptions): Index query option.
83
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
81
84
 
82
85
  Returns:
83
86
  PGVectorStore
@@ -98,6 +101,7 @@ class PGVectorStore(VectorStore):
98
101
  fetch_k=fetch_k,
99
102
  lambda_mult=lambda_mult,
100
103
  index_query_options=index_query_options,
104
+ hybrid_search_config=hybrid_search_config,
101
105
  )
102
106
  vs = await engine._run_as_async(coro)
103
107
  return cls(cls.__create_key, engine, vs)
@@ -120,6 +124,7 @@ class PGVectorStore(VectorStore):
120
124
  fetch_k: int = 20,
121
125
  lambda_mult: float = 0.5,
122
126
  index_query_options: Optional[QueryOptions] = None,
127
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
123
128
  ) -> PGVectorStore:
124
129
  """Create an PGVectorStore instance.
125
130
 
@@ -140,6 +145,7 @@ class PGVectorStore(VectorStore):
140
145
  fetch_k (int, optional): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
141
146
  lambda_mult (float, optional): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
142
147
  index_query_options (Optional[QueryOptions], optional): Index query option. Defaults to None.
148
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
143
149
 
144
150
  Returns:
145
151
  PGVectorStore
@@ -160,6 +166,7 @@ class PGVectorStore(VectorStore):
160
166
  fetch_k=fetch_k,
161
167
  lambda_mult=lambda_mult,
162
168
  index_query_options=index_query_options,
169
+ hybrid_search_config=hybrid_search_config,
163
170
  )
164
171
  vs = engine._run_as_sync(coro)
165
172
  return cls(cls.__create_key, engine, vs)
@@ -301,6 +308,7 @@ class PGVectorStore(VectorStore):
301
308
  fetch_k: int = 20,
302
309
  lambda_mult: float = 0.5,
303
310
  index_query_options: Optional[QueryOptions] = None,
311
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
304
312
  **kwargs: Any,
305
313
  ) -> PGVectorStore:
306
314
  """Create an PGVectorStore instance from texts.
@@ -324,6 +332,7 @@ class PGVectorStore(VectorStore):
324
332
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
325
333
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
326
334
  index_query_options (QueryOptions): Index query option.
335
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
327
336
 
328
337
  Raises:
329
338
  :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
@@ -347,6 +356,7 @@ class PGVectorStore(VectorStore):
347
356
  fetch_k=fetch_k,
348
357
  lambda_mult=lambda_mult,
349
358
  index_query_options=index_query_options,
359
+ hybrid_search_config=hybrid_search_config,
350
360
  )
351
361
  await vs.aadd_texts(texts, metadatas=metadatas, ids=ids)
352
362
  return vs
@@ -371,6 +381,7 @@ class PGVectorStore(VectorStore):
371
381
  fetch_k: int = 20,
372
382
  lambda_mult: float = 0.5,
373
383
  index_query_options: Optional[QueryOptions] = None,
384
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
374
385
  **kwargs: Any,
375
386
  ) -> PGVectorStore:
376
387
  """Create an PGVectorStore instance from documents.
@@ -393,6 +404,7 @@ class PGVectorStore(VectorStore):
393
404
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
394
405
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
395
406
  index_query_options (QueryOptions): Index query option.
407
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
396
408
 
397
409
  Raises:
398
410
  :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
@@ -417,6 +429,7 @@ class PGVectorStore(VectorStore):
417
429
  fetch_k=fetch_k,
418
430
  lambda_mult=lambda_mult,
419
431
  index_query_options=index_query_options,
432
+ hybrid_search_config=hybrid_search_config,
420
433
  )
421
434
  await vs.aadd_documents(documents, ids=ids)
422
435
  return vs
@@ -442,6 +455,7 @@ class PGVectorStore(VectorStore):
442
455
  fetch_k: int = 20,
443
456
  lambda_mult: float = 0.5,
444
457
  index_query_options: Optional[QueryOptions] = None,
458
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
445
459
  **kwargs: Any,
446
460
  ) -> PGVectorStore:
447
461
  """Create an PGVectorStore instance from texts.
@@ -465,6 +479,7 @@ class PGVectorStore(VectorStore):
465
479
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
466
480
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
467
481
  index_query_options (QueryOptions): Index query option.
482
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
468
483
 
469
484
  Raises:
470
485
  :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
@@ -488,6 +503,7 @@ class PGVectorStore(VectorStore):
488
503
  fetch_k=fetch_k,
489
504
  lambda_mult=lambda_mult,
490
505
  index_query_options=index_query_options,
506
+ hybrid_search_config=hybrid_search_config,
491
507
  **kwargs,
492
508
  )
493
509
  vs.add_texts(texts, metadatas=metadatas, ids=ids)
@@ -513,6 +529,7 @@ class PGVectorStore(VectorStore):
513
529
  fetch_k: int = 20,
514
530
  lambda_mult: float = 0.5,
515
531
  index_query_options: Optional[QueryOptions] = None,
532
+ hybrid_search_config: Optional[HybridSearchConfig] = None,
516
533
  **kwargs: Any,
517
534
  ) -> PGVectorStore:
518
535
  """Create an PGVectorStore instance from documents.
@@ -535,6 +552,7 @@ class PGVectorStore(VectorStore):
535
552
  fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
536
553
  lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
537
554
  index_query_options (QueryOptions): Index query option.
555
+ hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None.
538
556
 
539
557
  Raises:
540
558
  :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.
@@ -558,6 +576,7 @@ class PGVectorStore(VectorStore):
558
576
  fetch_k=fetch_k,
559
577
  lambda_mult=lambda_mult,
560
578
  index_query_options=index_query_options,
579
+ hybrid_search_config=hybrid_search_config,
561
580
  **kwargs,
562
581
  )
563
582
  vs.add_documents(documents, ids=ids)
@@ -770,6 +789,24 @@ class PGVectorStore(VectorStore):
770
789
  )
771
790
  )
772
791
 
792
+ async def aapply_hybrid_search_index(
793
+ self,
794
+ concurrently: bool = False,
795
+ ) -> None:
796
+ """Creates a TSV index in the vector store table if possible."""
797
+ return await self._engine._run_as_async(
798
+ self.__vs.aapply_hybrid_search_index(concurrently=concurrently)
799
+ )
800
+
801
+ def apply_hybrid_search_index(
802
+ self,
803
+ concurrently: bool = False,
804
+ ) -> None:
805
+ """Creates a TSV index in the vector store table if possible."""
806
+ return self._engine._run_as_sync(
807
+ self.__vs.aapply_hybrid_search_index(concurrently=concurrently)
808
+ )
809
+
773
810
  async def aapply_vector_index(
774
811
  self,
775
812
  index: BaseIndex,
@@ -5,6 +5,7 @@ import contextlib
5
5
  import enum
6
6
  import logging
7
7
  import uuid
8
+ import warnings
8
9
  from typing import (
9
10
  Any,
10
11
  AsyncGenerator,
@@ -19,7 +20,6 @@ from typing import (
19
20
  Type,
20
21
  Union,
21
22
  )
22
- import warnings
23
23
  from typing import (
24
24
  cast as typing_cast,
25
25
  )
@@ -429,13 +429,6 @@ class PGVector(VectorStore):
429
429
  self._async_engine: Optional[AsyncEngine] = None
430
430
  self._async_init = False
431
431
 
432
- warnings.warn(
433
- "PGVector is being deprecated and will be removed in the future. "
434
- "Please migrate to PGVectorStore. "
435
- "Refer to the migration guide at [https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.md] for details.",
436
- PendingDeprecationWarning,
437
- )
438
-
439
432
  if isinstance(connection, str):
440
433
  if async_mode:
441
434
  self._async_engine = create_async_engine(
@@ -1,25 +1,17 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: langchain-postgres
3
- Version: 0.0.14rc1
3
+ Version: 0.0.16
4
4
  Summary: An integration package connecting Postgres and LangChain
5
- Home-page: https://github.com/langchain-ai/langchain-postgres
6
- License: MIT
7
- Requires-Python: >=3.9,<4.0
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.9
11
- Classifier: Programming Language :: Python :: 3.10
12
- Classifier: Programming Language :: Python :: 3.11
13
- Classifier: Programming Language :: Python :: 3.12
14
- Requires-Dist: asyncpg (>=0.30.0,<0.31.0)
15
- Requires-Dist: langchain-core (>=0.2.13,<0.4.0)
16
- Requires-Dist: numpy (>=1.21,<2.0)
17
- Requires-Dist: pgvector (>=0.2.5,<0.4)
18
- Requires-Dist: psycopg (>=3,<4)
19
- Requires-Dist: psycopg-pool (>=3.2.1,<4.0.0)
20
- Requires-Dist: sqlalchemy (>=2,<3)
21
- Project-URL: Repository, https://github.com/langchain-ai/langchain-postgres
22
- Project-URL: Source Code, https://github.com/langchain-ai/langchain-postgres/tree/master/langchain_postgres
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: asyncpg>=0.30.0
9
+ Requires-Dist: langchain-core<2.0,>=0.2.13
10
+ Requires-Dist: numpy<3,>=1.21
11
+ Requires-Dist: pgvector<0.4,>=0.2.5
12
+ Requires-Dist: psycopg-pool<4,>=3.2.1
13
+ Requires-Dist: psycopg[binary]<4,>=3
14
+ Requires-Dist: sqlalchemy[asyncio]<3,>=2
23
15
  Description-Content-Type: text/markdown
24
16
 
25
17
  # langchain-postgres
@@ -39,7 +31,7 @@ Feel free to use the abstraction as provided or else modify them / extend them a
39
31
 
40
32
  ## Requirements
41
33
 
42
- The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [psycogp3](https://www.psycopg.org/psycopg3/) drivers.
34
+ The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [psycopg3](https://www.psycopg.org/psycopg3/) drivers.
43
35
 
44
36
  ## Installation
45
37
 
@@ -47,17 +39,19 @@ The package supports the [asyncpg](https://github.com/MagicStack/asyncpg) and [p
47
39
  pip install -U langchain-postgres
48
40
  ```
49
41
 
50
- ## Usage
51
-
52
- ### Vectorstore
42
+ ## Vectorstore
53
43
 
54
44
  > [!WARNING]
55
45
  > In v0.0.14+, `PGVector` is deprecated. Please migrate to `PGVectorStore`
56
- > Version 0.0.14+ has not been released yet, but you can test version of the vectorstore on the main branch. Until official release do not use in production.
57
46
  > for improved performance and manageability.
58
- > See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.md) for details on how to migrate from `PGVector` to `PGVectorStore`.
47
+ > See the [migration guide](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/migrate_pgvector_to_pgvectorstore.ipynb) for details on how to migrate from `PGVector` to `PGVectorStore`.
48
+
49
+ ### Documentation
59
50
 
60
- For a detailed example on `PGVectorStore` see [here](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore.ipynb).
51
+ * [Quickstart](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore.ipynb)
52
+ * [How-to](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore_how_to.ipynb)
53
+
54
+ ### Example
61
55
 
62
56
  ```python
63
57
  from langchain_core.documents import Document
@@ -101,7 +95,25 @@ print(docs)
101
95
  > [!TIP]
102
96
  > All synchronous functions have corresponding asynchronous functions
103
97
 
104
- ### ChatMessageHistory
98
+ ### Hybrid Search with PGVectorStore
99
+
100
+ With PGVectorStore you can use hybrid search for more comprehensive and relevant search results.
101
+
102
+ ```python
103
+ vs = PGVectorStore.create_sync(
104
+ engine=engine,
105
+ table_name=TABLE_NAME,
106
+ embedding_service=embedding,
107
+ hybrid_search_config=HybridSearchConfig(
108
+ fusion_function=reciprocal_rank_fusion
109
+ ),
110
+ )
111
+ hybrid_docs = vs.similarity_search("products", k=5)
112
+ ```
113
+
114
+ For a detailed guide on how to use hybrid search, see the [documentation](https://github.com/langchain-ai/langchain-postgres/blob/main/examples/pg_vectorstore_how_to.ipynb#hybrid-search-with-pgvectorstore).
115
+
116
+ ## ChatMessageHistory
105
117
 
106
118
  The chat message history abstraction helps to persist chat message history
107
119
  in a postgres table.
@@ -167,4 +179,3 @@ Using the Google Cloud integrations provides the following benefits:
167
179
  | Google AlloyDB | ✓ | ✓ | ✓ | ✓ | ✗ |
168
180
  | Google Cloud SQL Postgres| ✓ | ✓ | ✓ | ✓ | ✗ |
169
181
 
170
-
@@ -0,0 +1,17 @@
1
+ langchain_postgres/__init__.py,sha256=-ovoLrNuzL-kMUV-RrIxoEI8wmgOAg4vfE8xevYSA3Q,702
2
+ langchain_postgres/_utils.py,sha256=N_OBzYFCb_bsHOnZ-YRg6izhmuudorQhupgeG-rSKUc,2848
3
+ langchain_postgres/chat_message_histories.py,sha256=Hq_0nGX1BoBxq5jg0LwfQg7iXm6B4izYVr6iLkMGoEY,14214
4
+ langchain_postgres/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ langchain_postgres/translator.py,sha256=6cTS2RJUodMUdsurJM-f-vgPXl6Ad6bfMo8ECuh5Jr4,1524
6
+ langchain_postgres/vectorstores.py,sha256=vzRbPwU1Rn-pOsnTsz1u72cSYD7H8jMlW4N7A58QIt4,83826
7
+ langchain_postgres/utils/pgvector_migrator.py,sha256=OxW2_FxaomZw5kqPAz-3lmZ5t2hSXU4ZW3xK6O62MH4,11771
8
+ langchain_postgres/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ langchain_postgres/v2/async_vectorstore.py,sha256=MuRjlRcANOnxrXRGcyGEzIZYr4v75tk8jbMZZCexSAc,58711
10
+ langchain_postgres/v2/engine.py,sha256=UC3upYnqmgKBw4E6t62CbjUEdVO67t1j0rCbdFmoQnI,16902
11
+ langchain_postgres/v2/hybrid_search_config.py,sha256=dhBeedqpVXv2VP2_RLs_jNHLLLrukJ-UXytxRD3zVts,7658
12
+ langchain_postgres/v2/indexes.py,sha256=aLCFGYiIbLBUr88drMLD6l41MPRI7lv0ALMVRWfqdq4,4888
13
+ langchain_postgres/v2/vectorstores.py,sha256=Iq5z3KU0Ne_djMLlhJNL43zprii0O1JdUN2uEuvvKNI,39213
14
+ langchain_postgres-0.0.16.dist-info/METADATA,sha256=fLsfXjrnlW412RDvPW5nv4uFJqaujUQkIBujCCsERWc,7143
15
+ langchain_postgres-0.0.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ langchain_postgres-0.0.16.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
17
+ langchain_postgres-0.0.16.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.8.1
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,16 +0,0 @@
1
- langchain_postgres/__init__.py,sha256=UxIanyWPeUVtWFKCT-sWGXbWUO5I76akABXhXolY9bM,702
2
- langchain_postgres/_utils.py,sha256=N_OBzYFCb_bsHOnZ-YRg6izhmuudorQhupgeG-rSKUc,2848
3
- langchain_postgres/chat_message_histories.py,sha256=Et5AgXSRBCghLC5sn6EEUDd1xupaiPv-A5IyNBjpaTc,14213
4
- langchain_postgres/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- langchain_postgres/translator.py,sha256=6cTS2RJUodMUdsurJM-f-vgPXl6Ad6bfMo8ECuh5Jr4,1524
6
- langchain_postgres/utils/pgvector_migrator.py,sha256=OIclFsCKWQAtJ1JyFQsVQoWZSrEJg67GVnY84aBlucE,11776
7
- langchain_postgres/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- langchain_postgres/v2/async_vectorstore.py,sha256=FMV-IwH7cJ2VuxnrMCy0X0WWG65oHNXfKAwsdf0Tp20,51786
9
- langchain_postgres/v2/engine.py,sha256=8XD6ta2HzuYtHnxhvY-I_vMYqZd33yj2y9ZqQFbEz1g,14266
10
- langchain_postgres/v2/indexes.py,sha256=aLCFGYiIbLBUr88drMLD6l41MPRI7lv0ALMVRWfqdq4,4888
11
- langchain_postgres/v2/vectorstores.py,sha256=R17q1KIEZPBwEHgE6JYiRSiN8rZXzVPCmBoJobiyjM8,37198
12
- langchain_postgres/vectorstores.py,sha256=Xjyqxa_nL7Xvq6dwqWUu4VdNZ5z6ypjFoSU9wj6Ad5c,84195
13
- langchain_postgres-0.0.14rc1.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
14
- langchain_postgres-0.0.14rc1.dist-info/METADATA,sha256=ZOG0qTuKUt4_uz2VUAy4Cj4A-DtsfouNBB6ITk7bihk,7179
15
- langchain_postgres-0.0.14rc1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
16
- langchain_postgres-0.0.14rc1.dist-info/RECORD,,