cmem-plugin-pgvector 0.6.2__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cmem-plugin-pgvector
3
- Version: 0.6.2
3
+ Version: 0.7.0
4
4
  Summary: Store and search for embedding vectors in a Postgres vector store.
5
5
  License: Apache-2.0
6
6
  Keywords: eccenca Corporate Memory,plugin
@@ -16,10 +16,10 @@ Classifier: Programming Language :: Python :: 3
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: 3.12
18
18
  Classifier: Programming Language :: Python :: 3.13
19
- Requires-Dist: cmem-plugin-base (>=4.8.0,<5.0.0)
20
- Requires-Dist: langchain-community (>=0.3.2,<0.4.0)
21
- Requires-Dist: langchain-postgres (>=0.0.13,<0.0.14)
22
- Requires-Dist: psycopg[binary] (>=3.2.3,<4.0.0)
19
+ Requires-Dist: cmem-plugin-base (>=4.12.1,<5.0.0)
20
+ Requires-Dist: langchain-core (>=0.3.71,<0.4.0)
21
+ Requires-Dist: langchain-postgres (>=0.0.15,<0.0.16)
22
+ Requires-Dist: psycopg[binary] (>=3.2.9,<4.0.0)
23
23
  Description-Content-Type: text/markdown
24
24
 
25
25
  # cmem-plugin-pgvector
@@ -28,12 +28,10 @@ Store and search for embedding vectors in a Postgres vector store.
28
28
 
29
29
  [![eccenca Corporate Memory][cmem-shield]][cmem-link]
30
30
 
31
- This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com).
32
-
33
- You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
31
+ This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com). You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
34
32
 
35
33
  ``` bash
36
- cmemc admin workspace python install cmem-plugin-llm
34
+ cmemc admin workspace python install cmem-plugin-pgvector
37
35
  ```
38
36
 
39
37
  [![pypi version](https://img.shields.io/pypi/v/cmem-plugin-pgvector)](https://pypi.org/project/cmem-plugin-pgvector) [![license](https://img.shields.io/pypi/l/cmem-plugin-pgvector)](https://pypi.org/project/cmem-plugin-pgvector)
@@ -4,12 +4,10 @@ Store and search for embedding vectors in a Postgres vector store.
4
4
 
5
5
  [![eccenca Corporate Memory][cmem-shield]][cmem-link]
6
6
 
7
- This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com).
8
-
9
- You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
7
+ This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com). You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
10
8
 
11
9
  ``` bash
12
- cmemc admin workspace python install cmem-plugin-llm
10
+ cmemc admin workspace python install cmem-plugin-pgvector
13
11
  ```
14
12
 
15
13
  [![pypi version](https://img.shields.io/pypi/v/cmem-plugin-pgvector)](https://pypi.org/project/cmem-plugin-pgvector) [![license](https://img.shields.io/pypi/l/cmem-plugin-pgvector)](https://pypi.org/project/cmem-plugin-pgvector)
@@ -13,13 +13,27 @@ from cmem_plugin_base.dataintegration.ports import (
13
13
  FixedNumberOfInputs,
14
14
  FixedSchemaPort,
15
15
  )
16
- from cmem_plugin_base.dataintegration.types import IntParameterType
16
+ from cmem_plugin_base.dataintegration.types import EnumParameterType, IntParameterType
17
17
  from langchain_core.documents import Document
18
+ from langchain_core.embeddings import Embeddings
18
19
  from langchain_postgres import PGVector
20
+ from langchain_postgres.vectorstores import DistanceStrategy
19
21
 
20
22
  from cmem_plugin_pgvector.commons import DatabaseParams
21
23
 
22
24
 
25
+ class DummyEmbeddings(Embeddings):
26
+ """Dummy embedding model"""
27
+
28
+ def embed_query(self, text: str) -> list[float]:
29
+ """Embed a query"""
30
+ raise NotImplementedError
31
+
32
+ def embed_documents(self, texts: list[str]) -> list[list[float]]:
33
+ """Embed a list of documents"""
34
+ raise NotImplementedError
35
+
36
+
23
37
  @Plugin(
24
38
  label="Search Vector Embeddings",
25
39
  description="Search for top-k metadata stored in Postgres Vector Store (PGVector).",
@@ -73,6 +87,14 @@ The results in this output are structured like this:
73
87
  default_value=10,
74
88
  param_type=IntParameterType(),
75
89
  ),
90
+ PluginParameter(
91
+ name="distance_strategy",
92
+ label="Distance Strategy",
93
+ description="The distance strategy to use. (default: COSINE)",
94
+ param_type=EnumParameterType(enum_type=DistanceStrategy),
95
+ default_value=DistanceStrategy.COSINE,
96
+ advanced=True,
97
+ ),
76
98
  ],
77
99
  )
78
100
  class PGVectorSearchPlugin(WorkflowPlugin):
@@ -92,6 +114,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
92
114
  report: ExecutionReport
93
115
  search_result_path: str
94
116
  top_k: int
117
+ distance_strategy: DistanceStrategy
95
118
 
96
119
  def __init__( # noqa: PLR0913
97
120
  self,
@@ -104,6 +127,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
104
127
  search_result_path: str = "_search_result",
105
128
  embedding_query_path: str = "_embedding",
106
129
  top_k: int = 10,
130
+ distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
107
131
  ) -> None:
108
132
  self.collection_name = collection_name
109
133
  self.user = user
@@ -113,6 +137,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
113
137
  self.embedding_query_path = embedding_query_path
114
138
  self.search_result_path = search_result_path
115
139
  self.top_k = top_k
140
+ self.distance_strategy = distance_strategy
116
141
 
117
142
  str_password = self.password = password if isinstance(password, str) else password.decrypt()
118
143
  self.connection_string = (
@@ -126,9 +151,10 @@ class PGVectorSearchPlugin(WorkflowPlugin):
126
151
  self.db = PGVector(
127
152
  collection_name=self.collection_name,
128
153
  connection=self.connection_string,
129
- embeddings=None, # type: ignore # noqa: PGH003
154
+ embeddings=DummyEmbeddings(),
130
155
  use_jsonb=True,
131
156
  pre_delete_collection=False,
157
+ distance_strategy=distance_strategy,
132
158
  )
133
159
  self._setup_ports()
134
160
 
@@ -170,15 +196,17 @@ class PGVectorSearchPlugin(WorkflowPlugin):
170
196
  pass
171
197
  return False
172
198
 
173
- def _docs_to_json(self, docs: list[tuple[Document, float]]) -> list:
199
+ @staticmethod
200
+ def _docs_to_json(docs: list[tuple[Document, float]]) -> list:
174
201
  """Convert a list of Documents to a list of metadata"""
175
202
  doc_list: list = []
176
203
  for doc_tuple in docs:
177
- json_entity = {}
178
- json_entity["id"] = doc_tuple[0].id
179
- json_entity["metadata"] = str(doc_tuple[0].metadata)
180
- json_entity["_embedding_source"] = doc_tuple[0].page_content
181
- json_entity["distance"] = str(doc_tuple[1])
204
+ json_entity = {
205
+ "id": doc_tuple[0].id,
206
+ "metadata": str(doc_tuple[0].metadata),
207
+ "_embedding_source": doc_tuple[0].page_content,
208
+ "distance": str(doc_tuple[1]),
209
+ }
182
210
  doc_list.append(json_entity)
183
211
  return doc_list
184
212
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cmem-plugin-pgvector"
3
- version = "0.6.2"
3
+ version = "0.7.0"
4
4
  license = "Apache-2.0"
5
5
  description = "Store and search for embedding vectors in a Postgres vector store."
6
6
  authors = ["eccenca GmbH <cmempy-developer@eccenca.com>"]
@@ -16,27 +16,28 @@ keywords = [
16
16
 
17
17
  [tool.poetry.dependencies]# if you need to change python version here, change it also in .python-version
18
18
  python = "^3.11"
19
- langchain-community = "^0.3.2"
20
- langchain-postgres = "^0.0.13"
21
- psycopg = {extras = ["binary"], version = "^3.2.3"}
19
+ langchain-postgres = "^0.0.15"
20
+ psycopg = {extras = ["binary"], version = "^3.2.9"}
21
+ langchain-core = "^0.3.71"
22
22
 
23
23
  [tool.poetry.dependencies.cmem-plugin-base]
24
- version = "^4.8.0"
24
+ version = "^4.12.1"
25
25
  allow-prereleases = false
26
26
 
27
27
  [tool.poetry.group.dev.dependencies.cmem-cmemc]
28
- version = "^24.3.0"
28
+ version = ">=24.2.0"
29
29
 
30
30
  [tool.poetry.group.dev.dependencies]
31
- genbadge = {extras = ["coverage"], version = "^1.1.1"}
32
- mypy = "^1.14.1"
33
- pip = "^25.0"
34
- pytest = "^8.3.4"
35
- pytest-cov = "^6.0.0"
31
+ deptry = "^0.23.0"
32
+ genbadge = {extras = ["coverage"], version = "^1.1.2"}
33
+ mypy = "^1.16.1"
34
+ pip = "^25.1.1"
35
+ pytest = "^8.4.1"
36
+ pytest-cov = "^6.2.1"
36
37
  pytest-dotenv = "^0.5.2"
37
38
  pytest-html = "^4.1.1"
38
39
  pytest-memray = { version = "^1.7.0", markers = "platform_system != 'Windows'" }
39
- ruff = "^0.9.4"
40
+ ruff = "^0.12.0"
40
41
  safety = "^1.10.3"
41
42
  aiohttp = "^3.10.11"
42
43
 
@@ -96,4 +97,3 @@ ignore = [
96
97
  "S101", # use of assert detected
97
98
  "TRY003", # Avoid specifying long messages outside the exception class
98
99
  ]
99
-