cmem-plugin-pgvector 0.6.2__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/PKG-INFO +7 -9
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/README-public.md +2 -4
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/search_task.py +36 -8
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/pyproject.toml +13 -13
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/LICENSE +0 -0
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/__init__.py +0 -0
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/commons.py +0 -0
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/postgresql.svg +0 -0
- {cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/store_task.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: cmem-plugin-pgvector
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Store and search for embedding vectors in a Postgres vector store.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: eccenca Corporate Memory,plugin
|
|
@@ -16,10 +16,10 @@ Classifier: Programming Language :: Python :: 3
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
Requires-Dist: cmem-plugin-base (>=4.
|
|
20
|
-
Requires-Dist: langchain-
|
|
21
|
-
Requires-Dist: langchain-postgres (>=0.0.
|
|
22
|
-
Requires-Dist: psycopg[binary] (>=3.2.
|
|
19
|
+
Requires-Dist: cmem-plugin-base (>=4.12.1,<5.0.0)
|
|
20
|
+
Requires-Dist: langchain-core (>=0.3.71,<0.4.0)
|
|
21
|
+
Requires-Dist: langchain-postgres (>=0.0.15,<0.0.16)
|
|
22
|
+
Requires-Dist: psycopg[binary] (>=3.2.9,<4.0.0)
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
25
|
# cmem-plugin-pgvector
|
|
@@ -28,12 +28,10 @@ Store and search for embedding vectors in a Postgres vector store.
|
|
|
28
28
|
|
|
29
29
|
[![eccenca Corporate Memory][cmem-shield]][cmem-link]
|
|
30
30
|
|
|
31
|
-
This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com).
|
|
32
|
-
|
|
33
|
-
You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
|
|
31
|
+
This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com). You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
|
|
34
32
|
|
|
35
33
|
``` bash
|
|
36
|
-
cmemc admin workspace python install cmem-plugin-
|
|
34
|
+
cmemc admin workspace python install cmem-plugin-pgvector
|
|
37
35
|
```
|
|
38
36
|
|
|
39
37
|
[](https://pypi.org/project/cmem-plugin-pgvector) [](https://pypi.org/project/cmem-plugin-pgvector)
|
|
@@ -4,12 +4,10 @@ Store and search for embedding vectors in a Postgres vector store.
|
|
|
4
4
|
|
|
5
5
|
[![eccenca Corporate Memory][cmem-shield]][cmem-link]
|
|
6
6
|
|
|
7
|
-
This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com).
|
|
8
|
-
|
|
9
|
-
You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
|
|
7
|
+
This is a plugin for [eccenca](https://eccenca.com) [Corporate Memory](https://documentation.eccenca.com). You can install it with the [cmemc](https://eccenca.com/go/cmemc) command line client like this:
|
|
10
8
|
|
|
11
9
|
``` bash
|
|
12
|
-
cmemc admin workspace python install cmem-plugin-
|
|
10
|
+
cmemc admin workspace python install cmem-plugin-pgvector
|
|
13
11
|
```
|
|
14
12
|
|
|
15
13
|
[](https://pypi.org/project/cmem-plugin-pgvector) [](https://pypi.org/project/cmem-plugin-pgvector)
|
{cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/search_task.py
RENAMED
|
@@ -13,13 +13,27 @@ from cmem_plugin_base.dataintegration.ports import (
|
|
|
13
13
|
FixedNumberOfInputs,
|
|
14
14
|
FixedSchemaPort,
|
|
15
15
|
)
|
|
16
|
-
from cmem_plugin_base.dataintegration.types import IntParameterType
|
|
16
|
+
from cmem_plugin_base.dataintegration.types import EnumParameterType, IntParameterType
|
|
17
17
|
from langchain_core.documents import Document
|
|
18
|
+
from langchain_core.embeddings import Embeddings
|
|
18
19
|
from langchain_postgres import PGVector
|
|
20
|
+
from langchain_postgres.vectorstores import DistanceStrategy
|
|
19
21
|
|
|
20
22
|
from cmem_plugin_pgvector.commons import DatabaseParams
|
|
21
23
|
|
|
22
24
|
|
|
25
|
+
class DummyEmbeddings(Embeddings):
|
|
26
|
+
"""Dummy embedding model"""
|
|
27
|
+
|
|
28
|
+
def embed_query(self, text: str) -> list[float]:
|
|
29
|
+
"""Embed a query"""
|
|
30
|
+
raise NotImplementedError
|
|
31
|
+
|
|
32
|
+
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
|
33
|
+
"""Embed a list of documents"""
|
|
34
|
+
raise NotImplementedError
|
|
35
|
+
|
|
36
|
+
|
|
23
37
|
@Plugin(
|
|
24
38
|
label="Search Vector Embeddings",
|
|
25
39
|
description="Search for top-k metadata stored in Postgres Vector Store (PGVector).",
|
|
@@ -73,6 +87,14 @@ The results in this output are structured like this:
|
|
|
73
87
|
default_value=10,
|
|
74
88
|
param_type=IntParameterType(),
|
|
75
89
|
),
|
|
90
|
+
PluginParameter(
|
|
91
|
+
name="distance_strategy",
|
|
92
|
+
label="Distance Strategy",
|
|
93
|
+
description="The distance strategy to use. (default: COSINE)",
|
|
94
|
+
param_type=EnumParameterType(enum_type=DistanceStrategy),
|
|
95
|
+
default_value=DistanceStrategy.COSINE,
|
|
96
|
+
advanced=True,
|
|
97
|
+
),
|
|
76
98
|
],
|
|
77
99
|
)
|
|
78
100
|
class PGVectorSearchPlugin(WorkflowPlugin):
|
|
@@ -92,6 +114,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
92
114
|
report: ExecutionReport
|
|
93
115
|
search_result_path: str
|
|
94
116
|
top_k: int
|
|
117
|
+
distance_strategy: DistanceStrategy
|
|
95
118
|
|
|
96
119
|
def __init__( # noqa: PLR0913
|
|
97
120
|
self,
|
|
@@ -104,6 +127,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
104
127
|
search_result_path: str = "_search_result",
|
|
105
128
|
embedding_query_path: str = "_embedding",
|
|
106
129
|
top_k: int = 10,
|
|
130
|
+
distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
|
|
107
131
|
) -> None:
|
|
108
132
|
self.collection_name = collection_name
|
|
109
133
|
self.user = user
|
|
@@ -113,6 +137,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
113
137
|
self.embedding_query_path = embedding_query_path
|
|
114
138
|
self.search_result_path = search_result_path
|
|
115
139
|
self.top_k = top_k
|
|
140
|
+
self.distance_strategy = distance_strategy
|
|
116
141
|
|
|
117
142
|
str_password = self.password = password if isinstance(password, str) else password.decrypt()
|
|
118
143
|
self.connection_string = (
|
|
@@ -126,9 +151,10 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
126
151
|
self.db = PGVector(
|
|
127
152
|
collection_name=self.collection_name,
|
|
128
153
|
connection=self.connection_string,
|
|
129
|
-
embeddings=
|
|
154
|
+
embeddings=DummyEmbeddings(),
|
|
130
155
|
use_jsonb=True,
|
|
131
156
|
pre_delete_collection=False,
|
|
157
|
+
distance_strategy=distance_strategy,
|
|
132
158
|
)
|
|
133
159
|
self._setup_ports()
|
|
134
160
|
|
|
@@ -170,15 +196,17 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
170
196
|
pass
|
|
171
197
|
return False
|
|
172
198
|
|
|
173
|
-
|
|
199
|
+
@staticmethod
|
|
200
|
+
def _docs_to_json(docs: list[tuple[Document, float]]) -> list:
|
|
174
201
|
"""Convert a list of Documents to a list of metadata"""
|
|
175
202
|
doc_list: list = []
|
|
176
203
|
for doc_tuple in docs:
|
|
177
|
-
json_entity = {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
204
|
+
json_entity = {
|
|
205
|
+
"id": doc_tuple[0].id,
|
|
206
|
+
"metadata": str(doc_tuple[0].metadata),
|
|
207
|
+
"_embedding_source": doc_tuple[0].page_content,
|
|
208
|
+
"distance": str(doc_tuple[1]),
|
|
209
|
+
}
|
|
182
210
|
doc_list.append(json_entity)
|
|
183
211
|
return doc_list
|
|
184
212
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cmem-plugin-pgvector"
|
|
3
|
-
version = "0.6.2"
|
|
3
|
+
version = "0.7.0"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
description = "Store and search for embedding vectors in a Postgres vector store."
|
|
6
6
|
authors = ["eccenca GmbH <cmempy-developer@eccenca.com>"]
|
|
@@ -16,27 +16,28 @@ keywords = [
|
|
|
16
16
|
|
|
17
17
|
[tool.poetry.dependencies]# if you need to change python version here, change it also in .python-version
|
|
18
18
|
python = "^3.11"
|
|
19
|
-
langchain-
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
langchain-postgres = "^0.0.15"
|
|
20
|
+
psycopg = {extras = ["binary"], version = "^3.2.9"}
|
|
21
|
+
langchain-core = "^0.3.71"
|
|
22
22
|
|
|
23
23
|
[tool.poetry.dependencies.cmem-plugin-base]
|
|
24
|
-
version = "^4.
|
|
24
|
+
version = "^4.12.1"
|
|
25
25
|
allow-prereleases = false
|
|
26
26
|
|
|
27
27
|
[tool.poetry.group.dev.dependencies.cmem-cmemc]
|
|
28
|
-
version = "
|
|
28
|
+
version = ">=24.2.0"
|
|
29
29
|
|
|
30
30
|
[tool.poetry.group.dev.dependencies]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
pytest
|
|
31
|
+
deptry = "^0.23.0"
|
|
32
|
+
genbadge = {extras = ["coverage"], version = "^1.1.2"}
|
|
33
|
+
mypy = "^1.16.1"
|
|
34
|
+
pip = "^25.1.1"
|
|
35
|
+
pytest = "^8.4.1"
|
|
36
|
+
pytest-cov = "^6.2.1"
|
|
36
37
|
pytest-dotenv = "^0.5.2"
|
|
37
38
|
pytest-html = "^4.1.1"
|
|
38
39
|
pytest-memray = { version = "^1.7.0", markers = "platform_system != 'Windows'" }
|
|
39
|
-
ruff = "^0.
|
|
40
|
+
ruff = "^0.12.0"
|
|
40
41
|
safety = "^1.10.3"
|
|
41
42
|
aiohttp = "^3.10.11"
|
|
42
43
|
|
|
@@ -96,4 +97,3 @@ ignore = [
|
|
|
96
97
|
"S101", # use of assert detected
|
|
97
98
|
"TRY003", # Avoid specifying long messages outside the exception class
|
|
98
99
|
]
|
|
99
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/postgresql.svg
RENAMED
|
File without changes
|
{cmem_plugin_pgvector-0.6.2 → cmem_plugin_pgvector-0.7.0}/cmem_plugin_pgvector/store_task.py
RENAMED
|
File without changes
|