cmem-plugin-pgvector 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/PKG-INFO +4 -2
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/commons.py +40 -14
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/search_task.py +35 -12
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/store_task.py +33 -9
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/pyproject.toml +2 -1
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/LICENSE +0 -0
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/README-public.md +0 -0
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/__init__.py +0 -0
- {cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/postgresql.svg +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cmem-plugin-pgvector
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Store and search for embedding vectors in a Postgres vector store.
|
|
5
5
|
License: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Keywords: eccenca Corporate Memory,plugin
|
|
7
8
|
Author: eccenca GmbH
|
|
8
9
|
Author-email: cmempy-developer@eccenca.com
|
|
@@ -16,6 +17,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
16
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
21
|
Requires-Dist: cmem-plugin-base (>=4.12.1,<5.0.0)
|
|
20
22
|
Requires-Dist: langchain-core (>=0.3.71,<0.4.0)
|
|
21
23
|
Requires-Dist: langchain-postgres (>=0.0.15,<0.0.16)
|
|
@@ -15,6 +15,37 @@ from cmem_plugin_base.dataintegration.types import (
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
class DatabaseConnectionError(Exception):
|
|
19
|
+
"""Custom exception for database connection issues"""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def check_database_connection(dbname: str, user: str, password: str, host: str, port: int) -> str:
|
|
23
|
+
"""Test database connection and return success message or raise exception on failure"""
|
|
24
|
+
try:
|
|
25
|
+
with (
|
|
26
|
+
psycopg.connect(
|
|
27
|
+
dbname=dbname,
|
|
28
|
+
user=user,
|
|
29
|
+
password=password,
|
|
30
|
+
host=host,
|
|
31
|
+
port=port,
|
|
32
|
+
connect_timeout=10,
|
|
33
|
+
) as conn,
|
|
34
|
+
conn.cursor() as cursor,
|
|
35
|
+
):
|
|
36
|
+
# Test basic connectivity
|
|
37
|
+
cursor.execute("SELECT version();")
|
|
38
|
+
version = cursor.fetchone()[0] # type: ignore[index]
|
|
39
|
+
return f"Connection successful. PostgreSQL version: {version[:50]}..."
|
|
40
|
+
|
|
41
|
+
except psycopg.OperationalError as e:
|
|
42
|
+
raise DatabaseConnectionError(f"Connection failed: {e!s}") from e
|
|
43
|
+
except psycopg.Error as e:
|
|
44
|
+
raise DatabaseConnectionError(f"Database error: {e!s}") from e
|
|
45
|
+
except Exception as e:
|
|
46
|
+
raise DatabaseConnectionError(f"Unexpected error: {e!s}") from e
|
|
47
|
+
|
|
48
|
+
|
|
18
49
|
def get_collection_names(
|
|
19
50
|
dbname: str, user: str, password: str, host: str = "localhost", port: int = 5432
|
|
20
51
|
) -> list[str]:
|
|
@@ -59,24 +90,19 @@ class PGVectorCollection(StringParameterType):
|
|
|
59
90
|
user = depend_on_parameter_values[3]
|
|
60
91
|
password = depend_on_parameter_values[4]
|
|
61
92
|
password = password if isinstance(password, str) else password.decrypt()
|
|
62
|
-
result = []
|
|
93
|
+
result: list[Autocompletion] = []
|
|
63
94
|
try:
|
|
64
95
|
collections = get_collection_names(
|
|
65
96
|
host=host, port=port, dbname=dbname, user=user, password=password
|
|
66
97
|
)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_models]
|
|
76
|
-
except Exception as error:
|
|
77
|
-
raise ValueError(
|
|
78
|
-
"Failed to authenticate with OpenAI API, Please check URL and API key."
|
|
79
|
-
) from error
|
|
98
|
+
except psycopg.Error:
|
|
99
|
+
return result
|
|
100
|
+
filtered_collections = set()
|
|
101
|
+
for term in query_terms:
|
|
102
|
+
for collection in collections:
|
|
103
|
+
if term in collection:
|
|
104
|
+
filtered_collections.add(collection)
|
|
105
|
+
result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_collections]
|
|
80
106
|
result.sort(key=lambda x: x.label)
|
|
81
107
|
return result
|
|
82
108
|
|
{cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/search_task.py
RENAMED
|
@@ -5,7 +5,7 @@ from ast import literal_eval
|
|
|
5
5
|
from collections.abc import Generator, Sequence
|
|
6
6
|
|
|
7
7
|
from cmem_plugin_base.dataintegration.context import ExecutionContext, ExecutionReport
|
|
8
|
-
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
|
|
8
|
+
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginAction, PluginParameter
|
|
9
9
|
from cmem_plugin_base.dataintegration.entity import Entities, Entity, EntityPath, EntitySchema
|
|
10
10
|
from cmem_plugin_base.dataintegration.parameter.password import Password
|
|
11
11
|
from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
|
|
@@ -19,7 +19,11 @@ from langchain_core.embeddings import Embeddings
|
|
|
19
19
|
from langchain_postgres import PGVector
|
|
20
20
|
from langchain_postgres.vectorstores import DistanceStrategy
|
|
21
21
|
|
|
22
|
-
from cmem_plugin_pgvector.commons import DatabaseParams
|
|
22
|
+
from cmem_plugin_pgvector.commons import (
|
|
23
|
+
DatabaseConnectionError,
|
|
24
|
+
DatabaseParams,
|
|
25
|
+
check_database_connection,
|
|
26
|
+
)
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
class DummyEmbeddings(Embeddings):
|
|
@@ -66,6 +70,13 @@ The results in this output are structured like this:
|
|
|
66
70
|
""",
|
|
67
71
|
icon=Icon(package=__package__, file_name="postgresql.svg"),
|
|
68
72
|
plugin_id="cmem_plugin_pgvector-Search",
|
|
73
|
+
actions=[
|
|
74
|
+
PluginAction(
|
|
75
|
+
name="test_connection",
|
|
76
|
+
label="Test Connection",
|
|
77
|
+
description="Test database connectivity",
|
|
78
|
+
)
|
|
79
|
+
],
|
|
69
80
|
parameters=[
|
|
70
81
|
*DatabaseParams().as_list(),
|
|
71
82
|
PluginParameter(
|
|
@@ -147,17 +158,21 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
147
158
|
self.report = ExecutionReport()
|
|
148
159
|
self.report.operation = "search"
|
|
149
160
|
self.report.operation_desc = "searches"
|
|
150
|
-
|
|
151
|
-
self.db = PGVector(
|
|
152
|
-
collection_name=self.collection_name,
|
|
153
|
-
connection=self.connection_string,
|
|
154
|
-
embeddings=DummyEmbeddings(),
|
|
155
|
-
use_jsonb=True,
|
|
156
|
-
pre_delete_collection=False,
|
|
157
|
-
distance_strategy=distance_strategy,
|
|
158
|
-
)
|
|
159
161
|
self._setup_ports()
|
|
160
162
|
|
|
163
|
+
def test_connection(self) -> str:
|
|
164
|
+
"""Plugin Action to test database connection"""
|
|
165
|
+
try:
|
|
166
|
+
return check_database_connection(
|
|
167
|
+
dbname=self.database,
|
|
168
|
+
user=self.user,
|
|
169
|
+
password=self.password,
|
|
170
|
+
host=self.host,
|
|
171
|
+
port=self.port,
|
|
172
|
+
)
|
|
173
|
+
except DatabaseConnectionError as e:
|
|
174
|
+
raise ValueError(f"Connection test failed: {e!s}") from e
|
|
175
|
+
|
|
161
176
|
def _setup_ports(self) -> None:
|
|
162
177
|
"""Configure input and output ports depending on the configuration"""
|
|
163
178
|
input_paths = [EntityPath(path=self.embedding_query_path)]
|
|
@@ -203,7 +218,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
203
218
|
for doc_tuple in docs:
|
|
204
219
|
json_entity = {
|
|
205
220
|
"id": doc_tuple[0].id,
|
|
206
|
-
"metadata":
|
|
221
|
+
"metadata": doc_tuple[0].metadata,
|
|
207
222
|
"_embedding_source": doc_tuple[0].page_content,
|
|
208
223
|
"distance": str(doc_tuple[1]),
|
|
209
224
|
}
|
|
@@ -237,6 +252,14 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
237
252
|
) -> Entities:
|
|
238
253
|
"""Run the workflow operator."""
|
|
239
254
|
self.log.info("Start searching collection.")
|
|
255
|
+
self.db = PGVector(
|
|
256
|
+
collection_name=self.collection_name,
|
|
257
|
+
connection=self.connection_string,
|
|
258
|
+
embeddings=DummyEmbeddings(),
|
|
259
|
+
use_jsonb=True,
|
|
260
|
+
pre_delete_collection=False,
|
|
261
|
+
distance_strategy=self.distance_strategy,
|
|
262
|
+
)
|
|
240
263
|
self.inputs = inputs
|
|
241
264
|
self.execution_context = context
|
|
242
265
|
try:
|
{cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/store_task.py
RENAMED
|
@@ -5,7 +5,7 @@ from collections.abc import Sequence
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
from cmem_plugin_base.dataintegration.context import ExecutionContext, ExecutionReport
|
|
8
|
-
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
|
|
8
|
+
from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginAction, PluginParameter
|
|
9
9
|
from cmem_plugin_base.dataintegration.entity import Entities, Entity, EntityPath
|
|
10
10
|
from cmem_plugin_base.dataintegration.parameter.password import Password
|
|
11
11
|
from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
|
|
@@ -15,7 +15,11 @@ from cmem_plugin_base.dataintegration.ports import (
|
|
|
15
15
|
)
|
|
16
16
|
from langchain_postgres import PGVector
|
|
17
17
|
|
|
18
|
-
from cmem_plugin_pgvector.commons import DatabaseParams
|
|
18
|
+
from cmem_plugin_pgvector.commons import (
|
|
19
|
+
DatabaseConnectionError,
|
|
20
|
+
DatabaseParams,
|
|
21
|
+
check_database_connection,
|
|
22
|
+
)
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
class DataContainer:
|
|
@@ -56,6 +60,13 @@ metadata.
|
|
|
56
60
|
""",
|
|
57
61
|
icon=Icon(package=__package__, file_name="postgresql.svg"),
|
|
58
62
|
plugin_id="cmem_plugin_pgvector-Store",
|
|
63
|
+
actions=[
|
|
64
|
+
PluginAction(
|
|
65
|
+
name="test_connection",
|
|
66
|
+
label="Test Connection",
|
|
67
|
+
description="Test database connectivity",
|
|
68
|
+
)
|
|
69
|
+
],
|
|
59
70
|
parameters=[
|
|
60
71
|
*DatabaseParams().as_list(),
|
|
61
72
|
PluginParameter(
|
|
@@ -137,6 +148,7 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
137
148
|
self.embedding_path = embedding_path
|
|
138
149
|
self.metadata_paths = metadata_paths
|
|
139
150
|
self.source_path = source_path
|
|
151
|
+
self.pre_delete_collection = pre_delete_collection
|
|
140
152
|
|
|
141
153
|
self.output_port = None
|
|
142
154
|
self.input_ports = FixedNumberOfInputs([UnknownSchemaPort()])
|
|
@@ -149,13 +161,18 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
149
161
|
self.report.operation = "store"
|
|
150
162
|
self.report.operation_desc = "vectors stored"
|
|
151
163
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
164
|
+
def test_connection(self) -> str:
|
|
165
|
+
"""Plugin Action to test database connection"""
|
|
166
|
+
try:
|
|
167
|
+
return check_database_connection(
|
|
168
|
+
dbname=self.database,
|
|
169
|
+
user=self.user,
|
|
170
|
+
password=self.password,
|
|
171
|
+
host=self.host,
|
|
172
|
+
port=self.port,
|
|
173
|
+
)
|
|
174
|
+
except DatabaseConnectionError as e:
|
|
175
|
+
raise ValueError(f"Connection test failed: {e!s}") from e
|
|
159
176
|
|
|
160
177
|
def _update_report(self, count: int) -> None:
|
|
161
178
|
self.report.entity_count = count
|
|
@@ -228,6 +245,13 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
228
245
|
) -> None:
|
|
229
246
|
"""Run the workflow operator."""
|
|
230
247
|
self.log.info("Start storing vectors.")
|
|
248
|
+
self.db = PGVector(
|
|
249
|
+
collection_name=self.collection_name,
|
|
250
|
+
connection=self.connection_string,
|
|
251
|
+
embeddings=None, # type: ignore # noqa: PGH003
|
|
252
|
+
use_jsonb=True,
|
|
253
|
+
pre_delete_collection=self.pre_delete_collection,
|
|
254
|
+
)
|
|
231
255
|
self.inputs = inputs
|
|
232
256
|
self.execution_context = context
|
|
233
257
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cmem-plugin-pgvector"
|
|
3
|
-
version = "0.7.0"
|
|
3
|
+
version = "0.8.0"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
description = "Store and search for embedding vectors in a Postgres vector store."
|
|
6
6
|
authors = ["eccenca GmbH <cmempy-developer@eccenca.com>"]
|
|
@@ -40,6 +40,7 @@ pytest-memray = { version = "^1.7.0", markers = "platform_system != 'Windows'"
|
|
|
40
40
|
ruff = "^0.12.0"
|
|
41
41
|
safety = "^1.10.3"
|
|
42
42
|
aiohttp = "^3.10.11"
|
|
43
|
+
testcontainers = "^4.12.0"
|
|
43
44
|
|
|
44
45
|
[build-system]
|
|
45
46
|
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cmem_plugin_pgvector-0.7.0 → cmem_plugin_pgvector-0.8.0}/cmem_plugin_pgvector/postgresql.svg
RENAMED
|
File without changes
|