cmem-plugin-pgvector 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: cmem-plugin-pgvector
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Store and search for embedding vectors in a Postgres vector store.
5
5
  License: Apache-2.0
6
+ License-File: LICENSE
6
7
  Keywords: eccenca Corporate Memory,plugin
7
8
  Author: eccenca GmbH
8
9
  Author-email: cmempy-developer@eccenca.com
@@ -16,6 +17,7 @@ Classifier: Programming Language :: Python :: 3
16
17
  Classifier: Programming Language :: Python :: 3.11
17
18
  Classifier: Programming Language :: Python :: 3.12
18
19
  Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
19
21
  Requires-Dist: cmem-plugin-base (>=4.12.1,<5.0.0)
20
22
  Requires-Dist: langchain-core (>=0.3.71,<0.4.0)
21
23
  Requires-Dist: langchain-postgres (>=0.0.15,<0.0.16)
@@ -15,6 +15,37 @@ from cmem_plugin_base.dataintegration.types import (
15
15
  )
16
16
 
17
17
 
18
class DatabaseConnectionError(Exception):
    """Signal that a database connection check did not succeed.

    The message carries a short category prefix followed by the text of
    the underlying error, which is chained as ``__cause__`` by the raiser.
    """
20
+
21
+
22
def check_database_connection(dbname: str, user: str, password: str, host: str, port: int) -> str:
    """Open a short-lived connection and report the server version.

    Connects with the given credentials, runs ``SELECT version();`` and
    closes the cursor and connection again before returning.

    Args:
        dbname: Name of the database to connect to.
        user: Database user name.
        password: Plain-text password for ``user``.
        host: Host name or address of the database server.
        port: TCP port of the database server.

    Returns:
        A success message containing the (possibly truncated) version
        string reported by the server.

    Raises:
        DatabaseConnectionError: If connecting or querying fails for any
            reason, or if the server returns no row for the version query.
            The original exception, when there is one, is chained as the
            cause.
    """
    connect_timeout = 10  # passed through as the libpq ``connect_timeout`` setting
    max_version_length = 50  # keep the message short enough for a UI notification

    try:
        with (
            psycopg.connect(
                dbname=dbname,
                user=user,
                password=password,
                host=host,
                port=port,
                connect_timeout=connect_timeout,
            ) as conn,
            conn.cursor() as cursor,
        ):
            # Test basic connectivity
            cursor.execute("SELECT version();")
            row = cursor.fetchone()
    except psycopg.OperationalError as e:
        raise DatabaseConnectionError(f"Connection failed: {e!s}") from e
    except psycopg.Error as e:
        raise DatabaseConnectionError(f"Database error: {e!s}") from e
    except Exception as e:
        # Deliberate catch-all: callers only handle DatabaseConnectionError.
        raise DatabaseConnectionError(f"Unexpected error: {e!s}") from e

    # Checked outside the ``try`` so this error is not re-wrapped as "Unexpected error".
    if row is None:
        raise DatabaseConnectionError("Database error: server returned no version information")

    version = str(row[0])
    # Only mark the text as shortened when something was actually cut off.
    ellipsis = "..." if len(version) > max_version_length else ""
    return f"Connection successful. PostgreSQL version: {version[:max_version_length]}{ellipsis}"
47
+
48
+
18
49
  def get_collection_names(
19
50
  dbname: str, user: str, password: str, host: str = "localhost", port: int = 5432
20
51
  ) -> list[str]:
@@ -59,24 +90,19 @@ class PGVectorCollection(StringParameterType):
59
90
  user = depend_on_parameter_values[3]
60
91
  password = depend_on_parameter_values[4]
61
92
  password = password if isinstance(password, str) else password.decrypt()
62
- result = []
93
+ result: list[Autocompletion] = []
63
94
  try:
64
95
  collections = get_collection_names(
65
96
  host=host, port=port, dbname=dbname, user=user, password=password
66
97
  )
67
- filtered_models = set()
68
- if query_terms:
69
- for term in query_terms:
70
- for collection in collections:
71
- if term in collection:
72
- filtered_models.add(collection)
73
- else:
74
- filtered_models = set(collections)
75
- result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_models]
76
- except Exception as error:
77
- raise ValueError(
78
- "Failed to authenticate with OpenAI API, Please check URL and API key."
79
- ) from error
98
+ except psycopg.Error:
99
+ return result
100
+ filtered_collections = set()
101
+ for term in query_terms:
102
+ for collection in collections:
103
+ if term in collection:
104
+ filtered_collections.add(collection)
105
+ result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_collections]
80
106
  result.sort(key=lambda x: x.label)
81
107
  return result
82
108
 
@@ -5,7 +5,7 @@ from ast import literal_eval
5
5
  from collections.abc import Generator, Sequence
6
6
 
7
7
  from cmem_plugin_base.dataintegration.context import ExecutionContext, ExecutionReport
8
- from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
8
+ from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginAction, PluginParameter
9
9
  from cmem_plugin_base.dataintegration.entity import Entities, Entity, EntityPath, EntitySchema
10
10
  from cmem_plugin_base.dataintegration.parameter.password import Password
11
11
  from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
@@ -19,7 +19,11 @@ from langchain_core.embeddings import Embeddings
19
19
  from langchain_postgres import PGVector
20
20
  from langchain_postgres.vectorstores import DistanceStrategy
21
21
 
22
- from cmem_plugin_pgvector.commons import DatabaseParams
22
+ from cmem_plugin_pgvector.commons import (
23
+ DatabaseConnectionError,
24
+ DatabaseParams,
25
+ check_database_connection,
26
+ )
23
27
 
24
28
 
25
29
  class DummyEmbeddings(Embeddings):
@@ -66,6 +70,13 @@ The results in this output are structured like this:
66
70
  """,
67
71
  icon=Icon(package=__package__, file_name="postgresql.svg"),
68
72
  plugin_id="cmem_plugin_pgvector-Search",
73
+ actions=[
74
+ PluginAction(
75
+ name="test_connection",
76
+ label="Test Connection",
77
+ description="Test database connectivity",
78
+ )
79
+ ],
69
80
  parameters=[
70
81
  *DatabaseParams().as_list(),
71
82
  PluginParameter(
@@ -147,17 +158,21 @@ class PGVectorSearchPlugin(WorkflowPlugin):
147
158
  self.report = ExecutionReport()
148
159
  self.report.operation = "search"
149
160
  self.report.operation_desc = "searches"
150
-
151
- self.db = PGVector(
152
- collection_name=self.collection_name,
153
- connection=self.connection_string,
154
- embeddings=DummyEmbeddings(),
155
- use_jsonb=True,
156
- pre_delete_collection=False,
157
- distance_strategy=distance_strategy,
158
- )
159
161
  self._setup_ports()
160
162
 
163
def test_connection(self) -> str:
    """Plugin action: check that the configured database accepts connections.

    Returns:
        The success message produced by ``check_database_connection``.

    Raises:
        ValueError: If the check raises ``DatabaseConnectionError``; the
            original error is chained as the cause.
    """
    # ``check_database_connection`` is annotated to take a plain ``str``.
    # NOTE(review): presumably ``self.password`` can still be a ``Password``
    # object at this point - confirm against ``__init__``. Unwrapping is a
    # no-op when it is already a string.
    password = self.password
    if not isinstance(password, str):
        password = password.decrypt()
    try:
        return check_database_connection(
            dbname=self.database,
            user=self.user,
            password=password,
            host=self.host,
            port=self.port,
        )
    except DatabaseConnectionError as e:
        raise ValueError(f"Connection test failed: {e!s}") from e
175
+
161
176
  def _setup_ports(self) -> None:
162
177
  """Configure input and output ports depending on the configuration"""
163
178
  input_paths = [EntityPath(path=self.embedding_query_path)]
@@ -203,7 +218,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
203
218
  for doc_tuple in docs:
204
219
  json_entity = {
205
220
  "id": doc_tuple[0].id,
206
- "metadata": str(doc_tuple[0].metadata),
221
+ "metadata": doc_tuple[0].metadata,
207
222
  "_embedding_source": doc_tuple[0].page_content,
208
223
  "distance": str(doc_tuple[1]),
209
224
  }
@@ -237,6 +252,14 @@ class PGVectorSearchPlugin(WorkflowPlugin):
237
252
  ) -> Entities:
238
253
  """Run the workflow operator."""
239
254
  self.log.info("Start searching collection.")
255
+ self.db = PGVector(
256
+ collection_name=self.collection_name,
257
+ connection=self.connection_string,
258
+ embeddings=DummyEmbeddings(),
259
+ use_jsonb=True,
260
+ pre_delete_collection=False,
261
+ distance_strategy=self.distance_strategy,
262
+ )
240
263
  self.inputs = inputs
241
264
  self.execution_context = context
242
265
  try:
@@ -5,7 +5,7 @@ from collections.abc import Sequence
5
5
  from typing import Any
6
6
 
7
7
  from cmem_plugin_base.dataintegration.context import ExecutionContext, ExecutionReport
8
- from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginParameter
8
+ from cmem_plugin_base.dataintegration.description import Icon, Plugin, PluginAction, PluginParameter
9
9
  from cmem_plugin_base.dataintegration.entity import Entities, Entity, EntityPath
10
10
  from cmem_plugin_base.dataintegration.parameter.password import Password
11
11
  from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin
@@ -15,7 +15,11 @@ from cmem_plugin_base.dataintegration.ports import (
15
15
  )
16
16
  from langchain_postgres import PGVector
17
17
 
18
- from cmem_plugin_pgvector.commons import DatabaseParams
18
+ from cmem_plugin_pgvector.commons import (
19
+ DatabaseConnectionError,
20
+ DatabaseParams,
21
+ check_database_connection,
22
+ )
19
23
 
20
24
 
21
25
  class DataContainer:
@@ -56,6 +60,13 @@ metadata.
56
60
  """,
57
61
  icon=Icon(package=__package__, file_name="postgresql.svg"),
58
62
  plugin_id="cmem_plugin_pgvector-Store",
63
+ actions=[
64
+ PluginAction(
65
+ name="test_connection",
66
+ label="Test Connection",
67
+ description="Test database connectivity",
68
+ )
69
+ ],
59
70
  parameters=[
60
71
  *DatabaseParams().as_list(),
61
72
  PluginParameter(
@@ -137,6 +148,7 @@ class PGVectorStorePlugin(WorkflowPlugin):
137
148
  self.embedding_path = embedding_path
138
149
  self.metadata_paths = metadata_paths
139
150
  self.source_path = source_path
151
+ self.pre_delete_collection = pre_delete_collection
140
152
 
141
153
  self.output_port = None
142
154
  self.input_ports = FixedNumberOfInputs([UnknownSchemaPort()])
@@ -149,13 +161,18 @@ class PGVectorStorePlugin(WorkflowPlugin):
149
161
  self.report.operation = "store"
150
162
  self.report.operation_desc = "vectors stored"
151
163
 
152
- self.db = PGVector(
153
- collection_name=self.collection_name,
154
- connection=self.connection_string,
155
- embeddings=None, # type: ignore # noqa: PGH003
156
- use_jsonb=True,
157
- pre_delete_collection=pre_delete_collection,
158
- )
164
def test_connection(self) -> str:
    """Plugin action: verify the store plugin's database settings.

    Returns:
        The success message produced by ``check_database_connection``.

    Raises:
        ValueError: If the check raises ``DatabaseConnectionError``; the
            original error is chained as the cause.
    """
    # The connection check expects a plain-text ``str`` password.
    # NOTE(review): presumably ``self.password`` may be a ``Password``
    # object rather than a string - confirm against ``__init__``. The
    # guard leaves an existing string untouched.
    password = self.password
    if not isinstance(password, str):
        password = password.decrypt()
    try:
        return check_database_connection(
            dbname=self.database,
            user=self.user,
            password=password,
            host=self.host,
            port=self.port,
        )
    except DatabaseConnectionError as e:
        raise ValueError(f"Connection test failed: {e!s}") from e
159
176
 
160
177
  def _update_report(self, count: int) -> None:
161
178
  self.report.entity_count = count
@@ -228,6 +245,13 @@ class PGVectorStorePlugin(WorkflowPlugin):
228
245
  ) -> None:
229
246
  """Run the workflow operator."""
230
247
  self.log.info("Start storing vectors.")
248
+ self.db = PGVector(
249
+ collection_name=self.collection_name,
250
+ connection=self.connection_string,
251
+ embeddings=None, # type: ignore # noqa: PGH003
252
+ use_jsonb=True,
253
+ pre_delete_collection=self.pre_delete_collection,
254
+ )
231
255
  self.inputs = inputs
232
256
  self.execution_context = context
233
257
  try:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cmem-plugin-pgvector"
3
- version = "0.7.0"
3
+ version = "0.8.0"
4
4
  license = "Apache-2.0"
5
5
  description = "Store and search for embedding vectors in a Postgres vector store."
6
6
  authors = ["eccenca GmbH <cmempy-developer@eccenca.com>"]
@@ -40,6 +40,7 @@ pytest-memray = { version = "^1.7.0", markers = "platform_system != 'Windows'"
40
40
  ruff = "^0.12.0"
41
41
  safety = "^1.10.3"
42
42
  aiohttp = "^3.10.11"
43
+ testcontainers = "^4.12.0"
43
44
 
44
45
  [build-system]
45
46
  requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]