cmem-plugin-pgvector 0.6.3__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/PKG-INFO +1 -1
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/commons.py +9 -17
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/search_task.py +21 -9
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/store_task.py +8 -8
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/pyproject.toml +2 -1
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/LICENSE +0 -0
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/README-public.md +0 -0
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/__init__.py +0 -0
- {cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/postgresql.svg +0 -0
|
@@ -60,23 +60,15 @@ class PGVectorCollection(StringParameterType):
|
|
|
60
60
|
password = depend_on_parameter_values[4]
|
|
61
61
|
password = password if isinstance(password, str) else password.decrypt()
|
|
62
62
|
result = []
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
filtered_models.add(collection)
|
|
73
|
-
else:
|
|
74
|
-
filtered_models = set(collections)
|
|
75
|
-
result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_models]
|
|
76
|
-
except Exception as error:
|
|
77
|
-
raise ValueError(
|
|
78
|
-
"Failed to authenticate with OpenAI API, Please check URL and API key."
|
|
79
|
-
) from error
|
|
63
|
+
collections = get_collection_names(
|
|
64
|
+
host=host, port=port, dbname=dbname, user=user, password=password
|
|
65
|
+
)
|
|
66
|
+
filtered_collections = set()
|
|
67
|
+
for term in query_terms:
|
|
68
|
+
for collection in collections:
|
|
69
|
+
if term in collection:
|
|
70
|
+
filtered_collections.add(collection)
|
|
71
|
+
result = [Autocompletion(value=f"{_}", label=f"{_}") for _ in filtered_collections]
|
|
80
72
|
result.sort(key=lambda x: x.label)
|
|
81
73
|
return result
|
|
82
74
|
|
{cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/search_task.py
RENAMED
|
@@ -13,10 +13,11 @@ from cmem_plugin_base.dataintegration.ports import (
|
|
|
13
13
|
FixedNumberOfInputs,
|
|
14
14
|
FixedSchemaPort,
|
|
15
15
|
)
|
|
16
|
-
from cmem_plugin_base.dataintegration.types import IntParameterType
|
|
16
|
+
from cmem_plugin_base.dataintegration.types import EnumParameterType, IntParameterType
|
|
17
17
|
from langchain_core.documents import Document
|
|
18
18
|
from langchain_core.embeddings import Embeddings
|
|
19
19
|
from langchain_postgres import PGVector
|
|
20
|
+
from langchain_postgres.vectorstores import DistanceStrategy
|
|
20
21
|
|
|
21
22
|
from cmem_plugin_pgvector.commons import DatabaseParams
|
|
22
23
|
|
|
@@ -86,6 +87,14 @@ The results in this output are structured like this:
|
|
|
86
87
|
default_value=10,
|
|
87
88
|
param_type=IntParameterType(),
|
|
88
89
|
),
|
|
90
|
+
PluginParameter(
|
|
91
|
+
name="distance_strategy",
|
|
92
|
+
label="Distance Strategy",
|
|
93
|
+
description="The distance strategy to use. (default: COSINE)",
|
|
94
|
+
param_type=EnumParameterType(enum_type=DistanceStrategy),
|
|
95
|
+
default_value=DistanceStrategy.COSINE,
|
|
96
|
+
advanced=True,
|
|
97
|
+
),
|
|
89
98
|
],
|
|
90
99
|
)
|
|
91
100
|
class PGVectorSearchPlugin(WorkflowPlugin):
|
|
@@ -105,6 +114,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
105
114
|
report: ExecutionReport
|
|
106
115
|
search_result_path: str
|
|
107
116
|
top_k: int
|
|
117
|
+
distance_strategy: DistanceStrategy
|
|
108
118
|
|
|
109
119
|
def __init__( # noqa: PLR0913
|
|
110
120
|
self,
|
|
@@ -117,6 +127,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
117
127
|
search_result_path: str = "_search_result",
|
|
118
128
|
embedding_query_path: str = "_embedding",
|
|
119
129
|
top_k: int = 10,
|
|
130
|
+
distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
|
|
120
131
|
) -> None:
|
|
121
132
|
self.collection_name = collection_name
|
|
122
133
|
self.user = user
|
|
@@ -126,6 +137,7 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
126
137
|
self.embedding_query_path = embedding_query_path
|
|
127
138
|
self.search_result_path = search_result_path
|
|
128
139
|
self.top_k = top_k
|
|
140
|
+
self.distance_strategy = distance_strategy
|
|
129
141
|
|
|
130
142
|
str_password = self.password = password if isinstance(password, str) else password.decrypt()
|
|
131
143
|
self.connection_string = (
|
|
@@ -135,14 +147,6 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
135
147
|
self.report = ExecutionReport()
|
|
136
148
|
self.report.operation = "search"
|
|
137
149
|
self.report.operation_desc = "searches"
|
|
138
|
-
|
|
139
|
-
self.db = PGVector(
|
|
140
|
-
collection_name=self.collection_name,
|
|
141
|
-
connection=self.connection_string,
|
|
142
|
-
embeddings=DummyEmbeddings(),
|
|
143
|
-
use_jsonb=True,
|
|
144
|
-
pre_delete_collection=False,
|
|
145
|
-
)
|
|
146
150
|
self._setup_ports()
|
|
147
151
|
|
|
148
152
|
def _setup_ports(self) -> None:
|
|
@@ -224,6 +228,14 @@ class PGVectorSearchPlugin(WorkflowPlugin):
|
|
|
224
228
|
) -> Entities:
|
|
225
229
|
"""Run the workflow operator."""
|
|
226
230
|
self.log.info("Start searching collection.")
|
|
231
|
+
self.db = PGVector(
|
|
232
|
+
collection_name=self.collection_name,
|
|
233
|
+
connection=self.connection_string,
|
|
234
|
+
embeddings=DummyEmbeddings(),
|
|
235
|
+
use_jsonb=True,
|
|
236
|
+
pre_delete_collection=False,
|
|
237
|
+
distance_strategy=self.distance_strategy,
|
|
238
|
+
)
|
|
227
239
|
self.inputs = inputs
|
|
228
240
|
self.execution_context = context
|
|
229
241
|
try:
|
{cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/store_task.py
RENAMED
|
@@ -137,6 +137,7 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
137
137
|
self.embedding_path = embedding_path
|
|
138
138
|
self.metadata_paths = metadata_paths
|
|
139
139
|
self.source_path = source_path
|
|
140
|
+
self.pre_delete_collection = pre_delete_collection
|
|
140
141
|
|
|
141
142
|
self.output_port = None
|
|
142
143
|
self.input_ports = FixedNumberOfInputs([UnknownSchemaPort()])
|
|
@@ -149,14 +150,6 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
149
150
|
self.report.operation = "store"
|
|
150
151
|
self.report.operation_desc = "vectors stored"
|
|
151
152
|
|
|
152
|
-
self.db = PGVector(
|
|
153
|
-
collection_name=self.collection_name,
|
|
154
|
-
connection=self.connection_string,
|
|
155
|
-
embeddings=None, # type: ignore # noqa: PGH003
|
|
156
|
-
use_jsonb=True,
|
|
157
|
-
pre_delete_collection=pre_delete_collection,
|
|
158
|
-
)
|
|
159
|
-
|
|
160
153
|
def _update_report(self, count: int) -> None:
|
|
161
154
|
self.report.entity_count = count
|
|
162
155
|
self.execution_context.report.update(self.report)
|
|
@@ -228,6 +221,13 @@ class PGVectorStorePlugin(WorkflowPlugin):
|
|
|
228
221
|
) -> None:
|
|
229
222
|
"""Run the workflow operator."""
|
|
230
223
|
self.log.info("Start storing vectors.")
|
|
224
|
+
self.db = PGVector(
|
|
225
|
+
collection_name=self.collection_name,
|
|
226
|
+
connection=self.connection_string,
|
|
227
|
+
embeddings=None, # type: ignore # noqa: PGH003
|
|
228
|
+
use_jsonb=True,
|
|
229
|
+
pre_delete_collection=self.pre_delete_collection,
|
|
230
|
+
)
|
|
231
231
|
self.inputs = inputs
|
|
232
232
|
self.execution_context = context
|
|
233
233
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cmem-plugin-pgvector"
|
|
3
|
-
version = "0.6.3"
|
|
3
|
+
version = "0.7.1"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
description = "Store and search for embedding vectors in a Postgres vector store."
|
|
6
6
|
authors = ["eccenca GmbH <cmempy-developer@eccenca.com>"]
|
|
@@ -40,6 +40,7 @@ pytest-memray = { version = "^1.7.0", markers = "platform_system != 'Windows'"
|
|
|
40
40
|
ruff = "^0.12.0"
|
|
41
41
|
safety = "^1.10.3"
|
|
42
42
|
aiohttp = "^3.10.11"
|
|
43
|
+
testcontainers = "^4.12.0"
|
|
43
44
|
|
|
44
45
|
[build-system]
|
|
45
46
|
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cmem_plugin_pgvector-0.6.3 → cmem_plugin_pgvector-0.7.1}/cmem_plugin_pgvector/postgresql.svg
RENAMED
|
File without changes
|