cognee-community-vector-adapter-redis 0.0.1-py3-none-any.whl → 0.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee_community_vector_adapter_redis/redis_adapter.py +177 -125
- {cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.0.3.dist-info}/METADATA +99 -71
- cognee_community_vector_adapter_redis-0.0.3.dist-info/RECORD +6 -0
- cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD +0 -6
- {cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.0.3.dist-info}/WHEEL +0 -0
cognee_community_vector_adapter_redis/redis_adapter.py

@@ -6,35 +6,40 @@ from uuid import UUID
 from redisvl.index import AsyncSearchIndex
 from redisvl.schema import IndexSchema
 from redisvl.query import VectorQuery
+# from redisvl.query import VectorDistanceMetric

-from cognee.exceptions import InvalidValueError
 from cognee.shared.logging_utils import get_logger

 from cognee.infrastructure.engine import DataPoint
 from cognee.infrastructure.engine.utils import parse_id
+from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
 from cognee.infrastructure.databases.vector import VectorDBInterface
 from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
-from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import (
+    EmbeddingEngine,
+)

 logger = get_logger("RedisAdapter")


 class VectorEngineInitializationError(Exception):
     """Exception raised when vector engine initialization fails."""
+
     pass


 class CollectionNotFoundError(Exception):
     """Exception raised when a collection is not found."""
+
     pass


 def serialize_for_json(obj: Any) -> Any:
     """Convert objects to JSON-serializable format.
-
+
     Args:
         obj: Object to serialize (UUID, dict, list, or any other type).
-
+
     Returns:
         JSON-serializable representation of the object.
     """
@@ -50,40 +55,41 @@ def serialize_for_json(obj: Any) -> Any:

 class RedisDataPoint(DataPoint):
     """Redis data point schema for vector index entries.
-
+
     Attributes:
         text: The text content to be indexed.
         metadata: Metadata containing index field configuration.
     """
+
     text: str
     metadata: dict = {"index_fields": ["text"]}


 class RedisAdapter(VectorDBInterface):
     """Redis vector database adapter using RedisVL for vector similarity search.
-
+
     This adapter provides an interface to Redis vector search capabilities,
     supporting embedding generation, vector indexing, and similarity search.
     """
-
+
     name = "Redis"
     url: Optional[str]
     api_key: Optional[str] = None
     embedding_engine: Optional[EmbeddingEngine] = None
-
+
     def __init__(
-        self,
+        self,
         url: str,
         api_key: Optional[str] = None,
-        embedding_engine: Optional[EmbeddingEngine] = None
+        embedding_engine: Optional[EmbeddingEngine] = None,
     ) -> None:
         """Initialize the Redis adapter.
-
+
         Args:
             url (str): Connection string for your Redis instance like redis://localhost:6379.
             embedding_engine: Engine for generating embeddings.
             api_key: Optional API key. Ignored for Redis.
-
+
         Raises:
             VectorEngineInitializationError: If required parameters are missing.
         """
@@ -91,31 +97,32 @@ class RedisAdapter(VectorDBInterface):
             raise VectorEngineInitializationError("Redis connnection URL!")
         if not embedding_engine:
             raise VectorEngineInitializationError("Embedding engine is required!")
-
+
         self.url = url
         self.embedding_engine = embedding_engine
         self._indices = {}
-
+        self.VECTOR_DB_LOCK = asyncio.Lock()
+
     async def embed_data(self, data: List[str]) -> List[List[float]]:
         """Embed text data using the embedding engine.
-
+
         Args:
             data: List of text strings to embed.
-
+
         Returns:
             List of embedding vectors as lists of floats.
-
+
         Raises:
             Exception: If embedding generation fails.
         """
         return await self.embedding_engine.embed_text(data)
-
+
     def _create_schema(self, collection_name: str) -> IndexSchema:
         """Create a RedisVL IndexSchema for a collection.
-
+
         Args:
             collection_name: Name of the collection to create an index schema for.
-
+
         Returns:
             Redis IndexSchema configured for vector search.
         """
@@ -123,7 +130,7 @@ class RedisAdapter(VectorDBInterface):
             "index": {
                 "name": collection_name,
                 "prefix": f"{collection_name}",
-                "storage_type": "json"
+                "storage_type": "json",
             },
             "fields": [
                 {"name": "id", "type": "tag", "attrs": {"sortable": True}},
@@ -136,133 +143,154 @@ class RedisAdapter(VectorDBInterface):
                         "m": 32,
                         "dims": self.embedding_engine.get_vector_size(),
                         "distance_metric": "cosine",
-                        "datatype": "float32"
-                    }
+                        "datatype": "float32",
+                    },
                 },
-                {"name": "
-            ]
+                {"name": "payload_data", "type": "text", "attrs": {"sortable": True}},
+            ],
         }
         return IndexSchema.from_dict(schema_dict)
-
+
     def _get_index(self, collection_name: str) -> AsyncSearchIndex:
         """Get or create an AsyncSearchIndex for a collection.
-
+
         Args:
             collection_name: Name of the collection.
-
+
         Returns:
             AsyncSearchIndex instance for the collection.
         """
         if collection_name not in self._indices:
             schema = self._create_schema(collection_name)
             self._indices[collection_name] = AsyncSearchIndex(
-                schema=schema,
-                redis_url=self.url,
-                validate_on_load=True
+                schema=schema, redis_url=self.url, validate_on_load=True
             )
         return self._indices[collection_name]
-
+
     async def has_collection(self, collection_name: str) -> bool:
         """Check if a collection (index) exists.
-
+
         Args:
             collection_name: Name of the collection to check.
-
+
         Returns:
             True if collection exists, False otherwise.
         """
         try:
             index = self._get_index(collection_name)
-
+            result = await index.exists()
+            await index.disconnect()
+            return result
         except Exception:
             return False
-
+
     async def create_collection(
         self,
         collection_name: str,
         payload_schema: Optional[Any] = None,
     ) -> None:
         """Create a new collection (Redis index) with vector search capabilities.
-
+
         Args:
             collection_name: Name of the collection to create.
             payload_schema: Schema for payload data (not used).
-
+
         Raises:
             Exception: If collection creation fails.
         """
-
-        if await self.has_collection(collection_name):
-            logger.info(f"Collection {collection_name} already exists")
-            return
-
+        async with self.VECTOR_DB_LOCK:
             index = self._get_index(collection_name)
-
-
-
-
-
-
-
-
-
+            try:
+                if await self.has_collection(collection_name):
+                    logger.info(f"Collection {collection_name} already exists")
+                    return
+
+                index = self._get_index(collection_name)
+                await index.create(overwrite=False)
+
+                logger.info(f"Created collection {collection_name}")
+
+            except Exception as e:
+                logger.error(f"Error creating collection {collection_name}: {str(e)}")
+                raise e
+            finally:
+                await index.disconnect()
+
+    async def create_data_points(
+        self, collection_name: str, data_points: List[DataPoint]
+    ) -> None:
         """Create data points in the collection.
-
+
         Args:
             collection_name: Name of the target collection.
             data_points: List of DataPoint objects to insert.
-
+
         Raises:
             CollectionNotFoundError: If the collection doesn't exist.
             Exception: If data point creation fails.
         """
+        index = self._get_index(collection_name)
         try:
             if not await self.has_collection(collection_name):
-                raise CollectionNotFoundError(
-
+                raise CollectionNotFoundError(
+                    f"Collection {collection_name} not found!"
+                )
+
             # Embed the data points
             data_vectors = await self.embed_data(
-                [
+                [
+                    DataPoint.get_embeddable_data(data_point)
+                    for data_point in data_points
+                ]
             )
-
+
             # Prepare documents for RedisVL
             documents = []
             for data_point, embedding in zip(data_points, data_vectors):
                 # Serialize the payload to handle UUIDs and other non-JSON types
                 payload = serialize_for_json(data_point.model_dump())
-
+
                 doc_data = {
                     "id": str(data_point.id),
-                    "text": getattr(
+                    "text": getattr(
+                        data_point,
+                        data_point.metadata.get("index_fields", ["text"])[0],
+                        "",
+                    ),
                     "vector": embedding,
-                    "
+                    "payload_data": json.dumps(payload),  # Store as JSON string
                 }
                 documents.append(doc_data)
-
+
             # Load using RedisVL
-            index = self._get_index(collection_name)
             await index.load(documents, id_field="id")
-
-            logger.info(
-
+
+            logger.info(
+                f"Created {len(data_points)} data points in collection {collection_name}"
+            )
+
         except Exception as e:
             logger.error(f"Error creating data points: {str(e)}")
             raise e
-
-
+        finally:
+            await index.disconnect()
+
+    async def create_vector_index(
+        self, index_name: str, index_property_name: str
+    ) -> None:
         """Create a vector index for a specific property.
-
+
         Args:
             index_name: Base name for the index.
             index_property_name: Property name to index.
         """
         await self.create_collection(f"{index_name}_{index_property_name}")
-
+
     async def index_data_points(
         self, index_name: str, index_property_name: str, data_points: list[DataPoint]
     ) -> None:
         """Index data points for a specific property.
-
+
         Args:
             index_name: Base name for the index.
             index_property_name: Property name to index.
@@ -273,44 +301,50 @@ class RedisAdapter(VectorDBInterface):
             [
                 RedisDataPoint(
                     id=data_point.id,
-                    text=getattr(
+                    text=getattr(
+                        data_point, data_point.metadata.get("index_fields", ["text"])[0]
+                    ),
                 )
                 for data_point in data_points
             ],
         )
-
-    async def retrieve(
+
+    async def retrieve(
+        self, collection_name: str, data_point_ids: List[str]
+    ) -> List[Dict[str, Any]]:
         """Retrieve data points by their IDs.
-
+
         Args:
             collection_name: Name of the collection to retrieve from.
             data_point_ids: List of data point IDs to retrieve.
-
+
         Returns:
             List of retrieved data point payloads.
         """
+        index = self._get_index(collection_name)
         try:
-            index = self._get_index(collection_name)
             results = []
-
+
             for data_id in data_point_ids:
                 doc = await index.fetch(data_id)
                 if doc:
                     # Parse the stored payload JSON
-                    payload_str = doc.get("
+                    payload_str = doc.get("payload_data", "{}")
                     try:
                         payload = json.loads(payload_str)
                         results.append(payload)
                     except json.JSONDecodeError:
                         # Fallback to the document itself if payload parsing fails
                         results.append(doc)
-
+
             return results
-
+
         except Exception as e:
             logger.error(f"Error retrieving data points: {str(e)}")
             return []
-
+        finally:
+            await index.disconnect()
+
     async def search(
         self,
         collection_name: str,
@@ -320,78 +354,89 @@ class RedisAdapter(VectorDBInterface):
         with_vector: bool = False,
     ) -> List[ScoredResult]:
         """Search for similar vectors in the collection.
-
+
         Args:
             collection_name: Name of the collection to search.
             query_text: Text query to search for (will be embedded).
             query_vector: Pre-computed query vector.
             limit: Maximum number of results to return.
             with_vector: Whether to include vectors in results.
-
+
         Returns:
             List of ScoredResult objects sorted by similarity.
-
+
         Raises:
-
+            MissingQueryParameterError: If neither query_text nor query_vector is provided.
             Exception: If search execution fails.
         """
         if query_text is None and query_vector is None:
-            raise
-
-        if limit <= 0:
-            return []
-
+            raise MissingQueryParameterError()
+
         if not await self.has_collection(collection_name):
-            logger.warning(
+            logger.warning(
+                f"Collection '{collection_name}' not found in RedisAdapter.search; returning []."
+            )
             return []
-
+
+        index = self._get_index(collection_name)
+
+        if limit == 0:
+            info = await index.info()
+            limit = info["num_docs"]
+
+        if limit == 0:
+            return []
+
         try:
             # Get the query vector
             if query_vector is None:
                 query_vector = (await self.embed_data([query_text]))[0]
-
+
             # Create the vector query
             vector_query = VectorQuery(
                 vector=query_vector,
                 vector_field_name="vector",
                 num_results=limit,
                 return_score=True,
-                normalize_vector_distance=
+                normalize_vector_distance=False,
             )
-
+
             # Set return fields
-            return_fields = ["id", "text", "
+            return_fields = ["id", "text", "payload_data"]
             if with_vector:
                 return_fields.append("vector")
             vector_query = vector_query.return_fields(*return_fields)
-
+
             # Execute the search
-            index = self._get_index(collection_name)
             results = await index.query(vector_query)
-
+
             # Convert results to ScoredResult objects
             scored_results = []
             for doc in results:
                 # Parse the stored payload - it's stored as JSON string
-                payload_str = doc.get("
+                payload_str = doc.get("payload_data", "{}")
                 try:
                     payload = json.loads(payload_str)
                 except json.JSONDecodeError:
                     payload = doc
-
+
                 scored_results.append(
                     ScoredResult(
-                        id=parse_id(doc["id"]),
+                        id=parse_id(doc["id"].split(":", 1)[1]),
                         payload=payload,
-                        score=float(
+                        score=float(
+                            doc.get("vector_distance", 0.0)
+                        ),  # RedisVL returns distance
                     )
                 )
             return scored_results
-
+
         except Exception as e:
             logger.error(f"Error during search: {str(e)}")
             raise e
-
+        finally:
+            await index.disconnect()
+
     async def batch_search(
         self,
         collection_name: str,
@@ -400,19 +445,19 @@ class RedisAdapter(VectorDBInterface):
         with_vectors: bool = False,
     ) -> List[List[ScoredResult]]:
         """Perform batch search for multiple queries.
-
+
         Args:
             collection_name: Name of the collection to search.
             query_texts: List of text queries to search for.
             limit: Maximum number of results per query.
             with_vectors: Whether to include vectors in results.
-
+
         Returns:
             List of search results for each query, filtered by score threshold.
         """
         # Embed all queries at once
         vectors = await self.embed_data(query_texts)
-
+
         # Execute searches in parallel
         # TODO: replace with index.batch_query() in the future
         search_tasks = [
@@ -420,46 +465,52 @@ class RedisAdapter(VectorDBInterface):
                 collection_name=collection_name,
                 query_vector=vector,
                 limit=limit,
-                with_vector=with_vectors
+                with_vector=with_vectors,
             )
             for vector in vectors
         ]
-
+
         results = await asyncio.gather(*search_tasks)
-
+
         # Filter results by score threshold (Redis uses distance, so lower is better)
         return [
             [result for result in result_group if result.score < 0.1]
             for result_group in results
         ]
-
-    async def delete_data_points(
+
+    async def delete_data_points(
+        self, collection_name: str, data_point_ids: List[str]
+    ) -> Dict[str, int]:
         """Delete data points by their IDs.
-
+
         Args:
             collection_name: Name of the collection to delete from.
             data_point_ids: List of data point IDs to delete.
-
+
         Returns:
             Dictionary containing the number of deleted documents.
-
+
         Raises:
             Exception: If deletion fails.
         """
+        index = self._get_index(collection_name)
         try:
-            index = self._get_index(collection_name)
             deleted_count = await index.drop_documents(data_point_ids)
-            logger.info(
+            logger.info(
+                f"Deleted {deleted_count} data points from collection {collection_name}"
+            )
             return {"deleted": deleted_count}
         except Exception as e:
             logger.error(f"Error deleting data points: {str(e)}")
             raise e
-
+        finally:
+            await index.disconnect()
+
     async def prune(self) -> None:
         """Remove all collections and data from Redis.
-
+
         This method drops all existing indices and clears the internal cache.
-
+
         Raises:
             Exception: If pruning fails.
         """
@@ -473,12 +524,13 @@ class RedisAdapter(VectorDBInterface):
                     await index.disconnect()
                 except Exception as e:
                     logger.warning(f"Failed to drop index {collection_name}: {str(e)}")
-
+                    await index.disconnect()
+
             # Clear the indices cache
             self._indices.clear()
-
+
             logger.info("Pruned all Redis vector collections")
-
+
         except Exception as e:
             logger.error(f"Error during prune: {str(e)}")
             raise e

{cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.0.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cognee-community-vector-adapter-redis
-Version: 0.0.1
+Version: 0.0.3
 Summary: Redis vector database adapter for cognee
 Requires-Python: <=3.13,>=3.11
 Requires-Dist: cognee>=0.2.0.dev0
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
 <br />

 [](https://opensource.org/licenses/Apache-2.0)
-
+

 [](https://github.com/redis/redis-vl-python)

@@ -44,10 +44,19 @@ Description-Content-Type: text/markdown

 ## Installation

+If published, the package can be simply installed via pip:
+
 ```bash
 pip install cognee-community-vector-adapter-redis
 ```

+In case it is not published yet, you can use poetry to locally build the adapter package:
+
+```bash
+pip install poetry
+poetry install # run this command in the directory containing the pyproject.toml file
+```
+
 ## Prerequisites

 You need a Redis instance with the Redis Search module enabled. You can use:
@@ -71,96 +80,115 @@ uv run examples/example.py
 ## Usage

 ```python
-
-
-
-
-
-
-
-)
-
-
-
-
-
-
-
+import os
+import asyncio
+from cognee import config, prune, add, cognify, search, SearchType
+
+# Import the register module to enable Redis support
+from cognee_community_vector_adapter_redis import register
+
+async def main():
+    # Configure Redis as vector database
+    config.set_vector_db_config({
+        "vector_db_provider": "redis",
+        "vector_db_url": os.getenv("VECTOR_DB_URL", "redis://localhost:6379"),
+        "vector_db_key": os.getenv("VECTOR_DB_KEY", "your-api-key"),  # Optional
+    })
+
+    # Optional: Clean previous data
+    await prune.prune_data()
+    await prune.prune_system()
+
+    # Add your content
+    await add("""
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """)
+
+    # Process with cognee
+    await cognify()
+
+    # Search
+    search_results = await search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Tell me about NLP"
+    )
+
+    for result in search_results:
+        print("Search result:", result)
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```

-
-await redis_adapter.create_collection("my_collection")
+## Configuration

-
-from cognee.infrastructure.engine import DataPoint
+Configure Redis as your vector database in cognee:

-
-
-
-]
+- `vector_db_provider`: Set to "redis"
+- `vector_db_url`: Redis connection URL (e.g., "redis://localhost:6379")
+- `vector_db_key`: Optional API key parameter (for compatibility, not used by Redis)

-
+### Environment Variables

-
-results = await redis_adapter.search(
-    collection_name="my_collection",
-    query_text="Hello Redis",
-    limit=10
-)
+Set the following environment variables or pass them directly in the config:

-
-
-
-
-    query_vector=query_vector[0],
-    limit=10,
-    with_vector=True # Include vectors in results
-)
+```bash
+export VECTOR_DB_URL="redis://localhost:6379"
+export VECTOR_DB_KEY="optional-key" # Not used by Redis
+```

-
-results = await redis_adapter.batch_search(
-    collection_name="my_collection",
-    query_texts=["query1", "query2"],
-    limit=5
-)
+### Connection URL Examples

-
-
-
-
-
+```python
+# Local Redis
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "redis://localhost:6379"
+})

-#
-
-
-
-)
+# Redis with authentication
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "redis://user:password@localhost:6379"
+})

-#
-
+# Redis with SSL
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "rediss://localhost:6380"
+})
 ```

-##
+## Requirements

-
+- Python >= 3.11, <= 3.13
+- redisvl >= 0.6.0, <= 1.0.0
+- cognee >= 0.2.0.dev0

-
-- `embedding_engine`: The `EmbeddingEngine` to use for text vectorization (required)
-- `api_key`: Optional API key parameter (not used for Redis but part of the interface)
+## Advanced Usage

-
+For direct adapter usage (advanced users only):

 ```python
-
-
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
+from cognee_community_vector_adapter_redis import RedisAdapter
+from cognee.infrastructure.engine import DataPoint

-#
-
+# Initialize embedding engine and adapter
+embedding_engine = EmbeddingEngine(model="your-model")
+redis_adapter = RedisAdapter(
+    url="redis://localhost:6379",
+    embedding_engine=embedding_engine
+)

-#
-redis_adapter
+# Direct adapter operations
+await redis_adapter.create_collection("my_collection")
+data_points = [DataPoint(id="1", text="Hello", metadata={"index_fields": ["text"]})]
+await redis_adapter.create_data_points("my_collection", data_points)
+results = await redis_adapter.search("my_collection", query_text="Hello", limit=10)
 ```

-
 ## Error Handling

 The adapter includes comprehensive error handling:

cognee_community_vector_adapter_redis-0.0.3.dist-info/RECORD

@@ -0,0 +1,6 @@
+cognee_community_vector_adapter_redis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognee_community_vector_adapter_redis/redis_adapter.py,sha256=YXLW4s631NMqsubAxTqOTQ5etNsNmwjOWu4U3EnZ7VM,18055
+cognee_community_vector_adapter_redis/register.py,sha256=0LdEifYQuIu9OkXNV8PxOPOg2gKCL-9Lq4FL0nmUCxo,154
+cognee_community_vector_adapter_redis-0.0.3.dist-info/METADATA,sha256=ntPwag-uykrHOEKPCki0d_l1EUulh910847MHRWlc6Q,6647
+cognee_community_vector_adapter_redis-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cognee_community_vector_adapter_redis-0.0.3.dist-info/RECORD,,

cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD

@@ -1,6 +0,0 @@
-cognee_community_vector_adapter_redis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cognee_community_vector_adapter_redis/redis_adapter.py,sha256=c4D2GLkiZ11nIJ7rk30OW0kSN78rep7T05mJT0wSN0o,17576
-cognee_community_vector_adapter_redis/register.py,sha256=0LdEifYQuIu9OkXNV8PxOPOg2gKCL-9Lq4FL0nmUCxo,154
-cognee_community_vector_adapter_redis-0.0.1.dist-info/METADATA,sha256=g55O7nqOsjqVPdBp649zGoa39fy1gZZDBjCH_ULDvqI,5966
-cognee_community_vector_adapter_redis-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD,,