cognee-community-vector-adapter-redis 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee_community_vector_adapter_redis/redis_adapter.py +184 -146
- {cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.1.0.dist-info}/METADATA +102 -72
- cognee_community_vector_adapter_redis-0.1.0.dist-info/RECORD +6 -0
- {cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.1.0.dist-info}/WHEEL +1 -1
- cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD +0 -6
cognee_community_vector_adapter_redis/redis_adapter.py

@@ -1,40 +1,44 @@
-import json
 import asyncio
-
+import json
+from typing import Any
 from uuid import UUID
 
-from redisvl.index import AsyncSearchIndex
-from redisvl.schema import IndexSchema
-from redisvl.query import VectorQuery
-
-from cognee.exceptions import InvalidValueError
-from cognee.shared.logging_utils import get_logger
-
-from cognee.infrastructure.engine import DataPoint
-from cognee.infrastructure.engine.utils import parse_id
+from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
 from cognee.infrastructure.databases.vector import VectorDBInterface
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import (
+    EmbeddingEngine,
+)
 from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
-from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
+from cognee.infrastructure.engine import DataPoint
+from cognee.infrastructure.engine.utils import parse_id
+
+# from redisvl.query import VectorDistanceMetric
+from cognee.shared.logging_utils import get_logger
+from redisvl.index import AsyncSearchIndex
+from redisvl.query import VectorQuery
+from redisvl.schema import IndexSchema
 
 logger = get_logger("RedisAdapter")
 
 
 class VectorEngineInitializationError(Exception):
     """Exception raised when vector engine initialization fails."""
+
     pass
 
 
 class CollectionNotFoundError(Exception):
     """Exception raised when a collection is not found."""
+
     pass
 
 
 def serialize_for_json(obj: Any) -> Any:
     """Convert objects to JSON-serializable format.
-
+
     Args:
         obj: Object to serialize (UUID, dict, list, or any other type).
-
+
     Returns:
         JSON-serializable representation of the object.
     """
@@ -50,40 +54,42 @@ def serialize_for_json(obj: Any) -> Any:
 
 class RedisDataPoint(DataPoint):
     """Redis data point schema for vector index entries.
-
+
     Attributes:
         text: The text content to be indexed.
         metadata: Metadata containing index field configuration.
     """
+
     text: str
     metadata: dict = {"index_fields": ["text"]}
 
 
 class RedisAdapter(VectorDBInterface):
     """Redis vector database adapter using RedisVL for vector similarity search.
-
+
     This adapter provides an interface to Redis vector search capabilities,
     supporting embedding generation, vector indexing, and similarity search.
     """
-
+
     name = "Redis"
-    url:
-    api_key:
-    embedding_engine:
-
+    url: str | None
+    api_key: str | None = None
+    embedding_engine: EmbeddingEngine | None = None
+
     def __init__(
-        self,
+        self,
         url: str,
-
-
+        database_name: str = "cognee",
+        api_key: str | None = None,
+        embedding_engine: EmbeddingEngine | None = None,
     ) -> None:
         """Initialize the Redis adapter.
-
+
         Args:
             url (str): Connection string for your Redis instance like redis://localhost:6379.
             embedding_engine: Engine for generating embeddings.
             api_key: Optional API key. Ignored for Redis.
-
+
         Raises:
             VectorEngineInitializationError: If required parameters are missing.
         """
@@ -91,31 +97,33 @@ class RedisAdapter(VectorDBInterface):
             raise VectorEngineInitializationError("Redis connnection URL!")
         if not embedding_engine:
             raise VectorEngineInitializationError("Embedding engine is required!")
-
+
         self.url = url
+        self.database_name = database_name
         self.embedding_engine = embedding_engine
         self._indices = {}
-
-
+        self.VECTOR_DB_LOCK = asyncio.Lock()
+
+    async def embed_data(self, data: list[str]) -> list[list[float]]:
         """Embed text data using the embedding engine.
-
+
         Args:
            data: List of text strings to embed.
-
+
         Returns:
             List of embedding vectors as lists of floats.
-
+
         Raises:
             Exception: If embedding generation fails.
         """
         return await self.embedding_engine.embed_text(data)
-
+
     def _create_schema(self, collection_name: str) -> IndexSchema:
         """Create a RedisVL IndexSchema for a collection.
-
+
         Args:
             collection_name: Name of the collection to create an index schema for.
-
+
         Returns:
             Redis IndexSchema configured for vector search.
         """
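For orientation, a minimal sketch of the widened 0.1.0 constructor surface shown in the hunks above. The stub engine below is hypothetical and only mirrors the two methods the adapter actually calls; importing straight from `redis_adapter` avoids relying on the package's empty `__init__.py`.

```python
# Illustrative sketch only; StubEmbeddingEngine is a hypothetical stand-in.
from cognee_community_vector_adapter_redis.redis_adapter import RedisAdapter


class StubEmbeddingEngine:
    """Mimics the two EmbeddingEngine methods the adapter calls."""

    async def embed_text(self, data: list[str]) -> list[list[float]]:
        return [[0.0] * 4 for _ in data]  # dummy 4-dimensional vectors

    def get_vector_size(self) -> int:
        return 4  # must agree with the vectors returned above


adapter = RedisAdapter(
    url="redis://localhost:6379",
    database_name="cognee",  # new in 0.1.0, defaults to "cognee"
    api_key=None,  # accepted for interface parity, ignored by Redis
    embedding_engine=StubEmbeddingEngine(),  # still required; a falsy value raises
)
```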
@@ -123,7 +131,7 @@ class RedisAdapter(VectorDBInterface):
             "index": {
                 "name": collection_name,
                 "prefix": f"{collection_name}",
-                "storage_type": "json"
+                "storage_type": "json",
             },
             "fields": [
                 {"name": "id", "type": "tag", "attrs": {"sortable": True}},
@@ -136,133 +144,137 @@ class RedisAdapter(VectorDBInterface):
                         "m": 32,
                         "dims": self.embedding_engine.get_vector_size(),
                         "distance_metric": "cosine",
-                        "datatype": "float32"
-                    }
+                        "datatype": "float32",
+                    },
                 },
-                {"name": "
-            ]
+                {"name": "payload_data", "type": "text", "attrs": {"sortable": True}},
+            ],
         }
         return IndexSchema.from_dict(schema_dict)
-
+
     def _get_index(self, collection_name: str) -> AsyncSearchIndex:
         """Get or create an AsyncSearchIndex for a collection.
-
+
         Args:
             collection_name: Name of the collection.
-
+
         Returns:
             AsyncSearchIndex instance for the collection.
         """
         if collection_name not in self._indices:
             schema = self._create_schema(collection_name)
             self._indices[collection_name] = AsyncSearchIndex(
-                schema=schema,
-                redis_url=self.url,
-                validate_on_load=True
+                schema=schema, redis_url=self.url, validate_on_load=True
             )
         return self._indices[collection_name]
-
+
     async def has_collection(self, collection_name: str) -> bool:
         """Check if a collection (index) exists.
-
+
         Args:
             collection_name: Name of the collection to check.
-
+
         Returns:
             True if collection exists, False otherwise.
         """
         try:
             index = self._get_index(collection_name)
-
+            result = await index.exists()
+            return result
         except Exception:
             return False
-
+
     async def create_collection(
         self,
         collection_name: str,
-        payload_schema:
+        payload_schema: Any | None = None,
     ) -> None:
         """Create a new collection (Redis index) with vector search capabilities.
-
+
         Args:
             collection_name: Name of the collection to create.
             payload_schema: Schema for payload data (not used).
-
+
         Raises:
             Exception: If collection creation fails.
         """
-
-        if await self.has_collection(collection_name):
-            logger.info(f"Collection {collection_name} already exists")
-            return
-
+        async with self.VECTOR_DB_LOCK:
             index = self._get_index(collection_name)
-
-
-
-
-
-
-
-
-
+            try:
+                if await self.has_collection(collection_name):
+                    logger.info(f"Collection {collection_name} already exists")
+                    return
+
+                await index.create(overwrite=False)
+
+                logger.info(f"Created collection {collection_name}")
+
+            except Exception as e:
+                logger.error(f"Error creating collection {collection_name}: {str(e)}")
+                raise e
+
+    async def create_data_points(self, collection_name: str, data_points: list[DataPoint]) -> None:
         """Create data points in the collection.
-
+
         Args:
             collection_name: Name of the target collection.
             data_points: List of DataPoint objects to insert.
-
+
         Raises:
             CollectionNotFoundError: If the collection doesn't exist.
             Exception: If data point creation fails.
         """
+        index = self._get_index(collection_name)
         try:
             if not await self.has_collection(collection_name):
                 raise CollectionNotFoundError(f"Collection {collection_name} not found!")
-
+
             # Embed the data points
             data_vectors = await self.embed_data(
                 [DataPoint.get_embeddable_data(data_point) for data_point in data_points]
             )
-
+
             # Prepare documents for RedisVL
             documents = []
-            for data_point, embedding in zip(data_points, data_vectors):
+            for data_point, embedding in zip(data_points, data_vectors, strict=False):
                 # Serialize the payload to handle UUIDs and other non-JSON types
                 payload = serialize_for_json(data_point.model_dump())
-
+
                 doc_data = {
                     "id": str(data_point.id),
-                    "text": getattr(
+                    "text": getattr(
+                        data_point,
+                        data_point.metadata.get("index_fields", ["text"])[0],
+                        "",
+                    ),
                     "vector": embedding,
-                    "
+                    "payload_data": json.dumps(payload), # Store as JSON string
                 }
                 documents.append(doc_data)
-
+
             # Load using RedisVL
-            index = self._get_index(collection_name)
             await index.load(documents, id_field="id")
-
+
             logger.info(f"Created {len(data_points)} data points in collection {collection_name}")
-
+
         except Exception as e:
             logger.error(f"Error creating data points: {str(e)}")
             raise e
-
+
     async def create_vector_index(self, index_name: str, index_property_name: str) -> None:
         """Create a vector index for a specific property.
-
+
         Args:
             index_name: Base name for the index.
             index_property_name: Property name to index.
         """
         await self.create_collection(f"{index_name}_{index_property_name}")
-
+
     async def index_data_points(
         self, index_name: str, index_property_name: str, data_points: list[DataPoint]
     ) -> None:
         """Index data points for a specific property.
-
+
         Args:
             index_name: Base name for the index.
             index_property_name: Property name to index.
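The `VECTOR_DB_LOCK` introduced above serializes the whole check-then-create sequence. A short sketch, assuming an `adapter` built as in the earlier note, of the concurrent pattern this change makes safe:

```python
import asyncio


async def ensure_collections(adapter, names: list[str]) -> None:
    # With the 0.1.0 lock, these concurrent calls queue on VECTOR_DB_LOCK, so each
    # has_collection() check plus index.create(overwrite=False) runs as a single
    # critical section instead of racing a sibling task.
    await asyncio.gather(*(adapter.create_collection(name) for name in names))


# Hypothetical collection names:
# asyncio.run(ensure_collections(adapter, ["documents_text", "entities_name"]))
```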
@@ -278,141 +290,157 @@ class RedisAdapter(VectorDBInterface):
                 for data_point in data_points
             ],
         )
-
-    async def retrieve(
+
+    async def retrieve(
+        self, collection_name: str, data_point_ids: list[str]
+    ) -> list[dict[str, Any]]:
         """Retrieve data points by their IDs.
-
+
         Args:
             collection_name: Name of the collection to retrieve from.
             data_point_ids: List of data point IDs to retrieve.
-
+
         Returns:
             List of retrieved data point payloads.
         """
+        index = self._get_index(collection_name)
         try:
-            index = self._get_index(collection_name)
             results = []
-
+
             for data_id in data_point_ids:
                 doc = await index.fetch(data_id)
                 if doc:
                     # Parse the stored payload JSON
-                    payload_str = doc.get("
+                    payload_str = doc.get("payload_data", "{}")
                     try:
                         payload = json.loads(payload_str)
                         results.append(payload)
                     except json.JSONDecodeError:
                         # Fallback to the document itself if payload parsing fails
                         results.append(doc)
-
+
             return results
-
+
         except Exception as e:
             logger.error(f"Error retrieving data points: {str(e)}")
             return []
-
+
     async def search(
         self,
         collection_name: str,
-        query_text:
-        query_vector:
-        limit: int = 15,
+        query_text: str | None = None,
+        query_vector: list[float] | None = None,
+        limit: int | None = 15,
         with_vector: bool = False,
-
+        include_payload: bool = True,
+    ) -> list[ScoredResult]:
         """Search for similar vectors in the collection.
-
+
         Args:
             collection_name: Name of the collection to search.
             query_text: Text query to search for (will be embedded).
             query_vector: Pre-computed query vector.
             limit: Maximum number of results to return.
             with_vector: Whether to include vectors in results.
-
+            include_payload: Whether to include payloads in results.
+
         Returns:
             List of ScoredResult objects sorted by similarity.
-
+
         Raises:
-
+            MissingQueryParameterError: If neither query_text nor query_vector is provided.
             Exception: If search execution fails.
         """
         if query_text is None and query_vector is None:
-            raise
-
-        if limit <= 0:
-            return []
-
+            raise MissingQueryParameterError()
+
         if not await self.has_collection(collection_name):
-            logger.warning(
+            logger.warning(
+                f"Collection '{collection_name}' not found in RedisAdapter.search; returning []."
+            )
             return []
-
+
+        index = self._get_index(collection_name)
+
+        if limit is None:
+            info = await index.info()
+            limit = info["num_docs"]
+
+        if limit <= 0:
+            return []
+
         try:
             # Get the query vector
             if query_vector is None:
                 query_vector = (await self.embed_data([query_text]))[0]
-
+
             # Create the vector query
             vector_query = VectorQuery(
                 vector=query_vector,
                 vector_field_name="vector",
                 num_results=limit,
                 return_score=True,
-                normalize_vector_distance=
+                normalize_vector_distance=False,
             )
-
+
             # Set return fields
-
+            if include_payload:
+                return_fields = ["id", "text", "payload_data"]
+            else:
+                return_fields = ["id", "text"]
             if with_vector:
                 return_fields.append("vector")
             vector_query = vector_query.return_fields(*return_fields)
-
+
             # Execute the search
-            index = self._get_index(collection_name)
             results = await index.query(vector_query)
-
+
             # Convert results to ScoredResult objects
             scored_results = []
             for doc in results:
                 # Parse the stored payload - it's stored as JSON string
-                payload_str = doc.get("
+                payload_str = doc.get("payload_data", "{}")
                 try:
                     payload = json.loads(payload_str)
                 except json.JSONDecodeError:
                     payload = doc
-
+
                 scored_results.append(
                     ScoredResult(
-                        id=parse_id(doc["id"]),
+                        id=parse_id(doc["id"].split(":", 1)[1]),
                         payload=payload,
-                        score=float(doc.get("vector_distance", 0.0)) # RedisVL returns distance
+                        score=float(doc.get("vector_distance", 0.0)), # RedisVL returns distance
                     )
                 )
             return scored_results
-
+
         except Exception as e:
             logger.error(f"Error during search: {str(e)}")
             raise e
-
+
     async def batch_search(
         self,
         collection_name: str,
-        query_texts:
-        limit:
+        query_texts: list[str],
+        limit: int | None,
         with_vectors: bool = False,
-
+        include_payload: bool = True,
+    ) -> list[list[ScoredResult]]:
         """Perform batch search for multiple queries.
-
+
         Args:
             collection_name: Name of the collection to search.
             query_texts: List of text queries to search for.
             limit: Maximum number of results per query.
             with_vectors: Whether to include vectors in results.
-
+            include_payload: Whether to include payloads in results.
+
         Returns:
             List of search results for each query, filtered by score threshold.
         """
         # Embed all queries at once
         vectors = await self.embed_data(query_texts)
-
+
         # Execute searches in parallel
         # TODO: replace with index.batch_query() in the future
         search_tasks = [
@@ -420,46 +448,48 @@ class RedisAdapter(VectorDBInterface):
                 collection_name=collection_name,
                 query_vector=vector,
                 limit=limit,
-                with_vector=with_vectors
+                with_vector=with_vectors,
+                include_payload=include_payload,
             )
             for vector in vectors
         ]
-
+
         results = await asyncio.gather(*search_tasks)
-
+
         # Filter results by score threshold (Redis uses distance, so lower is better)
         return [
-            [result for result in result_group if result.score < 0.1]
-            for result_group in results
+            [result for result in result_group if result.score < 0.1] for result_group in results
         ]
-
-    async def delete_data_points(
+
+    async def delete_data_points(
+        self, collection_name: str, data_point_ids: list[str]
+    ) -> dict[str, int]:
         """Delete data points by their IDs.
-
+
         Args:
             collection_name: Name of the collection to delete from.
             data_point_ids: List of data point IDs to delete.
-
+
         Returns:
             Dictionary containing the number of deleted documents.
-
+
         Raises:
             Exception: If deletion fails.
         """
+        index = self._get_index(collection_name)
         try:
-            index = self._get_index(collection_name)
             deleted_count = await index.drop_documents(data_point_ids)
             logger.info(f"Deleted {deleted_count} data points from collection {collection_name}")
             return {"deleted": deleted_count}
         except Exception as e:
             logger.error(f"Error deleting data points: {str(e)}")
             raise e
-
+
     async def prune(self) -> None:
         """Remove all collections and data from Redis.
-
+
         This method drops all existing indices and clears the internal cache.
-
+
         Raises:
             Exception: If pruning fails.
         """
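A sketch of calling `batch_search` after this hunk, assuming an existing adapter and an illustrative collection name; the `0.1` cutoff is the hard-coded distance threshold visible above:

```python
async def run_batch(adapter) -> None:
    groups = await adapter.batch_search(
        collection_name="my_collection",  # hypothetical collection
        query_texts=["first query", "second query"],
        limit=5,
        with_vectors=False,
        include_payload=True,  # new in 0.1.0, forwarded to every search() task
    )
    # Each group keeps only hits whose cosine distance is below the fixed 0.1 cutoff.
    for i, group in enumerate(groups):
        print(f"query {i}: {len(group)} hits under the threshold")
```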
@@ -470,15 +500,23 @@ class RedisAdapter(VectorDBInterface):
                    if await index.exists():
                        await index.delete(drop=True)
                        logger.info(f"Dropped index {collection_name}")
-                        await index.disconnect()
                except Exception as e:
                    logger.warning(f"Failed to drop index {collection_name}: {str(e)}")
-
+
             # Clear the indices cache
             self._indices.clear()
-
+
             logger.info("Pruned all Redis vector collections")
-
+
         except Exception as e:
             logger.error(f"Error during prune: {str(e)}")
             raise e
+
+    async def get_collection_names(self):
+        """
+        Get names of all collections in the database.
+
+        Returns:
+            List of collection names. In this case of Redis, the return type is a dict.
+        """
+        return self._indices.keys()
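Taken together, the `search()` changes in this file alter the public call surface. A hedged sketch of the 0.1.0 behavior, assuming an initialized adapter and an existing collection named `my_collection`:

```python
from cognee.infrastructure.databases.exceptions import MissingQueryParameterError


async def demo_search(adapter) -> None:
    results = await adapter.search(
        collection_name="my_collection",
        query_text="hello redis",
        limit=None,  # new: None is resolved to index.info()["num_docs"]
        include_payload=False,  # new: omit the payload_data JSON field
    )
    for scored in results:
        # score is a RedisVL cosine distance (lower is more similar); the
        # "collection:" Redis key prefix is now stripped before parse_id().
        print(scored.id, scored.score)

    try:
        await adapter.search(collection_name="my_collection")
    except MissingQueryParameterError:
        # 0.1.0 raises this when neither query_text nor query_vector is given,
        # replacing the old InvalidValueError import.
        pass
```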
{cognee_community_vector_adapter_redis-0.0.1.dist-info → cognee_community_vector_adapter_redis-0.1.0.dist-info}/METADATA

@@ -1,10 +1,12 @@
 Metadata-Version: 2.4
 Name: cognee-community-vector-adapter-redis
-Version: 0.0.1
+Version: 0.1.0
 Summary: Redis vector database adapter for cognee
 Requires-Python: <=3.13,>=3.11
-Requires-Dist: cognee
+Requires-Dist: cognee==0.5.2
+Requires-Dist: instructor>=1.11
 Requires-Dist: redisvl<=1.0.0,>=0.6.0
+Requires-Dist: starlette>=0.48.0
 Description-Content-Type: text/markdown
 
 <div align="center" dir="auto">
@@ -17,7 +19,7 @@ Description-Content-Type: text/markdown
 <br />
 
 [](https://opensource.org/licenses/Apache-2.0)
-
+
 
 [](https://github.com/redis/redis-vl-python)
 
@@ -44,10 +46,19 @@ Description-Content-Type: text/markdown
 
 ## Installation
 
+If published, the package can be simply installed via pip:
+
 ```bash
 pip install cognee-community-vector-adapter-redis
 ```
 
+In case it is not published yet, you can use poetry to locally build the adapter package:
+
+```bash
+pip install poetry
+poetry install # run this command in the directory containing the pyproject.toml file
+```
+
 ## Prerequisites
 
 You need a Redis instance with the Redis Search module enabled. You can use:
@@ -71,96 +82,115 @@ uv run examples/example.py
 ## Usage
 
 ```python
-
-
-
-
-
-
-
-)
-
-
-
-
-
-
-
+import os
+import asyncio
+from cognee import config, prune, add, cognify, search, SearchType
+
+# Import the register module to enable Redis support
+from cognee_community_vector_adapter_redis import register
+
+async def main():
+    # Configure Redis as vector database
+    config.set_vector_db_config({
+        "vector_db_provider": "redis",
+        "vector_db_url": os.getenv("VECTOR_DB_URL", "redis://localhost:6379"),
+        "vector_db_key": os.getenv("VECTOR_DB_KEY", "your-api-key"), # Optional
+    })
+
+    # Optional: Clean previous data
+    await prune.prune_data()
+    await prune.prune_system()
+
+    # Add your content
+    await add("""
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """)
+
+    # Process with cognee
+    await cognify()
+
+    # Search
+    search_results = await search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Tell me about NLP"
+    )
+
+    for result in search_results:
+        print("Search result:", result)
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
 
-
-    await redis_adapter.create_collection("my_collection")
+## Configuration
 
-
-from cognee.infrastructure.engine import DataPoint
+Configure Redis as your vector database in cognee:
 
-
-
-
-]
+- `vector_db_provider`: Set to "redis"
+- `vector_db_url`: Redis connection URL (e.g., "redis://localhost:6379")
+- `vector_db_key`: Optional API key parameter (for compatibility, not used by Redis)
 
-
+### Environment Variables
 
-
-results = await redis_adapter.search(
-    collection_name="my_collection",
-    query_text="Hello Redis",
-    limit=10
-)
+Set the following environment variables or pass them directly in the config:
 
-
-
-
-
-    query_vector=query_vector[0],
-    limit=10,
-    with_vector=True # Include vectors in results
-)
+```bash
+export VECTOR_DB_URL="redis://localhost:6379"
+export VECTOR_DB_KEY="optional-key" # Not used by Redis
+```
 
-
-results = await redis_adapter.batch_search(
-    collection_name="my_collection",
-    query_texts=["query1", "query2"],
-    limit=5
-)
+### Connection URL Examples
 
-
-
-
-
-
+```python
+# Local Redis
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "redis://localhost:6379"
+})
 
-#
-
-
-
-)
+# Redis with authentication
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "redis://user:password@localhost:6379"
+})
 
-#
-
+# Redis with SSL
+config.set_vector_db_config({
+    "vector_db_provider": "redis",
+    "vector_db_url": "rediss://localhost:6380"
+})
 ```
 
-##
+## Requirements
 
-
+- Python >= 3.11, <= 3.13
+- redisvl >= 0.6.0, <= 1.0.0
+- cognee >= 0.2.0.dev0
 
-
-- `embedding_engine`: The `EmbeddingEngine` to use for text vectorization (required)
-- `api_key`: Optional API key parameter (not used for Redis but part of the interface)
+## Advanced Usage
 
-
+For direct adapter usage (advanced users only):
 
 ```python
-
-
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
+from cognee_community_vector_adapter_redis import RedisAdapter
+from cognee.infrastructure.engine import DataPoint
 
-#
-
+# Initialize embedding engine and adapter
+embedding_engine = EmbeddingEngine(model="your-model")
+redis_adapter = RedisAdapter(
+    url="redis://localhost:6379",
+    embedding_engine=embedding_engine
+)
 
-#
-redis_adapter
+# Direct adapter operations
+await redis_adapter.create_collection("my_collection")
+data_points = [DataPoint(id="1", text="Hello", metadata={"index_fields": ["text"]})]
+await redis_adapter.create_data_points("my_collection", data_points)
+results = await redis_adapter.search("my_collection", query_text="Hello", limit=10)
 ```
 
-
 ## Error Handling
 
 The adapter includes comprehensive error handling:
cognee_community_vector_adapter_redis-0.1.0.dist-info/RECORD

@@ -0,0 +1,6 @@
+cognee_community_vector_adapter_redis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cognee_community_vector_adapter_redis/redis_adapter.py,sha256=03HlidkIfq3IeeWgHe9C33athQuSM5Xm4wd6HG93TCg,17969
+cognee_community_vector_adapter_redis/register.py,sha256=0LdEifYQuIu9OkXNV8PxOPOg2gKCL-9Lq4FL0nmUCxo,154
+cognee_community_vector_adapter_redis-0.1.0.dist-info/METADATA,sha256=wF_uAzy6fgGPG-pAF3_WQxe_VhxjrTK7ujtV9qDT0AM,6707
+cognee_community_vector_adapter_redis-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+cognee_community_vector_adapter_redis-0.1.0.dist-info/RECORD,,
cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD

@@ -1,6 +0,0 @@
-cognee_community_vector_adapter_redis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cognee_community_vector_adapter_redis/redis_adapter.py,sha256=c4D2GLkiZ11nIJ7rk30OW0kSN78rep7T05mJT0wSN0o,17576
-cognee_community_vector_adapter_redis/register.py,sha256=0LdEifYQuIu9OkXNV8PxOPOg2gKCL-9Lq4FL0nmUCxo,154
-cognee_community_vector_adapter_redis-0.0.1.dist-info/METADATA,sha256=g55O7nqOsjqVPdBp649zGoa39fy1gZZDBjCH_ULDvqI,5966
-cognee_community_vector_adapter_redis-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-cognee_community_vector_adapter_redis-0.0.1.dist-info/RECORD,,