cognee_community_vector_adapter_valkey-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cognee_community_vector_adapter_valkey/__init__.py
@@ -0,0 +1,4 @@
1
+ from .exceptions import CollectionNotFoundError, ValkeyVectorEngineInitializationError
2
+ from .valkey_adapter import ValkeyAdapter
3
+
4
+ __all__ = ["ValkeyAdapter", "ValkeyVectorEngineInitializationError", "CollectionNotFoundError"]
cognee_community_vector_adapter_valkey/exceptions.py
@@ -0,0 +1,10 @@
1
+ class ValkeyVectorEngineInitializationError(Exception):
2
+ """Exception raised when vector engine initialization fails."""
3
+
4
+ pass
5
+
6
+
7
+ class CollectionNotFoundError(Exception):
8
+ """Exception raised when a collection is not found."""
9
+
10
+ pass
cognee_community_vector_adapter_valkey/register.py
@@ -0,0 +1,5 @@
1
+ from cognee.infrastructure.databases.vector import use_vector_adapter
2
+
3
+ from .valkey_adapter import ValkeyAdapter
4
+
5
+ use_vector_adapter("valkey", ValkeyAdapter)
cognee_community_vector_adapter_valkey/utils.py
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import struct
5
+ from functools import singledispatch
6
+ from typing import Any
7
+ from urllib.parse import urlparse
8
+ from uuid import UUID
9
+
10
+ from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
11
+
12
+ """
13
+ Internal helper functions. Not part of the public API.
14
+ """
15
+
16
+
17
+ def _parse_host_port(url: str) -> tuple[str, int]:
18
+ """
19
+ Parse a URL and extract the host and port.
20
+
21
+ Args:
22
+ url (str): The connection URL, e.g., "valkey://localhost:6379".
23
+
24
+ Returns:
25
+ tuple[str, int]: A tuple containing:
26
+ - host (str): The hostname from the URL, defaults to "localhost" if missing.
27
+ - port (int): The port number from the URL, defaults to 6379 if missing.
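+
+ Example (illustrative):
+ >>> _parse_host_port("valkey://db.internal:6380")
+ ('db.internal', 6380)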
28
+ """
29
+
30
+ parsed = urlparse(url)
31
+ host = parsed.hostname or "localhost"
32
+ port = parsed.port or 6379
33
+ return host, port
34
+
35
+
36
+ def _to_float32_bytes(vec) -> bytes:
37
+ """
38
+ Convert a sequence of numeric values into a bytes representation using 32-bit floats.
39
+
40
+ Args:
41
+ vec (Iterable[float]): A sequence of numbers (e.g., list, tuple) to be converted.
42
+
43
+ Returns:
44
+ bytes: A binary representation of the input values packed as consecutive 32-bit floats.
45
+
46
+ Notes:
47
+ - Uses `struct.pack` with the format string `"{len(vec)}f"`, which packs all values as
48
+ IEEE 754 single-precision floats.
49
+ - Ensures compatibility with vector databases or embedding engines that require raw
50
+ float32 byte arrays.
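+
+ Example (illustrative round-trip):
+ >>> struct.unpack("3f", _to_float32_bytes([1, 2, 3]))
+ (1.0, 2.0, 3.0)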
51
+ """
52
+
53
+ return struct.pack(f"{len(vec)}f", *map(float, vec))
54
+
55
+
56
+ @singledispatch
57
+ def _serialize_for_json(obj: Any) -> Any:
58
+ """Convert objects to JSON-serializable format.
59
+ This is the default serialization: return the object as-is.
60
+
61
+ Args:
62
+ obj: Object to serialize (UUID, dict, list, or any other type).
63
+
64
+ Returns:
65
+ JSON-serializable representation of the object.
66
+ """
67
+ return obj
68
+
69
+
70
+ @_serialize_for_json.register
71
+ def _(obj: UUID) -> str:
72
+ return str(obj)
73
+
74
+
75
+ @_serialize_for_json.register
76
+ def _(obj: dict) -> dict:
77
+ return {k: _serialize_for_json(v) for k, v in obj.items()}
78
+
79
+
80
+ @_serialize_for_json.register
81
+ def _(obj: list) -> list:
82
+ return [_serialize_for_json(item) for item in obj]
83
+
84
+
85
+ def _b2s(x: bytes | bytearray | str) -> str:
86
+ """Convert bytes or bytearray to a UTF-8 string if possible,
87
+ otherwise return a string representation.
88
+
89
+ Args:
90
+ x (bytes | bytearray | str): The input value to convert.
91
+
92
+ Returns:
93
+ str: A decoded UTF-8 string if `x` is bytes or bytearray; otherwise, `x` is returned unchanged.
94
+ If decoding fails, returns the string representation of `x`.
95
+ """
96
+
97
+ if isinstance(x, (bytes, bytearray)):
98
+ try:
99
+ return x.decode("utf-8")
100
+ except Exception:
101
+ return str(x)
102
+ return x
103
+
104
+
105
+ def _build_scored_results_from_ft(
106
+ raw: Any,
107
+ *,
108
+ use_key_suffix_when_missing_id: bool = True,
109
+ ) -> list[ScoredResult]:
110
+ """Build a list of `ScoredResult` objects from raw FT (Full-Text) search response.
111
+
112
+ Args:
113
+ raw (Any): The raw response from Valkey's FT search command, expected to be a list or tuple
114
+ where the second element is a mapping of keys to field dictionaries.
115
+ use_key_suffix_when_missing_id (bool): If True, use the key string as the ID when the `id`
116
+ field is missing in the response.
117
+
118
+ Returns:
119
+ list[ScoredResult]: A list of scored results, each containing:
120
+ - id (str): Extracted from `id` field or fallback to key.
121
+ - payload (dict): Parsed JSON from `payload_data` field, or raw string if malformed.
122
+ - score (float | None): Extracted from `__vector_score` field if present.
123
+
124
+ Notes:
125
+ - Handles both byte keys and string keys by decoding them.
126
+ - Gracefully falls back when fields are missing or payload is invalid JSON.
127
+ """
128
+ if not isinstance(raw, (list, tuple)) or len(raw) < 2 or not isinstance(raw[1], dict):
129
+ return []
130
+
131
+ mapping: dict[Any, dict[Any, Any]] = raw[1] # the { key -> fields } dict
132
+ scored: list[ScoredResult] = []
133
+
134
+ for key_bytes, fields in mapping.items():
135
+ key_str = _b2s(key_bytes)
136
+
137
+ # Extract id
138
+ raw_id = fields.get(b"id") if b"id" in fields else fields.get("id")
139
+ if raw_id is not None:
140
+ result_id = _b2s(raw_id)
141
+ else:
142
+ result_id = key_str
143
+
144
+ # Extract score
145
+ score = (
146
+ fields.get(b"__vector_score")
147
+ if b"__vector_score" in fields
148
+ else fields.get("__vector_score")
149
+ )
150
+ if score is not None:
151
+ score = float(score)
152
+
153
+ # Extract and parse payload_data
154
+ payload_raw = (
155
+ fields.get(b"payload_data") if b"payload_data" in fields else fields.get("payload_data")
156
+ )
157
+ payload: dict[str, Any] = {}
158
+ if payload_raw is not None:
159
+ payload_str = _b2s(payload_raw)
160
+ if isinstance(payload_str, str):
161
+ try:
162
+ obj = json.loads(payload_str)
163
+ if isinstance(obj, dict):
164
+ payload = obj
165
+ else:
166
+ # If it's not a dict (e.g., list), wrap it
167
+ payload = {"_payload": obj}
168
+ except json.JSONDecodeError:
169
+ # Keep the raw string if malformed
170
+ payload = {"_payload_raw": payload_str}
171
+
172
+ scored.append(
173
+ ScoredResult(
174
+ id=result_id,
175
+ payload=payload,
176
+ score=score,
177
+ )
178
+ )
179
+
180
+ return scored
cognee_community_vector_adapter_valkey/valkey_adapter.py
@@ -0,0 +1,535 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ from typing import Any
6
+
7
+ from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
8
+ from cognee.infrastructure.databases.vector import VectorDBInterface
9
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import (
10
+ EmbeddingEngine,
11
+ )
12
+ from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
13
+ from cognee.infrastructure.engine import DataPoint
14
+ from cognee.shared.logging_utils import get_logger
15
+ from glide import (
16
+ BackoffStrategy,
17
+ GlideClient,
18
+ GlideClientConfiguration,
19
+ NodeAddress,
20
+ ft,
21
+ glide_json,
22
+ )
23
+ from glide_shared.commands.server_modules.ft_options.ft_create_options import (
24
+ DataType,
25
+ DistanceMetricType,
26
+ FtCreateOptions,
27
+ TagField,
28
+ VectorAlgorithm,
29
+ VectorField,
30
+ VectorFieldAttributesHnsw,
31
+ VectorType,
32
+ )
33
+ from glide_shared.commands.server_modules.ft_options.ft_search_options import (
34
+ FtSearchOptions,
35
+ ReturnField,
36
+ )
37
+ from glide_shared.exceptions import RequestError
38
+
39
+ from .exceptions import CollectionNotFoundError, ValkeyVectorEngineInitializationError
40
+ from .utils import (
41
+ _build_scored_results_from_ft,
42
+ _parse_host_port,
43
+ _serialize_for_json,
44
+ _to_float32_bytes,
45
+ )
46
+
47
+ logger = get_logger("ValkeyAdapter")
48
+
49
+
50
+ class ValkeyAdapter(VectorDBInterface):
51
+ """Valkey vector database adapter using ValkeyGlide for vector similarity search.
52
+
53
+ This adapter provides an implementation of the `VectorDBInterface` for Valkey,
54
+ enabling vector storage, retrieval, and similarity search using Valkey's
55
+ full-text and vector indexing capabilities.
56
+ """
57
+
58
+ name = "Valkey"
59
+ url: str | None
60
+ api_key: str | None = None
61
+ embedding_engine: EmbeddingEngine | None = None
62
+
63
+ def __init__(
64
+ self,
65
+ url: str | None,
66
+ api_key: str | None = None,
67
+ database_name: str = "cognee",
68
+ embedding_engine: EmbeddingEngine | None = None,
69
+ ) -> None:
70
+ """Initialize the Valkey adapter.
71
+
72
+ Args:
73
+ url (str): Connection string for your Valkey instance like valkey://localhost:6379.
74
+ embedding_engine: Engine for generating embeddings.
75
+ api_key: Optional API key. Ignored for Valkey.
76
+
77
+ Raises:
78
+ ValkeyVectorEngineInitializationError: If required parameters are missing.
79
+ """
80
+
81
+ if not embedding_engine:
82
+ raise ValkeyVectorEngineInitializationError(
83
+ "Embedding engine is required. Provide 'embedding_engine' to the Valkey adapter."
84
+ )
85
+
86
+ self.url = url
87
+ self._host, self._port = _parse_host_port(url)
88
+ self.database_name = database_name
89
+ self.embedding_engine = embedding_engine
90
+ self._client: GlideClient | None = None
91
+ self._connected = False
92
+ self.VECTOR_DB_LOCK = asyncio.Lock()
93
+
94
+ # -------------------- lifecycle --------------------
95
+
96
+ async def get_connection(self) -> GlideClient:
97
+ """Establish and return an asynchronous Glide client connection to the Valkey server.
98
+
99
+ If a connection already exists and is marked as active, it will be reused.
100
+ Otherwise, a new connection is created using the configured host and port.
101
+
102
+ Returns:
103
+ GlideClient: An active Glide client instance for executing Valkey commands.
104
+
105
+ Behavior:
106
+ - Uses a backoff reconnect strategy with 3 retries and exponential delay.
107
+ - Disables TLS by default (set `use_tls=True` in configuration if needed).
108
+ - Sets a request timeout of 5000 ms.
109
+ """
110
+
111
+ if self._connected and self._client is not None:
112
+ return self._client
113
+
114
+ cfg = GlideClientConfiguration(
115
+ [NodeAddress(self._host, self._port)],
116
+ use_tls=False,
117
+ request_timeout=5000,
118
+ reconnect_strategy=BackoffStrategy(num_of_retries=3, factor=1000, exponent_base=2),
119
+ )
120
+ self._client = await GlideClient.create(cfg)
121
+ self._connected = True
122
+
123
+ return self._client
124
+
125
+ async def close(self) -> None:
126
+ """Close the active Glide client connection to the Valkey server.
127
+
128
+ If a client connection exists, attempts to close it gracefully.
129
+ Any exceptions during closure are suppressed to avoid breaking cleanup logic.
130
+
131
+ After closing:
132
+ - The internal client reference is set to None.
133
+ - The connection state flag (`_connected`) is reset to False.
134
+
135
+ Returns:
136
+ None
137
+
138
+ """
139
+
140
+ if self._client is not None:
141
+ try:
142
+ await self._client.close()
143
+ except Exception as e:
144
+ logger.error("Failed to close Valkey client: %e", e)
145
+ pass
146
+ self._client = None
147
+ self._connected = False
148
+
149
+ # -------------------- helpers --------------------
150
+
151
+ def _index_name(self, collection: str) -> str:
152
+ return f"index:{collection}"
153
+
154
+ def _key_prefix(self, collection: str) -> str:
155
+ return f"vdb:{collection}:"
156
+
157
+ def _key(self, collection: str, pid: str) -> str:
158
+ return f"{self._key_prefix(collection)}{pid}"
159
+
160
+ def _ensure_dims(self) -> int:
161
+ dims = self.embedding_engine.get_dimensions()
162
+ return int(dims)
163
+
164
+ async def embed_data(self, data: list[str]) -> list[list[float]]:
165
+ """Embed text data using the embedding engine.
166
+
167
+ Args:
168
+ data: List of text strings to embed.
169
+
170
+ Returns:
171
+ List of embedding vectors as lists of floats.
172
+
173
+ Raises:
174
+ Exception: If embedding generation fails.
175
+ """
176
+ return await self.embedding_engine.embed_text(data)
177
+
178
+ # -------------------- VectorDBInterface methods --------------------
179
+
180
+ async def has_collection(self, collection_name: str) -> bool:
181
+ """Check if a collection (index) exists.
182
+
183
+ Args:
184
+ collection_name: Name of the collection to check.
185
+
186
+ Returns:
187
+ True if collection exists, False otherwise.
188
+ """
189
+ client = await self.get_connection()
190
+ try:
191
+ await ft.info(client, self._index_name(collection_name))
192
+ return True
193
+ except Exception as e:
194
+ logger.warning("Valkey index check failed for '%s': %s", collection_name, e)
195
+ return False
196
+
197
+ async def create_collection(
198
+ self,
199
+ collection_name: str,
200
+ payload_schema: Any | None = None,
201
+ ) -> None:
202
+ """Create a new collection (Valkey index) with vector search capabilities.
203
+
204
+ Args:
205
+ collection_name: Name of the collection to create.
206
+ payload_schema: Schema for payload data (not used).
207
+
208
+ Raises:
209
+ Exception: If collection creation fails.
210
+ """
211
+ async with self.VECTOR_DB_LOCK:
212
+ try:
213
+ if await self.has_collection(collection_name):
214
+ logger.info(f"Collection {collection_name} already exists")
215
+ return
216
+
217
+ fields = [
218
+ TagField("id"),
219
+ VectorField(
220
+ name="vector",
221
+ algorithm=VectorAlgorithm.HNSW,
222
+ attributes=VectorFieldAttributesHnsw(
223
+ dimensions=self.embedding_engine.get_vector_size(),
224
+ distance_metric=DistanceMetricType.COSINE,
225
+ type=VectorType.FLOAT32,
226
+ ),
227
+ ),
228
+ ]
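+ # The FT index is built over JSON documents stored under keys with the "vdb:<collection>:" prefix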
229
+ prefixes = [self._key_prefix(collection_name)]
230
+ options = FtCreateOptions(DataType.JSON, prefixes)
231
+ index = self._index_name(collection_name)
232
+
233
+ ok = await ft.create(self._client, index, fields, options)
234
+ if ok not in (b"OK", "OK"):
235
+ raise Exception(f"FT.CREATE failed for index '{index}': {ok!r}")
236
+
237
+ except Exception as e:
238
+ logger.error(f"Error creating collection {collection_name}: {str(e)}")
239
+ raise e
240
+
241
+ async def create_data_points(
242
+ self,
243
+ collection_name: str,
244
+ data_points: list[DataPoint],
245
+ ) -> None:
246
+ """Create data points in the collection.
247
+
248
+ Args:
249
+ collection_name: Name of the target collection.
250
+ data_points: List of DataPoint objects to insert.
251
+
252
+ Raises:
253
+ CollectionNotFoundError: If the collection doesn't exist.
254
+ Exception: If data point creation fails.
255
+ """
256
+ client = await self.get_connection()
257
+ assert self._client is not None
258
+
259
+ try:
260
+ if not await self.has_collection(collection_name):
261
+ raise CollectionNotFoundError(f"Collection {collection_name} not found!")
262
+
263
+ # Embed the data points
264
+ data_to_embed = [
265
+ DataPoint.get_embeddable_data(data_point) for data_point in data_points
266
+ ]
267
+ data_vectors = await self.embed_data(data_to_embed)
268
+
269
+ documents = []
270
+ for data_point, embedding in zip(data_points, data_vectors, strict=False):
271
+ payload = _serialize_for_json(data_point.model_dump())
272
+
273
+ doc_data = {
274
+ "id": str(data_point.id),
275
+ "vector": embedding,
276
+ "payload_data": json.dumps(payload), # Store as JSON string
277
+ }
278
+
279
+ documents.append(
280
+ glide_json.set(
281
+ client,
282
+ self._key(collection_name, str(data_point.id)),
283
+ "$",
284
+ json.dumps(doc_data),
285
+ )
286
+ )
287
+
288
+ await asyncio.gather(*documents)
289
+
290
+ except RequestError as e:
291
+ # Helpful guidance if JSON vector arrays aren't supported by the deployed module
292
+ logger.error(f"JSON.SET failed: {e}")
293
+ raise e
294
+
295
+ except Exception as e:
296
+ logger.error(f"Error creating data points: {str(e)}")
297
+ raise e
298
+
299
+ # TODO: Add this and fix issues
300
+ # async def create_vector_index(self, index_name: str, index_property_name: str):
301
+ # await self.create_collection(f"{index_name}_{index_property_name}")
302
+ #
303
+ # async def index_data_points(
304
+ # self, index_name: str, index_property_name: str, data_points: List[DataPoint]
305
+ # ):
306
+ # """Index data points in the collection."""
307
+ #
308
+ # await self.create_data_points(f"{index_name}_{index_property_name}", data_points)
309
+
310
+ async def retrieve(
311
+ self,
312
+ collection_name: str,
313
+ data_point_ids: list[str],
314
+ ) -> list[dict[str, Any]]:
315
+ """Retrieve data points by their IDs.
316
+
317
+ Args:
318
+ collection_name: Name of the collection to retrieve from.
319
+ data_point_ids: List of data point IDs to retrieve.
320
+
321
+ Returns:
322
+ List of retrieved data point payloads.
323
+ """
324
+ client = await self.get_connection()
325
+ assert self._client is not None
326
+
327
+ try:
328
+ results = []
329
+ for data_id in data_point_ids:
330
+ key = self._key(collection_name, data_id)
331
+ raw_doc = await glide_json.get(client, key, "$")
332
+ if raw_doc:
333
+ doc = json.loads(raw_doc)
334
+ payload_str = doc[0]["payload_data"]
335
+ try:
336
+ payload = json.loads(payload_str)
337
+ results.append(payload)
338
+ except json.JSONDecodeError:
339
+ # Fallback to the document itself if payload parsing fails
340
+ results.append(raw_doc)
341
+
342
+ return results
343
+
344
+ except Exception as e:
345
+ logger.error(f"Error retrieving data points: {str(e)}")
346
+ return []
347
+
348
+ async def search(
349
+ self,
350
+ collection_name: str,
351
+ query_text: str | None = None,
352
+ query_vector: list[float] | None = None,
353
+ limit: int | None = 15,
354
+ with_vector: bool = False,
355
+ include_payload: bool = False,
356
+ ) -> list[ScoredResult]:
357
+ """Search for similar vectors in the collection.
358
+
359
+ Args:
360
+ collection_name: Name of the collection to search.
361
+ query_text: Text query to search for (will be embedded).
362
+ query_vector: Pre-computed query vector.
363
+ limit: Maximum number of results to return.
364
+ with_vector: Whether to include vectors in results.
365
+ include_payload: Whether to include payloads in results.
366
+
367
+ Returns:
368
+ List of ScoredResult objects sorted by similarity.
369
+
370
+ Raises:
371
+ MissingQueryParameterError: If neither query_text nor query_vector is provided.
372
+ Exception: If search execution fails.
373
+ """
374
+ client = await self.get_connection()
375
+ assert self._client is not None
376
+
377
+ if query_text is None and query_vector is None:
378
+ raise MissingQueryParameterError()
379
+
380
+ if not await self.has_collection(collection_name):
381
+ logger.warning(
382
+ f"Collection '{collection_name}' not found in ValkeyAdapter.search; returning []."
383
+ )
384
+ return []
385
+
386
+ if limit is None:
387
+ info = await ft.info(client, self._index_name(collection_name))
388
+ limit = info["num_docs"]
389
+
390
+ if limit <= 0:
391
+ return []
392
+
393
+ try:
394
+ # Get the query vector
395
+ if query_vector is None:
396
+ [vec] = await self.embed_data([query_text])
397
+ else:
398
+ vec = query_vector
399
+ vec_bytes = _to_float32_bytes(vec)
400
+
401
+ # Set return fields
402
+ return_fields = [
403
+ ReturnField("$.id", alias="id"),
404
+ ReturnField("__vector_score", alias="score"),
405
+ ]
406
+ if include_payload:
407
+ return_fields.append(ReturnField("$.payload_data", alias="payload_data"))
408
+ if with_vector:
409
+ return_fields.append(ReturnField("$.vector", alias="vector"))
410
+
411
+ vector_param_name = "query_vector"
412
+ query = f"*=>[KNN {limit} @vector ${vector_param_name}]"
413
+ query_options = FtSearchOptions(
414
+ params={vector_param_name: vec_bytes}, return_fields=return_fields
415
+ )
416
+
417
+ # Execute the search
418
+ raw_results = await ft.search(
419
+ client=client,
420
+ index_name=self._index_name(collection_name),
421
+ query=query,
422
+ options=query_options,
423
+ )
424
+
425
+ scored_results = _build_scored_results_from_ft(raw_results)
426
+ return scored_results
427
+
428
+ except Exception as e:
429
+ logger.error(f"Error during search: {str(e)}")
430
+ raise e
431
+
432
+ async def batch_search(
433
+ self,
434
+ collection_name: str,
435
+ query_texts: list[str],
436
+ limit: int | None,
437
+ with_vectors: bool = False,
438
+ score_threshold: float | None = 0.1,
439
+ max_concurrency: int = 10,
440
+ include_payload: bool = False,
441
+ ) -> list[list[ScoredResult]]:
442
+ """Perform batch search for multiple queries.
443
+
444
+ Args:
445
+ collection_name: Name of the collection to search.
446
+ query_texts: List of text queries to search for.
447
+ limit: Maximum number of results per query.
448
+ with_vectors: Whether to include vectors in results.
449
+ score_threshold: threshold for filtering scores.
450
+ max_concurrency: maximum number of concurrent searches.
451
+ include_payload: Whether to include payloads in results.
452
+
453
+ Returns:
454
+ List of search results for each query, filtered by score threshold.
455
+ """
456
+ if not await self.has_collection(collection_name):
457
+ logger.warning(
458
+ f"Collection '{collection_name}' not found in ValkeyAdapter.search; returning []."
459
+ )
460
+ return []
461
+
462
+ # Embed all queries at once
463
+ vectors = await self.embed_data(query_texts)
464
+
465
+ # Execute searches in parallel
466
+ semaphore = asyncio.Semaphore(max_concurrency)
467
+
468
+ async def limited_search(vector):
469
+ async with semaphore:
470
+ return await self.search(
471
+ collection_name=collection_name,
472
+ query_vector=vector,
473
+ limit=limit,
474
+ with_vector=with_vectors,
475
+ include_payload=include_payload,
476
+ )
477
+
478
+ tasks = [limited_search(vector) for vector in vectors]
479
+ results = await asyncio.gather(*tasks)
480
+
481
+ # Filter results by a score threshold
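+ # __vector_score from the COSINE index behaves like a distance here, so keeping
+ # results below the threshold keeps only the closest matches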
482
+ return [
483
+ [result for result in result_group if result.score < score_threshold]
484
+ for result_group in results
485
+ ]
486
+
487
+ async def delete_data_points(
488
+ self,
489
+ collection_name: str,
490
+ data_point_ids: list[str],
491
+ ) -> dict[str, int]:
492
+ """Delete data points by their IDs.
493
+
494
+ Args:
495
+ collection_name: Name of the collection to delete from.
496
+ data_point_ids: List of data point IDs to delete.
497
+
498
+ Returns:
499
+ Dictionary containing the number of deleted documents.
500
+
501
+ Raises:
502
+ Exception: If deletion fails.
503
+ """
504
+ client = await self.get_connection()
505
+ assert self._client is not None
506
+
507
+ ids = [self._key(collection_name, id) for id in data_point_ids]
508
+
509
+ try:
510
+ deleted_count = await client.delete(ids)
511
+ logger.info(f"Deleted {deleted_count} data points from collection {collection_name}")
512
+ return {"deleted": deleted_count}
513
+ except Exception as e:
514
+ logger.error(f"Error deleting data points: {str(e)}")
515
+ raise e
516
+
517
+ async def prune(self):
518
+ """Remove all collections and data from Valkey.
519
+
520
+ This method drops all existing indices and clears the internal cache.
521
+
522
+ Raises:
523
+ Exception: If pruning fails.
524
+ """
525
+ client = await self.get_connection()
526
+ assert self._client is not None
527
+ try:
528
+ all_indexes = await ft.list(client)
529
+ for index in all_indexes:
530
+ await ft.dropindex(client, index)
531
+ logger.info(f"Dropped index {index}")
532
+
533
+ except Exception as e:
534
+ logger.error(f"Error during prune: {str(e)}")
535
+ raise e
cognee_community_vector_adapter_valkey-0.1.1.dist-info/METADATA
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: cognee-community-vector-adapter-valkey
3
+ Version: 0.1.1
4
+ Summary: Valkey vector database adapter for cognee
5
+ Requires-Python: <=3.13,>=3.11
6
+ Requires-Dist: cognee==0.5.2
7
+ Requires-Dist: numpy>=1.24.0
8
+ Requires-Dist: valkey-glide>=2.1.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: anyio>=4.0; extra == 'dev'
11
+ Requires-Dist: mypy>=1.17.1; extra == 'dev'
12
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
13
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
14
+ Requires-Dist: pytest>=7.4; extra == 'dev'
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'test'
17
+ Requires-Dist: pytest>=7.4; extra == 'test'
18
+ Requires-Dist: valkey-glide>=2.1.0; extra == 'test'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Cognee Valkey Vector Adapter
22
+
23
+ A Valkey vector database adapter for Cognee built on Valkey Glide, providing high-performance vector storage and retrieval for AI memory applications. Compared to the Redis adapter, Valkey offers a fully open-source, community-driven architecture without Redis's licensing restrictions. Valkey Glide adds efficient async operations and native support for Valkey's enhancements, giving the adapter optimal compatibility and performance for teams adopting Valkey as their primary in-memory vector store.
24
+
25
+ ## Features
26
+
27
+ - Full support for vector embeddings storage and retrieval
28
+ - Batch / pipeline operations for efficient processing
29
+ - Automatic embedding generation via configurable embedding engines
30
+ - Comprehensive error handling
31
+
32
+ ## Installation
33
+
34
+ Once the package is published, it can be installed via pip:
35
+
36
+ ```bash
37
+ pip install cognee-community-vector-adapter-valkey
38
+ ```
39
+
40
+ If it is not published yet, you can use uv to set up the adapter locally from source:
41
+
42
+ ```bash
43
+ pip install uv
44
+ uv sync --all-extras
45
+ ```
46
+
47
+ ## Prerequisites
48
+
49
+ You need a Valkey instance with the Valkey Search module enabled. You can use:
50
+
51
+ 1. **Valkey**:
52
+ ```bash
53
+ docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle
54
+ ```
55
+
56
+ ## Examples
57
+ Check out the `examples/` folder!
58
+
59
+ ```bash
60
+ uv run examples/example.py
61
+ ```
62
+
63
+ > You will need an OpenAI API key to run the example script.
64
+
65
+ ## Configuration
66
+
67
+ Configure Valkey as your vector database in cognee:
68
+
69
+ - `vector_db_provider`: Set to "valkey"
70
+ - `vector_db_url`: Valkey connection URL (e.g., "valkey://localhost:6379")
71
+
72
+ ### Environment Variables
73
+
74
+ Set the following environment variables or pass them directly in the config:
75
+
76
+ ```bash
77
+ export VECTOR_DB_URL="valkey://localhost:6379"
78
+ ```
79
+
80
+ ### Connection URL Examples
81
+
82
+ ```python
83
+ # Local Valkey
84
+ config.set_vector_db_config({
85
+ "vector_db_provider": "valkey",
86
+ "vector_db_url": "valkey://localhost:6379"
87
+ })
88
+
89
+ # Valkey with authentication
90
+ config.set_vector_db_config({
91
+ "vector_db_provider": "valkey",
92
+ "vector_db_url": "valkey://user:password@localhost:6379"
93
+ })
94
+ ```
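+
+ Depending on your setup, the adapter may also need to be registered with cognee before use; the packaged `register` module does this by calling `use_vector_adapter("valkey", ValkeyAdapter)`. A minimal sketch, reusing the `config` helper from the examples above:
+
+ ```python
+ # Importing the register module wires the "valkey" provider into cognee
+ import cognee_community_vector_adapter_valkey.register  # noqa: F401
+
+ config.set_vector_db_config({
+     "vector_db_provider": "valkey",
+     "vector_db_url": "valkey://localhost:6379"
+ })
+ ```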
95
+
96
+ ## Requirements
97
+
98
+ - Python >= 3.11, <= 3.13
99
+ - valkey-glide >= 2.1.0
100
+ - cognee == 0.5.2
101
+
102
+ ## Advanced Usage
103
+
104
+ For direct adapter usage (advanced users only):
105
+
106
+ ```python
107
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
108
+ from cognee_community_vector_adapter_valkey import ValkeyAdapter
109
+ from cognee.infrastructure.engine import DataPoint
110
+
111
+ # Initialize embedding engine and adapter
112
+ embedding_engine = EmbeddingEngine(model="your-model")
113
+ valkey_adapter = ValkeyAdapter(
114
+ url="valkey://localhost:6379",
115
+ embedding_engine=embedding_engine
116
+ )
117
+
118
+ # Direct adapter operations
119
+ await valkey_adapter.create_collection("my_collection")
120
+ data_points = [DataPoint(id="1", text="Hello", metadata={"index_fields": ["text"]})]
121
+ await valkey_adapter.create_data_points("my_collection", data_points)
122
+ results = await valkey_adapter.search("my_collection", query_text="Hello", limit=10)
123
+ ```
124
+
125
+ ## Error Handling
126
+
127
+ The adapter includes comprehensive error handling:
128
+
129
+ - `ValkeyVectorEngineInitializationError`: Raised when required parameters (e.g., the embedding engine) are missing
130
+ - `CollectionNotFoundError`: Raised when attempting operations on non-existent collections
131
+ - `MissingQueryParameterError`: Raised when neither a query text nor a query vector is provided to `search`
132
+ - Graceful handling of connection failures and embedding errors
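+
+ A minimal sketch of catching these exceptions, assuming a `valkey_adapter` and `data_points` set up as in the Advanced Usage example above:
+
+ ```python
+ from cognee_community_vector_adapter_valkey import (
+     CollectionNotFoundError,
+     ValkeyAdapter,
+     ValkeyVectorEngineInitializationError,
+ )
+
+ try:
+     await valkey_adapter.create_data_points("my_collection", data_points)
+ except CollectionNotFoundError:
+     # create_data_points refuses to write into a collection that was never created
+     await valkey_adapter.create_collection("my_collection")
+     await valkey_adapter.create_data_points("my_collection", data_points)
+
+ try:
+     # Constructing the adapter without an embedding engine is rejected up front
+     ValkeyAdapter(url="valkey://localhost:6379", embedding_engine=None)
+ except ValkeyVectorEngineInitializationError as error:
+     print(f"Adapter misconfigured: {error}")
+ ```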
133
+
134
+
135
+ ## Troubleshooting
136
+
137
+ ### Common Issues
138
+
139
+ 1. **Connection Errors**: Ensure Valkey is running and accessible at the specified URL
140
+ 2. **Search Module Missing**: Make sure Valkey has the Search module enabled
141
+ 3. **Embedding Dimension Mismatch**: Verify embedding engine dimensions match index configuration
142
+ 4. **Collection Not Found**: Always create collections before adding data points
143
+
144
+ ### Debug Logging
145
+
146
+ The adapter uses Cognee's logging system. Enable debug logging to see detailed operation logs:
147
+
148
+ ```python
149
+ import logging
150
+ logging.getLogger("ValkeyAdapter").setLevel(logging.DEBUG)
151
+ ```
152
+
153
+ ## Development
154
+
155
+ To contribute or modify the adapter:
156
+
157
+ 1. Clone the repository and `cd` into the `valkey` folder
158
+ 2. Install dependencies: `uv sync --all-extras`
159
+ 3. Make sure a Valkey instance is running (see above)
160
+ 4. Make your changes, test, and submit a PR
cognee_community_vector_adapter_valkey-0.1.1.dist-info/RECORD
@@ -0,0 +1,8 @@
1
+ cognee_community_vector_adapter_valkey/__init__.py,sha256=jEC7tJHqvrLazHCBOU22molo1p_FkjMjUItxEEoxTZA,226
2
+ cognee_community_vector_adapter_valkey/exceptions.py,sha256=mMxiVP0eOTwrrwtPVPWJFKv7Ur4Oky2_6qAeaEeB5g0,247
3
+ cognee_community_vector_adapter_valkey/register.py,sha256=Eh4lgm6TISLw3dfq2RuNpcMubvo9Kf5-wRvJ6VxClWU,158
4
+ cognee_community_vector_adapter_valkey/utils.py,sha256=dFiXNsd8WaX3gDf5o25Dz6lS0FtqnACYyOGSdIg0Lx4,5664
5
+ cognee_community_vector_adapter_valkey/valkey_adapter.py,sha256=BAzMXboHX7nRdHCHENlrJ29sP7gKWs1qUo3psPzqyhk,18481
6
+ cognee_community_vector_adapter_valkey-0.1.1.dist-info/METADATA,sha256=vRe5w9-vXTF0ZsLBTQE0yCwbEI91ELapKpCtyO6nxUY,4975
7
+ cognee_community_vector_adapter_valkey-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
8
+ cognee_community_vector_adapter_valkey-0.1.1.dist-info/RECORD,,
cognee_community_vector_adapter_valkey-0.1.1.dist-info/WHEEL
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any