cognee-community-vector-adapter-valkey 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .idea
12
+ # Env config
13
+ .env
14
+
15
+ # Cognee system
16
+ # for backward compatibility
17
+ .cognee-data
18
+ .cognee-system
19
+
20
+ # for new version
21
+ .cognee_data
22
+ .cognee_system
23
+ .data_storage
24
+ *.log
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: cognee-community-vector-adapter-valkey
3
+ Version: 0.1.1
4
+ Summary: Valkey vector database adapter for cognee
5
+ Requires-Python: <=3.13,>=3.11
6
+ Requires-Dist: cognee==0.5.2
7
+ Requires-Dist: numpy>=1.24.0
8
+ Requires-Dist: valkey-glide>=2.1.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: anyio>=4.0; extra == 'dev'
11
+ Requires-Dist: mypy>=1.17.1; extra == 'dev'
12
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
13
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
14
+ Requires-Dist: pytest>=7.4; extra == 'dev'
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'test'
17
+ Requires-Dist: pytest>=7.4; extra == 'test'
18
+ Requires-Dist: valkey-glide>=2.1.0; extra == 'test'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Cognee Valkey Vector Adapter
22
+
23
+ A Valkey vector database adapter for Cognee using Valkey Glide, providing high-performance vector storage and retrieval for AI memory applications. Compared to the Redis adapter, Valkey offers a fully open-source, community-driven architecture without the licensing restrictions of Redis. Using Valkey Glide ensures efficient async operations and native support for Valkey’s enhancements, providing optimal compatibility and performance when running on Valkey, making it the best choice for teams adopting Valkey as their primary in-memory vector solution.
24
+
25
+ ## Features
26
+
27
+ - Full support for vector embeddings storage and retrieval
28
+ - Batch / pipeline operations for efficient processing
29
+ - Automatic embedding generation via configurable embedding engines
30
+ - Comprehensive error handling
31
+
32
+ ## Installation
33
+
34
+ If published, the package can be simply installed via pip:
35
+
36
+ ```bash
37
+ pip install cognee-community-vector-adapter-valkey
38
+ ```
39
+
40
+ In case it is not published yet, you can use uv to build the adapter package locally:
41
+
42
+ ```bash
43
+ pip install uv
44
+ uv sync --all-extras
45
+ ```
46
+
47
+ ## Prerequisites
48
+
49
+ You need a Valkey instance with the Valkey Search module enabled. You can use:
50
+
51
+ 1. **Valkey**:
52
+ ```bash
53
+ docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle
54
+ ```
55
+
56
+ ## Examples
57
+ Check out the `examples/` folder!
58
+
59
+ ```bash
60
+ uv run examples/example.py
61
+ ```
62
+
63
+ >You will need an OpenAI API key to run the example script.
64
+
65
+ ## Configuration
66
+
67
+ Configure Valkey as your vector database in cognee:
68
+
69
+ - `vector_db_provider`: Set to "valkey"
70
+ - `vector_db_url`: Valkey connection URL (e.g., "valkey://localhost:6379")
71
+
72
+ ### Environment Variables
73
+
74
+ Set the following environment variables or pass them directly in the config:
75
+
76
+ ```bash
77
+ export VECTOR_DB_URL="valkey://localhost:6379"
78
+ ```
79
+
80
+ ### Connection URL Examples
81
+
82
+ ```python
83
+ # Local Valkey
84
+ config.set_vector_db_config({
85
+ "vector_db_provider": "valkey",
86
+ "vector_db_url": "valkey://localhost:6379"
87
+ })
88
+
89
+ # Valkey with authentication
90
+ config.set_vector_db_config({
91
+ "vector_db_provider": "valkey",
92
+ "vector_db_url": "valkey://user:password@localhost:6379"
93
+ })
94
+ ```
95
+
96
+ ## Requirements
97
+
98
+ - Python >= 3.11, <= 3.13
99
+ - valkey-glide >= 2.1.0
100
+ - cognee == 0.5.2
101
+
102
+ ## Advanced Usage
103
+
104
+ For direct adapter usage (advanced users only):
105
+
106
+ ```python
107
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
108
+ from cognee_community_vector_adapter_valkey import ValkeyAdapter
109
+ from cognee.infrastructure.engine import DataPoint
110
+
111
+ # Initialize embedding engine and adapter
112
+ embedding_engine = EmbeddingEngine(model="your-model")
113
+ valkey_adapter = ValkeyAdapter(
114
+ url="valkey://localhost:6379",
115
+ embedding_engine=embedding_engine
116
+ )
117
+
118
+ # Direct adapter operations
119
+ await valkey_adapter.create_collection("my_collection")
120
+ data_points = [DataPoint(id="1", text="Hello", metadata={"index_fields": ["text"]})]
121
+ await valkey_adapter.create_data_points("my_collection", data_points)
122
+ results = await valkey_adapter.search("my_collection", query_text="Hello", limit=10)
123
+ ```
124
+
125
+ ## Error Handling
126
+
127
+ The adapter includes comprehensive error handling:
128
+
129
+ - `ValkeyVectorEngineInitializationError`: Raised when required parameters are missing
130
+ - `CollectionNotFoundError`: Raised when attempting operations on non-existent collections
131
+ - `InvalidValueError`: Raised for invalid query parameters
132
+ - Graceful handling of connection failures and embedding errors
133
+
134
+
135
+ ## Troubleshooting
136
+
137
+ ### Common Issues
138
+
139
+ 1. **Connection Errors**: Ensure Valkey is running and accessible at the specified URL
140
+ 2. **Search Module Missing**: Make sure Valkey has the Search module enabled
141
+ 3. **Embedding Dimension Mismatch**: Verify embedding engine dimensions match index configuration
142
+ 4. **Collection Not Found**: Always create collections before adding data points
143
+
144
+ ### Debug Logging
145
+
146
+ The adapter uses Cognee's logging system. Enable debug logging to see detailed operation logs:
147
+
148
+ ```python
149
+ import logging
150
+ logging.getLogger("ValkeyAdapter").setLevel(logging.DEBUG)
151
+ ```
152
+
153
+ ## Development
154
+
155
+ To contribute or modify the adapter:
156
+
157
+ 1. Clone the repository and `cd` into the `valkey` folder
158
+ 2. Install dependencies: `uv sync --all-extras`
159
+ 3. Make sure a Valkey instance is running (see above)
160
+ 4. Make your changes, test, and submit a PR
@@ -0,0 +1,140 @@
1
+ # Cognee Valkey Vector Adapter
2
+
3
+ A Valkey vector database adapter for Cognee using Valkey Glide, providing high-performance vector storage and retrieval for AI memory applications. Compared to the Redis adapter, Valkey offers a fully open-source, community-driven architecture without the licensing restrictions of Redis. Using Valkey Glide ensures efficient async operations and native support for Valkey’s enhancements, providing optimal compatibility and performance when running on Valkey, making it the best choice for teams adopting Valkey as their primary in-memory vector solution.
4
+
5
+ ## Features
6
+
7
+ - Full support for vector embeddings storage and retrieval
8
+ - Batch / pipeline operations for efficient processing
9
+ - Automatic embedding generation via configurable embedding engines
10
+ - Comprehensive error handling
11
+
12
+ ## Installation
13
+
14
+ If published, the package can be simply installed via pip:
15
+
16
+ ```bash
17
+ pip install cognee-community-vector-adapter-valkey
18
+ ```
19
+
20
+ In case it is not published yet, you can use uv to build the adapter package locally:
21
+
22
+ ```bash
23
+ pip install uv
24
+ uv sync --all-extras
25
+ ```
26
+
27
+ ## Prerequisites
28
+
29
+ You need a Valkey instance with the Valkey Search module enabled. You can use:
30
+
31
+ 1. **Valkey**:
32
+ ```bash
33
+ docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle
34
+ ```
35
+
36
+ ## Examples
37
+ Check out the `examples/` folder!
38
+
39
+ ```bash
40
+ uv run examples/example.py
41
+ ```
42
+
43
+ >You will need an OpenAI API key to run the example script.
44
+
45
+ ## Configuration
46
+
47
+ Configure Valkey as your vector database in cognee:
48
+
49
+ - `vector_db_provider`: Set to "valkey"
50
+ - `vector_db_url`: Valkey connection URL (e.g., "valkey://localhost:6379")
51
+
52
+ ### Environment Variables
53
+
54
+ Set the following environment variables or pass them directly in the config:
55
+
56
+ ```bash
57
+ export VECTOR_DB_URL="valkey://localhost:6379"
58
+ ```
59
+
60
+ ### Connection URL Examples
61
+
62
+ ```python
63
+ # Local Valkey
64
+ config.set_vector_db_config({
65
+ "vector_db_provider": "valkey",
66
+ "vector_db_url": "valkey://localhost:6379"
67
+ })
68
+
69
+ # Valkey with authentication
70
+ config.set_vector_db_config({
71
+ "vector_db_provider": "valkey",
72
+ "vector_db_url": "valkey://user:password@localhost:6379"
73
+ })
74
+ ```
75
+
76
+ ## Requirements
77
+
78
+ - Python >= 3.11, <= 3.13
79
+ - valkey-glide >= 2.1.0
80
+ - cognee == 0.5.2
81
+
82
+ ## Advanced Usage
83
+
84
+ For direct adapter usage (advanced users only):
85
+
86
+ ```python
87
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
88
+ from cognee_community_vector_adapter_valkey import ValkeyAdapter
89
+ from cognee.infrastructure.engine import DataPoint
90
+
91
+ # Initialize embedding engine and adapter
92
+ embedding_engine = EmbeddingEngine(model="your-model")
93
+ valkey_adapter = ValkeyAdapter(
94
+ url="valkey://localhost:6379",
95
+ embedding_engine=embedding_engine
96
+ )
97
+
98
+ # Direct adapter operations
99
+ await valkey_adapter.create_collection("my_collection")
100
+ data_points = [DataPoint(id="1", text="Hello", metadata={"index_fields": ["text"]})]
101
+ await valkey_adapter.create_data_points("my_collection", data_points)
102
+ results = await valkey_adapter.search("my_collection", query_text="Hello", limit=10)
103
+ ```
104
+
105
+ ## Error Handling
106
+
107
+ The adapter includes comprehensive error handling:
108
+
109
+ - `ValkeyVectorEngineInitializationError`: Raised when required parameters are missing
110
+ - `CollectionNotFoundError`: Raised when attempting operations on non-existent collections
111
+ - `InvalidValueError`: Raised for invalid query parameters
112
+ - Graceful handling of connection failures and embedding errors
113
+
114
+
115
+ ## Troubleshooting
116
+
117
+ ### Common Issues
118
+
119
+ 1. **Connection Errors**: Ensure Valkey is running and accessible at the specified URL
120
+ 2. **Search Module Missing**: Make sure Valkey has the Search module enabled
121
+ 3. **Embedding Dimension Mismatch**: Verify embedding engine dimensions match index configuration
122
+ 4. **Collection Not Found**: Always create collections before adding data points
123
+
124
+ ### Debug Logging
125
+
126
+ The adapter uses Cognee's logging system. Enable debug logging to see detailed operation logs:
127
+
128
+ ```python
129
+ import logging
130
+ logging.getLogger("ValkeyAdapter").setLevel(logging.DEBUG)
131
+ ```
132
+
133
+ ## Development
134
+
135
+ To contribute or modify the adapter:
136
+
137
+ 1. Clone the repository and `cd` into the `valkey` folder
138
+ 2. Install dependencies: `uv sync --all-extras`
139
+ 3. Make sure a Valkey instance is running (see above)
140
+ 4. Make your changes, test, and submit a PR
@@ -0,0 +1,7 @@
1
+ from .cognee_community_vector_adapter_valkey import ValkeyAdapter
2
+ from .cognee_community_vector_adapter_valkey.exceptions import (
3
+ CollectionNotFoundError,
4
+ ValkeyVectorEngineInitializationError,
5
+ )
6
+
7
+ __all__ = ["ValkeyAdapter", "ValkeyVectorEngineInitializationError", "CollectionNotFoundError"]
@@ -0,0 +1,4 @@
1
+ from .exceptions import CollectionNotFoundError, ValkeyVectorEngineInitializationError
2
+ from .valkey_adapter import ValkeyAdapter
3
+
4
+ __all__ = ["ValkeyAdapter", "ValkeyVectorEngineInitializationError", "CollectionNotFoundError"]
@@ -0,0 +1,10 @@
1
class ValkeyVectorEngineInitializationError(Exception):
    """Signal that the vector engine could not be initialized."""
5
+
6
+
7
class CollectionNotFoundError(Exception):
    """Signal that a requested collection does not exist."""
@@ -0,0 +1,5 @@
1
+ from cognee.infrastructure.databases.vector import use_vector_adapter
2
+
3
+ from .valkey_adapter import ValkeyAdapter
4
+
5
+ use_vector_adapter("valkey", ValkeyAdapter)
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import struct
5
+ from functools import singledispatch
6
+ from typing import Any
7
+ from urllib.parse import urlparse
8
+ from uuid import UUID
9
+
10
+ from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
11
+
12
+ """
13
+ Internal helper function. Not part of the public API.
14
+ """
15
+
16
+
17
def _parse_host_port(url: str) -> tuple[str, int]:
    """Split a connection URL into its host and port.

    Args:
        url: Connection URL, e.g. "valkey://localhost:6379".

    Returns:
        A ``(host, port)`` pair. ``host`` falls back to "localhost" and
        ``port`` falls back to 6379 when the URL does not supply them.
    """
    components = urlparse(url)
    return components.hostname or "localhost", components.port or 6379
34
+
35
+
36
def _to_float32_bytes(vec) -> bytes:
    """Pack numeric values into consecutive 32-bit floats.

    Args:
        vec: Any iterable of numbers (list, tuple, generator, ...). Each item
            is converted with ``float()`` before packing.

    Returns:
        bytes: The values packed back to back as single-precision floats,
        four bytes per value. An empty input yields ``b""``.

    Notes:
        - Packing uses ``struct.pack`` with the format ``"{count}f"``, i.e.
          the platform's native byte order.
        - The input is materialized once so that iterables without ``len()``
          (such as generators) are supported.
    """
    values = [float(item) for item in vec]
    return struct.pack(f"{len(values)}f", *values)
54
+
55
+
56
@singledispatch
def _serialize_for_json(obj: Any) -> Any:
    """Convert an object to a JSON-serializable form.

    This is the default serialization: the object is returned as-is. The
    handlers registered below convert UUIDs to strings and recurse into
    dicts, lists and tuples.

    Args:
        obj: Object to serialize (UUID, dict, list, tuple, or any other type).

    Returns:
        The converted object; values of unregistered types are returned
        unchanged.
    """
    return obj


@_serialize_for_json.register(UUID)
def _(obj: UUID) -> str:
    # UUIDs are not accepted by json.dumps; use their string form.
    return str(obj)


@_serialize_for_json.register(dict)
def _(obj: dict) -> dict:
    # Only the values are converted; keys are kept unchanged.
    return {k: _serialize_for_json(v) for k, v in obj.items()}


@_serialize_for_json.register(list)
def _(obj: list) -> list:
    return [_serialize_for_json(item) for item in obj]


@_serialize_for_json.register(tuple)
def _(obj: tuple) -> tuple:
    # Without this handler a tuple falls through to the default and any UUID
    # nested inside it stays unconverted, which makes json.dumps fail.
    return tuple(_serialize_for_json(item) for item in obj)
83
+
84
+
85
+ def _b2s(x: bytes | bytearray | str) -> str:
86
+ """Convert bytes or bytearray to a UTF-8 string if possible,
87
+ otherwise return a string representation.
88
+
89
+ Args:
90
+ x (Any): The input value, which may be bytes, bytearray, or any other type.
91
+
92
+ Returns:
93
+ Any: A decoded UTF-8 string if `x` is bytes or bytearray; otherwise, returns `x` unchanged.
94
+ If decoding fails, returns the string representation of `x`.
95
+ """
96
+
97
+ if isinstance(x, (bytes, bytearray)):
98
+ try:
99
+ return x.decode("utf-8")
100
+ except Exception:
101
+ return str(x)
102
+ return x
103
+
104
+
105
def _build_scored_results_from_ft(
    raw: Any,
    *,
    use_key_suffix_when_missing_id: bool = True,
) -> list[ScoredResult]:
    """Build a list of `ScoredResult` objects from a raw FT search response.

    Args:
        raw (Any): The raw response from Valkey's FT search command, expected to be a list or
            tuple whose second element is a mapping of keys to field dictionaries.
        use_key_suffix_when_missing_id (bool): Accepted for API compatibility. It is not
            consulted by this function: when the `id` field is missing, the decoded key is
            always used as the ID.
            NOTE(review): confirm whether `False` was meant to change the fallback.

    Returns:
        list[ScoredResult]: One result per well-formed entry, each containing:
            - id: Taken from the `id` field, or the decoded key when `id` is missing.
            - payload (dict): Parsed JSON from the `payload_data` field (see
              `_parse_ft_payload` for the fallbacks).
            - score (float | None): `__vector_score` converted to float, if present.
        An empty list is returned when `raw` does not have the expected shape.

    Notes:
        - Field names are looked up as bytes first, then as str.
        - Entries whose field container is not a dict are skipped.
    """
    if not isinstance(raw, (list, tuple)) or len(raw) < 2 or not isinstance(raw[1], dict):
        return []

    scored: list[ScoredResult] = []
    for key, fields in raw[1].items():  # raw[1] is the { key -> fields } dict
        if not isinstance(fields, dict):
            # Malformed entry: skip it rather than failing the whole result set.
            continue

        # Extract id, falling back to the key itself
        raw_id = _ft_field(fields, "id")
        result_id = _b2s(raw_id) if raw_id is not None else _b2s(key)

        # Extract score
        raw_score = _ft_field(fields, "__vector_score")
        score = float(raw_score) if raw_score is not None else None

        scored.append(
            ScoredResult(
                id=result_id,
                payload=_parse_ft_payload(_ft_field(fields, "payload_data")),
                score=score,
            )
        )

    return scored


def _ft_field(fields: dict[Any, Any], name: str) -> Any:
    """Return the value stored under `name`, preferring the bytes key over the str key."""
    encoded = name.encode("utf-8")
    if encoded in fields:
        return fields[encoded]
    return fields.get(name)


def _parse_ft_payload(payload_raw: Any) -> dict[str, Any]:
    """Turn a raw `payload_data` value into a payload dict.

    Returns `{}` when the value is missing or is not text, the parsed object when it is a
    JSON object, `{"_payload": obj}` for any other JSON value, and
    `{"_payload_raw": text}` when the text is not valid JSON.
    """
    if payload_raw is None:
        return {}
    text = _b2s(payload_raw)
    if not isinstance(text, str):
        return {}
    try:
        obj = json.loads(text)
    except json.JSONDecodeError:
        # Keep the raw string if malformed
        return {"_payload_raw": text}
    # If it's not a dict (e.g., list), wrap it
    return obj if isinstance(obj, dict) else {"_payload": obj}