pulsedb 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulsedb-1.0.0/LICENSE +22 -0
- pulsedb-1.0.0/PKG-INFO +208 -0
- pulsedb-1.0.0/README.md +189 -0
- pulsedb-1.0.0/pyproject.toml +32 -0
- pulsedb-1.0.0/sdk/langchain_pulsedb/__init__.py +6 -0
- pulsedb-1.0.0/sdk/langchain_pulsedb/vectorstore.py +110 -0
- pulsedb-1.0.0/sdk/pulsedb/__init__.py +50 -0
- pulsedb-1.0.0/sdk/pulsedb/async_client.py +325 -0
- pulsedb-1.0.0/sdk/pulsedb/client.py +162 -0
- pulsedb-1.0.0/sdk/pulsedb/exceptions.py +23 -0
- pulsedb-1.0.0/sdk/pulsedb.egg-info/PKG-INFO +208 -0
- pulsedb-1.0.0/sdk/pulsedb.egg-info/SOURCES.txt +19 -0
- pulsedb-1.0.0/sdk/pulsedb.egg-info/dependency_links.txt +1 -0
- pulsedb-1.0.0/sdk/pulsedb.egg-info/requires.txt +3 -0
- pulsedb-1.0.0/sdk/pulsedb.egg-info/top_level.txt +2 -0
- pulsedb-1.0.0/setup.cfg +4 -0
- pulsedb-1.0.0/tests/test_core.py +137 -0
- pulsedb-1.0.0/tests/test_sdk.py +72 -0
- pulsedb-1.0.0/tests/test_sorted_sets.py +82 -0
- pulsedb-1.0.0/tests/test_types.py +86 -0
- pulsedb-1.0.0/tests/test_vector.py +96 -0
pulsedb-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
|
4
|
+
"Business Source License" is a trademark of MariaDB Corporation Ab.
|
|
5
|
+
|
|
6
|
+
Parameters
|
|
7
|
+
Licensor: G Kavinrajan
|
|
8
|
+
Licensed Work: PulseDB
|
|
9
|
+
Additional Use Grant: You may make use of the Licensed Work for non-production purposes, including testing, development, and academic research. You may not use the Licensed Work for any production purpose without a commercial agreement with the Licensor.
|
|
10
|
+
Change Date: 2030-06-28
|
|
11
|
+
Change License: Apache License, Version 2.0
|
|
12
|
+
|
|
13
|
+
Terms
|
|
14
|
+
|
|
15
|
+
The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensed Work is provided "AS IS", without warranties or conditions of any kind.
|
|
16
|
+
|
|
17
|
+
You may also make production use of the Licensed Work, provided such use does not exceed the Additional Use Grant (if any). If your use exceeds the Additional Use Grant, you must acquire a commercial license from the Licensor.
|
|
18
|
+
|
|
19
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and such rights shall replace all rights and restrictions granted under this License.
|
|
20
|
+
|
|
21
|
+
Disclaimer of Warranty
|
|
22
|
+
THE LICENSED WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED WORK OR THE USE OR OTHER DEALINGS IN THE LICENSED WORK.
|
pulsedb-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pulsedb
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: High-performance Python Vector Database & Memory Engine with RESP2 support.
|
|
5
|
+
Author-email: G Kavinrajan <gkavinrajan@example.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/gkavinrajanCodes/pulseDB
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/gkavinrajanCodes/pulseDB/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Database
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: redis>=5.0.0
|
|
16
|
+
Requires-Dist: numpy>=1.20.0
|
|
17
|
+
Requires-Dist: hnswlib>=0.8.0
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
<div align="center">
|
|
21
|
+
|
|
22
|
+
# ⚡ PulseDB
|
|
23
|
+
|
|
24
|
+
**An enterprise-grade, in-memory database with a native AI Vector Engine.**
|
|
25
|
+
|
|
26
|
+
Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
|
|
27
|
+
|
|
28
|
+
[CI status](https://github.com/gkavinrajanCodes/pulseDB/actions)
|
|
29
|
+
[PyPI package](https://pypi.org/project/pulsedb/)
|
|
30
|
+
[License: BSL 1.1](LICENSE)
|
|
31
|
+
|
|
32
|
+
</div>
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## What is PulseDB?
|
|
37
|
+
|
|
38
|
+
PulseDB is a high-performance, source-available database that combines:
|
|
39
|
+
|
|
40
|
+
- **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
|
|
41
|
+
- **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
|
|
42
|
+
- **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
|
|
43
|
+
- **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
|
|
44
|
+
|
|
45
|
+
> One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Features
|
|
50
|
+
|
|
51
|
+
| Category | Capability |
|
|
52
|
+
|---|---|
|
|
53
|
+
| **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
|
|
54
|
+
| **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
|
|
55
|
+
| **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
|
|
56
|
+
| **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
|
|
57
|
+
| **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
|
|
58
|
+
| **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
|
|
59
|
+
| **Cluster** | Consistent hashing, multi-node routing |
|
|
60
|
+
| **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
|
|
61
|
+
| **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
|
|
62
|
+
| **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Quickstart
|
|
67
|
+
|
|
68
|
+
### 1. Run the Server (Docker)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
docker run -d \
|
|
72
|
+
-p 6379:6379 \
|
|
73
|
+
-p 8000:8000 \
|
|
74
|
+
-v pulsedb_data:/app/data \
|
|
75
|
+
--name pulsedb \
|
|
76
|
+
ghcr.io/gkavinrajancodes/pulsedb:latest
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Or use Docker Compose for a 3-node cluster:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git clone https://github.com/gkavinrajanCodes/pulseDB.git
|
|
83
|
+
cd pulseDB && docker-compose up --build
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 2. Install the SDK
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install pulsedb
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 3. Use It
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from pulsedb import PulseDB
|
|
96
|
+
|
|
97
|
+
db = PulseDB(host="localhost", port=6379)
|
|
98
|
+
|
|
99
|
+
# Standard KV Store
|
|
100
|
+
db.set("session:abc", "user_data", ttl=3600)
|
|
101
|
+
print(db.get("session:abc")) # "user_data"
|
|
102
|
+
|
|
103
|
+
# AI Memory Engine — insert vectors with metadata
|
|
104
|
+
db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
|
|
105
|
+
db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
|
|
106
|
+
|
|
107
|
+
# Semantic similarity search — optionally filter by metadata
|
|
108
|
+
results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
|
|
109
|
+
# → [{"id": "article:1", "score": 0.997}]
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## LangChain Integration
|
|
115
|
+
|
|
116
|
+
PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from langchain_openai import OpenAIEmbeddings
|
|
120
|
+
from langchain_pulsedb.vectorstore import PulseDBVectorStore
|
|
121
|
+
|
|
122
|
+
store = PulseDBVectorStore(
|
|
123
|
+
embedding=OpenAIEmbeddings(),
|
|
124
|
+
host="localhost",
|
|
125
|
+
port=6379,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Ingest documents — metadata is automatically stored for hybrid filtering
|
|
129
|
+
store.add_texts(
|
|
130
|
+
texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
|
|
131
|
+
metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
# Hybrid search — find similar docs but only from the blog source
|
|
135
|
+
docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## How the AI Memory Engine Works
|
|
141
|
+
|
|
142
|
+
Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
|
|
143
|
+
|
|
144
|
+
PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
|
|
148
|
+
↓ Pass → included in result set
|
|
149
|
+
↓ Fail → skipped immediately
|
|
150
|
+
Top-K results returned
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
This means your effective `top_k` is always accurate, even with highly restrictive filters.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Architecture
|
|
158
|
+
|
|
159
|
+
```mermaid
|
|
160
|
+
graph TD
|
|
161
|
+
Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
|
|
162
|
+
Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
|
|
163
|
+
TCP --> Router["Command Router"]
|
|
164
|
+
HTTP --> Router
|
|
165
|
+
Router --> KV["16-Shard KV Store (LRU + TTL)"]
|
|
166
|
+
Router --> VE["AI Vector Engine (hnswlib HNSW)"]
|
|
167
|
+
Router --> DT["Data Types (Lists, Hashes)"]
|
|
168
|
+
Router --> PS["Pub/Sub Engine"]
|
|
169
|
+
KV --> WAL["Write-Ahead Log"]
|
|
170
|
+
VE --> Snap["HNSW Binary Snapshot"]
|
|
171
|
+
WAL --> Snap
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Run Locally (From Source)
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
# 1. Clone and install
|
|
180
|
+
git clone https://github.com/gkavinrajanCodes/pulseDB.git
|
|
181
|
+
cd pulseDB
|
|
182
|
+
python3.10 -m venv workenv && source workenv/bin/activate
|
|
183
|
+
pip install -r requirements.txt
|
|
184
|
+
|
|
185
|
+
# 2. Start the server
|
|
186
|
+
NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
|
|
187
|
+
|
|
188
|
+
# 3. Install the SDK (in another terminal)
|
|
189
|
+
pip install -e sdk/
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Contributing
|
|
195
|
+
|
|
196
|
+
1. Fork the repository
|
|
197
|
+
2. Create a feature branch: `git checkout -b feature/sorted-sets`
|
|
198
|
+
3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
|
|
199
|
+
4. Push: `git push origin feature/sorted-sets`
|
|
200
|
+
5. Open a Pull Request
|
|
201
|
+
|
|
202
|
+
All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.
|
pulsedb-1.0.0/README.md
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# ⚡ PulseDB
|
|
4
|
+
|
|
5
|
+
**An enterprise-grade, in-memory database with a native AI Vector Engine.**
|
|
6
|
+
|
|
7
|
+
Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
|
|
8
|
+
|
|
9
|
+
[CI status](https://github.com/gkavinrajanCodes/pulseDB/actions)
|
|
10
|
+
[PyPI package](https://pypi.org/project/pulsedb/)
|
|
11
|
+
[License: BSL 1.1](LICENSE)
|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## What is PulseDB?
|
|
18
|
+
|
|
19
|
+
PulseDB is a high-performance, source-available database that combines:
|
|
20
|
+
|
|
21
|
+
- **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
|
|
22
|
+
- **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
|
|
23
|
+
- **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
|
|
24
|
+
- **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
|
|
25
|
+
|
|
26
|
+
> One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Features
|
|
31
|
+
|
|
32
|
+
| Category | Capability |
|
|
33
|
+
|---|---|
|
|
34
|
+
| **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
|
|
35
|
+
| **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
|
|
36
|
+
| **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
|
|
37
|
+
| **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
|
|
38
|
+
| **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
|
|
39
|
+
| **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
|
|
40
|
+
| **Cluster** | Consistent hashing, multi-node routing |
|
|
41
|
+
| **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
|
|
42
|
+
| **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
|
|
43
|
+
| **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
### 1. Run the Server (Docker)
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
docker run -d \
|
|
53
|
+
-p 6379:6379 \
|
|
54
|
+
-p 8000:8000 \
|
|
55
|
+
-v pulsedb_data:/app/data \
|
|
56
|
+
--name pulsedb \
|
|
57
|
+
ghcr.io/gkavinrajancodes/pulsedb:latest
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Or use Docker Compose for a 3-node cluster:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
git clone https://github.com/gkavinrajanCodes/pulseDB.git
|
|
64
|
+
cd pulseDB && docker-compose up --build
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 2. Install the SDK
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install pulsedb
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 3. Use It
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from pulsedb import PulseDB
|
|
77
|
+
|
|
78
|
+
db = PulseDB(host="localhost", port=6379)
|
|
79
|
+
|
|
80
|
+
# Standard KV Store
|
|
81
|
+
db.set("session:abc", "user_data", ttl=3600)
|
|
82
|
+
print(db.get("session:abc")) # "user_data"
|
|
83
|
+
|
|
84
|
+
# AI Memory Engine — insert vectors with metadata
|
|
85
|
+
db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
|
|
86
|
+
db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
|
|
87
|
+
|
|
88
|
+
# Semantic similarity search — optionally filter by metadata
|
|
89
|
+
results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
|
|
90
|
+
# → [{"id": "article:1", "score": 0.997}]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## LangChain Integration
|
|
96
|
+
|
|
97
|
+
PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from langchain_openai import OpenAIEmbeddings
|
|
101
|
+
from langchain_pulsedb.vectorstore import PulseDBVectorStore
|
|
102
|
+
|
|
103
|
+
store = PulseDBVectorStore(
|
|
104
|
+
embedding=OpenAIEmbeddings(),
|
|
105
|
+
host="localhost",
|
|
106
|
+
port=6379,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Ingest documents — metadata is automatically stored for hybrid filtering
|
|
110
|
+
store.add_texts(
|
|
111
|
+
texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
|
|
112
|
+
metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Hybrid search — find similar docs but only from the blog source
|
|
116
|
+
docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## How the AI Memory Engine Works
|
|
122
|
+
|
|
123
|
+
Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
|
|
124
|
+
|
|
125
|
+
PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
|
|
129
|
+
↓ Pass → included in result set
|
|
130
|
+
↓ Fail → skipped immediately
|
|
131
|
+
Top-K results returned
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
This means your effective `top_k` is always accurate, even with highly restrictive filters.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Architecture
|
|
139
|
+
|
|
140
|
+
```mermaid
|
|
141
|
+
graph TD
|
|
142
|
+
Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
|
|
143
|
+
Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
|
|
144
|
+
TCP --> Router["Command Router"]
|
|
145
|
+
HTTP --> Router
|
|
146
|
+
Router --> KV["16-Shard KV Store (LRU + TTL)"]
|
|
147
|
+
Router --> VE["AI Vector Engine (hnswlib HNSW)"]
|
|
148
|
+
Router --> DT["Data Types (Lists, Hashes)"]
|
|
149
|
+
Router --> PS["Pub/Sub Engine"]
|
|
150
|
+
KV --> WAL["Write-Ahead Log"]
|
|
151
|
+
VE --> Snap["HNSW Binary Snapshot"]
|
|
152
|
+
WAL --> Snap
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Run Locally (From Source)
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# 1. Clone and install
|
|
161
|
+
git clone https://github.com/gkavinrajanCodes/pulseDB.git
|
|
162
|
+
cd pulseDB
|
|
163
|
+
python3.10 -m venv workenv && source workenv/bin/activate
|
|
164
|
+
pip install -r requirements.txt
|
|
165
|
+
|
|
166
|
+
# 2. Start the server
|
|
167
|
+
NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
|
|
168
|
+
|
|
169
|
+
# 3. Install the SDK (in another terminal)
|
|
170
|
+
pip install -e sdk/
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Contributing
|
|
176
|
+
|
|
177
|
+
1. Fork the repository
|
|
178
|
+
2. Create a feature branch: `git checkout -b feature/sorted-sets`
|
|
179
|
+
3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
|
|
180
|
+
4. Push: `git push origin feature/sorted-sets`
|
|
181
|
+
5. Open a Pull Request
|
|
182
|
+
|
|
183
|
+
All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pulsedb"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="G Kavinrajan", email="gkavinrajan@example.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "High-performance Python Vector Database & Memory Engine with RESP2 support."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: Other/Proprietary License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Topic :: Database",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"redis>=5.0.0",
|
|
22
|
+
"numpy>=1.20.0",
|
|
23
|
+
"hnswlib>=0.8.0"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
"Homepage" = "https://github.com/gkavinrajanCodes/pulseDB"
|
|
28
|
+
"Bug Tracker" = "https://github.com/gkavinrajanCodes/pulseDB/issues"
|
|
29
|
+
|
|
30
|
+
[tool.setuptools.packages.find]
|
|
31
|
+
where = ["sdk"]
|
|
32
|
+
include = ["pulsedb", "pulsedb.*", "langchain_pulsedb", "langchain_pulsedb.*"]
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Copyright (c) 2026 G Kavinrajan. All rights reserved.
|
|
2
|
+
# Licensed under the Business Source License 1.1
|
|
3
|
+
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, Iterable, List, Optional, Tuple, Dict
|
|
6
|
+
|
|
7
|
+
from langchain_core.documents import Document
|
|
8
|
+
from langchain_core.embeddings import Embeddings
|
|
9
|
+
from langchain_core.vectorstores import VectorStore
|
|
10
|
+
|
|
11
|
+
from pulsedb import PulseDB
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PulseDBVectorStore(VectorStore):
    """LangChain ``VectorStore`` adapter that stores embeddings in PulseDB.

    Each document is written under the key ``"<collection_name>:<doc_id>"``.
    The document text travels inside the vector's metadata under the reserved
    ``"_text"`` key so that a search hit can be turned back into a
    ``Document``; a caller-supplied ``"_text"`` metadata entry is overwritten.
    """

    def __init__(
        self,
        embedding: Embeddings,
        client: Optional[PulseDB] = None,
        host: str = "localhost",
        port: int = 6379,
        collection_name: str = "langchain",
    ):
        """Create the store.

        Args:
            embedding: Embedding model used for both documents and queries.
            client: Existing ``PulseDB`` client to reuse. When omitted, a new
                client is created from ``host`` and ``port``.
            host: Server host, used only when ``client`` is not given.
            port: Server port, used only when ``client`` is not given.
            collection_name: Prefix that namespaces this store's keys.
        """
        self._embedding = embedding
        # Compare against None explicitly so that a client object which
        # happens to be falsy is not silently replaced by a new connection.
        self._client = client if client is not None else PulseDB(host=host, port=port)
        self._collection = collection_name

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Return the embedding model this store was constructed with."""
        return self._embedding

    def _get_key(self, doc_id: str) -> str:
        """Return the collection-prefixed storage key for ``doc_id``."""
        return f"{self._collection}:{doc_id}"

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Embed ``texts`` and upsert them into the vector store.

        Args:
            texts: Texts to embed and store.
            metadatas: Optional per-text metadata; must match ``texts`` in
                length when given.
            ids: Optional per-text document ids; must match ``texts`` in
                length when given. Random UUID4 strings are used otherwise.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The (un-prefixed) document ids, in the order of ``texts``.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` has a different length
                than ``texts``.
        """
        texts = list(texts)
        if not texts:
            return []

        # Validate before embedding: zip() below would otherwise truncate
        # silently and report ids for documents that were never stored.
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        elif len(ids) != len(texts):
            raise ValueError(
                f"Got {len(ids)} ids for {len(texts)} texts; lengths must match."
            )
        if metadatas is None:
            metadatas = [{} for _ in texts]
        elif len(metadatas) != len(texts):
            raise ValueError(
                f"Got {len(metadatas)} metadatas for {len(texts)} texts; "
                "lengths must match."
            )

        embeddings = self._embedding.embed_documents(texts)

        batch = [
            {
                "id": self._get_key(doc_id),
                "vector": vector,
                # Build a fresh dict so the caller's metadata is not mutated;
                # tolerate None entries in ``metadatas``.
                "metadata": {**(metadata or {}), "_text": text},
            }
            for text, metadata, doc_id, vector in zip(texts, metadatas, ids, embeddings)
        ]
        self._client.vectors.upsert_batch(batch)

        return ids

    def similarity_search(
        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> List[Document]:
        """Return the documents most similar to ``query``, without scores."""
        scored = self.similarity_search_with_score(query, k=k, filter=filter, **kwargs)
        return [doc for doc, _ in scored]

    def similarity_search_with_score(
        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Return the documents most similar to ``query`` with their scores.

        Args:
            query: Text to embed and search for.
            k: ``top_k`` passed to the vector search.
            filter: Optional metadata filter forwarded to the vector search.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``(Document, score)`` pairs in the order returned by the client.
            NOTE: hits whose key lies outside this collection, or whose record
            can no longer be fetched, are dropped *after* the search, so fewer
            than ``k`` results may be returned.
        """
        query_vector = self._embedding.embed_query(query)
        hits = self._client.vectors.search(query_vector, top_k=k, filter=filter)

        prefix = f"{self._collection}:"
        docs_with_scores: List[Tuple[Document, float]] = []
        for hit in hits:
            key = hit["id"]
            score = hit["score"]

            # The index is shared; ignore vectors written by other collections.
            if not key.startswith(prefix):
                continue

            # The search hit carries only id and score; fetch the full record
            # to recover the text and metadata.
            record = self._client.vectors.get(key)
            if not record:
                continue

            # Copy so that popping "_text" does not mutate the client's dict,
            # and tolerate a record whose metadata is missing or None.
            metadata = dict(record.get("metadata") or {})
            text = metadata.pop("_text", "")
            docs_with_scores.append((Document(page_content=text, metadata=metadata), score))

        return docs_with_scores

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        *,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> "PulseDBVectorStore":
        """Build a store and populate it with ``texts``.

        Args:
            texts: Texts to embed and store.
            embedding: Embedding model for the new store.
            metadatas: Optional per-text metadata.
            ids: Optional per-text document ids, forwarded to ``add_texts``.
            **kwargs: Forwarded to the constructor (``client``, ``host``,
                ``port``, ``collection_name``).

        Returns:
            The populated store.
        """
        store = cls(embedding, **kwargs)
        store.add_texts(texts, metadatas, ids=ids)
        return store
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Copyright (c) 2026 G Kavinrajan. All rights reserved.
|
|
2
|
+
# Licensed under the Business Source License 1.1
|
|
3
|
+
|
|
4
|
+
# sdk/pulsedb/__init__.py
|
|
5
|
+
"""
|
|
6
|
+
PulseDB Python SDK
|
|
7
|
+
|
|
8
|
+
Connects to PulseDB over the high-performance TCP Binary Protocol (port 6379).
|
|
9
|
+
|
|
10
|
+
Usage (sync):
|
|
11
|
+
from pulsedb import PulseDB
|
|
12
|
+
|
|
13
|
+
db = PulseDB(host="localhost", port=6379)
|
|
14
|
+
db.set("user:123", "alice", ttl=3600)
|
|
15
|
+
print(db.get("user:123")) # "alice"
|
|
16
|
+
|
|
17
|
+
# AI Memory Engine (Vector Search)
|
|
18
|
+
db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"category": "news"})
|
|
19
|
+
results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"category": "news"})
|
|
20
|
+
|
|
21
|
+
Usage (async):
|
|
22
|
+
from pulsedb import AsyncPulseDB
|
|
23
|
+
|
|
24
|
+
async def main():
|
|
25
|
+
async with AsyncPulseDB(host="localhost", port=6379) as db:
|
|
26
|
+
await db.set("counter", 0)
|
|
27
|
+
await db.incr("counter")
|
|
28
|
+
await db.vectors.upsert("doc1", [0.1, 0.2, 0.3])
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from .client import PulseDB
|
|
32
|
+
from .async_client import AsyncPulseDB
|
|
33
|
+
from .exceptions import (
|
|
34
|
+
PulseDBError,
|
|
35
|
+
ConnectionError,
|
|
36
|
+
AuthenticationError,
|
|
37
|
+
CommandError,
|
|
38
|
+
TimeoutError,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Must match the distribution version declared in pyproject.toml / PKG-INFO.
__version__ = "1.0.0"
|
|
42
|
+
# Public API re-exported by the package.
# NOTE: "ConnectionError" and "TimeoutError" share their names with Python
# built-in exceptions, so `from pulsedb import *` shadows those built-ins.
__all__ = [
    # Clients
    "PulseDB",
    "AsyncPulseDB",
    # Exceptions
    "PulseDBError",
    "ConnectionError",
    "AuthenticationError",
    "CommandError",
    "TimeoutError",
]
|