pulsedb 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pulsedb-1.0.0/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Business Source License 1.1
2
+
3
+ License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
4
+ "Business Source License" is a trademark of MariaDB Corporation Ab.
5
+
6
+ Parameters
7
+ Licensor: G Kavinrajan
8
+ Licensed Work: PulseDB
9
+ Additional Use Grant: You may make use of the Licensed Work for non-production purposes, including testing, development, and academic research. You may not use the Licensed Work for any production purpose without a commercial agreement with the Licensor.
10
+ Change Date: 2030-06-28
11
+ Change License: Apache License, Version 2.0
12
+
13
+ Terms
14
+
15
+ The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensed Work is provided "AS IS", without warranties or conditions of any kind.
16
+
17
+ You may also make production use of the Licensed Work, provided such use does not exceed the Additional Use Grant (if any). If your use exceeds the Additional Use Grant, you must acquire a commercial license from the Licensor.
18
+
19
+ Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and such rights shall replace all rights and restrictions granted under this License.
20
+
21
+ Disclaimer of Warranty
22
+ THE LICENSED WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED WORK OR THE USE OR OTHER DEALINGS IN THE LICENSED WORK.
pulsedb-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: pulsedb
3
+ Version: 1.0.0
4
+ Summary: High-performance Python Vector Database & Memory Engine with RESP2 support.
5
+ Author-email: G Kavinrajan <gkavinrajan@example.com>
6
+ Project-URL: Homepage, https://github.com/gkavinrajanCodes/pulseDB
7
+ Project-URL: Bug Tracker, https://github.com/gkavinrajanCodes/pulseDB/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Database
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: redis>=5.0.0
16
+ Requires-Dist: numpy>=1.20.0
17
+ Requires-Dist: hnswlib>=0.8.0
18
+ Dynamic: license-file
19
+
20
+ <div align="center">
21
+
22
+ # ⚡ PulseDB
23
+
24
+ **An enterprise-grade, in-memory database with a native AI Vector Engine.**
25
+
26
+ Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
27
+
28
+ [![CI](https://github.com/gkavinrajanCodes/pulseDB/actions/workflows/python-app.yml/badge.svg)](https://github.com/gkavinrajanCodes/pulseDB/actions)
29
+ [![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/pulsedb/)
30
+ [![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
31
+
32
+ </div>
33
+
34
+ ---
35
+
36
+ ## What is PulseDB?
37
+
38
+ PulseDB is a high-performance, source-available database that combines:
39
+
40
+ - **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
41
+ - **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
42
+ - **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
43
+ - **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
44
+
45
+ > One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
46
+
47
+ ---
48
+
49
+ ## Features
50
+
51
+ | Category | Capability |
52
+ |---|---|
53
+ | **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
54
+ | **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
55
+ | **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
56
+ | **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
57
+ | **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
58
+ | **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
59
+ | **Cluster** | Consistent hashing, multi-node routing |
60
+ | **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
61
+ | **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
62
+ | **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
63
+
64
+ ---
65
+
66
+ ## Quickstart
67
+
68
+ ### 1. Run the Server (Docker)
69
+
70
+ ```bash
71
+ docker run -d \
72
+ -p 6379:6379 \
73
+ -p 8000:8000 \
74
+ -v pulsedb_data:/app/data \
75
+ --name pulsedb \
76
+ ghcr.io/gkavinrajancodes/pulsedb:latest
77
+ ```
78
+
79
+ Or use Docker Compose for a 3-node cluster:
80
+
81
+ ```bash
82
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
83
+ cd pulseDB && docker-compose up --build
84
+ ```
85
+
86
+ ### 2. Install the SDK
87
+
88
+ ```bash
89
+ pip install pulsedb
90
+ ```
91
+
92
+ ### 3. Use It
93
+
94
+ ```python
95
+ from pulsedb import PulseDB
96
+
97
+ db = PulseDB(host="localhost", port=6379)
98
+
99
+ # Standard KV Store
100
+ db.set("session:abc", "user_data", ttl=3600)
101
+ print(db.get("session:abc")) # "user_data"
102
+
103
+ # AI Memory Engine — insert vectors with metadata
104
+ db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
105
+ db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
106
+
107
+ # Semantic similarity search — optionally filter by metadata
108
+ results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
109
+ # → [{"id": "article:1", "score": 0.997}]
110
+ ```
111
+
112
+ ---
113
+
114
+ ## LangChain Integration
115
+
116
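+ PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering. The integration imports `langchain_core`, which `pip install pulsedb` does not pull in, so install it separately (`pip install langchain-core`); the example below additionally uses `langchain-openai`.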
117
+
118
+ ```python
119
+ from langchain_openai import OpenAIEmbeddings
120
+ from langchain_pulsedb import PulseDBVectorStore
121
+
122
+ store = PulseDBVectorStore(
123
+ embedding=OpenAIEmbeddings(),
124
+ host="localhost",
125
+ port=6379,
126
+ )
127
+
128
+ # Ingest documents — metadata is automatically stored for hybrid filtering
129
+ store.add_texts(
130
+ texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
131
+ metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
132
+ )
133
+
134
+ # Hybrid search — find similar docs but only from the blog source
135
+ docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
136
+ ```
137
+
138
+ ---
139
+
140
+ ## How the AI Memory Engine Works
141
+
142
+ Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
143
+
144
+ PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
145
+
146
+ ```
147
+ Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
148
+ ↓ Pass → included in result set
149
+ ↓ Fail → skipped immediately
150
+ Top-K results returned
151
+ ```
152
+
153
+ This means your effective `top_k` is always accurate, even with highly restrictive filters.
154
+
155
+ ---
156
+
157
+ ## Architecture
158
+
159
+ ```mermaid
160
+ graph TD
161
+ Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
162
+ Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
163
+ TCP --> Router["Command Router"]
164
+ HTTP --> Router
165
+ Router --> KV["16-Shard KV Store (LRU + TTL)"]
166
+ Router --> VE["AI Vector Engine (hnswlib HNSW)"]
167
+ Router --> DT["Data Types (Lists, Hashes)"]
168
+ Router --> PS["Pub/Sub Engine"]
169
+ KV --> WAL["Write-Ahead Log"]
170
+ VE --> Snap["HNSW Binary Snapshot"]
171
+ WAL --> Snap
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Run Locally (From Source)
177
+
178
+ ```bash
179
+ # 1. Clone and install
180
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
181
+ cd pulseDB
182
+ python3.10 -m venv workenv && source workenv/bin/activate
183
+ pip install -r requirements.txt
184
+
185
+ # 2. Start the server
186
+ NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
187
+
188
+ # 3. Install the SDK (in another terminal)
189
+ pip install -e sdk/
190
+ ```
191
+
192
+ ---
193
+
194
+ ## Contributing
195
+
196
+ 1. Fork the repository
197
+ 2. Create a feature branch: `git checkout -b feature/sorted-sets`
198
+ 3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
199
+ 4. Push: `git push origin feature/sorted-sets`
200
+ 5. Open a Pull Request
201
+
202
+ All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
203
+
204
+ ---
205
+
206
+ ## License
207
+
208
+ Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.
@@ -0,0 +1,189 @@
1
+ <div align="center">
2
+
3
+ # ⚡ PulseDB
4
+
5
+ **An enterprise-grade, in-memory database with a native AI Vector Engine.**
6
+
7
+ Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
8
+
9
+ [![CI](https://github.com/gkavinrajanCodes/pulseDB/actions/workflows/python-app.yml/badge.svg)](https://github.com/gkavinrajanCodes/pulseDB/actions)
10
+ [![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/pulsedb/)
11
+ [![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
12
+
13
+ </div>
14
+
15
+ ---
16
+
17
+ ## What is PulseDB?
18
+
19
+ PulseDB is a high-performance, source-available database that combines:
20
+
21
+ - **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
22
+ - **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
23
+ - **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
24
+ - **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
25
+
26
+ > One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
27
+
28
+ ---
29
+
30
+ ## Features
31
+
32
+ | Category | Capability |
33
+ |---|---|
34
+ | **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
35
+ | **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
36
+ | **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
37
+ | **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
38
+ | **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
39
+ | **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
40
+ | **Cluster** | Consistent hashing, multi-node routing |
41
+ | **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
42
+ | **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
43
+ | **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
44
+
45
+ ---
46
+
47
+ ## Quickstart
48
+
49
+ ### 1. Run the Server (Docker)
50
+
51
+ ```bash
52
+ docker run -d \
53
+ -p 6379:6379 \
54
+ -p 8000:8000 \
55
+ -v pulsedb_data:/app/data \
56
+ --name pulsedb \
57
+ ghcr.io/gkavinrajancodes/pulsedb:latest
58
+ ```
59
+
60
+ Or use Docker Compose for a 3-node cluster:
61
+
62
+ ```bash
63
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
64
+ cd pulseDB && docker-compose up --build
65
+ ```
66
+
67
+ ### 2. Install the SDK
68
+
69
+ ```bash
70
+ pip install pulsedb
71
+ ```
72
+
73
+ ### 3. Use It
74
+
75
+ ```python
76
+ from pulsedb import PulseDB
77
+
78
+ db = PulseDB(host="localhost", port=6379)
79
+
80
+ # Standard KV Store
81
+ db.set("session:abc", "user_data", ttl=3600)
82
+ print(db.get("session:abc")) # "user_data"
83
+
84
+ # AI Memory Engine — insert vectors with metadata
85
+ db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
86
+ db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
87
+
88
+ # Semantic similarity search — optionally filter by metadata
89
+ results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
90
+ # → [{"id": "article:1", "score": 0.997}]
91
+ ```
92
+
93
+ ---
94
+
95
+ ## LangChain Integration
96
+
97
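+ PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering. The integration imports `langchain_core`, which `pip install pulsedb` does not pull in, so install it separately (`pip install langchain-core`); the example below additionally uses `langchain-openai`.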
98
+
99
+ ```python
100
+ from langchain_openai import OpenAIEmbeddings
101
+ from langchain_pulsedb import PulseDBVectorStore
102
+
103
+ store = PulseDBVectorStore(
104
+ embedding=OpenAIEmbeddings(),
105
+ host="localhost",
106
+ port=6379,
107
+ )
108
+
109
+ # Ingest documents — metadata is automatically stored for hybrid filtering
110
+ store.add_texts(
111
+ texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
112
+ metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
113
+ )
114
+
115
+ # Hybrid search — find similar docs but only from the blog source
116
+ docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
117
+ ```
118
+
119
+ ---
120
+
121
+ ## How the AI Memory Engine Works
122
+
123
+ Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
124
+
125
+ PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
126
+
127
+ ```
128
+ Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
129
+ ↓ Pass → included in result set
130
+ ↓ Fail → skipped immediately
131
+ Top-K results returned
132
+ ```
133
+
134
+ This means your effective `top_k` is always accurate, even with highly restrictive filters.
135
+
136
+ ---
137
+
138
+ ## Architecture
139
+
140
+ ```mermaid
141
+ graph TD
142
+ Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
143
+ Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
144
+ TCP --> Router["Command Router"]
145
+ HTTP --> Router
146
+ Router --> KV["16-Shard KV Store (LRU + TTL)"]
147
+ Router --> VE["AI Vector Engine (hnswlib HNSW)"]
148
+ Router --> DT["Data Types (Lists, Hashes)"]
149
+ Router --> PS["Pub/Sub Engine"]
150
+ KV --> WAL["Write-Ahead Log"]
151
+ VE --> Snap["HNSW Binary Snapshot"]
152
+ WAL --> Snap
153
+ ```
154
+
155
+ ---
156
+
157
+ ## Run Locally (From Source)
158
+
159
+ ```bash
160
+ # 1. Clone and install
161
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
162
+ cd pulseDB
163
+ python3.10 -m venv workenv && source workenv/bin/activate
164
+ pip install -r requirements.txt
165
+
166
+ # 2. Start the server
167
+ NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
168
+
169
+ # 3. Install the SDK (in another terminal)
170
+ pip install -e sdk/
171
+ ```
172
+
173
+ ---
174
+
175
+ ## Contributing
176
+
177
+ 1. Fork the repository
178
+ 2. Create a feature branch: `git checkout -b feature/sorted-sets`
179
+ 3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
180
+ 4. Push: `git push origin feature/sorted-sets`
181
+ 5. Open a Pull Request
182
+
183
+ All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
184
+
185
+ ---
186
+
187
+ ## License
188
+
189
+ Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pulsedb"
7
+ version = "1.0.0"
8
+ authors = [
9
+ { name="G Kavinrajan", email="gkavinrajan@example.com" },
10
+ ]
11
+ description = "High-performance Python Vector Database & Memory Engine with RESP2 support."
12
+ readme = "README.md"
13
+ requires-python = ">=3.10"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: Other/Proprietary License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Database",
19
+ ]
20
+ dependencies = [
21
+ "redis>=5.0.0",
22
+ "numpy>=1.20.0",
23
+ "hnswlib>=0.8.0"
24
+ ]
25
+
26
+ [project.urls]
27
+ "Homepage" = "https://github.com/gkavinrajanCodes/pulseDB"
28
+ "Bug Tracker" = "https://github.com/gkavinrajanCodes/pulseDB/issues"
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["sdk"]
32
+ include = ["pulsedb", "pulsedb.*", "langchain_pulsedb", "langchain_pulsedb.*"]
@@ -0,0 +1,6 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ from .vectorstore import PulseDBVectorStore
5
+
6
+ __all__ = ["PulseDBVectorStore"]
@@ -0,0 +1,110 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ import uuid
5
+ from typing import Any, Iterable, List, Optional, Tuple, Dict
6
+
7
+ from langchain_core.documents import Document
8
+ from langchain_core.embeddings import Embeddings
9
+ from langchain_core.vectorstores import VectorStore
10
+
11
+ from pulsedb import PulseDB
12
+
13
+
14
+ class PulseDBVectorStore(VectorStore):
15
+ """PulseDB VectorStore wrapper for LangChain."""
16
+
17
+ def __init__(
18
+ self,
19
+ embedding: Embeddings,
20
+ client: Optional[PulseDB] = None,
21
+ host: str = "localhost",
22
+ port: int = 6379,
23
+ collection_name: str = "langchain",
24
+ ):
25
+ self._embedding = embedding
26
+ self._client = client or PulseDB(host=host, port=port)
27
+ self._collection = collection_name
28
+
29
+ def _get_key(self, doc_id: str) -> str:
30
+ return f"{self._collection}:{doc_id}"
31
+
32
+ def add_texts(
33
+ self,
34
+ texts: Iterable[str],
35
+ metadatas: Optional[List[dict]] = None,
36
+ ids: Optional[List[str]] = None,
37
+ **kwargs: Any,
38
+ ) -> List[str]:
39
+ """Run more texts through the embeddings and add to the vectorstore."""
40
+ texts = list(texts)
41
+ if not texts:
42
+ return []
43
+
44
+ embeddings = self._embedding.embed_documents(texts)
45
+ if ids is None:
46
+ ids = [str(uuid.uuid4()) for _ in texts]
47
+ if metadatas is None:
48
+ metadatas = [{} for _ in texts]
49
+
50
+ batch = []
51
+ for text, metadata, doc_id, embedding in zip(texts, metadatas, ids, embeddings):
52
+ key = self._get_key(doc_id)
53
+ doc_metadata = metadata.copy()
54
+ doc_metadata["_text"] = text
55
+ batch.append({"id": key, "vector": embedding, "metadata": doc_metadata})
56
+
57
+ self._client.vectors.upsert_batch(batch)
58
+
59
+ return ids
60
+
61
+ def similarity_search(
62
+ self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
63
+ ) -> List[Document]:
64
+ """Return docs most similar to query."""
65
+ results = self.similarity_search_with_score(query, k=k, filter=filter, **kwargs)
66
+ return [doc for doc, _ in results]
67
+
68
+ def similarity_search_with_score(
69
+ self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
70
+ ) -> List[Tuple[Document, float]]:
71
+ """Return docs most similar to query, along with scores."""
72
+ embedding = self._embedding.embed_query(query)
73
+
74
+ # Search the vector index using the native TCP Binary Protocol
75
+ raw_results = self._client.vectors.search(embedding, top_k=k, filter=filter)
76
+
77
+ docs_with_scores = []
78
+ for res in raw_results:
79
+ key = res["id"]
80
+ score = res["score"]
81
+
82
+ # Only process keys in our collection
83
+ if not key.startswith(f"{self._collection}:"):
84
+ continue
85
+
86
+ # Fetch the metadata dictionary
87
+ doc_data = self._client.vectors.get(key)
88
+ if not doc_data:
89
+ continue
90
+
91
+ metadata = doc_data.get("metadata", {})
92
+ text = metadata.pop("_text", "")
93
+
94
+ doc = Document(page_content=text, metadata=metadata)
95
+ docs_with_scores.append((doc, score))
96
+
97
+ return docs_with_scores
98
+
99
+ @classmethod
100
+ def from_texts(
101
+ cls,
102
+ texts: List[str],
103
+ embedding: Embeddings,
104
+ metadatas: Optional[List[dict]] = None,
105
+ **kwargs: Any,
106
+ ) -> "PulseDBVectorStore":
107
+ """Return VectorStore initialized from texts and embeddings."""
108
+ store = cls(embedding, **kwargs)
109
+ store.add_texts(texts, metadatas)
110
+ return store
@@ -0,0 +1,50 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ # sdk/pulsedb/__init__.py
5
+ """
6
+ PulseDB Python SDK
7
+
8
+ Connects to PulseDB over the high-performance TCP Binary Protocol (port 6379).
9
+
10
+ Usage (sync):
11
+ from pulsedb import PulseDB
12
+
13
+ db = PulseDB(host="localhost", port=6379)
14
+ db.set("user:123", "alice", ttl=3600)
15
+ print(db.get("user:123")) # "alice"
16
+
17
+ # AI Memory Engine (Vector Search)
18
+ db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"category": "news"})
19
+ results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"category": "news"})
20
+
21
+ Usage (async):
22
+ from pulsedb import AsyncPulseDB
23
+
24
+ async def main():
25
+ async with AsyncPulseDB(host="localhost", port=6379) as db:
26
+ await db.set("counter", 0)
27
+ await db.incr("counter")
28
+ await db.vectors.upsert("doc1", [0.1, 0.2, 0.3])
29
+ """
30
+
31
+ from .client import PulseDB
32
+ from .async_client import AsyncPulseDB
33
+ from .exceptions import (
34
+ PulseDBError,
35
+ ConnectionError,
36
+ AuthenticationError,
37
+ CommandError,
38
+ TimeoutError,
39
+ )
40
+
41
+ __version__ = "1.1.0"
42
+ __all__ = [
43
+ "PulseDB",
44
+ "AsyncPulseDB",
45
+ "PulseDBError",
46
+ "ConnectionError",
47
+ "AuthenticationError",
48
+ "CommandError",
49
+ "TimeoutError",
50
+ ]