pulsedb 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ from .vectorstore import PulseDBVectorStore
5
+
6
+ __all__ = ["PulseDBVectorStore"]
@@ -0,0 +1,110 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ import uuid
5
+ from typing import Any, Iterable, List, Optional, Tuple, Dict
6
+
7
+ from langchain_core.documents import Document
8
+ from langchain_core.embeddings import Embeddings
9
+ from langchain_core.vectorstores import VectorStore
10
+
11
+ from pulsedb import PulseDB
12
+
13
+
14
class PulseDBVectorStore(VectorStore):
    """LangChain ``VectorStore`` backed by a PulseDB server.

    Each document is stored as one PulseDB vector whose key is
    ``"<collection_name>:<doc_id>"``. The document text travels inside the
    vector's metadata under the reserved ``"_text"`` key and is removed again
    when search hits are converted back into ``Document`` objects.
    """

    def __init__(
        self,
        embedding: Embeddings,
        client: Optional[PulseDB] = None,
        host: str = "localhost",
        port: int = 6379,
        collection_name: str = "langchain",
    ):
        """Create a store.

        Args:
            embedding: Embedding model used for both documents and queries.
            client: Existing ``PulseDB`` client to reuse. When omitted (or
                falsy) a new client is built from ``host`` and ``port``.
            host: Server host; only used when ``client`` is not supplied.
            port: Server port; only used when ``client`` is not supplied.
            collection_name: Prefix that namespaces the keys of this store.
        """
        self._embedding = embedding
        self._client = client or PulseDB(host=host, port=port)
        self._collection = collection_name

    @property
    def embeddings(self) -> Embeddings:
        """Return the embedding model this store was constructed with."""
        return self._embedding

    def _get_key(self, doc_id: str) -> str:
        """Return the PulseDB key for ``doc_id`` inside this collection."""
        return f"{self._collection}:{doc_id}"

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Embed ``texts`` and upsert them into the collection.

        Args:
            texts: Documents to store.
            metadatas: One metadata dict per text. Defaults to empty dicts.
            ids: One document id per text. Defaults to random UUID4 strings.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The document ids (without the collection prefix), in input order.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` is given with a length
                different from ``texts``.
        """
        texts = list(texts)
        if not texts:
            return []

        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        if metadatas is None:
            metadatas = [{} for _ in texts]

        # zip() would silently drop the unmatched tail, losing documents, so
        # reject mismatched inputs up front (and before paying for embeddings).
        if len(metadatas) != len(texts):
            raise ValueError(
                f"Got {len(metadatas)} metadatas for {len(texts)} texts; "
                "lengths must match."
            )
        if len(ids) != len(texts):
            raise ValueError(
                f"Got {len(ids)} ids for {len(texts)} texts; lengths must match."
            )

        embeddings = self._embedding.embed_documents(texts)

        batch = [
            {
                "id": self._get_key(doc_id),
                "vector": embedding,
                # Build a new dict so the caller's metadata is never mutated.
                "metadata": {**(metadata or {}), "_text": text},
            }
            for text, metadata, doc_id, embedding in zip(
                texts, metadatas, ids, embeddings
            )
        ]
        self._client.vectors.upsert_batch(batch)

        return ids

    def similarity_search(
        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> List[Document]:
        """Return the documents most similar to ``query`` (scores dropped)."""
        results = self.similarity_search_with_score(query, k=k, filter=filter, **kwargs)
        return [doc for doc, _ in results]

    def similarity_search_with_score(
        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Return the documents most similar to ``query`` with their scores.

        The server is asked for ``k`` hits; hits whose key lies outside this
        collection are discarded afterwards, so fewer than ``k`` documents may
        be returned.

        Args:
            query: Text to embed and search for.
            k: ``top_k`` passed to the server-side search.
            filter: Optional metadata filter forwarded to the server.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``(Document, score)`` tuples in the order the server returned them.
        """
        embedding = self._embedding.embed_query(query)
        raw_results = self._client.vectors.search(embedding, top_k=k, filter=filter)

        prefix = f"{self._collection}:"
        docs_with_scores = []
        for res in raw_results:
            key = res["id"]
            if not key.startswith(prefix):
                continue

            # The search reply carries only ids and scores; fetch the record
            # to recover the text and metadata.
            doc_data = self._client.vectors.get(key)
            if not doc_data:
                continue

            # Copy (and tolerate a null metadata field) so popping "_text"
            # never mutates the dict handed back by the client.
            metadata = dict(doc_data.get("metadata") or {})
            text = metadata.pop("_text", "")

            docs_with_scores.append(
                (Document(page_content=text, metadata=metadata), res["score"])
            )

        return docs_with_scores

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        *,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> "PulseDBVectorStore":
        """Build a store and populate it with ``texts``.

        Args:
            texts: Documents to store.
            embedding: Embedding model for the new store.
            metadatas: Optional per-text metadata.
            ids: Optional per-text document ids, forwarded to ``add_texts``.
            **kwargs: Forwarded to the constructor (``client``, ``host``,
                ``port``, ``collection_name``).

        Returns:
            The populated store.
        """
        store = cls(embedding, **kwargs)
        store.add_texts(texts, metadatas, ids=ids)
        return store
pulsedb/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ # sdk/pulsedb/__init__.py
5
+ """
6
+ PulseDB Python SDK
7
+
8
+ Connects to PulseDB over the high-performance TCP Binary Protocol (port 6379).
9
+
10
+ Usage (sync):
11
+ from pulsedb import PulseDB
12
+
13
+ db = PulseDB(host="localhost", port=6379)
14
+ db.set("user:123", "alice", ttl=3600)
15
+ print(db.get("user:123")) # "alice"
16
+
17
+ # AI Memory Engine (Vector Search)
18
+ db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"category": "news"})
19
+ results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"category": "news"})
20
+
21
+ Usage (async):
22
+ from pulsedb import AsyncPulseDB
23
+
24
+ async def main():
25
+ async with AsyncPulseDB(host="localhost", port=6379) as db:
26
+ await db.set("counter", 0)
27
+ await db.incr("counter")
28
+ await db.vectors.upsert("doc1", [0.1, 0.2, 0.3])
29
+ """
30
+
31
+ from .client import PulseDB
32
+ from .async_client import AsyncPulseDB
33
+ from .exceptions import (
34
+ PulseDBError,
35
+ ConnectionError,
36
+ AuthenticationError,
37
+ CommandError,
38
+ TimeoutError,
39
+ )
40
+
41
# Must match the version declared in the distribution metadata (1.0.0).
__version__ = "1.0.0"

# Names re-exported as the public API of the ``pulsedb`` package.
__all__ = [
    "PulseDB",
    "AsyncPulseDB",
    "PulseDBError",
    "ConnectionError",
    "AuthenticationError",
    "CommandError",
    "TimeoutError",
]
@@ -0,0 +1,325 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ # sdk/pulsedb/async_client.py
5
+ """
6
+ Async PulseDB client using the ultra-fast Binary Protocol (RESP2 over TCP).
7
+
8
+ Usage:
9
+ import asyncio
10
+ from pulsedb import AsyncPulseDB
11
+
12
+ async def main():
13
+ db = AsyncPulseDB(host="localhost", port=6379)
14
+ await db.set("key", "value", ttl=3600)
15
+ val = await db.get("key")
16
+
17
+ # Vector Engine Usage
18
+ await db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"author": "John"})
19
+ results = await db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"author": "John"})
20
+
21
+ asyncio.run(main())
22
+ """
23
+
24
+ import json
25
+ import asyncio
26
+ from typing import Optional, List, Any, Dict
27
+
28
+ import redis
29
+ import redis.asyncio as redis_async
30
+ import numpy as np
31
+
32
+ from .exceptions import CommandError, ConnectionError, TimeoutError
33
+
34
+
35
class AsyncVectorNamespace:
    """Vector commands of the async client, exposed as ``db.vectors``.

    Vectors are serialised as ``float32`` bytes with NumPy and metadata as
    JSON before being handed to ``db.execute_command``.
    """

    def __init__(self, db: "AsyncPulseDB"):
        """Bind the namespace to the client that executes its commands."""
        self.db = db

    async def upsert(self, id: str, vector: List[float], metadata: Optional[Dict[str, Any]] = None) -> str:
        """Insert or update one vector via ``VECTOR.BSET``.

        Args:
            id: Key of the vector.
            vector: Embedding values; converted to ``float32``.
            metadata: Optional JSON-serialisable metadata.

        Returns:
            The server reply.

        Raises:
            CommandError: On any failure. The original exception is chained
                as ``__cause__``.
        """
        blob = np.array(vector, dtype=np.float32).tobytes()
        args: List[Any] = [id, blob]
        if metadata is not None:
            args.extend(["METADATA", json.dumps(metadata)])

        try:
            return await self.db.execute_command("VECTOR.BSET", *args)
        except Exception as e:
            # Chain the cause so connection/timeout details are not lost.
            if "dimension mismatch" in str(e).lower():
                raise CommandError(f"Vector dimension mismatch: {e}") from e
            raise CommandError(f"Failed to upsert vector: {e}") from e

    async def upsert_batch(self, items: List[Dict[str, Any]]) -> int:
        """Insert or update several vectors with one ``VECTOR.BSET_BATCH``.

        Args:
            items: List of dicts, each with:
                - ``id`` (str): unique key
                - ``vector`` (List[float]): embedding values
                - ``metadata`` (dict, optional): metadata for filtering

        Returns:
            The count parsed from an ``"OK:N"`` reply, or ``0`` for an empty
            ``items`` list (no command is sent in that case).

        Raises:
            CommandError: If the reply is not ``"OK:N"`` or the command fails.

        Example::

            await db.vectors.upsert_batch([
                {"id": "doc1", "vector": [0.1, 0.2], "metadata": {"cat": "news"}},
                {"id": "doc2", "vector": [0.9, 0.8], "metadata": {"cat": "sports"}},
            ])
        """
        if not items:
            # Nothing to send; skip the network round-trip.
            return 0

        payload = []
        for item in items:
            blob = np.array(item["vector"], dtype=np.float32).tobytes()
            # The batch travels as one JSON document, so bytes are hex-encoded.
            entry: Dict[str, Any] = {"id": item["id"], "blob": blob.hex()}
            if item.get("metadata") is not None:
                entry["metadata"] = item["metadata"]
            payload.append(entry)

        try:
            result = await self.db.execute_command("VECTOR.BSET_BATCH", json.dumps(payload))
            # Response: "OK:N" or "PARTIAL: N inserted, errors: ..."
            if isinstance(result, str) and result.startswith("OK:"):
                return int(result[3:])
            raise CommandError(f"Batch upsert error: {result}")
        except CommandError:
            raise
        except Exception as e:
            raise CommandError(f"Failed to batch upsert: {e}") from e

    async def get(self, id: str) -> Optional[Dict[str, Any]]:
        """Fetch a stored record via ``VECTOR.GET``.

        Returns:
            The decoded JSON object, or ``None`` when the reply is ``None``,
            the string ``"NULL"``, not valid JSON, or not a dict.
        """
        result = await self.db.execute_command("VECTOR.GET", id)
        if result == "NULL" or result is None:
            return None
        if isinstance(result, (bytes, bytearray)):
            result = result.decode("utf-8")
        if isinstance(result, str):
            try:
                return json.loads(result)
            except json.JSONDecodeError:
                return None  # unparseable reply is treated as "not found"
        return result if isinstance(result, dict) else None

    async def search(self, query: List[float], top_k: int = 5, filter: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Run a similarity search via ``VECTOR.BSEARCH``.

        Args:
            query: Query embedding; converted to ``float32``.
            top_k: Number of hits requested from the server.
            filter: Optional metadata filter, sent as JSON.

        Returns:
            ``[{"id": str, "score": float}, ...]`` in reply order; an empty
            list when the reply is empty.

        Raises:
            CommandError: If the reply does not consist of id/score pairs.
        """
        blob = np.array(query, dtype=np.float32).tobytes()
        args: List[Any] = [blob, "TOP_K", top_k]
        if filter is not None:
            args.extend(["FILTER", json.dumps(filter)])

        results = await self.db.execute_command("VECTOR.BSEARCH", *args)
        if not results:
            return []

        # Results return as flat array: [key1, score1, key2, score2, ...]
        if len(results) % 2:
            raise CommandError(
                f"Malformed VECTOR.BSEARCH reply: expected id/score pairs, "
                f"got {len(results)} elements"
            )

        parsed = []
        for doc_id, raw_score in zip(results[::2], results[1::2]):
            if isinstance(doc_id, (bytes, bytearray)):
                doc_id = doc_id.decode("utf-8")
            parsed.append({"id": doc_id, "score": float(raw_score)})

        return parsed

    async def count(self) -> int:
        """Return the ``VECTOR.COUNT`` reply as an int."""
        return int(await self.db.execute_command("VECTOR.COUNT"))

    async def delete(self, id: str) -> str:
        """Delete a vector via ``VECTOR.DEL`` and return the server reply."""
        return await self.db.execute_command("VECTOR.DEL", id)
139
+
140
+
141
class AsyncPulseDB:
    """
    Async TCP client for PulseDB.

    All command methods are coroutines; use them with ``await``. The
    underlying ``redis.asyncio`` client is created lazily on first use.
    For sync usage, see PulseDB (client.py).
    """

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        timeout: float = 10.0,
    ):
        """Store connection settings; no connection is opened here.

        Args:
            host: Server host name.
            port: Server TCP port.
            timeout: Value passed as ``socket_timeout`` to the redis client.
        """
        self._host = host
        self._port = port
        self._timeout = timeout
        self._client: Optional["redis_async.Redis"] = None

        # Vector commands are grouped under ``db.vectors``.
        self.vectors = AsyncVectorNamespace(self)

    def _get_client(self) -> "redis_async.Redis":
        """Return the cached redis client, creating it on first use."""
        if self._client is None:
            # We use protocol=2 for backwards compatibility with our custom RESP2 router
            self._client = redis_async.Redis(
                host=self._host,
                port=self._port,
                socket_timeout=self._timeout,
                decode_responses=True,
                protocol=2,
            )
        return self._client

    @staticmethod
    def _strip_error_prefix(message: str) -> str:
        """Drop a leading ``"ERROR:"`` marker (and following spaces), if any.

        Slicing by the marker's real length keeps the first character of the
        message even when the server omits the space after the colon.
        """
        marker = "ERROR:"
        if message.startswith(marker):
            return message[len(marker):].lstrip()
        return message

    async def execute_command(self, command: str, *args) -> Any:
        """Execute a raw command and return the server reply.

        Raises:
            CommandError: If the reply is a string starting with ``"ERROR:"``
                or the redis client reports a response error.
            ConnectionError: If the redis client reports a connection error.
            TimeoutError: If the redis client reports a timeout.
        """
        client = self._get_client()
        try:
            result = await client.execute_command(command, *args)
            if isinstance(result, str) and result.startswith("ERROR:"):
                raise CommandError(self._strip_error_prefix(result))
            return result
        except redis.exceptions.ConnectionError as e:
            raise ConnectionError(f"Cannot connect to PulseDB at {self._host}:{self._port}: {e}") from e
        except redis.exceptions.TimeoutError as e:
            raise TimeoutError(f"Command '{command}' timed out") from e
        except redis.exceptions.ResponseError as e:
            raise CommandError(self._strip_error_prefix(str(e))) from e

    # ------------------------------------------------------------------
    # Core KV operations
    # ------------------------------------------------------------------

    async def set(self, key: str, value: Any, ttl: Optional[float] = None) -> str:
        """Set key to ``str(value)``. ``ttl`` is truncated to whole seconds and sent as ``EX``."""
        args = [key, str(value)]
        if ttl is not None:
            args += ["EX", str(int(ttl))]
        return await self.execute_command("SET", *args)

    async def get(self, key: str) -> Optional[str]:
        """Get value for key. A ``"NULL"`` reply is returned as ``None``."""
        result = await self.execute_command("GET", key)
        return None if result == "NULL" else result

    async def delete(self, *keys: str) -> str:
        """Delete one or more keys."""
        return await self.execute_command("DEL", *keys)

    async def exists(self, key: str) -> bool:
        """Return the truthiness of the ``EXISTS`` reply for ``key``."""
        return bool(await self.execute_command("EXISTS", key))

    async def expire(self, key: str, seconds: float) -> int:
        """Set TTL on a key. Returns 1 if set, 0 if key not found."""
        return await self.execute_command("EXPIRE", key, str(seconds))

    async def ttl(self, key: str) -> int:
        """Get remaining TTL in seconds. -1 = no TTL. -2 = key not found."""
        return await self.execute_command("TTL", key)

    async def mset(self, mapping: dict) -> str:
        """Set multiple keys at once; values are sent as ``str(value)``."""
        args = []
        for k, v in mapping.items():
            args += [k, str(v)]
        return await self.execute_command("MSET", *args)

    async def mget(self, *keys: str) -> List[Optional[str]]:
        """Get multiple keys at once. ``"NULL"`` entries become ``None``."""
        results = await self.execute_command("MGET", *keys)
        if isinstance(results, list):
            return [None if v == "NULL" else v for v in results]
        return results

    async def keys(self, pattern: str = "*") -> List[str]:
        """Return the ``KEYS`` reply for ``pattern``, or ``[]`` if it is not a list."""
        result = await self.execute_command("KEYS", pattern)
        return result if isinstance(result, list) else []

    async def dbsize(self) -> int:
        """Return the ``DBSIZE`` reply as an int."""
        return int(await self.execute_command("DBSIZE"))

    # ------------------------------------------------------------------
    # Hash operations
    # ------------------------------------------------------------------

    async def hmset(self, key: str, mapping: dict) -> str:
        """Set multiple fields in a hash; values are sent as ``str(value)``."""
        args = [key]
        for k, v in mapping.items():
            args.extend([k, str(v)])
        return await self.execute_command("HMSET", *args)

    async def hgetall(self, key: str) -> List[str]:
        """Get all fields and values in a hash as a flat list.

        A dict reply is flattened to ``[field, value, ...]``; a list reply is
        returned as-is; anything else yields ``[]``.
        """
        result = await self.execute_command("HGETALL", key)
        if isinstance(result, dict):
            flat = []
            for k, v in result.items():
                flat.extend([k, str(v)])
            return flat
        return result if isinstance(result, list) else []

    # ------------------------------------------------------------------
    # Numeric operations
    # ------------------------------------------------------------------

    async def incr(self, key: str) -> int:
        """Increment integer value of key by 1."""
        return int(await self.execute_command("INCR", key))

    async def incrby(self, key: str, amount: int) -> int:
        """Increment integer value of key by amount."""
        return int(await self.execute_command("INCRBY", key, str(amount)))

    async def decr(self, key: str) -> int:
        """Decrement integer value of key by 1."""
        return int(await self.execute_command("DECR", key))

    async def decrby(self, key: str, amount: int) -> int:
        """Decrement integer value of key by amount."""
        return int(await self.execute_command("DECRBY", key, str(amount)))

    # ------------------------------------------------------------------
    # Pub/Sub
    # ------------------------------------------------------------------

    async def publish(self, channel: str, message: str) -> str:
        """Publish a message to a channel."""
        return await self.execute_command("PUBLISH", channel, message)

    # ------------------------------------------------------------------
    # Admin
    # ------------------------------------------------------------------

    async def ping(self) -> str:
        """Send ``PING`` and return the server reply."""
        return await self.execute_command("PING")

    async def flush(self) -> str:
        """Send ``FLUSHDB`` and return the server reply."""
        return await self.execute_command("FLUSHDB")

    async def info(self) -> str:
        """Send ``INFO`` and return the server reply."""
        return await self.execute_command("INFO")

    # ------------------------------------------------------------------
    # Context manager support
    # ------------------------------------------------------------------

    async def close(self):
        """Close the underlying client, if one was created.

        The cached client is dropped first, so calling ``close()`` twice is a
        no-op and a later command lazily builds a fresh client.
        """
        client, self._client = self._client, None
        if client is not None:
            await client.aclose()

    async def __aenter__(self):
        """Return the client itself for ``async with`` blocks."""
        return self

    async def __aexit__(self, *args):
        """Close the client when the ``async with`` block exits."""
        await self.close()
pulsedb/client.py ADDED
@@ -0,0 +1,162 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ # sdk/pulsedb/client.py
5
+ """
6
+ Synchronous PulseDB client (wraps the async client).
7
+
8
+ Usage:
9
+ from pulsedb import PulseDB
10
+
11
+ db = PulseDB(host="localhost", port=6379)
12
+ db.set("key", "value", ttl=3600)
13
+ val = db.get("key")
14
+
15
+ # Vector Engine Usage
16
+ db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"author": "John"})
17
+ results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"author": "John"})
18
+ """
19
+
20
+ import asyncio
21
+ from typing import Optional, List, Any, Dict
22
+
23
+ from .async_client import AsyncPulseDB
24
+
25
+
26
+ import threading
27
+
28
+ _loop = asyncio.new_event_loop()
29
+ _thread = threading.Thread(target=_loop.run_forever, daemon=True)
30
+ _thread.start()
31
+
32
+ def _run(coro):
33
+ """Run a coroutine in the background event loop (sync bridge)."""
34
+ future = asyncio.run_coroutine_threadsafe(coro, _loop)
35
+ return future.result()
36
+
37
+
38
class VectorNamespace:
    """Blocking facade over an async vector namespace.

    Every method builds the matching coroutine on the wrapped namespace and
    waits for it through ``_run``.
    """

    def __init__(self, async_namespace):
        """Remember the async namespace that performs the actual work."""
        self._async = async_namespace

    def upsert(self, id: str, vector: List[float], metadata: Optional[Dict[str, Any]] = None) -> str:
        """Blocking counterpart of the async ``upsert``."""
        pending = self._async.upsert(id, vector, metadata)
        return _run(pending)

    def upsert_batch(self, items: List[Dict[str, Any]]) -> int:
        """Blocking counterpart of the async ``upsert_batch``."""
        pending = self._async.upsert_batch(items)
        return _run(pending)

    def get(self, id: str) -> Optional[Dict[str, Any]]:
        """Blocking counterpart of the async ``get``."""
        pending = self._async.get(id)
        return _run(pending)

    def search(self, query: List[float], top_k: int = 5, filter: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Blocking counterpart of the async ``search``."""
        pending = self._async.search(query, top_k, filter)
        return _run(pending)

    def count(self) -> int:
        """Blocking counterpart of the async ``count``."""
        pending = self._async.count()
        return _run(pending)

    def delete(self, id: str) -> str:
        """Blocking counterpart of the async ``delete``."""
        pending = self._async.delete(id)
        return _run(pending)
69
+
70
+
71
class PulseDB:
    """
    Blocking PulseDB client.

    Holds an ``AsyncPulseDB`` instance and runs each of its coroutines to
    completion through ``_run``, so the same commands can be used from
    synchronous code (scripts, Django views, Flask routes, ...).

    Async codebases (FastAPI, aiohttp) should use AsyncPulseDB directly.
    """

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        timeout: float = 10.0,
    ):
        """Create the wrapped async client and the blocking vector namespace."""
        self._async = AsyncPulseDB(host=host, port=port, timeout=timeout)
        self.vectors = VectorNamespace(self._async.vectors)

    def execute_command(self, command: str, *args) -> Any:
        """Blocking counterpart of ``AsyncPulseDB.execute_command``."""
        backend = self._async
        return _run(backend.execute_command(command, *args))

    def set(self, key: str, value: Any, ttl: Optional[float] = None) -> str:
        """Blocking counterpart of ``AsyncPulseDB.set``."""
        backend = self._async
        return _run(backend.set(key, value, ttl))

    def get(self, key: str) -> Optional[str]:
        """Blocking counterpart of ``AsyncPulseDB.get``."""
        backend = self._async
        return _run(backend.get(key))

    def delete(self, *keys: str) -> str:
        """Blocking counterpart of ``AsyncPulseDB.delete``."""
        backend = self._async
        return _run(backend.delete(*keys))

    def exists(self, key: str) -> bool:
        """Blocking counterpart of ``AsyncPulseDB.exists``."""
        backend = self._async
        return _run(backend.exists(key))

    def expire(self, key: str, seconds: float) -> int:
        """Blocking counterpart of ``AsyncPulseDB.expire``."""
        backend = self._async
        return _run(backend.expire(key, seconds))

    def ttl(self, key: str) -> int:
        """Blocking counterpart of ``AsyncPulseDB.ttl``."""
        backend = self._async
        return _run(backend.ttl(key))

    def mset(self, mapping: dict) -> str:
        """Blocking counterpart of ``AsyncPulseDB.mset``."""
        backend = self._async
        return _run(backend.mset(mapping))

    def mget(self, *keys: str) -> List[Optional[str]]:
        """Blocking counterpart of ``AsyncPulseDB.mget``."""
        backend = self._async
        return _run(backend.mget(*keys))

    def keys(self, pattern: str = "*") -> List[str]:
        """Blocking counterpart of ``AsyncPulseDB.keys``."""
        backend = self._async
        return _run(backend.keys(pattern))

    def dbsize(self) -> int:
        """Blocking counterpart of ``AsyncPulseDB.dbsize``."""
        backend = self._async
        return _run(backend.dbsize())

    def hmset(self, key: str, mapping: dict) -> str:
        """Blocking counterpart of ``AsyncPulseDB.hmset``."""
        backend = self._async
        return _run(backend.hmset(key, mapping))

    def hgetall(self, key: str) -> List[str]:
        """Blocking counterpart of ``AsyncPulseDB.hgetall``."""
        backend = self._async
        return _run(backend.hgetall(key))

    def incr(self, key: str) -> int:
        """Blocking counterpart of ``AsyncPulseDB.incr``."""
        backend = self._async
        return _run(backend.incr(key))

    def incrby(self, key: str, amount: int) -> int:
        """Blocking counterpart of ``AsyncPulseDB.incrby``."""
        backend = self._async
        return _run(backend.incrby(key, amount))

    def decr(self, key: str) -> int:
        """Blocking counterpart of ``AsyncPulseDB.decr``."""
        backend = self._async
        return _run(backend.decr(key))

    def decrby(self, key: str, amount: int) -> int:
        """Blocking counterpart of ``AsyncPulseDB.decrby``."""
        backend = self._async
        return _run(backend.decrby(key, amount))

    def publish(self, channel: str, message: str) -> str:
        """Blocking counterpart of ``AsyncPulseDB.publish``."""
        backend = self._async
        return _run(backend.publish(channel, message))

    def ping(self) -> str:
        """Blocking counterpart of ``AsyncPulseDB.ping``."""
        backend = self._async
        return _run(backend.ping())

    def flush(self) -> str:
        """Blocking counterpart of ``AsyncPulseDB.flush``."""
        backend = self._async
        return _run(backend.flush())

    def info(self) -> str:
        """Blocking counterpart of ``AsyncPulseDB.info``."""
        backend = self._async
        return _run(backend.info())

    def close(self):
        """Close the wrapped async client and wait until it is done."""
        backend = self._async
        _run(backend.close())

    def __enter__(self):
        """Return the client itself for ``with`` blocks."""
        return self

    def __exit__(self, *args):
        """Close the client when the ``with`` block exits."""
        self.close()
pulsedb/exceptions.py ADDED
@@ -0,0 +1,23 @@
1
+ # Copyright (c) 2026 G Kavinrajan. All rights reserved.
2
+ # Licensed under the Business Source License 1.1
3
+
4
+ # sdk/pulsedb/exceptions.py
5
+
6
class PulseDBError(Exception):
    """Common ancestor of every error raised by the PulseDB SDK."""


class ConnectionError(PulseDBError):
    """The PulseDB server could not be reached."""


class AuthenticationError(PulseDBError):
    """The supplied API key was not accepted."""


class CommandError(PulseDBError):
    """The server answered a command with an ERROR response."""


class TimeoutError(PulseDBError):
    """A command or the connection itself ran out of time."""
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: pulsedb
3
+ Version: 1.0.0
4
+ Summary: High-performance Python Vector Database & Memory Engine with RESP2 support.
5
+ Author-email: G Kavinrajan <gkavinrajan@example.com>
6
+ Project-URL: Homepage, https://github.com/gkavinrajanCodes/pulseDB
7
+ Project-URL: Bug Tracker, https://github.com/gkavinrajanCodes/pulseDB/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Database
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: redis>=5.0.0
16
+ Requires-Dist: numpy>=1.20.0
17
+ Requires-Dist: hnswlib>=0.8.0
18
+ Dynamic: license-file
19
+
20
+ <div align="center">
21
+
22
+ # ⚡ PulseDB
23
+
24
+ **An enterprise-grade, in-memory database with a native AI Vector Engine.**
25
+
26
+ Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
27
+
28
+ [![CI](https://github.com/gkavinrajanCodes/pulseDB/actions/workflows/python-app.yml/badge.svg)](https://github.com/gkavinrajanCodes/pulseDB/actions)
29
+ [![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/pulsedb/)
30
+ [![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
31
+
32
+ </div>
33
+
34
+ ---
35
+
36
+ ## What is PulseDB?
37
+
38
+ PulseDB is a high-performance, source-available database (Business Source License 1.1) that combines:
39
+
40
+ - **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
41
+ - **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
42
+ - **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
43
+ - **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
44
+
45
+ > One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
46
+
47
+ ---
48
+
49
+ ## Features
50
+
51
+ | Category | Capability |
52
+ |---|---|
53
+ | **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
54
+ | **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
55
+ | **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
56
+ | **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
57
+ | **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
58
+ | **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
59
+ | **Cluster** | Consistent hashing, multi-node routing |
60
+ | **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
61
+ | **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
62
+ | **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
63
+
64
+ ---
65
+
66
+ ## Quickstart
67
+
68
+ ### 1. Run the Server (Docker)
69
+
70
+ ```bash
71
+ docker run -d \
72
+ -p 6379:6379 \
73
+ -p 8000:8000 \
74
+ -v pulsedb_data:/app/data \
75
+ --name pulsedb \
76
+ ghcr.io/gkavinrajancodes/pulsedb:latest
77
+ ```
78
+
79
+ Or use Docker Compose for a 3-node cluster:
80
+
81
+ ```bash
82
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
83
+ cd pulseDB && docker-compose up --build
84
+ ```
85
+
86
+ ### 2. Install the SDK
87
+
88
+ ```bash
89
+ pip install pulsedb
90
+ ```
91
+
92
+ ### 3. Use It
93
+
94
+ ```python
95
+ from pulsedb import PulseDB
96
+
97
+ db = PulseDB(host="localhost", port=6379)
98
+
99
+ # Standard KV Store
100
+ db.set("session:abc", "user_data", ttl=3600)
101
+ print(db.get("session:abc")) # "user_data"
102
+
103
+ # AI Memory Engine — insert vectors with metadata
104
+ db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
105
+ db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
106
+
107
+ # Semantic similarity search — optionally filter by metadata
108
+ results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
109
+ # → [{"id": "article:1", "score": 0.997}]
110
+ ```
111
+
112
+ ---
113
+
114
+ ## LangChain Integration
115
+
116
+ PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering.
117
+
118
+ ```python
119
+ from langchain_openai import OpenAIEmbeddings
120
+ from langchain_pulsedb import PulseDBVectorStore
121
+
122
+ store = PulseDBVectorStore(
123
+ embedding=OpenAIEmbeddings(),
124
+ host="localhost",
125
+ port=6379,
126
+ )
127
+
128
+ # Ingest documents — metadata is automatically stored for hybrid filtering
129
+ store.add_texts(
130
+ texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
131
+ metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
132
+ )
133
+
134
+ # Hybrid search — find similar docs but only from the blog source
135
+ docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
136
+ ```
137
+
138
+ ---
139
+
140
+ ## How the AI Memory Engine Works
141
+
142
+ Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
143
+
144
+ PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
145
+
146
+ ```
147
+ Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
148
+ ↓ Pass → included in result set
149
+ ↓ Fail → skipped immediately
150
+ Top-K results returned
151
+ ```
152
+
153
+ This means your effective `top_k` is always accurate, even with highly restrictive filters.
154
+
155
+ ---
156
+
157
+ ## Architecture
158
+
159
+ ```mermaid
160
+ graph TD
161
+ Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
162
+ Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
163
+ TCP --> Router["Command Router"]
164
+ HTTP --> Router
165
+ Router --> KV["16-Shard KV Store (LRU + TTL)"]
166
+ Router --> VE["AI Vector Engine (hnswlib HNSW)"]
167
+ Router --> DT["Data Types (Lists, Hashes)"]
168
+ Router --> PS["Pub/Sub Engine"]
169
+ KV --> WAL["Write-Ahead Log"]
170
+ VE --> Snap["HNSW Binary Snapshot"]
171
+ WAL --> Snap
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Run Locally (From Source)
177
+
178
+ ```bash
179
+ # 1. Clone and install
180
+ git clone https://github.com/gkavinrajanCodes/pulseDB.git
181
+ cd pulseDB
182
+ python3.10 -m venv workenv && source workenv/bin/activate
183
+ pip install -r requirements.txt
184
+
185
+ # 2. Start the server
186
+ NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
187
+
188
+ # 3. Install the SDK (in another terminal)
189
+ pip install -e sdk/
190
+ ```
191
+
192
+ ---
193
+
194
+ ## Contributing
195
+
196
+ 1. Fork the repository
197
+ 2. Create a feature branch: `git checkout -b feature/sorted-sets`
198
+ 3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
199
+ 4. Push: `git push origin feature/sorted-sets`
200
+ 5. Open a Pull Request
201
+
202
+ All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
203
+
204
+ ---
205
+
206
+ ## License
207
+
208
+ Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.
@@ -0,0 +1,11 @@
1
+ langchain_pulsedb/__init__.py,sha256=dfVSuHAPcGDaLRQQmOSzsAUzvGehBy1hzlNiG3OQp6c,184
2
+ langchain_pulsedb/vectorstore.py,sha256=nH9aRInEPOo9UWiu8m3_VkeSluZNXZUPKmZUH0CwHBw,3710
3
+ pulsedb/__init__.py,sha256=rpPuiFzEz7LzNXmcAeranlTMvUqKieNz6tMa_zPBBKo,1280
4
+ pulsedb/async_client.py,sha256=WSH8aWss_LOrci_McIMDNueqh_zDCMl_rIKFD5Zxs2Q,12479
5
+ pulsedb/client.py,sha256=3lY_185OILcnN0rm95TLVPcldVzL7-ipqYyhWHUldTo,5034
6
+ pulsedb/exceptions.py,sha256=M18aR3Ss0noBPrUbvXGSe-WKynIJjfhtJhnD427B_o8,550
7
+ pulsedb-1.0.0.dist-info/licenses/LICENSE,sha256=O8-iHTflN7beXDad1oQ47UXbeWiAzAIlZlsLha6FcJg,1854
8
+ pulsedb-1.0.0.dist-info/METADATA,sha256=WcCK3DVJfWJpYI55a1r8k01HkN7wQFR_Ie1CWeSWXq4,6890
9
+ pulsedb-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ pulsedb-1.0.0.dist-info/top_level.txt,sha256=q3OIfkyWuqHKd8siaUjPT7pU_cBqN1G5cijo2wefkG4,26
11
+ pulsedb-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,22 @@
1
+ Business Source License 1.1
2
+
3
+ License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
4
+ "Business Source License" is a trademark of MariaDB Corporation Ab.
5
+
6
+ Parameters
7
+ Licensor: G Kavinrajan
8
+ Licensed Work: PulseDB
9
+ Additional Use Grant: You may make use of the Licensed Work for non-production purposes, including testing, development, and academic research. You may not use the Licensed Work for any production purpose without a commercial agreement with the Licensor.
10
+ Change Date: 2030-06-28
11
+ Change License: Apache License, Version 2.0
12
+
13
+ Terms
14
+
15
+ The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensed Work is provided "AS IS", without warranties or conditions of any kind.
16
+
17
+ You may also make production use of the Licensed Work, provided such use does not exceed the Additional Use Grant (if any). If your use exceeds the Additional Use Grant, you must acquire a commercial license from the Licensor.
18
+
19
+ Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and such rights shall replace all rights and restrictions granted under this License.
20
+
21
+ Disclaimer of Warranty
22
+ THE LICENSED WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED WORK OR THE USE OR OTHER DEALINGS IN THE LICENSED WORK.
@@ -0,0 +1,2 @@
1
+ langchain_pulsedb
2
+ pulsedb