PyPI - pulsedb - Versions diffs - 1.0.0__py3-none-any.whl - Mend

pulsedb 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

langchain_pulsedb/__init__.py +6 -0
langchain_pulsedb/vectorstore.py +110 -0
pulsedb/__init__.py +50 -0
pulsedb/async_client.py +325 -0
pulsedb/client.py +162 -0
pulsedb/exceptions.py +23 -0
pulsedb-1.0.0.dist-info/METADATA +208 -0
pulsedb-1.0.0.dist-info/RECORD +11 -0
pulsedb-1.0.0.dist-info/WHEEL +5 -0
pulsedb-1.0.0.dist-info/licenses/LICENSE +22 -0
pulsedb-1.0.0.dist-info/top_level.txt +2 -0

langchain_pulsedb/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+from .vectorstore import PulseDBVectorStore
+__all__ = ["PulseDBVectorStore"]

langchain_pulsedb/vectorstore.py ADDED Viewed

@@ -0,0 +1,110 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+import uuid
+from typing import Any, Iterable, List, Optional, Tuple, Dict
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.vectorstores import VectorStore
+from pulsedb import PulseDB
+class PulseDBVectorStore(VectorStore):
+    """PulseDB VectorStore wrapper for LangChain."""
+    def __init__(
+        self,
+        embedding: Embeddings,
+        client: Optional[PulseDB] = None,
+        host: str = "localhost",
+        port: int = 6379,
+        collection_name: str = "langchain",
+    ):
+        self._embedding = embedding
+        self._client = client or PulseDB(host=host, port=port)
+        self._collection = collection_name
+    def _get_key(self, doc_id: str) -> str:
+        return f"{self._collection}:{doc_id}"
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore."""
+        texts = list(texts)
+        if not texts:
+            return []
+        embeddings = self._embedding.embed_documents(texts)
+        if ids is None:
+            ids = [str(uuid.uuid4()) for _ in texts]
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+        batch = []
+        for text, metadata, doc_id, embedding in zip(texts, metadatas, ids, embeddings):
+            key = self._get_key(doc_id)
+            doc_metadata = metadata.copy()
+            doc_metadata["_text"] = text
+            batch.append({"id": key, "vector": embedding, "metadata": doc_metadata})
+        self._client.vectors.upsert_batch(batch)
+        return ids
+    def similarity_search(
+        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs most similar to query."""
+        results = self.similarity_search_with_score(query, k=k, filter=filter, **kwargs)
+        return [doc for doc, _ in results]
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query, along with scores."""
+        embedding = self._embedding.embed_query(query)
+        # Search the vector index using the native TCP Binary Protocol
+        raw_results = self._client.vectors.search(embedding, top_k=k, filter=filter)
+        docs_with_scores = []
+        for res in raw_results:
+            key = res["id"]
+            score = res["score"]
+            # Only process keys in our collection
+            if not key.startswith(f"{self._collection}:"):
+                continue
+            # Fetch the metadata dictionary
+            doc_data = self._client.vectors.get(key)
+            if not doc_data:
+                continue
+            metadata = doc_data.get("metadata", {})
+            text = metadata.pop("_text", "")
+            doc = Document(page_content=text, metadata=metadata)
+            docs_with_scores.append((doc, score))
+        return docs_with_scores
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> "PulseDBVectorStore":
+        """Return VectorStore initialized from texts and embeddings."""
+        store = cls(embedding, **kwargs)
+        store.add_texts(texts, metadatas)
+        return store

pulsedb/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+# sdk/pulsedb/__init__.py
+"""
+PulseDB Python SDK
+Connects to PulseDB over the high-performance TCP Binary Protocol (port 6379).
+Usage (sync):
+    from pulsedb import PulseDB
+    db = PulseDB(host="localhost", port=6379)
+    db.set("user:123", "alice", ttl=3600)
+    print(db.get("user:123"))  # "alice"
+    # AI Memory Engine (Vector Search)
+    db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"category": "news"})
+    results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"category": "news"})
+Usage (async):
+    from pulsedb import AsyncPulseDB
+    async def main():
+        async with AsyncPulseDB(host="localhost", port=6379) as db:
+            await db.set("counter", 0)
+            await db.incr("counter")
+            await db.vectors.upsert("doc1", [0.1, 0.2, 0.3])
+"""
+from .client import PulseDB
+from .async_client import AsyncPulseDB
+from .exceptions import (
+    PulseDBError,
+    ConnectionError,
+    AuthenticationError,
+    CommandError,
+    TimeoutError,
+)
+__version__ = "1.1.0"
+__all__ = [
+    "PulseDB",
+    "AsyncPulseDB",
+    "PulseDBError",
+    "ConnectionError",
+    "AuthenticationError",
+    "CommandError",
+    "TimeoutError",
+]

pulsedb/async_client.py ADDED Viewed

@@ -0,0 +1,325 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+# sdk/pulsedb/async_client.py
+"""
+Async PulseDB client using the ultra-fast Binary Protocol (RESP2 over TCP).
+Usage:
+    import asyncio
+    from pulsedb import AsyncPulseDB
+    async def main():
+        db = AsyncPulseDB(host="localhost", port=6379)
+        await db.set("key", "value", ttl=3600)
+        val = await db.get("key")
+        # Vector Engine Usage
+        await db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"author": "John"})
+        results = await db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"author": "John"})
+    asyncio.run(main())
+"""
+import json
+import asyncio
+from typing import Optional, List, Any, Dict
+import redis
+import redis.asyncio as redis_async
+import numpy as np
+from .exceptions import CommandError, ConnectionError, TimeoutError
+class AsyncVectorNamespace:
+    """
+    Provides a beautiful, Pythonic API for the PulseDB AI Memory Engine.
+    Transparently packs Python floats into C++ binary bytes and serializes metadata.
+    """
+    def __init__(self, db: "AsyncPulseDB"):
+        self.db = db
+    async def upsert(self, id: str, vector: List[float], metadata: Optional[Dict[str, Any]] = None) -> str:
+        """Insert or update a vector embedding with optional metadata."""
+        blob = np.array(vector, dtype=np.float32).tobytes()
+        args: List[Any] = [id, blob]
+        if metadata is not None:
+            args.extend(["METADATA", json.dumps(metadata)])
+        try:
+            return await self.db.execute_command("VECTOR.BSET", *args)
+        except Exception as e:
+            if "dimension mismatch" in str(e).lower():
+                raise CommandError(f"Vector dimension mismatch: {e}")
+            raise CommandError(f"Failed to upsert vector: {e}")
+    async def upsert_batch(self, items: List[Dict[str, Any]]) -> int:
+        """
+        Bulk-insert or update multiple vectors in a single network round-trip.
+        Args:
+            items: List of dicts, each with:
+                - ``id`` (str): unique key
+                - ``vector`` (List[float]): embedding values
+                - ``metadata`` (dict, optional): metadata for hybrid filtering
+        Returns:
+            Number of vectors successfully inserted.
+        Example::
+            await db.vectors.upsert_batch([
+                {"id": "doc1", "vector": [0.1, 0.2], "metadata": {"cat": "news"}},
+                {"id": "doc2", "vector": [0.9, 0.8], "metadata": {"cat": "sports"}},
+            ])
+        """
+        payload = []
+        for item in items:
+            blob = np.array(item["vector"], dtype=np.float32).tobytes()
+            entry: Dict[str, Any] = {"id": item["id"], "blob": blob.hex()}
+            if "metadata" in item and item["metadata"] is not None:
+                entry["metadata"] = item["metadata"]
+            payload.append(entry)
+        try:
+            result = await self.db.execute_command("VECTOR.BSET_BATCH", json.dumps(payload))
+            # Response: "OK:N" or "PARTIAL: N inserted, errors: ..."
+            if isinstance(result, str) and result.startswith("OK:"):
+                return int(result[3:])
+            raise CommandError(f"Batch upsert error: {result}")
+        except CommandError:
+            raise
+        except Exception as e:
+            raise CommandError(f"Failed to batch upsert: {e}")
+    async def get(self, id: str) -> Optional[Dict[str, Any]]:
+        """Retrieve a vector and its metadata by ID."""
+        result = await self.db.execute_command("VECTOR.GET", id)
+        if result == "NULL" or result is None:
+            return None
+        if isinstance(result, (bytes, bytearray)):
+            result = result.decode("utf-8")
+        if isinstance(result, str):
+            try:
+                return json.loads(result)
+            except json.JSONDecodeError:
+                return None # fallback
+        return result if isinstance(result, dict) else None
+    async def search(self, query: List[float], top_k: int = 5, filter: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+        """Perform a blazing fast similarity search, optionally pre-filtering by metadata."""
+        blob = np.array(query, dtype=np.float32).tobytes()
+        args: List[Any] = [blob, "TOP_K", top_k]
+        if filter is not None:
+            args.extend(["FILTER", json.dumps(filter)])
+        results = await self.db.execute_command("VECTOR.BSEARCH", *args)
+        if not results:
+            return []
+        parsed = []
+        # Results return as flat array: [key1, score1, key2, score2, ...]
+        for i in range(0, len(results), 2):
+            doc_id = results[i]
+            if isinstance(doc_id, (bytes, bytearray)):
+                doc_id = doc_id.decode("utf-8")
+            score = float(results[i+1])
+            parsed.append({"id": doc_id, "score": score})
+        return parsed
+    async def count(self) -> int:
+        """Get the total number of vectors in the AI Memory Engine."""
+        return int(await self.db.execute_command("VECTOR.COUNT"))
+    async def delete(self, id: str) -> str:
+        """Delete a vector from the AI Memory Engine."""
+        return await self.db.execute_command("VECTOR.DEL", id)
+class AsyncPulseDB:
+    """
+    Async TCP client for PulseDB Cloud.
+    All methods are coroutines. Use with await inside an async function.
+    For sync usage, see PulseDB (sync_client.py).
+    """
+    def __init__(
+        self,
+        host: str = "localhost",
+        port: int = 6379,
+        timeout: float = 10.0,
+    ):
+        self._host = host
+        self._port = port
+        self._timeout = timeout
+        self._client: Optional[redis_async.Redis] = None
+        # Initialize Vector AI Namespace
+        self.vectors = AsyncVectorNamespace(self)
+    def _get_client(self) -> redis_async.Redis:
+        if self._client is None:
+            # We use protocol=2 for backwards compatibility with our custom RESP2 router
+            self._client = redis_async.Redis(
+                host=self._host,
+                port=self._port,
+                socket_timeout=self._timeout,
+                decode_responses=True,
+                protocol=2
+            )
+        return self._client
+    async def execute_command(self, command: str, *args) -> Any:
+        """Execute a raw command."""
+        client = self._get_client()
+        try:
+            result = await client.execute_command(command, *args)
+            if isinstance(result, str) and result.startswith("ERROR:"):
+                raise CommandError(result[7:])
+            return result
+        except redis.exceptions.ConnectionError as e:
+            raise ConnectionError(f"Cannot connect to PulseDB at {self._host}:{self._port}: {e}") from e
+        except redis.exceptions.TimeoutError as e:
+            raise TimeoutError(f"Command '{command}' timed out") from e
+        except redis.exceptions.ResponseError as e:
+            err_msg = str(e)
+            if err_msg.startswith("ERROR:"):
+                raise CommandError(err_msg[7:])
+            raise CommandError(err_msg)
+    # ------------------------------------------------------------------
+    # Core KV operations
+    # ------------------------------------------------------------------
+    async def set(self, key: str, value: Any, ttl: Optional[float] = None) -> str:
+        """Set key to value. Optionally set TTL in seconds."""
+        args = [key, str(value)]
+        if ttl is not None:
+            args += ["EX", str(int(ttl))]
+        return await self.execute_command("SET", *args)
+    async def get(self, key: str) -> Optional[str]:
+        """Get value for key. Returns None if key doesn't exist."""
+        result = await self.execute_command("GET", key)
+        return None if result == "NULL" else result
+    async def delete(self, *keys: str) -> str:
+        """Delete one or more keys."""
+        return await self.execute_command("DEL", *keys)
+    async def exists(self, key: str) -> bool:
+        """Return True if the key exists."""
+        return bool(await self.execute_command("EXISTS", key))
+    async def expire(self, key: str, seconds: float) -> int:
+        """Set TTL on a key. Returns 1 if set, 0 if key not found."""
+        return await self.execute_command("EXPIRE", key, str(seconds))
+    async def ttl(self, key: str) -> int:
+        """Get remaining TTL in seconds. -1 = no TTL. -2 = key not found."""
+        return await self.execute_command("TTL", key)
+    async def mset(self, mapping: dict) -> str:
+        """Set multiple keys at once."""
+        args = []
+        for k, v in mapping.items():
+            args += [k, str(v)]
+        return await self.execute_command("MSET", *args)
+    async def mget(self, *keys: str) -> List[Optional[str]]:
+        """Get multiple keys at once. Returns list with None for missing keys."""
+        results = await self.execute_command("MGET", *keys)
+        if isinstance(results, list):
+            return [None if v == "NULL" else v for v in results]
+        return results
+    async def keys(self, pattern: str = "*") -> List[str]:
+        """Return all keys matching a glob pattern."""
+        result = await self.execute_command("KEYS", pattern)
+        return result if isinstance(result, list) else []
+    async def dbsize(self) -> int:
+        """Return total number of keys."""
+        return int(await self.execute_command("DBSIZE"))
+    # ------------------------------------------------------------------
+    # Hash operations
+    # ------------------------------------------------------------------
+    async def hmset(self, key: str, mapping: dict) -> str:
+        """Set multiple fields in a hash."""
+        args = [key]
+        for k, v in mapping.items():
+            args.extend([k, str(v)])
+        return await self.execute_command("HMSET", *args)
+    async def hgetall(self, key: str) -> List[str]:
+        """Get all fields and values in a hash as a flat list."""
+        result = await self.execute_command("HGETALL", key)
+        if isinstance(result, dict):
+            flat = []
+            for k, v in result.items():
+                flat.extend([k, str(v)])
+            return flat
+        return result if isinstance(result, list) else []
+    # ------------------------------------------------------------------
+    # Numeric operations
+    # ------------------------------------------------------------------
+    async def incr(self, key: str) -> int:
+        """Increment integer value of key by 1."""
+        return int(await self.execute_command("INCR", key))
+    async def incrby(self, key: str, amount: int) -> int:
+        """Increment integer value of key by amount."""
+        return int(await self.execute_command("INCRBY", key, str(amount)))
+    async def decr(self, key: str) -> int:
+        """Decrement integer value of key by 1."""
+        return int(await self.execute_command("DECR", key))
+    async def decrby(self, key: str, amount: int) -> int:
+        """Decrement integer value of key by amount."""
+        return int(await self.execute_command("DECRBY", key, str(amount)))
+    # ------------------------------------------------------------------
+    # Pub/Sub
+    # ------------------------------------------------------------------
+    async def publish(self, channel: str, message: str) -> str:
+        """Publish a message to a channel."""
+        return await self.execute_command("PUBLISH", channel, message)
+    # ------------------------------------------------------------------
+    # Admin
+    # ------------------------------------------------------------------
+    async def ping(self) -> str:
+        """Ping the server. Returns 'PONG' if alive."""
+        return await self.execute_command("PING")
+    async def flush(self) -> str:
+        """Delete all keys in the database."""
+        return await self.execute_command("FLUSHDB")
+    async def info(self) -> str:
+        """Get server info string."""
+        return await self.execute_command("INFO")
+    # ------------------------------------------------------------------
+    # Context manager support
+    # ------------------------------------------------------------------
+    async def close(self):
+        if self._client:
+            await self._client.aclose()
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, *args):
+        await self.close()

pulsedb/client.py ADDED Viewed

@@ -0,0 +1,162 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+# sdk/pulsedb/client.py
+"""
+Synchronous PulseDB client (wraps the async client).
+Usage:
+    from pulsedb import PulseDB
+    db = PulseDB(host="localhost", port=6379)
+    db.set("key", "value", ttl=3600)
+    val = db.get("key")
+    # Vector Engine Usage
+    db.vectors.upsert("doc1", [0.1, 0.2, 0.3], metadata={"author": "John"})
+    results = db.vectors.search([0.1, 0.2, 0.3], top_k=5, filter={"author": "John"})
+"""
+import asyncio
+from typing import Optional, List, Any, Dict
+from .async_client import AsyncPulseDB
+import threading
+# One event loop is created when this module is imported and is used by every
+# synchronous call made through this module. It runs forever on a daemon
+# thread, so it does not keep the interpreter alive at exit.
+_loop = asyncio.new_event_loop()
+_thread = threading.Thread(target=_loop.run_forever, daemon=True)
+_thread.start()
+def _run(coro):
+    """Run ``coro`` on the background event loop and block for its result.
+    The coroutine is scheduled thread-safely on ``_loop``; the calling thread
+    then waits, with no timeout at this layer, until it completes. The
+    coroutine's return value is returned and any exception it raised is
+    re-raised in the calling thread.
+    """
+    future = asyncio.run_coroutine_threadsafe(coro, _loop)
+    return future.result()
+class VectorNamespace:
+    """
+    Blocking counterpart of the async vector namespace, exposed as ``db.vectors``.
+    Each method calls the method of the same name on the wrapped async
+    namespace and waits for its result via ``_run``; no packing or
+    serialization happens in this class itself.
+    """
+    def __init__(self, async_namespace):
+        # The wrapped namespace; PulseDB passes in ``AsyncPulseDB.vectors``.
+        self._async = async_namespace
+    def upsert(self, id: str, vector: List[float], metadata: Optional[Dict[str, Any]] = None) -> str:
+        """Insert or update a vector embedding with optional metadata."""
+        return _run(self._async.upsert(id, vector, metadata))
+    def upsert_batch(self, items: List[Dict[str, Any]]) -> int:
+        """Bulk-insert or update multiple vectors; each item needs ``id`` and ``vector``."""
+        return _run(self._async.upsert_batch(items))
+    def get(self, id: str) -> Optional[Dict[str, Any]]:
+        """Retrieve a vector and its metadata by ID."""
+        return _run(self._async.get(id))
+    def search(self, query: List[float], top_k: int = 5, filter: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+        """Run a similarity search for ``query``, optionally restricted by a metadata filter."""
+        return _run(self._async.search(query, top_k, filter))
+    def count(self) -> int:
+        """Get the total number of vectors in the AI Memory Engine."""
+        return _run(self._async.count())
+    def delete(self, id: str) -> str:
+        """Delete a vector from the AI Memory Engine."""
+        return _run(self._async.delete(id))
+class PulseDB:
+    """
+    Synchronous PulseDB client.
+    Wraps AsyncPulseDB to provide a blocking API for use in sync codebases,
+    scripts, Django views, Flask routes, etc.
+    Every method forwards to the AsyncPulseDB method of the same name and
+    blocks until it completes. Can be used as a context manager; ``close()``
+    is called on exit.
+    For async codebases (FastAPI, aiohttp), use AsyncPulseDB directly.
+    """
+    def __init__(
+        self,
+        host: str = "localhost",
+        port: int = 6379,
+        timeout: float = 10.0,
+    ):
+        """Create the wrapped async client; arguments are forwarded unchanged."""
+        self._async = AsyncPulseDB(
+            host=host, port=port, timeout=timeout
+        )
+        # Blocking view over the async client's vector namespace.
+        self.vectors = VectorNamespace(self._async.vectors)
+    def execute_command(self, command: str, *args) -> Any:
+        """Execute a raw command and return the server reply."""
+        return _run(self._async.execute_command(command, *args))
+    def set(self, key: str, value: Any, ttl: Optional[float] = None) -> str:
+        """Set key to value. Optionally set TTL in seconds."""
+        return _run(self._async.set(key, value, ttl))
+    def get(self, key: str) -> Optional[str]:
+        """Get value for key. Returns None if key doesn't exist."""
+        return _run(self._async.get(key))
+    def delete(self, *keys: str) -> str:
+        """Delete one or more keys."""
+        return _run(self._async.delete(*keys))
+    def exists(self, key: str) -> bool:
+        """Return True if the key exists."""
+        return _run(self._async.exists(key))
+    def expire(self, key: str, seconds: float) -> int:
+        """Set TTL on a key. Returns 1 if set, 0 if key not found."""
+        return _run(self._async.expire(key, seconds))
+    def ttl(self, key: str) -> int:
+        """Get remaining TTL in seconds. -1 = no TTL. -2 = key not found."""
+        return _run(self._async.ttl(key))
+    def mset(self, mapping: dict) -> str:
+        """Set multiple keys at once."""
+        return _run(self._async.mset(mapping))
+    def mget(self, *keys: str) -> List[Optional[str]]:
+        """Get multiple keys at once. Returns list with None for missing keys."""
+        return _run(self._async.mget(*keys))
+    def keys(self, pattern: str = "*") -> List[str]:
+        """Return all keys matching a glob pattern."""
+        return _run(self._async.keys(pattern))
+    def dbsize(self) -> int:
+        """Return total number of keys."""
+        return _run(self._async.dbsize())
+    def hmset(self, key: str, mapping: dict) -> str:
+        """Set multiple fields in a hash."""
+        return _run(self._async.hmset(key, mapping))
+    def hgetall(self, key: str) -> List[str]:
+        """Get all fields and values in a hash as a flat list."""
+        return _run(self._async.hgetall(key))
+    def incr(self, key: str) -> int:
+        """Increment integer value of key by 1."""
+        return _run(self._async.incr(key))
+    def incrby(self, key: str, amount: int) -> int:
+        """Increment integer value of key by amount."""
+        return _run(self._async.incrby(key, amount))
+    def decr(self, key: str) -> int:
+        """Decrement integer value of key by 1."""
+        return _run(self._async.decr(key))
+    def decrby(self, key: str, amount: int) -> int:
+        """Decrement integer value of key by amount."""
+        return _run(self._async.decrby(key, amount))
+    def publish(self, channel: str, message: str) -> str:
+        """Publish a message to a channel."""
+        return _run(self._async.publish(channel, message))
+    def ping(self) -> str:
+        """Ping the server. Returns 'PONG' if alive."""
+        return _run(self._async.ping())
+    def flush(self) -> str:
+        """Delete all keys in the database."""
+        return _run(self._async.flush())
+    def info(self) -> str:
+        """Get server info string."""
+        return _run(self._async.info())
+    def close(self):
+        """Close the wrapped async client's connection, if one was opened."""
+        _run(self._async.close())
+    def __enter__(self):
+        return self
+    def __exit__(self, *args):
+        self.close()

pulsedb/exceptions.py ADDED Viewed

@@ -0,0 +1,23 @@
+# Copyright (c) 2026 G Kavinrajan. All rights reserved.
+# Licensed under the Business Source License 1.1
+# sdk/pulsedb/exceptions.py
+class PulseDBError(Exception):
+    """Base exception for all PulseDB SDK errors."""
+# NOTE: this name is the same as Python's builtin ConnectionError. The class
+# derives from PulseDBError only, so ``except ConnectionError`` catches it
+# only where this class has been imported over the builtin.
+class ConnectionError(PulseDBError):
+    """Could not connect to the PulseDB server."""
+# NOTE(review): nothing in the client modules shown alongside this file
+# raises this exception — confirm where it is meant to be used.
+class AuthenticationError(PulseDBError):
+    """API key was rejected."""
+class CommandError(PulseDBError):
+    """The server returned an ERROR response."""
+# NOTE: this name is the same as Python's builtin TimeoutError; the class is
+# unrelated to the builtin and derives from PulseDBError only.
+class TimeoutError(PulseDBError):
+    """A command or connection timed out."""

pulsedb-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,208 @@
+Metadata-Version: 2.4
+Name: pulsedb
+Version: 1.0.0
+Summary: High-performance Python Vector Database & Memory Engine with RESP2 support.
+Author-email: G Kavinrajan <gkavinrajan@example.com>
+Project-URL: Homepage, https://github.com/gkavinrajanCodes/pulseDB
+Project-URL: Bug Tracker, https://github.com/gkavinrajanCodes/pulseDB/issues
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: Other/Proprietary License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Database
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: redis>=5.0.0
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: hnswlib>=0.8.0
+Dynamic: license-file
+<div align="center">
+# ⚡ PulseDB
+**An enterprise-grade, in-memory database with a native AI Vector Engine.**
+Built for developers who need Redis-compatible storage *and* lightning-fast semantic search — without running two separate systems.
+[![CI](https://github.com/gkavinrajanCodes/pulseDB/actions/workflows/python-app.yml/badge.svg)](https://github.com/gkavinrajanCodes/pulseDB/actions)
+[![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/pulsedb/)
+[![License: BSL 1.1](https://img.shields.io/badge/License-BSL%201.1-blue.svg)](LICENSE)
+</div>
+---
+## What is PulseDB?
+PulseDB is a high-performance, source-available database (Business Source License 1.1) that combines:
+- **A Redis-compatible KV store** — Strings, Lists, Hashes with TTL, LRU eviction, and RESP2 wire protocol
+- **An AI Memory Engine** — HNSW-based vector search with native C++ pre-filtering callbacks
+- **A Python SDK** — Ergonomic `db.vectors.upsert()` / `db.vectors.search()` API
+- **A LangChain Integration** — Drop-in `PulseDBVectorStore` for RAG pipelines with metadata filtering
+> One server, one protocol, one SDK. No Pinecone. No Weaviate. No Redis Stack.
+---
+## Features
+| Category | Capability |
+|---|---|
+| **KV Store** | `SET`, `GET`, `DEL`, `EXPIRE`, `TTL`, `MSET`, `MGET`, `INCR`, `APPEND` |
+| **Data Types** | Strings · Lists (`LPUSH/RPOP/LRANGE`) · Hashes (`HSET/HGET/HGETALL`) |
+| **Vector Engine** | HNSW cosine similarity, O(log N) search, dynamic resizing |
+| **Hybrid Search** | Native C++ pre-filter callbacks — filter by metadata *during* graph traversal |
+| **Persistence** | Write-Ahead Log (WAL) + JSON snapshots + HNSW binary graph snapshots |
+| **Protocol** | RESP2 TCP (port 6379) — works with `redis-cli`, `redis-py`, `ioredis` |
+| **Cluster** | Consistent hashing, multi-node routing |
+| **Auth** | API Key (HTTP) + `REQUIREPASS` (TCP) + optional TLS/SSL |
+| **Observability** | Prometheus `/metrics` endpoint, structured `/health` and `/ready` |
+| **LangChain** | `PulseDBVectorStore` with `similarity_search(filter={...})` |
+---
+## Quickstart
+### 1. Run the Server (Docker)
+```bash
+docker run -d \
+  -p 6379:6379 \
+  -p 8000:8000 \
+  -v pulsedb_data:/app/data \
+  --name pulsedb \
+  ghcr.io/gkavinrajancodes/pulsedb:latest
+```
+Or use Docker Compose for a 3-node cluster:
+```bash
+git clone https://github.com/gkavinrajanCodes/pulseDB.git
+cd pulseDB && docker-compose up --build
+```
+### 2. Install the SDK
+```bash
+pip install pulsedb
+```
+### 3. Use It
+```python
+from pulsedb import PulseDB
+db = PulseDB(host="localhost", port=6379)
+# Standard KV Store
+db.set("session:abc", "user_data", ttl=3600)
+print(db.get("session:abc"))  # "user_data"
+# AI Memory Engine — insert vectors with metadata
+db.vectors.upsert("article:1", [0.12, 0.98, 0.34], metadata={"category": "sports", "year": 2024})
+db.vectors.upsert("article:2", [0.91, 0.11, 0.67], metadata={"category": "tech", "year": 2023})
+# Semantic similarity search — optionally filter by metadata
+results = db.vectors.search([0.10, 0.95, 0.40], top_k=5, filter={"category": "sports"})
+# → [{"id": "article:1", "score": 0.997}]
+```
+---
+## LangChain Integration
+PulseDB works natively as a LangChain VectorStore, giving your RAG pipeline blazing fast retrieval with hybrid metadata filtering.
+```python
+from langchain_openai import OpenAIEmbeddings
+from langchain_pulsedb import PulseDBVectorStore
+store = PulseDBVectorStore(
+    embedding=OpenAIEmbeddings(),
+    host="localhost",
+    port=6379,
+)
+# Ingest documents — metadata is automatically stored for hybrid filtering
+store.add_texts(
+    texts=["PulseDB is fast", "Redis is popular", "Pinecone is expensive"],
+    metadatas=[{"source": "blog"}, {"source": "wiki"}, {"source": "review"}]
+)
+# Hybrid search — find similar docs but only from the blog source
+docs = store.similarity_search("fast database", k=2, filter={"source": "blog"})
+```
+---
+## How the AI Memory Engine Works
+Standard vector databases do **post-filtering**: search all vectors, get K results, then throw away the ones that don't match the filter. This degrades accuracy.
+PulseDB does **true pre-filtering** using native `hnswlib` C++ filter callbacks. The filter function is evaluated *inside* the graph traversal — so the C++ engine skips disqualified nodes entirely before scoring them.
+```
+Query Vector → HNSW Graph Traversal → [Filter Callback runs on every node visited]
+                                        ↓ Pass → included in result set
+                                        ↓ Fail → skipped immediately
+                                       Top-K results returned
+```
+This means your effective `top_k` is always accurate, even with highly restrictive filters.
+---
+## Architecture
+```mermaid
+graph TD
+    Client["Client (SDK / redis-cli)"] -->|RESP2 Binary Protocol| TCP["asyncio TCP Server :6379"]
+    Client -->|HTTP REST| HTTP["FastAPI Gateway :8000"]
+    TCP --> Router["Command Router"]
+    HTTP --> Router
+    Router --> KV["16-Shard KV Store (LRU + TTL)"]
+    Router --> VE["AI Vector Engine (hnswlib HNSW)"]
+    Router --> DT["Data Types (Lists, Hashes)"]
+    Router --> PS["Pub/Sub Engine"]
+    KV --> WAL["Write-Ahead Log"]
+    VE --> Snap["HNSW Binary Snapshot"]
+    WAL --> Snap
+```
+---
+## Run Locally (From Source)
+```bash
+# 1. Clone and install
+git clone https://github.com/gkavinrajanCodes/pulseDB.git
+cd pulseDB
+python3.10 -m venv workenv && source workenv/bin/activate
+pip install -r requirements.txt
+# 2. Start the server
+NODE_ID=node1 CLUSTER_NODES=node1 uvicorn server.main:app --host 0.0.0.0 --port 8000
+# 3. Install the SDK (in another terminal)
+pip install -e sdk/
+```
+---
+## Contributing
+1. Fork the repository
+2. Create a feature branch: `git checkout -b feature/sorted-sets`
+3. Commit your changes: `git commit -m "feat: add ZADD/ZRANGE sorted set commands"`
+4. Push: `git push origin feature/sorted-sets`
+5. Open a Pull Request
+All PRs are validated against our CI matrix (Python 3.10, 3.11, 3.12 with flake8, mypy, and pytest).
+---
+## License
+Distributed under the Business Source License (BSL 1.1). See [LICENSE](LICENSE) for details.

pulsedb-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+langchain_pulsedb/__init__.py,sha256=dfVSuHAPcGDaLRQQmOSzsAUzvGehBy1hzlNiG3OQp6c,184
+langchain_pulsedb/vectorstore.py,sha256=nH9aRInEPOo9UWiu8m3_VkeSluZNXZUPKmZUH0CwHBw,3710
+pulsedb/__init__.py,sha256=rpPuiFzEz7LzNXmcAeranlTMvUqKieNz6tMa_zPBBKo,1280
+pulsedb/async_client.py,sha256=WSH8aWss_LOrci_McIMDNueqh_zDCMl_rIKFD5Zxs2Q,12479
+pulsedb/client.py,sha256=3lY_185OILcnN0rm95TLVPcldVzL7-ipqYyhWHUldTo,5034
+pulsedb/exceptions.py,sha256=M18aR3Ss0noBPrUbvXGSe-WKynIJjfhtJhnD427B_o8,550
+pulsedb-1.0.0.dist-info/licenses/LICENSE,sha256=O8-iHTflN7beXDad1oQ47UXbeWiAzAIlZlsLha6FcJg,1854
+pulsedb-1.0.0.dist-info/METADATA,sha256=WcCK3DVJfWJpYI55a1r8k01HkN7wQFR_Ie1CWeSWXq4,6890
+pulsedb-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+pulsedb-1.0.0.dist-info/top_level.txt,sha256=q3OIfkyWuqHKd8siaUjPT7pU_cBqN1G5cijo2wefkG4,26
+pulsedb-1.0.0.dist-info/RECORD,,

pulsedb-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

pulsedb-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+Business Source License 1.1
+License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
+"Business Source License" is a trademark of MariaDB Corporation Ab.
+Parameters
+Licensor: G Kavinrajan
+Licensed Work: PulseDB
+Additional Use Grant: You may make use of the Licensed Work for non-production purposes, including testing, development, and academic research. You may not use the Licensed Work for any production purpose without a commercial agreement with the Licensor.
+Change Date: 2030-06-28
+Change License: Apache License, Version 2.0
+Terms
+The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensed Work is provided "AS IS", without warranties or conditions of any kind.
+You may also make production use of the Licensed Work, provided such use does not exceed the Additional Use Grant (if any). If your use exceeds the Additional Use Grant, you must acquire a commercial license from the Licensor.
+Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and such rights shall replace all rights and restrictions granted under this License.
+Disclaimer of Warranty
+THE LICENSED WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED WORK OR THE USE OR OTHER DEALINGS IN THE LICENSED WORK.

pulsedb-1.0.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,2 @@
+langchain_pulsedb
+pulsedb