PyPI - beaver-db - Versions diffs - 0.16.6__tar.gz → 0.16.8__tar.gz - Mend

beaver-db 0.16.6__tar.gz → 0.16.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of beaver-db might be problematic. Click here for more details.

Files changed (42) hide show

{beaver_db-0.16.6 → beaver_db-0.16.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: beaver-db
-Version: 0.16.6
+Version: 0.16.8
 Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
 License-File: LICENSE
 Classifier: License :: OSI Approved :: MIT License
@@ -26,6 +26,8 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
 `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
+> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
 ## Design Philosophy
 `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.

{beaver_db-0.16.6 → beaver_db-0.16.8}/README.md RENAMED Viewed

@@ -11,6 +11,8 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
 `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
+> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
 ## Design Philosophy
 `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.

{beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/blobs.py RENAMED Viewed

@@ -2,7 +2,7 @@ import json
 import sqlite3
 from typing import Any, Dict, Iterator, NamedTuple, Optional, Type, TypeVar
-from .types import JsonSerializable
+from .types import JsonSerializable, IDatabase
 class Blob[M](NamedTuple):
@@ -16,9 +16,9 @@ class Blob[M](NamedTuple):
 class BlobManager[M]:
     """A wrapper providing a Pythonic interface to a blob store in the database."""
-    def __init__(self, name: str, conn: sqlite3.Connection, model: Type[M] | None = None):
+    def __init__(self, name: str, db: IDatabase, model: Type[M] | None = None):
         self._name = name
-        self._conn = conn
+        self._db = db
         self._model = model
     def _serialize(self, value: M) -> str | None:
@@ -51,8 +51,8 @@ class BlobManager[M]:
         metadata_json = self._serialize(metadata) if metadata else None
-        with self._conn:
-            self._conn.execute(
+        with self._db.connection:
+            self._db.connection.execute(
                 "INSERT OR REPLACE INTO beaver_blobs (store_name, key, data, metadata) VALUES (?, ?, ?, ?)",
                 (self._name, key, data, metadata_json),
             )
@@ -67,7 +67,7 @@ class BlobManager[M]:
         Returns:
             A Blob object containing the data and metadata, or None if the key is not found.
         """
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT data, metadata FROM beaver_blobs WHERE store_name = ? AND key = ?",
             (self._name, key),
@@ -90,8 +90,8 @@ class BlobManager[M]:
         Raises:
             KeyError: If the key does not exist in the store.
         """
-        with self._conn:
-            cursor = self._conn.cursor()
+        with self._db.connection:
+            cursor = self._db.connection.cursor()
             cursor.execute(
                 "DELETE FROM beaver_blobs WHERE store_name = ? AND key = ?",
                 (self._name, key),
@@ -103,7 +103,7 @@ class BlobManager[M]:
         """
         Checks if a key exists in the blob store (e.g., `key in blobs`).
         """
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT 1 FROM beaver_blobs WHERE store_name = ? AND key = ? LIMIT 1",
             (self._name, key),
@@ -114,7 +114,7 @@ class BlobManager[M]:
     def __iter__(self) -> Iterator[str]:
         """Returns an iterator over the keys in the blob store."""
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT key FROM beaver_blobs WHERE store_name = ?", (self._name,)
         )

{beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/channels.py RENAMED Viewed

@@ -6,7 +6,7 @@ import time
 from queue import Empty, Queue
 from typing import Any, AsyncIterator, Generic, Iterator, Set, Type, TypeVar
-from .types import JsonSerializable
+from .types import JsonSerializable, IDatabase
 # A special message object used to signal the listener to gracefully shut down.
 _SHUTDOWN_SENTINEL = object()
@@ -120,14 +120,12 @@ class ChannelManager[T]:
     def __init__(
         self,
         name: str,
-        conn: sqlite3.Connection,
-        db_path: str,
+        db: IDatabase,
         poll_interval: float = 0.1,
         model: Type[T] | None = None,
     ):
         self._name = name
-        self._conn = conn
-        self._db_path = db_path
+        self._db = db
         self._poll_interval = poll_interval
         self._model = model
         self._listeners: Set[Queue] = set()
@@ -197,8 +195,8 @@ class ChannelManager[T]:
         Useful for reducing the database once logs are not needed.
         """
-        with self._conn:
-            self._conn.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))
+        with self._db.connection:
+            self._db.connection.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))
     def _polling_loop(self):
         """
@@ -208,8 +206,7 @@ class ChannelManager[T]:
         to all registered listener queues.
         """
         # A separate SQLite connection is required for each thread.
-        thread_conn = sqlite3.connect(self._db_path, check_same_thread=False)
-        thread_conn.row_factory = sqlite3.Row
+        thread_conn = self._db.connection
         # The poller starts listening for messages from this moment forward.
         last_seen_timestamp = time.time()
@@ -256,8 +253,8 @@ class ChannelManager[T]:
         except TypeError as e:
             raise TypeError("Message payload must be JSON-serializable.") from e
-        with self._conn:
-            self._conn.execute(
+        with self._db.connection:
+            self._db.connection.execute(
                 "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
                 (time.time(), self._name, json_payload),
             )

{beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/collections.py RENAMED Viewed

@@ -4,7 +4,7 @@ import threading
 import uuid
 from enum import Enum
 from typing import Any, Iterator, List, Literal, Tuple, Type, TypeVar
-from .types import Model, stub
+from .types import Model, stub, IDatabase
 try:
     import numpy as np
@@ -111,12 +111,12 @@ class CollectionManager[D: Document]:
     FTS, fuzzy search, graph traversal, and persistent vector search.
     """
-    def __init__(self, name: str, conn: sqlite3.Connection, model: Type[D] | None = None):
+    def __init__(self, name: str, db: IDatabase, model: Type[D] | None = None):
         self._name = name
-        self._conn = conn
+        self._db = db
         self._model = model or Document
         # All vector-related operations are now delegated to the VectorIndex class.
-        self._vector_index = VectorIndex(name, conn)
+        self._vector_index = VectorIndex(name, db)
         # A lock to ensure only one compaction thread runs at a time for this collection.
         self._compaction_lock = threading.Lock()
         self._compaction_thread: threading.Thread | None = None
@@ -134,7 +134,7 @@ class CollectionManager[D: Document]:
     def _needs_compaction(self, threshold: int = 1000) -> bool:
         """Checks if the total number of pending vector operations exceeds the threshold."""
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT COUNT(*) FROM _beaver_ann_pending_log WHERE collection_name = ?",
             (self._name,)
@@ -199,8 +199,8 @@ class CollectionManager[D: Document]:
         if not isinstance(document, Document):
             raise TypeError("Item to index must be a Document object.")
-        with self._conn:
-            cursor = self._conn.cursor()
+        with self._db.connection:
+            cursor = self._db.connection.cursor()
             # Step 1: Core Document and Vector Storage
             cursor.execute(
@@ -253,8 +253,8 @@ class CollectionManager[D: Document]:
         """Removes a document and all its associated data from the collection."""
         if not isinstance(document, Document):
             raise TypeError("Item to drop must be a Document object.")
-        with self._conn:
-            cursor = self._conn.cursor()
+        with self._db.connection:
+            cursor = self._db.connection.cursor()
             cursor.execute("DELETE FROM beaver_collections WHERE collection = ? AND item_id = ?", (self._name, document.id))
             cursor.execute("DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?", (self._name, document.id))
             cursor.execute("DELETE FROM beaver_trigrams WHERE collection = ? AND item_id = ?", (self._name, document.id))
@@ -271,7 +271,7 @@ class CollectionManager[D: Document]:
     def __iter__(self) -> Iterator[D]:
         """Returns an iterator over all documents in the collection."""
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
             (self._name,),
@@ -306,7 +306,7 @@ class CollectionManager[D: Document]:
         placeholders = ",".join("?" for _ in result_ids)
         sql = f"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ? AND item_id IN ({placeholders})"
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         rows = cursor.execute(sql, (self._name, *result_ids)).fetchall()
         doc_map = {
@@ -350,7 +350,7 @@ class CollectionManager[D: Document]:
         self, query: str, on: list[str] | None, top_k: int
     ) -> list[tuple[D, float]]:
         """Performs a standard FTS search."""
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         sql_query = """
             SELECT t1.item_id, t1.item_vector, t1.metadata, fts.rank
             FROM beaver_collections AS t1 JOIN (
@@ -390,7 +390,7 @@ class CollectionManager[D: Document]:
         if similarity_threshold == 0:
             return set()
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         sql = """
             SELECT item_id FROM beaver_trigrams
             WHERE collection = ? AND trigram IN ({}) {}
@@ -422,7 +422,7 @@ class CollectionManager[D: Document]:
         if not candidate_ids:
             return []
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         id_placeholders = ",".join("?" for _ in candidate_ids)
         sql_text = f"SELECT item_id, field_path, field_content FROM beaver_fts_index WHERE collection = ? AND item_id IN ({id_placeholders})"
         params_text: list[Any] = [self._name]
@@ -480,8 +480,8 @@ class CollectionManager[D: Document]:
         """Creates a directed edge between two documents."""
         if not isinstance(source, Document) or not isinstance(target, Document):
             raise TypeError("Source and target must be Document objects.")
-        with self._conn:
-            self._conn.execute(
+        with self._db.connection:
+            self._db.connection.execute(
                 "INSERT OR REPLACE INTO beaver_edges (collection, source_item_id, target_item_id, label, metadata) VALUES (?, ?, ?, ?, ?)",
                 (
                     self._name,
@@ -500,7 +500,7 @@ class CollectionManager[D: Document]:
             sql += " AND t2.label = ?"
             params.append(label)
-        rows = self._conn.cursor().execute(sql, tuple(params)).fetchall()
+        rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
         return [
             self._model(
                 id=row["item_id"],
@@ -549,7 +549,7 @@ class CollectionManager[D: Document]:
         """
         params = [source.id, self._name, depth] + labels + [self._name]
-        rows = self._conn.cursor().execute(sql, tuple(params)).fetchall()
+        rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
         return [
             self._model(
                 id=row["item_id"],
@@ -565,7 +565,7 @@ class CollectionManager[D: Document]:
     def __len__(self) -> int:
         """Returns the number of documents in the collection."""
-        cursor = self._conn.cursor()
+        cursor = self._db.connection.cursor()
         cursor.execute(
             "SELECT COUNT(*) FROM beaver_collections WHERE collection = ?",
             (self._name,),

{beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/core.py RENAMED Viewed

@@ -15,7 +15,7 @@ from .queues import QueueManager
 class BeaverDB:
     """
     An embedded, multi-modal database in a single SQLite file.
-    This class manages the database connection and table schemas.
+    This class manages thread-safe database connections and table schemas.
     """
     def __init__(self, db_path: str, timeout:float=30.0):
@@ -26,22 +26,44 @@ class BeaverDB:
             db_path: The path to the SQLite database file.
         """
         self._db_path = db_path
-        # Enable WAL mode for better concurrency between readers and writers
-        self._conn = sqlite3.connect(self._db_path, check_same_thread=False, timeout=timeout)
-        self._conn.execute("PRAGMA journal_mode=WAL;")
-        self._conn.row_factory = sqlite3.Row
+        self._timeout = timeout
+        # This object will store a different connection for each thread.
+        self._thread_local = threading.local()
         self._channels: dict[str, ChannelManager] = {}
         self._channels_lock = threading.Lock()
-        # Add a cache and lock for CollectionManager singletons
         self._collections: dict[str, CollectionManager] = {}
         self._collections_lock = threading.Lock()
-        # Initialize the schemas
+        # Initialize the schemas. This will implicitly create the first
+        # connection for the main thread via the `connection` property.
         self._create_all_tables()
+    @property
+    def connection(self) -> sqlite3.Connection:
+        """
+        Provides a thread-safe SQLite connection.
+        Each thread will receive its own dedicated connection object.
+        The connection is created on the first access and then reused for
+        all subsequent calls within the same thread.
+        """
+        # Check if a connection is already stored for this thread
+        conn = getattr(self._thread_local, 'conn', None)
+        if conn is None:
+            # No connection for this thread yet, so create one.
+            # We no longer need check_same_thread=False, restoring thread safety.
+            conn = sqlite3.connect(self._db_path, timeout=self._timeout)
+            conn.execute("PRAGMA journal_mode=WAL;")
+            conn.row_factory = sqlite3.Row
+            self._thread_local.conn = conn
+        return conn
     def _create_all_tables(self):
         """Initializes all required tables in the database file."""
-        with self._conn:
+        with self.connection:
             self._create_ann_deletions_log_table()
             self._create_ann_id_mapping_table()
             self._create_ann_indexes_table()
@@ -60,7 +82,7 @@ class BeaverDB:
     def _create_logs_table(self):
         """Creates the table for time-indexed logs."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_logs (
                 log_name TEXT NOT NULL,
@@ -70,7 +92,7 @@ class BeaverDB:
             )
             """
         )
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE INDEX IF NOT EXISTS idx_logs_timestamp
             ON beaver_logs (log_name, timestamp)
@@ -79,7 +101,7 @@ class BeaverDB:
     def _create_blobs_table(self):
         """Creates the table for storing named blobs."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_blobs (
                 store_name TEXT NOT NULL,
@@ -93,7 +115,7 @@ class BeaverDB:
     def _create_ann_indexes_table(self):
         """Creates the table to store the serialized base ANN index."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS _beaver_ann_indexes (
                 collection_name TEXT PRIMARY KEY,
@@ -105,7 +127,7 @@ class BeaverDB:
     def _create_ann_pending_log_table(self):
         """Creates the log for new vector additions."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS _beaver_ann_pending_log (
                 collection_name TEXT NOT NULL,
@@ -117,7 +139,7 @@ class BeaverDB:
     def _create_ann_deletions_log_table(self):
         """Creates the log for vector deletions (tombstones)."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS _beaver_ann_deletions_log (
                 collection_name TEXT NOT NULL,
@@ -129,7 +151,7 @@ class BeaverDB:
     def _create_ann_id_mapping_table(self):
         """Creates the table to map string IDs to integer IDs for Faiss."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS _beaver_ann_id_mapping (
                 collection_name TEXT NOT NULL,
@@ -142,7 +164,7 @@ class BeaverDB:
     def _create_priority_queue_table(self):
         """Creates the priority queue table and its performance index."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_priority_queues (
                 queue_name TEXT NOT NULL,
@@ -152,7 +174,7 @@ class BeaverDB:
             )
             """
         )
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE INDEX IF NOT EXISTS idx_priority_queue_order
             ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
@@ -161,7 +183,7 @@ class BeaverDB:
     def _create_dict_table(self):
         """Creates the namespaced dictionary table."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_dicts (
                 dict_name TEXT NOT NULL,
@@ -175,7 +197,7 @@ class BeaverDB:
     def _create_pubsub_table(self):
         """Creates the pub/sub log table."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
                 timestamp REAL PRIMARY KEY,
@@ -184,7 +206,7 @@ class BeaverDB:
             )
         """
         )
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
             ON beaver_pubsub_log (channel_name, timestamp)
@@ -193,7 +215,7 @@ class BeaverDB:
     def _create_list_table(self):
         """Creates the lists table."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_lists (
                 list_name TEXT NOT NULL,
@@ -206,7 +228,7 @@ class BeaverDB:
     def _create_collections_table(self):
         """Creates the main table for storing documents and vectors."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_collections (
                 collection TEXT NOT NULL,
@@ -220,7 +242,7 @@ class BeaverDB:
     def _create_fts_table(self):
         """Creates the virtual FTS table for full-text search."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
                 collection,
@@ -234,7 +256,7 @@ class BeaverDB:
     def _create_trigrams_table(self):
         """Creates the table for the fuzzy search trigram index."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_trigrams (
                 collection TEXT NOT NULL,
@@ -245,7 +267,7 @@ class BeaverDB:
             )
             """
         )
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE INDEX IF NOT EXISTS idx_trigram_lookup
             ON beaver_trigrams (collection, trigram, field_path)
@@ -254,7 +276,7 @@ class BeaverDB:
     def _create_edges_table(self):
         """Creates the table for storing relationships between documents."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_edges (
                 collection TEXT NOT NULL,
@@ -269,7 +291,7 @@ class BeaverDB:
     def _create_versions_table(self):
         """Creates a table to track the version of each collection for caching."""
-        self._conn.execute(
+        self.connection.execute(
             """
             CREATE TABLE IF NOT EXISTS beaver_collection_versions (
                 collection_name TEXT PRIMARY KEY,
@@ -280,12 +302,12 @@ class BeaverDB:
     def close(self):
         """Closes the database connection."""
-        if self._conn:
+        if self.connection:
             # Cleanly shut down any active polling threads before closing
             with self._channels_lock:
                 for channel in self._channels.values():
                     channel.close()
-            self._conn.close()
+            self.connection.close()
     # --- Factory and Passthrough Methods ---
@@ -300,7 +322,7 @@ class BeaverDB:
         if model and not isinstance(model, JsonSerializable):
             raise TypeError("The model parameter must be a JsonSerializable class.")
-        return DictManager(name, self._conn, model)
+        return DictManager(name, self, model)
     def list[T](self, name: str, model: type[T] | None = None) -> ListManager[T]:
         """
@@ -313,7 +335,7 @@ class BeaverDB:
         if model and not isinstance(model, JsonSerializable):
             raise TypeError("The model parameter must be a JsonSerializable class.")
-        return ListManager(name, self._conn, model)
+        return ListManager(name, self, model)
     def queue[T](self, name: str, model: type[T] | None = None) -> QueueManager[T]:
         """
@@ -326,7 +348,7 @@ class BeaverDB:
         if model and not isinstance(model, JsonSerializable):
             raise TypeError("The model parameter must be a JsonSerializable class.")
-        return QueueManager(name, self._conn, model)
+        return QueueManager(name, self, model)
     def collection[D: Document](self, name: str, model: Type[D] | None = None) -> CollectionManager[D]:
         """
@@ -341,7 +363,7 @@ class BeaverDB:
         # of the vector index consistently.
         with self._collections_lock:
             if name not in self._collections:
-                self._collections[name] = CollectionManager(name, self._conn, model=model)
+                self._collections[name] = CollectionManager(name, self, model=model)
             return self._collections[name]
@@ -355,7 +377,7 @@ class BeaverDB:
         # Use a thread-safe lock to ensure only one Channel object is created per name.
         with self._channels_lock:
             if name not in self._channels:
-                self._channels[name] = ChannelManager(name, self._conn, self._db_path, model=model)
+                self._channels[name] = ChannelManager(name, self, model=model)
             return self._channels[name]
     def blobs[M](self, name: str, model: type[M] | None = None) -> BlobManager[M]:
@@ -363,7 +385,7 @@ class BeaverDB:
         if not isinstance(name, str) or not name:
             raise TypeError("Blob store name must be a non-empty string.")
-        return BlobManager(name, self._conn, model)
+        return BlobManager(name, self, model)
     def log[T](self, name: str, model: type[T] | None = None) -> LogManager[T]:
         """
@@ -376,4 +398,4 @@ class BeaverDB:
         if model and not isinstance(model, JsonSerializable):
             raise TypeError("The model parameter must be a JsonSerializable class.")
-        return LogManager(name, self._conn, self._db_path, model)
+        return LogManager(name, self, self._db_path, model)

beaver-db 0.16.6__tar.gz → 0.16.8__tar.gz

Potentially problematic release.

beaver-db 0.16.6__tar.gz → 0.16.8__tar.gz