beaver-db 0.16.6__tar.gz → 0.16.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- {beaver_db-0.16.6 → beaver_db-0.16.8}/PKG-INFO +3 -1
- {beaver_db-0.16.6 → beaver_db-0.16.8}/README.md +2 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/blobs.py +10 -10
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/channels.py +8 -11
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/collections.py +19 -19
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/core.py +58 -36
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/dicts.py +13 -14
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/lists.py +25 -25
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/logs.py +10 -14
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/queues.py +28 -13
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/types.py +7 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/vectors.py +32 -30
- {beaver_db-0.16.6 → beaver_db-0.16.8}/pyproject.toml +1 -1
- {beaver_db-0.16.6 → beaver_db-0.16.8}/.gitignore +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/.python-version +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/LICENSE +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/beaver/__init__.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/design.md +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/async_pubsub.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/blobs.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/cache.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/fts.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/fuzzy.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/general_test.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/graph.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/kvstore.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/list.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/logs.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/pqueue.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/producer_consumer.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/publisher.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/pubsub.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/rerank.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/stress_vectors.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/subscriber.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/textual_chat.css +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/textual_chat.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/type_hints.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/examples/vector.py +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/makefile +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/roadmap.md +0 -0
- {beaver_db-0.16.6 → beaver_db-0.16.8}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: beaver-db
|
|
3
|
-
Version: 0.16.6
|
|
3
|
+
Version: 0.16.8
|
|
4
4
|
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -26,6 +26,8 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
|
|
|
26
26
|
|
|
27
27
|
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
28
28
|
|
|
29
|
+
> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
|
|
30
|
+
|
|
29
31
|
## Design Philosophy
|
|
30
32
|
|
|
31
33
|
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
@@ -11,6 +11,8 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
|
|
|
11
11
|
|
|
12
12
|
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
13
13
|
|
|
14
|
+
> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
|
|
15
|
+
|
|
14
16
|
## Design Philosophy
|
|
15
17
|
|
|
16
18
|
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
import sqlite3
|
|
3
3
|
from typing import Any, Dict, Iterator, NamedTuple, Optional, Type, TypeVar
|
|
4
4
|
|
|
5
|
-
from .types import JsonSerializable
|
|
5
|
+
from .types import JsonSerializable, IDatabase
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class Blob[M](NamedTuple):
|
|
@@ -16,9 +16,9 @@ class Blob[M](NamedTuple):
|
|
|
16
16
|
class BlobManager[M]:
|
|
17
17
|
"""A wrapper providing a Pythonic interface to a blob store in the database."""
|
|
18
18
|
|
|
19
|
-
def __init__(self, name: str,
|
|
19
|
+
def __init__(self, name: str, db: IDatabase, model: Type[M] | None = None):
|
|
20
20
|
self._name = name
|
|
21
|
-
self.
|
|
21
|
+
self._db = db
|
|
22
22
|
self._model = model
|
|
23
23
|
|
|
24
24
|
def _serialize(self, value: M) -> str | None:
|
|
@@ -51,8 +51,8 @@ class BlobManager[M]:
|
|
|
51
51
|
|
|
52
52
|
metadata_json = self._serialize(metadata) if metadata else None
|
|
53
53
|
|
|
54
|
-
with self.
|
|
55
|
-
self.
|
|
54
|
+
with self._db.connection:
|
|
55
|
+
self._db.connection.execute(
|
|
56
56
|
"INSERT OR REPLACE INTO beaver_blobs (store_name, key, data, metadata) VALUES (?, ?, ?, ?)",
|
|
57
57
|
(self._name, key, data, metadata_json),
|
|
58
58
|
)
|
|
@@ -67,7 +67,7 @@ class BlobManager[M]:
|
|
|
67
67
|
Returns:
|
|
68
68
|
A Blob object containing the data and metadata, or None if the key is not found.
|
|
69
69
|
"""
|
|
70
|
-
cursor = self.
|
|
70
|
+
cursor = self._db.connection.cursor()
|
|
71
71
|
cursor.execute(
|
|
72
72
|
"SELECT data, metadata FROM beaver_blobs WHERE store_name = ? AND key = ?",
|
|
73
73
|
(self._name, key),
|
|
@@ -90,8 +90,8 @@ class BlobManager[M]:
|
|
|
90
90
|
Raises:
|
|
91
91
|
KeyError: If the key does not exist in the store.
|
|
92
92
|
"""
|
|
93
|
-
with self.
|
|
94
|
-
cursor = self.
|
|
93
|
+
with self._db.connection:
|
|
94
|
+
cursor = self._db.connection.cursor()
|
|
95
95
|
cursor.execute(
|
|
96
96
|
"DELETE FROM beaver_blobs WHERE store_name = ? AND key = ?",
|
|
97
97
|
(self._name, key),
|
|
@@ -103,7 +103,7 @@ class BlobManager[M]:
|
|
|
103
103
|
"""
|
|
104
104
|
Checks if a key exists in the blob store (e.g., `key in blobs`).
|
|
105
105
|
"""
|
|
106
|
-
cursor = self.
|
|
106
|
+
cursor = self._db.connection.cursor()
|
|
107
107
|
cursor.execute(
|
|
108
108
|
"SELECT 1 FROM beaver_blobs WHERE store_name = ? AND key = ? LIMIT 1",
|
|
109
109
|
(self._name, key),
|
|
@@ -114,7 +114,7 @@ class BlobManager[M]:
|
|
|
114
114
|
|
|
115
115
|
def __iter__(self) -> Iterator[str]:
|
|
116
116
|
"""Returns an iterator over the keys in the blob store."""
|
|
117
|
-
cursor = self.
|
|
117
|
+
cursor = self._db.connection.cursor()
|
|
118
118
|
cursor.execute(
|
|
119
119
|
"SELECT key FROM beaver_blobs WHERE store_name = ?", (self._name,)
|
|
120
120
|
)
|
|
@@ -6,7 +6,7 @@ import time
|
|
|
6
6
|
from queue import Empty, Queue
|
|
7
7
|
from typing import Any, AsyncIterator, Generic, Iterator, Set, Type, TypeVar
|
|
8
8
|
|
|
9
|
-
from .types import JsonSerializable
|
|
9
|
+
from .types import JsonSerializable, IDatabase
|
|
10
10
|
|
|
11
11
|
# A special message object used to signal the listener to gracefully shut down.
|
|
12
12
|
_SHUTDOWN_SENTINEL = object()
|
|
@@ -120,14 +120,12 @@ class ChannelManager[T]:
|
|
|
120
120
|
def __init__(
|
|
121
121
|
self,
|
|
122
122
|
name: str,
|
|
123
|
-
|
|
124
|
-
db_path: str,
|
|
123
|
+
db: IDatabase,
|
|
125
124
|
poll_interval: float = 0.1,
|
|
126
125
|
model: Type[T] | None = None,
|
|
127
126
|
):
|
|
128
127
|
self._name = name
|
|
129
|
-
self.
|
|
130
|
-
self._db_path = db_path
|
|
128
|
+
self._db = db
|
|
131
129
|
self._poll_interval = poll_interval
|
|
132
130
|
self._model = model
|
|
133
131
|
self._listeners: Set[Queue] = set()
|
|
@@ -197,8 +195,8 @@ class ChannelManager[T]:
|
|
|
197
195
|
|
|
198
196
|
Useful for reducing the database once logs are not needed.
|
|
199
197
|
"""
|
|
200
|
-
with self.
|
|
201
|
-
self.
|
|
198
|
+
with self._db.connection:
|
|
199
|
+
self._db.connection.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))
|
|
202
200
|
|
|
203
201
|
def _polling_loop(self):
|
|
204
202
|
"""
|
|
@@ -208,8 +206,7 @@ class ChannelManager[T]:
|
|
|
208
206
|
to all registered listener queues.
|
|
209
207
|
"""
|
|
210
208
|
# A separate SQLite connection is required for each thread.
|
|
211
|
-
thread_conn =
|
|
212
|
-
thread_conn.row_factory = sqlite3.Row
|
|
209
|
+
thread_conn = self._db.connection
|
|
213
210
|
|
|
214
211
|
# The poller starts listening for messages from this moment forward.
|
|
215
212
|
last_seen_timestamp = time.time()
|
|
@@ -256,8 +253,8 @@ class ChannelManager[T]:
|
|
|
256
253
|
except TypeError as e:
|
|
257
254
|
raise TypeError("Message payload must be JSON-serializable.") from e
|
|
258
255
|
|
|
259
|
-
with self.
|
|
260
|
-
self.
|
|
256
|
+
with self._db.connection:
|
|
257
|
+
self._db.connection.execute(
|
|
261
258
|
"INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
|
|
262
259
|
(time.time(), self._name, json_payload),
|
|
263
260
|
)
|
|
@@ -4,7 +4,7 @@ import threading
|
|
|
4
4
|
import uuid
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from typing import Any, Iterator, List, Literal, Tuple, Type, TypeVar
|
|
7
|
-
from .types import Model, stub
|
|
7
|
+
from .types import Model, stub, IDatabase
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
import numpy as np
|
|
@@ -111,12 +111,12 @@ class CollectionManager[D: Document]:
|
|
|
111
111
|
FTS, fuzzy search, graph traversal, and persistent vector search.
|
|
112
112
|
"""
|
|
113
113
|
|
|
114
|
-
def __init__(self, name: str,
|
|
114
|
+
def __init__(self, name: str, db: IDatabase, model: Type[D] | None = None):
|
|
115
115
|
self._name = name
|
|
116
|
-
self.
|
|
116
|
+
self._db = db
|
|
117
117
|
self._model = model or Document
|
|
118
118
|
# All vector-related operations are now delegated to the VectorIndex class.
|
|
119
|
-
self._vector_index = VectorIndex(name,
|
|
119
|
+
self._vector_index = VectorIndex(name, db)
|
|
120
120
|
# A lock to ensure only one compaction thread runs at a time for this collection.
|
|
121
121
|
self._compaction_lock = threading.Lock()
|
|
122
122
|
self._compaction_thread: threading.Thread | None = None
|
|
@@ -134,7 +134,7 @@ class CollectionManager[D: Document]:
|
|
|
134
134
|
|
|
135
135
|
def _needs_compaction(self, threshold: int = 1000) -> bool:
|
|
136
136
|
"""Checks if the total number of pending vector operations exceeds the threshold."""
|
|
137
|
-
cursor = self.
|
|
137
|
+
cursor = self._db.connection.cursor()
|
|
138
138
|
cursor.execute(
|
|
139
139
|
"SELECT COUNT(*) FROM _beaver_ann_pending_log WHERE collection_name = ?",
|
|
140
140
|
(self._name,)
|
|
@@ -199,8 +199,8 @@ class CollectionManager[D: Document]:
|
|
|
199
199
|
if not isinstance(document, Document):
|
|
200
200
|
raise TypeError("Item to index must be a Document object.")
|
|
201
201
|
|
|
202
|
-
with self.
|
|
203
|
-
cursor = self.
|
|
202
|
+
with self._db.connection:
|
|
203
|
+
cursor = self._db.connection.cursor()
|
|
204
204
|
|
|
205
205
|
# Step 1: Core Document and Vector Storage
|
|
206
206
|
cursor.execute(
|
|
@@ -253,8 +253,8 @@ class CollectionManager[D: Document]:
|
|
|
253
253
|
"""Removes a document and all its associated data from the collection."""
|
|
254
254
|
if not isinstance(document, Document):
|
|
255
255
|
raise TypeError("Item to drop must be a Document object.")
|
|
256
|
-
with self.
|
|
257
|
-
cursor = self.
|
|
256
|
+
with self._db.connection:
|
|
257
|
+
cursor = self._db.connection.cursor()
|
|
258
258
|
cursor.execute("DELETE FROM beaver_collections WHERE collection = ? AND item_id = ?", (self._name, document.id))
|
|
259
259
|
cursor.execute("DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?", (self._name, document.id))
|
|
260
260
|
cursor.execute("DELETE FROM beaver_trigrams WHERE collection = ? AND item_id = ?", (self._name, document.id))
|
|
@@ -271,7 +271,7 @@ class CollectionManager[D: Document]:
|
|
|
271
271
|
|
|
272
272
|
def __iter__(self) -> Iterator[D]:
|
|
273
273
|
"""Returns an iterator over all documents in the collection."""
|
|
274
|
-
cursor = self.
|
|
274
|
+
cursor = self._db.connection.cursor()
|
|
275
275
|
cursor.execute(
|
|
276
276
|
"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
|
|
277
277
|
(self._name,),
|
|
@@ -306,7 +306,7 @@ class CollectionManager[D: Document]:
|
|
|
306
306
|
placeholders = ",".join("?" for _ in result_ids)
|
|
307
307
|
sql = f"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ? AND item_id IN ({placeholders})"
|
|
308
308
|
|
|
309
|
-
cursor = self.
|
|
309
|
+
cursor = self._db.connection.cursor()
|
|
310
310
|
rows = cursor.execute(sql, (self._name, *result_ids)).fetchall()
|
|
311
311
|
|
|
312
312
|
doc_map = {
|
|
@@ -350,7 +350,7 @@ class CollectionManager[D: Document]:
|
|
|
350
350
|
self, query: str, on: list[str] | None, top_k: int
|
|
351
351
|
) -> list[tuple[D, float]]:
|
|
352
352
|
"""Performs a standard FTS search."""
|
|
353
|
-
cursor = self.
|
|
353
|
+
cursor = self._db.connection.cursor()
|
|
354
354
|
sql_query = """
|
|
355
355
|
SELECT t1.item_id, t1.item_vector, t1.metadata, fts.rank
|
|
356
356
|
FROM beaver_collections AS t1 JOIN (
|
|
@@ -390,7 +390,7 @@ class CollectionManager[D: Document]:
|
|
|
390
390
|
if similarity_threshold == 0:
|
|
391
391
|
return set()
|
|
392
392
|
|
|
393
|
-
cursor = self.
|
|
393
|
+
cursor = self._db.connection.cursor()
|
|
394
394
|
sql = """
|
|
395
395
|
SELECT item_id FROM beaver_trigrams
|
|
396
396
|
WHERE collection = ? AND trigram IN ({}) {}
|
|
@@ -422,7 +422,7 @@ class CollectionManager[D: Document]:
|
|
|
422
422
|
if not candidate_ids:
|
|
423
423
|
return []
|
|
424
424
|
|
|
425
|
-
cursor = self.
|
|
425
|
+
cursor = self._db.connection.cursor()
|
|
426
426
|
id_placeholders = ",".join("?" for _ in candidate_ids)
|
|
427
427
|
sql_text = f"SELECT item_id, field_path, field_content FROM beaver_fts_index WHERE collection = ? AND item_id IN ({id_placeholders})"
|
|
428
428
|
params_text: list[Any] = [self._name]
|
|
@@ -480,8 +480,8 @@ class CollectionManager[D: Document]:
|
|
|
480
480
|
"""Creates a directed edge between two documents."""
|
|
481
481
|
if not isinstance(source, Document) or not isinstance(target, Document):
|
|
482
482
|
raise TypeError("Source and target must be Document objects.")
|
|
483
|
-
with self.
|
|
484
|
-
self.
|
|
483
|
+
with self._db.connection:
|
|
484
|
+
self._db.connection.execute(
|
|
485
485
|
"INSERT OR REPLACE INTO beaver_edges (collection, source_item_id, target_item_id, label, metadata) VALUES (?, ?, ?, ?, ?)",
|
|
486
486
|
(
|
|
487
487
|
self._name,
|
|
@@ -500,7 +500,7 @@ class CollectionManager[D: Document]:
|
|
|
500
500
|
sql += " AND t2.label = ?"
|
|
501
501
|
params.append(label)
|
|
502
502
|
|
|
503
|
-
rows = self.
|
|
503
|
+
rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
|
|
504
504
|
return [
|
|
505
505
|
self._model(
|
|
506
506
|
id=row["item_id"],
|
|
@@ -549,7 +549,7 @@ class CollectionManager[D: Document]:
|
|
|
549
549
|
"""
|
|
550
550
|
params = [source.id, self._name, depth] + labels + [self._name]
|
|
551
551
|
|
|
552
|
-
rows = self.
|
|
552
|
+
rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
|
|
553
553
|
return [
|
|
554
554
|
self._model(
|
|
555
555
|
id=row["item_id"],
|
|
@@ -565,7 +565,7 @@ class CollectionManager[D: Document]:
|
|
|
565
565
|
|
|
566
566
|
def __len__(self) -> int:
|
|
567
567
|
"""Returns the number of documents in the collection."""
|
|
568
|
-
cursor = self.
|
|
568
|
+
cursor = self._db.connection.cursor()
|
|
569
569
|
cursor.execute(
|
|
570
570
|
"SELECT COUNT(*) FROM beaver_collections WHERE collection = ?",
|
|
571
571
|
(self._name,),
|
|
@@ -15,7 +15,7 @@ from .queues import QueueManager
|
|
|
15
15
|
class BeaverDB:
|
|
16
16
|
"""
|
|
17
17
|
An embedded, multi-modal database in a single SQLite file.
|
|
18
|
-
This class manages
|
|
18
|
+
This class manages thread-safe database connections and table schemas.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
def __init__(self, db_path: str, timeout:float=30.0):
|
|
@@ -26,22 +26,44 @@ class BeaverDB:
|
|
|
26
26
|
db_path: The path to the SQLite database file.
|
|
27
27
|
"""
|
|
28
28
|
self._db_path = db_path
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self.
|
|
32
|
-
|
|
29
|
+
self._timeout = timeout
|
|
30
|
+
# This object will store a different connection for each thread.
|
|
31
|
+
self._thread_local = threading.local()
|
|
32
|
+
|
|
33
33
|
self._channels: dict[str, ChannelManager] = {}
|
|
34
34
|
self._channels_lock = threading.Lock()
|
|
35
|
-
# Add a cache and lock for CollectionManager singletons
|
|
36
35
|
self._collections: dict[str, CollectionManager] = {}
|
|
37
36
|
self._collections_lock = threading.Lock()
|
|
38
37
|
|
|
39
|
-
# Initialize the schemas
|
|
38
|
+
# Initialize the schemas. This will implicitly create the first
|
|
39
|
+
# connection for the main thread via the `connection` property.
|
|
40
40
|
self._create_all_tables()
|
|
41
41
|
|
|
42
|
+
@property
|
|
43
|
+
def connection(self) -> sqlite3.Connection:
|
|
44
|
+
"""
|
|
45
|
+
Provides a thread-safe SQLite connection.
|
|
46
|
+
|
|
47
|
+
Each thread will receive its own dedicated connection object.
|
|
48
|
+
The connection is created on the first access and then reused for
|
|
49
|
+
all subsequent calls within the same thread.
|
|
50
|
+
"""
|
|
51
|
+
# Check if a connection is already stored for this thread
|
|
52
|
+
conn = getattr(self._thread_local, 'conn', None)
|
|
53
|
+
|
|
54
|
+
if conn is None:
|
|
55
|
+
# No connection for this thread yet, so create one.
|
|
56
|
+
# We no longer need check_same_thread=False, restoring thread safety.
|
|
57
|
+
conn = sqlite3.connect(self._db_path, timeout=self._timeout)
|
|
58
|
+
conn.execute("PRAGMA journal_mode=WAL;")
|
|
59
|
+
conn.row_factory = sqlite3.Row
|
|
60
|
+
self._thread_local.conn = conn
|
|
61
|
+
|
|
62
|
+
return conn
|
|
63
|
+
|
|
42
64
|
def _create_all_tables(self):
|
|
43
65
|
"""Initializes all required tables in the database file."""
|
|
44
|
-
with self.
|
|
66
|
+
with self.connection:
|
|
45
67
|
self._create_ann_deletions_log_table()
|
|
46
68
|
self._create_ann_id_mapping_table()
|
|
47
69
|
self._create_ann_indexes_table()
|
|
@@ -60,7 +82,7 @@ class BeaverDB:
|
|
|
60
82
|
|
|
61
83
|
def _create_logs_table(self):
|
|
62
84
|
"""Creates the table for time-indexed logs."""
|
|
63
|
-
self.
|
|
85
|
+
self.connection.execute(
|
|
64
86
|
"""
|
|
65
87
|
CREATE TABLE IF NOT EXISTS beaver_logs (
|
|
66
88
|
log_name TEXT NOT NULL,
|
|
@@ -70,7 +92,7 @@ class BeaverDB:
|
|
|
70
92
|
)
|
|
71
93
|
"""
|
|
72
94
|
)
|
|
73
|
-
self.
|
|
95
|
+
self.connection.execute(
|
|
74
96
|
"""
|
|
75
97
|
CREATE INDEX IF NOT EXISTS idx_logs_timestamp
|
|
76
98
|
ON beaver_logs (log_name, timestamp)
|
|
@@ -79,7 +101,7 @@ class BeaverDB:
|
|
|
79
101
|
|
|
80
102
|
def _create_blobs_table(self):
|
|
81
103
|
"""Creates the table for storing named blobs."""
|
|
82
|
-
self.
|
|
104
|
+
self.connection.execute(
|
|
83
105
|
"""
|
|
84
106
|
CREATE TABLE IF NOT EXISTS beaver_blobs (
|
|
85
107
|
store_name TEXT NOT NULL,
|
|
@@ -93,7 +115,7 @@ class BeaverDB:
|
|
|
93
115
|
|
|
94
116
|
def _create_ann_indexes_table(self):
|
|
95
117
|
"""Creates the table to store the serialized base ANN index."""
|
|
96
|
-
self.
|
|
118
|
+
self.connection.execute(
|
|
97
119
|
"""
|
|
98
120
|
CREATE TABLE IF NOT EXISTS _beaver_ann_indexes (
|
|
99
121
|
collection_name TEXT PRIMARY KEY,
|
|
@@ -105,7 +127,7 @@ class BeaverDB:
|
|
|
105
127
|
|
|
106
128
|
def _create_ann_pending_log_table(self):
|
|
107
129
|
"""Creates the log for new vector additions."""
|
|
108
|
-
self.
|
|
130
|
+
self.connection.execute(
|
|
109
131
|
"""
|
|
110
132
|
CREATE TABLE IF NOT EXISTS _beaver_ann_pending_log (
|
|
111
133
|
collection_name TEXT NOT NULL,
|
|
@@ -117,7 +139,7 @@ class BeaverDB:
|
|
|
117
139
|
|
|
118
140
|
def _create_ann_deletions_log_table(self):
|
|
119
141
|
"""Creates the log for vector deletions (tombstones)."""
|
|
120
|
-
self.
|
|
142
|
+
self.connection.execute(
|
|
121
143
|
"""
|
|
122
144
|
CREATE TABLE IF NOT EXISTS _beaver_ann_deletions_log (
|
|
123
145
|
collection_name TEXT NOT NULL,
|
|
@@ -129,7 +151,7 @@ class BeaverDB:
|
|
|
129
151
|
|
|
130
152
|
def _create_ann_id_mapping_table(self):
|
|
131
153
|
"""Creates the table to map string IDs to integer IDs for Faiss."""
|
|
132
|
-
self.
|
|
154
|
+
self.connection.execute(
|
|
133
155
|
"""
|
|
134
156
|
CREATE TABLE IF NOT EXISTS _beaver_ann_id_mapping (
|
|
135
157
|
collection_name TEXT NOT NULL,
|
|
@@ -142,7 +164,7 @@ class BeaverDB:
|
|
|
142
164
|
|
|
143
165
|
def _create_priority_queue_table(self):
|
|
144
166
|
"""Creates the priority queue table and its performance index."""
|
|
145
|
-
self.
|
|
167
|
+
self.connection.execute(
|
|
146
168
|
"""
|
|
147
169
|
CREATE TABLE IF NOT EXISTS beaver_priority_queues (
|
|
148
170
|
queue_name TEXT NOT NULL,
|
|
@@ -152,7 +174,7 @@ class BeaverDB:
|
|
|
152
174
|
)
|
|
153
175
|
"""
|
|
154
176
|
)
|
|
155
|
-
self.
|
|
177
|
+
self.connection.execute(
|
|
156
178
|
"""
|
|
157
179
|
CREATE INDEX IF NOT EXISTS idx_priority_queue_order
|
|
158
180
|
ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
|
|
@@ -161,7 +183,7 @@ class BeaverDB:
|
|
|
161
183
|
|
|
162
184
|
def _create_dict_table(self):
|
|
163
185
|
"""Creates the namespaced dictionary table."""
|
|
164
|
-
self.
|
|
186
|
+
self.connection.execute(
|
|
165
187
|
"""
|
|
166
188
|
CREATE TABLE IF NOT EXISTS beaver_dicts (
|
|
167
189
|
dict_name TEXT NOT NULL,
|
|
@@ -175,7 +197,7 @@ class BeaverDB:
|
|
|
175
197
|
|
|
176
198
|
def _create_pubsub_table(self):
|
|
177
199
|
"""Creates the pub/sub log table."""
|
|
178
|
-
self.
|
|
200
|
+
self.connection.execute(
|
|
179
201
|
"""
|
|
180
202
|
CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
|
|
181
203
|
timestamp REAL PRIMARY KEY,
|
|
@@ -184,7 +206,7 @@ class BeaverDB:
|
|
|
184
206
|
)
|
|
185
207
|
"""
|
|
186
208
|
)
|
|
187
|
-
self.
|
|
209
|
+
self.connection.execute(
|
|
188
210
|
"""
|
|
189
211
|
CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
|
|
190
212
|
ON beaver_pubsub_log (channel_name, timestamp)
|
|
@@ -193,7 +215,7 @@ class BeaverDB:
|
|
|
193
215
|
|
|
194
216
|
def _create_list_table(self):
|
|
195
217
|
"""Creates the lists table."""
|
|
196
|
-
self.
|
|
218
|
+
self.connection.execute(
|
|
197
219
|
"""
|
|
198
220
|
CREATE TABLE IF NOT EXISTS beaver_lists (
|
|
199
221
|
list_name TEXT NOT NULL,
|
|
@@ -206,7 +228,7 @@ class BeaverDB:
|
|
|
206
228
|
|
|
207
229
|
def _create_collections_table(self):
|
|
208
230
|
"""Creates the main table for storing documents and vectors."""
|
|
209
|
-
self.
|
|
231
|
+
self.connection.execute(
|
|
210
232
|
"""
|
|
211
233
|
CREATE TABLE IF NOT EXISTS beaver_collections (
|
|
212
234
|
collection TEXT NOT NULL,
|
|
@@ -220,7 +242,7 @@ class BeaverDB:
|
|
|
220
242
|
|
|
221
243
|
def _create_fts_table(self):
|
|
222
244
|
"""Creates the virtual FTS table for full-text search."""
|
|
223
|
-
self.
|
|
245
|
+
self.connection.execute(
|
|
224
246
|
"""
|
|
225
247
|
CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
|
|
226
248
|
collection,
|
|
@@ -234,7 +256,7 @@ class BeaverDB:
|
|
|
234
256
|
|
|
235
257
|
def _create_trigrams_table(self):
|
|
236
258
|
"""Creates the table for the fuzzy search trigram index."""
|
|
237
|
-
self.
|
|
259
|
+
self.connection.execute(
|
|
238
260
|
"""
|
|
239
261
|
CREATE TABLE IF NOT EXISTS beaver_trigrams (
|
|
240
262
|
collection TEXT NOT NULL,
|
|
@@ -245,7 +267,7 @@ class BeaverDB:
|
|
|
245
267
|
)
|
|
246
268
|
"""
|
|
247
269
|
)
|
|
248
|
-
self.
|
|
270
|
+
self.connection.execute(
|
|
249
271
|
"""
|
|
250
272
|
CREATE INDEX IF NOT EXISTS idx_trigram_lookup
|
|
251
273
|
ON beaver_trigrams (collection, trigram, field_path)
|
|
@@ -254,7 +276,7 @@ class BeaverDB:
|
|
|
254
276
|
|
|
255
277
|
def _create_edges_table(self):
|
|
256
278
|
"""Creates the table for storing relationships between documents."""
|
|
257
|
-
self.
|
|
279
|
+
self.connection.execute(
|
|
258
280
|
"""
|
|
259
281
|
CREATE TABLE IF NOT EXISTS beaver_edges (
|
|
260
282
|
collection TEXT NOT NULL,
|
|
@@ -269,7 +291,7 @@ class BeaverDB:
|
|
|
269
291
|
|
|
270
292
|
def _create_versions_table(self):
|
|
271
293
|
"""Creates a table to track the version of each collection for caching."""
|
|
272
|
-
self.
|
|
294
|
+
self.connection.execute(
|
|
273
295
|
"""
|
|
274
296
|
CREATE TABLE IF NOT EXISTS beaver_collection_versions (
|
|
275
297
|
collection_name TEXT PRIMARY KEY,
|
|
@@ -280,12 +302,12 @@ class BeaverDB:
|
|
|
280
302
|
|
|
281
303
|
def close(self):
|
|
282
304
|
"""Closes the database connection."""
|
|
283
|
-
if self.
|
|
305
|
+
if self.connection:
|
|
284
306
|
# Cleanly shut down any active polling threads before closing
|
|
285
307
|
with self._channels_lock:
|
|
286
308
|
for channel in self._channels.values():
|
|
287
309
|
channel.close()
|
|
288
|
-
self.
|
|
310
|
+
self.connection.close()
|
|
289
311
|
|
|
290
312
|
# --- Factory and Passthrough Methods ---
|
|
291
313
|
|
|
@@ -300,7 +322,7 @@ class BeaverDB:
|
|
|
300
322
|
if model and not isinstance(model, JsonSerializable):
|
|
301
323
|
raise TypeError("The model parameter must be a JsonSerializable class.")
|
|
302
324
|
|
|
303
|
-
return DictManager(name, self
|
|
325
|
+
return DictManager(name, self, model)
|
|
304
326
|
|
|
305
327
|
def list[T](self, name: str, model: type[T] | None = None) -> ListManager[T]:
|
|
306
328
|
"""
|
|
@@ -313,7 +335,7 @@ class BeaverDB:
|
|
|
313
335
|
if model and not isinstance(model, JsonSerializable):
|
|
314
336
|
raise TypeError("The model parameter must be a JsonSerializable class.")
|
|
315
337
|
|
|
316
|
-
return ListManager(name, self
|
|
338
|
+
return ListManager(name, self, model)
|
|
317
339
|
|
|
318
340
|
def queue[T](self, name: str, model: type[T] | None = None) -> QueueManager[T]:
|
|
319
341
|
"""
|
|
@@ -326,7 +348,7 @@ class BeaverDB:
|
|
|
326
348
|
if model and not isinstance(model, JsonSerializable):
|
|
327
349
|
raise TypeError("The model parameter must be a JsonSerializable class.")
|
|
328
350
|
|
|
329
|
-
return QueueManager(name, self
|
|
351
|
+
return QueueManager(name, self, model)
|
|
330
352
|
|
|
331
353
|
def collection[D: Document](self, name: str, model: Type[D] | None = None) -> CollectionManager[D]:
|
|
332
354
|
"""
|
|
@@ -341,7 +363,7 @@ class BeaverDB:
|
|
|
341
363
|
# of the vector index consistently.
|
|
342
364
|
with self._collections_lock:
|
|
343
365
|
if name not in self._collections:
|
|
344
|
-
self._collections[name] = CollectionManager(name, self
|
|
366
|
+
self._collections[name] = CollectionManager(name, self, model=model)
|
|
345
367
|
|
|
346
368
|
return self._collections[name]
|
|
347
369
|
|
|
@@ -355,7 +377,7 @@ class BeaverDB:
|
|
|
355
377
|
# Use a thread-safe lock to ensure only one Channel object is created per name.
|
|
356
378
|
with self._channels_lock:
|
|
357
379
|
if name not in self._channels:
|
|
358
|
-
self._channels[name] = ChannelManager(name, self
|
|
380
|
+
self._channels[name] = ChannelManager(name, self, model=model)
|
|
359
381
|
return self._channels[name]
|
|
360
382
|
|
|
361
383
|
def blobs[M](self, name: str, model: type[M] | None = None) -> BlobManager[M]:
|
|
@@ -363,7 +385,7 @@ class BeaverDB:
|
|
|
363
385
|
if not isinstance(name, str) or not name:
|
|
364
386
|
raise TypeError("Blob store name must be a non-empty string.")
|
|
365
387
|
|
|
366
|
-
return BlobManager(name, self
|
|
388
|
+
return BlobManager(name, self, model)
|
|
367
389
|
|
|
368
390
|
def log[T](self, name: str, model: type[T] | None = None) -> LogManager[T]:
|
|
369
391
|
"""
|
|
@@ -376,4 +398,4 @@ class BeaverDB:
|
|
|
376
398
|
if model and not isinstance(model, JsonSerializable):
|
|
377
399
|
raise TypeError("The model parameter must be a JsonSerializable class.")
|
|
378
400
|
|
|
379
|
-
return LogManager(name, self
|
|
401
|
+
return LogManager(name, self, self._db_path, model)
|