beaver-db: beaver_db-0.9.2-py3-none-any.whl → beaver_db-0.11.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/collections.py +339 -149
- beaver/core.py +180 -99
- beaver/vectors.py +370 -0
- {beaver_db-0.9.2.dist-info → beaver_db-0.11.0.dist-info}/METADATA +33 -12
- beaver_db-0.11.0.dist-info/RECORD +13 -0
- beaver_db-0.9.2.dist-info/RECORD +0 -12
- {beaver_db-0.9.2.dist-info → beaver_db-0.11.0.dist-info}/WHEEL +0 -0
- {beaver_db-0.9.2.dist-info → beaver_db-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {beaver_db-0.9.2.dist-info → beaver_db-0.11.0.dist-info}/top_level.txt +0 -0
beaver/core.py
CHANGED
|
@@ -14,7 +14,7 @@ class BeaverDB:
|
|
|
14
14
|
This class manages the database connection and table schemas.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
def __init__(self, db_path: str):
|
|
17
|
+
def __init__(self, db_path: str, timeout:float=30.0):
|
|
18
18
|
"""
|
|
19
19
|
Initializes the database connection and creates all necessary tables.
|
|
20
20
|
|
|
@@ -23,149 +23,221 @@ class BeaverDB:
|
|
|
23
23
|
"""
|
|
24
24
|
self._db_path = db_path
|
|
25
25
|
# Enable WAL mode for better concurrency between readers and writers
|
|
26
|
-
self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
|
|
26
|
+
self._conn = sqlite3.connect(self._db_path, check_same_thread=False, timeout=timeout)
|
|
27
27
|
self._conn.execute("PRAGMA journal_mode=WAL;")
|
|
28
28
|
self._conn.row_factory = sqlite3.Row
|
|
29
|
-
self._create_all_tables()
|
|
30
29
|
self._channels: dict[str, ChannelManager] = {}
|
|
31
30
|
self._channels_lock = threading.Lock()
|
|
31
|
+
# Add a cache and lock for CollectionManager singletons
|
|
32
|
+
self._collections: dict[str, CollectionManager] = {}
|
|
33
|
+
self._collections_lock = threading.Lock()
|
|
34
|
+
|
|
35
|
+
# Initialize the schemas
|
|
36
|
+
self._create_all_tables()
|
|
32
37
|
|
|
33
38
|
def _create_all_tables(self):
|
|
34
39
|
"""Initializes all required tables in the database file."""
|
|
35
|
-
self.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
with self._conn:
|
|
41
|
+
self._create_pubsub_table()
|
|
42
|
+
self._create_list_table()
|
|
43
|
+
self._create_collections_table()
|
|
44
|
+
self._create_fts_table()
|
|
45
|
+
self._create_trigrams_table()
|
|
46
|
+
self._create_edges_table()
|
|
47
|
+
self._create_versions_table()
|
|
48
|
+
self._create_dict_table()
|
|
49
|
+
self._create_priority_queue_table()
|
|
50
|
+
self._create_ann_indexes_table()
|
|
51
|
+
self._create_ann_pending_log_table()
|
|
52
|
+
self._create_ann_deletions_log_table()
|
|
53
|
+
self._create_ann_id_mapping_table()
|
|
54
|
+
|
|
55
|
+
def _create_ann_indexes_table(self):
|
|
56
|
+
"""Creates the table to store the serialized base ANN index."""
|
|
57
|
+
self._conn.execute(
|
|
58
|
+
"""
|
|
59
|
+
CREATE TABLE IF NOT EXISTS _beaver_ann_indexes (
|
|
60
|
+
collection_name TEXT PRIMARY KEY,
|
|
61
|
+
index_data BLOB,
|
|
62
|
+
base_index_version INTEGER NOT NULL DEFAULT 0
|
|
63
|
+
)
|
|
64
|
+
"""
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
def _create_ann_pending_log_table(self):
|
|
68
|
+
"""Creates the log for new vector additions."""
|
|
69
|
+
self._conn.execute(
|
|
70
|
+
"""
|
|
71
|
+
CREATE TABLE IF NOT EXISTS _beaver_ann_pending_log (
|
|
72
|
+
collection_name TEXT NOT NULL,
|
|
73
|
+
str_id TEXT NOT NULL,
|
|
74
|
+
PRIMARY KEY (collection_name, str_id)
|
|
75
|
+
)
|
|
76
|
+
"""
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
def _create_ann_deletions_log_table(self):
|
|
80
|
+
"""Creates the log for vector deletions (tombstones)."""
|
|
81
|
+
self._conn.execute(
|
|
82
|
+
"""
|
|
83
|
+
CREATE TABLE IF NOT EXISTS _beaver_ann_deletions_log (
|
|
84
|
+
collection_name TEXT NOT NULL,
|
|
85
|
+
int_id INTEGER NOT NULL,
|
|
86
|
+
PRIMARY KEY (collection_name, int_id)
|
|
87
|
+
)
|
|
88
|
+
"""
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
def _create_ann_id_mapping_table(self):
|
|
92
|
+
"""Creates the table to map string IDs to integer IDs for Faiss."""
|
|
93
|
+
self._conn.execute(
|
|
94
|
+
"""
|
|
95
|
+
CREATE TABLE IF NOT EXISTS _beaver_ann_id_mapping (
|
|
96
|
+
collection_name TEXT NOT NULL,
|
|
97
|
+
str_id TEXT NOT NULL,
|
|
98
|
+
int_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
99
|
+
UNIQUE(collection_name, str_id)
|
|
100
|
+
)
|
|
101
|
+
"""
|
|
102
|
+
)
|
|
43
103
|
|
|
44
104
|
def _create_priority_queue_table(self):
|
|
45
105
|
"""Creates the priority queue table and its performance index."""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
data TEXT NOT NULL
|
|
54
|
-
)
|
|
55
|
-
"""
|
|
56
|
-
)
|
|
57
|
-
self._conn.execute(
|
|
58
|
-
"""
|
|
59
|
-
CREATE INDEX IF NOT EXISTS idx_priority_queue_order
|
|
60
|
-
ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
|
|
61
|
-
"""
|
|
106
|
+
self._conn.execute(
|
|
107
|
+
"""
|
|
108
|
+
CREATE TABLE IF NOT EXISTS beaver_priority_queues (
|
|
109
|
+
queue_name TEXT NOT NULL,
|
|
110
|
+
priority REAL NOT NULL,
|
|
111
|
+
timestamp REAL NOT NULL,
|
|
112
|
+
data TEXT NOT NULL
|
|
62
113
|
)
|
|
114
|
+
"""
|
|
115
|
+
)
|
|
116
|
+
self._conn.execute(
|
|
117
|
+
"""
|
|
118
|
+
CREATE INDEX IF NOT EXISTS idx_priority_queue_order
|
|
119
|
+
ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
|
|
120
|
+
"""
|
|
121
|
+
)
|
|
63
122
|
|
|
64
123
|
def _create_dict_table(self):
|
|
65
124
|
"""Creates the namespaced dictionary table."""
|
|
66
|
-
|
|
67
|
-
self._conn.execute(
|
|
68
|
-
"""
|
|
69
|
-
CREATE TABLE IF NOT EXISTS beaver_dicts (
|
|
70
|
-
dict_name TEXT NOT NULL,
|
|
71
|
-
key TEXT NOT NULL,
|
|
72
|
-
value TEXT NOT NULL,
|
|
73
|
-
expires_at REAL,
|
|
74
|
-
PRIMARY KEY (dict_name, key)
|
|
75
|
-
)
|
|
125
|
+
self._conn.execute(
|
|
76
126
|
"""
|
|
127
|
+
CREATE TABLE IF NOT EXISTS beaver_dicts (
|
|
128
|
+
dict_name TEXT NOT NULL,
|
|
129
|
+
key TEXT NOT NULL,
|
|
130
|
+
value TEXT NOT NULL,
|
|
131
|
+
expires_at REAL,
|
|
132
|
+
PRIMARY KEY (dict_name, key)
|
|
77
133
|
)
|
|
134
|
+
"""
|
|
135
|
+
)
|
|
78
136
|
|
|
79
137
|
def _create_pubsub_table(self):
|
|
80
138
|
"""Creates the pub/sub log table."""
|
|
81
|
-
|
|
82
|
-
self._conn.execute(
|
|
83
|
-
"""
|
|
84
|
-
CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
|
|
85
|
-
timestamp REAL PRIMARY KEY,
|
|
86
|
-
channel_name TEXT NOT NULL,
|
|
87
|
-
message_payload TEXT NOT NULL
|
|
88
|
-
)
|
|
139
|
+
self._conn.execute(
|
|
89
140
|
"""
|
|
141
|
+
CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
|
|
142
|
+
timestamp REAL PRIMARY KEY,
|
|
143
|
+
channel_name TEXT NOT NULL,
|
|
144
|
+
message_payload TEXT NOT NULL
|
|
90
145
|
)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
ON beaver_pubsub_log (channel_name, timestamp)
|
|
146
|
+
"""
|
|
147
|
+
)
|
|
148
|
+
self._conn.execute(
|
|
95
149
|
"""
|
|
96
|
-
|
|
150
|
+
CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
|
|
151
|
+
ON beaver_pubsub_log (channel_name, timestamp)
|
|
152
|
+
"""
|
|
153
|
+
)
|
|
97
154
|
|
|
98
155
|
def _create_list_table(self):
|
|
99
156
|
"""Creates the lists table."""
|
|
100
|
-
|
|
101
|
-
self._conn.execute(
|
|
102
|
-
"""
|
|
103
|
-
CREATE TABLE IF NOT EXISTS beaver_lists (
|
|
104
|
-
list_name TEXT NOT NULL,
|
|
105
|
-
item_order REAL NOT NULL,
|
|
106
|
-
item_value TEXT NOT NULL,
|
|
107
|
-
PRIMARY KEY (list_name, item_order)
|
|
108
|
-
)
|
|
157
|
+
self._conn.execute(
|
|
109
158
|
"""
|
|
159
|
+
CREATE TABLE IF NOT EXISTS beaver_lists (
|
|
160
|
+
list_name TEXT NOT NULL,
|
|
161
|
+
item_order REAL NOT NULL,
|
|
162
|
+
item_value TEXT NOT NULL,
|
|
163
|
+
PRIMARY KEY (list_name, item_order)
|
|
110
164
|
)
|
|
165
|
+
"""
|
|
166
|
+
)
|
|
111
167
|
|
|
112
168
|
def _create_collections_table(self):
|
|
113
169
|
"""Creates the main table for storing documents and vectors."""
|
|
114
|
-
|
|
115
|
-
self._conn.execute(
|
|
116
|
-
"""
|
|
117
|
-
CREATE TABLE IF NOT EXISTS beaver_collections (
|
|
118
|
-
collection TEXT NOT NULL,
|
|
119
|
-
item_id TEXT NOT NULL,
|
|
120
|
-
item_vector BLOB,
|
|
121
|
-
metadata TEXT,
|
|
122
|
-
PRIMARY KEY (collection, item_id)
|
|
123
|
-
)
|
|
170
|
+
self._conn.execute(
|
|
124
171
|
"""
|
|
172
|
+
CREATE TABLE IF NOT EXISTS beaver_collections (
|
|
173
|
+
collection TEXT NOT NULL,
|
|
174
|
+
item_id TEXT NOT NULL,
|
|
175
|
+
item_vector BLOB,
|
|
176
|
+
metadata TEXT,
|
|
177
|
+
PRIMARY KEY (collection, item_id)
|
|
125
178
|
)
|
|
179
|
+
"""
|
|
180
|
+
)
|
|
126
181
|
|
|
127
182
|
def _create_fts_table(self):
|
|
128
183
|
"""Creates the virtual FTS table for full-text search."""
|
|
129
|
-
|
|
130
|
-
self._conn.execute(
|
|
131
|
-
"""
|
|
132
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
|
|
133
|
-
collection,
|
|
134
|
-
item_id,
|
|
135
|
-
field_path,
|
|
136
|
-
field_content,
|
|
137
|
-
tokenize = 'porter'
|
|
138
|
-
)
|
|
184
|
+
self._conn.execute(
|
|
139
185
|
"""
|
|
186
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
|
|
187
|
+
collection,
|
|
188
|
+
item_id,
|
|
189
|
+
field_path,
|
|
190
|
+
field_content,
|
|
191
|
+
tokenize = 'porter'
|
|
140
192
|
)
|
|
193
|
+
"""
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
def _create_trigrams_table(self):
|
|
197
|
+
"""Creates the table for the fuzzy search trigram index."""
|
|
198
|
+
self._conn.execute(
|
|
199
|
+
"""
|
|
200
|
+
CREATE TABLE IF NOT EXISTS beaver_trigrams (
|
|
201
|
+
collection TEXT NOT NULL,
|
|
202
|
+
item_id TEXT NOT NULL,
|
|
203
|
+
field_path TEXT NOT NULL,
|
|
204
|
+
trigram TEXT NOT NULL,
|
|
205
|
+
PRIMARY KEY (collection, field_path, trigram, item_id)
|
|
206
|
+
)
|
|
207
|
+
"""
|
|
208
|
+
)
|
|
209
|
+
self._conn.execute(
|
|
210
|
+
"""
|
|
211
|
+
CREATE INDEX IF NOT EXISTS idx_trigram_lookup
|
|
212
|
+
ON beaver_trigrams (collection, trigram, field_path)
|
|
213
|
+
"""
|
|
214
|
+
)
|
|
141
215
|
|
|
142
216
|
def _create_edges_table(self):
|
|
143
217
|
"""Creates the table for storing relationships between documents."""
|
|
144
|
-
|
|
145
|
-
self._conn.execute(
|
|
146
|
-
"""
|
|
147
|
-
CREATE TABLE IF NOT EXISTS beaver_edges (
|
|
148
|
-
collection TEXT NOT NULL,
|
|
149
|
-
source_item_id TEXT NOT NULL,
|
|
150
|
-
target_item_id TEXT NOT NULL,
|
|
151
|
-
label TEXT NOT NULL,
|
|
152
|
-
metadata TEXT,
|
|
153
|
-
PRIMARY KEY (collection, source_item_id, target_item_id, label)
|
|
154
|
-
)
|
|
218
|
+
self._conn.execute(
|
|
155
219
|
"""
|
|
220
|
+
CREATE TABLE IF NOT EXISTS beaver_edges (
|
|
221
|
+
collection TEXT NOT NULL,
|
|
222
|
+
source_item_id TEXT NOT NULL,
|
|
223
|
+
target_item_id TEXT NOT NULL,
|
|
224
|
+
label TEXT NOT NULL,
|
|
225
|
+
metadata TEXT,
|
|
226
|
+
PRIMARY KEY (collection, source_item_id, target_item_id, label)
|
|
156
227
|
)
|
|
228
|
+
"""
|
|
229
|
+
)
|
|
157
230
|
|
|
158
231
|
def _create_versions_table(self):
|
|
159
232
|
"""Creates a table to track the version of each collection for caching."""
|
|
160
|
-
|
|
161
|
-
self._conn.execute(
|
|
162
|
-
"""
|
|
163
|
-
CREATE TABLE IF NOT EXISTS beaver_collection_versions (
|
|
164
|
-
collection_name TEXT PRIMARY KEY,
|
|
165
|
-
version INTEGER NOT NULL DEFAULT 0
|
|
166
|
-
)
|
|
233
|
+
self._conn.execute(
|
|
167
234
|
"""
|
|
235
|
+
CREATE TABLE IF NOT EXISTS beaver_collection_versions (
|
|
236
|
+
collection_name TEXT PRIMARY KEY,
|
|
237
|
+
version INTEGER NOT NULL DEFAULT 0
|
|
168
238
|
)
|
|
239
|
+
"""
|
|
240
|
+
)
|
|
169
241
|
|
|
170
242
|
def close(self):
|
|
171
243
|
"""Closes the database connection."""
|
|
@@ -200,11 +272,20 @@ class BeaverDB:
|
|
|
200
272
|
return QueueManager(name, self._conn)
|
|
201
273
|
|
|
202
274
|
def collection(self, name: str) -> CollectionManager:
|
|
203
|
-
"""
|
|
275
|
+
"""
|
|
276
|
+
Returns a singleton CollectionManager instance for interacting with a
|
|
277
|
+
document collection.
|
|
278
|
+
"""
|
|
204
279
|
if not isinstance(name, str) or not name:
|
|
205
280
|
raise TypeError("Collection name must be a non-empty string.")
|
|
206
281
|
|
|
207
|
-
|
|
282
|
+
# Use a thread-safe lock to ensure only one CollectionManager object is
|
|
283
|
+
# created per name. This is crucial for managing the in-memory state
|
|
284
|
+
# of the vector index consistently.
|
|
285
|
+
with self._collections_lock:
|
|
286
|
+
if name not in self._collections:
|
|
287
|
+
self._collections[name] = CollectionManager(name, self._conn)
|
|
288
|
+
return self._collections[name]
|
|
208
289
|
|
|
209
290
|
def channel(self, name: str) -> ChannelManager:
|
|
210
291
|
"""
|