beaver-db 0.16.7__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/blobs.py +10 -10
- beaver/channels.py +8 -11
- beaver/cli.py +34 -0
- beaver/collections.py +19 -19
- beaver/core.py +58 -36
- beaver/dicts.py +13 -14
- beaver/lists.py +25 -25
- beaver/logs.py +10 -14
- beaver/queues.py +12 -12
- beaver/server.py +132 -0
- beaver/types.py +7 -0
- beaver/vectors.py +32 -30
- {beaver_db-0.16.7.dist-info → beaver_db-0.17.0.dist-info}/METADATA +12 -3
- beaver_db-0.17.0.dist-info/RECORD +18 -0
- beaver_db-0.17.0.dist-info/entry_points.txt +2 -0
- beaver_db-0.16.7.dist-info/RECORD +0 -15
- {beaver_db-0.16.7.dist-info → beaver_db-0.17.0.dist-info}/WHEEL +0 -0
- {beaver_db-0.16.7.dist-info → beaver_db-0.17.0.dist-info}/licenses/LICENSE +0 -0
beaver/vectors.py
CHANGED
|
@@ -3,6 +3,8 @@ import sqlite3
|
|
|
3
3
|
import threading
|
|
4
4
|
from typing import Dict, List, Set, Tuple
|
|
5
5
|
|
|
6
|
+
from .types import IDatabase
|
|
7
|
+
|
|
6
8
|
try:
|
|
7
9
|
import faiss
|
|
8
10
|
import numpy as np
|
|
@@ -23,12 +25,12 @@ class VectorIndex:
|
|
|
23
25
|
user-provided string IDs to the internal integer IDs required by Faiss.
|
|
24
26
|
"""
|
|
25
27
|
|
|
26
|
-
def __init__(self, collection_name: str,
|
|
28
|
+
def __init__(self, collection_name: str, db: IDatabase):
|
|
27
29
|
"""
|
|
28
30
|
Initializes the VectorIndex for a specific collection.
|
|
29
31
|
"""
|
|
30
|
-
self.
|
|
31
|
-
self.
|
|
32
|
+
self._collection = collection_name
|
|
33
|
+
self._db = db
|
|
32
34
|
# A lock to ensure thread safety for in-memory operations and synchronization checks.
|
|
33
35
|
self._lock = threading.Lock()
|
|
34
36
|
# Tracks the overall version of the collection this instance is aware of.
|
|
@@ -66,7 +68,7 @@ class VectorIndex:
|
|
|
66
68
|
elif self._dimension != dim:
|
|
67
69
|
# If a dimension is already set, all subsequent vectors must match.
|
|
68
70
|
raise ValueError(
|
|
69
|
-
f"Vector dimension mismatch for collection '{self.
|
|
71
|
+
f"Vector dimension mismatch for collection '{self._collection}'. "
|
|
70
72
|
f"Expected {self._dimension}, but got {dim}."
|
|
71
73
|
)
|
|
72
74
|
|
|
@@ -83,12 +85,12 @@ class VectorIndex:
|
|
|
83
85
|
# INSERT OR IGNORE is an atomic and safe way to create a new mapping only if it's missing.
|
|
84
86
|
cursor.execute(
|
|
85
87
|
"INSERT OR IGNORE INTO _beaver_ann_id_mapping (collection_name, str_id) VALUES (?, ?)",
|
|
86
|
-
(self.
|
|
88
|
+
(self._collection, str_id)
|
|
87
89
|
)
|
|
88
90
|
# Retrieve the now-guaranteed-to-exist integer ID.
|
|
89
91
|
cursor.execute(
|
|
90
92
|
"SELECT int_id FROM _beaver_ann_id_mapping WHERE collection_name = ? AND str_id = ?",
|
|
91
|
-
(self.
|
|
93
|
+
(self._collection, str_id)
|
|
92
94
|
)
|
|
93
95
|
result = cursor.fetchone()
|
|
94
96
|
if not result:
|
|
@@ -103,20 +105,20 @@ class VectorIndex:
|
|
|
103
105
|
|
|
104
106
|
def _get_db_version(self) -> int:
|
|
105
107
|
"""Gets the current overall version of the collection from the database."""
|
|
106
|
-
cursor = self.
|
|
108
|
+
cursor = self._db.connection.cursor()
|
|
107
109
|
cursor.execute(
|
|
108
110
|
"SELECT version FROM beaver_collection_versions WHERE collection_name = ?",
|
|
109
|
-
(self.
|
|
111
|
+
(self._collection,),
|
|
110
112
|
)
|
|
111
113
|
result = cursor.fetchone()
|
|
112
114
|
return result[0] if result else 0
|
|
113
115
|
|
|
114
116
|
def _get_db_base_index_version(self) -> int:
|
|
115
117
|
"""Gets the version of the persistent on-disk base index from the database."""
|
|
116
|
-
cursor = self.
|
|
118
|
+
cursor = self._db.connection.cursor()
|
|
117
119
|
cursor.execute(
|
|
118
120
|
"SELECT base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
|
|
119
|
-
(self.
|
|
121
|
+
(self._collection,),
|
|
120
122
|
)
|
|
121
123
|
result = cursor.fetchone()
|
|
122
124
|
return result[0] if result else 0
|
|
@@ -146,10 +148,10 @@ class VectorIndex:
|
|
|
146
148
|
|
|
147
149
|
def _load_id_mappings(self):
|
|
148
150
|
"""Loads the complete str <-> int ID mapping from the DB into in-memory caches."""
|
|
149
|
-
cursor = self.
|
|
151
|
+
cursor = self._db.connection.cursor()
|
|
150
152
|
cursor.execute(
|
|
151
153
|
"SELECT str_id, int_id FROM _beaver_ann_id_mapping WHERE collection_name = ?",
|
|
152
|
-
(self.
|
|
154
|
+
(self._collection,)
|
|
153
155
|
)
|
|
154
156
|
# Fetch all mappings at once for efficiency.
|
|
155
157
|
all_mappings = cursor.fetchall()
|
|
@@ -158,10 +160,10 @@ class VectorIndex:
|
|
|
158
160
|
|
|
159
161
|
def _load_base_index(self):
|
|
160
162
|
"""Loads and deserializes the persistent base index from the database BLOB."""
|
|
161
|
-
cursor = self.
|
|
163
|
+
cursor = self._db.connection.cursor()
|
|
162
164
|
cursor.execute(
|
|
163
165
|
"SELECT index_data, base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
|
|
164
|
-
(self.
|
|
166
|
+
(self._collection,),
|
|
165
167
|
)
|
|
166
168
|
result = cursor.fetchone()
|
|
167
169
|
if result and result["index_data"]:
|
|
@@ -184,11 +186,11 @@ class VectorIndex:
|
|
|
184
186
|
"Catches up" to changes by rebuilding the in-memory delta index and
|
|
185
187
|
deletion set from the database logs.
|
|
186
188
|
"""
|
|
187
|
-
cursor = self.
|
|
189
|
+
cursor = self._db.connection.cursor()
|
|
188
190
|
# Sync the set of deleted integer IDs.
|
|
189
191
|
cursor.execute(
|
|
190
192
|
"SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?",
|
|
191
|
-
(self.
|
|
193
|
+
(self._collection,)
|
|
192
194
|
)
|
|
193
195
|
self._deleted_int_ids = {row["int_id"] for row in cursor.fetchall()}
|
|
194
196
|
|
|
@@ -200,7 +202,7 @@ class VectorIndex:
|
|
|
200
202
|
JOIN beaver_collections c ON p.str_id = c.item_id AND p.collection_name = c.collection
|
|
201
203
|
WHERE p.collection_name = ?
|
|
202
204
|
""",
|
|
203
|
-
(self.
|
|
205
|
+
(self._collection,)
|
|
204
206
|
)
|
|
205
207
|
pending_items = cursor.fetchall()
|
|
206
208
|
|
|
@@ -216,7 +218,7 @@ class VectorIndex:
|
|
|
216
218
|
if vectors.ndim == 1:
|
|
217
219
|
vectors = vectors.reshape(-1, self._dimension)
|
|
218
220
|
if vectors.shape[1] != self._dimension:
|
|
219
|
-
raise ValueError(f"Inconsistent vector dimensions in pending log for '{self.
|
|
221
|
+
raise ValueError(f"Inconsistent vector dimensions in pending log for '{self._collection}'.")
|
|
220
222
|
|
|
221
223
|
# Rebuild the delta index from scratch with all current pending items.
|
|
222
224
|
self._delta_index = faiss.IndexIDMap(faiss.IndexFlatL2(self._dimension))
|
|
@@ -238,7 +240,7 @@ class VectorIndex:
|
|
|
238
240
|
# Add the string ID to the log for other processes to sync.
|
|
239
241
|
cursor.execute(
|
|
240
242
|
"INSERT OR IGNORE INTO _beaver_ann_pending_log (collection_name, str_id) VALUES (?, ?)",
|
|
241
|
-
(self.
|
|
243
|
+
(self._collection, item_id),
|
|
242
244
|
)
|
|
243
245
|
# Create the delta index if this is the first item added.
|
|
244
246
|
if self._delta_index is None:
|
|
@@ -260,7 +262,7 @@ class VectorIndex:
|
|
|
260
262
|
# Add the integer ID to the deletion log.
|
|
261
263
|
cursor.execute(
|
|
262
264
|
"INSERT INTO _beaver_ann_deletions_log (collection_name, int_id) VALUES (?, ?)",
|
|
263
|
-
(self.
|
|
265
|
+
(self._collection, int_id),
|
|
264
266
|
)
|
|
265
267
|
# Also add to the live in-memory deletion set.
|
|
266
268
|
self._deleted_int_ids.add(int_id)
|
|
@@ -323,10 +325,10 @@ class VectorIndex:
|
|
|
323
325
|
if self._dimension is None: return # Nothing to compact.
|
|
324
326
|
|
|
325
327
|
# Step 1: Take a snapshot of the logs. This defines the scope of this compaction run.
|
|
326
|
-
cursor = self.
|
|
327
|
-
cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self.
|
|
328
|
+
cursor = self._db.connection.cursor()
|
|
329
|
+
cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self._collection,))
|
|
328
330
|
pending_str_ids = {row["str_id"] for row in cursor.fetchall()}
|
|
329
|
-
cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self.
|
|
331
|
+
cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self._collection,))
|
|
330
332
|
deleted_int_ids_snapshot = {row["int_id"] for row in cursor.fetchall()}
|
|
331
333
|
|
|
332
334
|
deleted_str_ids_snapshot = {self._int_to_str_id[int_id] for int_id in deleted_int_ids_snapshot if int_id in self._int_to_str_id}
|
|
@@ -334,11 +336,11 @@ class VectorIndex:
|
|
|
334
336
|
# Step 2: Fetch all vectors from the main table that haven't been marked for deletion.
|
|
335
337
|
# This is the long-running part that happens "offline" in a background thread.
|
|
336
338
|
if not deleted_str_ids_snapshot:
|
|
337
|
-
cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self.
|
|
339
|
+
cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self._collection,))
|
|
338
340
|
else:
|
|
339
341
|
cursor.execute(
|
|
340
342
|
f"SELECT item_id, item_vector FROM beaver_collections WHERE collection = ? AND item_id NOT IN ({','.join('?' for _ in deleted_str_ids_snapshot)})",
|
|
341
|
-
(self.
|
|
343
|
+
(self._collection, *deleted_str_ids_snapshot)
|
|
342
344
|
)
|
|
343
345
|
|
|
344
346
|
all_valid_vectors = cursor.fetchall()
|
|
@@ -361,16 +363,16 @@ class VectorIndex:
|
|
|
361
363
|
index_data = buffer.getvalue()
|
|
362
364
|
|
|
363
365
|
# Step 5: Perform the atomic swap in the database. This is a fast, transactional write.
|
|
364
|
-
with self.
|
|
366
|
+
with self._db.connection:
|
|
365
367
|
# Increment the overall collection version to signal a change.
|
|
366
|
-
self.
|
|
368
|
+
self._db.connection.execute("INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) ON CONFLICT(collection_name) DO UPDATE SET version = version + 1", (self._collection,))
|
|
367
369
|
new_version = self._get_db_version()
|
|
368
370
|
|
|
369
371
|
# Update the on-disk base index and its version number.
|
|
370
|
-
self.
|
|
372
|
+
self._db.connection.execute("INSERT INTO _beaver_ann_indexes (collection_name, index_data, base_index_version) VALUES (?, ?, ?) ON CONFLICT(collection_name) DO UPDATE SET index_data = excluded.index_data, base_index_version = excluded.base_index_version", (self._collection, index_data, new_version))
|
|
371
373
|
|
|
372
374
|
# Atomically clear the log entries that were included in this compaction run.
|
|
373
375
|
if pending_str_ids:
|
|
374
|
-
self.
|
|
376
|
+
self._db.connection.execute(f"DELETE FROM _beaver_ann_pending_log WHERE collection_name = ? AND str_id IN ({','.join('?' for _ in pending_str_ids)})", (self._collection, *pending_str_ids))
|
|
375
377
|
if deleted_int_ids_snapshot:
|
|
376
|
-
self.
|
|
378
|
+
self._db.connection.execute(f"DELETE FROM _beaver_ann_deletions_log WHERE collection_name = ? AND int_id IN ({','.join('?' for _ in deleted_int_ids_snapshot)})", (self._collection, *deleted_int_ids_snapshot))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: beaver-db
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.17.0
|
|
4
4
|
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -9,8 +9,17 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
9
9
|
Classifier: Topic :: Database
|
|
10
10
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
11
11
|
Requires-Python: >=3.13
|
|
12
|
-
Provides-Extra:
|
|
13
|
-
Requires-Dist:
|
|
12
|
+
Provides-Extra: cli
|
|
13
|
+
Requires-Dist: typer>=0.19.2; extra == 'cli'
|
|
14
|
+
Provides-Extra: full
|
|
15
|
+
Requires-Dist: faiss-cpu>=1.12.0; extra == 'full'
|
|
16
|
+
Requires-Dist: fastapi[standard]>=0.118.0; extra == 'full'
|
|
17
|
+
Requires-Dist: typer>=0.19.2; extra == 'full'
|
|
18
|
+
Provides-Extra: server
|
|
19
|
+
Requires-Dist: fastapi[standard]>=0.118.0; extra == 'server'
|
|
20
|
+
Requires-Dist: typer>=0.19.2; extra == 'server'
|
|
21
|
+
Provides-Extra: vector
|
|
22
|
+
Requires-Dist: faiss-cpu>=1.12.0; extra == 'vector'
|
|
14
23
|
Description-Content-Type: text/markdown
|
|
15
24
|
|
|
16
25
|
# beaver 🦫
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
|
|
2
|
+
beaver/blobs.py,sha256=YkIEskHD6oHRaJTF0P25HrTT8LqM-REyV_UBPVQxeqQ,4055
|
|
3
|
+
beaver/channels.py,sha256=kIuwKMDBdDQObaKT23znsMXzfpKfE7pXSxvf-u4LlpY,9554
|
|
4
|
+
beaver/cli.py,sha256=ExphB5Tx2hiyhLdtafQmANa6GA3KE5sbbI9tHWj_yY0,948
|
|
5
|
+
beaver/collections.py,sha256=Wm684pGp-E89PCq9gcbbmRC9VMtTxolRVXnrxKlw2m8,24615
|
|
6
|
+
beaver/core.py,sha256=68vjuEbkJTHv4SltCLCrgs34BpLCeL602oJZ6CJ34Zo,14560
|
|
7
|
+
beaver/dicts.py,sha256=Xp8lPfQt08O8zCbptQLWQLO79OxG6uAVER6ryj3SScQ,5495
|
|
8
|
+
beaver/lists.py,sha256=rfJ8uTNLkMREYc0uGx0z1VKt2m3eR9hvbdvDD58EbmQ,10140
|
|
9
|
+
beaver/logs.py,sha256=a5xenwl5NZeegIU0dWVEs67lvaHzzw-JRAZtEzNNO3E,9529
|
|
10
|
+
beaver/queues.py,sha256=Fr3oie63EtceSoiC8EOEDSLu1tDI8q2MYLXd8MEeC3g,6476
|
|
11
|
+
beaver/server.py,sha256=lmzMu51cXa1Qdezg140hmsMLCxVSq8YGX0EPQfuGidk,4043
|
|
12
|
+
beaver/types.py,sha256=WZLINf7hy6zdKdAFQK0EVMSl5vnY_KnrHXNdXgAKuPg,1582
|
|
13
|
+
beaver/vectors.py,sha256=qvI6RwUOGrhVH5d6PUmI3jKDaoDotMy0iy-bHyvmXks,18496
|
|
14
|
+
beaver_db-0.17.0.dist-info/METADATA,sha256=NHuwsDxsE2k8qPuBWttjBMLhfh7f9NIMIhAmFjiVTgg,18615
|
|
15
|
+
beaver_db-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
16
|
+
beaver_db-0.17.0.dist-info/entry_points.txt,sha256=bd5E2s45PoBdtdR9-ToKSdLNhmHp8naV1lWP5mOzlrc,42
|
|
17
|
+
beaver_db-0.17.0.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
|
|
18
|
+
beaver_db-0.17.0.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
|
|
2
|
-
beaver/blobs.py,sha256=5cmcvlJLY9jaftIRuNbdEryZxI47sw_pYpysYli23NY,3996
|
|
3
|
-
beaver/channels.py,sha256=VBXJDw_be-bSY76kRVzFdMZFoy4CepSMwJACc9NJgpc,9658
|
|
4
|
-
beaver/collections.py,sha256=860bYpchokjleDIebJaNU1jcGTCMIbg0t4MasEVVbOk,24486
|
|
5
|
-
beaver/core.py,sha256=t_UzpqcbF2U8BjmQ9aIWTvUzPuVuOLcPzTrZQ2htjn4,13706
|
|
6
|
-
beaver/dicts.py,sha256=1BQ9A_cMkJ7l5ayWbDG-4Wi3WtQ-9BKd7Wj_CB7dGlU,5410
|
|
7
|
-
beaver/lists.py,sha256=Q7xjyReBWFg47nBrXbt09GvBJkEmXvpW9ptL9xCnXC8,9946
|
|
8
|
-
beaver/logs.py,sha256=mlJizZU0emlqLwuNeBJSPlict35Vyi35L4eIl5orv-M,9673
|
|
9
|
-
beaver/queues.py,sha256=rhzP-4PMNaYRH60lQu0a4cSUyOtJdN6TR82m61JBsuU,6434
|
|
10
|
-
beaver/types.py,sha256=bR1bfLgemcySy9mnVL_hxAtog4wN8EmgfF2OonOWjQA,1464
|
|
11
|
-
beaver/vectors.py,sha256=grwiRdusa39s-J9c8nK8LO7duhuYTaLR2Az6wHKs4rU,18487
|
|
12
|
-
beaver_db-0.16.7.dist-info/METADATA,sha256=iFdXZPm8ayklLq4fzO9RgamVszerQSeDWSEyR28gMcg,18240
|
|
13
|
-
beaver_db-0.16.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
-
beaver_db-0.16.7.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
|
|
15
|
-
beaver_db-0.16.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|