beaver-db 0.16.7__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

beaver/vectors.py CHANGED
@@ -3,6 +3,8 @@ import sqlite3
3
3
  import threading
4
4
  from typing import Dict, List, Set, Tuple
5
5
 
6
+ from .types import IDatabase
7
+
6
8
  try:
7
9
  import faiss
8
10
  import numpy as np
@@ -23,12 +25,12 @@ class VectorIndex:
23
25
  user-provided string IDs to the internal integer IDs required by Faiss.
24
26
  """
25
27
 
26
- def __init__(self, collection_name: str, conn: sqlite3.Connection):
28
+ def __init__(self, collection_name: str, db: IDatabase):
27
29
  """
28
30
  Initializes the VectorIndex for a specific collection.
29
31
  """
30
- self._collection_name = collection_name
31
- self._conn = conn
32
+ self._collection = collection_name
33
+ self._db = db
32
34
  # A lock to ensure thread safety for in-memory operations and synchronization checks.
33
35
  self._lock = threading.Lock()
34
36
  # Tracks the overall version of the collection this instance is aware of.
@@ -66,7 +68,7 @@ class VectorIndex:
66
68
  elif self._dimension != dim:
67
69
  # If a dimension is already set, all subsequent vectors must match.
68
70
  raise ValueError(
69
- f"Vector dimension mismatch for collection '{self._collection_name}'. "
71
+ f"Vector dimension mismatch for collection '{self._collection}'. "
70
72
  f"Expected {self._dimension}, but got {dim}."
71
73
  )
72
74
 
@@ -83,12 +85,12 @@ class VectorIndex:
83
85
  # INSERT OR IGNORE is an atomic and safe way to create a new mapping only if it's missing.
84
86
  cursor.execute(
85
87
  "INSERT OR IGNORE INTO _beaver_ann_id_mapping (collection_name, str_id) VALUES (?, ?)",
86
- (self._collection_name, str_id)
88
+ (self._collection, str_id)
87
89
  )
88
90
  # Retrieve the now-guaranteed-to-exist integer ID.
89
91
  cursor.execute(
90
92
  "SELECT int_id FROM _beaver_ann_id_mapping WHERE collection_name = ? AND str_id = ?",
91
- (self._collection_name, str_id)
93
+ (self._collection, str_id)
92
94
  )
93
95
  result = cursor.fetchone()
94
96
  if not result:
@@ -103,20 +105,20 @@ class VectorIndex:
103
105
 
104
106
  def _get_db_version(self) -> int:
105
107
  """Gets the current overall version of the collection from the database."""
106
- cursor = self._conn.cursor()
108
+ cursor = self._db.connection.cursor()
107
109
  cursor.execute(
108
110
  "SELECT version FROM beaver_collection_versions WHERE collection_name = ?",
109
- (self._collection_name,),
111
+ (self._collection,),
110
112
  )
111
113
  result = cursor.fetchone()
112
114
  return result[0] if result else 0
113
115
 
114
116
  def _get_db_base_index_version(self) -> int:
115
117
  """Gets the version of the persistent on-disk base index from the database."""
116
- cursor = self._conn.cursor()
118
+ cursor = self._db.connection.cursor()
117
119
  cursor.execute(
118
120
  "SELECT base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
119
- (self._collection_name,),
121
+ (self._collection,),
120
122
  )
121
123
  result = cursor.fetchone()
122
124
  return result[0] if result else 0
@@ -146,10 +148,10 @@ class VectorIndex:
146
148
 
147
149
  def _load_id_mappings(self):
148
150
  """Loads the complete str <-> int ID mapping from the DB into in-memory caches."""
149
- cursor = self._conn.cursor()
151
+ cursor = self._db.connection.cursor()
150
152
  cursor.execute(
151
153
  "SELECT str_id, int_id FROM _beaver_ann_id_mapping WHERE collection_name = ?",
152
- (self._collection_name,)
154
+ (self._collection,)
153
155
  )
154
156
  # Fetch all mappings at once for efficiency.
155
157
  all_mappings = cursor.fetchall()
@@ -158,10 +160,10 @@ class VectorIndex:
158
160
 
159
161
  def _load_base_index(self):
160
162
  """Loads and deserializes the persistent base index from the database BLOB."""
161
- cursor = self._conn.cursor()
163
+ cursor = self._db.connection.cursor()
162
164
  cursor.execute(
163
165
  "SELECT index_data, base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
164
- (self._collection_name,),
166
+ (self._collection,),
165
167
  )
166
168
  result = cursor.fetchone()
167
169
  if result and result["index_data"]:
@@ -184,11 +186,11 @@ class VectorIndex:
184
186
  "Catches up" to changes by rebuilding the in-memory delta index and
185
187
  deletion set from the database logs.
186
188
  """
187
- cursor = self._conn.cursor()
189
+ cursor = self._db.connection.cursor()
188
190
  # Sync the set of deleted integer IDs.
189
191
  cursor.execute(
190
192
  "SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?",
191
- (self._collection_name,)
193
+ (self._collection,)
192
194
  )
193
195
  self._deleted_int_ids = {row["int_id"] for row in cursor.fetchall()}
194
196
 
@@ -200,7 +202,7 @@ class VectorIndex:
200
202
  JOIN beaver_collections c ON p.str_id = c.item_id AND p.collection_name = c.collection
201
203
  WHERE p.collection_name = ?
202
204
  """,
203
- (self._collection_name,)
205
+ (self._collection,)
204
206
  )
205
207
  pending_items = cursor.fetchall()
206
208
 
@@ -216,7 +218,7 @@ class VectorIndex:
216
218
  if vectors.ndim == 1:
217
219
  vectors = vectors.reshape(-1, self._dimension)
218
220
  if vectors.shape[1] != self._dimension:
219
- raise ValueError(f"Inconsistent vector dimensions in pending log for '{self._collection_name}'.")
221
+ raise ValueError(f"Inconsistent vector dimensions in pending log for '{self._collection}'.")
220
222
 
221
223
  # Rebuild the delta index from scratch with all current pending items.
222
224
  self._delta_index = faiss.IndexIDMap(faiss.IndexFlatL2(self._dimension))
@@ -238,7 +240,7 @@ class VectorIndex:
238
240
  # Add the string ID to the log for other processes to sync.
239
241
  cursor.execute(
240
242
  "INSERT OR IGNORE INTO _beaver_ann_pending_log (collection_name, str_id) VALUES (?, ?)",
241
- (self._collection_name, item_id),
243
+ (self._collection, item_id),
242
244
  )
243
245
  # Create the delta index if this is the first item added.
244
246
  if self._delta_index is None:
@@ -260,7 +262,7 @@ class VectorIndex:
260
262
  # Add the integer ID to the deletion log.
261
263
  cursor.execute(
262
264
  "INSERT INTO _beaver_ann_deletions_log (collection_name, int_id) VALUES (?, ?)",
263
- (self._collection_name, int_id),
265
+ (self._collection, int_id),
264
266
  )
265
267
  # Also add to the live in-memory deletion set.
266
268
  self._deleted_int_ids.add(int_id)
@@ -323,10 +325,10 @@ class VectorIndex:
323
325
  if self._dimension is None: return # Nothing to compact.
324
326
 
325
327
  # Step 1: Take a snapshot of the logs. This defines the scope of this compaction run.
326
- cursor = self._conn.cursor()
327
- cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self._collection_name,))
328
+ cursor = self._db.connection.cursor()
329
+ cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self._collection,))
328
330
  pending_str_ids = {row["str_id"] for row in cursor.fetchall()}
329
- cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self._collection_name,))
331
+ cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self._collection,))
330
332
  deleted_int_ids_snapshot = {row["int_id"] for row in cursor.fetchall()}
331
333
 
332
334
  deleted_str_ids_snapshot = {self._int_to_str_id[int_id] for int_id in deleted_int_ids_snapshot if int_id in self._int_to_str_id}
@@ -334,11 +336,11 @@ class VectorIndex:
334
336
  # Step 2: Fetch all vectors from the main table that haven't been marked for deletion.
335
337
  # This is the long-running part that happens "offline" in a background thread.
336
338
  if not deleted_str_ids_snapshot:
337
- cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self._collection_name,))
339
+ cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self._collection,))
338
340
  else:
339
341
  cursor.execute(
340
342
  f"SELECT item_id, item_vector FROM beaver_collections WHERE collection = ? AND item_id NOT IN ({','.join('?' for _ in deleted_str_ids_snapshot)})",
341
- (self._collection_name, *deleted_str_ids_snapshot)
343
+ (self._collection, *deleted_str_ids_snapshot)
342
344
  )
343
345
 
344
346
  all_valid_vectors = cursor.fetchall()
@@ -361,16 +363,16 @@ class VectorIndex:
361
363
  index_data = buffer.getvalue()
362
364
 
363
365
  # Step 5: Perform the atomic swap in the database. This is a fast, transactional write.
364
- with self._conn:
366
+ with self._db.connection:
365
367
  # Increment the overall collection version to signal a change.
366
- self._conn.execute("INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) ON CONFLICT(collection_name) DO UPDATE SET version = version + 1", (self._collection_name,))
368
+ self._db.connection.execute("INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) ON CONFLICT(collection_name) DO UPDATE SET version = version + 1", (self._collection,))
367
369
  new_version = self._get_db_version()
368
370
 
369
371
  # Update the on-disk base index and its version number.
370
- self._conn.execute("INSERT INTO _beaver_ann_indexes (collection_name, index_data, base_index_version) VALUES (?, ?, ?) ON CONFLICT(collection_name) DO UPDATE SET index_data = excluded.index_data, base_index_version = excluded.base_index_version", (self._collection_name, index_data, new_version))
372
+ self._db.connection.execute("INSERT INTO _beaver_ann_indexes (collection_name, index_data, base_index_version) VALUES (?, ?, ?) ON CONFLICT(collection_name) DO UPDATE SET index_data = excluded.index_data, base_index_version = excluded.base_index_version", (self._collection, index_data, new_version))
371
373
 
372
374
  # Atomically clear the log entries that were included in this compaction run.
373
375
  if pending_str_ids:
374
- self._conn.execute(f"DELETE FROM _beaver_ann_pending_log WHERE collection_name = ? AND str_id IN ({','.join('?' for _ in pending_str_ids)})", (self._collection_name, *pending_str_ids))
376
+ self._db.connection.execute(f"DELETE FROM _beaver_ann_pending_log WHERE collection_name = ? AND str_id IN ({','.join('?' for _ in pending_str_ids)})", (self._collection, *pending_str_ids))
375
377
  if deleted_int_ids_snapshot:
376
- self._conn.execute(f"DELETE FROM _beaver_ann_deletions_log WHERE collection_name = ? AND int_id IN ({','.join('?' for _ in deleted_int_ids_snapshot)})", (self._collection_name, *deleted_int_ids_snapshot))
378
+ self._db.connection.execute(f"DELETE FROM _beaver_ann_deletions_log WHERE collection_name = ? AND int_id IN ({','.join('?' for _ in deleted_int_ids_snapshot)})", (self._collection, *deleted_int_ids_snapshot))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beaver-db
3
- Version: 0.16.7
3
+ Version: 0.17.0
4
4
  Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
5
  License-File: LICENSE
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -9,8 +9,17 @@ Classifier: Programming Language :: Python :: 3.13
9
9
  Classifier: Topic :: Database
10
10
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
11
11
  Requires-Python: >=3.13
12
- Provides-Extra: faiss
13
- Requires-Dist: faiss-cpu>=1.12.0; extra == 'faiss'
12
+ Provides-Extra: cli
13
+ Requires-Dist: typer>=0.19.2; extra == 'cli'
14
+ Provides-Extra: full
15
+ Requires-Dist: faiss-cpu>=1.12.0; extra == 'full'
16
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'full'
17
+ Requires-Dist: typer>=0.19.2; extra == 'full'
18
+ Provides-Extra: server
19
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'server'
20
+ Requires-Dist: typer>=0.19.2; extra == 'server'
21
+ Provides-Extra: vector
22
+ Requires-Dist: faiss-cpu>=1.12.0; extra == 'vector'
14
23
  Description-Content-Type: text/markdown
15
24
 
16
25
  # beaver 🦫
@@ -0,0 +1,18 @@
1
+ beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
2
+ beaver/blobs.py,sha256=YkIEskHD6oHRaJTF0P25HrTT8LqM-REyV_UBPVQxeqQ,4055
3
+ beaver/channels.py,sha256=kIuwKMDBdDQObaKT23znsMXzfpKfE7pXSxvf-u4LlpY,9554
4
+ beaver/cli.py,sha256=ExphB5Tx2hiyhLdtafQmANa6GA3KE5sbbI9tHWj_yY0,948
5
+ beaver/collections.py,sha256=Wm684pGp-E89PCq9gcbbmRC9VMtTxolRVXnrxKlw2m8,24615
6
+ beaver/core.py,sha256=68vjuEbkJTHv4SltCLCrgs34BpLCeL602oJZ6CJ34Zo,14560
7
+ beaver/dicts.py,sha256=Xp8lPfQt08O8zCbptQLWQLO79OxG6uAVER6ryj3SScQ,5495
8
+ beaver/lists.py,sha256=rfJ8uTNLkMREYc0uGx0z1VKt2m3eR9hvbdvDD58EbmQ,10140
9
+ beaver/logs.py,sha256=a5xenwl5NZeegIU0dWVEs67lvaHzzw-JRAZtEzNNO3E,9529
10
+ beaver/queues.py,sha256=Fr3oie63EtceSoiC8EOEDSLu1tDI8q2MYLXd8MEeC3g,6476
11
+ beaver/server.py,sha256=lmzMu51cXa1Qdezg140hmsMLCxVSq8YGX0EPQfuGidk,4043
12
+ beaver/types.py,sha256=WZLINf7hy6zdKdAFQK0EVMSl5vnY_KnrHXNdXgAKuPg,1582
13
+ beaver/vectors.py,sha256=qvI6RwUOGrhVH5d6PUmI3jKDaoDotMy0iy-bHyvmXks,18496
14
+ beaver_db-0.17.0.dist-info/METADATA,sha256=NHuwsDxsE2k8qPuBWttjBMLhfh7f9NIMIhAmFjiVTgg,18615
15
+ beaver_db-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ beaver_db-0.17.0.dist-info/entry_points.txt,sha256=bd5E2s45PoBdtdR9-ToKSdLNhmHp8naV1lWP5mOzlrc,42
17
+ beaver_db-0.17.0.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
18
+ beaver_db-0.17.0.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ beaver = beaver.cli:app
@@ -1,15 +0,0 @@
1
- beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
2
- beaver/blobs.py,sha256=5cmcvlJLY9jaftIRuNbdEryZxI47sw_pYpysYli23NY,3996
3
- beaver/channels.py,sha256=VBXJDw_be-bSY76kRVzFdMZFoy4CepSMwJACc9NJgpc,9658
4
- beaver/collections.py,sha256=860bYpchokjleDIebJaNU1jcGTCMIbg0t4MasEVVbOk,24486
5
- beaver/core.py,sha256=t_UzpqcbF2U8BjmQ9aIWTvUzPuVuOLcPzTrZQ2htjn4,13706
6
- beaver/dicts.py,sha256=1BQ9A_cMkJ7l5ayWbDG-4Wi3WtQ-9BKd7Wj_CB7dGlU,5410
7
- beaver/lists.py,sha256=Q7xjyReBWFg47nBrXbt09GvBJkEmXvpW9ptL9xCnXC8,9946
8
- beaver/logs.py,sha256=mlJizZU0emlqLwuNeBJSPlict35Vyi35L4eIl5orv-M,9673
9
- beaver/queues.py,sha256=rhzP-4PMNaYRH60lQu0a4cSUyOtJdN6TR82m61JBsuU,6434
10
- beaver/types.py,sha256=bR1bfLgemcySy9mnVL_hxAtog4wN8EmgfF2OonOWjQA,1464
11
- beaver/vectors.py,sha256=grwiRdusa39s-J9c8nK8LO7duhuYTaLR2Az6wHKs4rU,18487
12
- beaver_db-0.16.7.dist-info/METADATA,sha256=iFdXZPm8ayklLq4fzO9RgamVszerQSeDWSEyR28gMcg,18240
13
- beaver_db-0.16.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- beaver_db-0.16.7.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
15
- beaver_db-0.16.7.dist-info/RECORD,,