beaver-db 0.16.6__py3-none-any.whl → 0.16.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of beaver-db might be problematic.

beaver/blobs.py CHANGED
@@ -2,7 +2,7 @@ import json
  import sqlite3
  from typing import Any, Dict, Iterator, NamedTuple, Optional, Type, TypeVar

- from .types import JsonSerializable
+ from .types import JsonSerializable, IDatabase


  class Blob[M](NamedTuple):
@@ -16,9 +16,9 @@ class Blob[M](NamedTuple):
  class BlobManager[M]:
  """A wrapper providing a Pythonic interface to a blob store in the database."""

- def __init__(self, name: str, conn: sqlite3.Connection, model: Type[M] | None = None):
+ def __init__(self, name: str, db: IDatabase, model: Type[M] | None = None):
  self._name = name
- self._conn = conn
+ self._db = db
  self._model = model

  def _serialize(self, value: M) -> str | None:
@@ -51,8 +51,8 @@ class BlobManager[M]:

  metadata_json = self._serialize(metadata) if metadata else None

- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT OR REPLACE INTO beaver_blobs (store_name, key, data, metadata) VALUES (?, ?, ?, ?)",
  (self._name, key, data, metadata_json),
  )
@@ -67,7 +67,7 @@ class BlobManager[M]:
  Returns:
  A Blob object containing the data and metadata, or None if the key is not found.
  """
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT data, metadata FROM beaver_blobs WHERE store_name = ? AND key = ?",
  (self._name, key),
@@ -90,8 +90,8 @@ class BlobManager[M]:
  Raises:
  KeyError: If the key does not exist in the store.
  """
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "DELETE FROM beaver_blobs WHERE store_name = ? AND key = ?",
  (self._name, key),
@@ -103,7 +103,7 @@ class BlobManager[M]:
  """
  Checks if a key exists in the blob store (e.g., `key in blobs`).
  """
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT 1 FROM beaver_blobs WHERE store_name = ? AND key = ? LIMIT 1",
  (self._name, key),
@@ -114,7 +114,7 @@ class BlobManager[M]:

  def __iter__(self) -> Iterator[str]:
  """Returns an iterator over the keys in the blob store."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT key FROM beaver_blobs WHERE store_name = ?", (self._name,)
  )
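The change above is the template for every manager in this release: instead of caching a `sqlite3.Connection` received at construction time, each manager keeps the database object and resolves `db.connection` on every call. A minimal sketch of the pattern, with an illustrative `_ExampleManager` name that is not part of the package:

```python
import sqlite3


class _ExampleManager:
    """Illustrative only: mirrors how BlobManager now consumes an IDatabase-like object."""

    def __init__(self, name: str, db):
        self._name = name
        self._db = db  # anything exposing a `.connection` property

    def put(self, key: str, data: bytes) -> None:
        # `with conn:` wraps the statement in a transaction that commits on success.
        with self._db.connection as conn:
            conn.execute(
                "INSERT OR REPLACE INTO beaver_blobs (store_name, key, data, metadata) VALUES (?, ?, ?, ?)",
                (self._name, key, data, None),
            )
```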
beaver/channels.py CHANGED
@@ -6,7 +6,7 @@ import time
  from queue import Empty, Queue
  from typing import Any, AsyncIterator, Generic, Iterator, Set, Type, TypeVar

- from .types import JsonSerializable
+ from .types import JsonSerializable, IDatabase

  # A special message object used to signal the listener to gracefully shut down.
  _SHUTDOWN_SENTINEL = object()
@@ -120,14 +120,12 @@ class ChannelManager[T]:
  def __init__(
  self,
  name: str,
- conn: sqlite3.Connection,
- db_path: str,
+ db: IDatabase,
  poll_interval: float = 0.1,
  model: Type[T] | None = None,
  ):
  self._name = name
- self._conn = conn
- self._db_path = db_path
+ self._db = db
  self._poll_interval = poll_interval
  self._model = model
  self._listeners: Set[Queue] = set()
@@ -197,8 +195,8 @@ class ChannelManager[T]:

  Useful for reducing the database once logs are not needed.
  """
- with self._conn:
- self._conn.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))
+ with self._db.connection:
+ self._db.connection.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))

  def _polling_loop(self):
  """
@@ -208,8 +206,7 @@ class ChannelManager[T]:
  to all registered listener queues.
  """
  # A separate SQLite connection is required for each thread.
- thread_conn = sqlite3.connect(self._db_path, check_same_thread=False)
- thread_conn.row_factory = sqlite3.Row
+ thread_conn = self._db.connection

  # The poller starts listening for messages from this moment forward.
  last_seen_timestamp = time.time()
@@ -256,8 +253,8 @@ class ChannelManager[T]:
  except TypeError as e:
  raise TypeError("Message payload must be JSON-serializable.") from e

- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
  (time.time(), self._name, json_payload),
  )
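The removed `sqlite3.connect(self._db_path, check_same_thread=False)` call existed because, by default, a `sqlite3` connection may only be used from the thread that created it. The standalone snippet below (not from beaver) shows the error that sharing one connection across threads would raise, which is what the per-thread `db.connection` property avoids:

```python
import sqlite3
import threading

conn = sqlite3.connect(":memory:")  # created on the main thread, default check_same_thread=True

def worker():
    try:
        conn.execute("SELECT 1")
    except sqlite3.ProgrammingError as exc:
        # "SQLite objects created in a thread can only be used in that same thread..."
        print("shared-connection error:", exc)

t = threading.Thread(target=worker)
t.start()
t.join()
```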
beaver/collections.py CHANGED
@@ -4,7 +4,7 @@ import threading
  import uuid
  from enum import Enum
  from typing import Any, Iterator, List, Literal, Tuple, Type, TypeVar
- from .types import Model, stub
+ from .types import Model, stub, IDatabase

  try:
  import numpy as np
@@ -111,12 +111,12 @@ class CollectionManager[D: Document]:
  FTS, fuzzy search, graph traversal, and persistent vector search.
  """

- def __init__(self, name: str, conn: sqlite3.Connection, model: Type[D] | None = None):
+ def __init__(self, name: str, db: IDatabase, model: Type[D] | None = None):
  self._name = name
- self._conn = conn
+ self._db = db
  self._model = model or Document
  # All vector-related operations are now delegated to the VectorIndex class.
- self._vector_index = VectorIndex(name, conn)
+ self._vector_index = VectorIndex(name, db)
  # A lock to ensure only one compaction thread runs at a time for this collection.
  self._compaction_lock = threading.Lock()
  self._compaction_thread: threading.Thread | None = None
@@ -134,7 +134,7 @@ class CollectionManager[D: Document]:

  def _needs_compaction(self, threshold: int = 1000) -> bool:
  """Checks if the total number of pending vector operations exceeds the threshold."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT COUNT(*) FROM _beaver_ann_pending_log WHERE collection_name = ?",
  (self._name,)
@@ -199,8 +199,8 @@ class CollectionManager[D: Document]:
  if not isinstance(document, Document):
  raise TypeError("Item to index must be a Document object.")

- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()

  # Step 1: Core Document and Vector Storage
  cursor.execute(
@@ -253,8 +253,8 @@ class CollectionManager[D: Document]:
  """Removes a document and all its associated data from the collection."""
  if not isinstance(document, Document):
  raise TypeError("Item to drop must be a Document object.")
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute("DELETE FROM beaver_collections WHERE collection = ? AND item_id = ?", (self._name, document.id))
  cursor.execute("DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?", (self._name, document.id))
  cursor.execute("DELETE FROM beaver_trigrams WHERE collection = ? AND item_id = ?", (self._name, document.id))
@@ -271,7 +271,7 @@ class CollectionManager[D: Document]:

  def __iter__(self) -> Iterator[D]:
  """Returns an iterator over all documents in the collection."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
  (self._name,),
@@ -306,7 +306,7 @@ class CollectionManager[D: Document]:
  placeholders = ",".join("?" for _ in result_ids)
  sql = f"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ? AND item_id IN ({placeholders})"

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  rows = cursor.execute(sql, (self._name, *result_ids)).fetchall()

  doc_map = {
@@ -350,7 +350,7 @@ class CollectionManager[D: Document]:
  self, query: str, on: list[str] | None, top_k: int
  ) -> list[tuple[D, float]]:
  """Performs a standard FTS search."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  sql_query = """
  SELECT t1.item_id, t1.item_vector, t1.metadata, fts.rank
  FROM beaver_collections AS t1 JOIN (
@@ -390,7 +390,7 @@ class CollectionManager[D: Document]:
  if similarity_threshold == 0:
  return set()

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  sql = """
  SELECT item_id FROM beaver_trigrams
  WHERE collection = ? AND trigram IN ({}) {}
@@ -422,7 +422,7 @@ class CollectionManager[D: Document]:
  if not candidate_ids:
  return []

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  id_placeholders = ",".join("?" for _ in candidate_ids)
  sql_text = f"SELECT item_id, field_path, field_content FROM beaver_fts_index WHERE collection = ? AND item_id IN ({id_placeholders})"
  params_text: list[Any] = [self._name]
@@ -480,8 +480,8 @@ class CollectionManager[D: Document]:
  """Creates a directed edge between two documents."""
  if not isinstance(source, Document) or not isinstance(target, Document):
  raise TypeError("Source and target must be Document objects.")
- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT OR REPLACE INTO beaver_edges (collection, source_item_id, target_item_id, label, metadata) VALUES (?, ?, ?, ?, ?)",
  (
  self._name,
@@ -500,7 +500,7 @@ class CollectionManager[D: Document]:
  sql += " AND t2.label = ?"
  params.append(label)

- rows = self._conn.cursor().execute(sql, tuple(params)).fetchall()
+ rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
  return [
  self._model(
  id=row["item_id"],
@@ -549,7 +549,7 @@ class CollectionManager[D: Document]:
  """
  params = [source.id, self._name, depth] + labels + [self._name]

- rows = self._conn.cursor().execute(sql, tuple(params)).fetchall()
+ rows = self._db.connection.cursor().execute(sql, tuple(params)).fetchall()
  return [
  self._model(
  id=row["item_id"],
@@ -565,7 +565,7 @@ class CollectionManager[D: Document]:

  def __len__(self) -> int:
  """Returns the number of documents in the collection."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT COUNT(*) FROM beaver_collections WHERE collection = ?",
  (self._name,),
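The fuzzy-search queries above match query trigrams against the `beaver_trigrams` table. As a rough illustration of what a character-trigram decomposition looks like (the package's actual normalization and padding are not shown in this diff, so this helper is only an approximation):

```python
def trigrams(text: str) -> set[str]:
    # Illustrative 3-gram split; beaver's own tokenization may differ.
    text = text.lower()
    if len(text) < 3:
        return {text}
    return {text[i:i + 3] for i in range(len(text) - 2)}

print(sorted(trigrams("beaver")))  # ['ave', 'bea', 'eav', 'ver']
```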
beaver/core.py CHANGED
@@ -15,7 +15,7 @@ from .queues import QueueManager
  class BeaverDB:
  """
  An embedded, multi-modal database in a single SQLite file.
- This class manages the database connection and table schemas.
+ This class manages thread-safe database connections and table schemas.
  """

  def __init__(self, db_path: str, timeout:float=30.0):
@@ -26,22 +26,44 @@ class BeaverDB:
  db_path: The path to the SQLite database file.
  """
  self._db_path = db_path
- # Enable WAL mode for better concurrency between readers and writers
- self._conn = sqlite3.connect(self._db_path, check_same_thread=False, timeout=timeout)
- self._conn.execute("PRAGMA journal_mode=WAL;")
- self._conn.row_factory = sqlite3.Row
+ self._timeout = timeout
+ # This object will store a different connection for each thread.
+ self._thread_local = threading.local()
+
  self._channels: dict[str, ChannelManager] = {}
  self._channels_lock = threading.Lock()
- # Add a cache and lock for CollectionManager singletons
  self._collections: dict[str, CollectionManager] = {}
  self._collections_lock = threading.Lock()

- # Initialize the schemas
+ # Initialize the schemas. This will implicitly create the first
+ # connection for the main thread via the `connection` property.
  self._create_all_tables()

+ @property
+ def connection(self) -> sqlite3.Connection:
+ """
+ Provides a thread-safe SQLite connection.
+
+ Each thread will receive its own dedicated connection object.
+ The connection is created on the first access and then reused for
+ all subsequent calls within the same thread.
+ """
+ # Check if a connection is already stored for this thread
+ conn = getattr(self._thread_local, 'conn', None)
+
+ if conn is None:
+ # No connection for this thread yet, so create one.
+ # We no longer need check_same_thread=False, restoring thread safety.
+ conn = sqlite3.connect(self._db_path, timeout=self._timeout)
+ conn.execute("PRAGMA journal_mode=WAL;")
+ conn.row_factory = sqlite3.Row
+ self._thread_local.conn = conn
+
+ return conn
+
  def _create_all_tables(self):
  """Initializes all required tables in the database file."""
- with self._conn:
+ with self.connection:
  self._create_ann_deletions_log_table()
  self._create_ann_id_mapping_table()
  self._create_ann_indexes_table()
@@ -60,7 +82,7 @@ class BeaverDB:

  def _create_logs_table(self):
  """Creates the table for time-indexed logs."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_logs (
  log_name TEXT NOT NULL,
@@ -70,7 +92,7 @@ class BeaverDB:
  )
  """
  )
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE INDEX IF NOT EXISTS idx_logs_timestamp
  ON beaver_logs (log_name, timestamp)
@@ -79,7 +101,7 @@ class BeaverDB:

  def _create_blobs_table(self):
  """Creates the table for storing named blobs."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_blobs (
  store_name TEXT NOT NULL,
@@ -93,7 +115,7 @@ class BeaverDB:

  def _create_ann_indexes_table(self):
  """Creates the table to store the serialized base ANN index."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS _beaver_ann_indexes (
  collection_name TEXT PRIMARY KEY,
@@ -105,7 +127,7 @@ class BeaverDB:

  def _create_ann_pending_log_table(self):
  """Creates the log for new vector additions."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS _beaver_ann_pending_log (
  collection_name TEXT NOT NULL,
@@ -117,7 +139,7 @@ class BeaverDB:

  def _create_ann_deletions_log_table(self):
  """Creates the log for vector deletions (tombstones)."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS _beaver_ann_deletions_log (
  collection_name TEXT NOT NULL,
@@ -129,7 +151,7 @@ class BeaverDB:

  def _create_ann_id_mapping_table(self):
  """Creates the table to map string IDs to integer IDs for Faiss."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS _beaver_ann_id_mapping (
  collection_name TEXT NOT NULL,
@@ -142,7 +164,7 @@ class BeaverDB:

  def _create_priority_queue_table(self):
  """Creates the priority queue table and its performance index."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_priority_queues (
  queue_name TEXT NOT NULL,
@@ -152,7 +174,7 @@ class BeaverDB:
  )
  """
  )
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE INDEX IF NOT EXISTS idx_priority_queue_order
  ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
@@ -161,7 +183,7 @@ class BeaverDB:

  def _create_dict_table(self):
  """Creates the namespaced dictionary table."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_dicts (
  dict_name TEXT NOT NULL,
@@ -175,7 +197,7 @@ class BeaverDB:

  def _create_pubsub_table(self):
  """Creates the pub/sub log table."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
  timestamp REAL PRIMARY KEY,
@@ -184,7 +206,7 @@ class BeaverDB:
  )
  """
  )
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
  ON beaver_pubsub_log (channel_name, timestamp)
@@ -193,7 +215,7 @@ class BeaverDB:

  def _create_list_table(self):
  """Creates the lists table."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_lists (
  list_name TEXT NOT NULL,
@@ -206,7 +228,7 @@ class BeaverDB:

  def _create_collections_table(self):
  """Creates the main table for storing documents and vectors."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_collections (
  collection TEXT NOT NULL,
@@ -220,7 +242,7 @@ class BeaverDB:

  def _create_fts_table(self):
  """Creates the virtual FTS table for full-text search."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
  collection,
@@ -234,7 +256,7 @@ class BeaverDB:

  def _create_trigrams_table(self):
  """Creates the table for the fuzzy search trigram index."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_trigrams (
  collection TEXT NOT NULL,
@@ -245,7 +267,7 @@ class BeaverDB:
  )
  """
  )
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE INDEX IF NOT EXISTS idx_trigram_lookup
  ON beaver_trigrams (collection, trigram, field_path)
@@ -254,7 +276,7 @@ class BeaverDB:

  def _create_edges_table(self):
  """Creates the table for storing relationships between documents."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_edges (
  collection TEXT NOT NULL,
@@ -269,7 +291,7 @@ class BeaverDB:

  def _create_versions_table(self):
  """Creates a table to track the version of each collection for caching."""
- self._conn.execute(
+ self.connection.execute(
  """
  CREATE TABLE IF NOT EXISTS beaver_collection_versions (
  collection_name TEXT PRIMARY KEY,
@@ -280,12 +302,12 @@ class BeaverDB:

  def close(self):
  """Closes the database connection."""
- if self._conn:
+ if self.connection:
  # Cleanly shut down any active polling threads before closing
  with self._channels_lock:
  for channel in self._channels.values():
  channel.close()
- self._conn.close()
+ self.connection.close()

  # --- Factory and Passthrough Methods ---

@@ -300,7 +322,7 @@ class BeaverDB:
  if model and not isinstance(model, JsonSerializable):
  raise TypeError("The model parameter must be a JsonSerializable class.")

- return DictManager(name, self._conn, model)
+ return DictManager(name, self, model)

  def list[T](self, name: str, model: type[T] | None = None) -> ListManager[T]:
  """
@@ -313,7 +335,7 @@ class BeaverDB:
  if model and not isinstance(model, JsonSerializable):
  raise TypeError("The model parameter must be a JsonSerializable class.")

- return ListManager(name, self._conn, model)
+ return ListManager(name, self, model)

  def queue[T](self, name: str, model: type[T] | None = None) -> QueueManager[T]:
  """
@@ -326,7 +348,7 @@ class BeaverDB:
  if model and not isinstance(model, JsonSerializable):
  raise TypeError("The model parameter must be a JsonSerializable class.")

- return QueueManager(name, self._conn, model)
+ return QueueManager(name, self, model)

  def collection[D: Document](self, name: str, model: Type[D] | None = None) -> CollectionManager[D]:
  """
@@ -341,7 +363,7 @@ class BeaverDB:
  # of the vector index consistently.
  with self._collections_lock:
  if name not in self._collections:
- self._collections[name] = CollectionManager(name, self._conn, model=model)
+ self._collections[name] = CollectionManager(name, self, model=model)

  return self._collections[name]

@@ -355,7 +377,7 @@ class BeaverDB:
  # Use a thread-safe lock to ensure only one Channel object is created per name.
  with self._channels_lock:
  if name not in self._channels:
- self._channels[name] = ChannelManager(name, self._conn, self._db_path, model=model)
+ self._channels[name] = ChannelManager(name, self, model=model)
  return self._channels[name]

  def blobs[M](self, name: str, model: type[M] | None = None) -> BlobManager[M]:
@@ -363,7 +385,7 @@ class BeaverDB:
  if not isinstance(name, str) or not name:
  raise TypeError("Blob store name must be a non-empty string.")

- return BlobManager(name, self._conn, model)
+ return BlobManager(name, self, model)

  def log[T](self, name: str, model: type[T] | None = None) -> LogManager[T]:
  """
@@ -376,4 +398,4 @@ class BeaverDB:
  if model and not isinstance(model, JsonSerializable):
  raise TypeError("The model parameter must be a JsonSerializable class.")

- return LogManager(name, self._conn, self._db_path, model)
+ return LogManager(name, self, self._db_path, model)
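The new `connection` property is the heart of this release: connections are created lazily, one per thread, and cached in `threading.local()` storage. An assumed usage example (the `from beaver import BeaverDB` import path is an assumption, not shown in this diff):

```python
import threading

from beaver import BeaverDB  # assumed import path

db = BeaverDB("demo.db")

main_conn = db.connection
assert db.connection is main_conn  # repeated access in one thread reuses the cached connection

def worker():
    worker_conn = db.connection       # first access from this thread creates its own connection
    print(worker_conn is main_conn)   # False: each thread gets a dedicated sqlite3.Connection

t = threading.Thread(target=worker)
t.start()
t.join()
```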
beaver/dicts.py CHANGED
@@ -3,15 +3,14 @@ import sqlite3
  import time
  from typing import Any, Iterator, Tuple, Type

- from .types import JsonSerializable
-
+ from .types import JsonSerializable, IDatabase

  class DictManager[T]:
  """A wrapper providing a Pythonic interface to a dictionary in the database."""

- def __init__(self, name: str, conn: sqlite3.Connection, model: Type[T] | None = None):
+ def __init__(self, name: str, db: IDatabase, model: Type[T] | None = None):
  self._name = name
- self._conn = conn
+ self._db = db
  self._model = model

  def _serialize(self, value: T) -> str:
@@ -40,8 +39,8 @@ class DictManager[T]:
  raise ValueError("ttl_seconds must be a positive integer.")
  expires_at = time.time() + ttl_seconds

- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT OR REPLACE INTO beaver_dicts (dict_name, key, value, expires_at) VALUES (?, ?, ?, ?)",
  (self._name, key, self._serialize(value), expires_at),
  )
@@ -55,7 +54,7 @@ class DictManager[T]:

  def __getitem__(self, key: str) -> T:
  """Retrieves a value for a given key, raising KeyError if expired."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT value, expires_at FROM beaver_dicts WHERE dict_name = ? AND key = ?",
  (self._name, key),
@@ -70,7 +69,7 @@ class DictManager[T]:

  if expires_at is not None and time.time() > expires_at:
  # Expired: delete the key and raise KeyError
- with self._conn:
+ with self._db.connection:
  cursor.execute(
  "DELETE FROM beaver_dicts WHERE dict_name = ? AND key = ?",
  (self._name, key),
@@ -94,8 +93,8 @@ class DictManager[T]:

  def __delitem__(self, key: str):
  """Deletes a key-value pair (e.g., `del my_dict[key]`)."""
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "DELETE FROM beaver_dicts WHERE dict_name = ? AND key = ?",
  (self._name, key),
@@ -105,7 +104,7 @@ class DictManager[T]:

  def __len__(self) -> int:
  """Returns the number of items in the dictionary."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT COUNT(*) FROM beaver_dicts WHERE dict_name = ?", (self._name,)
  )
@@ -119,7 +118,7 @@ class DictManager[T]:

  def keys(self) -> Iterator[str]:
  """Returns an iterator over the dictionary's keys."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT key FROM beaver_dicts WHERE dict_name = ?", (self._name,)
  )
@@ -129,7 +128,7 @@ class DictManager[T]:

  def values(self) -> Iterator[T]:
  """Returns an iterator over the dictionary's values."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT value FROM beaver_dicts WHERE dict_name = ?", (self._name,)
  )
@@ -139,7 +138,7 @@ class DictManager[T]:

  def items(self) -> Iterator[Tuple[str, T]]:
  """Returns an iterator over the dictionary's items (key-value pairs)."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT key, value FROM beaver_dicts WHERE dict_name = ?", (self._name,)
  )
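The TTL handling visible above is lazy: `expires_at` is stored alongside the value, and an expired key is only deleted when it is next read. A hedged sketch of that flow reduced to plain `sqlite3` calls, using the table and column names from the diff (the real table's full schema, including its primary key, is not shown here, so the CREATE TABLE below is a simplification):

```python
import sqlite3
import time

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE beaver_dicts (dict_name TEXT, key TEXT, value TEXT, expires_at REAL, "
    "PRIMARY KEY (dict_name, key))"
)
conn.execute(
    "INSERT OR REPLACE INTO beaver_dicts (dict_name, key, value, expires_at) VALUES (?, ?, ?, ?)",
    ("settings", "token", '"abc"', time.time() + 0.1),  # expires 100 ms from now
)
time.sleep(0.2)
row = conn.execute(
    "SELECT value, expires_at FROM beaver_dicts WHERE dict_name = ? AND key = ?",
    ("settings", "token"),
).fetchone()
if row and row[1] is not None and time.time() > row[1]:
    # Expired: delete on read, mirroring DictManager.__getitem__ above.
    conn.execute("DELETE FROM beaver_dicts WHERE dict_name = ? AND key = ?", ("settings", "token"))
    print("expired")
```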
beaver/lists.py CHANGED
@@ -2,15 +2,15 @@ import json
  import sqlite3
  from typing import Any, Iterator, Type, Union

- from .types import JsonSerializable
+ from .types import JsonSerializable, IDatabase


  class ListManager[T]:
  """A wrapper providing a Pythonic, full-featured interface to a list in the database."""

- def __init__(self, name: str, conn: sqlite3.Connection, model: Type[T] | None = None):
+ def __init__(self, name: str, db: IDatabase, model: Type[T] | None = None):
  self._name = name
- self._conn = conn
+ self._db = db
  self._model = model

  def _serialize(self, value: T) -> str:
@@ -27,7 +27,7 @@ class ListManager[T]:

  def __len__(self) -> int:
  """Returns the number of items in the list (e.g., `len(my_list)`)."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
  )
@@ -40,7 +40,7 @@ class ListManager[T]:
  Retrieves an item or slice from the list (e.g., `my_list[0]`, `my_list[1:3]`).
  """
  if isinstance(key, slice):
- with self._conn:
+ with self._db.connection:
  start, stop, step = key.indices(len(self))
  if step != 1:
  raise ValueError("Slicing with a step is not supported.")
@@ -49,7 +49,7 @@ class ListManager[T]:
  if limit <= 0:
  return []

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
  (self._name, limit, start),
@@ -59,14 +59,14 @@ class ListManager[T]:
  return results

  elif isinstance(key, int):
- with self._conn:
+ with self._db.connection:
  list_len = len(self)
  if key < -list_len or key >= list_len:
  raise IndexError("List index out of range.")

  offset = key if key >= 0 else list_len + key

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
  (self._name, offset),
@@ -83,14 +83,14 @@ class ListManager[T]:
  if not isinstance(key, int):
  raise TypeError("List indices must be integers.")

- with self._conn:
+ with self._db.connection:
  list_len = len(self)
  if key < -list_len or key >= list_len:
  raise IndexError("List index out of range.")

  offset = key if key >= 0 else list_len + key

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  # Find the rowid of the item to update
  cursor.execute(
  "SELECT rowid FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
@@ -112,14 +112,14 @@ class ListManager[T]:
  if not isinstance(key, int):
  raise TypeError("List indices must be integers.")

- with self._conn:
+ with self._db.connection:
  list_len = len(self)
  if key < -list_len or key >= list_len:
  raise IndexError("List index out of range.")

  offset = key if key >= 0 else list_len + key

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  # Find the rowid of the item to delete
  cursor.execute(
  "SELECT rowid FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
@@ -135,7 +135,7 @@ class ListManager[T]:

  def __iter__(self) -> Iterator[T]:
  """Returns an iterator for the list."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC",
  (self._name,)
@@ -146,7 +146,7 @@ class ListManager[T]:

  def __contains__(self, value: T) -> bool:
  """Checks for the existence of an item in the list (e.g., `'item' in my_list`)."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT 1 FROM beaver_lists WHERE list_name = ? AND item_value = ? LIMIT 1",
  (self._name, self._serialize(value))
@@ -161,7 +161,7 @@ class ListManager[T]:

  def _get_order_at_index(self, index: int) -> float:
  """Helper to get the float `item_order` at a specific index."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
  (self._name, index),
@@ -176,8 +176,8 @@ class ListManager[T]:

  def push(self, value: T):
  """Pushes an item to the end of the list."""
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
  (self._name,),
@@ -192,8 +192,8 @@ class ListManager[T]:

  def prepend(self, value: T):
  """Prepends an item to the beginning of the list."""
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
  (self._name,),
@@ -208,7 +208,7 @@ class ListManager[T]:

  def insert(self, index: int, value: T):
  """Inserts an item at a specific index."""
- with self._conn:
+ with self._db.connection:
  list_len = len(self)
  if index <= 0:
  self.prepend(value)
@@ -222,15 +222,15 @@ class ListManager[T]:
  order_after = self._get_order_at_index(index)
  new_order = order_before + (order_after - order_before) / 2.0

- self._conn.execute(
+ self._db.connection.execute(
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
  (self._name, new_order, self._serialize(value)),
  )

  def pop(self) -> T | None:
  """Removes and returns the last item from the list."""
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1",
  (self._name,),
@@ -247,8 +247,8 @@ class ListManager[T]:

  def deque(self) -> T | None:
  """Removes and returns the first item from the list."""
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1",
  (self._name,),
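The `insert` path above keeps the list ordered with fractional `item_order` values rather than renumbering rows: a new item always lands halfway between its neighbors. A worked example of that arithmetic:

```python
# Inserting between item_order 1.0 and 2.0, then inserting again at the same index.
order_before, order_after = 1.0, 2.0
first = order_before + (order_after - order_before) / 2.0   # 1.5
second = order_before + (first - order_before) / 2.0        # 1.25
print(first, second)
```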
beaver/logs.py CHANGED
@@ -8,7 +8,7 @@ from datetime import datetime, timedelta, timezone
  from queue import Empty, Queue
  from typing import Any, AsyncIterator, Callable, Iterator, Type, TypeVar

- from .types import JsonSerializable
+ from .types import JsonSerializable, IDatabase


  # A special message object used to signal the iterator to gracefully shut down.
@@ -23,14 +23,14 @@ class LiveIterator[T,R]:

  def __init__(
  self,
- db_path: str,
+ db: IDatabase,
  log_name: str,
  window: timedelta,
  period: timedelta,
  aggregator: Callable[[list[T]], R],
  deserializer: Callable[[str], T],
  ):
- self._db_path = db_path
+ self._db = db
  self._log_name = log_name
  self._window_duration_seconds = window.total_seconds()
  self._sampling_period_seconds = period.total_seconds()
@@ -43,9 +43,7 @@ class LiveIterator[T,R]:
  def _polling_loop(self):
  """The main loop for the background thread that queries and aggregates data."""
  # Each thread needs its own database connection.
- thread_conn = sqlite3.connect(self._db_path, check_same_thread=False)
- thread_conn.row_factory = sqlite3.Row
-
+ thread_conn = self._db.connection
  window_deque: collections.deque[tuple[float, T]] = collections.deque()
  last_seen_timestamp = 0.0

@@ -179,13 +177,11 @@ class LogManager[T]:
  def __init__(
  self,
  name: str,
- conn: sqlite3.Connection,
- db_path: str,
+ db,
  model: Type[T] | None = None,
  ):
  self._name = name
- self._conn = conn
- self._db_path = db_path
+ self._db = db
  self._model = model

  def _serialize(self, value: T) -> str:
@@ -215,8 +211,8 @@ class LogManager[T]:
  ts = timestamp or datetime.now(timezone.utc)
  ts_float = ts.timestamp()

- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT INTO beaver_logs (log_name, timestamp, data) VALUES (?, ?, ?)",
  (self._name, ts_float, self._serialize(data)),
  )
@@ -235,7 +231,7 @@ class LogManager[T]:
  start_ts = start.timestamp()
  end_ts = end.timestamp()

- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT data FROM beaver_logs WHERE log_name = ? AND timestamp BETWEEN ? AND ? ORDER BY timestamp ASC",
  (self._name, start_ts, end_ts),
@@ -265,7 +261,7 @@ class LogManager[T]:
  An iterator that yields the results of the aggregator.
  """
  return LiveIterator(
- db_path=self._db_path,
+ db=self._db,
  log_name=self._name,
  window=window,
  period=period,
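`LiveIterator` keeps a sliding window of `(timestamp, value)` pairs in a deque, evicts entries older than the window, and applies the user's aggregator once per sampling period. A simplified, self-contained illustration of that idea (not the package's actual loop):

```python
import collections
import time

window_seconds = 10.0
window: collections.deque[tuple[float, float]] = collections.deque()

def record(value: float) -> None:
    window.append((time.time(), value))

def aggregate(aggregator) -> float:
    cutoff = time.time() - window_seconds
    while window and window[0][0] < cutoff:   # drop samples that fell out of the window
        window.popleft()
    return aggregator([value for _, value in window])

record(1.0)
record(2.0)
record(3.0)
print(aggregate(lambda values: sum(values) / len(values)))  # 2.0
```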
beaver/queues.py CHANGED
@@ -4,7 +4,7 @@ import sqlite3
  import time
  from typing import Any, Literal, NamedTuple, Type, overload

- from .types import JsonSerializable
+ from .types import JsonSerializable, IDatabase


  class QueueItem[T](NamedTuple):
@@ -25,6 +25,12 @@ class AsyncQueueManager[T]:
  """Asynchronously adds an item to the queue with a specific priority."""
  await asyncio.to_thread(self._queue.put, data, priority)

+ async def peek(self) -> QueueItem[T] | None:
+ """
+ Asynchronously returns the first item without removing it, if any, otherwise returns None.
+ """
+ return await asyncio.to_thread(self._queue.peek)
+
  @overload
  async def get(self, block: Literal[True] = True, timeout: float | None = None) -> QueueItem[T]: ...
  @overload
@@ -44,9 +50,9 @@ class QueueManager[T]:
  producer-consumer priority queue.
  """

- def __init__(self, name: str, conn: sqlite3.Connection, model: Type[T] | None = None):
+ def __init__(self, name: str, db: IDatabase, model: Type[T] | None = None):
  self._name = name
- self._conn = conn
+ self._db = db
  self._model = model

  def _serialize(self, value: T) -> str:
@@ -71,19 +77,19 @@ class QueueManager[T]:
  data: The JSON-serializable data to store.
  priority: The priority of the item (lower numbers are higher priority).
  """
- with self._conn:
- self._conn.execute(
+ with self._db.connection:
+ self._db.connection.execute(
  "INSERT INTO beaver_priority_queues (queue_name, priority, timestamp, data) VALUES (?, ?, ?, ?)",
  (self._name, priority, time.time(), self._serialize(data)),
  )

- def _get_item_atomically(self) -> QueueItem[T] | None:
+ def _get_item_atomically(self, pop:bool=True) -> QueueItem[T] | None:
  """
  Performs a single, atomic attempt to retrieve and remove the
  highest-priority item from the queue. Returns None if the queue is empty.
  """
- with self._conn:
- cursor = self._conn.cursor()
+ with self._db.connection:
+ cursor = self._db.connection.cursor()
  cursor.execute(
  """
  SELECT rowid, priority, timestamp, data
@@ -100,11 +106,20 @@ class QueueManager[T]:
  return None

  rowid, priority, timestamp, data = result
- cursor.execute("DELETE FROM beaver_priority_queues WHERE rowid = ?", (rowid,))

- return QueueItem(
- priority=priority, timestamp=timestamp, data=self._deserialize(data)
- )
+ if pop:
+ self._db.connection.execute("DELETE FROM beaver_priority_queues WHERE rowid = ?", (rowid,))
+
+ return QueueItem(
+ priority=priority, timestamp=timestamp, data=self._deserialize(data)
+ )
+
+ def peek(self) -> QueueItem[T] | None:
+ """
+ Retrieves the first item of the queue.
+ If the queue is empy, returns None.
+ """
+ return self._get_item_atomically(pop=False)

  @overload
  def get(self, block: Literal[True] = True, timeout: float | None = None) -> QueueItem[T]: ...
@@ -154,7 +169,7 @@ class QueueManager[T]:

  def __len__(self) -> int:
  """Returns the current number of items in the queue."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT COUNT(*) FROM beaver_priority_queues WHERE queue_name = ?",
  (self._name,),
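An assumed usage of the new non-destructive `peek()` added in this release (the `db.queue(...)` factory comes from `core.py` above; the `put` signature is inferred from the docstring in this diff, and the import path is an assumption):

```python
from beaver import BeaverDB  # assumed import path

db = BeaverDB("demo.db")
jobs = db.queue("jobs")
jobs.put({"task": "resize"}, priority=1)

head = jobs.peek()              # returns a QueueItem without removing it
if head is not None:
    print(head.priority, head.data)
print(len(jobs))                # still 1: peek does not pop
```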
beaver/types.py CHANGED
@@ -1,4 +1,5 @@
  import json
+ import sqlite3
  from typing import Protocol, Type, runtime_checkable, Self


@@ -49,3 +50,9 @@ def stub(msg: str):
  raise TypeError(msg)

  return Stub()
+
+
+ class IDatabase(Protocol):
+ @property
+ def connection(self) -> sqlite3.Connection:
+ ...
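Because `IDatabase` is a structural `Protocol`, any object that exposes a `connection` property type-checks as a database, which keeps the managers decoupled from `BeaverDB` itself. The in-memory double below is an illustration of that, not part of the package:

```python
import sqlite3


class InMemoryDB:
    """Satisfies IDatabase structurally: it only needs a `connection` property."""

    def __init__(self) -> None:
        self._conn = sqlite3.connect(":memory:")
        self._conn.row_factory = sqlite3.Row

    @property
    def connection(self) -> sqlite3.Connection:
        return self._conn
```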
beaver/vectors.py CHANGED
@@ -3,6 +3,8 @@ import sqlite3
  import threading
  from typing import Dict, List, Set, Tuple

+ from .types import IDatabase
+
  try:
  import faiss
  import numpy as np
@@ -23,12 +25,12 @@ class VectorIndex:
  user-provided string IDs to the internal integer IDs required by Faiss.
  """

- def __init__(self, collection_name: str, conn: sqlite3.Connection):
+ def __init__(self, collection_name: str, db: IDatabase):
  """
  Initializes the VectorIndex for a specific collection.
  """
- self._collection_name = collection_name
- self._conn = conn
+ self._collection = collection_name
+ self._db = db
  # A lock to ensure thread safety for in-memory operations and synchronization checks.
  self._lock = threading.Lock()
  # Tracks the overall version of the collection this instance is aware of.
@@ -66,7 +68,7 @@ class VectorIndex:
  elif self._dimension != dim:
  # If a dimension is already set, all subsequent vectors must match.
  raise ValueError(
- f"Vector dimension mismatch for collection '{self._collection_name}'. "
+ f"Vector dimension mismatch for collection '{self._collection}'. "
  f"Expected {self._dimension}, but got {dim}."
  )

@@ -83,12 +85,12 @@ class VectorIndex:
  # INSERT OR IGNORE is an atomic and safe way to create a new mapping only if it's missing.
  cursor.execute(
  "INSERT OR IGNORE INTO _beaver_ann_id_mapping (collection_name, str_id) VALUES (?, ?)",
- (self._collection_name, str_id)
+ (self._collection, str_id)
  )
  # Retrieve the now-guaranteed-to-exist integer ID.
  cursor.execute(
  "SELECT int_id FROM _beaver_ann_id_mapping WHERE collection_name = ? AND str_id = ?",
- (self._collection_name, str_id)
+ (self._collection, str_id)
  )
  result = cursor.fetchone()
  if not result:
@@ -103,20 +105,20 @@ class VectorIndex:

  def _get_db_version(self) -> int:
  """Gets the current overall version of the collection from the database."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT version FROM beaver_collection_versions WHERE collection_name = ?",
- (self._collection_name,),
+ (self._collection,),
  )
  result = cursor.fetchone()
  return result[0] if result else 0

  def _get_db_base_index_version(self) -> int:
  """Gets the version of the persistent on-disk base index from the database."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
- (self._collection_name,),
+ (self._collection,),
  )
  result = cursor.fetchone()
  return result[0] if result else 0
@@ -146,10 +148,10 @@ class VectorIndex:

  def _load_id_mappings(self):
  """Loads the complete str <-> int ID mapping from the DB into in-memory caches."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT str_id, int_id FROM _beaver_ann_id_mapping WHERE collection_name = ?",
- (self._collection_name,)
+ (self._collection,)
  )
  # Fetch all mappings at once for efficiency.
  all_mappings = cursor.fetchall()
@@ -158,10 +160,10 @@ class VectorIndex:

  def _load_base_index(self):
  """Loads and deserializes the persistent base index from the database BLOB."""
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  cursor.execute(
  "SELECT index_data, base_index_version FROM _beaver_ann_indexes WHERE collection_name = ?",
- (self._collection_name,),
+ (self._collection,),
  )
  result = cursor.fetchone()
  if result and result["index_data"]:
@@ -184,11 +186,11 @@ class VectorIndex:
  "Catches up" to changes by rebuilding the in-memory delta index and
  deletion set from the database logs.
  """
- cursor = self._conn.cursor()
+ cursor = self._db.connection.cursor()
  # Sync the set of deleted integer IDs.
  cursor.execute(
  "SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?",
- (self._collection_name,)
+ (self._collection,)
  )
  self._deleted_int_ids = {row["int_id"] for row in cursor.fetchall()}

@@ -200,7 +202,7 @@ class VectorIndex:
  JOIN beaver_collections c ON p.str_id = c.item_id AND p.collection_name = c.collection
  WHERE p.collection_name = ?
  """,
- (self._collection_name,)
+ (self._collection,)
  )
  pending_items = cursor.fetchall()

@@ -216,7 +218,7 @@ class VectorIndex:
  if vectors.ndim == 1:
  vectors = vectors.reshape(-1, self._dimension)
  if vectors.shape[1] != self._dimension:
- raise ValueError(f"Inconsistent vector dimensions in pending log for '{self._collection_name}'.")
+ raise ValueError(f"Inconsistent vector dimensions in pending log for '{self._collection}'.")

  # Rebuild the delta index from scratch with all current pending items.
  self._delta_index = faiss.IndexIDMap(faiss.IndexFlatL2(self._dimension))
@@ -238,7 +240,7 @@ class VectorIndex:
  # Add the string ID to the log for other processes to sync.
  cursor.execute(
  "INSERT OR IGNORE INTO _beaver_ann_pending_log (collection_name, str_id) VALUES (?, ?)",
- (self._collection_name, item_id),
+ (self._collection, item_id),
  )
  # Create the delta index if this is the first item added.
  if self._delta_index is None:
@@ -260,7 +262,7 @@ class VectorIndex:
  # Add the integer ID to the deletion log.
  cursor.execute(
  "INSERT INTO _beaver_ann_deletions_log (collection_name, int_id) VALUES (?, ?)",
- (self._collection_name, int_id),
+ (self._collection, int_id),
  )
  # Also add to the live in-memory deletion set.
  self._deleted_int_ids.add(int_id)
@@ -323,10 +325,10 @@ class VectorIndex:
  if self._dimension is None: return # Nothing to compact.

  # Step 1: Take a snapshot of the logs. This defines the scope of this compaction run.
- cursor = self._conn.cursor()
- cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self._collection_name,))
+ cursor = self._db.connection.cursor()
+ cursor.execute("SELECT str_id FROM _beaver_ann_pending_log WHERE collection_name = ?", (self._collection,))
  pending_str_ids = {row["str_id"] for row in cursor.fetchall()}
- cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self._collection_name,))
+ cursor.execute("SELECT int_id FROM _beaver_ann_deletions_log WHERE collection_name = ?", (self._collection,))
  deleted_int_ids_snapshot = {row["int_id"] for row in cursor.fetchall()}

  deleted_str_ids_snapshot = {self._int_to_str_id[int_id] for int_id in deleted_int_ids_snapshot if int_id in self._int_to_str_id}
@@ -334,11 +336,11 @@ class VectorIndex:
  # Step 2: Fetch all vectors from the main table that haven't been marked for deletion.
  # This is the long-running part that happens "offline" in a background thread.
  if not deleted_str_ids_snapshot:
- cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self._collection_name,))
+ cursor.execute("SELECT item_id, item_vector FROM beaver_collections WHERE collection = ?", (self._collection,))
  else:
  cursor.execute(
  f"SELECT item_id, item_vector FROM beaver_collections WHERE collection = ? AND item_id NOT IN ({','.join('?' for _ in deleted_str_ids_snapshot)})",
- (self._collection_name, *deleted_str_ids_snapshot)
+ (self._collection, *deleted_str_ids_snapshot)
  )

  all_valid_vectors = cursor.fetchall()
@@ -361,16 +363,16 @@ class VectorIndex:
  index_data = buffer.getvalue()

  # Step 5: Perform the atomic swap in the database. This is a fast, transactional write.
- with self._conn:
+ with self._db.connection:
  # Increment the overall collection version to signal a change.
- self._conn.execute("INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) ON CONFLICT(collection_name) DO UPDATE SET version = version + 1", (self._collection_name,))
+ self._db.connection.execute("INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) ON CONFLICT(collection_name) DO UPDATE SET version = version + 1", (self._collection,))
  new_version = self._get_db_version()

  # Update the on-disk base index and its version number.
- self._conn.execute("INSERT INTO _beaver_ann_indexes (collection_name, index_data, base_index_version) VALUES (?, ?, ?) ON CONFLICT(collection_name) DO UPDATE SET index_data = excluded.index_data, base_index_version = excluded.base_index_version", (self._collection_name, index_data, new_version))
+ self._db.connection.execute("INSERT INTO _beaver_ann_indexes (collection_name, index_data, base_index_version) VALUES (?, ?, ?) ON CONFLICT(collection_name) DO UPDATE SET index_data = excluded.index_data, base_index_version = excluded.base_index_version", (self._collection, index_data, new_version))

  # Atomically clear the log entries that were included in this compaction run.
  if pending_str_ids:
- self._conn.execute(f"DELETE FROM _beaver_ann_pending_log WHERE collection_name = ? AND str_id IN ({','.join('?' for _ in pending_str_ids)})", (self._collection_name, *pending_str_ids))
+ self._db.connection.execute(f"DELETE FROM _beaver_ann_pending_log WHERE collection_name = ? AND str_id IN ({','.join('?' for _ in pending_str_ids)})", (self._collection, *pending_str_ids))
  if deleted_int_ids_snapshot:
- self._conn.execute(f"DELETE FROM _beaver_ann_deletions_log WHERE collection_name = ? AND int_id IN ({','.join('?' for _ in deleted_int_ids_snapshot)})", (self._collection_name, *deleted_int_ids_snapshot))
+ self._db.connection.execute(f"DELETE FROM _beaver_ann_deletions_log WHERE collection_name = ? AND int_id IN ({','.join('?' for _ in deleted_int_ids_snapshot)})", (self._collection, *deleted_int_ids_snapshot))
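The compaction step bumps the collection version with a single UPSERT so concurrent readers can detect that the base index changed. The same SQL pattern in isolation, against a standalone table (column types are assumed; only `collection_name TEXT PRIMARY KEY` is visible in this diff):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE beaver_collection_versions (collection_name TEXT PRIMARY KEY, version INTEGER)"
)
for _ in range(3):
    conn.execute(
        "INSERT INTO beaver_collection_versions (collection_name, version) VALUES (?, 1) "
        "ON CONFLICT(collection_name) DO UPDATE SET version = version + 1",
        ("docs",),
    )
version = conn.execute(
    "SELECT version FROM beaver_collection_versions WHERE collection_name = ?", ("docs",)
).fetchone()[0]
print(version)  # 3
```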
beaver_db-0.16.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: beaver-db
- Version: 0.16.6
+ Version: 0.16.8
  Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
  License-File: LICENSE
  Classifier: License :: OSI Approved :: MIT License
@@ -26,6 +26,8 @@ A fast, single-file, multi-modal database for Python, built with the standard `s

  `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.

+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
+
  ## Design Philosophy

  `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
beaver_db-0.16.8.dist-info/RECORD ADDED
@@ -0,0 +1,15 @@
+ beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
+ beaver/blobs.py,sha256=YkIEskHD6oHRaJTF0P25HrTT8LqM-REyV_UBPVQxeqQ,4055
+ beaver/channels.py,sha256=kIuwKMDBdDQObaKT23znsMXzfpKfE7pXSxvf-u4LlpY,9554
+ beaver/collections.py,sha256=Wm684pGp-E89PCq9gcbbmRC9VMtTxolRVXnrxKlw2m8,24615
+ beaver/core.py,sha256=68vjuEbkJTHv4SltCLCrgs34BpLCeL602oJZ6CJ34Zo,14560
+ beaver/dicts.py,sha256=Xp8lPfQt08O8zCbptQLWQLO79OxG6uAVER6ryj3SScQ,5495
+ beaver/lists.py,sha256=rfJ8uTNLkMREYc0uGx0z1VKt2m3eR9hvbdvDD58EbmQ,10140
+ beaver/logs.py,sha256=a5xenwl5NZeegIU0dWVEs67lvaHzzw-JRAZtEzNNO3E,9529
+ beaver/queues.py,sha256=Fr3oie63EtceSoiC8EOEDSLu1tDI8q2MYLXd8MEeC3g,6476
+ beaver/types.py,sha256=WZLINf7hy6zdKdAFQK0EVMSl5vnY_KnrHXNdXgAKuPg,1582
+ beaver/vectors.py,sha256=qvI6RwUOGrhVH5d6PUmI3jKDaoDotMy0iy-bHyvmXks,18496
+ beaver_db-0.16.8.dist-info/METADATA,sha256=SN90Fv3zk6_Xa38nkVDdWNr6mifgnfi_6dJtfEeg7PA,18240
+ beaver_db-0.16.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ beaver_db-0.16.8.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
+ beaver_db-0.16.8.dist-info/RECORD,,
beaver_db-0.16.6.dist-info/RECORD REMOVED
@@ -1,15 +0,0 @@
- beaver/__init__.py,sha256=qyEzF1Os7w4b4Hijgz0Y0R4zTrRBrHIGT1mEkZFl2YM,101
- beaver/blobs.py,sha256=5cmcvlJLY9jaftIRuNbdEryZxI47sw_pYpysYli23NY,3996
- beaver/channels.py,sha256=VBXJDw_be-bSY76kRVzFdMZFoy4CepSMwJACc9NJgpc,9658
- beaver/collections.py,sha256=860bYpchokjleDIebJaNU1jcGTCMIbg0t4MasEVVbOk,24486
- beaver/core.py,sha256=t_UzpqcbF2U8BjmQ9aIWTvUzPuVuOLcPzTrZQ2htjn4,13706
- beaver/dicts.py,sha256=1BQ9A_cMkJ7l5ayWbDG-4Wi3WtQ-9BKd7Wj_CB7dGlU,5410
- beaver/lists.py,sha256=Q7xjyReBWFg47nBrXbt09GvBJkEmXvpW9ptL9xCnXC8,9946
- beaver/logs.py,sha256=mlJizZU0emlqLwuNeBJSPlict35Vyi35L4eIl5orv-M,9673
- beaver/queues.py,sha256=IQoeNhcYrVZTuH_4bWhtiEa-EYbFx_2iVKkR254XPnE,5953
- beaver/types.py,sha256=bR1bfLgemcySy9mnVL_hxAtog4wN8EmgfF2OonOWjQA,1464
- beaver/vectors.py,sha256=grwiRdusa39s-J9c8nK8LO7duhuYTaLR2Az6wHKs4rU,18487
- beaver_db-0.16.6.dist-info/METADATA,sha256=8SDSyQv8WgaEmMXIzWLvSr8EoQjj82457-rdINHnprY,18068
- beaver_db-0.16.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- beaver_db-0.16.6.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
- beaver_db-0.16.6.dist-info/RECORD,,