beaver-db 0.9.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

beaver/core.py CHANGED
@@ -14,7 +14,7 @@ class BeaverDB:
14
14
  This class manages the database connection and table schemas.
15
15
  """
16
16
 
17
- def __init__(self, db_path: str):
17
+ def __init__(self, db_path: str, timeout:float=30.0):
18
18
  """
19
19
  Initializes the database connection and creates all necessary tables.
20
20
 
@@ -23,149 +23,221 @@ class BeaverDB:
23
23
  """
24
24
  self._db_path = db_path
25
25
  # Enable WAL mode for better concurrency between readers and writers
26
- self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
26
+ self._conn = sqlite3.connect(self._db_path, check_same_thread=False, timeout=timeout)
27
27
  self._conn.execute("PRAGMA journal_mode=WAL;")
28
28
  self._conn.row_factory = sqlite3.Row
29
- self._create_all_tables()
30
29
  self._channels: dict[str, ChannelManager] = {}
31
30
  self._channels_lock = threading.Lock()
31
+ # Add a cache and lock for CollectionManager singletons
32
+ self._collections: dict[str, CollectionManager] = {}
33
+ self._collections_lock = threading.Lock()
34
+
35
+ # Initialize the schemas
36
+ self._create_all_tables()
32
37
 
33
38
  def _create_all_tables(self):
34
39
  """Initializes all required tables in the database file."""
35
- self._create_pubsub_table()
36
- self._create_list_table()
37
- self._create_collections_table()
38
- self._create_fts_table()
39
- self._create_edges_table()
40
- self._create_versions_table()
41
- self._create_dict_table()
42
- self._create_priority_queue_table()
40
+ with self._conn:
41
+ self._create_pubsub_table()
42
+ self._create_list_table()
43
+ self._create_collections_table()
44
+ self._create_fts_table()
45
+ self._create_trigrams_table()
46
+ self._create_edges_table()
47
+ self._create_versions_table()
48
+ self._create_dict_table()
49
+ self._create_priority_queue_table()
50
+ self._create_ann_indexes_table()
51
+ self._create_ann_pending_log_table()
52
+ self._create_ann_deletions_log_table()
53
+ self._create_ann_id_mapping_table()
54
+
55
+ def _create_ann_indexes_table(self):
56
+ """Creates the table to store the serialized base ANN index."""
57
+ self._conn.execute(
58
+ """
59
+ CREATE TABLE IF NOT EXISTS _beaver_ann_indexes (
60
+ collection_name TEXT PRIMARY KEY,
61
+ index_data BLOB,
62
+ base_index_version INTEGER NOT NULL DEFAULT 0
63
+ )
64
+ """
65
+ )
66
+
67
+ def _create_ann_pending_log_table(self):
68
+ """Creates the log for new vector additions."""
69
+ self._conn.execute(
70
+ """
71
+ CREATE TABLE IF NOT EXISTS _beaver_ann_pending_log (
72
+ collection_name TEXT NOT NULL,
73
+ str_id TEXT NOT NULL,
74
+ PRIMARY KEY (collection_name, str_id)
75
+ )
76
+ """
77
+ )
78
+
79
+ def _create_ann_deletions_log_table(self):
80
+ """Creates the log for vector deletions (tombstones)."""
81
+ self._conn.execute(
82
+ """
83
+ CREATE TABLE IF NOT EXISTS _beaver_ann_deletions_log (
84
+ collection_name TEXT NOT NULL,
85
+ int_id INTEGER NOT NULL,
86
+ PRIMARY KEY (collection_name, int_id)
87
+ )
88
+ """
89
+ )
90
+
91
+ def _create_ann_id_mapping_table(self):
92
+ """Creates the table to map string IDs to integer IDs for Faiss."""
93
+ self._conn.execute(
94
+ """
95
+ CREATE TABLE IF NOT EXISTS _beaver_ann_id_mapping (
96
+ collection_name TEXT NOT NULL,
97
+ str_id TEXT NOT NULL,
98
+ int_id INTEGER PRIMARY KEY AUTOINCREMENT,
99
+ UNIQUE(collection_name, str_id)
100
+ )
101
+ """
102
+ )
43
103
 
44
104
  def _create_priority_queue_table(self):
45
105
  """Creates the priority queue table and its performance index."""
46
- with self._conn:
47
- self._conn.execute(
48
- """
49
- CREATE TABLE IF NOT EXISTS beaver_priority_queues (
50
- queue_name TEXT NOT NULL,
51
- priority REAL NOT NULL,
52
- timestamp REAL NOT NULL,
53
- data TEXT NOT NULL
54
- )
55
- """
56
- )
57
- self._conn.execute(
58
- """
59
- CREATE INDEX IF NOT EXISTS idx_priority_queue_order
60
- ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
61
- """
106
+ self._conn.execute(
107
+ """
108
+ CREATE TABLE IF NOT EXISTS beaver_priority_queues (
109
+ queue_name TEXT NOT NULL,
110
+ priority REAL NOT NULL,
111
+ timestamp REAL NOT NULL,
112
+ data TEXT NOT NULL
62
113
  )
114
+ """
115
+ )
116
+ self._conn.execute(
117
+ """
118
+ CREATE INDEX IF NOT EXISTS idx_priority_queue_order
119
+ ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
120
+ """
121
+ )
63
122
 
64
123
  def _create_dict_table(self):
65
124
  """Creates the namespaced dictionary table."""
66
- with self._conn:
67
- self._conn.execute(
68
- """
69
- CREATE TABLE IF NOT EXISTS beaver_dicts (
70
- dict_name TEXT NOT NULL,
71
- key TEXT NOT NULL,
72
- value TEXT NOT NULL,
73
- expires_at REAL,
74
- PRIMARY KEY (dict_name, key)
75
- )
125
+ self._conn.execute(
76
126
  """
127
+ CREATE TABLE IF NOT EXISTS beaver_dicts (
128
+ dict_name TEXT NOT NULL,
129
+ key TEXT NOT NULL,
130
+ value TEXT NOT NULL,
131
+ expires_at REAL,
132
+ PRIMARY KEY (dict_name, key)
77
133
  )
134
+ """
135
+ )
78
136
 
79
137
  def _create_pubsub_table(self):
80
138
  """Creates the pub/sub log table."""
81
- with self._conn:
82
- self._conn.execute(
83
- """
84
- CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
85
- timestamp REAL PRIMARY KEY,
86
- channel_name TEXT NOT NULL,
87
- message_payload TEXT NOT NULL
88
- )
139
+ self._conn.execute(
89
140
  """
141
+ CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
142
+ timestamp REAL PRIMARY KEY,
143
+ channel_name TEXT NOT NULL,
144
+ message_payload TEXT NOT NULL
90
145
  )
91
- self._conn.execute(
92
- """
93
- CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
94
- ON beaver_pubsub_log (channel_name, timestamp)
146
+ """
147
+ )
148
+ self._conn.execute(
95
149
  """
96
- )
150
+ CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
151
+ ON beaver_pubsub_log (channel_name, timestamp)
152
+ """
153
+ )
97
154
 
98
155
  def _create_list_table(self):
99
156
  """Creates the lists table."""
100
- with self._conn:
101
- self._conn.execute(
102
- """
103
- CREATE TABLE IF NOT EXISTS beaver_lists (
104
- list_name TEXT NOT NULL,
105
- item_order REAL NOT NULL,
106
- item_value TEXT NOT NULL,
107
- PRIMARY KEY (list_name, item_order)
108
- )
157
+ self._conn.execute(
109
158
  """
159
+ CREATE TABLE IF NOT EXISTS beaver_lists (
160
+ list_name TEXT NOT NULL,
161
+ item_order REAL NOT NULL,
162
+ item_value TEXT NOT NULL,
163
+ PRIMARY KEY (list_name, item_order)
110
164
  )
165
+ """
166
+ )
111
167
 
112
168
  def _create_collections_table(self):
113
169
  """Creates the main table for storing documents and vectors."""
114
- with self._conn:
115
- self._conn.execute(
116
- """
117
- CREATE TABLE IF NOT EXISTS beaver_collections (
118
- collection TEXT NOT NULL,
119
- item_id TEXT NOT NULL,
120
- item_vector BLOB,
121
- metadata TEXT,
122
- PRIMARY KEY (collection, item_id)
123
- )
170
+ self._conn.execute(
124
171
  """
172
+ CREATE TABLE IF NOT EXISTS beaver_collections (
173
+ collection TEXT NOT NULL,
174
+ item_id TEXT NOT NULL,
175
+ item_vector BLOB,
176
+ metadata TEXT,
177
+ PRIMARY KEY (collection, item_id)
125
178
  )
179
+ """
180
+ )
126
181
 
127
182
  def _create_fts_table(self):
128
183
  """Creates the virtual FTS table for full-text search."""
129
- with self._conn:
130
- self._conn.execute(
131
- """
132
- CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
133
- collection,
134
- item_id,
135
- field_path,
136
- field_content,
137
- tokenize = 'porter'
138
- )
184
+ self._conn.execute(
139
185
  """
186
+ CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
187
+ collection,
188
+ item_id,
189
+ field_path,
190
+ field_content,
191
+ tokenize = 'porter'
140
192
  )
193
+ """
194
+ )
195
+
196
+ def _create_trigrams_table(self):
197
+ """Creates the table for the fuzzy search trigram index."""
198
+ self._conn.execute(
199
+ """
200
+ CREATE TABLE IF NOT EXISTS beaver_trigrams (
201
+ collection TEXT NOT NULL,
202
+ item_id TEXT NOT NULL,
203
+ field_path TEXT NOT NULL,
204
+ trigram TEXT NOT NULL,
205
+ PRIMARY KEY (collection, field_path, trigram, item_id)
206
+ )
207
+ """
208
+ )
209
+ self._conn.execute(
210
+ """
211
+ CREATE INDEX IF NOT EXISTS idx_trigram_lookup
212
+ ON beaver_trigrams (collection, trigram, field_path)
213
+ """
214
+ )
141
215
 
142
216
  def _create_edges_table(self):
143
217
  """Creates the table for storing relationships between documents."""
144
- with self._conn:
145
- self._conn.execute(
146
- """
147
- CREATE TABLE IF NOT EXISTS beaver_edges (
148
- collection TEXT NOT NULL,
149
- source_item_id TEXT NOT NULL,
150
- target_item_id TEXT NOT NULL,
151
- label TEXT NOT NULL,
152
- metadata TEXT,
153
- PRIMARY KEY (collection, source_item_id, target_item_id, label)
154
- )
218
+ self._conn.execute(
155
219
  """
220
+ CREATE TABLE IF NOT EXISTS beaver_edges (
221
+ collection TEXT NOT NULL,
222
+ source_item_id TEXT NOT NULL,
223
+ target_item_id TEXT NOT NULL,
224
+ label TEXT NOT NULL,
225
+ metadata TEXT,
226
+ PRIMARY KEY (collection, source_item_id, target_item_id, label)
156
227
  )
228
+ """
229
+ )
157
230
 
158
231
  def _create_versions_table(self):
159
232
  """Creates a table to track the version of each collection for caching."""
160
- with self._conn:
161
- self._conn.execute(
162
- """
163
- CREATE TABLE IF NOT EXISTS beaver_collection_versions (
164
- collection_name TEXT PRIMARY KEY,
165
- version INTEGER NOT NULL DEFAULT 0
166
- )
233
+ self._conn.execute(
167
234
  """
235
+ CREATE TABLE IF NOT EXISTS beaver_collection_versions (
236
+ collection_name TEXT PRIMARY KEY,
237
+ version INTEGER NOT NULL DEFAULT 0
168
238
  )
239
+ """
240
+ )
169
241
 
170
242
  def close(self):
171
243
  """Closes the database connection."""
@@ -200,11 +272,20 @@ class BeaverDB:
200
272
  return QueueManager(name, self._conn)
201
273
 
202
274
  def collection(self, name: str) -> CollectionManager:
203
- """Returns a wrapper for interacting with a document collection."""
275
+ """
276
+ Returns a singleton CollectionManager instance for interacting with a
277
+ document collection.
278
+ """
204
279
  if not isinstance(name, str) or not name:
205
280
  raise TypeError("Collection name must be a non-empty string.")
206
281
 
207
- return CollectionManager(name, self._conn)
282
+ # Use a thread-safe lock to ensure only one CollectionManager object is
283
+ # created per name. This is crucial for managing the in-memory state
284
+ # of the vector index consistently.
285
+ with self._collections_lock:
286
+ if name not in self._collections:
287
+ self._collections[name] = CollectionManager(name, self._conn)
288
+ return self._collections[name]
208
289
 
209
290
  def channel(self, name: str) -> ChannelManager:
210
291
  """