beaver-db 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/__init__.py +1 -1
- beaver/core.py +431 -102
- beaver_db-0.3.0.dist-info/METADATA +129 -0
- beaver_db-0.3.0.dist-info/RECORD +6 -0
- beaver_db-0.1.0.dist-info/METADATA +0 -117
- beaver_db-0.1.0.dist-info/RECORD +0 -6
- {beaver_db-0.1.0.dist-info → beaver_db-0.3.0.dist-info}/WHEEL +0 -0
- {beaver_db-0.1.0.dist-info → beaver_db-0.3.0.dist-info}/top_level.txt +0 -0
beaver/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from .core import BeaverDB,
|
|
1
|
+
from .core import BeaverDB, Document
|
beaver/core.py
CHANGED
|
@@ -1,35 +1,357 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import uuid
|
|
3
|
+
import numpy as np
|
|
2
4
|
import json
|
|
3
5
|
import sqlite3
|
|
4
6
|
import time
|
|
5
|
-
from typing import Any, AsyncIterator
|
|
7
|
+
from typing import Any, AsyncIterator, Union
|
|
6
8
|
|
|
7
|
-
# --- SQL Schema ---
|
|
8
|
-
# These statements are executed once to set up the database.
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
class BeaverDB:
    """
    An embedded, multi-modal database in a single SQLite file.

    Exposes a synchronous key-value store (`set`/`get`), persistent lists
    (`list`), vector collections (`collection`) and asynchronous pub/sub
    (`publish`/`subscribe`), all backed by one shared SQLite connection.
    """

    def __init__(self, db_path: str):
        """
        Initializes the database connection and creates necessary tables.

        Args:
            db_path: The path to the SQLite database file.
        """
        self._db_path = db_path
        # check_same_thread=False because publish() performs its write on a
        # worker thread (asyncio.to_thread) using this same connection.
        self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
        # Enable WAL mode for better concurrency between readers and writers
        self._conn.execute("PRAGMA journal_mode=WAL;")
        self._conn.row_factory = sqlite3.Row
        self._create_pubsub_table()
        self._create_kv_table()
        self._create_list_table()
        self._create_collections_table()

    def _create_pubsub_table(self):
        """Creates the pub/sub log table and its channel index if they don't exist."""
        with self._conn:
            self._conn.execute("""
                CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
                    timestamp REAL PRIMARY KEY,
                    channel_name TEXT NOT NULL,
                    message_payload TEXT NOT NULL
                )
            """)
            self._conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
                ON beaver_pubsub_log (channel_name, timestamp)
            """)

    def _create_kv_table(self):
        """Creates the key-value store table if it doesn't exist."""
        with self._conn:
            self._conn.execute("""
                CREATE TABLE IF NOT EXISTS _beaver_kv_store (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL
                )
            """)

    def _create_list_table(self):
        """Creates the lists table if it doesn't exist."""
        with self._conn:
            self._conn.execute("""
                CREATE TABLE IF NOT EXISTS beaver_lists (
                    list_name TEXT NOT NULL,
                    item_order REAL NOT NULL,
                    item_value TEXT NOT NULL,
                    PRIMARY KEY (list_name, item_order)
                )
            """)

    def _create_collections_table(self):
        """Creates the collections table if it doesn't exist."""
        with self._conn:
            self._conn.execute("""
                CREATE TABLE IF NOT EXISTS beaver_collections (
                    collection TEXT NOT NULL,
                    item_id TEXT NOT NULL,
                    item_vector BLOB NOT NULL,
                    metadata TEXT,
                    PRIMARY KEY (collection, item_id)
                )
            """)

    def close(self):
        """Closes the database connection."""
        if self._conn:
            self._conn.close()

    # --- Key-Value Store Methods ---

    def set(self, key: str, value: Any):
        """
        Stores a JSON-serializable value for a given key.
        This operation is synchronous. An existing value for the key is replaced.

        Args:
            key: The unique string identifier for the value.
            value: A JSON-serializable Python object (dict, list, str, int, etc.).

        Raises:
            TypeError: If the key is not a string or the value is not JSON-serializable.
        """
        if not isinstance(key, str):
            raise TypeError("Key must be a string.")

        try:
            json_value = json.dumps(value)
        except TypeError as e:
            raise TypeError("Value must be JSON-serializable.") from e

        with self._conn:
            self._conn.execute(
                "INSERT OR REPLACE INTO _beaver_kv_store (key, value) VALUES (?, ?)",
                (key, json_value)
            )

    def get(self, key: str) -> Any:
        """
        Retrieves a value for a given key.
        This operation is synchronous.

        Args:
            key: The string identifier for the value.

        Returns:
            The deserialized Python object, or None if the key is not found.

        Raises:
            TypeError: If the key is not a string.
        """
        if not isinstance(key, str):
            raise TypeError("Key must be a string.")

        cursor = self._conn.cursor()
        try:
            cursor.execute("SELECT value FROM _beaver_kv_store WHERE key = ?", (key,))
            result = cursor.fetchone()
        finally:
            # Release the cursor even when the query raises.
            cursor.close()

        if result is None:
            return None
        return json.loads(result['value'])

    # --- List Methods ---

    def list(self, name: str) -> "ListWrapper":
        """
        Returns a wrapper object for interacting with a specific list.

        Args:
            name: The name of the list.

        Returns:
            A ListWrapper instance bound to the given list name.

        Raises:
            TypeError: If the name is not a non-empty string.
        """
        if not isinstance(name, str) or not name:
            raise TypeError("List name must be a non-empty string.")
        return ListWrapper(name, self._conn)

    def collection(self, name: str) -> "CollectionWrapper":
        """Returns a wrapper for interacting with a vector collection."""
        return CollectionWrapper(name, self._conn)

    # --- Asynchronous Pub/Sub Methods ---

    async def publish(self, channel_name: str, payload: Any):
        """
        Publishes a JSON-serializable message to a channel.
        This operation is asynchronous: the database write runs on a worker thread.

        Args:
            channel_name: The non-empty name of the channel to publish to.
            payload: A JSON-serializable Python object.

        Raises:
            ValueError: If the channel name is not a non-empty string.
            TypeError: If the payload is not JSON-serializable.
        """
        if not isinstance(channel_name, str) or not channel_name:
            raise ValueError("Channel name must be a non-empty string.")
        try:
            json_payload = json.dumps(payload)
        except TypeError as e:
            raise TypeError("Message payload must be JSON-serializable.") from e

        await asyncio.to_thread(
            self._write_publish_to_db, channel_name, json_payload
        )

    def _write_publish_to_db(self, channel_name, json_payload):
        """
        The synchronous part of the publish operation.

        The timestamp column is the table's primary key, so each row needs a
        distinct, increasing value. If the clock has not advanced past the
        newest stored timestamp (coarse timer, clock adjustment, rapid bursts),
        the new timestamp is nudged just past it instead of letting the INSERT
        fail with an IntegrityError.
        """
        with self._conn:
            timestamp = time.time()
            latest = self._conn.execute(
                "SELECT MAX(timestamp) FROM beaver_pubsub_log"
            ).fetchone()[0]
            if latest is not None and timestamp <= latest:
                # 1e-6 s is larger than float64 spacing at epoch-second magnitudes,
                # so the sum is guaranteed to differ from `latest`.
                timestamp = latest + 1e-6
            self._conn.execute(
                "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
                (timestamp, channel_name, json_payload)
            )

    def subscribe(self, channel_name: str) -> "Subscriber":
        """
        Subscribes to a channel, returning an async iterator.
        """
        return Subscriber(self._conn, channel_name)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class ListWrapper:
    """
    A wrapper providing a Pythonic interface to a list in the database.

    Items are rows of `beaver_lists` keyed by (list_name, item_order); the
    float `item_order` defines the position, which lets an item be inserted
    between two neighbours by taking the midpoint of their orders.
    Values are stored as JSON text.
    """

    _SELECT_LAST = "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1"
    _SELECT_FIRST = "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1"

    def __init__(self, name: str, conn: sqlite3.Connection):
        self._name = name
        self._conn = conn

    def _query_one(self, sql: str, params: tuple):
        """Runs a query and returns its first row (or None), always closing the cursor."""
        cursor = self._conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()

    def __len__(self) -> int:
        """Returns the number of items in the list (e.g., `len(my_list)`)."""
        row = self._query_one(
            "SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
        )
        return row[0]

    def __getitem__(self, key: Union[int, slice]) -> Any:
        """
        Retrieves an item or slice from the list (e.g., `my_list[0]`, `my_list[1:3]`).

        Raises:
            ValueError: If a slice with a step other than 1 is requested.
            IndexError: If an integer index is out of range.
            TypeError: If the key is neither an int nor a slice.
        """
        if isinstance(key, slice):
            start, stop, step = key.indices(len(self))
            if step != 1:
                raise ValueError("Slicing with a step is not supported.")

            limit = stop - start
            if limit <= 0:
                return []

            cursor = self._conn.cursor()
            try:
                cursor.execute(
                    "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
                    (self._name, limit, start)
                )
                return [json.loads(row['item_value']) for row in cursor.fetchall()]
            finally:
                cursor.close()

        if isinstance(key, int):
            list_len = len(self)
            if key < -list_len or key >= list_len:
                raise IndexError("List index out of range.")

            # Negative indices count from the end, as with built-in lists.
            offset = key if key >= 0 else list_len + key
            result = self._query_one(
                "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
                (self._name, offset)
            )
            return json.loads(result['item_value']) if result else None

        raise TypeError("List indices must be integers or slices.")

    def _get_order_at_index(self, index: int) -> float:
        """
        Helper to get the float `item_order` at a specific index.

        Raises:
            IndexError: If no row exists at that (non-negative) position.
        """
        result = self._query_one(
            "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
            (self._name, index)
        )
        if result:
            return result[0]

        raise IndexError(f"{index} out of range.")

    def _insert_with_order(self, order: float, value: Any):
        """Writes one item at the given order value; the caller holds the transaction."""
        self._conn.execute(
            "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
            (self._name, order, json.dumps(value))
        )

    def push(self, value: Any):
        """Pushes an item to the end of the list."""
        with self._conn:
            row = self._query_one(
                "SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,)
            )
            # MAX() is NULL for an empty list; start from 0.0 in that case.
            max_order = row[0] or 0.0
            self._insert_with_order(max_order + 1.0, value)

    def prepend(self, value: Any):
        """Prepends an item to the beginning of the list."""
        with self._conn:
            row = self._query_one(
                "SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,)
            )
            # MIN() is NULL for an empty list; start from 0.0 in that case.
            min_order = row[0] or 0.0
            self._insert_with_order(min_order - 1.0, value)

    def insert(self, index: int, value: Any):
        """
        Inserts an item at a specific index.

        Follows `list.insert` semantics: a negative index counts from the end
        (so `insert(-1, x)` places `x` before the last item), an index at or
        before the start prepends, and an index at or past the end appends.
        """
        list_len = len(self)
        if index < 0:
            # Translate to a position from the front, clamped at the start.
            index = max(index + list_len, 0)

        if index <= 0:
            self.prepend(value)
            return
        if index >= list_len:
            self.push(value)
            return

        # Midpoint insertion: pick an order strictly between the two neighbours.
        order_before = self._get_order_at_index(index - 1)
        order_after = self._get_order_at_index(index)
        new_order = order_before + (order_after - order_before) / 2.0

        with self._conn:
            self._insert_with_order(new_order, value)

    def _remove_and_return(self, select_sql: str) -> Any:
        """Deletes the single row chosen by `select_sql` and returns its decoded value, or None if the list is empty."""
        with self._conn:
            result = self._query_one(select_sql, (self._name,))
            if not result:
                return None

            rowid_to_delete, value_to_return = result
            self._conn.execute("DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,))
            return json.loads(value_to_return)

    def pop(self) -> Any:
        """Removes and returns the last item from the list (None if the list is empty)."""
        return self._remove_and_return(self._SELECT_LAST)

    def deque(self) -> Any:
        """Removes and returns the first item from the list (None if the list is empty)."""
        return self._remove_and_return(self._SELECT_FIRST)
|
|
22
345
|
|
|
23
346
|
|
|
24
347
|
class Subscriber(AsyncIterator):
|
|
25
348
|
"""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
This object is returned by `BeaverDB.subscribe()` and is designed to
|
|
29
|
-
be used with `async with` and `async for`.
|
|
349
|
+
An async iterator that polls a channel for new messages.
|
|
350
|
+
Designed to be used with 'async with'.
|
|
30
351
|
"""
|
|
31
|
-
|
|
32
|
-
|
|
352
|
+
|
|
353
|
+
def __init__(self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1):
|
|
354
|
+
self._conn = conn
|
|
33
355
|
self._channel = channel_name
|
|
34
356
|
self._poll_interval = poll_interval
|
|
35
357
|
self._queue = asyncio.Queue()
|
|
@@ -37,50 +359,42 @@ class Subscriber(AsyncIterator):
|
|
|
37
359
|
self._polling_task = None
|
|
38
360
|
|
|
39
361
|
async def _poll_for_messages(self):
|
|
40
|
-
"""
|
|
362
|
+
"""Background task that polls the database for new messages."""
|
|
41
363
|
while True:
|
|
42
364
|
try:
|
|
43
|
-
# Run the synchronous DB query in a thread to avoid blocking asyncio
|
|
44
365
|
new_messages = await asyncio.to_thread(
|
|
45
366
|
self._fetch_new_messages_from_db
|
|
46
367
|
)
|
|
47
|
-
|
|
48
368
|
if new_messages:
|
|
49
|
-
for
|
|
50
|
-
payload = json.loads(
|
|
369
|
+
for msg in new_messages:
|
|
370
|
+
payload = json.loads(msg["message_payload"])
|
|
51
371
|
await self._queue.put(payload)
|
|
52
|
-
self._last_seen_timestamp = timestamp
|
|
53
|
-
|
|
372
|
+
self._last_seen_timestamp = msg["timestamp"]
|
|
54
373
|
await asyncio.sleep(self._poll_interval)
|
|
55
374
|
except asyncio.CancelledError:
|
|
56
|
-
# Gracefully exit when the task is cancelled
|
|
57
375
|
break
|
|
58
|
-
except Exception
|
|
59
|
-
# In a real app,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
(self._channel, self._last_seen_timestamp)
|
|
73
|
-
)
|
|
74
|
-
return cursor.fetchall()
|
|
376
|
+
except Exception:
|
|
377
|
+
# In a real app, add more robust error logging
|
|
378
|
+
await asyncio.sleep(self._poll_interval * 5)
|
|
379
|
+
|
|
380
|
+
def _fetch_new_messages_from_db(self) -> list:
|
|
381
|
+
"""The actual synchronous database query."""
|
|
382
|
+
cursor = self._conn.cursor()
|
|
383
|
+
cursor.execute(
|
|
384
|
+
"SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
|
|
385
|
+
(self._channel, self._last_seen_timestamp)
|
|
386
|
+
)
|
|
387
|
+
results = cursor.fetchall()
|
|
388
|
+
cursor.close()
|
|
389
|
+
return results
|
|
75
390
|
|
|
76
391
|
async def __aenter__(self):
|
|
77
|
-
"""Starts the background task
|
|
78
|
-
|
|
79
|
-
self._polling_task = asyncio.create_task(self._poll_for_messages())
|
|
392
|
+
"""Starts the background task."""
|
|
393
|
+
self._polling_task = asyncio.create_task(self._poll_for_messages())
|
|
80
394
|
return self
|
|
81
395
|
|
|
82
396
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
83
|
-
"""Stops the background task
|
|
397
|
+
"""Stops the background task."""
|
|
84
398
|
if self._polling_task:
|
|
85
399
|
self._polling_task.cancel()
|
|
86
400
|
await asyncio.gather(self._polling_task, return_exceptions=True)
|
|
@@ -93,71 +407,86 @@ class Subscriber(AsyncIterator):
|
|
|
93
407
|
return await self._queue.get()
|
|
94
408
|
|
|
95
409
|
|
|
96
|
-
class
|
|
97
|
-
"""
|
|
98
|
-
|
|
410
|
+
class Document:
|
|
411
|
+
"""A data class for a vector and its metadata, with a unique ID."""
|
|
412
|
+
def __init__(self, embedding: list[float], id: str|None = None, **metadata):
|
|
413
|
+
if not isinstance(embedding, list) or not all(isinstance(x, (int, float)) for x in embedding):
|
|
414
|
+
raise TypeError("Embedding must be a list of numbers.")
|
|
99
415
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
"""
|
|
103
|
-
def __init__(self, db_path: str = "beaver.db"):
|
|
104
|
-
"""
|
|
105
|
-
Initializes the database.
|
|
416
|
+
self.id = id or str(uuid.uuid4())
|
|
417
|
+
self.embedding = np.array(embedding, dtype=np.float32)
|
|
106
418
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
"""
|
|
110
|
-
self.db_path = db_path
|
|
111
|
-
self._setup_database()
|
|
419
|
+
for key, value in metadata.items():
|
|
420
|
+
setattr(self, key, value)
|
|
112
421
|
|
|
113
|
-
def
|
|
114
|
-
"""
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
422
|
+
def to_dict(self) -> dict[str, Any]:
|
|
423
|
+
"""Serializes metadata to a dictionary."""
|
|
424
|
+
metadata = self.__dict__.copy()
|
|
425
|
+
# Exclude internal attributes from the metadata payload
|
|
426
|
+
metadata.pop('embedding', None)
|
|
427
|
+
metadata.pop('id', None)
|
|
428
|
+
return metadata
|
|
120
429
|
|
|
121
|
-
|
|
430
|
+
def __repr__(self):
|
|
431
|
+
metadata_str = ', '.join(f"{k}={v!r}" for k, v in self.to_dict().items())
|
|
432
|
+
return f"Document(id='{self.id}', {metadata_str})"
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class CollectionWrapper:
    """A wrapper for vector collection operations with upsert logic."""

    def __init__(self, name: str, conn: sqlite3.Connection):
        self._name = name
        self._conn = conn

    def index(self, document: Document):
        """
        Indexes a Document, performing an upsert based on the document's ID.
        If the ID exists, the record is replaced.
        If the ID is new (or auto-generated), a new record is inserted.

        The embedding is stored as its raw bytes and the metadata as JSON text.

        Args:
            document: The Document object to index.
        """
        with self._conn:
            self._conn.execute(
                "INSERT OR REPLACE INTO beaver_collections (collection, item_id, item_vector, metadata) VALUES (?, ?, ?, ?)",
                (
                    self._name,
                    document.id,
                    document.embedding.tobytes(),
                    json.dumps(document.to_dict())
                )
            )

    def search(self, vector: list[float], top_k: int = 10) -> list[tuple[Document, float]]:
        """
        Performs a vector search and returns Document objects.

        Every stored vector in the collection is compared against the query
        by Euclidean distance (a linear scan).

        Args:
            vector: The query vector.
            top_k: Maximum number of results; values <= 0 yield an empty list.

        Returns:
            Up to `top_k` (Document, distance) tuples, closest first.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop results from the end
            # instead of returning nothing.
            return []

        query_vector = np.array(vector, dtype=np.float32)

        cursor = self._conn.cursor()
        try:
            cursor.execute(
                "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
                (self._name,)
            )
            all_docs_data = cursor.fetchall()
        finally:
            cursor.close()

        if not all_docs_data:
            return []

        results = []
        for row in all_docs_data:
            stored = np.frombuffer(row['item_vector'], dtype=np.float32)
            # The metadata column is nullable; treat NULL as "no metadata".
            raw_metadata = row['metadata']
            metadata = json.loads(raw_metadata) if raw_metadata is not None else {}

            # Subtract in float64 so distances keep the precision they had
            # when the stored vector was first converted to Python floats.
            distance = np.linalg.norm(stored.astype(np.float64) - query_vector)

            # Reconstruct the Document object with its original ID
            doc = Document(id=row['item_id'], embedding=stored.tolist(), **metadata)
            results.append((doc, float(distance)))

        results.sort(key=lambda x: x[1])
        return results[:top_k]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.3
|
|
8
|
+
|
|
9
|
+
# beaver 🦫
|
|
10
|
+
|
|
11
|
+
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
12
|
+
|
|
13
|
+
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent **R**etrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
14
|
+
|
|
15
|
+
## Design Philosophy
|
|
16
|
+
|
|
17
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
18
|
+
|
|
19
|
+
- **Minimalistic & Lightweight**: Uses only Python's standard library (`sqlite3`, `asyncio`) plus a single third-party dependency, `numpy`.
|
|
20
|
+
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
|
|
21
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
22
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
|
|
23
|
+
|
|
24
|
+
## Core Features
|
|
25
|
+
|
|
26
|
+
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
27
|
+
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
28
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
29
|
+
- **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
|
|
30
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install beaver-db
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quickstart & API Guide
|
|
39
|
+
|
|
40
|
+
### Initialization
|
|
41
|
+
|
|
42
|
+
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from beaver import BeaverDB, Document
|
|
46
|
+
|
|
47
|
+
db = BeaverDB("my_application.db")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Key-Value Store
|
|
51
|
+
|
|
52
|
+
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# Set a value
|
|
56
|
+
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
57
|
+
|
|
58
|
+
# Get a value
|
|
59
|
+
config = db.get("app_config")
|
|
60
|
+
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### List Management
|
|
64
|
+
|
|
65
|
+
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
tasks = db.list("daily_tasks")
|
|
69
|
+
tasks.push("Write the project report")
|
|
70
|
+
tasks.prepend("Plan the day's agenda")
|
|
71
|
+
print(f"The first task is: {tasks[0]}")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Vector Storage & Search
|
|
75
|
+
|
|
76
|
+
Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Get a handle to a collection
|
|
80
|
+
docs = db.collection("my_documents")
|
|
81
|
+
|
|
82
|
+
# Create and index a document (ID will be a UUID)
|
|
83
|
+
doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
|
|
84
|
+
docs.index(doc1)
|
|
85
|
+
|
|
86
|
+
# Create and index a document with a specific ID (for upserting)
|
|
87
|
+
doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
|
|
88
|
+
docs.index(doc2)
|
|
89
|
+
|
|
90
|
+
# Search for the 2 most similar documents
|
|
91
|
+
query_vector = [0.15, 0.25, 0.65]
|
|
92
|
+
results = docs.search(vector=query_vector, top_k=2)
|
|
93
|
+
|
|
94
|
+
# Results are a list of (Document, distance) tuples
|
|
95
|
+
top_document, distance = results[0]
|
|
96
|
+
print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Asynchronous Pub/Sub
|
|
100
|
+
|
|
101
|
+
Publish events from one part of your app and listen in another using `asyncio`.
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import asyncio
|
|
105
|
+
|
|
106
|
+
async def listener():
|
|
107
|
+
async with db.subscribe("system_events") as sub:
|
|
108
|
+
async for message in sub:
|
|
109
|
+
print(f"LISTENER: Received event -> {message['event']}")
|
|
110
|
+
|
|
111
|
+
async def publisher():
|
|
112
|
+
await asyncio.sleep(1)
|
|
113
|
+
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
114
|
+
|
|
115
|
+
# To run them concurrently:
|
|
116
|
+
# asyncio.run(main())  # where `async def main()` awaits asyncio.gather(listener(), publisher())
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Roadmap
|
|
120
|
+
|
|
121
|
+
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
122
|
+
|
|
123
|
+
- **More Efficient Vector Search**: Integrate a spatial index such as `scipy.spatial.cKDTree`, or an approximate nearest neighbor (ANN) index, to improve search speed on larger datasets.
|
|
124
|
+
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
125
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
|
|
2
|
+
beaver/core.py,sha256=I-_i8AshcNor1OZxoEtNjzLXCy1Byuxvo84y9K4AV_Q,17518
|
|
3
|
+
beaver_db-0.3.0.dist-info/METADATA,sha256=_Hy3Fq64IDahqm3K0vuPvvZRTmtO0sU-tEGlmEjLNpE,4865
|
|
4
|
+
beaver_db-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
+
beaver_db-0.3.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
6
|
+
beaver_db-0.3.0.dist-info/RECORD,,
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
|
|
8
|
-
# beaver 🦫
|
|
9
|
-
|
|
10
|
-
A single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
11
|
-
|
|
12
|
-
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent **R**etrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without leaving the comfort of a single file.
|
|
13
|
-
|
|
14
|
-
This project is currently in its initial phase, with the core asynchronous pub/sub functionality fully implemented.
|
|
15
|
-
|
|
16
|
-
## Core Features (Current)
|
|
17
|
-
|
|
18
|
-
- **Zero Dependencies:** Built using only the standard Python `sqlite3` and `asyncio` libraries. No external packages to install.
|
|
19
|
-
- **Async Pub/Sub:** A fully asynchronous, Redis-like publish-subscribe system for real-time messaging between components of your application.
|
|
20
|
-
- **Single-File & Persistent:** All data is stored in a single SQLite file, making it incredibly portable and easy to back up. Your event log persists across application restarts.
|
|
21
|
-
- **Works with Existing Databases:** `beaver` can be pointed at an existing SQLite file and will create its tables without disturbing other data.
|
|
22
|
-
|
|
23
|
-
## Use Cases
|
|
24
|
-
|
|
25
|
-
I built `beaver` to have a local, embedded database for building small AI-powered projects without having to pay for a server-based database.
|
|
26
|
-
|
|
27
|
-
Examples include:
|
|
28
|
-
|
|
29
|
-
- Streaming messages and tokens from a local FastAPI to a local Streamlit app.
|
|
30
|
-
- Storing user files for Retrieval Augmented Generation in single-user applications.
|
|
31
|
-
|
|
32
|
-
## Installation
|
|
33
|
-
|
|
34
|
-
To use `beaver`, just run `pip install beaver-db` and import the main class.
|
|
35
|
-
|
|
36
|
-
```python
|
|
37
|
-
import asyncio
|
|
38
|
-
from beaver import BeaverDB
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
# --- Example Usage ---
|
|
42
|
-
async def listener(db: BeaverDB):
|
|
43
|
-
"""A sample task that listens for messages."""
|
|
44
|
-
print("LISTENER: Waiting for messages on the 'system_events' channel...")
|
|
45
|
-
try:
|
|
46
|
-
async with db.subscribe("system_events") as subscriber:
|
|
47
|
-
async for message in subscriber:
|
|
48
|
-
print(f"LISTENER: Received -> {message}")
|
|
49
|
-
except asyncio.CancelledError:
|
|
50
|
-
print("LISTENER: Shutting down.")
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
async def publisher(db: BeaverDB):
|
|
54
|
-
"""A sample task that publishes messages."""
|
|
55
|
-
print("PUBLISHER: Ready to send events.")
|
|
56
|
-
await asyncio.sleep(1) # Give the listener a moment to start
|
|
57
|
-
|
|
58
|
-
print("PUBLISHER: Sending user login event.")
|
|
59
|
-
await db.publish(
|
|
60
|
-
"system_events",
|
|
61
|
-
{"event": "user_login", "username": "alice", "status": "success"}
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
await asyncio.sleep(2)
|
|
65
|
-
|
|
66
|
-
print("PUBLISHER: Sending system alert.")
|
|
67
|
-
await db.publish(
|
|
68
|
-
"system_events",
|
|
69
|
-
{"event": "system_alert", "level": "warning", "detail": "CPU usage at 85%"}
|
|
70
|
-
)
|
|
71
|
-
await asyncio.sleep(1)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
async def main():
|
|
75
|
-
"""Runs the listener and publisher concurrently."""
|
|
76
|
-
db = BeaverDB("demo.db")
|
|
77
|
-
|
|
78
|
-
# Run both tasks and wait for them to complete
|
|
79
|
-
listener_task = asyncio.create_task(listener(db))
|
|
80
|
-
publisher_task = asyncio.create_task(publisher(db))
|
|
81
|
-
|
|
82
|
-
await asyncio.sleep(5) # Let them run for a bit
|
|
83
|
-
listener_task.cancel() # Cleanly shut down the listener
|
|
84
|
-
await asyncio.gather(listener_task, publisher_task, return_exceptions=True)
|
|
85
|
-
print("\nDemo finished.")
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
if __name__ == "__main__":
|
|
89
|
-
# To run this demo, save the file as demo.py (not beaver.py, which would shadow the installed `beaver` package) and run `python demo.py`
|
|
90
|
-
print("--- BeaverDB Pub/Sub Demo ---")
|
|
91
|
-
asyncio.run(main())
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
## Roadmap
|
|
95
|
-
|
|
96
|
-
`beaver` aims to be a complete, self-contained data toolkit for modern Python applications. The following features are planned for future releases, all accessible through a high-level API while still allowing direct SQL access:
|
|
97
|
-
|
|
98
|
-
- **Vector Storage & Search:** Store numpy vector embeddings alongside your data and perform efficient k-nearest neighbor (k-NN) searches.
|
|
99
|
-
- **Persistent Key-Value Store:** A simple get/set interface for storing configuration, session data, or any other JSON-serializable object.
|
|
100
|
-
- **JSON Document Store with Full-Text Search:** Store flexible JSON documents and get powerful full-text search across all text fields by default, powered by SQLite's FTS5 extension.
|
|
101
|
-
- **Standard Relational Interface:** While beaver provides high-level features, you will always be able to use the underlying SQLite connection for normal relational tasks, such as creating and managing users or products tables with standard SQL.
|
|
102
|
-
|
|
103
|
-
## Performance
|
|
104
|
-
|
|
105
|
-
Despite its local, embedded nature, `beaver` is highly performant for small use cases. Here are some metrics, measured on a single laptop with a 7th-generation Intel Core i7.
|
|
106
|
-
|
|
107
|
-
- Process 100,000 messages (1000 messages times 100 asynchronous clients) in less than 30 seconds, giving over 3K messages per second with an average latency of only 100 ms (time elapsed between message generation and client processing).
|
|
108
|
-
|
|
109
|
-
## Why Beaver?
|
|
110
|
-
|
|
111
|
-
Beavers are nature's engineers. They build a single, robust, and complex home—the lodge—from many different materials.
|
|
112
|
-
|
|
113
|
-
Similarly, beaver builds a powerful, multi-modal database but contains it all within a single, self-contained file. It's an industrious, no-nonsense tool for building modern applications.
|
|
114
|
-
|
|
115
|
-
## License
|
|
116
|
-
|
|
117
|
-
This project is licensed under the MIT License.
|
beaver_db-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
beaver/__init__.py,sha256=pE1JdpHVni2Hv6igs5VrKPlHkkKMik3ZiwhR23KRBkk,38
|
|
2
|
-
beaver/core.py,sha256=gWuuPwpT7yWgOulv_uwhOvJniltFO8K5-dfqjfA0jNk,5888
|
|
3
|
-
beaver_db-0.1.0.dist-info/METADATA,sha256=b0gaV6IuXw2F_B4NOjKAP6Wj7dzHi7DjIYYg-0R_Q1Q,5243
|
|
4
|
-
beaver_db-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
-
beaver_db-0.1.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
6
|
-
beaver_db-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|