beaver-db 0.18.6__py3-none-any.whl → 0.19.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/__init__.py +1 -1
- beaver/blobs.py +11 -0
- beaver/core.py +51 -0
- beaver/locks.py +173 -0
- beaver/server.py +139 -26
- {beaver_db-0.18.6.dist-info → beaver_db-0.19.2.dist-info}/METADATA +49 -15
- {beaver_db-0.18.6.dist-info → beaver_db-0.19.2.dist-info}/RECORD +10 -9
- {beaver_db-0.18.6.dist-info → beaver_db-0.19.2.dist-info}/WHEEL +0 -0
- {beaver_db-0.18.6.dist-info → beaver_db-0.19.2.dist-info}/entry_points.txt +0 -0
- {beaver_db-0.18.6.dist-info → beaver_db-0.19.2.dist-info}/licenses/LICENSE +0 -0
beaver/__init__.py
CHANGED
beaver/blobs.py
CHANGED
|
@@ -122,5 +122,16 @@ class BlobManager[M]:
|
|
|
122
122
|
yield row["key"]
|
|
123
123
|
cursor.close()
|
|
124
124
|
|
|
125
|
+
def __len__(self) -> int:
|
|
126
|
+
"""Returns the number of blobs in the store."""
|
|
127
|
+
cursor = self._db.connection.cursor()
|
|
128
|
+
cursor.execute(
|
|
129
|
+
"SELECT COUNT(*) FROM beaver_blobs WHERE store_name = ?",
|
|
130
|
+
(self._name,)
|
|
131
|
+
)
|
|
132
|
+
count = cursor.fetchone()[0]
|
|
133
|
+
cursor.close()
|
|
134
|
+
return count
|
|
135
|
+
|
|
125
136
|
def __repr__(self) -> str:
|
|
126
137
|
return f"BlobManager(name='{self._name}')"
|
beaver/core.py
CHANGED
|
@@ -9,6 +9,7 @@ from .channels import ChannelManager
|
|
|
9
9
|
from .collections import CollectionManager, Document
|
|
10
10
|
from .dicts import DictManager
|
|
11
11
|
from .lists import ListManager
|
|
12
|
+
from .locks import LockManager
|
|
12
13
|
from .logs import LogManager
|
|
13
14
|
from .queues import QueueManager
|
|
14
15
|
|
|
@@ -99,6 +100,35 @@ class BeaverDB:
|
|
|
99
100
|
self._create_pubsub_table()
|
|
100
101
|
self._create_trigrams_table()
|
|
101
102
|
self._create_versions_table()
|
|
103
|
+
self._create_locks_table()
|
|
104
|
+
|
|
105
|
+
def _create_locks_table(self):
|
|
106
|
+
"""Creates the table for managing inter-process lock waiters."""
|
|
107
|
+
self.connection.execute(
|
|
108
|
+
"""
|
|
109
|
+
CREATE TABLE IF NOT EXISTS beaver_lock_waiters (
|
|
110
|
+
lock_name TEXT NOT NULL,
|
|
111
|
+
waiter_id TEXT NOT NULL,
|
|
112
|
+
requested_at REAL NOT NULL,
|
|
113
|
+
expires_at REAL NOT NULL,
|
|
114
|
+
PRIMARY KEY (lock_name, requested_at)
|
|
115
|
+
)
|
|
116
|
+
"""
|
|
117
|
+
)
|
|
118
|
+
# Index for fast cleanup of expired locks
|
|
119
|
+
self.connection.execute(
|
|
120
|
+
"""
|
|
121
|
+
CREATE INDEX IF NOT EXISTS idx_lock_expires
|
|
122
|
+
ON beaver_lock_waiters (lock_name, expires_at)
|
|
123
|
+
"""
|
|
124
|
+
)
|
|
125
|
+
# Index for fast deletion by the lock holder
|
|
126
|
+
self.connection.execute(
|
|
127
|
+
"""
|
|
128
|
+
CREATE INDEX IF NOT EXISTS idx_lock_waiter_id
|
|
129
|
+
ON beaver_lock_waiters (lock_name, waiter_id)
|
|
130
|
+
"""
|
|
131
|
+
)
|
|
102
132
|
|
|
103
133
|
def _create_logs_table(self):
|
|
104
134
|
"""Creates the table for time-indexed logs."""
|
|
@@ -421,3 +451,24 @@ class BeaverDB:
|
|
|
421
451
|
raise TypeError("The model parameter must be a JsonSerializable class.")
|
|
422
452
|
|
|
423
453
|
return LogManager(name, self, self._db_path, model)
|
|
454
|
+
|
|
455
|
+
def lock(
|
|
456
|
+
self,
|
|
457
|
+
name: str,
|
|
458
|
+
timeout: float | None = None,
|
|
459
|
+
lock_ttl: float = 60.0,
|
|
460
|
+
poll_interval: float = 0.1,
|
|
461
|
+
) -> LockManager:
|
|
462
|
+
"""
|
|
463
|
+
Returns an inter-process lock manager for a given lock name.
|
|
464
|
+
|
|
465
|
+
Args:
|
|
466
|
+
name: The unique name of the lock (e.g., "run_compaction").
|
|
467
|
+
timeout: Max seconds to wait to acquire the lock.
|
|
468
|
+
If None, it will wait forever.
|
|
469
|
+
lock_ttl: Max seconds the lock can be held. If the process crashes,
|
|
470
|
+
the lock will auto-expire after this time.
|
|
471
|
+
poll_interval: Seconds to wait between polls. Shorter intervals
|
|
472
|
+
are more responsive but create more DB I/O.
|
|
473
|
+
"""
|
|
474
|
+
return LockManager(self, name, timeout, lock_ttl, poll_interval)
|
beaver/locks.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import time
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from .types import IDatabase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LockManager:
|
|
10
|
+
"""
|
|
11
|
+
An inter-process, deadlock-proof, and fair (FIFO) lock built on SQLite.
|
|
12
|
+
|
|
13
|
+
This class provides a context manager (`with` statement) to ensure that
|
|
14
|
+
only one process (among many) can enter a critical section of code at a
|
|
15
|
+
time.
|
|
16
|
+
|
|
17
|
+
It is "fair" because it uses a FIFO queue (based on insertion time).
|
|
18
|
+
It is "deadlock-proof" because locks have a Time-To-Live (TTL); if a
|
|
19
|
+
process crashes, its lock will eventually expire and be cleaned up.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
db: IDatabase,
|
|
25
|
+
name: str,
|
|
26
|
+
timeout: Optional[float] = None,
|
|
27
|
+
lock_ttl: float = 60.0,
|
|
28
|
+
poll_interval: float = 0.1,
|
|
29
|
+
):
|
|
30
|
+
"""
|
|
31
|
+
Initializes the lock manager.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
db: The BeaverDB instance.
|
|
35
|
+
name: The unique name of the lock (e.g., "run_compaction").
|
|
36
|
+
timeout: Max seconds to wait to acquire the lock. If None,
|
|
37
|
+
it will wait forever.
|
|
38
|
+
lock_ttl: Max seconds the lock can be held. If the process crashes,
|
|
39
|
+
the lock will auto-expire after this time.
|
|
40
|
+
poll_interval: Seconds to wait between polls.
|
|
41
|
+
"""
|
|
42
|
+
if not isinstance(name, str) or not name:
|
|
43
|
+
raise ValueError("Lock name must be a non-empty string.")
|
|
44
|
+
if lock_ttl <= 0:
|
|
45
|
+
raise ValueError("lock_ttl must be positive.")
|
|
46
|
+
if poll_interval <= 0:
|
|
47
|
+
raise ValueError("poll_interval must be positive.")
|
|
48
|
+
|
|
49
|
+
self._db = db
|
|
50
|
+
self._lock_name = name
|
|
51
|
+
self._timeout = timeout
|
|
52
|
+
self._lock_ttl = lock_ttl
|
|
53
|
+
self._poll_interval = poll_interval
|
|
54
|
+
# A unique ID for this specific lock instance across all processes
|
|
55
|
+
self._waiter_id = f"pid:{os.getpid()}:id:{uuid.uuid4()}"
|
|
56
|
+
self._acquired = False # State to track if this instance holds the lock
|
|
57
|
+
|
|
58
|
+
def acquire(self) -> "LockManager":
|
|
59
|
+
"""
|
|
60
|
+
Blocks until the lock is acquired or the timeout expires.
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
TimeoutError: If the lock cannot be acquired within the specified timeout.
|
|
64
|
+
"""
|
|
65
|
+
if self._acquired:
|
|
66
|
+
# This instance already holds the lock
|
|
67
|
+
return self
|
|
68
|
+
|
|
69
|
+
start_time = time.time()
|
|
70
|
+
requested_at = time.time()
|
|
71
|
+
expires_at = requested_at + self._lock_ttl
|
|
72
|
+
|
|
73
|
+
conn = self._db.connection
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
# 1. Add self to the FIFO queue (atomic)
|
|
77
|
+
with conn:
|
|
78
|
+
conn.execute(
|
|
79
|
+
"""
|
|
80
|
+
INSERT INTO beaver_lock_waiters (lock_name, waiter_id, requested_at, expires_at)
|
|
81
|
+
VALUES (?, ?, ?, ?)
|
|
82
|
+
""",
|
|
83
|
+
(self._lock_name, self._waiter_id, requested_at, expires_at),
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# 2. Start polling loop
|
|
87
|
+
while True:
|
|
88
|
+
with conn:
|
|
89
|
+
# 3. Clean up expired locks from crashed processes
|
|
90
|
+
now = time.time()
|
|
91
|
+
conn.execute(
|
|
92
|
+
"DELETE FROM beaver_lock_waiters WHERE lock_name = ? AND expires_at < ?",
|
|
93
|
+
(self._lock_name, now),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# 4. Check who is at the front of the queue
|
|
97
|
+
cursor = conn.cursor()
|
|
98
|
+
cursor.execute(
|
|
99
|
+
"""
|
|
100
|
+
SELECT waiter_id FROM beaver_lock_waiters
|
|
101
|
+
WHERE lock_name = ?
|
|
102
|
+
ORDER BY requested_at ASC
|
|
103
|
+
LIMIT 1
|
|
104
|
+
""",
|
|
105
|
+
(self._lock_name,),
|
|
106
|
+
)
|
|
107
|
+
result = cursor.fetchone()
|
|
108
|
+
cursor.close()
|
|
109
|
+
|
|
110
|
+
if result and result["waiter_id"] == self._waiter_id:
|
|
111
|
+
# We are at the front. We own the lock.
|
|
112
|
+
self._acquired = True
|
|
113
|
+
return self
|
|
114
|
+
|
|
115
|
+
# 5. Check for timeout
|
|
116
|
+
if self._timeout is not None:
|
|
117
|
+
if (time.time() - start_time) > self._timeout:
|
|
118
|
+
# We timed out. Remove ourselves from the queue and raise.
|
|
119
|
+
self._release_from_queue()
|
|
120
|
+
raise TimeoutError(
|
|
121
|
+
f"Failed to acquire lock '{self._lock_name}' within {self._timeout}s."
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# 6. Wait politely before polling again
|
|
125
|
+
# Add +/- 10% jitter to the poll interval to avoid thundering herd
|
|
126
|
+
jitter = self._poll_interval * 0.1
|
|
127
|
+
sleep_time = random.uniform(
|
|
128
|
+
self._poll_interval - jitter, self._poll_interval + jitter
|
|
129
|
+
)
|
|
130
|
+
time.sleep(sleep_time)
|
|
131
|
+
|
|
132
|
+
except Exception:
|
|
133
|
+
# If anything goes wrong, try to clean up our waiter entry
|
|
134
|
+
self._release_from_queue()
|
|
135
|
+
raise
|
|
136
|
+
|
|
137
|
+
def _release_from_queue(self):
|
|
138
|
+
"""
|
|
139
|
+
Atomically removes this instance's entry from the waiter queue.
|
|
140
|
+
This is a best-effort, fire-and-forget operation.
|
|
141
|
+
"""
|
|
142
|
+
try:
|
|
143
|
+
with self._db.connection:
|
|
144
|
+
self._db.connection.execute(
|
|
145
|
+
"DELETE FROM beaver_lock_waiters WHERE lock_name = ? AND waiter_id = ?",
|
|
146
|
+
(self._lock_name, self._waiter_id),
|
|
147
|
+
)
|
|
148
|
+
except Exception:
|
|
149
|
+
# Don't raise errors during release/cleanup
|
|
150
|
+
pass
|
|
151
|
+
|
|
152
|
+
def release(self):
|
|
153
|
+
"""
|
|
154
|
+
Releases the lock, allowing the next process in the queue to acquire it.
|
|
155
|
+
This is safe to call multiple times.
|
|
156
|
+
"""
|
|
157
|
+
if not self._acquired:
|
|
158
|
+
# We don't hold the lock, so nothing to do.
|
|
159
|
+
return
|
|
160
|
+
|
|
161
|
+
self._release_from_queue()
|
|
162
|
+
self._acquired = False
|
|
163
|
+
|
|
164
|
+
def __enter__(self) -> "LockManager":
|
|
165
|
+
"""Acquires the lock when entering a 'with' statement."""
|
|
166
|
+
return self.acquire()
|
|
167
|
+
|
|
168
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
169
|
+
"""Releases the lock when exiting a 'with' statement."""
|
|
170
|
+
self.release()
|
|
171
|
+
|
|
172
|
+
def __repr__(self) -> str:
|
|
173
|
+
return f"LockManager(name='{self._lock_name}', acquired={self._acquired})"
|
beaver/server.py
CHANGED
|
@@ -2,11 +2,23 @@ try:
|
|
|
2
2
|
from typing import Any, Optional, List
|
|
3
3
|
import json
|
|
4
4
|
from datetime import datetime, timedelta, timezone
|
|
5
|
-
from fastapi import
|
|
5
|
+
from fastapi import (
|
|
6
|
+
FastAPI,
|
|
7
|
+
HTTPException,
|
|
8
|
+
Body,
|
|
9
|
+
UploadFile,
|
|
10
|
+
File,
|
|
11
|
+
Form,
|
|
12
|
+
Response,
|
|
13
|
+
WebSocket,
|
|
14
|
+
WebSocketDisconnect,
|
|
15
|
+
)
|
|
6
16
|
import uvicorn
|
|
7
17
|
from pydantic import BaseModel, Field
|
|
8
18
|
except ImportError:
|
|
9
|
-
raise ImportError(
|
|
19
|
+
raise ImportError(
|
|
20
|
+
'Please install server dependencies with: pip install "beaver-db[server]"'
|
|
21
|
+
)
|
|
10
22
|
|
|
11
23
|
from .core import BeaverDB
|
|
12
24
|
from .collections import Document, WalkDirection
|
|
@@ -14,6 +26,7 @@ from .collections import Document, WalkDirection
|
|
|
14
26
|
|
|
15
27
|
# --- Pydantic Models for Collections ---
|
|
16
28
|
|
|
29
|
+
|
|
17
30
|
class IndexRequest(BaseModel):
|
|
18
31
|
id: Optional[str] = None
|
|
19
32
|
embedding: Optional[List[float]] = None
|
|
@@ -21,28 +34,36 @@ class IndexRequest(BaseModel):
|
|
|
21
34
|
fts: bool = True
|
|
22
35
|
fuzzy: bool = False
|
|
23
36
|
|
|
37
|
+
|
|
24
38
|
class SearchRequest(BaseModel):
|
|
25
39
|
vector: List[float]
|
|
26
40
|
top_k: int = 10
|
|
27
41
|
|
|
42
|
+
|
|
28
43
|
class MatchRequest(BaseModel):
|
|
29
44
|
query: str
|
|
30
45
|
on: Optional[List[str]] = None
|
|
31
46
|
top_k: int = 10
|
|
32
47
|
fuzziness: int = 0
|
|
33
48
|
|
|
49
|
+
|
|
34
50
|
class ConnectRequest(BaseModel):
|
|
35
51
|
source_id: str
|
|
36
52
|
target_id: str
|
|
37
53
|
label: str
|
|
38
54
|
metadata: Optional[dict] = None
|
|
39
55
|
|
|
56
|
+
|
|
40
57
|
class WalkRequest(BaseModel):
|
|
41
58
|
labels: List[str]
|
|
42
59
|
depth: int
|
|
43
60
|
direction: WalkDirection = WalkDirection.OUTGOING
|
|
44
61
|
|
|
45
62
|
|
|
63
|
+
class CountResponse(BaseModel):
|
|
64
|
+
count: int
|
|
65
|
+
|
|
66
|
+
|
|
46
67
|
def build(db: BeaverDB) -> FastAPI:
|
|
47
68
|
"""Constructs a FastAPI instance for a given BeaverDB."""
|
|
48
69
|
app = FastAPI(title="BeaverDB Server")
|
|
@@ -55,7 +76,9 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
55
76
|
d = db.dict(name)
|
|
56
77
|
value = d.get(key)
|
|
57
78
|
if value is None:
|
|
58
|
-
raise HTTPException(
|
|
79
|
+
raise HTTPException(
|
|
80
|
+
status_code=404, detail=f"Key '{key}' not found in dictionary '{name}'"
|
|
81
|
+
)
|
|
59
82
|
return value
|
|
60
83
|
|
|
61
84
|
@app.put("/dicts/{name}/{key}", tags=["Dicts"])
|
|
@@ -73,8 +96,15 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
73
96
|
del d[key]
|
|
74
97
|
return {"status": "ok"}
|
|
75
98
|
except KeyError:
|
|
76
|
-
raise HTTPException(
|
|
99
|
+
raise HTTPException(
|
|
100
|
+
status_code=404, detail=f"Key '{key}' not found in dictionary '{name}'"
|
|
101
|
+
)
|
|
77
102
|
|
|
103
|
+
@app.get("/dicts/{name}/count", tags=["Dicts"], response_model=CountResponse)
|
|
104
|
+
def get_dict_count(name: str) -> dict:
|
|
105
|
+
"""Retrieves the number of key-value pairs in the dictionary."""
|
|
106
|
+
d = db.dict(name)
|
|
107
|
+
return {"count": len(d)}
|
|
78
108
|
|
|
79
109
|
# --- Lists Endpoints ---
|
|
80
110
|
|
|
@@ -91,7 +121,9 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
91
121
|
try:
|
|
92
122
|
return l[index]
|
|
93
123
|
except IndexError:
|
|
94
|
-
raise HTTPException(
|
|
124
|
+
raise HTTPException(
|
|
125
|
+
status_code=404, detail=f"Index {index} out of bounds for list '{name}'"
|
|
126
|
+
)
|
|
95
127
|
|
|
96
128
|
@app.post("/lists/{name}", tags=["Lists"])
|
|
97
129
|
def push_list_item(name: str, value: Any = Body(...)):
|
|
@@ -108,7 +140,9 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
108
140
|
l[index] = value
|
|
109
141
|
return {"status": "ok"}
|
|
110
142
|
except IndexError:
|
|
111
|
-
raise HTTPException(
|
|
143
|
+
raise HTTPException(
|
|
144
|
+
status_code=404, detail=f"Index {index} out of bounds for list '{name}'"
|
|
145
|
+
)
|
|
112
146
|
|
|
113
147
|
@app.delete("/lists/{name}/{index}", tags=["Lists"])
|
|
114
148
|
def delete_list_item(name: str, index: int):
|
|
@@ -118,7 +152,15 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
118
152
|
del l[index]
|
|
119
153
|
return {"status": "ok"}
|
|
120
154
|
except IndexError:
|
|
121
|
-
raise HTTPException(
|
|
155
|
+
raise HTTPException(
|
|
156
|
+
status_code=404, detail=f"Index {index} out of bounds for list '{name}'"
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
@app.get("/lists/{name}/count", tags=["Lists"], response_model=CountResponse)
|
|
160
|
+
def get_list_count(name: str) -> dict:
|
|
161
|
+
"""Retrieves the number of items in the list."""
|
|
162
|
+
l = db.list(name)
|
|
163
|
+
return {"count": len(l)}
|
|
122
164
|
|
|
123
165
|
# --- Queues Endpoints ---
|
|
124
166
|
|
|
@@ -149,11 +191,19 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
149
191
|
item = q.get(block=True, timeout=timeout)
|
|
150
192
|
return item
|
|
151
193
|
except TimeoutError:
|
|
152
|
-
raise HTTPException(
|
|
194
|
+
raise HTTPException(
|
|
195
|
+
status_code=408,
|
|
196
|
+
detail=f"Request timed out after {timeout}s waiting for an item in queue '{name}'",
|
|
197
|
+
)
|
|
153
198
|
except IndexError:
|
|
154
199
|
# This case is less likely with block=True but good to handle
|
|
155
200
|
raise HTTPException(status_code=404, detail=f"Queue '{name}' is empty")
|
|
156
201
|
|
|
202
|
+
@app.get("/queues/{name}/count", tags=["Queues"], response_model=CountResponse)
|
|
203
|
+
def get_queue_count(name: str) -> dict:
|
|
204
|
+
"""Retrieves the number of items currently in the queue."""
|
|
205
|
+
q = db.queue(name)
|
|
206
|
+
return {"count": len(q)}
|
|
157
207
|
|
|
158
208
|
# --- Blobs Endpoints ---
|
|
159
209
|
|
|
@@ -163,12 +213,20 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
163
213
|
blobs = db.blobs(name)
|
|
164
214
|
blob = blobs.get(key)
|
|
165
215
|
if blob is None:
|
|
166
|
-
raise HTTPException(
|
|
216
|
+
raise HTTPException(
|
|
217
|
+
status_code=404,
|
|
218
|
+
detail=f"Blob with key '{key}' not found in store '{name}'",
|
|
219
|
+
)
|
|
167
220
|
# Return the raw bytes with a generic binary content type
|
|
168
221
|
return Response(content=blob.data, media_type="application/octet-stream")
|
|
169
222
|
|
|
170
223
|
@app.put("/blobs/{name}/{key}", tags=["Blobs"])
|
|
171
|
-
async def put_blob(
|
|
224
|
+
async def put_blob(
|
|
225
|
+
name: str,
|
|
226
|
+
key: str,
|
|
227
|
+
data: UploadFile = File(...),
|
|
228
|
+
metadata: Optional[str] = Form(None),
|
|
229
|
+
):
|
|
172
230
|
"""Stores a blob (binary file) with optional JSON metadata."""
|
|
173
231
|
blobs = db.blobs(name)
|
|
174
232
|
file_bytes = await data.read()
|
|
@@ -178,7 +236,9 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
178
236
|
try:
|
|
179
237
|
meta_dict = json.loads(metadata)
|
|
180
238
|
except json.JSONDecodeError:
|
|
181
|
-
raise HTTPException(
|
|
239
|
+
raise HTTPException(
|
|
240
|
+
status_code=400, detail="Invalid JSON format for metadata."
|
|
241
|
+
)
|
|
182
242
|
|
|
183
243
|
blobs.put(key=key, data=file_bytes, metadata=meta_dict)
|
|
184
244
|
return {"status": "ok"}
|
|
@@ -191,8 +251,16 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
191
251
|
blobs.delete(key)
|
|
192
252
|
return {"status": "ok"}
|
|
193
253
|
except KeyError:
|
|
194
|
-
raise HTTPException(
|
|
254
|
+
raise HTTPException(
|
|
255
|
+
status_code=404,
|
|
256
|
+
detail=f"Blob with key '{key}' not found in store '{name}'",
|
|
257
|
+
)
|
|
195
258
|
|
|
259
|
+
@app.get("/blobs/{name}/count", tags=["Blobs"], response_model=CountResponse)
|
|
260
|
+
def get_blob_count(name: str) -> dict:
|
|
261
|
+
"""Retrieves the number of blobs in the store."""
|
|
262
|
+
b = db.blobs(name)
|
|
263
|
+
return {"count": len(b)}
|
|
196
264
|
|
|
197
265
|
# --- Logs Endpoints ---
|
|
198
266
|
|
|
@@ -208,12 +276,25 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
208
276
|
"""Retrieves log entries within a specific time window."""
|
|
209
277
|
log = db.log(name)
|
|
210
278
|
# Ensure datetimes are timezone-aware (UTC) for correct comparison
|
|
211
|
-
start_utc =
|
|
212
|
-
|
|
279
|
+
start_utc = (
|
|
280
|
+
start.astimezone(timezone.utc)
|
|
281
|
+
if start.tzinfo
|
|
282
|
+
else start.replace(tzinfo=timezone.utc)
|
|
283
|
+
)
|
|
284
|
+
end_utc = (
|
|
285
|
+
end.astimezone(timezone.utc)
|
|
286
|
+
if end.tzinfo
|
|
287
|
+
else end.replace(tzinfo=timezone.utc)
|
|
288
|
+
)
|
|
213
289
|
return log.range(start=start_utc, end=end_utc)
|
|
214
290
|
|
|
215
291
|
@app.websocket("/logs/{name}/live", name="Logs")
|
|
216
|
-
async def live_log_feed(
|
|
292
|
+
async def live_log_feed(
|
|
293
|
+
websocket: WebSocket,
|
|
294
|
+
name: str,
|
|
295
|
+
window_seconds: int = 5,
|
|
296
|
+
period_seconds: int = 1,
|
|
297
|
+
):
|
|
217
298
|
"""Streams live, aggregated log data over a WebSocket."""
|
|
218
299
|
await websocket.accept()
|
|
219
300
|
|
|
@@ -222,7 +303,10 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
222
303
|
# This simple aggregator function runs in the background and returns a
|
|
223
304
|
# JSON-serializable summary of the data in the current window.
|
|
224
305
|
def simple_aggregator(window):
|
|
225
|
-
return {
|
|
306
|
+
return {
|
|
307
|
+
"count": len(window),
|
|
308
|
+
"latest_timestamp": window[-1]["timestamp"] if window else None,
|
|
309
|
+
}
|
|
226
310
|
|
|
227
311
|
live_stream = async_logs.live(
|
|
228
312
|
window=timedelta(seconds=window_seconds),
|
|
@@ -239,7 +323,6 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
239
323
|
# Cleanly close the underlying iterator and its background thread.
|
|
240
324
|
live_stream.close()
|
|
241
325
|
|
|
242
|
-
|
|
243
326
|
# --- Channels Endpoints ---
|
|
244
327
|
|
|
245
328
|
@app.post("/channels/{name}/publish", tags=["Channels"])
|
|
@@ -263,7 +346,6 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
263
346
|
except WebSocketDisconnect:
|
|
264
347
|
print(f"Client disconnected from channel '{name}' subscription.")
|
|
265
348
|
|
|
266
|
-
|
|
267
349
|
# --- Collections Endpoints ---
|
|
268
350
|
|
|
269
351
|
@app.get("/collections/{name}", tags=["Collections"])
|
|
@@ -282,7 +364,10 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
282
364
|
return {"status": "ok", "id": doc.id}
|
|
283
365
|
except TypeError as e:
|
|
284
366
|
if "vector" in str(e):
|
|
285
|
-
raise HTTPException(
|
|
367
|
+
raise HTTPException(
|
|
368
|
+
status_code=501,
|
|
369
|
+
detail="Vector indexing requires the '[vector]' extra. Install with: pip install \"beaver-db[vector]\"",
|
|
370
|
+
)
|
|
286
371
|
raise e
|
|
287
372
|
|
|
288
373
|
@app.post("/collections/{name}/search", tags=["Collections"])
|
|
@@ -291,18 +376,29 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
291
376
|
collection = db.collection(name)
|
|
292
377
|
try:
|
|
293
378
|
results = collection.search(vector=req.vector, top_k=req.top_k)
|
|
294
|
-
return [
|
|
379
|
+
return [
|
|
380
|
+
{"document": doc.to_dict(metadata_only=False), "distance": dist}
|
|
381
|
+
for doc, dist in results
|
|
382
|
+
]
|
|
295
383
|
except TypeError as e:
|
|
296
384
|
if "vector" in str(e):
|
|
297
|
-
raise HTTPException(
|
|
385
|
+
raise HTTPException(
|
|
386
|
+
status_code=501,
|
|
387
|
+
detail="Vector search requires the '[vector]' extra. Install with: pip install \"beaver-db[vector]\"",
|
|
388
|
+
)
|
|
298
389
|
raise e
|
|
299
390
|
|
|
300
391
|
@app.post("/collections/{name}/match", tags=["Collections"])
|
|
301
392
|
def match_collection(name: str, req: MatchRequest) -> List[dict]:
|
|
302
393
|
"""Performs a full-text or fuzzy search on the collection."""
|
|
303
394
|
collection = db.collection(name)
|
|
304
|
-
results = collection.match(
|
|
305
|
-
|
|
395
|
+
results = collection.match(
|
|
396
|
+
query=req.query, on=req.on, top_k=req.top_k, fuzziness=req.fuzziness
|
|
397
|
+
)
|
|
398
|
+
return [
|
|
399
|
+
{"document": doc.to_dict(metadata_only=False), "score": score}
|
|
400
|
+
for doc, score in results
|
|
401
|
+
]
|
|
306
402
|
|
|
307
403
|
@app.post("/collections/{name}/connect", tags=["Collections"])
|
|
308
404
|
def connect_documents(name: str, req: ConnectRequest):
|
|
@@ -310,11 +406,15 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
310
406
|
collection = db.collection(name)
|
|
311
407
|
source_doc = Document(id=req.source_id)
|
|
312
408
|
target_doc = Document(id=req.target_id)
|
|
313
|
-
collection.connect(
|
|
409
|
+
collection.connect(
|
|
410
|
+
source=source_doc, target=target_doc, label=req.label, metadata=req.metadata
|
|
411
|
+
)
|
|
314
412
|
return {"status": "ok"}
|
|
315
413
|
|
|
316
414
|
@app.get("/collections/{name}/{doc_id}/neighbors", tags=["Collections"])
|
|
317
|
-
def get_neighbors(
|
|
415
|
+
def get_neighbors(
|
|
416
|
+
name: str, doc_id: str, label: Optional[str] = None
|
|
417
|
+
) -> List[dict]:
|
|
318
418
|
"""Retrieves the neighboring documents for a given document."""
|
|
319
419
|
collection = db.collection(name)
|
|
320
420
|
doc = Document(id=doc_id)
|
|
@@ -326,9 +426,22 @@ def build(db: BeaverDB) -> FastAPI:
|
|
|
326
426
|
"""Performs a graph traversal (BFS) from a starting document."""
|
|
327
427
|
collection = db.collection(name)
|
|
328
428
|
source_doc = Document(id=doc_id)
|
|
329
|
-
results = collection.walk(
|
|
429
|
+
results = collection.walk(
|
|
430
|
+
source=source_doc,
|
|
431
|
+
labels=req.labels,
|
|
432
|
+
depth=req.depth,
|
|
433
|
+
direction=req.direction,
|
|
434
|
+
)
|
|
330
435
|
return [doc.to_dict(metadata_only=False) for doc in results]
|
|
331
436
|
|
|
437
|
+
@app.get(
|
|
438
|
+
"/collections/{name}/count", tags=["Collections"], response_model=CountResponse
|
|
439
|
+
)
|
|
440
|
+
def get_collection_count(name: str) -> dict:
|
|
441
|
+
"""Retrieves the number of documents in the collection."""
|
|
442
|
+
c = db.collection(name)
|
|
443
|
+
return {"count": len(c)}
|
|
444
|
+
|
|
332
445
|
return app
|
|
333
446
|
|
|
334
447
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: beaver-db
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.19.2
|
|
4
4
|
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -23,7 +23,11 @@ Provides-Extra: vector
|
|
|
23
23
|
Requires-Dist: faiss-cpu>=1.12.0; extra == 'vector'
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
<div style="text-align: center;">
|
|
27
|
+
<img src="https://github.com/syalia-srl/beaver/blob/main/logo.png?raw=true" width="256px">
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
---
|
|
27
31
|
|
|
28
32
|
<!-- Project badges -->
|
|
29
33
|

|
|
@@ -32,11 +36,13 @@ Description-Content-Type: text/markdown
|
|
|
32
36
|

|
|
33
37
|

|
|
34
38
|
|
|
35
|
-
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
39
|
+
> A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
40
|
+
|
|
41
|
+
---
|
|
36
42
|
|
|
37
43
|
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
38
44
|
|
|
39
|
-
> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
|
|
45
|
+
> If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
|
|
40
46
|
|
|
41
47
|
## Design Philosophy
|
|
42
48
|
|
|
@@ -55,6 +61,7 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
|
|
|
55
61
|
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
56
62
|
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
57
63
|
- **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
|
|
64
|
+
- **Inter-Process Locking**: A robust, deadlock-proof, and fair (FIFO) distributed lock (`db.lock()`) to coordinate multiple processes and prevent race conditions.
|
|
58
65
|
- **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
|
|
59
66
|
- **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
|
|
60
67
|
- **High-Performance Vector Storage & Search (Optional)**: Store vector embeddings and perform fast approximate nearest neighbor searches using a `faiss`-based hybrid index.
|
|
@@ -103,14 +110,14 @@ pip install "beaver-db[full]"
|
|
|
103
110
|
```
|
|
104
111
|
|
|
105
112
|
### Running with Docker
|
|
113
|
+
|
|
106
114
|
For a fully embedded and lightweight solution, you can run the BeaverDB REST API server using Docker. This is the easiest way to get a self-hosted instance up and running.
|
|
107
115
|
|
|
108
116
|
```bash
|
|
109
117
|
docker run -p 8000:8000 -v $(pwd)/data:/app apiad/beaverdb
|
|
110
118
|
```
|
|
111
119
|
|
|
112
|
-
This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at
|
|
113
|
-
|
|
120
|
+
This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at [http://localhost:8000](http://localhost:8000).
|
|
114
121
|
|
|
115
122
|
## Quickstart
|
|
116
123
|
|
|
@@ -172,12 +179,12 @@ Here are a couple of examples using `curl`:
|
|
|
172
179
|
|
|
173
180
|
```bash
|
|
174
181
|
# Set a value in the 'app_config' dictionary
|
|
175
|
-
curl -X PUT http://127.0.0.1:8000/dicts/app_config/api_key
|
|
182
|
+
curl -X PUT http://127.0.0.1:8000/dicts/app_config/api_key
|
|
176
183
|
-H "Content-Type: application/json"
|
|
177
184
|
-d '"your-secret-api-key"'
|
|
178
185
|
|
|
179
186
|
# Get the value back
|
|
180
|
-
curl http://127.0.0.1:8000/dicts/app_config/api_key
|
|
187
|
+
curl http://127.0.0.1:8000/dicts/app_config/api_key
|
|
181
188
|
# Output: "your-secret-api-key"
|
|
182
189
|
```
|
|
183
190
|
|
|
@@ -341,6 +348,34 @@ for summary in live_summary:
|
|
|
341
348
|
print(f"Live Stats (10s window): Count={summary['count']}, Mean={summary['mean']:.2f}")
|
|
342
349
|
```
|
|
343
350
|
|
|
351
|
+
### 9. Coordinate Distributed Web Scrapers
|
|
352
|
+
|
|
353
|
+
Run multiple scraper processes in parallel and use `db.lock()` to coordinate them. You can ensure only one process refreshes a shared API token or sitemap, preventing race conditions and rate-limiting.
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
import os
import time
|
|
357
|
+
|
|
358
|
+
scrapers_state = db.dict("scraper_state")
|
|
359
|
+
|
|
360
|
+
last_refresh = scrapers_state.get("last_sitemap_refresh", 0)
|
|
361
|
+
if time.time() - last_refresh > 3600: # Only refresh once per hour
|
|
362
|
+
try:
|
|
363
|
+
# Try to get a lock to refresh the shared sitemap, but don't wait long
|
|
364
|
+
with db.lock("refresh_sitemap", timeout=1):
|
|
365
|
+
# We got the lock. Check if it's time to refresh.
|
|
366
|
+
print(f"PID {os.getpid()} is refreshing the sitemap...")
|
|
367
|
+
scrapers_state["sitemap"] = ["/page1", "/page2"] # Your fetch_sitemap()
|
|
368
|
+
scrapers_state["last_sitemap_refresh"] = time.time()
|
|
369
|
+
|
|
370
|
+
except TimeoutError:
|
|
371
|
+
# Another process is already refreshing, so we can skip
|
|
372
|
+
print(f"PID {os.getpid()} letting other process handle refresh.")
|
|
373
|
+
|
|
374
|
+
# All processes can now safely use the shared sitemap
|
|
375
|
+
sitemap = scrapers_state.get("sitemap")
|
|
376
|
+
# ... proceed with scraping ...
|
|
377
|
+
```
|
|
378
|
+
|
|
344
379
|
## Type-Safe Data Models
|
|
345
380
|
|
|
346
381
|
For enhanced data integrity and a better developer experience, BeaverDB supports type-safe operations for all modalities. By associating a model with these data structures, you get automatic serialization and deserialization, complete with autocompletion in your editor.
|
|
@@ -348,7 +383,6 @@ For enhanced data integrity and a better developer experience, BeaverDB supports
|
|
|
348
383
|
This feature is designed to be flexible and works seamlessly with two kinds of models:
|
|
349
384
|
|
|
350
385
|
- **Pydantic Models**: If you're already using Pydantic, your `BaseModel` classes will work out of the box.
|
|
351
|
-
|
|
352
386
|
- **Lightweight `beaver.Model`**: For a zero-dependency solution, you can inherit from the built-in `beaver.Model` class, which is a standard Python class with serialization methods automatically included.
|
|
353
387
|
|
|
354
388
|
|
|
@@ -393,10 +427,11 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
|
|
|
393
427
|
- [`graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
|
|
394
428
|
- [`kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
|
|
395
429
|
- [`list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
|
|
430
|
+
- [`locks.py`](examples/lock_test.py): Demonstrates how to use the inter-process lock to create critical sections.
|
|
396
431
|
- [`logs.py`](examples/logs.py): A short example showing how to build a realtime dashboard with the logging feature.
|
|
397
432
|
- [`pqueue.py`](examples/pqueue.py): A practical example of using the persistent priority queue for task management.
|
|
398
433
|
- [`producer_consumer.py`](examples/producer_consumer.py): A demonstration of the distributed task queue system in a multi-process environment.
|
|
399
|
-
- [`publisher.py`](examples/publisher.
|
|
434
|
+
- [`publisher.py`](examples/publisher.py) and [`subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
|
|
400
435
|
- [`pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system in a single process.
|
|
401
436
|
- [`rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
|
|
402
437
|
- [`stress_vectors.py`](examples/stress_vectors.py): A stress test for the vector search functionality.
|
|
@@ -410,14 +445,13 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
|
|
|
410
445
|
|
|
411
446
|
These are some of the features and improvements planned for future releases:
|
|
412
447
|
|
|
413
|
-
- **
|
|
414
|
-
- **Type-
|
|
415
|
-
- **Drop-in REST
|
|
448
|
+
- **[Issue #2](https://github.com/syalia-srl/beaver/issues/2) Comprehensive async wrappers**: Extend the async support with on-demand wrappers for all data structures, not just channels.
|
|
449
|
+
- **[Issue #9](https://github.com/syalia-srl/beaver/issues/9) Type-safe wrappers based on Pydantic-compatible models**: Enhance the built-in `Model` to handle recursive and embedded types and provide Pydantic compatibility.
|
|
450
|
+
- **[Issue #6](https://github.com/syalia-srl/beaver/issues/6) Drop-in replacement for Beaver REST server client**: Implement a `BeaverClient` class that acts as a drop-in replacement for `BeaverDB` but works against the REST API server.
|
|
451
|
+
- **[Issue #7](https://github.com/syalia-srl/beaver/issues/7) Replace `faiss` with simpler, linear `numpy` vector search**: Investigate removing the heavy `faiss` dependency in favor of a pure `numpy` implementation, trading some search performance for a much simpler installation.
|
|
416
452
|
|
|
417
|
-
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
418
453
|
|
|
419
454
|
If you think of something that would make `beaver` more useful for your use case, please open an issue and/or submit a pull request.
|
|
420
|
-
|
|
421
455
|
## License
|
|
422
456
|
|
|
423
457
|
This project is licensed under the MIT License.
|
|
@@ -1,18 +1,19 @@
|
|
|
1
|
-
beaver/__init__.py,sha256=
|
|
2
|
-
beaver/blobs.py,sha256=
|
|
1
|
+
beaver/__init__.py,sha256=HjTNjk3x58Pw9Iv_eAPVL088wDVEua-JEgecZ28NgMc,125
|
|
2
|
+
beaver/blobs.py,sha256=U5n6NLRQGAzsePGR2SJPRXHy22K8T9cJVMIb0JGzsY0,4399
|
|
3
3
|
beaver/channels.py,sha256=kIuwKMDBdDQObaKT23znsMXzfpKfE7pXSxvf-u4LlpY,9554
|
|
4
4
|
beaver/cli.py,sha256=Sxm-mYU3LGd4tIqw-5LHb0ektWebjV9vn51hm-CMJD0,2232
|
|
5
5
|
beaver/collections.py,sha256=UAQAuRxJRCqY5PHfxJNm3CdKqMNuyY8DOLdodvY6jpk,26107
|
|
6
|
-
beaver/core.py,sha256=
|
|
6
|
+
beaver/core.py,sha256=JRkRvc0Sb3FT9KlR3YbmiPcqCQ686dFKmHSNZ_UJ_aE,17100
|
|
7
7
|
beaver/dicts.py,sha256=Xp8lPfQt08O8zCbptQLWQLO79OxG6uAVER6ryj3SScQ,5495
|
|
8
8
|
beaver/lists.py,sha256=rfJ8uTNLkMREYc0uGx0z1VKt2m3eR9hvbdvDD58EbmQ,10140
|
|
9
|
+
beaver/locks.py,sha256=GWDSRkPw2lrAQfXIRqvkc5PK9zZ2eLYWKTuzHTs9j_A,6321
|
|
9
10
|
beaver/logs.py,sha256=a5xenwl5NZeegIU0dWVEs67lvaHzzw-JRAZtEzNNO3E,9529
|
|
10
11
|
beaver/queues.py,sha256=Fr3oie63EtceSoiC8EOEDSLu1tDI8q2MYLXd8MEeC3g,6476
|
|
11
|
-
beaver/server.py,sha256=
|
|
12
|
+
beaver/server.py,sha256=At3BoEV7JfpYjNtyHMdPUF8shj4V4D5nStXWb6Bv53A,15947
|
|
12
13
|
beaver/types.py,sha256=m0ohT7A8r0Y1a7bJEx4VanLaOUWU2VYxaLHPsVPjrIw,1651
|
|
13
14
|
beaver/vectors.py,sha256=EGZf1s364-rMubxkYoTcjBl72lRRxM1cUwypjsoC6ec,18499
|
|
14
|
-
beaver_db-0.
|
|
15
|
-
beaver_db-0.
|
|
16
|
-
beaver_db-0.
|
|
17
|
-
beaver_db-0.
|
|
18
|
-
beaver_db-0.
|
|
15
|
+
beaver_db-0.19.2.dist-info/METADATA,sha256=Iec3mTpq384nkp_R8fD2AGjXHRzarA93uxL623ZtyVE,23431
|
|
16
|
+
beaver_db-0.19.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
beaver_db-0.19.2.dist-info/entry_points.txt,sha256=bd5E2s45PoBdtdR9-ToKSdLNhmHp8naV1lWP5mOzlrc,42
|
|
18
|
+
beaver_db-0.19.2.dist-info/licenses/LICENSE,sha256=1xrIY5JnMk_QDQzsqmVzPIIyCgZAkWCC8kF2Ddo1UT0,1071
|
|
19
|
+
beaver_db-0.19.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|