beaver-db 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/__init__.py +2 -1
- beaver/collections.py +327 -0
- beaver/core.py +77 -503
- beaver/lists.py +166 -0
- beaver/subscribers.py +54 -0
- beaver_db-0.5.1.dist-info/METADATA +171 -0
- beaver_db-0.5.1.dist-info/RECORD +9 -0
- beaver_db-0.4.0.dist-info/METADATA +0 -129
- beaver_db-0.4.0.dist-info/RECORD +0 -6
- {beaver_db-0.4.0.dist-info → beaver_db-0.5.1.dist-info}/WHEEL +0 -0
- {beaver_db-0.4.0.dist-info → beaver_db-0.5.1.dist-info}/top_level.txt +0 -0
beaver/lists.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ListWrapper:
    """A wrapper providing a Pythonic interface to a list in the database.

    Items live in the `beaver_lists` table, one row per item, selected by
    `list_name`. Their position is defined by ascending `item_order` (a
    float) and every value is stored as JSON text in `item_value`.

    Rows are always read by column position, so the wrapper behaves the same
    whether or not the connection uses `sqlite3.Row` as its row factory.
    """

    def __init__(self, name: str, conn: sqlite3.Connection):
        """Binds the wrapper to the list called `name` on connection `conn`."""
        self._name = name
        self._conn = conn

    def _fetch_all(self, sql: str, params: tuple) -> list:
        """Runs a query and returns all of its rows, always closing the cursor."""
        cursor = self._conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()

    def _insert_row(self, order: float, value: Any) -> None:
        """Stores `value` (JSON-encoded) in this list at position key `order`."""
        self._conn.execute(
            "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
            (self._name, order, json.dumps(value)),
        ).close()

    def _remove_first(self, select_sql: str) -> Any:
        """Deletes the first row returned by `select_sql` and returns its decoded value.

        `select_sql` must select `rowid, item_value` and take the list name as
        its only parameter. Returns None when the query yields no row.
        """
        with self._conn:
            rows = self._fetch_all(select_sql, (self._name,))
            if not rows:
                return None

            rowid_to_delete, value_to_return = rows[0][0], rows[0][1]
            self._conn.execute(
                "DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
            ).close()
            # Decoded inside the transaction so that a decoding error rolls the
            # delete back instead of losing the item.
            return json.loads(value_to_return)

    def __len__(self) -> int:
        """Returns the number of items in the list (e.g., `len(my_list)`)."""
        rows = self._fetch_all(
            "SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
        )
        return rows[0][0]

    def __getitem__(self, key: Union[int, slice]) -> Any:
        """
        Retrieves an item or slice from the list (e.g., `my_list[0]`, `my_list[1:3]`).

        Raises:
            ValueError: If a slice has a step other than 1.
            IndexError: If an integer index is out of range.
            TypeError: If `key` is neither an integer nor a slice.
        """
        if isinstance(key, slice):
            start, stop, step = key.indices(len(self))
            if step != 1:
                raise ValueError("Slicing with a step is not supported.")

            limit = stop - start
            if limit <= 0:
                return []

            rows = self._fetch_all(
                "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
                (self._name, limit, start),
            )
            return [json.loads(row[0]) for row in rows]

        if isinstance(key, int):
            list_len = len(self)
            if key < -list_len or key >= list_len:
                raise IndexError("List index out of range.")

            offset = key if key >= 0 else list_len + key

            rows = self._fetch_all(
                "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
                (self._name, offset),
            )
            if not rows:
                # The row disappeared after the bounds check (the list shrank
                # in between); report it like any other out-of-range index
                # rather than returning a None that looks like a stored value.
                raise IndexError("List index out of range.")
            return json.loads(rows[0][0])

        raise TypeError("List indices must be integers or slices.")

    def _get_order_at_index(self, index: int) -> float:
        """Helper to get the float `item_order` at a specific index.

        Raises:
            IndexError: If there is no item at `index`.
        """
        rows = self._fetch_all(
            "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
            (self._name, index),
        )
        if rows:
            return rows[0][0]

        raise IndexError(f"{index} out of range.")

    def push(self, value: Any) -> None:
        """Pushes an item to the end of the list."""
        with self._conn:
            rows = self._fetch_all(
                "SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
                (self._name,),
            )
            # MAX() is NULL for an empty list; start counting from 0.0 then.
            max_order = rows[0][0] or 0.0
            self._insert_row(max_order + 1.0, value)

    def prepend(self, value: Any) -> None:
        """Prepends an item to the beginning of the list."""
        with self._conn:
            rows = self._fetch_all(
                "SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
                (self._name,),
            )
            # MIN() is NULL for an empty list; start counting from 0.0 then.
            min_order = rows[0][0] or 0.0
            self._insert_row(min_order - 1.0, value)

    def insert(self, index: int, value: Any) -> None:
        """Inserts an item at a specific index.

        Follows `list.insert` semantics: a negative index counts from the end
        of the list, an index at or before the start prepends, and an index at
        or past the end appends.
        """
        list_len = len(self)
        if index < 0:
            index += list_len
        if index <= 0:
            self.prepend(value)
            return
        if index >= list_len:
            self.push(value)
            return

        # Read both neighbours with a single statement so that they come from
        # the same snapshot of the table.
        neighbours = self._fetch_all(
            "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 2 OFFSET ?",
            (self._name, index - 1),
        )
        if len(neighbours) < 2:
            # The list shrank after the length check, so `index` now points
            # past the end: append, as `list.insert` would.
            self.push(value)
            return

        # Midpoint insertion: the new item takes an order value between its
        # neighbours, so no existing row has to be renumbered.
        # NOTE(review): repeated inserts at the same spot halve the gap each
        # time; once float precision runs out the midpoint equals a neighbour.
        order_before, order_after = neighbours[0][0], neighbours[1][0]
        new_order = order_before + (order_after - order_before) / 2.0

        with self._conn:
            self._insert_row(new_order, value)

    def pop(self) -> Any:
        """Removes and returns the last item from the list.

        Returns None when the list is empty.
        """
        return self._remove_first(
            "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1"
        )

    def deque(self) -> Any:
        """Removes and returns the first item from the list.

        Returns None when the list is empty.
        """
        return self._remove_first(
            "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1"
        )
|
beaver/subscribers.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Iterator
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SubWrapper(Iterator):
    """
    A synchronous, blocking iterator that polls a channel for new messages.

    Messages are read from the `beaver_pubsub_log` table in ascending
    `timestamp` order. Only rows whose timestamp is greater than the last one
    delivered are returned; the starting point is `time.time()` at
    construction.

    NOTE(review): this assumes publishers store `time.time()`-style values in
    `timestamp` - confirm against the publishing code. Because the filter is a
    strict `timestamp > last_seen`, a second row carrying exactly the same
    timestamp as a delivered one would never be returned.
    """

    def __init__(
        self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
    ):
        """
        Initializes the synchronous subscriber.

        Args:
            conn: The SQLite database connection.
            channel_name: The name of the channel to subscribe to.
            poll_interval: The time in seconds to wait between polling for new messages.
        """
        self._conn = conn
        self._channel = channel_name
        self._poll_interval = poll_interval
        self._last_seen_timestamp = time.time()

    def __iter__(self) -> "SubWrapper":
        """Returns the iterator object itself."""
        return self

    def __next__(self) -> Any:
        """
        Blocks until a new message is available on the channel and returns it.

        The JSON payload of the oldest undelivered message is decoded and
        returned. While nothing is pending, the method sleeps for
        `poll_interval` seconds between queries, so delivery can lag by up to
        that interval. The loop never raises StopIteration: iteration only
        ends when the caller stops consuming.
        """
        while True:
            # Fetch the next available message from the database. The cursor
            # is closed even if the query fails.
            cursor = self._conn.cursor()
            try:
                cursor.execute(
                    "SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC LIMIT 1",
                    (self._channel, self._last_seen_timestamp),
                )
                row = cursor.fetchone()
            finally:
                cursor.close()

            if row is not None:
                # Columns are read by position so this works with any row
                # factory configured on the connection.
                self._last_seen_timestamp = row[0]
                return json.loads(row[1])

            # No new messages: wait for the poll interval before trying again.
            time.sleep(self._poll_interval)
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.5.1
|
|
4
|
+
Summary: Synchronous, embedded, modern DB based on SQLite.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.3
|
|
8
|
+
Requires-Dist: scipy>=1.16.2
|
|
9
|
+
|
|
10
|
+
# beaver 🦫
|
|
11
|
+
|
|
12
|
+
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
13
|
+
|
|
14
|
+
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
15
|
+
|
|
16
|
+
## Design Philosophy
|
|
17
|
+
|
|
18
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
19
|
+
|
|
20
|
+
- **Minimalistic & Low-Dependency**: Uses only Python's standard library (`sqlite3`) plus `numpy` and `scipy`.
|
|
21
|
+
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
22
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
23
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
24
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
25
|
+
|
|
26
|
+
## Core Features
|
|
27
|
+
|
|
28
|
+
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
29
|
+
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
30
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
31
|
+
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
32
|
+
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
33
|
+
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
34
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install beaver-db
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quickstart & API Guide
|
|
43
|
+
|
|
44
|
+
### Initialization
|
|
45
|
+
|
|
46
|
+
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from beaver import BeaverDB, Document
|
|
50
|
+
|
|
51
|
+
db = BeaverDB("my_application.db")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Key-Value Store
|
|
55
|
+
|
|
56
|
+
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
# Set a value
|
|
60
|
+
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
61
|
+
|
|
62
|
+
# Get a value
|
|
63
|
+
config = db.get("app_config")
|
|
64
|
+
print(f"Theme: {config.get('theme')}") # Output: Theme: dark
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### List Management
|
|
68
|
+
|
|
69
|
+
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
tasks = db.list("daily_tasks")
|
|
73
|
+
tasks.push("Write the project report")
|
|
74
|
+
tasks.prepend("Plan the day's agenda")
|
|
75
|
+
print(f"The first task is: {tasks[0]}")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Vector & Text Search
|
|
79
|
+
|
|
80
|
+
Store `Document` objects containing vector embeddings and metadata. When you index a document, its string fields are automatically made available for full-text search.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
# Get a handle to a collection
|
|
84
|
+
docs = db.collection("articles")
|
|
85
|
+
|
|
86
|
+
# Create and index a multi-modal document
|
|
87
|
+
doc = Document(
|
|
88
|
+
id="sql-001",
|
|
89
|
+
embedding=[0.8, 0.1, 0.1],
|
|
90
|
+
content="SQLite is a powerful embedded database ideal for local apps.",
|
|
91
|
+
author="John Smith"
|
|
92
|
+
)
|
|
93
|
+
docs.index(doc)
|
|
94
|
+
|
|
95
|
+
# 1. Perform a vector search to find semantically similar documents
|
|
96
|
+
query_vector = [0.7, 0.2, 0.2]
|
|
97
|
+
vector_results = docs.search(vector=query_vector, top_k=1)
|
|
98
|
+
top_doc, distance = vector_results[0]
|
|
99
|
+
print(f"Vector Search Result: {top_doc.content} (distance: {distance:.2f})")
|
|
100
|
+
|
|
101
|
+
# 2. Perform a full-text search to find documents with specific words
|
|
102
|
+
text_results = docs.match(query="database", top_k=1)
|
|
103
|
+
top_doc, rank = text_results[0]
|
|
104
|
+
print(f"Full-Text Search Result: {top_doc.content} (rank: {rank:.2f})")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Graph Traversal
|
|
108
|
+
|
|
109
|
+
Create relationships between documents and traverse them.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from beaver import WalkDirection
|
|
113
|
+
|
|
114
|
+
# Create documents
|
|
115
|
+
alice = Document(id="alice", name="Alice")
|
|
116
|
+
bob = Document(id="bob", name="Bob")
|
|
117
|
+
charlie = Document(id="charlie", name="Charlie")
|
|
118
|
+
|
|
119
|
+
# Index them
|
|
120
|
+
social_net = db.collection("social")
|
|
121
|
+
social_net.index(alice)
|
|
122
|
+
social_net.index(bob)
|
|
123
|
+
social_net.index(charlie)
|
|
124
|
+
|
|
125
|
+
# Create edges
|
|
126
|
+
social_net.connect(alice, bob, label="FOLLOWS")
|
|
127
|
+
social_net.connect(bob, charlie, label="FOLLOWS")
|
|
128
|
+
|
|
129
|
+
# Find direct neighbors
|
|
130
|
+
following = social_net.neighbors(alice, label="FOLLOWS")
|
|
131
|
+
print(f"Alice follows: {[p.id for p in following]}")
|
|
132
|
+
|
|
133
|
+
# Perform a multi-hop walk to find friends of friends
|
|
134
|
+
foaf = social_net.walk(
|
|
135
|
+
source=alice,
|
|
136
|
+
labels=["FOLLOWS"],
|
|
137
|
+
depth=2,
|
|
138
|
+
direction=WalkDirection.OUTGOING,
|
|
139
|
+
)
|
|
140
|
+
print(f"Alice's extended network: {[p.id for p in foaf]}")
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Synchronous Pub/Sub
|
|
144
|
+
|
|
145
|
+
Publish events from one part of your app and listen in another using threads.
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
import threading
|
|
149
|
+
|
|
150
|
+
def listener():
|
|
151
|
+
for message in db.subscribe("system_events"):
|
|
152
|
+
print(f"LISTENER: Received -> {message}")
|
|
153
|
+
if message.get("event") == "shutdown":
|
|
154
|
+
break
|
|
155
|
+
|
|
156
|
+
def publisher():
|
|
157
|
+
db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
158
|
+
db.publish("system_events", {"event": "shutdown"})
|
|
159
|
+
|
|
160
|
+
# Run them concurrently
|
|
161
|
+
listener_thread = threading.Thread(target=listener)
|
|
162
|
+
publisher_thread = threading.Thread(target=publisher)
|
|
163
|
+
listener_thread.start()
|
|
164
|
+
publisher_thread.start()
|
|
165
|
+
listener_thread.join()
|
|
166
|
+
publisher_thread.join()
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
beaver/__init__.py,sha256=-z5Gj6YKMOswpJOOn5Gej8z5i6k3c0Xs00DIYLA-bMI,75
|
|
2
|
+
beaver/collections.py,sha256=fP1xkmo-XXlk3H_lPRiqFhtizINQn8192wOtXFlkTK4,12811
|
|
3
|
+
beaver/core.py,sha256=sk0Z_k7EcORe6bN8CfPukGX7eAfmCGSX_B37KpJmQJ4,7279
|
|
4
|
+
beaver/lists.py,sha256=JG1JOkaYCUldADUzPJhaNi93w-k3S8mUzcCw574uht4,5915
|
|
5
|
+
beaver/subscribers.py,sha256=tCty2iDbeE9IXcPicbxj2CB5gqfLufMB9-nLQwqNBUU,1944
|
|
6
|
+
beaver_db-0.5.1.dist-info/METADATA,sha256=GSFmx4PdrZSOMOJg1RVV9ni59hjtvyOCosAeaUxFJuU,5875
|
|
7
|
+
beaver_db-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
beaver_db-0.5.1.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
9
|
+
beaver_db-0.5.1.dist-info/RECORD,,
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.4.0
|
|
4
|
-
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist: numpy>=2.3.3
|
|
8
|
-
|
|
9
|
-
# beaver 🦫
|
|
10
|
-
|
|
11
|
-
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
12
|
-
|
|
13
|
-
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
14
|
-
|
|
15
|
-
## Design Philosophy
|
|
16
|
-
|
|
17
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
18
|
-
|
|
19
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`, `asyncio`) and `numpy`.
|
|
20
|
-
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
|
|
21
|
-
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
22
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
|
|
23
|
-
|
|
24
|
-
## Core Features
|
|
25
|
-
|
|
26
|
-
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
27
|
-
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
28
|
-
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
29
|
-
- **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
|
|
30
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
31
|
-
|
|
32
|
-
## Installation
|
|
33
|
-
|
|
34
|
-
```bash
|
|
35
|
-
pip install beaver-db
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
## Quickstart & API Guide
|
|
39
|
-
|
|
40
|
-
### Initialization
|
|
41
|
-
|
|
42
|
-
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
43
|
-
|
|
44
|
-
```python
|
|
45
|
-
from beaver import BeaverDB, Document
|
|
46
|
-
|
|
47
|
-
db = BeaverDB("my_application.db")
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Key-Value Store
|
|
51
|
-
|
|
52
|
-
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
53
|
-
|
|
54
|
-
```python
|
|
55
|
-
# Set a value
|
|
56
|
-
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
57
|
-
|
|
58
|
-
# Get a value
|
|
59
|
-
config = db.get("app_config")
|
|
60
|
-
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
### List Management
|
|
64
|
-
|
|
65
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
66
|
-
|
|
67
|
-
```python
|
|
68
|
-
tasks = db.list("daily_tasks")
|
|
69
|
-
tasks.push("Write the project report")
|
|
70
|
-
tasks.prepend("Plan the day's agenda")
|
|
71
|
-
print(f"The first task is: {tasks[0]}")
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Vector Storage & Search
|
|
75
|
-
|
|
76
|
-
Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
|
|
77
|
-
|
|
78
|
-
```python
|
|
79
|
-
# Get a handle to a collection
|
|
80
|
-
docs = db.collection("my_documents")
|
|
81
|
-
|
|
82
|
-
# Create and index a document (ID will be a UUID)
|
|
83
|
-
doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
|
|
84
|
-
docs.index(doc1)
|
|
85
|
-
|
|
86
|
-
# Create and index a document with a specific ID (for upserting)
|
|
87
|
-
doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
|
|
88
|
-
docs.index(doc2)
|
|
89
|
-
|
|
90
|
-
# Search for the 2 most similar documents
|
|
91
|
-
query_vector = [0.15, 0.25, 0.65]
|
|
92
|
-
results = docs.search(vector=query_vector, top_k=2)
|
|
93
|
-
|
|
94
|
-
# Results are a list of (Document, distance) tuples
|
|
95
|
-
top_document, distance = results[0]
|
|
96
|
-
print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
### Asynchronous Pub/Sub
|
|
100
|
-
|
|
101
|
-
Publish events from one part of your app and listen in another using `asyncio`.
|
|
102
|
-
|
|
103
|
-
```python
|
|
104
|
-
import asyncio
|
|
105
|
-
|
|
106
|
-
async def listener():
|
|
107
|
-
async with db.subscribe("system_events") as sub:
|
|
108
|
-
async for message in sub:
|
|
109
|
-
print(f"LISTENER: Received event -> {message['event']}")
|
|
110
|
-
|
|
111
|
-
async def publisher():
|
|
112
|
-
await asyncio.sleep(1)
|
|
113
|
-
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
114
|
-
|
|
115
|
-
# To run them concurrently:
|
|
116
|
-
# asyncio.run(asyncio.gather(listener(), publisher()))
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
## Roadmap
|
|
120
|
-
|
|
121
|
-
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
122
|
-
|
|
123
|
-
- **More Efficient Vector Search**: Integrate an approximate nearest neighbor (ANN) index like `scipy.spatial.cKDTree` to improve search speed on larger datasets.
|
|
124
|
-
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
125
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
126
|
-
|
|
127
|
-
## License
|
|
128
|
-
|
|
129
|
-
This project is licensed under the MIT License.
|
beaver_db-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
|
|
2
|
-
beaver/core.py,sha256=i2rBoUM1rq_j1xM3w4xW4c9e2eI8Ce6BeJ8rE8jQ-fI,21928
|
|
3
|
-
beaver_db-0.4.0.dist-info/METADATA,sha256=7VzqxHKU-Ft1QVAfVvywt4e50C3QWxS7FUpKIaQEJKk,4865
|
|
4
|
-
beaver_db-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
-
beaver_db-0.4.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
6
|
-
beaver_db-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|