beaver-db 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver_db-0.4.0/PKG-INFO +129 -0
- beaver_db-0.4.0/README.md +121 -0
- beaver_db-0.4.0/beaver/__init__.py +1 -0
- {beaver_db-0.2.0 → beaver_db-0.4.0}/beaver/core.py +285 -34
- beaver_db-0.4.0/beaver_db.egg-info/PKG-INFO +129 -0
- {beaver_db-0.2.0 → beaver_db-0.4.0}/beaver_db.egg-info/SOURCES.txt +1 -0
- beaver_db-0.4.0/beaver_db.egg-info/requires.txt +1 -0
- {beaver_db-0.2.0 → beaver_db-0.4.0}/pyproject.toml +4 -2
- beaver_db-0.2.0/PKG-INFO +0 -109
- beaver_db-0.2.0/README.md +0 -102
- beaver_db-0.2.0/beaver/__init__.py +0 -1
- beaver_db-0.2.0/beaver_db.egg-info/PKG-INFO +0 -109
- {beaver_db-0.2.0 → beaver_db-0.4.0}/beaver_db.egg-info/dependency_links.txt +0 -0
- {beaver_db-0.2.0 → beaver_db-0.4.0}/beaver_db.egg-info/top_level.txt +0 -0
- {beaver_db-0.2.0 → beaver_db-0.4.0}/setup.cfg +0 -0
beaver_db-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.3
|
|
8
|
+
|
|
9
|
+
# beaver 🦫
|
|
10
|
+
|
|
11
|
+
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
12
|
+
|
|
13
|
+
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
14
|
+
|
|
15
|
+
## Design Philosophy
|
|
16
|
+
|
|
17
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
18
|
+
|
|
19
|
+
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`, `asyncio`) and `numpy`.
|
|
20
|
+
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
|
|
21
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
22
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
|
|
23
|
+
|
|
24
|
+
## Core Features
|
|
25
|
+
|
|
26
|
+
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
27
|
+
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
28
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
29
|
+
- **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
|
|
30
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install beaver-db
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quickstart & API Guide
|
|
39
|
+
|
|
40
|
+
### Initialization
|
|
41
|
+
|
|
42
|
+
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from beaver import BeaverDB, Document
|
|
46
|
+
|
|
47
|
+
db = BeaverDB("my_application.db")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Key-Value Store
|
|
51
|
+
|
|
52
|
+
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# Set a value
|
|
56
|
+
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
57
|
+
|
|
58
|
+
# Get a value
|
|
59
|
+
config = db.get("app_config")
|
|
60
|
+
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### List Management
|
|
64
|
+
|
|
65
|
+
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
tasks = db.list("daily_tasks")
|
|
69
|
+
tasks.push("Write the project report")
|
|
70
|
+
tasks.prepend("Plan the day's agenda")
|
|
71
|
+
print(f"The first task is: {tasks[0]}")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Vector Storage & Search
|
|
75
|
+
|
|
76
|
+
Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Get a handle to a collection
|
|
80
|
+
docs = db.collection("my_documents")
|
|
81
|
+
|
|
82
|
+
# Create and index a document (ID will be a UUID)
|
|
83
|
+
doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
|
|
84
|
+
docs.index(doc1)
|
|
85
|
+
|
|
86
|
+
# Create and index a document with a specific ID (for upserting)
|
|
87
|
+
doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
|
|
88
|
+
docs.index(doc2)
|
|
89
|
+
|
|
90
|
+
# Search for the 2 most similar documents
|
|
91
|
+
query_vector = [0.15, 0.25, 0.65]
|
|
92
|
+
results = docs.search(vector=query_vector, top_k=2)
|
|
93
|
+
|
|
94
|
+
# Results are a list of (Document, distance) tuples
|
|
95
|
+
top_document, distance = results[0]
|
|
96
|
+
print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Asynchronous Pub/Sub
|
|
100
|
+
|
|
101
|
+
Publish events from one part of your app and listen in another using `asyncio`.
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import asyncio
|
|
105
|
+
|
|
106
|
+
async def listener():
|
|
107
|
+
async with db.subscribe("system_events") as sub:
|
|
108
|
+
async for message in sub:
|
|
109
|
+
print(f"LISTENER: Received event -> {message['event']}")
|
|
110
|
+
|
|
111
|
+
async def publisher():
|
|
112
|
+
await asyncio.sleep(1)
|
|
113
|
+
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
114
|
+
|
|
115
|
+
# To run them concurrently:
|
|
116
|
+
# asyncio.run(main())  # where `async def main()` awaits asyncio.gather(listener(), publisher())
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Roadmap
|
|
120
|
+
|
|
121
|
+
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
122
|
+
|
|
123
|
+
- **More Efficient Vector Search**: Integrate a nearest-neighbor index (for example a k-d tree such as `scipy.spatial.cKDTree`, or an approximate nearest neighbor (ANN) index) to improve search speed on larger datasets.
|
|
124
|
+
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
125
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# beaver 🦫
|
|
2
|
+
|
|
3
|
+
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
4
|
+
|
|
5
|
+
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
6
|
+
|
|
7
|
+
## Design Philosophy
|
|
8
|
+
|
|
9
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
10
|
+
|
|
11
|
+
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`, `asyncio`) and `numpy`.
|
|
12
|
+
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
|
|
13
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
14
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
|
|
15
|
+
|
|
16
|
+
## Core Features
|
|
17
|
+
|
|
18
|
+
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
19
|
+
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
20
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
21
|
+
- **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
|
|
22
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install beaver-db
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quickstart & API Guide
|
|
31
|
+
|
|
32
|
+
### Initialization
|
|
33
|
+
|
|
34
|
+
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from beaver import BeaverDB, Document
|
|
38
|
+
|
|
39
|
+
db = BeaverDB("my_application.db")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Key-Value Store
|
|
43
|
+
|
|
44
|
+
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
# Set a value
|
|
48
|
+
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
49
|
+
|
|
50
|
+
# Get a value
|
|
51
|
+
config = db.get("app_config")
|
|
52
|
+
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### List Management
|
|
56
|
+
|
|
57
|
+
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
tasks = db.list("daily_tasks")
|
|
61
|
+
tasks.push("Write the project report")
|
|
62
|
+
tasks.prepend("Plan the day's agenda")
|
|
63
|
+
print(f"The first task is: {tasks[0]}")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Vector Storage & Search
|
|
67
|
+
|
|
68
|
+
Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
# Get a handle to a collection
|
|
72
|
+
docs = db.collection("my_documents")
|
|
73
|
+
|
|
74
|
+
# Create and index a document (ID will be a UUID)
|
|
75
|
+
doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
|
|
76
|
+
docs.index(doc1)
|
|
77
|
+
|
|
78
|
+
# Create and index a document with a specific ID (for upserting)
|
|
79
|
+
doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
|
|
80
|
+
docs.index(doc2)
|
|
81
|
+
|
|
82
|
+
# Search for the 2 most similar documents
|
|
83
|
+
query_vector = [0.15, 0.25, 0.65]
|
|
84
|
+
results = docs.search(vector=query_vector, top_k=2)
|
|
85
|
+
|
|
86
|
+
# Results are a list of (Document, distance) tuples
|
|
87
|
+
top_document, distance = results[0]
|
|
88
|
+
print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Asynchronous Pub/Sub
|
|
92
|
+
|
|
93
|
+
Publish events from one part of your app and listen in another using `asyncio`.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
import asyncio
|
|
97
|
+
|
|
98
|
+
async def listener():
|
|
99
|
+
async with db.subscribe("system_events") as sub:
|
|
100
|
+
async for message in sub:
|
|
101
|
+
print(f"LISTENER: Received event -> {message['event']}")
|
|
102
|
+
|
|
103
|
+
async def publisher():
|
|
104
|
+
await asyncio.sleep(1)
|
|
105
|
+
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
106
|
+
|
|
107
|
+
# To run them concurrently:
|
|
108
|
+
# asyncio.run(main())  # where `async def main()` awaits asyncio.gather(listener(), publisher())
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Roadmap
|
|
112
|
+
|
|
113
|
+
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
114
|
+
|
|
115
|
+
- **More Efficient Vector Search**: Integrate a nearest-neighbor index (for example a k-d tree such as `scipy.spatial.cKDTree`, or an approximate nearest neighbor (ANN) index) to improve search speed on larger datasets.
|
|
116
|
+
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
117
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .core import BeaverDB, Document
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import uuid
|
|
3
|
+
import numpy as np
|
|
2
4
|
import json
|
|
3
5
|
import sqlite3
|
|
4
6
|
import time
|
|
@@ -26,43 +28,83 @@ class BeaverDB:
|
|
|
26
28
|
self._create_pubsub_table()
|
|
27
29
|
self._create_kv_table()
|
|
28
30
|
self._create_list_table()
|
|
31
|
+
self._create_collections_table()
|
|
32
|
+
self._create_fts_table() # <-- Nueva llamada
|
|
33
|
+
|
|
34
|
+
def _create_fts_table(self):
    """Create the FTS5 virtual table used for full-text search, if missing.

    The table is named ``beaver_fts_index`` and has the columns
    ``collection``, ``item_id``, ``field_path`` and ``field_content``,
    tokenized with the ``porter`` tokenizer.
    """
    ddl = """
        CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
            collection,
            item_id,
            field_path,
            field_content,
            tokenize = 'porter'
        )
    """
    # Run the statement inside the connection's context manager, like the
    # other table-creation helpers of this class.
    with self._conn as conn:
        conn.execute(ddl)
|
|
29
48
|
|
|
30
49
|
def _create_pubsub_table(self):
|
|
31
50
|
"""Creates the pub/sub log table if it doesn't exist."""
|
|
32
51
|
with self._conn:
|
|
33
|
-
self._conn.execute(
|
|
52
|
+
self._conn.execute(
|
|
53
|
+
"""
|
|
34
54
|
CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
|
|
35
55
|
timestamp REAL PRIMARY KEY,
|
|
36
56
|
channel_name TEXT NOT NULL,
|
|
37
57
|
message_payload TEXT NOT NULL
|
|
38
58
|
)
|
|
39
|
-
"""
|
|
40
|
-
|
|
59
|
+
"""
|
|
60
|
+
)
|
|
61
|
+
self._conn.execute(
|
|
62
|
+
"""
|
|
41
63
|
CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
|
|
42
64
|
ON beaver_pubsub_log (channel_name, timestamp)
|
|
43
|
-
"""
|
|
65
|
+
"""
|
|
66
|
+
)
|
|
44
67
|
|
|
45
68
|
def _create_kv_table(self):
|
|
46
69
|
"""Creates the key-value store table if it doesn't exist."""
|
|
47
70
|
with self._conn:
|
|
48
|
-
self._conn.execute(
|
|
71
|
+
self._conn.execute(
|
|
72
|
+
"""
|
|
49
73
|
CREATE TABLE IF NOT EXISTS _beaver_kv_store (
|
|
50
74
|
key TEXT PRIMARY KEY,
|
|
51
75
|
value TEXT NOT NULL
|
|
52
76
|
)
|
|
53
|
-
"""
|
|
77
|
+
"""
|
|
78
|
+
)
|
|
54
79
|
|
|
55
80
|
def _create_list_table(self):
|
|
56
81
|
"""Creates the lists table if it doesn't exist."""
|
|
57
82
|
with self._conn:
|
|
58
|
-
self._conn.execute(
|
|
83
|
+
self._conn.execute(
|
|
84
|
+
"""
|
|
59
85
|
CREATE TABLE IF NOT EXISTS beaver_lists (
|
|
60
86
|
list_name TEXT NOT NULL,
|
|
61
87
|
item_order REAL NOT NULL,
|
|
62
88
|
item_value TEXT NOT NULL,
|
|
63
89
|
PRIMARY KEY (list_name, item_order)
|
|
64
90
|
)
|
|
65
|
-
"""
|
|
91
|
+
"""
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
def _create_collections_table(self):
    """Create the ``beaver_collections`` table if it does not exist yet.

    Each row is keyed by ``(collection, item_id)`` and carries an optional
    vector blob (``item_vector``) plus an optional ``metadata`` text column.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS beaver_collections (
            collection TEXT NOT NULL,
            item_id TEXT NOT NULL,
            item_vector BLOB,
            metadata TEXT,
            PRIMARY KEY (collection, item_id)
        )
    """
    # Run the statement inside the connection's context manager, like the
    # other table-creation helpers of this class.
    with self._conn as conn:
        conn.execute(ddl)
|
|
66
108
|
|
|
67
109
|
def close(self):
|
|
68
110
|
"""Closes the database connection."""
|
|
@@ -94,7 +136,7 @@ class BeaverDB:
|
|
|
94
136
|
with self._conn:
|
|
95
137
|
self._conn.execute(
|
|
96
138
|
"INSERT OR REPLACE INTO _beaver_kv_store (key, value) VALUES (?, ?)",
|
|
97
|
-
(key, json_value)
|
|
139
|
+
(key, json_value),
|
|
98
140
|
)
|
|
99
141
|
|
|
100
142
|
def get(self, key: str) -> Any:
|
|
@@ -120,7 +162,7 @@ class BeaverDB:
|
|
|
120
162
|
cursor.close()
|
|
121
163
|
|
|
122
164
|
if result:
|
|
123
|
-
return json.loads(result[
|
|
165
|
+
return json.loads(result["value"])
|
|
124
166
|
return None
|
|
125
167
|
|
|
126
168
|
# --- List Methods ---
|
|
@@ -139,6 +181,10 @@ class BeaverDB:
|
|
|
139
181
|
raise TypeError("List name must be a non-empty string.")
|
|
140
182
|
return ListWrapper(name, self._conn)
|
|
141
183
|
|
|
184
|
+
def collection(self, name: str) -> "CollectionWrapper":
    """Return a wrapper for interacting with the named vector collection.

    Args:
        name: Name of the collection; must be a non-empty string.

    Returns:
        A ``CollectionWrapper`` bound to ``name`` and to this database's
        connection.

    Raises:
        TypeError: If ``name`` is not a non-empty string. This is the same
            exception type ``list()`` raises for an invalid list name.
    """
    # Reject bad names up front instead of storing rows under a key such as
    # None or "" that can never be addressed consistently later.
    if not isinstance(name, str) or not name:
        raise TypeError("Collection name must be a non-empty string.")
    return CollectionWrapper(name, self._conn)
|
|
187
|
+
|
|
142
188
|
# --- Asynchronous Pub/Sub Methods ---
|
|
143
189
|
|
|
144
190
|
async def publish(self, channel_name: str, payload: Any):
|
|
@@ -153,16 +199,14 @@ class BeaverDB:
|
|
|
153
199
|
except TypeError as e:
|
|
154
200
|
raise TypeError("Message payload must be JSON-serializable.") from e
|
|
155
201
|
|
|
156
|
-
await asyncio.to_thread(
|
|
157
|
-
self._write_publish_to_db, channel_name, json_payload
|
|
158
|
-
)
|
|
202
|
+
await asyncio.to_thread(self._write_publish_to_db, channel_name, json_payload)
|
|
159
203
|
|
|
160
204
|
def _write_publish_to_db(self, channel_name, json_payload):
|
|
161
205
|
"""The synchronous part of the publish operation."""
|
|
162
206
|
with self._conn:
|
|
163
207
|
self._conn.execute(
|
|
164
208
|
"INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
|
|
165
|
-
(time.time(), channel_name, json_payload)
|
|
209
|
+
(time.time(), channel_name, json_payload),
|
|
166
210
|
)
|
|
167
211
|
|
|
168
212
|
def subscribe(self, channel_name: str) -> "Subscriber":
|
|
@@ -182,7 +226,9 @@ class ListWrapper:
|
|
|
182
226
|
def __len__(self) -> int:
|
|
183
227
|
"""Returns the number of items in the list (e.g., `len(my_list)`)."""
|
|
184
228
|
cursor = self._conn.cursor()
|
|
185
|
-
cursor.execute(
|
|
229
|
+
cursor.execute(
|
|
230
|
+
"SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
|
|
231
|
+
)
|
|
186
232
|
count = cursor.fetchone()[0]
|
|
187
233
|
cursor.close()
|
|
188
234
|
return count
|
|
@@ -203,9 +249,9 @@ class ListWrapper:
|
|
|
203
249
|
cursor = self._conn.cursor()
|
|
204
250
|
cursor.execute(
|
|
205
251
|
"SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
|
|
206
|
-
(self._name, limit, start)
|
|
252
|
+
(self._name, limit, start),
|
|
207
253
|
)
|
|
208
|
-
results = [json.loads(row[
|
|
254
|
+
results = [json.loads(row["item_value"]) for row in cursor.fetchall()]
|
|
209
255
|
cursor.close()
|
|
210
256
|
return results
|
|
211
257
|
|
|
@@ -219,11 +265,11 @@ class ListWrapper:
|
|
|
219
265
|
cursor = self._conn.cursor()
|
|
220
266
|
cursor.execute(
|
|
221
267
|
"SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
|
|
222
|
-
(self._name, offset)
|
|
268
|
+
(self._name, offset),
|
|
223
269
|
)
|
|
224
270
|
result = cursor.fetchone()
|
|
225
271
|
cursor.close()
|
|
226
|
-
return json.loads(result[
|
|
272
|
+
return json.loads(result["item_value"]) if result else None
|
|
227
273
|
|
|
228
274
|
else:
|
|
229
275
|
raise TypeError("List indices must be integers or slices.")
|
|
@@ -233,7 +279,7 @@ class ListWrapper:
|
|
|
233
279
|
cursor = self._conn.cursor()
|
|
234
280
|
cursor.execute(
|
|
235
281
|
"SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
|
|
236
|
-
(self._name, index)
|
|
282
|
+
(self._name, index),
|
|
237
283
|
)
|
|
238
284
|
result = cursor.fetchone()
|
|
239
285
|
cursor.close()
|
|
@@ -247,26 +293,32 @@ class ListWrapper:
|
|
|
247
293
|
"""Pushes an item to the end of the list."""
|
|
248
294
|
with self._conn:
|
|
249
295
|
cursor = self._conn.cursor()
|
|
250
|
-
cursor.execute(
|
|
296
|
+
cursor.execute(
|
|
297
|
+
"SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
|
|
298
|
+
(self._name,),
|
|
299
|
+
)
|
|
251
300
|
max_order = cursor.fetchone()[0] or 0.0
|
|
252
301
|
new_order = max_order + 1.0
|
|
253
302
|
|
|
254
303
|
cursor.execute(
|
|
255
304
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
256
|
-
(self._name, new_order, json.dumps(value))
|
|
305
|
+
(self._name, new_order, json.dumps(value)),
|
|
257
306
|
)
|
|
258
307
|
|
|
259
308
|
def prepend(self, value: Any):
|
|
260
309
|
"""Prepends an item to the beginning of the list."""
|
|
261
310
|
with self._conn:
|
|
262
311
|
cursor = self._conn.cursor()
|
|
263
|
-
cursor.execute(
|
|
312
|
+
cursor.execute(
|
|
313
|
+
"SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
|
|
314
|
+
(self._name,),
|
|
315
|
+
)
|
|
264
316
|
min_order = cursor.fetchone()[0] or 0.0
|
|
265
317
|
new_order = min_order - 1.0
|
|
266
318
|
|
|
267
319
|
cursor.execute(
|
|
268
320
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
269
|
-
(self._name, new_order, json.dumps(value))
|
|
321
|
+
(self._name, new_order, json.dumps(value)),
|
|
270
322
|
)
|
|
271
323
|
|
|
272
324
|
def insert(self, index: int, value: Any):
|
|
@@ -288,7 +340,7 @@ class ListWrapper:
|
|
|
288
340
|
with self._conn:
|
|
289
341
|
self._conn.execute(
|
|
290
342
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
291
|
-
(self._name, new_order, json.dumps(value))
|
|
343
|
+
(self._name, new_order, json.dumps(value)),
|
|
292
344
|
)
|
|
293
345
|
|
|
294
346
|
def pop(self) -> Any:
|
|
@@ -297,14 +349,16 @@ class ListWrapper:
|
|
|
297
349
|
cursor = self._conn.cursor()
|
|
298
350
|
cursor.execute(
|
|
299
351
|
"SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1",
|
|
300
|
-
(self._name,)
|
|
352
|
+
(self._name,),
|
|
301
353
|
)
|
|
302
354
|
result = cursor.fetchone()
|
|
303
355
|
if not result:
|
|
304
356
|
return None
|
|
305
357
|
|
|
306
358
|
rowid_to_delete, value_to_return = result
|
|
307
|
-
cursor.execute(
|
|
359
|
+
cursor.execute(
|
|
360
|
+
"DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
|
|
361
|
+
)
|
|
308
362
|
return json.loads(value_to_return)
|
|
309
363
|
|
|
310
364
|
def deque(self) -> Any:
|
|
@@ -313,14 +367,16 @@ class ListWrapper:
|
|
|
313
367
|
cursor = self._conn.cursor()
|
|
314
368
|
cursor.execute(
|
|
315
369
|
"SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1",
|
|
316
|
-
(self._name,)
|
|
370
|
+
(self._name,),
|
|
317
371
|
)
|
|
318
372
|
result = cursor.fetchone()
|
|
319
373
|
if not result:
|
|
320
374
|
return None
|
|
321
375
|
|
|
322
376
|
rowid_to_delete, value_to_return = result
|
|
323
|
-
cursor.execute(
|
|
377
|
+
cursor.execute(
|
|
378
|
+
"DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
|
|
379
|
+
)
|
|
324
380
|
return json.loads(value_to_return)
|
|
325
381
|
|
|
326
382
|
|
|
@@ -330,7 +386,9 @@ class Subscriber(AsyncIterator):
|
|
|
330
386
|
Designed to be used with 'async with'.
|
|
331
387
|
"""
|
|
332
388
|
|
|
333
|
-
def __init__(
|
|
389
|
+
def __init__(
|
|
390
|
+
self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
|
|
391
|
+
):
|
|
334
392
|
self._conn = conn
|
|
335
393
|
self._channel = channel_name
|
|
336
394
|
self._poll_interval = poll_interval
|
|
@@ -342,9 +400,7 @@ class Subscriber(AsyncIterator):
|
|
|
342
400
|
"""Background task that polls the database for new messages."""
|
|
343
401
|
while True:
|
|
344
402
|
try:
|
|
345
|
-
new_messages = await asyncio.to_thread(
|
|
346
|
-
self._fetch_new_messages_from_db
|
|
347
|
-
)
|
|
403
|
+
new_messages = await asyncio.to_thread(self._fetch_new_messages_from_db)
|
|
348
404
|
if new_messages:
|
|
349
405
|
for msg in new_messages:
|
|
350
406
|
payload = json.loads(msg["message_payload"])
|
|
@@ -362,7 +418,7 @@ class Subscriber(AsyncIterator):
|
|
|
362
418
|
cursor = self._conn.cursor()
|
|
363
419
|
cursor.execute(
|
|
364
420
|
"SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
|
|
365
|
-
(self._channel, self._last_seen_timestamp)
|
|
421
|
+
(self._channel, self._last_seen_timestamp),
|
|
366
422
|
)
|
|
367
423
|
results = cursor.fetchall()
|
|
368
424
|
cursor.close()
|
|
@@ -385,3 +441,198 @@ class Subscriber(AsyncIterator):
|
|
|
385
441
|
async def __anext__(self) -> Any:
|
|
386
442
|
"""Allows 'async for' to pull messages from the internal queue."""
|
|
387
443
|
return await self._queue.get()
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
class Document:
|
|
447
|
+
"""A data class for a vector and its metadata, with a unique ID."""
|
|
448
|
+
|
|
449
|
+
def __init__(
|
|
450
|
+
self, embedding: list[float] | None = None, id: str | None = None, **metadata
|
|
451
|
+
):
|
|
452
|
+
self.id = id or str(uuid.uuid4())
|
|
453
|
+
|
|
454
|
+
if embedding is None:
|
|
455
|
+
self.embedding = None
|
|
456
|
+
else:
|
|
457
|
+
if not isinstance(embedding, list) or not all(
|
|
458
|
+
isinstance(x, (int, float)) for x in embedding
|
|
459
|
+
):
|
|
460
|
+
raise TypeError("Embedding must be a list of numbers.")
|
|
461
|
+
|
|
462
|
+
self.embedding = np.array(embedding, dtype=np.float32)
|
|
463
|
+
|
|
464
|
+
for key, value in metadata.items():
|
|
465
|
+
setattr(self, key, value)
|
|
466
|
+
|
|
467
|
+
def to_dict(self) -> dict[str, Any]:
|
|
468
|
+
"""Serializes metadata to a dictionary."""
|
|
469
|
+
metadata = self.__dict__.copy()
|
|
470
|
+
# Exclude internal attributes from the metadata payload
|
|
471
|
+
metadata.pop("embedding", None)
|
|
472
|
+
metadata.pop("id", None)
|
|
473
|
+
return metadata
|
|
474
|
+
|
|
475
|
+
def __repr__(self):
|
|
476
|
+
metadata_str = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
|
|
477
|
+
return f"Document(id='{self.id}', {metadata_str})"
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
class CollectionWrapper:
|
|
481
|
+
"""A wrapper for vector collection operations with upsert logic."""
|
|
482
|
+
|
|
483
|
+
def __init__(self, name: str, conn: sqlite3.Connection):
|
|
484
|
+
self._name = name
|
|
485
|
+
self._conn = conn
|
|
486
|
+
|
|
487
|
+
# Dentro de la clase CollectionWrapper en beaver/core.py
|
|
488
|
+
|
|
489
|
+
def _flatten_metadata(self, metadata: dict, prefix: str = "") -> dict[str, str]:
|
|
490
|
+
"""
|
|
491
|
+
Aplana un diccionario anidado y filtra solo los valores de tipo string.
|
|
492
|
+
Ejemplo: {'a': {'b': 'c'}} -> {'a__b': 'c'}
|
|
493
|
+
"""
|
|
494
|
+
flat_dict = {}
|
|
495
|
+
for key, value in metadata.items():
|
|
496
|
+
new_key = f"{prefix}__{key}" if prefix else key
|
|
497
|
+
if isinstance(value, dict):
|
|
498
|
+
flat_dict.update(self._flatten_metadata(value, new_key))
|
|
499
|
+
elif isinstance(value, str):
|
|
500
|
+
flat_dict[new_key] = value
|
|
501
|
+
return flat_dict
|
|
502
|
+
|
|
503
|
+
def index(self, document: Document, *, fts: bool = True):
|
|
504
|
+
"""
|
|
505
|
+
Indexa un Document, realizando un upsert y actualizando el índice FTS.
|
|
506
|
+
"""
|
|
507
|
+
with self._conn:
|
|
508
|
+
if fts:
|
|
509
|
+
self._conn.execute(
|
|
510
|
+
"DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?",
|
|
511
|
+
(self._name, document.id),
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
string_fields = self._flatten_metadata(document.to_dict())
|
|
515
|
+
|
|
516
|
+
if string_fields:
|
|
517
|
+
fts_data = [
|
|
518
|
+
(self._name, document.id, path, content)
|
|
519
|
+
for path, content in string_fields.items()
|
|
520
|
+
]
|
|
521
|
+
self._conn.executemany(
|
|
522
|
+
"INSERT INTO beaver_fts_index (collection, item_id, field_path, field_content) VALUES (?, ?, ?, ?)",
|
|
523
|
+
fts_data,
|
|
524
|
+
)
|
|
525
|
+
|
|
526
|
+
self._conn.execute(
|
|
527
|
+
"INSERT OR REPLACE INTO beaver_collections (collection, item_id, item_vector, metadata) VALUES (?, ?, ?, ?)",
|
|
528
|
+
(
|
|
529
|
+
self._name,
|
|
530
|
+
document.id,
|
|
531
|
+
document.embedding.tobytes() if document.embedding is not None else None,
|
|
532
|
+
json.dumps(document.to_dict()),
|
|
533
|
+
),
|
|
534
|
+
)
|
|
535
|
+
|
|
536
|
+
def search(
|
|
537
|
+
self, vector: list[float], top_k: int = 10
|
|
538
|
+
) -> list[tuple[Document, float]]:
|
|
539
|
+
"""
|
|
540
|
+
Performs a vector search and returns Document objects.
|
|
541
|
+
"""
|
|
542
|
+
query_vector = np.array(vector, dtype=np.float32)
|
|
543
|
+
|
|
544
|
+
cursor = self._conn.cursor()
|
|
545
|
+
cursor.execute(
|
|
546
|
+
"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
|
|
547
|
+
(self._name,),
|
|
548
|
+
)
|
|
549
|
+
|
|
550
|
+
all_docs_data = cursor.fetchall()
|
|
551
|
+
cursor.close()
|
|
552
|
+
|
|
553
|
+
if not all_docs_data:
|
|
554
|
+
return []
|
|
555
|
+
|
|
556
|
+
results = []
|
|
557
|
+
for row in all_docs_data:
|
|
558
|
+
if row["item_vector"] is None:
|
|
559
|
+
continue # Skip documents without embeddings
|
|
560
|
+
|
|
561
|
+
doc_id = row["item_id"]
|
|
562
|
+
embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
|
|
563
|
+
metadata = json.loads(row["metadata"])
|
|
564
|
+
|
|
565
|
+
distance = np.linalg.norm(embedding - query_vector)
|
|
566
|
+
|
|
567
|
+
# Reconstruct the Document object with its original ID
|
|
568
|
+
doc = Document(id=doc_id, embedding=list(embedding), **metadata)
|
|
569
|
+
results.append((doc, float(distance)))
|
|
570
|
+
|
|
571
|
+
results.sort(key=lambda x: x[1])
|
|
572
|
+
return results[:top_k]
|
|
573
|
+
|
|
574
|
+
def match(
|
|
575
|
+
self, query: str, on_field: str | None = None, top_k: int = 10
|
|
576
|
+
) -> list[tuple[Document, float]]:
|
|
577
|
+
"""
|
|
578
|
+
Realiza una búsqueda de texto completo en los campos de metadatos indexados.
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
query: La expresión de búsqueda (ej. "gato", "perro OR conejo").
|
|
582
|
+
on_field: Opcional, el campo específico donde buscar (ej. "details__title").
|
|
583
|
+
top_k: El número máximo de resultados a devolver.
|
|
584
|
+
|
|
585
|
+
Returns:
|
|
586
|
+
Una lista de tuplas (Documento, puntuación_de_relevancia).
|
|
587
|
+
"""
|
|
588
|
+
cursor = self._conn.cursor()
|
|
589
|
+
|
|
590
|
+
sql_query = """
|
|
591
|
+
SELECT
|
|
592
|
+
t1.item_id, t1.item_vector, t1.metadata, fts.rank
|
|
593
|
+
FROM beaver_collections AS t1
|
|
594
|
+
JOIN (
|
|
595
|
+
SELECT DISTINCT item_id, rank
|
|
596
|
+
FROM beaver_fts_index
|
|
597
|
+
WHERE beaver_fts_index MATCH ?
|
|
598
|
+
ORDER BY rank
|
|
599
|
+
LIMIT ?
|
|
600
|
+
) AS fts ON t1.item_id = fts.item_id
|
|
601
|
+
WHERE t1.collection = ?
|
|
602
|
+
ORDER BY fts.rank
|
|
603
|
+
"""
|
|
604
|
+
|
|
605
|
+
params = []
|
|
606
|
+
field_filter_sql = ""
|
|
607
|
+
|
|
608
|
+
if on_field:
|
|
609
|
+
field_filter_sql = "AND field_path = ?"
|
|
610
|
+
params.append(on_field)
|
|
611
|
+
else:
|
|
612
|
+
# Búsqueda en todos los campos
|
|
613
|
+
params.append(query)
|
|
614
|
+
|
|
615
|
+
sql_query = sql_query.format(field_filter_sql)
|
|
616
|
+
params.extend([top_k, self._name])
|
|
617
|
+
|
|
618
|
+
cursor.execute(sql_query, tuple(params))
|
|
619
|
+
|
|
620
|
+
results = []
|
|
621
|
+
for row in cursor.fetchall():
|
|
622
|
+
doc_id = row["item_id"]
|
|
623
|
+
|
|
624
|
+
if row["item_vector"] is None:
|
|
625
|
+
embedding = None
|
|
626
|
+
else:
|
|
627
|
+
embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
|
|
628
|
+
|
|
629
|
+
metadata = json.loads(row["metadata"])
|
|
630
|
+
rank = row["rank"]
|
|
631
|
+
|
|
632
|
+
doc = Document(id=doc_id, embedding=embedding, **metadata)
|
|
633
|
+
results.append((doc, rank))
|
|
634
|
+
|
|
635
|
+
results.sort(key=lambda x: x[1])
|
|
636
|
+
cursor.close()
|
|
637
|
+
|
|
638
|
+
return results
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.3
|
|
8
|
+
|
|
9
|
+
# beaver 🦫
|
|
10
|
+
|
|
11
|
+
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
12
|
+
|
|
13
|
+
`beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
14
|
+
|
|
15
|
+
## Design Philosophy
|
|
16
|
+
|
|
17
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
18
|
+
|
|
19
|
+
- **Minimalistic & Lightweight**: Uses only Python's standard libraries (`sqlite3`, `asyncio`) plus a single external dependency, `numpy`.
|
|
20
|
+
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
|
|
21
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
22
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
|
|
23
|
+
|
|
24
|
+
## Core Features
|
|
25
|
+
|
|
26
|
+
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
27
|
+
- **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
|
|
28
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
29
|
+
- **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
|
|
30
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install beaver-db
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quickstart & API Guide
|
|
39
|
+
|
|
40
|
+
### Initialization
|
|
41
|
+
|
|
42
|
+
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from beaver import BeaverDB, Document
|
|
46
|
+
|
|
47
|
+
db = BeaverDB("my_application.db")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Key-Value Store
|
|
51
|
+
|
|
52
|
+
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
# Set a value
|
|
56
|
+
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
57
|
+
|
|
58
|
+
# Get a value
|
|
59
|
+
config = db.get("app_config")
|
|
60
|
+
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### List Management
|
|
64
|
+
|
|
65
|
+
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
tasks = db.list("daily_tasks")
|
|
69
|
+
tasks.push("Write the project report")
|
|
70
|
+
tasks.prepend("Plan the day's agenda")
|
|
71
|
+
print(f"The first task is: {tasks[0]}")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Vector Storage & Search
|
|
75
|
+
|
|
76
|
+
Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Get a handle to a collection
|
|
80
|
+
docs = db.collection("my_documents")
|
|
81
|
+
|
|
82
|
+
# Create and index a document (ID will be a UUID)
|
|
83
|
+
doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
|
|
84
|
+
docs.index(doc1)
|
|
85
|
+
|
|
86
|
+
# Create and index a document with a specific ID (for upserting)
|
|
87
|
+
doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
|
|
88
|
+
docs.index(doc2)
|
|
89
|
+
|
|
90
|
+
# Search for the 2 most similar documents
|
|
91
|
+
query_vector = [0.15, 0.25, 0.65]
|
|
92
|
+
results = docs.search(vector=query_vector, top_k=2)
|
|
93
|
+
|
|
94
|
+
# Results are a list of (Document, distance) tuples
|
|
95
|
+
top_document, distance = results[0]
|
|
96
|
+
print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Asynchronous Pub/Sub
|
|
100
|
+
|
|
101
|
+
Publish events from one part of your app and listen in another using `asyncio`.
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import asyncio
|
|
105
|
+
|
|
106
|
+
async def listener():
|
|
107
|
+
async with db.subscribe("system_events") as sub:
|
|
108
|
+
async for message in sub:
|
|
109
|
+
print(f"LISTENER: Received event -> {message['event']}")
|
|
110
|
+
|
|
111
|
+
async def publisher():
|
|
112
|
+
await asyncio.sleep(1)
|
|
113
|
+
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
114
|
+
|
|
115
|
+
# To run them concurrently:
|
|
116
|
+
# asyncio.run(main())  # where: async def main(): await asyncio.gather(listener(), publisher())
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Roadmap
|
|
120
|
+
|
|
121
|
+
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
122
|
+
|
|
123
|
+
- **More Efficient Vector Search**: Integrate a spatial index such as `scipy.spatial.cKDTree` (a k-d tree supporting exact and, optionally, approximate nearest-neighbor queries) to improve search speed on larger datasets.
|
|
124
|
+
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
125
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
numpy>=2.3.3
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "beaver-db"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
description = "Asynchronous, embedded, modern DB based on SQLite."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.13"
|
|
7
|
-
dependencies = []
|
|
7
|
+
dependencies = [
|
|
8
|
+
"numpy>=2.3.3",
|
|
9
|
+
]
|
|
8
10
|
|
|
9
11
|
[tool.hatch.build.targets.wheel]
|
|
10
12
|
packages = ["beaver"]
|
beaver_db-0.2.0/PKG-INFO
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
|
|
8
|
-
# beaver 🦫
|
|
9
|
-
|
|
10
|
-
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
11
|
-
|
|
12
|
-
`beaver` is the Backend for Embedded Asynchronous Vector & Event Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
13
|
-
|
|
14
|
-
Design Philosophy
|
|
15
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
16
|
-
|
|
17
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (sqlite3, asyncio). No external packages are required, making it incredibly lightweight and portable.
|
|
18
|
-
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Simpler features like key-value and list operations remain synchronous for ease of use.
|
|
19
|
-
- **Built for Local Applications**: Perfect for local AI tools, chatbots (streaming tokens), task management apps, desktop utilities, and prototypes that need persistent, structured data without network overhead.
|
|
20
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast, reliable, and will likely serve your needs for a long way before you need a "professional" database.
|
|
21
|
-
|
|
22
|
-
## Core Features
|
|
23
|
-
|
|
24
|
-
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
25
|
-
- **Persistent Key-Value Store**: A simple set/get interface for storing configuration, session data, or any other JSON-serializable object.
|
|
26
|
-
- **Pythonic List Management**: A fluent, Redis-like interface (db.list("name").push()) for managing persistent, ordered lists with support for indexing and slicing.
|
|
27
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
28
|
-
|
|
29
|
-
## Installation
|
|
30
|
-
|
|
31
|
-
```bash
|
|
32
|
-
pip install beaver-db
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## Quickstart & API Guide
|
|
36
|
-
|
|
37
|
-
### 1. Initialization
|
|
38
|
-
|
|
39
|
-
All you need to do is import and instantiate the BeaverDB class with a file path.
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from beaver import BeaverDB
|
|
43
|
-
|
|
44
|
-
db = BeaverDB("my_application.db")
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
### 2. Key-Value Store
|
|
48
|
-
|
|
49
|
-
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
50
|
-
|
|
51
|
-
```python
|
|
52
|
-
# Set a value
|
|
53
|
-
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
54
|
-
|
|
55
|
-
# Get a value
|
|
56
|
-
config = db.get("app_config")
|
|
57
|
-
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### 3. List Management
|
|
61
|
-
|
|
62
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
63
|
-
|
|
64
|
-
```python
|
|
65
|
-
# Get a wrapper for the 'tasks' list
|
|
66
|
-
tasks = db.list("daily_tasks")
|
|
67
|
-
|
|
68
|
-
# Push items to the list
|
|
69
|
-
tasks.push("Write the project report")
|
|
70
|
-
tasks.push("Send follow-up emails")
|
|
71
|
-
tasks.prepend("Plan the day's agenda") # Push to the front
|
|
72
|
-
|
|
73
|
-
# Use len() and indexing (including slices!)
|
|
74
|
-
print(f"There are {len(tasks)} tasks.")
|
|
75
|
-
print(f"The first task is: {tasks[0]}")
|
|
76
|
-
print(f"The rest is: {tasks[1:]}")
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
### 4. Asynchronous Pub/Sub
|
|
80
|
-
|
|
81
|
-
Publish events from one part of your app and listen in another using asyncio.
|
|
82
|
-
|
|
83
|
-
```python
|
|
84
|
-
import asyncio
|
|
85
|
-
|
|
86
|
-
async def listener():
|
|
87
|
-
async with db.subscribe("system_events") as sub:
|
|
88
|
-
async for message in sub:
|
|
89
|
-
print(f"LISTENER: Received event -> {message['event']}")
|
|
90
|
-
|
|
91
|
-
async def publisher():
|
|
92
|
-
await asyncio.sleep(1)
|
|
93
|
-
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
94
|
-
|
|
95
|
-
# To run them concurrently:
|
|
96
|
-
# asyncio.run(asyncio.gather(listener(), publisher()))
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
## Roadmap
|
|
100
|
-
|
|
101
|
-
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
102
|
-
|
|
103
|
-
- **Vector Storage & Search**: Store NumPy vector embeddings and perform efficient k-nearest neighbor (k-NN) searches using `scipy.spatial.cKDTree`.
|
|
104
|
-
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
105
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks (e.g., managing users, products) with standard SQL.
|
|
106
|
-
|
|
107
|
-
## License
|
|
108
|
-
|
|
109
|
-
This project is licensed under the MIT License.
|
beaver_db-0.2.0/README.md
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
# beaver 🦫
|
|
2
|
-
|
|
3
|
-
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
4
|
-
|
|
5
|
-
`beaver` is the Backend for Embedded Asynchronous Vector & Event Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
6
|
-
|
|
7
|
-
Design Philosophy
|
|
8
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
9
|
-
|
|
10
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (sqlite3, asyncio). No external packages are required, making it incredibly lightweight and portable.
|
|
11
|
-
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Simpler features like key-value and list operations remain synchronous for ease of use.
|
|
12
|
-
- **Built for Local Applications**: Perfect for local AI tools, chatbots (streaming tokens), task management apps, desktop utilities, and prototypes that need persistent, structured data without network overhead.
|
|
13
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast, reliable, and will likely serve your needs for a long way before you need a "professional" database.
|
|
14
|
-
|
|
15
|
-
## Core Features
|
|
16
|
-
|
|
17
|
-
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
18
|
-
- **Persistent Key-Value Store**: A simple set/get interface for storing configuration, session data, or any other JSON-serializable object.
|
|
19
|
-
- **Pythonic List Management**: A fluent, Redis-like interface (db.list("name").push()) for managing persistent, ordered lists with support for indexing and slicing.
|
|
20
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
21
|
-
|
|
22
|
-
## Installation
|
|
23
|
-
|
|
24
|
-
```bash
|
|
25
|
-
pip install beaver-db
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
## Quickstart & API Guide
|
|
29
|
-
|
|
30
|
-
### 1. Initialization
|
|
31
|
-
|
|
32
|
-
All you need to do is import and instantiate the BeaverDB class with a file path.
|
|
33
|
-
|
|
34
|
-
```python
|
|
35
|
-
from beaver import BeaverDB
|
|
36
|
-
|
|
37
|
-
db = BeaverDB("my_application.db")
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
### 2. Key-Value Store
|
|
41
|
-
|
|
42
|
-
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
43
|
-
|
|
44
|
-
```python
|
|
45
|
-
# Set a value
|
|
46
|
-
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
47
|
-
|
|
48
|
-
# Get a value
|
|
49
|
-
config = db.get("app_config")
|
|
50
|
-
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
### 3. List Management
|
|
54
|
-
|
|
55
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
56
|
-
|
|
57
|
-
```python
|
|
58
|
-
# Get a wrapper for the 'tasks' list
|
|
59
|
-
tasks = db.list("daily_tasks")
|
|
60
|
-
|
|
61
|
-
# Push items to the list
|
|
62
|
-
tasks.push("Write the project report")
|
|
63
|
-
tasks.push("Send follow-up emails")
|
|
64
|
-
tasks.prepend("Plan the day's agenda") # Push to the front
|
|
65
|
-
|
|
66
|
-
# Use len() and indexing (including slices!)
|
|
67
|
-
print(f"There are {len(tasks)} tasks.")
|
|
68
|
-
print(f"The first task is: {tasks[0]}")
|
|
69
|
-
print(f"The rest is: {tasks[1:]}")
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
### 4. Asynchronous Pub/Sub
|
|
73
|
-
|
|
74
|
-
Publish events from one part of your app and listen in another using asyncio.
|
|
75
|
-
|
|
76
|
-
```python
|
|
77
|
-
import asyncio
|
|
78
|
-
|
|
79
|
-
async def listener():
|
|
80
|
-
async with db.subscribe("system_events") as sub:
|
|
81
|
-
async for message in sub:
|
|
82
|
-
print(f"LISTENER: Received event -> {message['event']}")
|
|
83
|
-
|
|
84
|
-
async def publisher():
|
|
85
|
-
await asyncio.sleep(1)
|
|
86
|
-
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
87
|
-
|
|
88
|
-
# To run them concurrently:
|
|
89
|
-
# asyncio.run(asyncio.gather(listener(), publisher()))
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
## Roadmap
|
|
93
|
-
|
|
94
|
-
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
95
|
-
|
|
96
|
-
- **Vector Storage & Search**: Store NumPy vector embeddings and perform efficient k-nearest neighbor (k-NN) searches using `scipy.spatial.cKDTree`.
|
|
97
|
-
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
98
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks (e.g., managing users, products) with standard SQL.
|
|
99
|
-
|
|
100
|
-
## License
|
|
101
|
-
|
|
102
|
-
This project is licensed under the MIT License.
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .core import BeaverDB, Subscriber
|
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Asynchronous, embedded, modern DB based on SQLite.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
|
|
8
|
-
# beaver 🦫
|
|
9
|
-
|
|
10
|
-
A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
|
|
11
|
-
|
|
12
|
-
`beaver` is the Backend for Embedded Asynchronous Vector & Event Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
|
|
13
|
-
|
|
14
|
-
Design Philosophy
|
|
15
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
16
|
-
|
|
17
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (sqlite3, asyncio). No external packages are required, making it incredibly lightweight and portable.
|
|
18
|
-
- **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Simpler features like key-value and list operations remain synchronous for ease of use.
|
|
19
|
-
- **Built for Local Applications**: Perfect for local AI tools, chatbots (streaming tokens), task management apps, desktop utilities, and prototypes that need persistent, structured data without network overhead.
|
|
20
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast, reliable, and will likely serve your needs for a long way before you need a "professional" database.
|
|
21
|
-
|
|
22
|
-
## Core Features
|
|
23
|
-
|
|
24
|
-
- **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
|
|
25
|
-
- **Persistent Key-Value Store**: A simple set/get interface for storing configuration, session data, or any other JSON-serializable object.
|
|
26
|
-
- **Pythonic List Management**: A fluent, Redis-like interface (db.list("name").push()) for managing persistent, ordered lists with support for indexing and slicing.
|
|
27
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
28
|
-
|
|
29
|
-
## Installation
|
|
30
|
-
|
|
31
|
-
```bash
|
|
32
|
-
pip install beaver-db
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
## Quickstart & API Guide
|
|
36
|
-
|
|
37
|
-
### 1. Initialization
|
|
38
|
-
|
|
39
|
-
All you need to do is import and instantiate the BeaverDB class with a file path.
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from beaver import BeaverDB
|
|
43
|
-
|
|
44
|
-
db = BeaverDB("my_application.db")
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
### 2. Key-Value Store
|
|
48
|
-
|
|
49
|
-
Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
|
|
50
|
-
|
|
51
|
-
```python
|
|
52
|
-
# Set a value
|
|
53
|
-
db.set("app_config", {"theme": "dark", "user_id": 123})
|
|
54
|
-
|
|
55
|
-
# Get a value
|
|
56
|
-
config = db.get("app_config")
|
|
57
|
-
print(f"Theme: {config['theme']}") # Output: Theme: dark
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### 3. List Management
|
|
61
|
-
|
|
62
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
63
|
-
|
|
64
|
-
```python
|
|
65
|
-
# Get a wrapper for the 'tasks' list
|
|
66
|
-
tasks = db.list("daily_tasks")
|
|
67
|
-
|
|
68
|
-
# Push items to the list
|
|
69
|
-
tasks.push("Write the project report")
|
|
70
|
-
tasks.push("Send follow-up emails")
|
|
71
|
-
tasks.prepend("Plan the day's agenda") # Push to the front
|
|
72
|
-
|
|
73
|
-
# Use len() and indexing (including slices!)
|
|
74
|
-
print(f"There are {len(tasks)} tasks.")
|
|
75
|
-
print(f"The first task is: {tasks[0]}")
|
|
76
|
-
print(f"The rest is: {tasks[1:]}")
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
### 4. Asynchronous Pub/Sub
|
|
80
|
-
|
|
81
|
-
Publish events from one part of your app and listen in another using asyncio.
|
|
82
|
-
|
|
83
|
-
```python
|
|
84
|
-
import asyncio
|
|
85
|
-
|
|
86
|
-
async def listener():
|
|
87
|
-
async with db.subscribe("system_events") as sub:
|
|
88
|
-
async for message in sub:
|
|
89
|
-
print(f"LISTENER: Received event -> {message['event']}")
|
|
90
|
-
|
|
91
|
-
async def publisher():
|
|
92
|
-
await asyncio.sleep(1)
|
|
93
|
-
await db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
94
|
-
|
|
95
|
-
# To run them concurrently:
|
|
96
|
-
# asyncio.run(asyncio.gather(listener(), publisher()))
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
## Roadmap
|
|
100
|
-
|
|
101
|
-
`beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
|
|
102
|
-
|
|
103
|
-
- **Vector Storage & Search**: Store NumPy vector embeddings and perform efficient k-nearest neighbor (k-NN) searches using `scipy.spatial.cKDTree`.
|
|
104
|
-
- **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
|
|
105
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks (e.g., managing users, products) with standard SQL.
|
|
106
|
-
|
|
107
|
-
## License
|
|
108
|
-
|
|
109
|
-
This project is licensed under the MIT License.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|