beaver-db 0.7.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver_db-0.7.1/PKG-INFO +179 -0
- beaver_db-0.7.1/README.md +168 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/collections.py +18 -0
- beaver_db-0.7.1/beaver_db.egg-info/PKG-INFO +179 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/pyproject.toml +1 -1
- beaver_db-0.7.0/PKG-INFO +0 -197
- beaver_db-0.7.0/README.md +0 -186
- beaver_db-0.7.0/beaver_db.egg-info/PKG-INFO +0 -197
- {beaver_db-0.7.0 → beaver_db-0.7.1}/LICENSE +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/__init__.py +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/channels.py +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/core.py +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/dicts.py +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver/lists.py +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver_db.egg-info/SOURCES.txt +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver_db.egg-info/dependency_links.txt +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver_db.egg-info/requires.txt +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/beaver_db.egg-info/top_level.txt +0 -0
- {beaver_db-0.7.0 → beaver_db-0.7.1}/setup.cfg +0 -0
beaver_db-0.7.1/PKG-INFO
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: numpy>=2.3.3
|
|
9
|
+
Requires-Dist: scipy>=1.16.2
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# beaver 🦫
|
|
13
|
+
|
|
14
|
+

|
|
15
|
+

|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
19
|
+
|
|
20
|
+
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
21
|
+
|
|
22
|
+
## Design Philosophy
|
|
23
|
+
|
|
24
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
25
|
+
|
|
26
|
+
- **Minimalistic, with Minimal Dependencies**: Uses only Python's standard library (`sqlite3`) plus `numpy` and `scipy`.
|
|
27
|
+
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
28
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
29
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
30
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
31
|
+
|
|
32
|
+
## Core Features
|
|
33
|
+
|
|
34
|
+
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
35
|
+
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
36
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
37
|
+
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
38
|
+
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
39
|
+
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
40
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install beaver-db
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quickstart
|
|
49
|
+
|
|
50
|
+
Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from beaver import BeaverDB, Document
|
|
54
|
+
|
|
55
|
+
# 1. Initialize the database
|
|
56
|
+
db = BeaverDB("data.db")
|
|
57
|
+
|
|
58
|
+
# 2. Use a namespaced dictionary for app configuration
|
|
59
|
+
config = db.dict("app_config")
|
|
60
|
+
config["theme"] = "dark"
|
|
61
|
+
print(f"Theme set to: {config['theme']}")
|
|
62
|
+
|
|
63
|
+
# 3. Use a persistent list to manage a task queue
|
|
64
|
+
tasks = db.list("daily_tasks")
|
|
65
|
+
tasks.push("Write the project report")
|
|
66
|
+
tasks.push("Deploy the new feature")
|
|
67
|
+
print(f"First task is: {tasks[0]}")
|
|
68
|
+
|
|
69
|
+
# 4. Use a collection for document storage and search
|
|
70
|
+
articles = db.collection("articles")
|
|
71
|
+
doc = Document(
|
|
72
|
+
id="sqlite-001",
|
|
73
|
+
content="SQLite is a powerful embedded database ideal for local apps."
|
|
74
|
+
)
|
|
75
|
+
articles.index(doc)
|
|
76
|
+
|
|
77
|
+
# Perform a full-text search
|
|
78
|
+
results = articles.match(query="database")
|
|
79
|
+
top_doc, rank = results[0]
|
|
80
|
+
print(f"FTS Result: '{top_doc.content}'")
|
|
81
|
+
|
|
82
|
+
db.close()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Things You Can Build with Beaver
|
|
86
|
+
|
|
87
|
+
Here are a few ideas to inspire your next project, showcasing how to combine Beaver's features to build powerful local applications.
|
|
88
|
+
|
|
89
|
+
### 1. User Authentication and Profile Store
|
|
90
|
+
|
|
91
|
+
Use a **namespaced dictionary** to create a simple and secure user store. The key can be the username, and the value can be a dictionary containing the hashed password and other profile information.
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
users = db.dict("user_profiles")
|
|
95
|
+
|
|
96
|
+
# Create a new user
|
|
97
|
+
users["alice"] = {
|
|
98
|
+
"hashed_password": "...",
|
|
99
|
+
"email": "alice@example.com",
|
|
100
|
+
"permissions": ["read", "write"]
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
# Retrieve a user's profile
|
|
104
|
+
alice_profile = users.get("alice")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 2. Chatbot Conversation History
|
|
108
|
+
|
|
109
|
+
A **persistent list** is perfect for storing the history of a conversation. Each time the user or the bot sends a message, just `push` it to the list. This maintains a chronological record of the entire dialogue.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
chat_history = db.list("conversation_with_user_123")
|
|
113
|
+
|
|
114
|
+
chat_history.push({"role": "user", "content": "Hello, Beaver!"})
|
|
115
|
+
chat_history.push({"role": "assistant", "content": "Hello! How can I help you today?"})
|
|
116
|
+
|
|
117
|
+
# Retrieve the full conversation
|
|
118
|
+
for message in chat_history:
|
|
119
|
+
print(f"{message['role']}: {message['content']}")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### 3. Build a RAG (Retrieval-Augmented Generation) System
|
|
123
|
+
|
|
124
|
+
Combine **vector search** and **full-text search** to build a powerful RAG pipeline for your local documents.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
# Get context for a user query like "fast python web frameworks"
|
|
128
|
+
vector_results = [doc for doc, _ in docs.search(vector=query_vector)]
|
|
129
|
+
text_results = [doc for doc, _ in docs.match(query="python web framework")]
|
|
130
|
+
|
|
131
|
+
# Combine and rerank for the best context
|
|
132
|
+
from beaver.collections import rerank
|
|
133
|
+
best_context = rerank(vector_results, text_results, weights=[0.6, 0.4])
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 4. Caching for Expensive API Calls
|
|
137
|
+
|
|
138
|
+
Leverage a **dictionary with a TTL (Time-To-Live)** to cache the results of slow network requests. This can dramatically speed up your application and reduce your reliance on external services.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
api_cache = db.dict("external_api_cache")
|
|
142
|
+
|
|
143
|
+
# Check the cache first
|
|
144
|
+
response = api_cache.get("weather_new_york")
|
|
145
|
+
if response is None:
|
|
146
|
+
# If not in cache, make the real API call
|
|
147
|
+
response = make_slow_weather_api_call("New York")
|
|
148
|
+
# Cache the result for 1 hour
|
|
149
|
+
api_cache.set("weather_new_york", response, ttl_seconds=3600)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## More Examples
|
|
153
|
+
|
|
154
|
+
For more in-depth examples, check out the scripts in the `examples/` directory:
|
|
155
|
+
|
|
156
|
+
- [`examples/kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
|
|
157
|
+
- [`examples/list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
|
|
158
|
+
- [`examples/vector.py`](examples/vector.py): Demonstrates how to index and search vector embeddings, including upserts.
|
|
159
|
+
- [`examples/fts.py`](examples/fts.py): A detailed look at full-text search, including targeted searches on specific metadata fields.
|
|
160
|
+
- [`examples/graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
|
|
161
|
+
- [`examples/pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system.
|
|
162
|
+
- [`examples/cache.py`](examples/cache.py): A practical example of using a dictionary with TTL as a cache for API calls.
|
|
163
|
+
- [`examples/rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
|
|
164
|
+
|
|
165
|
+
## Roadmap
|
|
166
|
+
|
|
167
|
+
These are some of the features and improvements planned for future releases:
|
|
168
|
+
|
|
169
|
+
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
170
|
+
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
171
|
+
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
172
|
+
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
173
|
+
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
174
|
+
|
|
175
|
+
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
This project is licensed under the MIT License.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# beaver 🦫
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
8
|
+
|
|
9
|
+
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
10
|
+
|
|
11
|
+
## Design Philosophy
|
|
12
|
+
|
|
13
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
14
|
+
|
|
15
|
+
- **Minimalistic, with Minimal Dependencies**: Uses only Python's standard library (`sqlite3`) plus `numpy` and `scipy`.
|
|
16
|
+
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
17
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
18
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
19
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
20
|
+
|
|
21
|
+
## Core Features
|
|
22
|
+
|
|
23
|
+
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
24
|
+
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
25
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
26
|
+
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
27
|
+
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
28
|
+
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
29
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install beaver-db
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quickstart
|
|
38
|
+
|
|
39
|
+
Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from beaver import BeaverDB, Document
|
|
43
|
+
|
|
44
|
+
# 1. Initialize the database
|
|
45
|
+
db = BeaverDB("data.db")
|
|
46
|
+
|
|
47
|
+
# 2. Use a namespaced dictionary for app configuration
|
|
48
|
+
config = db.dict("app_config")
|
|
49
|
+
config["theme"] = "dark"
|
|
50
|
+
print(f"Theme set to: {config['theme']}")
|
|
51
|
+
|
|
52
|
+
# 3. Use a persistent list to manage a task queue
|
|
53
|
+
tasks = db.list("daily_tasks")
|
|
54
|
+
tasks.push("Write the project report")
|
|
55
|
+
tasks.push("Deploy the new feature")
|
|
56
|
+
print(f"First task is: {tasks[0]}")
|
|
57
|
+
|
|
58
|
+
# 4. Use a collection for document storage and search
|
|
59
|
+
articles = db.collection("articles")
|
|
60
|
+
doc = Document(
|
|
61
|
+
id="sqlite-001",
|
|
62
|
+
content="SQLite is a powerful embedded database ideal for local apps."
|
|
63
|
+
)
|
|
64
|
+
articles.index(doc)
|
|
65
|
+
|
|
66
|
+
# Perform a full-text search
|
|
67
|
+
results = articles.match(query="database")
|
|
68
|
+
top_doc, rank = results[0]
|
|
69
|
+
print(f"FTS Result: '{top_doc.content}'")
|
|
70
|
+
|
|
71
|
+
db.close()
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Things You Can Build with Beaver
|
|
75
|
+
|
|
76
|
+
Here are a few ideas to inspire your next project, showcasing how to combine Beaver's features to build powerful local applications.
|
|
77
|
+
|
|
78
|
+
### 1. User Authentication and Profile Store
|
|
79
|
+
|
|
80
|
+
Use a **namespaced dictionary** to create a simple and secure user store. The key can be the username, and the value can be a dictionary containing the hashed password and other profile information.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
users = db.dict("user_profiles")
|
|
84
|
+
|
|
85
|
+
# Create a new user
|
|
86
|
+
users["alice"] = {
|
|
87
|
+
"hashed_password": "...",
|
|
88
|
+
"email": "alice@example.com",
|
|
89
|
+
"permissions": ["read", "write"]
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# Retrieve a user's profile
|
|
93
|
+
alice_profile = users.get("alice")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 2. Chatbot Conversation History
|
|
97
|
+
|
|
98
|
+
A **persistent list** is perfect for storing the history of a conversation. Each time the user or the bot sends a message, just `push` it to the list. This maintains a chronological record of the entire dialogue.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
chat_history = db.list("conversation_with_user_123")
|
|
102
|
+
|
|
103
|
+
chat_history.push({"role": "user", "content": "Hello, Beaver!"})
|
|
104
|
+
chat_history.push({"role": "assistant", "content": "Hello! How can I help you today?"})
|
|
105
|
+
|
|
106
|
+
# Retrieve the full conversation
|
|
107
|
+
for message in chat_history:
|
|
108
|
+
print(f"{message['role']}: {message['content']}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Build a RAG (Retrieval-Augmented Generation) System
|
|
112
|
+
|
|
113
|
+
Combine **vector search** and **full-text search** to build a powerful RAG pipeline for your local documents.
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
# Get context for a user query like "fast python web frameworks"
|
|
117
|
+
vector_results = [doc for doc, _ in docs.search(vector=query_vector)]
|
|
118
|
+
text_results = [doc for doc, _ in docs.match(query="python web framework")]
|
|
119
|
+
|
|
120
|
+
# Combine and rerank for the best context
|
|
121
|
+
from beaver.collections import rerank
|
|
122
|
+
best_context = rerank(vector_results, text_results, weights=[0.6, 0.4])
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### 4. Caching for Expensive API Calls
|
|
126
|
+
|
|
127
|
+
Leverage a **dictionary with a TTL (Time-To-Live)** to cache the results of slow network requests. This can dramatically speed up your application and reduce your reliance on external services.
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
api_cache = db.dict("external_api_cache")
|
|
131
|
+
|
|
132
|
+
# Check the cache first
|
|
133
|
+
response = api_cache.get("weather_new_york")
|
|
134
|
+
if response is None:
|
|
135
|
+
# If not in cache, make the real API call
|
|
136
|
+
response = make_slow_weather_api_call("New York")
|
|
137
|
+
# Cache the result for 1 hour
|
|
138
|
+
api_cache.set("weather_new_york", response, ttl_seconds=3600)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## More Examples
|
|
142
|
+
|
|
143
|
+
For more in-depth examples, check out the scripts in the `examples/` directory:
|
|
144
|
+
|
|
145
|
+
- [`examples/kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
|
|
146
|
+
- [`examples/list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
|
|
147
|
+
- [`examples/vector.py`](examples/vector.py): Demonstrates how to index and search vector embeddings, including upserts.
|
|
148
|
+
- [`examples/fts.py`](examples/fts.py): A detailed look at full-text search, including targeted searches on specific metadata fields.
|
|
149
|
+
- [`examples/graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
|
|
150
|
+
- [`examples/pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system.
|
|
151
|
+
- [`examples/cache.py`](examples/cache.py): A practical example of using a dictionary with TTL as a cache for API calls.
|
|
152
|
+
- [`examples/rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
|
|
153
|
+
|
|
154
|
+
## Roadmap
|
|
155
|
+
|
|
156
|
+
These are some of the features and improvements planned for future releases:
|
|
157
|
+
|
|
158
|
+
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
159
|
+
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
160
|
+
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
161
|
+
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
162
|
+
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
163
|
+
|
|
164
|
+
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
This project is licensed under the MIT License.
|
|
@@ -151,6 +151,24 @@ class CollectionManager:
|
|
|
151
151
|
(self._name,),
|
|
152
152
|
)
|
|
153
153
|
|
|
154
|
+
def __iter__(self):
    """Iterate over every document stored in this collection.

    Runs one SELECT against ``beaver_collections`` filtered by this
    collection's name and lazily yields one ``Document`` per row.

    Yields:
        Document: built from the row's ``item_id``, the decoded embedding
        (a list of floats read as float32, or ``None`` when
        ``item_vector`` is empty or NULL), and the JSON-decoded
        ``metadata`` expanded as keyword arguments.
    """
    cursor = self._conn.cursor()
    try:
        cursor.execute(
            "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
            (self._name,),
        )
        # Rows are read by column name; this assumes the connection uses a
        # mapping-style row factory such as sqlite3.Row -- TODO confirm.
        for row in cursor:
            raw_vector = row["item_vector"]
            embedding = (
                np.frombuffer(raw_vector, dtype=np.float32).tolist()
                if raw_vector
                else None
            )
            yield Document(
                id=row["item_id"], embedding=embedding, **json.loads(row["metadata"])
            )
    finally:
        # Because this is a generator, code placed after the loop would be
        # skipped if the caller stops iterating early or a row fails to
        # decode; ``finally`` closes the cursor in every case.
        cursor.close()
|
|
171
|
+
|
|
154
172
|
def refresh(self):
|
|
155
173
|
"""Forces a rebuild of the in-memory ANN index from data in SQLite."""
|
|
156
174
|
cursor = self._conn.cursor()
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: beaver-db
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: numpy>=2.3.3
|
|
9
|
+
Requires-Dist: scipy>=1.16.2
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# beaver 🦫
|
|
13
|
+
|
|
14
|
+

|
|
15
|
+

|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
19
|
+
|
|
20
|
+
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
21
|
+
|
|
22
|
+
## Design Philosophy
|
|
23
|
+
|
|
24
|
+
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
25
|
+
|
|
26
|
+
- **Minimalistic, with Minimal Dependencies**: Uses only Python's standard library (`sqlite3`) plus `numpy` and `scipy`.
|
|
27
|
+
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
28
|
+
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
29
|
+
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
30
|
+
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
31
|
+
|
|
32
|
+
## Core Features
|
|
33
|
+
|
|
34
|
+
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
35
|
+
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
36
|
+
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
|
|
37
|
+
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
38
|
+
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
39
|
+
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
40
|
+
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install beaver-db
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quickstart
|
|
49
|
+
|
|
50
|
+
Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from beaver import BeaverDB, Document
|
|
54
|
+
|
|
55
|
+
# 1. Initialize the database
|
|
56
|
+
db = BeaverDB("data.db")
|
|
57
|
+
|
|
58
|
+
# 2. Use a namespaced dictionary for app configuration
|
|
59
|
+
config = db.dict("app_config")
|
|
60
|
+
config["theme"] = "dark"
|
|
61
|
+
print(f"Theme set to: {config['theme']}")
|
|
62
|
+
|
|
63
|
+
# 3. Use a persistent list to manage a task queue
|
|
64
|
+
tasks = db.list("daily_tasks")
|
|
65
|
+
tasks.push("Write the project report")
|
|
66
|
+
tasks.push("Deploy the new feature")
|
|
67
|
+
print(f"First task is: {tasks[0]}")
|
|
68
|
+
|
|
69
|
+
# 4. Use a collection for document storage and search
|
|
70
|
+
articles = db.collection("articles")
|
|
71
|
+
doc = Document(
|
|
72
|
+
id="sqlite-001",
|
|
73
|
+
content="SQLite is a powerful embedded database ideal for local apps."
|
|
74
|
+
)
|
|
75
|
+
articles.index(doc)
|
|
76
|
+
|
|
77
|
+
# Perform a full-text search
|
|
78
|
+
results = articles.match(query="database")
|
|
79
|
+
top_doc, rank = results[0]
|
|
80
|
+
print(f"FTS Result: '{top_doc.content}'")
|
|
81
|
+
|
|
82
|
+
db.close()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Things You Can Build with Beaver
|
|
86
|
+
|
|
87
|
+
Here are a few ideas to inspire your next project, showcasing how to combine Beaver's features to build powerful local applications.
|
|
88
|
+
|
|
89
|
+
### 1. User Authentication and Profile Store
|
|
90
|
+
|
|
91
|
+
Use a **namespaced dictionary** to create a simple and secure user store. The key can be the username, and the value can be a dictionary containing the hashed password and other profile information.
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
users = db.dict("user_profiles")
|
|
95
|
+
|
|
96
|
+
# Create a new user
|
|
97
|
+
users["alice"] = {
|
|
98
|
+
"hashed_password": "...",
|
|
99
|
+
"email": "alice@example.com",
|
|
100
|
+
"permissions": ["read", "write"]
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
# Retrieve a user's profile
|
|
104
|
+
alice_profile = users.get("alice")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 2. Chatbot Conversation History
|
|
108
|
+
|
|
109
|
+
A **persistent list** is perfect for storing the history of a conversation. Each time the user or the bot sends a message, just `push` it to the list. This maintains a chronological record of the entire dialogue.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
chat_history = db.list("conversation_with_user_123")
|
|
113
|
+
|
|
114
|
+
chat_history.push({"role": "user", "content": "Hello, Beaver!"})
|
|
115
|
+
chat_history.push({"role": "assistant", "content": "Hello! How can I help you today?"})
|
|
116
|
+
|
|
117
|
+
# Retrieve the full conversation
|
|
118
|
+
for message in chat_history:
|
|
119
|
+
print(f"{message['role']}: {message['content']}")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### 3. Build a RAG (Retrieval-Augmented Generation) System
|
|
123
|
+
|
|
124
|
+
Combine **vector search** and **full-text search** to build a powerful RAG pipeline for your local documents.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
# Get context for a user query like "fast python web frameworks"
|
|
128
|
+
vector_results = [doc for doc, _ in docs.search(vector=query_vector)]
|
|
129
|
+
text_results = [doc for doc, _ in docs.match(query="python web framework")]
|
|
130
|
+
|
|
131
|
+
# Combine and rerank for the best context
|
|
132
|
+
from beaver.collections import rerank
|
|
133
|
+
best_context = rerank(vector_results, text_results, weights=[0.6, 0.4])
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 4. Caching for Expensive API Calls
|
|
137
|
+
|
|
138
|
+
Leverage a **dictionary with a TTL (Time-To-Live)** to cache the results of slow network requests. This can dramatically speed up your application and reduce your reliance on external services.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
api_cache = db.dict("external_api_cache")
|
|
142
|
+
|
|
143
|
+
# Check the cache first
|
|
144
|
+
response = api_cache.get("weather_new_york")
|
|
145
|
+
if response is None:
|
|
146
|
+
# If not in cache, make the real API call
|
|
147
|
+
response = make_slow_weather_api_call("New York")
|
|
148
|
+
# Cache the result for 1 hour
|
|
149
|
+
api_cache.set("weather_new_york", response, ttl_seconds=3600)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## More Examples
|
|
153
|
+
|
|
154
|
+
For more in-depth examples, check out the scripts in the `examples/` directory:
|
|
155
|
+
|
|
156
|
+
- [`examples/kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
|
|
157
|
+
- [`examples/list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
|
|
158
|
+
- [`examples/vector.py`](examples/vector.py): Demonstrates how to index and search vector embeddings, including upserts.
|
|
159
|
+
- [`examples/fts.py`](examples/fts.py): A detailed look at full-text search, including targeted searches on specific metadata fields.
|
|
160
|
+
- [`examples/graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
|
|
161
|
+
- [`examples/pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system.
|
|
162
|
+
- [`examples/cache.py`](examples/cache.py): A practical example of using a dictionary with TTL as a cache for API calls.
|
|
163
|
+
- [`examples/rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
|
|
164
|
+
|
|
165
|
+
## Roadmap
|
|
166
|
+
|
|
167
|
+
These are some of the features and improvements planned for future releases:
|
|
168
|
+
|
|
169
|
+
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
170
|
+
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
171
|
+
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
172
|
+
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
173
|
+
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
174
|
+
|
|
175
|
+
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
This project is licensed under the MIT License.
|
beaver_db-0.7.0/PKG-INFO
DELETED
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.7.0
|
|
4
|
-
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
License-File: LICENSE
|
|
8
|
-
Requires-Dist: numpy>=2.3.3
|
|
9
|
-
Requires-Dist: scipy>=1.16.2
|
|
10
|
-
Dynamic: license-file
|
|
11
|
-
|
|
12
|
-
# beaver 🦫
|
|
13
|
-
|
|
14
|
-

|
|
15
|
-

|
|
16
|
-

|
|
17
|
-
|
|
18
|
-
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
19
|
-
|
|
20
|
-
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
21
|
-
|
|
22
|
-
## Design Philosophy
|
|
23
|
-
|
|
24
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
25
|
-
|
|
26
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`) and `numpy`/`scipy`.
|
|
27
|
-
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
28
|
-
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
29
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
30
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
31
|
-
|
|
32
|
-
## Core Features
|
|
33
|
-
|
|
34
|
-
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
35
|
-
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
36
|
-
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists, with all operations in constant time.
|
|
37
|
-
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
38
|
-
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
39
|
-
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
40
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
41
|
-
|
|
42
|
-
## Installation
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
pip install beaver-db
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
## Quickstart & API Guide
|
|
49
|
-
|
|
50
|
-
### Initialization
|
|
51
|
-
|
|
52
|
-
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
53
|
-
|
|
54
|
-
```python
|
|
55
|
-
from beaver import BeaverDB, Document
|
|
56
|
-
|
|
57
|
-
db = BeaverDB("my_application.db")
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### Namespaced Dictionaries
|
|
61
|
-
|
|
62
|
-
Use `db.dict()` to get a dictionary-like object for a specific namespace. The value can be any JSON-encodable object.
|
|
63
|
-
|
|
64
|
-
```python
|
|
65
|
-
# Get a handle to the 'app_config' namespace
|
|
66
|
-
config = db.dict("app_config")
|
|
67
|
-
|
|
68
|
-
# Set values using standard dictionary syntax
|
|
69
|
-
config["theme"] = "dark"
|
|
70
|
-
config["user_id"] = 123
|
|
71
|
-
|
|
72
|
-
# Get a value
|
|
73
|
-
theme = config.get("theme")
|
|
74
|
-
print(f"Theme: {theme}") # Output: Theme: dark
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
### List Management
|
|
78
|
-
|
|
79
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
80
|
-
|
|
81
|
-
```python
|
|
82
|
-
tasks = db.list("daily_tasks")
|
|
83
|
-
tasks.push("Write the project report")
|
|
84
|
-
tasks.prepend("Plan the day's agenda")
|
|
85
|
-
print(f"The first task is: {tasks[0]}")
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
### Vector & Text Search
|
|
89
|
-
|
|
90
|
-
Store `Document` objects containing vector embeddings and metadata. When you index a document, its string fields are automatically made available for full-text search.
|
|
91
|
-
|
|
92
|
-
```python
|
|
93
|
-
# Get a handle to a collection
|
|
94
|
-
docs = db.collection("articles")
|
|
95
|
-
|
|
96
|
-
# Create and index a multi-modal document
|
|
97
|
-
doc = Document(
|
|
98
|
-
id="sql-001",
|
|
99
|
-
embedding=[0.8, 0.1, 0.1],
|
|
100
|
-
content="SQLite is a powerful embedded database ideal for local apps.",
|
|
101
|
-
author="John Smith"
|
|
102
|
-
)
|
|
103
|
-
docs.index(doc)
|
|
104
|
-
|
|
105
|
-
# 1. Perform a vector search to find semantically similar documents
|
|
106
|
-
query_vector = [0.7, 0.2, 0.2]
|
|
107
|
-
vector_results = docs.search(vector=query_vector, top_k=3)
|
|
108
|
-
top_doc, distance = vector_results[0]
|
|
109
|
-
print(f"Vector Search Result: {top_doc.content} (distance: {distance:.2f})")
|
|
110
|
-
|
|
111
|
-
# 2. Perform a full-text search to find documents with specific words
|
|
112
|
-
text_results = docs.match(query="database", top_k=3)
|
|
113
|
-
top_doc, rank = text_results[0]
|
|
114
|
-
print(f"Full-Text Search Result: {top_doc.content} (rank: {rank:.2f})")
|
|
115
|
-
|
|
116
|
-
# 3. Combine both vector and text search for refined results
|
|
117
|
-
from beaver.collections import rerank
|
|
118
|
-
combined_results = rerank([d for d,_ in vector_results], [d for d,_ in text_results], weights=[2,1])
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
### Graph Traversal
|
|
122
|
-
|
|
123
|
-
Create relationships between documents and traverse them.
|
|
124
|
-
|
|
125
|
-
```python
|
|
126
|
-
from beaver import WalkDirection
|
|
127
|
-
|
|
128
|
-
# Create documents
|
|
129
|
-
alice = Document(id="alice", name="Alice")
|
|
130
|
-
bob = Document(id="bob", name="Bob")
|
|
131
|
-
charlie = Document(id="charlie", name="Charlie")
|
|
132
|
-
|
|
133
|
-
# Index them
|
|
134
|
-
social_net = db.collection("social")
|
|
135
|
-
social_net.index(alice)
|
|
136
|
-
social_net.index(bob)
|
|
137
|
-
social_net.index(charlie)
|
|
138
|
-
|
|
139
|
-
# Create edges
|
|
140
|
-
social_net.connect(alice, bob, label="FOLLOWS")
|
|
141
|
-
social_net.connect(bob, charlie, label="FOLLOWS")
|
|
142
|
-
|
|
143
|
-
# Find direct neighbors
|
|
144
|
-
following = social_net.neighbors(alice, label="FOLLOWS")
|
|
145
|
-
print(f"Alice follows: {[p.id for p in following]}")
|
|
146
|
-
|
|
147
|
-
# Perform a multi-hop walk to find friends of friends
|
|
148
|
-
foaf = social_net.walk(
|
|
149
|
-
source=alice,
|
|
150
|
-
labels=["FOLLOWS"],
|
|
151
|
-
depth=2,
|
|
152
|
-
direction=WalkDirection.OUTGOING,
|
|
153
|
-
)
|
|
154
|
-
print(f"Alice's extended network: {[p.id for p in foaf]}")
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### Synchronous Pub/Sub
|
|
158
|
-
|
|
159
|
-
Publish events from one part of your app and listen in another using threads.
|
|
160
|
-
|
|
161
|
-
```python
|
|
162
|
-
import threading
|
|
163
|
-
|
|
164
|
-
def listener():
|
|
165
|
-
for message in db.subscribe("system_events"):
|
|
166
|
-
print(f"LISTENER: Received -> {message}")
|
|
167
|
-
if message.get("event") == "shutdown":
|
|
168
|
-
break
|
|
169
|
-
|
|
170
|
-
def publisher():
|
|
171
|
-
db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
172
|
-
db.publish("system_events", {"event": "shutdown"})
|
|
173
|
-
|
|
174
|
-
# Run them concurrently
|
|
175
|
-
listener_thread = threading.Thread(target=listener)
|
|
176
|
-
publisher_thread = threading.Thread(target=publisher)
|
|
177
|
-
listener_thread.start()
|
|
178
|
-
publisher_thread.start()
|
|
179
|
-
listener_thread.join()
|
|
180
|
-
publisher_thread.join()
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
## Roadmap
|
|
184
|
-
|
|
185
|
-
These are some of the features and improvements planned for future releases:
|
|
186
|
-
|
|
187
|
-
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
188
|
-
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
189
|
-
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
190
|
-
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
191
|
-
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
192
|
-
|
|
193
|
-
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
194
|
-
|
|
195
|
-
## License
|
|
196
|
-
|
|
197
|
-
This project is licensed under the MIT License.
|
beaver_db-0.7.0/README.md
DELETED
|
@@ -1,186 +0,0 @@
|
|
|
1
|
-
# beaver 🦫
|
|
2
|
-
|
|
3
|
-

|
|
4
|
-

|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
8
|
-
|
|
9
|
-
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
10
|
-
|
|
11
|
-
## Design Philosophy
|
|
12
|
-
|
|
13
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
14
|
-
|
|
15
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`) and `numpy`/`scipy`.
|
|
16
|
-
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
17
|
-
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
18
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
19
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
20
|
-
|
|
21
|
-
## Core Features
|
|
22
|
-
|
|
23
|
-
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
24
|
-
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
25
|
-
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists, with all operations in constant time.
|
|
26
|
-
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
27
|
-
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
28
|
-
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
29
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
30
|
-
|
|
31
|
-
## Installation
|
|
32
|
-
|
|
33
|
-
```bash
|
|
34
|
-
pip install beaver-db
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
## Quickstart & API Guide
|
|
38
|
-
|
|
39
|
-
### Initialization
|
|
40
|
-
|
|
41
|
-
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
42
|
-
|
|
43
|
-
```python
|
|
44
|
-
from beaver import BeaverDB, Document
|
|
45
|
-
|
|
46
|
-
db = BeaverDB("my_application.db")
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### Namespaced Dictionaries
|
|
50
|
-
|
|
51
|
-
Use `db.dict()` to get a dictionary-like object for a specific namespace. The value can be any JSON-encodable object.
|
|
52
|
-
|
|
53
|
-
```python
|
|
54
|
-
# Get a handle to the 'app_config' namespace
|
|
55
|
-
config = db.dict("app_config")
|
|
56
|
-
|
|
57
|
-
# Set values using standard dictionary syntax
|
|
58
|
-
config["theme"] = "dark"
|
|
59
|
-
config["user_id"] = 123
|
|
60
|
-
|
|
61
|
-
# Get a value
|
|
62
|
-
theme = config.get("theme")
|
|
63
|
-
print(f"Theme: {theme}") # Output: Theme: dark
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
### List Management
|
|
67
|
-
|
|
68
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
69
|
-
|
|
70
|
-
```python
|
|
71
|
-
tasks = db.list("daily_tasks")
|
|
72
|
-
tasks.push("Write the project report")
|
|
73
|
-
tasks.prepend("Plan the day's agenda")
|
|
74
|
-
print(f"The first task is: {tasks[0]}")
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
### Vector & Text Search
|
|
78
|
-
|
|
79
|
-
Store `Document` objects containing vector embeddings and metadata. When you index a document, its string fields are automatically made available for full-text search.
|
|
80
|
-
|
|
81
|
-
```python
|
|
82
|
-
# Get a handle to a collection
|
|
83
|
-
docs = db.collection("articles")
|
|
84
|
-
|
|
85
|
-
# Create and index a multi-modal document
|
|
86
|
-
doc = Document(
|
|
87
|
-
id="sql-001",
|
|
88
|
-
embedding=[0.8, 0.1, 0.1],
|
|
89
|
-
content="SQLite is a powerful embedded database ideal for local apps.",
|
|
90
|
-
author="John Smith"
|
|
91
|
-
)
|
|
92
|
-
docs.index(doc)
|
|
93
|
-
|
|
94
|
-
# 1. Perform a vector search to find semantically similar documents
|
|
95
|
-
query_vector = [0.7, 0.2, 0.2]
|
|
96
|
-
vector_results = docs.search(vector=query_vector, top_k=3)
|
|
97
|
-
top_doc, distance = vector_results[0]
|
|
98
|
-
print(f"Vector Search Result: {top_doc.content} (distance: {distance:.2f})")
|
|
99
|
-
|
|
100
|
-
# 2. Perform a full-text search to find documents with specific words
|
|
101
|
-
text_results = docs.match(query="database", top_k=3)
|
|
102
|
-
top_doc, rank = text_results[0]
|
|
103
|
-
print(f"Full-Text Search Result: {top_doc.content} (rank: {rank:.2f})")
|
|
104
|
-
|
|
105
|
-
# 3. Combine both vector and text search for refined results
|
|
106
|
-
from beaver.collections import rerank
|
|
107
|
-
combined_results = rerank([d for d,_ in vector_results], [d for d,_ in text_results], weights=[2,1])
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
### Graph Traversal
|
|
111
|
-
|
|
112
|
-
Create relationships between documents and traverse them.
|
|
113
|
-
|
|
114
|
-
```python
|
|
115
|
-
from beaver import WalkDirection
|
|
116
|
-
|
|
117
|
-
# Create documents
|
|
118
|
-
alice = Document(id="alice", name="Alice")
|
|
119
|
-
bob = Document(id="bob", name="Bob")
|
|
120
|
-
charlie = Document(id="charlie", name="Charlie")
|
|
121
|
-
|
|
122
|
-
# Index them
|
|
123
|
-
social_net = db.collection("social")
|
|
124
|
-
social_net.index(alice)
|
|
125
|
-
social_net.index(bob)
|
|
126
|
-
social_net.index(charlie)
|
|
127
|
-
|
|
128
|
-
# Create edges
|
|
129
|
-
social_net.connect(alice, bob, label="FOLLOWS")
|
|
130
|
-
social_net.connect(bob, charlie, label="FOLLOWS")
|
|
131
|
-
|
|
132
|
-
# Find direct neighbors
|
|
133
|
-
following = social_net.neighbors(alice, label="FOLLOWS")
|
|
134
|
-
print(f"Alice follows: {[p.id for p in following]}")
|
|
135
|
-
|
|
136
|
-
# Perform a multi-hop walk to find friends of friends
|
|
137
|
-
foaf = social_net.walk(
|
|
138
|
-
source=alice,
|
|
139
|
-
labels=["FOLLOWS"],
|
|
140
|
-
depth=2,
|
|
141
|
-
direction=WalkDirection.OUTGOING,
|
|
142
|
-
)
|
|
143
|
-
print(f"Alice's extended network: {[p.id for p in foaf]}")
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
### Synchronous Pub/Sub
|
|
147
|
-
|
|
148
|
-
Publish events from one part of your app and listen in another using threads.
|
|
149
|
-
|
|
150
|
-
```python
|
|
151
|
-
import threading
|
|
152
|
-
|
|
153
|
-
def listener():
|
|
154
|
-
for message in db.subscribe("system_events"):
|
|
155
|
-
print(f"LISTENER: Received -> {message}")
|
|
156
|
-
if message.get("event") == "shutdown":
|
|
157
|
-
break
|
|
158
|
-
|
|
159
|
-
def publisher():
|
|
160
|
-
db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
161
|
-
db.publish("system_events", {"event": "shutdown"})
|
|
162
|
-
|
|
163
|
-
# Run them concurrently
|
|
164
|
-
listener_thread = threading.Thread(target=listener)
|
|
165
|
-
publisher_thread = threading.Thread(target=publisher)
|
|
166
|
-
listener_thread.start()
|
|
167
|
-
publisher_thread.start()
|
|
168
|
-
listener_thread.join()
|
|
169
|
-
publisher_thread.join()
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
## Roadmap
|
|
173
|
-
|
|
174
|
-
These are some of the features and improvements planned for future releases:
|
|
175
|
-
|
|
176
|
-
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
177
|
-
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
178
|
-
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
179
|
-
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
180
|
-
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
181
|
-
|
|
182
|
-
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
183
|
-
|
|
184
|
-
## License
|
|
185
|
-
|
|
186
|
-
This project is licensed under the MIT License.
|
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: beaver-db
|
|
3
|
-
Version: 0.7.0
|
|
4
|
-
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
|
-
Requires-Python: >=3.13
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
License-File: LICENSE
|
|
8
|
-
Requires-Dist: numpy>=2.3.3
|
|
9
|
-
Requires-Dist: scipy>=1.16.2
|
|
10
|
-
Dynamic: license-file
|
|
11
|
-
|
|
12
|
-
# beaver 🦫
|
|
13
|
-
|
|
14
|
-

|
|
15
|
-

|
|
16
|
-

|
|
17
|
-
|
|
18
|
-
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
19
|
-
|
|
20
|
-
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
21
|
-
|
|
22
|
-
## Design Philosophy
|
|
23
|
-
|
|
24
|
-
`beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
|
|
25
|
-
|
|
26
|
-
- **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`) and `numpy`/`scipy`.
|
|
27
|
-
- **Synchronous & Thread-Safe**: Designed for simplicity and safety in multi-threaded environments.
|
|
28
|
-
- **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
|
|
29
|
-
- **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. The vector search is accelerated with an in-memory k-d tree.
|
|
30
|
-
- **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
|
|
31
|
-
|
|
32
|
-
## Core Features
|
|
33
|
-
|
|
34
|
-
- **Synchronous Pub/Sub**: A simple, thread-safe, Redis-like publish-subscribe system for real-time messaging.
|
|
35
|
-
- **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
|
|
36
|
-
- **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists, with all operations in constant time.
|
|
37
|
-
- **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
|
|
38
|
-
- **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
|
|
39
|
-
- **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
|
|
40
|
-
- **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
|
|
41
|
-
|
|
42
|
-
## Installation
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
pip install beaver-db
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
## Quickstart & API Guide
|
|
49
|
-
|
|
50
|
-
### Initialization
|
|
51
|
-
|
|
52
|
-
All you need to do is import and instantiate the `BeaverDB` class with a file path.
|
|
53
|
-
|
|
54
|
-
```python
|
|
55
|
-
from beaver import BeaverDB, Document
|
|
56
|
-
|
|
57
|
-
db = BeaverDB("my_application.db")
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### Namespaced Dictionaries
|
|
61
|
-
|
|
62
|
-
Use `db.dict()` to get a dictionary-like object for a specific namespace. The value can be any JSON-encodable object.
|
|
63
|
-
|
|
64
|
-
```python
|
|
65
|
-
# Get a handle to the 'app_config' namespace
|
|
66
|
-
config = db.dict("app_config")
|
|
67
|
-
|
|
68
|
-
# Set values using standard dictionary syntax
|
|
69
|
-
config["theme"] = "dark"
|
|
70
|
-
config["user_id"] = 123
|
|
71
|
-
|
|
72
|
-
# Get a value
|
|
73
|
-
theme = config.get("theme")
|
|
74
|
-
print(f"Theme: {theme}") # Output: Theme: dark
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
### List Management
|
|
78
|
-
|
|
79
|
-
Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
|
|
80
|
-
|
|
81
|
-
```python
|
|
82
|
-
tasks = db.list("daily_tasks")
|
|
83
|
-
tasks.push("Write the project report")
|
|
84
|
-
tasks.prepend("Plan the day's agenda")
|
|
85
|
-
print(f"The first task is: {tasks[0]}")
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
### Vector & Text Search
|
|
89
|
-
|
|
90
|
-
Store `Document` objects containing vector embeddings and metadata. When you index a document, its string fields are automatically made available for full-text search.
|
|
91
|
-
|
|
92
|
-
```python
|
|
93
|
-
# Get a handle to a collection
|
|
94
|
-
docs = db.collection("articles")
|
|
95
|
-
|
|
96
|
-
# Create and index a multi-modal document
|
|
97
|
-
doc = Document(
|
|
98
|
-
id="sql-001",
|
|
99
|
-
embedding=[0.8, 0.1, 0.1],
|
|
100
|
-
content="SQLite is a powerful embedded database ideal for local apps.",
|
|
101
|
-
author="John Smith"
|
|
102
|
-
)
|
|
103
|
-
docs.index(doc)
|
|
104
|
-
|
|
105
|
-
# 1. Perform a vector search to find semantically similar documents
|
|
106
|
-
query_vector = [0.7, 0.2, 0.2]
|
|
107
|
-
vector_results = docs.search(vector=query_vector, top_k=3)
|
|
108
|
-
top_doc, distance = vector_results[0]
|
|
109
|
-
print(f"Vector Search Result: {top_doc.content} (distance: {distance:.2f})")
|
|
110
|
-
|
|
111
|
-
# 2. Perform a full-text search to find documents with specific words
|
|
112
|
-
text_results = docs.match(query="database", top_k=3)
|
|
113
|
-
top_doc, rank = text_results[0]
|
|
114
|
-
print(f"Full-Text Search Result: {top_doc.content} (rank: {rank:.2f})")
|
|
115
|
-
|
|
116
|
-
# 3. Combine both vector and text search for refined results
|
|
117
|
-
from beaver.collections import rerank
|
|
118
|
-
combined_results = rerank([d for d,_ in vector_results], [d for d,_ in text_results], weights=[2,1])
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
### Graph Traversal
|
|
122
|
-
|
|
123
|
-
Create relationships between documents and traverse them.
|
|
124
|
-
|
|
125
|
-
```python
|
|
126
|
-
from beaver import WalkDirection
|
|
127
|
-
|
|
128
|
-
# Create documents
|
|
129
|
-
alice = Document(id="alice", name="Alice")
|
|
130
|
-
bob = Document(id="bob", name="Bob")
|
|
131
|
-
charlie = Document(id="charlie", name="Charlie")
|
|
132
|
-
|
|
133
|
-
# Index them
|
|
134
|
-
social_net = db.collection("social")
|
|
135
|
-
social_net.index(alice)
|
|
136
|
-
social_net.index(bob)
|
|
137
|
-
social_net.index(charlie)
|
|
138
|
-
|
|
139
|
-
# Create edges
|
|
140
|
-
social_net.connect(alice, bob, label="FOLLOWS")
|
|
141
|
-
social_net.connect(bob, charlie, label="FOLLOWS")
|
|
142
|
-
|
|
143
|
-
# Find direct neighbors
|
|
144
|
-
following = social_net.neighbors(alice, label="FOLLOWS")
|
|
145
|
-
print(f"Alice follows: {[p.id for p in following]}")
|
|
146
|
-
|
|
147
|
-
# Perform a multi-hop walk to find friends of friends
|
|
148
|
-
foaf = social_net.walk(
|
|
149
|
-
source=alice,
|
|
150
|
-
labels=["FOLLOWS"],
|
|
151
|
-
depth=2,
|
|
152
|
-
direction=WalkDirection.OUTGOING,
|
|
153
|
-
)
|
|
154
|
-
print(f"Alice's extended network: {[p.id for p in foaf]}")
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### Synchronous Pub/Sub
|
|
158
|
-
|
|
159
|
-
Publish events from one part of your app and listen in another using threads.
|
|
160
|
-
|
|
161
|
-
```python
|
|
162
|
-
import threading
|
|
163
|
-
|
|
164
|
-
def listener():
|
|
165
|
-
for message in db.subscribe("system_events"):
|
|
166
|
-
print(f"LISTENER: Received -> {message}")
|
|
167
|
-
if message.get("event") == "shutdown":
|
|
168
|
-
break
|
|
169
|
-
|
|
170
|
-
def publisher():
|
|
171
|
-
db.publish("system_events", {"event": "user_login", "user": "alice"})
|
|
172
|
-
db.publish("system_events", {"event": "shutdown"})
|
|
173
|
-
|
|
174
|
-
# Run them concurrently
|
|
175
|
-
listener_thread = threading.Thread(target=listener)
|
|
176
|
-
publisher_thread = threading.Thread(target=publisher)
|
|
177
|
-
listener_thread.start()
|
|
178
|
-
publisher_thread.start()
|
|
179
|
-
listener_thread.join()
|
|
180
|
-
publisher_thread.join()
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
## Roadmap
|
|
184
|
-
|
|
185
|
-
These are some of the features and improvements planned for future releases:
|
|
186
|
-
|
|
187
|
-
- **Fuzzy search**: Implement fuzzy matching capabilities for text search.
|
|
188
|
-
- **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
|
|
189
|
-
- **Priority Queues**: Introduce a priority queue data structure for task management.
|
|
190
|
-
- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
|
|
191
|
-
- **Async API**: Comprehensive async support with on-demand wrappers for all collections.
|
|
192
|
-
|
|
193
|
-
Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
|
|
194
|
-
|
|
195
|
-
## License
|
|
196
|
-
|
|
197
|
-
This project is licensed under the MIT License.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|