beaver-db 0.7.1__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of beaver-db might be problematic.
- {beaver_db-0.7.1 → beaver_db-0.9.0}/PKG-INFO +55 -21
- {beaver_db-0.7.1 → beaver_db-0.9.0}/README.md +54 -20
- beaver_db-0.9.0/beaver/channels.py +185 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/core.py +51 -19
- beaver_db-0.9.0/beaver/queues.py +87 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/PKG-INFO +55 -21
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/SOURCES.txt +1 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/pyproject.toml +1 -1
- beaver_db-0.7.1/beaver/channels.py +0 -54
- {beaver_db-0.7.1 → beaver_db-0.9.0}/LICENSE +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/__init__.py +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/collections.py +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/dicts.py +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/lists.py +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/dependency_links.txt +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/requires.txt +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/top_level.txt +0 -0
- {beaver_db-0.7.1 → beaver_db-0.9.0}/setup.cfg +0 -0
{beaver_db-0.7.1 → beaver_db-0.9.0}/PKG-INFO (+55 -21)

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: beaver-db
-Version: 0.7.1
+Version: 0.9.0
 Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
 Requires-Python: >=3.13
 Description-Content-Type: text/markdown
@@ -9,11 +9,11 @@ Requires-Dist: numpy>=2.3.3
 Requires-Dist: scipy>=1.16.2
 Dynamic: license-file
 
-
+I've updated the README to highlight the new high-efficiency, thread-safe, and process-safe pub/sub system. I've also added an example of how you can use it to build real-time, event-driven applications.
+
+Here are the changes:
 
-
-…
-…
+# beaver 🦫
 
 A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
 
@@ -31,9 +31,10 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
 
 ## Core Features
 
-- **…
+- **High-Efficiency Pub/Sub**: A powerful, thread and process-safe publish-subscribe system for real-time messaging with a fan-out architecture.
 - **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
 - **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
+- **Persistent Priority Queue**: A high-performance, persistent queue that always returns the item with the highest priority, perfect for task management.
 - **Efficient Vector Storage & Search**: Store vector embeddings and perform fast approximate nearest neighbor searches using an in-memory k-d tree.
 - **Full-Text Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine.
 - **Graph Traversal**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
@@ -86,7 +87,24 @@ db.close()
 
 Here are a few ideas to inspire your next project, showcasing how to combine Beaver's features to build powerful local applications.
 
-### 1. …
+### 1. AI Agent Task Management
+
+Use a **persistent priority queue** to manage tasks for an AI agent. This ensures the agent always works on the most important task first, even if the application restarts.
+
+```python
+tasks = db.queue("agent_tasks")
+
+# Tasks are added with a priority (lower is higher)
+tasks.put({"action": "summarize_news"}, priority=10)
+tasks.put({"action": "respond_to_user"}, priority=1)
+tasks.put({"action": "run_backup"}, priority=20)
+
+# The agent retrieves the highest-priority task
+next_task = tasks.get()  # -> Returns the "respond_to_user" task
+print(f"Agent's next task: {next_task.data['action']}")
+```
+
+### 2. User Authentication and Profile Store
 
 Use a **namespaced dictionary** to create a simple and secure user store. The key can be the username, and the value can be a dictionary containing the hashed password and other profile information.
 
@@ -104,7 +122,7 @@ users["alice"] = {
 alice_profile = users.get("alice")
 ```
 
-### …
+### 3. Chatbot Conversation History
 
 A **persistent list** is perfect for storing the history of a conversation. Each time the user or the bot sends a message, just `push` it to the list. This maintains a chronological record of the entire dialogue.
 
@@ -119,7 +137,7 @@ for message in chat_history:
     print(f"{message['role']}: {message['content']}")
 ```
 
-### …
+### 4. Build a RAG (Retrieval-Augmented Generation) System
 
 Combine **vector search** and **full-text search** to build a powerful RAG pipeline for your local documents.
 
@@ -133,7 +151,7 @@ from beaver.collections import rerank
 best_context = rerank(vector_results, text_results, weights=[0.6, 0.4])
 ```
 
-### …
+### 5. Caching for Expensive API Calls
 
 Leverage a **dictionary with a TTL (Time-To-Live)** to cache the results of slow network requests. This can dramatically speed up your application and reduce your reliance on external services.
 
@@ -149,18 +167,36 @@ if response is None:
     api_cache.set("weather_new_york", response, ttl_seconds=3600)
 ```
 
+### 6. Real-time Event-Driven Systems
+
+Use the **high-efficiency pub/sub system** to build applications where different components react to events in real-time. This is perfect for decoupled systems, real-time UIs, or monitoring services.
+
+```python
+# In one process or thread (e.g., a monitoring service)
+system_events = db.channel("system_events")
+system_events.publish({"event": "user_login", "user_id": "alice"})
+
+# In another process or thread (e.g., a UI updater or logger)
+with db.channel("system_events").subscribe() as listener:
+    for message in listener.listen():
+        print(f"Event received: {message}")
+        # >> Event received: {'event': 'user_login', 'user_id': 'alice'}
+```
+
 ## More Examples
 
 For more in-depth examples, check out the scripts in the `examples/` directory:
 
-- [`examples/kvstore.py`](…
-- [`examples/list.py`](…
-- [`examples/…
-- [`examples/…
-- [`examples/…
-- [`examples/…
-- [`examples/…
-- [`examples/…
+- [`examples/kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
+- [`examples/list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
+- [`examples/queue.py`](examples/queue.py): A practical example of using the persistent priority queue for task management.
+- [`examples/vector.py`](examples/vector.py): Demonstrates how to index and search vector embeddings, including upserts.
+- [`examples/fts.py`](examples/fts.py): A detailed look at full-text search, including targeted searches on specific metadata fields.
+- [`examples/graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
+- [`examples/pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system in a single process.
+- [`examples/publisher.py`](examples/publisher.py) and [`examples/subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
+- [`examples/cache.py`](examples/cache.py): A practical example of using a dictionary with TTL as a cache for API calls.
+- [`examples/rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
 
 ## Roadmap
 
@@ -168,11 +204,9 @@ These are some of the features and improvements planned for future releases:
 
 - **Fuzzy search**: Implement fuzzy matching capabilities for text search.
 - **Faster ANN**: Explore integrating more advanced ANN libraries like `faiss` for improved vector search performance.
-- **Priority Queues**: Introduce a priority queue data structure for task management.
-- **Improved Pub/Sub**: Fan-out implementation with a more Pythonic API.
 - **Async API**: Comprehensive async support with on-demand wrappers for all collections.
 
-Check out the [roadmap](…
+Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
 
 ## License
 
````
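The two README additions above pair naturally: the priority queue feeds an agent loop while a channel broadcasts its progress. Below is a minimal sketch under the same assumptions as the README snippets (an open `BeaverDB` instance named `db`), using only the `put`/`get`, `len`, and `publish` calls shown in the diff.

```python
tasks = db.queue("agent_tasks")
status = db.channel("agent_status")

tasks.put({"action": "summarize_news"}, priority=10)
tasks.put({"action": "respond_to_user"}, priority=1)

while len(tasks) > 0:
    task = tasks.get()  # always the lowest priority number first
    status.publish({"started": task.data["action"]})
    # ... perform the actual work here ...
    status.publish({"finished": task.data["action"]})
```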
{beaver_db-0.7.1 → beaver_db-0.9.0}/README.md (+54 -20)

The README.md changes are identical to the README content shown in the PKG-INFO diff above; only the packaging metadata lines are absent, so the hunk offsets are shifted.
beaver_db-0.9.0/beaver/channels.py (new file, +185 -0)

```python
import json
import sqlite3
import threading
import time
from queue import Empty, Queue
from typing import Any, Iterator, Set

# A special message object used to signal the listener to gracefully shut down.
_SHUTDOWN_SENTINEL = object()


class Subscriber:
    """
    A thread-safe message receiver for a specific channel subscription.

    This object is designed to be used as a context manager (`with` statement).
    It holds a dedicated in-memory queue that receives messages from the
    channel's central polling thread, ensuring that a slow listener does not
    impact others.
    """

    def __init__(self, channel: "ChannelManager"):
        self._channel = channel
        self._queue: Queue = Queue()

    def __enter__(self) -> "Subscriber":
        """Registers the listener's queue with the channel to start receiving messages."""
        self._channel._register(self._queue)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Unregisters the listener's queue from the channel to stop receiving messages."""
        self._channel._unregister(self._queue)

    def listen(self, timeout: float | None = None) -> Iterator[Any]:
        """
        Returns a blocking iterator that yields messages as they arrive.

        This method pulls messages from the listener's dedicated, thread-safe
        in-memory queue. It performs no database operations itself.

        Args:
            timeout: If provided, the iterator will raise `queue.Empty` if no message is
                received within this many seconds.
        """
        while True:
            try:
                msg = self._queue.get(timeout=timeout)

                if msg is _SHUTDOWN_SENTINEL:
                    break

                yield msg
            except Empty:
                raise TimeoutError(f"Timeout {timeout}s expired.")


class ChannelManager:
    """
    The central hub for a named pub/sub channel.

    This object manages all active listeners for the channel and runs a single,
    efficient background thread to poll the database for new messages. It then
    "fans out" these messages to all subscribed listeners.
    """

    def __init__(
        self,
        name: str,
        conn: sqlite3.Connection,
        db_path: str,
        poll_interval: float = 0.1,
    ):
        self._name = name
        self._conn = conn
        self._db_path = db_path
        self._poll_interval = poll_interval
        self._listeners: Set[Queue] = set()
        self._lock = threading.Lock()
        self._polling_thread: threading.Thread | None = None
        self._stop_event = threading.Event()

    def _register(self, queue: Queue):
        """Adds a listener's queue and starts the poller if it's the first one."""

        with self._lock:
            self._listeners.add(queue)
            # If the polling thread isn't running, start it.
            if self._polling_thread is None or not self._polling_thread.is_alive():
                self._start_polling()

    def _unregister(self, queue: Queue):
        """Removes a listener's queue and stops the poller if it's the last one."""

        with self._lock:
            self._listeners.discard(queue)
            # If there are no more listeners, stop the polling thread to save resources.
            if not self._listeners:
                self._stop_polling()

    def _start_polling(self):
        """Starts the background polling thread."""
        self._stop_event.clear()
        self._polling_thread = threading.Thread(target=self._polling_loop, daemon=True)
        self._polling_thread.start()

    def _stop_polling(self):
        """Signals the background polling thread to stop."""
        if self._polling_thread and self._polling_thread.is_alive():
            self._stop_event.set()
            self._polling_thread.join()
            self._polling_thread = None

    def close(self):
        """Reliable close this channel and removes listeners."""
        self._stop_polling()

        with self._lock:
            for listener in self._listeners:
                listener.put(_SHUTDOWN_SENTINEL)

            self._listeners.clear()

    def _polling_loop(self):
        """
        The main loop for the background thread.

        This function polls the database for new messages and fans them out
        to all registered listener queues.
        """
        # A separate SQLite connection is required for each thread.
        thread_conn = sqlite3.connect(self._db_path, check_same_thread=False)
        thread_conn.row_factory = sqlite3.Row

        # The poller starts listening for messages from this moment forward.
        last_seen_timestamp = time.time()

        while not self._stop_event.is_set():
            cursor = thread_conn.cursor()
            cursor.execute(
                "SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
                (self._name, last_seen_timestamp),
            )
            messages = cursor.fetchall()
            cursor.close()

            if messages:
                # Update the timestamp to the last message we've seen.
                last_seen_timestamp = messages[-1]["timestamp"]

                # The "fan-out": Push messages to all active listener queues.
                # This block is locked to prevent modification of the listeners set
                # while we are iterating over it.
                with self._lock:
                    for queue in self._listeners:
                        for row in messages:
                            queue.put(json.loads(row["message_payload"]))

            # Wait for the poll interval before checking for new messages again.
            time.sleep(self._poll_interval)

        thread_conn.close()

    def subscribe(self) -> Subscriber:
        """Creates a new subscription, returning a Listener context manager."""
        return Subscriber(self)

    def publish(self, payload: Any):
        """
        Publishes a JSON-serializable message to the channel.

        This is a synchronous operation that performs a fast, atomic INSERT
        into the database's pub/sub log.
        """
        try:
            json_payload = json.dumps(payload)
        except TypeError as e:
            raise TypeError("Message payload must be JSON-serializable.") from e

        with self._conn:
            self._conn.execute(
                "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
                (time.time(), self._name, json_payload),
            )
```
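As a quick illustration of the fan-out behavior described in the docstrings above, here is a minimal sketch with two subscribers on the same channel, each receiving its own copy of a published message. It assumes a `BeaverDB` instance `db` opened as in the README quick start (the constructor is not shown in this diff) and relies only on the `subscribe()`, `listen()`, and `publish()` APIs from the new module.

```python
import time

events = db.channel("system_events")  # assumes an already-open BeaverDB instance `db`

with events.subscribe() as a, events.subscribe() as b:
    # The first subscription starts the channel's single polling thread, which
    # records its baseline timestamp; publish after that so the message is seen.
    time.sleep(0.2)
    events.publish({"event": "user_login", "user_id": "alice"})
    time.sleep(0.3)  # a little more than one poll_interval (default 0.1s)

    # The polling thread fans the new row out to every registered in-memory
    # queue, so both listeners receive the same payload independently.
    print(next(a.listen(timeout=1.0)))  # {'event': 'user_login', 'user_id': 'alice'}
    print(next(b.listen(timeout=1.0)))  # the same message, from b's own queue
```

Because `listen()` raises `TimeoutError` once its timeout elapses with no message, a bounded wait like this is a convenient way to poll without blocking forever.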
{beaver_db-0.7.1 → beaver_db-0.9.0}/beaver/core.py (+51 -19)

````diff
@@ -1,12 +1,11 @@
-import json
 import sqlite3
-import …
-from typing import Any
+import threading
 
 from .dicts import DictManager
 from .lists import ListManager
 from .channels import ChannelManager
 from .collections import CollectionManager
+from .queues import QueueManager
 
 
 class BeaverDB:
@@ -28,6 +27,8 @@ class BeaverDB:
         self._conn.execute("PRAGMA journal_mode=WAL;")
         self._conn.row_factory = sqlite3.Row
         self._create_all_tables()
+        self._channels: dict[str, ChannelManager] = {}
+        self._channels_lock = threading.Lock()
 
     def _create_all_tables(self):
         """Initializes all required tables in the database file."""
@@ -38,6 +39,27 @@ class BeaverDB:
         self._create_edges_table()
         self._create_versions_table()
         self._create_dict_table()
+        self._create_priority_queue_table()
+
+    def _create_priority_queue_table(self):
+        """Creates the priority queue table and its performance index."""
+        with self._conn:
+            self._conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS beaver_priority_queues (
+                    queue_name TEXT NOT NULL,
+                    priority REAL NOT NULL,
+                    timestamp REAL NOT NULL,
+                    data TEXT NOT NULL
+                )
+                """
+            )
+            self._conn.execute(
+                """
+                CREATE INDEX IF NOT EXISTS idx_priority_queue_order
+                ON beaver_priority_queues (queue_name, priority ASC, timestamp ASC)
+                """
+            )
 
     def _create_dict_table(self):
         """Creates the namespaced dictionary table."""
@@ -148,6 +170,10 @@ class BeaverDB:
     def close(self):
         """Closes the database connection."""
         if self._conn:
+            # Cleanly shut down any active polling threads before closing
+            with self._channels_lock:
+                for channel in self._channels.values():
+                    channel.close()
             self._conn.close()
 
     # --- Factory and Passthrough Methods ---
@@ -156,33 +182,39 @@ class BeaverDB:
         """Returns a wrapper object for interacting with a named dictionary."""
         if not isinstance(name, str) or not name:
             raise TypeError("Dictionary name must be a non-empty string.")
+
         return DictManager(name, self._conn)
 
     def list(self, name: str) -> ListManager:
         """Returns a wrapper object for interacting with a named list."""
         if not isinstance(name, str) or not name:
             raise TypeError("List name must be a non-empty string.")
+
         return ListManager(name, self._conn)
 
+    def queue(self, name: str) -> QueueManager:
+        """Returns a wrapper object for interacting with a persistent priority queue."""
+        if not isinstance(name, str) or not name:
+            raise TypeError("Queue name must be a non-empty string.")
+
+        return QueueManager(name, self._conn)
+
     def collection(self, name: str) -> CollectionManager:
         """Returns a wrapper for interacting with a document collection."""
+        if not isinstance(name, str) or not name:
+            raise TypeError("Collection name must be a non-empty string.")
+
         return CollectionManager(name, self._conn)
 
-    def …
-        """…
-
+    def channel(self, name: str) -> ChannelManager:
+        """
+        Returns a singleton Channel instance for high-efficiency pub/sub.
+        """
+        if not isinstance(name, str) or not name:
             raise ValueError("Channel name must be a non-empty string.")
-        try:
-            json_payload = json.dumps(payload)
-        except TypeError as e:
-            raise TypeError("Message payload must be JSON-serializable.") from e
-
-        with self._conn:
-            self._conn.execute(
-                "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
-                (time.time(), channel_name, json_payload),
-            )
 
-
-
-
+        # Use a thread-safe lock to ensure only one Channel object is created per name.
+        with self._channels_lock:
+            if name not in self._channels:
+                self._channels[name] = ChannelManager(name, self._conn, self._db_path)
+            return self._channels[name]
````
beaver_db-0.9.0/beaver/queues.py (new file, +87 -0)

```python
import json
import sqlite3
import time
from typing import Any, NamedTuple


class QueueItem(NamedTuple):
    """A data class representing a single item retrieved from the queue."""

    priority: float
    timestamp: float
    data: Any


class QueueManager:
    """A wrapper providing a Pythonic interface to a persistent priority queue."""

    def __init__(self, name: str, conn: sqlite3.Connection):
        self._name = name
        self._conn = conn

    def put(self, data: Any, priority: float):
        """
        Adds an item to the queue with a specific priority.

        Args:
            data: The JSON-serializable data to store.
            priority: The priority of the item (lower numbers are higher priority).
        """
        with self._conn:
            self._conn.execute(
                "INSERT INTO beaver_priority_queues (queue_name, priority, timestamp, data) VALUES (?, ?, ?, ?)",
                (self._name, priority, time.time(), json.dumps(data)),
            )

    def get(self) -> QueueItem:
        """
        Atomically retrieves and removes the highest-priority item from the queue.

        Returns:
            A QueueItem containing the data and its metadata.

        Raises IndexError if queue is empty.
        """
        with self._conn:
            cursor = self._conn.cursor()
            # The compound index on (queue_name, priority, timestamp) makes this query efficient.
            cursor.execute(
                """
                SELECT rowid, priority, timestamp, data
                FROM beaver_priority_queues
                WHERE queue_name = ?
                ORDER BY priority ASC, timestamp ASC
                LIMIT 1
                """,
                (self._name,),
            )
            result = cursor.fetchone()

            if result is None:
                raise IndexError("Queue is empty")

            rowid, priority, timestamp, data = result
            # Delete the retrieved item to ensure it's processed only once.
            cursor.execute("DELETE FROM beaver_priority_queues WHERE rowid = ?", (rowid,))

            return QueueItem(
                priority=priority, timestamp=timestamp, data=json.loads(data)
            )

    def __len__(self) -> int:
        """Returns the current number of items in the queue."""
        cursor = self._conn.cursor()
        cursor.execute(
            "SELECT COUNT(*) FROM beaver_priority_queues WHERE queue_name = ?",
            (self._name,),
        )
        count = cursor.fetchone()[0]
        cursor.close()
        return count

    def __nonzero__(self) -> bool:
        """Returns True if the queue is not empty."""
        return len(self) > 0

    def __repr__(self) -> str:
        return f"QueueManager(name='{self._name}')"
```
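Because `QueueManager` only needs a connection and the `beaver_priority_queues` table, it can be exercised on its own. The sketch below creates the table with the same DDL that `core.py` adds above and shows the ordering implied by `ORDER BY priority ASC, timestamp ASC` (lowest priority number first, FIFO among ties) plus the `IndexError` on an empty queue. The direct instantiation is for illustration only; in normal use you would go through `db.queue(...)`.

```python
import sqlite3
import time

from beaver.queues import QueueManager

conn = sqlite3.connect(":memory:")
# Same table definition that BeaverDB._create_priority_queue_table() creates.
conn.execute(
    """
    CREATE TABLE IF NOT EXISTS beaver_priority_queues (
        queue_name TEXT NOT NULL,
        priority REAL NOT NULL,
        timestamp REAL NOT NULL,
        data TEXT NOT NULL
    )
    """
)

q = QueueManager("demo", conn)
q.put({"job": "first-in"}, priority=5)
time.sleep(0.001)  # keep the tie-breaking timestamps distinct
q.put({"job": "second-in"}, priority=5)
q.put({"job": "urgent"}, priority=1)

assert len(q) == 3
assert q.get().data == {"job": "urgent"}     # lowest priority number wins
assert q.get().data == {"job": "first-in"}   # equal priorities drain FIFO
assert q.get().data == {"job": "second-in"}

try:
    q.get()
except IndexError:
    print("queue drained")
```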
{beaver_db-0.7.1 → beaver_db-0.9.0}/beaver_db.egg-info/PKG-INFO (+55 -21)

The generated egg-info copy of PKG-INFO changes identically to the top-level PKG-INFO diff shown above.
beaver_db-0.7.1/beaver/channels.py (removed, +0 -54)

```python
import json
import sqlite3
import time
from typing import Any, Iterator


class ChannelManager(Iterator):
    """
    A synchronous, blocking iterator that polls a channel for new messages.
    """

    def __init__(
        self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
    ):
        """
        Initializes the synchronous subscriber.

        Args:
            conn: The SQLite database connection.
            channel_name: The name of the channel to subscribe to.
            poll_interval: The time in seconds to wait between polling for new messages.
        """
        self._conn = conn
        self._channel = channel_name
        self._poll_interval = poll_interval
        self._last_seen_timestamp = time.time()

    def __iter__(self) -> "ChannelManager":
        """Returns the iterator object itself."""
        return self

    def __next__(self) -> Any:
        """
        Blocks until a new message is available on the channel and returns it.
        This polling mechanism is simple but can introduce a slight latency
        equivalent to the poll_interval.
        """
        while True:
            # Fetch the next available message from the database
            cursor = self._conn.cursor()
            cursor.execute(
                "SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC LIMIT 1",
                (self._channel, self._last_seen_timestamp),
            )
            result = cursor.fetchone()
            cursor.close()

            if result:
                # If a message is found, update the timestamp and return the payload
                self._last_seen_timestamp = result["timestamp"]
                return json.loads(result["message_payload"])
            else:
                # If no new messages, wait for the poll interval before trying again
                time.sleep(self._poll_interval)
```
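For contrast with the new fan-out design, this is roughly how the removed 0.7.1 subscriber was consumed: each `ChannelManager` was itself a blocking iterator that re-queried `beaver_pubsub_log` on the caller's thread. A sketch, assuming the table already exists; note that `__next__` reads columns by name, so the connection needs `sqlite3.Row` as its row factory.

```python
import sqlite3

from beaver.channels import ChannelManager  # the 0.7.1 version shown above

conn = sqlite3.connect("app.db")
conn.row_factory = sqlite3.Row  # __next__ accesses result["timestamp"] by name

subscriber = ChannelManager(conn, "system_events", poll_interval=0.1)
for message in subscriber:   # blocks, polling the table every poll_interval
    print("received:", message)
    break                    # the iterator never terminates on its own
```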
The remaining files are unchanged between the two versions: LICENSE, beaver/__init__.py, beaver/collections.py, beaver/dicts.py, beaver/lists.py, beaver_db.egg-info/dependency_links.txt, beaver_db.egg-info/requires.txt, beaver_db.egg-info/top_level.txt, and setup.cfg.