beaver-db 0.18.6__tar.gz → 0.19.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

Files changed (57) hide show
  1. {beaver_db-0.18.6 → beaver_db-0.19.2}/.gitignore +1 -0
  2. {beaver_db-0.18.6 → beaver_db-0.19.2}/PKG-INFO +49 -15
  3. {beaver_db-0.18.6 → beaver_db-0.19.2}/README.md +48 -14
  4. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/__init__.py +1 -1
  5. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/blobs.py +11 -0
  6. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/core.py +51 -0
  7. beaver_db-0.19.2/beaver/locks.py +173 -0
  8. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/server.py +139 -26
  9. beaver_db-0.19.2/examples/locks.py +52 -0
  10. beaver_db-0.19.2/issues/2-comprehensive-async-wrappers.md +52 -0
  11. beaver_db-0.19.2/issues/6-drop-in-replacement-for-beaver-rest-server-client.md +53 -0
  12. beaver_db-0.19.2/issues/7-replace-faiss-with-simpler-linear-numpy-vectorial-search.md +139 -0
  13. beaver_db-0.19.2/issues/9-type-safe-wrappers-based-on-pydantic-compatible-models.md +63 -0
  14. beaver_db-0.19.2/issues/closed/1-refactor-vector-store-to-use-faiss.md +50 -0
  15. beaver_db-0.19.2/issues/closed/5-add-dblock-for-inter-process-synchronization.md +132 -0
  16. beaver_db-0.19.2/issues/closed/8-first-class-synchronization-primitive.md +127 -0
  17. beaver_db-0.19.2/logo.png +0 -0
  18. beaver_db-0.19.2/makefile +55 -0
  19. {beaver_db-0.18.6 → beaver_db-0.19.2}/pyproject.toml +1 -1
  20. beaver_db-0.18.6/makefile +0 -23
  21. beaver_db-0.18.6/roadmap.md +0 -152
  22. {beaver_db-0.18.6 → beaver_db-0.19.2}/.dockerignore +0 -0
  23. {beaver_db-0.18.6 → beaver_db-0.19.2}/.python-version +0 -0
  24. {beaver_db-0.18.6 → beaver_db-0.19.2}/LICENSE +0 -0
  25. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/channels.py +0 -0
  26. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/cli.py +0 -0
  27. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/collections.py +0 -0
  28. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/dicts.py +0 -0
  29. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/lists.py +0 -0
  30. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/logs.py +0 -0
  31. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/queues.py +0 -0
  32. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/types.py +0 -0
  33. {beaver_db-0.18.6 → beaver_db-0.19.2}/beaver/vectors.py +0 -0
  34. {beaver_db-0.18.6 → beaver_db-0.19.2}/design.md +0 -0
  35. {beaver_db-0.18.6 → beaver_db-0.19.2}/dockerfile +0 -0
  36. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/async_pubsub.py +0 -0
  37. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/blobs.py +0 -0
  38. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/cache.py +0 -0
  39. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/fts.py +0 -0
  40. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/fuzzy.py +0 -0
  41. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/general_test.py +0 -0
  42. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/graph.py +0 -0
  43. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/kvstore.py +0 -0
  44. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/list.py +0 -0
  45. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/logs.py +0 -0
  46. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/pqueue.py +0 -0
  47. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/producer_consumer.py +0 -0
  48. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/publisher.py +0 -0
  49. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/pubsub.py +0 -0
  50. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/rerank.py +0 -0
  51. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/stress_vectors.py +0 -0
  52. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/subscriber.py +0 -0
  53. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/textual_chat.css +0 -0
  54. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/textual_chat.py +0 -0
  55. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/type_hints.py +0 -0
  56. {beaver_db-0.18.6 → beaver_db-0.19.2}/examples/vector.py +0 -0
  57. {beaver_db-0.18.6 → beaver_db-0.19.2}/uv.lock +0 -0
@@ -10,3 +10,4 @@ wheels/
10
10
  .venv
11
11
  *.db*
12
12
  .env
13
+ .issues-sync-state
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beaver-db
3
- Version: 0.18.6
3
+ Version: 0.19.2
4
4
  Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
5
  License-File: LICENSE
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -23,7 +23,11 @@ Provides-Extra: vector
23
23
  Requires-Dist: faiss-cpu>=1.12.0; extra == 'vector'
24
24
  Description-Content-Type: text/markdown
25
25
 
26
- # beaver 🦫
26
+ <div style="text-align: center;">
27
+ <img src="https://github.com/syalia-srl/beaver/blob/main/logo.png?raw=true" width="256px">
28
+ </div>
29
+
30
+ ---
27
31
 
28
32
  <!-- Project badges -->
29
33
  ![PyPI - Version](https://img.shields.io/pypi/v/beaver-db)
@@ -32,11 +36,13 @@ Description-Content-Type: text/markdown
32
36
  ![PyPi - Downloads (Monthly)](https://img.shields.io/pypi/dm/beaver-db)
33
37
  ![Github - Commits](https://img.shields.io/github/commit-activity/m/apiad/beaver)
34
38
 
35
- A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
39
+ > A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
40
+
41
+ ---
36
42
 
37
43
  `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
38
44
 
39
- > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
45
+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
40
46
 
41
47
  ## Design Philosophy
42
48
 
@@ -55,6 +61,7 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
55
61
  - **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
56
62
  - **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
57
63
  - **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
64
+ - **Inter-Process Locking**: A robust, deadlock-proof, and fair (FIFO) distributed lock (`db.lock()`) to coordinate multiple processes and prevent race conditions.
58
65
  - **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
59
66
  - **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
60
67
  - **High-Performance Vector Storage & Search (Optional)**: Store vector embeddings and perform fast approximate nearest neighbor searches using a `faiss`-based hybrid index.
@@ -103,14 +110,14 @@ pip install "beaver-db[full]"
103
110
  ```
104
111
 
105
112
  ### Running with Docker
113
+
106
114
  For a fully embedded and lightweight solution, you can run the BeaverDB REST API server using Docker. This is the easiest way to get a self-hosted instance up and running.
107
115
 
108
116
  ```bash
109
117
  docker run -p 8000:8000 -v $(pwd)/data:/app apiad/beaverdb
110
118
  ```
111
119
 
112
- This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at <http://localhost:8000>.
113
-
120
+ This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at [http://localhost:8000](http://localhost:8000).
114
121
 
115
122
  ## Quickstart
116
123
 
@@ -172,12 +179,12 @@ Here are a couple of examples using `curl`:
172
179
 
173
180
  ```bash
174
181
  # Set a value in the 'app_config' dictionary
175
- curl -X PUT http://127.0.0.1:8000/dicts/app_config/api_key
182
+ curl -X PUT [http://127.0.0.1:8000/dicts/app_config/api_key](http://127.0.0.1:8000/dicts/app_config/api_key)
176
183
  -H "Content-Type: application/json"
177
184
  -d '"your-secret-api-key"'
178
185
 
179
186
  # Get the value back
180
- curl http://127.0.0.1:8000/dicts/app_config/api_key
187
+ curl [http://127.0.0.1:8000/dicts/app_config/api_key](http://127.0.0.1:8000/dicts/app_config/api_key)
181
188
  # Output: "your-secret-api-key"
182
189
  ```
183
190
 
@@ -341,6 +348,34 @@ for summary in live_summary:
341
348
  print(f"Live Stats (10s window): Count={summary['count']}, Mean={summary['mean']:.2f}")
342
349
  ```
343
350
 
351
+ ### 9. Coordinate Distributed Web Scrapers
352
+
353
+ Run multiple scraper processes in parallel and use `db.lock()` to coordinate them. You can ensure only one process refreshes a shared API token or sitemap, preventing race conditions and rate-limiting.
354
+
355
+ ```python
356
+ import time
357
+
358
+ scrapers_state = db.dict("scraper_state")
359
+
360
+ last_refresh = scrapers_state.get("last_sitemap_refresh", 0)
361
+ if time.time() - last_refresh > 3600: # Only refresh once per hour
362
+ try:
363
+ # Try to get a lock to refresh the shared sitemap, but don't wait long
364
+ with db.lock("refresh_sitemap", timeout=1):
365
+ # We got the lock. Check if it's time to refresh.
366
+ print(f"PID {os.getpid()} is refreshing the sitemap...")
367
+ scrapers_state["sitemap"] = ["/page1", "/page2"] # Your fetch_sitemap()
368
+ scrapers_state["last_sitemap_refresh"] = time.time()
369
+
370
+ except TimeoutError:
371
+ # Another process is already refreshing, so we can skip
372
+ print(f"PID {os.getpid()} letting other process handle refresh.")
373
+
374
+ # All processes can now safely use the shared sitemap
375
+ sitemap = scrapers_state.get("sitemap")
376
+ # ... proceed with scraping ...
377
+ ```
378
+
344
379
  ## Type-Safe Data Models
345
380
 
346
381
  For enhanced data integrity and a better developer experience, BeaverDB supports type-safe operations for all modalities. By associating a model with these data structures, you get automatic serialization and deserialization, complete with autocompletion in your editor.
@@ -348,7 +383,6 @@ For enhanced data integrity and a better developer experience, BeaverDB supports
348
383
  This feature is designed to be flexible and works seamlessly with two kinds of models:
349
384
 
350
385
  - **Pydantic Models**: If you're already using Pydantic, your `BaseModel` classes will work out of the box.
351
-
352
386
  - **Lightweight `beaver.Model`**: For a zero-dependency solution, you can inherit from the built-in `beaver.Model` class, which is a standard Python class with serialization methods automatically included.
353
387
 
354
388
 
@@ -393,10 +427,11 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
393
427
  - [`graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
394
428
  - [`kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
395
429
  - [`list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
430
+ - [`locks.py`](examples/locks.py): Demonstrates how to use the inter-process lock to create critical sections.
396
431
  - [`logs.py`](examples/logs.py): A short example showing how to build a realtime dashboard with the logging feature.
397
432
  - [`pqueue.py`](examples/pqueue.py): A practical example of using the persistent priority queue for task management.
398
433
  - [`producer_consumer.py`](examples/producer_consumer.py): A demonstration of the distributed task queue system in a multi-process environment.
399
- - [`publisher.py`](examples/publisher.p) and [`subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
434
+ - [`publisher.py`](examples/publisher.py) and [`subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
400
435
  - [`pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system in a single process.
401
436
  - [`rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
402
437
  - [`stress_vectors.py`](examples/stress_vectors.py): A stress test for the vector search functionality.
@@ -410,14 +445,13 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
410
445
 
411
446
  These are some of the features and improvements planned for future releases:
412
447
 
413
- - **Async API**: Extend the async support with on-demand wrappers for all features besides channels.
414
- - **Type-Safe Models**: Enhance built-in `Model` to handle recursive and embedded types.
415
- - **Drop-in REST Client**: Implement a `BeaverClient` class that acts as a drop-in replacement for `BeaverDB` but instead of a local database file, it works against a REST API server.
448
+ - **[Issue #2](https://github.com/syalia-srl/beaver/issues/2) Comprehensive async wrappers**: Extend the async support with on-demand wrappers for all data structures, not just channels.
449
+ - **[Issue #9](https://github.com/syalia-srl/beaver/issues/9) Type-safe wrappers based on Pydantic-compatible models**: Enhance the built-in `Model` to handle recursive and embedded types and provide Pydantic compatibility.
450
+ - **[Issue #6](https://github.com/syalia-srl/beaver/issues/6) Drop-in replacement for Beaver REST server client**: Implement a `BeaverClient` class that acts as a drop-in replacement for `BeaverDB` but works against the REST API server.
451
+ - **[Issue #7](https://github.com/syalia-srl/beaver/issues/7) Replace `faiss` with simpler, linear `numpy` vectorial search**: Investigate removing the heavy `faiss` dependency in favor of a pure `numpy` implementation to improve installation simplicity, accepting a trade-off in search performance for O(1) installation.
416
452
 
417
- Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
418
453
 
419
454
  If you think of something that would make `beaver` more useful for your use case, please open an issue and/or submit a pull request.
420
-
421
455
  ## License
422
456
 
423
457
  This project is licensed under the MIT License.
@@ -1,4 +1,8 @@
1
- # beaver 🦫
1
+ <div style="text-align: center;">
2
+ <img src="https://github.com/syalia-srl/beaver/blob/main/logo.png?raw=true" width="256px">
3
+ </div>
4
+
5
+ ---
2
6
 
3
7
  <!-- Project badges -->
4
8
  ![PyPI - Version](https://img.shields.io/pypi/v/beaver-db)
@@ -7,11 +11,13 @@
7
11
  ![PyPi - Downloads (Monthly)](https://img.shields.io/pypi/dm/beaver-db)
8
12
  ![Github - Commits](https://img.shields.io/github/commit-activity/m/apiad/beaver)
9
13
 
10
- A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
14
+ > A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
15
+
16
+ ---
11
17
 
12
18
  `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
13
19
 
14
- > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
20
+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
15
21
 
16
22
  ## Design Philosophy
17
23
 
@@ -30,6 +36,7 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
30
36
  - **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
31
37
  - **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
32
38
  - **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
39
+ - **Inter-Process Locking**: A robust, deadlock-proof, and fair (FIFO) distributed lock (`db.lock()`) to coordinate multiple processes and prevent race conditions.
33
40
  - **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
34
41
  - **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
35
42
  - **High-Performance Vector Storage & Search (Optional)**: Store vector embeddings and perform fast approximate nearest neighbor searches using a `faiss`-based hybrid index.
@@ -78,14 +85,14 @@ pip install "beaver-db[full]"
78
85
  ```
79
86
 
80
87
  ### Running with Docker
88
+
81
89
  For a fully embedded and lightweight solution, you can run the BeaverDB REST API server using Docker. This is the easiest way to get a self-hosted instance up and running.
82
90
 
83
91
  ```bash
84
92
  docker run -p 8000:8000 -v $(pwd)/data:/app apiad/beaverdb
85
93
  ```
86
94
 
87
- This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at <http://localhost:8000>.
88
-
95
+ This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at [http://localhost:8000](http://localhost:8000).
89
96
 
90
97
  ## Quickstart
91
98
 
@@ -147,12 +154,12 @@ Here are a couple of examples using `curl`:
147
154
 
148
155
  ```bash
149
156
  # Set a value in the 'app_config' dictionary
150
- curl -X PUT http://127.0.0.1:8000/dicts/app_config/api_key
157
+ curl -X PUT [http://127.0.0.1:8000/dicts/app_config/api_key](http://127.0.0.1:8000/dicts/app_config/api_key)
151
158
  -H "Content-Type: application/json"
152
159
  -d '"your-secret-api-key"'
153
160
 
154
161
  # Get the value back
155
- curl http://127.0.0.1:8000/dicts/app_config/api_key
162
+ curl [http://127.0.0.1:8000/dicts/app_config/api_key](http://127.0.0.1:8000/dicts/app_config/api_key)
156
163
  # Output: "your-secret-api-key"
157
164
  ```
158
165
 
@@ -316,6 +323,34 @@ for summary in live_summary:
316
323
  print(f"Live Stats (10s window): Count={summary['count']}, Mean={summary['mean']:.2f}")
317
324
  ```
318
325
 
326
+ ### 9. Coordinate Distributed Web Scrapers
327
+
328
+ Run multiple scraper processes in parallel and use `db.lock()` to coordinate them. You can ensure only one process refreshes a shared API token or sitemap, preventing race conditions and rate-limiting.
329
+
330
+ ```python
331
+ import time
332
+
333
+ scrapers_state = db.dict("scraper_state")
334
+
335
+ last_refresh = scrapers_state.get("last_sitemap_refresh", 0)
336
+ if time.time() - last_refresh > 3600: # Only refresh once per hour
337
+ try:
338
+ # Try to get a lock to refresh the shared sitemap, but don't wait long
339
+ with db.lock("refresh_sitemap", timeout=1):
340
+ # We got the lock. Check if it's time to refresh.
341
+ print(f"PID {os.getpid()} is refreshing the sitemap...")
342
+ scrapers_state["sitemap"] = ["/page1", "/page2"] # Your fetch_sitemap()
343
+ scrapers_state["last_sitemap_refresh"] = time.time()
344
+
345
+ except TimeoutError:
346
+ # Another process is already refreshing, so we can skip
347
+ print(f"PID {os.getpid()} letting other process handle refresh.")
348
+
349
+ # All processes can now safely use the shared sitemap
350
+ sitemap = scrapers_state.get("sitemap")
351
+ # ... proceed with scraping ...
352
+ ```
353
+
319
354
  ## Type-Safe Data Models
320
355
 
321
356
  For enhanced data integrity and a better developer experience, BeaverDB supports type-safe operations for all modalities. By associating a model with these data structures, you get automatic serialization and deserialization, complete with autocompletion in your editor.
@@ -323,7 +358,6 @@ For enhanced data integrity and a better developer experience, BeaverDB supports
323
358
  This feature is designed to be flexible and works seamlessly with two kinds of models:
324
359
 
325
360
  - **Pydantic Models**: If you're already using Pydantic, your `BaseModel` classes will work out of the box.
326
-
327
361
  - **Lightweight `beaver.Model`**: For a zero-dependency solution, you can inherit from the built-in `beaver.Model` class, which is a standard Python class with serialization methods automatically included.
328
362
 
329
363
 
@@ -368,10 +402,11 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
368
402
  - [`graph.py`](examples/graph.py): Shows how to create relationships between documents and perform multi-hop graph traversals.
369
403
  - [`kvstore.py`](examples/kvstore.py): A comprehensive demo of the namespaced dictionary feature.
370
404
  - [`list.py`](examples/list.py): Shows the full capabilities of the persistent list, including slicing and in-place updates.
405
+ - [`locks.py`](examples/locks.py): Demonstrates how to use the inter-process lock to create critical sections.
371
406
  - [`logs.py`](examples/logs.py): A short example showing how to build a realtime dashboard with the logging feature.
372
407
  - [`pqueue.py`](examples/pqueue.py): A practical example of using the persistent priority queue for task management.
373
408
  - [`producer_consumer.py`](examples/producer_consumer.py): A demonstration of the distributed task queue system in a multi-process environment.
374
- - [`publisher.py`](examples/publisher.p) and [`subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
409
+ - [`publisher.py`](examples/publisher.py) and [`subscriber.py`](examples/subscriber.py): A pair of examples demonstrating inter-process message passing with the publish/subscribe system.
375
410
  - [`pubsub.py`](examples/pubsub.py): A demonstration of the synchronous, thread-safe publish/subscribe system in a single process.
376
411
  - [`rerank.py`](examples/rerank.py): Shows how to combine results from vector and text search for more refined results.
377
412
  - [`stress_vectors.py`](examples/stress_vectors.py): A stress test for the vector search functionality.
@@ -385,14 +420,13 @@ For more in-depth examples, check out the scripts in the `examples/` directory:
385
420
 
386
421
  These are some of the features and improvements planned for future releases:
387
422
 
388
- - **Async API**: Extend the async support with on-demand wrappers for all features besides channels.
389
- - **Type-Safe Models**: Enhance built-in `Model` to handle recursive and embedded types.
390
- - **Drop-in REST Client**: Implement a `BeaverClient` class that acts as a drop-in replacement for `BeaverDB` but instead of a local database file, it works against a REST API server.
423
+ - **[Issue #2](https://github.com/syalia-srl/beaver/issues/2) Comprehensive async wrappers**: Extend the async support with on-demand wrappers for all data structures, not just channels.
424
+ - **[Issue #9](https://github.com/syalia-srl/beaver/issues/9) Type-safe wrappers based on Pydantic-compatible models**: Enhance the built-in `Model` to handle recursive and embedded types and provide Pydantic compatibility.
425
+ - **[Issue #6](https://github.com/syalia-srl/beaver/issues/6) Drop-in replacement for Beaver REST server client**: Implement a `BeaverClient` class that acts as a drop-in replacement for `BeaverDB` but works against the REST API server.
426
+ - **[Issue #7](https://github.com/syalia-srl/beaver/issues/7) Replace `faiss` with simpler, linear `numpy` vectorial search**: Investigate removing the heavy `faiss` dependency in favor of a pure `numpy` implementation to improve installation simplicity, accepting a trade-off in search performance for O(1) installation.
391
427
 
392
- Check out the [roadmap](roadmap.md) for a detailed list of upcoming features and design ideas.
393
428
 
394
429
  If you think of something that would make `beaver` more useful for your use case, please open an issue and/or submit a pull request.
395
-
396
430
  ## License
397
431
 
398
432
  This project is licensed under the MIT License.
@@ -2,4 +2,4 @@ from .core import BeaverDB
2
2
  from .types import Model
3
3
  from .collections import Document, WalkDirection
4
4
 
5
- __version__ = "0.18.6"
5
+ __version__ = "0.19.2"
@@ -122,5 +122,16 @@ class BlobManager[M]:
122
122
  yield row["key"]
123
123
  cursor.close()
124
124
 
125
+ def __len__(self) -> int:
126
+ """Returns the number of blobs in the store."""
127
+ cursor = self._db.connection.cursor()
128
+ cursor.execute(
129
+ "SELECT COUNT(*) FROM beaver_blobs WHERE store_name = ?",
130
+ (self._name,)
131
+ )
132
+ count = cursor.fetchone()[0]
133
+ cursor.close()
134
+ return count
135
+
125
136
  def __repr__(self) -> str:
126
137
  return f"BlobManager(name='{self._name}')"
@@ -9,6 +9,7 @@ from .channels import ChannelManager
9
9
  from .collections import CollectionManager, Document
10
10
  from .dicts import DictManager
11
11
  from .lists import ListManager
12
+ from .locks import LockManager
12
13
  from .logs import LogManager
13
14
  from .queues import QueueManager
14
15
 
@@ -99,6 +100,35 @@ class BeaverDB:
99
100
  self._create_pubsub_table()
100
101
  self._create_trigrams_table()
101
102
  self._create_versions_table()
103
+ self._create_locks_table()
104
+
105
+ def _create_locks_table(self): # <-- Add this new method
106
+ """Creates the table for managing inter-process lock waiters."""
107
+ self.connection.execute(
108
+ """
109
+ CREATE TABLE IF NOT EXISTS beaver_lock_waiters (
110
+ lock_name TEXT NOT NULL,
111
+ waiter_id TEXT NOT NULL,
112
+ requested_at REAL NOT NULL,
113
+ expires_at REAL NOT NULL,
114
+ PRIMARY KEY (lock_name, requested_at)
115
+ )
116
+ """
117
+ )
118
+ # Index for fast cleanup of expired locks
119
+ self.connection.execute(
120
+ """
121
+ CREATE INDEX IF NOT EXISTS idx_lock_expires
122
+ ON beaver_lock_waiters (lock_name, expires_at)
123
+ """
124
+ )
125
+ # Index for fast deletion by the lock holder
126
+ self.connection.execute(
127
+ """
128
+ CREATE INDEX IF NOT EXISTS idx_lock_waiter_id
129
+ ON beaver_lock_waiters (lock_name, waiter_id)
130
+ """
131
+ )
102
132
 
103
133
  def _create_logs_table(self):
104
134
  """Creates the table for time-indexed logs."""
@@ -421,3 +451,24 @@ class BeaverDB:
421
451
  raise TypeError("The model parameter must be a JsonSerializable class.")
422
452
 
423
453
  return LogManager(name, self, self._db_path, model)
454
+
455
+ def lock(
456
+ self,
457
+ name: str,
458
+ timeout: float | None = None,
459
+ lock_ttl: float = 60.0,
460
+ poll_interval: float = 0.1,
461
+ ) -> LockManager:
462
+ """
463
+ Returns an inter-process lock manager for a given lock name.
464
+
465
+ Args:
466
+ name: The unique name of the lock (e.g., "run_compaction").
467
+ timeout: Max seconds to wait to acquire the lock.
468
+ If None, it will wait forever.
469
+ lock_ttl: Max seconds the lock can be held. If the process crashes,
470
+ the lock will auto-expire after this time.
471
+ poll_interval: Seconds to wait between polls. Shorter intervals
472
+ are more responsive but create more DB I/O.
473
+ """
474
+ return LockManager(self, name, timeout, lock_ttl, poll_interval)
@@ -0,0 +1,173 @@
1
+ import random
2
+ import time
3
+ import os
4
+ import uuid
5
+ from typing import Optional
6
+ from .types import IDatabase
7
+
8
+
9
+ class LockManager:
10
+ """
11
+ An inter-process, deadlock-proof, and fair (FIFO) lock built on SQLite.
12
+
13
+ This class provides a context manager (`with` statement) to ensure that
14
+ only one process (among many) can enter a critical section of code at a
15
+ time.
16
+
17
+ It is "fair" because it uses a FIFO queue (based on insertion time).
18
+ It is "deadlock-proof" because locks have a Time-To-Live (TTL); if a
19
+ process crashes, its lock will eventually expire and be cleaned up.
20
+ """
21
+
22
+ def __init__(
23
+ self,
24
+ db: IDatabase,
25
+ name: str,
26
+ timeout: Optional[float] = None,
27
+ lock_ttl: float = 60.0,
28
+ poll_interval: float = 0.1,
29
+ ):
30
+ """
31
+ Initializes the lock manager.
32
+
33
+ Args:
34
+ db: The BeaverDB instance.
35
+ name: The unique name of the lock (e.g., "run_compaction").
36
+ timeout: Max seconds to wait to acquire the lock. If None,
37
+ it will wait forever.
38
+ lock_ttl: Max seconds the lock can be held. If the process crashes,
39
+ the lock will auto-expire after this time.
40
+ poll_interval: Seconds to wait between polls.
41
+ """
42
+ if not isinstance(name, str) or not name:
43
+ raise ValueError("Lock name must be a non-empty string.")
44
+ if lock_ttl <= 0:
45
+ raise ValueError("lock_ttl must be positive.")
46
+ if poll_interval <= 0:
47
+ raise ValueError("poll_interval must be positive.")
48
+
49
+ self._db = db
50
+ self._lock_name = name
51
+ self._timeout = timeout
52
+ self._lock_ttl = lock_ttl
53
+ self._poll_interval = poll_interval
54
+ # A unique ID for this specific lock instance across all processes
55
+ self._waiter_id = f"pid:{os.getpid()}:id:{uuid.uuid4()}"
56
+ self._acquired = False # State to track if this instance holds the lock
57
+
58
+ def acquire(self) -> "LockManager":
59
+ """
60
+ Blocks until the lock is acquired or the timeout expires.
61
+
62
+ Raises:
63
+ TimeoutError: If the lock cannot be acquired within the specified timeout.
64
+ """
65
+ if self._acquired:
66
+ # This instance already holds the lock
67
+ return self
68
+
69
+ start_time = time.time()
70
+ requested_at = time.time()
71
+ expires_at = requested_at + self._lock_ttl
72
+
73
+ conn = self._db.connection
74
+
75
+ try:
76
+ # 1. Add self to the FIFO queue (atomic)
77
+ with conn:
78
+ conn.execute(
79
+ """
80
+ INSERT INTO beaver_lock_waiters (lock_name, waiter_id, requested_at, expires_at)
81
+ VALUES (?, ?, ?, ?)
82
+ """,
83
+ (self._lock_name, self._waiter_id, requested_at, expires_at),
84
+ )
85
+
86
+ # 2. Start polling loop
87
+ while True:
88
+ with conn:
89
+ # 3. Clean up expired locks from crashed processes
90
+ now = time.time()
91
+ conn.execute(
92
+ "DELETE FROM beaver_lock_waiters WHERE lock_name = ? AND expires_at < ?",
93
+ (self._lock_name, now),
94
+ )
95
+
96
+ # 4. Check who is at the front of the queue
97
+ cursor = conn.cursor()
98
+ cursor.execute(
99
+ """
100
+ SELECT waiter_id FROM beaver_lock_waiters
101
+ WHERE lock_name = ?
102
+ ORDER BY requested_at ASC
103
+ LIMIT 1
104
+ """,
105
+ (self._lock_name,),
106
+ )
107
+ result = cursor.fetchone()
108
+ cursor.close()
109
+
110
+ if result and result["waiter_id"] == self._waiter_id:
111
+ # We are at the front. We own the lock.
112
+ self._acquired = True
113
+ return self
114
+
115
+ # 5. Check for timeout
116
+ if self._timeout is not None:
117
+ if (time.time() - start_time) > self._timeout:
118
+ # We timed out. Remove ourselves from the queue and raise.
119
+ self._release_from_queue()
120
+ raise TimeoutError(
121
+ f"Failed to acquire lock '{self._lock_name}' within {self._timeout}s."
122
+ )
123
+
124
+ # 6. Wait politely before polling again
125
+ # Add +/- 10% jitter to the poll interval to avoid thundering herd
126
+ jitter = self._poll_interval * 0.1
127
+ sleep_time = random.uniform(
128
+ self._poll_interval - jitter, self._poll_interval + jitter
129
+ )
130
+ time.sleep(sleep_time)
131
+
132
+ except Exception:
133
+ # If anything goes wrong, try to clean up our waiter entry
134
+ self._release_from_queue()
135
+ raise
136
+
137
+ def _release_from_queue(self):
138
+ """
139
+ Atomically removes this instance's entry from the waiter queue.
140
+ This is a best-effort, fire-and-forget operation.
141
+ """
142
+ try:
143
+ with self._db.connection:
144
+ self._db.connection.execute(
145
+ "DELETE FROM beaver_lock_waiters WHERE lock_name = ? AND waiter_id = ?",
146
+ (self._lock_name, self._waiter_id),
147
+ )
148
+ except Exception:
149
+ # Don't raise errors during release/cleanup
150
+ pass
151
+
152
+ def release(self):
153
+ """
154
+ Releases the lock, allowing the next process in the queue to acquire it.
155
+ This is safe to call multiple times.
156
+ """
157
+ if not self._acquired:
158
+ # We don't hold the lock, so nothing to do.
159
+ return
160
+
161
+ self._release_from_queue()
162
+ self._acquired = False
163
+
164
+ def __enter__(self) -> "LockManager":
165
+ """Acquires the lock when entering a 'with' statement."""
166
+ return self.acquire()
167
+
168
+ def __exit__(self, exc_type, exc_val, exc_tb):
169
+ """Releases the lock when exiting a 'with' statement."""
170
+ self.release()
171
+
172
+ def __repr__(self) -> str:
173
+ return f"LockManager(name='{self._lock_name}', acquired={self._acquired})"