beaver-db 0.17.5__tar.gz → 0.24.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

Files changed (117) hide show
  1. beaver_db-0.17.5/.gitignore → beaver_db-0.24.5/.dockerignore +1 -0
  2. beaver_db-0.24.5/.github/workflows/release.yaml +90 -0
  3. beaver_db-0.24.5/.github/workflows/tests.yaml +29 -0
  4. beaver_db-0.24.5/.gitignore +14 -0
  5. beaver_db-0.24.5/.vscode/settings.json +7 -0
  6. beaver_db-0.24.5/PKG-INFO +478 -0
  7. beaver_db-0.24.5/README.md +458 -0
  8. beaver_db-0.24.5/beaver/__init__.py +12 -0
  9. beaver_db-0.24.5/beaver/blobs.py +192 -0
  10. beaver_db-0.24.5/beaver/cache.py +146 -0
  11. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/channels.py +3 -16
  12. beaver_db-0.24.5/beaver/cli/__init__.py +85 -0
  13. beaver_db-0.24.5/beaver/cli/blobs.py +188 -0
  14. beaver_db-0.24.5/beaver/cli/channels.py +150 -0
  15. beaver_db-0.24.5/beaver/cli/collections.py +427 -0
  16. beaver_db-0.24.5/beaver/cli/dicts.py +156 -0
  17. beaver_db-0.24.5/beaver/cli/lists.py +227 -0
  18. beaver_db-0.24.5/beaver/cli/locks.py +202 -0
  19. beaver_db-0.24.5/beaver/cli/logs.py +228 -0
  20. beaver_db-0.24.5/beaver/cli/queues.py +188 -0
  21. beaver_db-0.24.5/beaver/client.py +362 -0
  22. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/collections.py +276 -160
  23. beaver_db-0.24.5/beaver/core.py +627 -0
  24. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/dicts.py +116 -27
  25. beaver_db-0.24.5/beaver/lists.py +345 -0
  26. beaver_db-0.24.5/beaver/locks.py +244 -0
  27. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/logs.py +92 -33
  28. beaver_db-0.24.5/beaver/manager.py +156 -0
  29. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/queues.py +105 -23
  30. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/server.py +144 -31
  31. {beaver_db-0.17.5 → beaver_db-0.24.5}/beaver/types.py +25 -5
  32. beaver_db-0.24.5/beaver/vectors.py +379 -0
  33. beaver_db-0.24.5/design.md +91 -0
  34. beaver_db-0.24.5/dockerfile +28 -0
  35. beaver_db-0.24.5/docs/.gitignore +2 -0
  36. beaver_db-0.24.5/docs/_quarto.yml +62 -0
  37. beaver_db-0.24.5/docs/cover.png +0 -0
  38. beaver_db-0.24.5/docs/dev-architecture.qmd +15 -0
  39. beaver_db-0.24.5/docs/dev-concurrency.qmd +15 -0
  40. beaver_db-0.24.5/docs/dev-contributing.qmd +9 -0
  41. beaver_db-0.24.5/docs/dev-search.qmd +11 -0
  42. beaver_db-0.24.5/docs/guide-collections.qmd +20 -0
  43. beaver_db-0.24.5/docs/guide-concurrency.qmd +10 -0
  44. beaver_db-0.24.5/docs/guide-deployment.qmd +14 -0
  45. beaver_db-0.24.5/docs/guide-dicts-blobs.qmd +12 -0
  46. beaver_db-0.24.5/docs/guide-lists-queues.qmd +12 -0
  47. beaver_db-0.24.5/docs/guide-realtime.qmd +12 -0
  48. beaver_db-0.24.5/docs/index.qmd +89 -0
  49. beaver_db-0.24.5/docs/logo.png +0 -0
  50. beaver_db-0.24.5/docs/quickstart.qmd +120 -0
  51. beaver_db-0.24.5/examples/locks.py +52 -0
  52. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/stress_vectors.py +3 -12
  53. beaver_db-0.24.5/issues/13-adopt-pydantic-deprecate-beavermodel-and-refactor-document-to-be-generic.md +84 -0
  54. beaver_db-0.24.5/issues/15-enhance-cli-with-admin-commands-shell-piping-and-interactivity.md +109 -0
  55. beaver_db-0.24.5/issues/16-add-clear-method-for-all-data-structures.md +82 -0
  56. beaver_db-0.24.5/issues/17-replace-atomic-operations-with-custom-beaver-lock.md +131 -0
  57. beaver_db-0.24.5/issues/18-enhanced-dump-and-load-for-etl.md +116 -0
  58. beaver_db-0.24.5/issues/19-add-comprehensive-unit-integration-and-concurrency-test-suite.md +144 -0
  59. beaver_db-0.24.5/issues/2-comprehensive-async-wrappers.md +53 -0
  60. beaver_db-0.24.5/issues/21-implement-dbcache-property-with-wal-invalidation-dummy-fallback-and-performance-stats.md +150 -0
  61. beaver_db-0.24.5/issues/6-drop-in-replacement-for-beaver-rest-server-client.md +118 -0
  62. beaver_db-0.24.5/issues/closed/1-refactor-vector-store-to-use-faiss.md +50 -0
  63. beaver_db-0.24.5/issues/closed/10-expose-dblock-functionality-on-all-high-level-data-managers.md +86 -0
  64. beaver_db-0.24.5/issues/closed/12-add-dump-method-for-json-export-to-all-data-managers.md +195 -0
  65. beaver_db-0.24.5/issues/closed/14-deprecate-fire-based-cli-and-build-a-feature-rich-typer-and-rich-cli.md +116 -0
  66. beaver_db-0.24.5/issues/closed/5-add-dblock-for-inter-process-synchronization.md +132 -0
  67. beaver_db-0.24.5/issues/closed/7-replace-faiss-with-simpler-linear-numpy-vectorial-search.md +140 -0
  68. beaver_db-0.24.5/issues/closed/8-first-class-synchronization-primitive.md +127 -0
  69. beaver_db-0.24.5/issues/closed/9-type-safe-wrappers-based-on-pydantic-compatible-models.md +63 -0
  70. beaver_db-0.24.5/logo.png +0 -0
  71. beaver_db-0.24.5/makefile +58 -0
  72. {beaver_db-0.17.5 → beaver_db-0.24.5}/pyproject.toml +14 -6
  73. beaver_db-0.24.5/pytest.ini +7 -0
  74. beaver_db-0.24.5/tests/conftest.py +53 -0
  75. beaver_db-0.24.5/tests/integration/test_cache.py +24 -0
  76. beaver_db-0.24.5/tests/integration/test_realtime.py +186 -0
  77. beaver_db-0.24.5/tests/unit/test_blob_manager.py +161 -0
  78. beaver_db-0.24.5/tests/unit/test_collection_manager.py +316 -0
  79. beaver_db-0.24.5/tests/unit/test_dict_manager.py +183 -0
  80. beaver_db-0.24.5/tests/unit/test_list_manager.py +242 -0
  81. beaver_db-0.24.5/tests/unit/test_log_manager.py +141 -0
  82. beaver_db-0.24.5/tests/unit/test_queue_manager.py +200 -0
  83. beaver_db-0.24.5/uv.lock +1009 -0
  84. beaver_db-0.17.5/PKG-INFO +0 -353
  85. beaver_db-0.17.5/README.md +0 -328
  86. beaver_db-0.17.5/beaver/__init__.py +0 -3
  87. beaver_db-0.17.5/beaver/blobs.py +0 -126
  88. beaver_db-0.17.5/beaver/cli.py +0 -61
  89. beaver_db-0.17.5/beaver/core.py +0 -401
  90. beaver_db-0.17.5/beaver/lists.py +0 -264
  91. beaver_db-0.17.5/beaver/vectors.py +0 -378
  92. beaver_db-0.17.5/design.md +0 -118
  93. beaver_db-0.17.5/makefile +0 -15
  94. beaver_db-0.17.5/roadmap.md +0 -105
  95. beaver_db-0.17.5/uv.lock +0 -881
  96. {beaver_db-0.17.5 → beaver_db-0.24.5}/.python-version +0 -0
  97. {beaver_db-0.17.5 → beaver_db-0.24.5}/LICENSE +0 -0
  98. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/async_pubsub.py +0 -0
  99. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/blobs.py +0 -0
  100. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/cache.py +0 -0
  101. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/fts.py +0 -0
  102. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/fuzzy.py +0 -0
  103. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/general_test.py +0 -0
  104. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/graph.py +0 -0
  105. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/kvstore.py +0 -0
  106. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/list.py +0 -0
  107. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/logs.py +0 -0
  108. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/pqueue.py +0 -0
  109. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/producer_consumer.py +0 -0
  110. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/publisher.py +0 -0
  111. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/pubsub.py +0 -0
  112. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/rerank.py +0 -0
  113. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/subscriber.py +0 -0
  114. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/textual_chat.css +0 -0
  115. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/textual_chat.py +0 -0
  116. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/type_hints.py +0 -0
  117. {beaver_db-0.17.5 → beaver_db-0.24.5}/examples/vector.py +0 -0
@@ -10,3 +10,4 @@ wheels/
10
10
  .venv
11
11
  *.db*
12
12
  .env
13
+ examples/
@@ -0,0 +1,90 @@
1
+ name: Release Pipeline
2
+
3
+ on:
4
+ release:
5
+ types: [created]
6
+
7
+ jobs:
8
+ test:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout repository
12
+ uses: actions/checkout@v4
13
+
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.13"
18
+
19
+ - name: Install uv
20
+ run: curl -LsSf https://astral.sh/uv/install.sh | sh
21
+
22
+ - name: Install dependencies
23
+ run: uv sync --all-extras --all-groups --no-editable
24
+
25
+ - name: Run tests with coverage
26
+ run: uv run pytest --cov=beaver --cov-report=xml --cov-report=term
27
+
28
+ - name: Upload coverage to Codecov
29
+ uses: codecov/codecov-action@v4
30
+ with:
31
+ files: ./coverage.xml
32
+ fail_ci_if_error: false
33
+
34
+ publish-pypi:
35
+ needs: test
36
+ runs-on: ubuntu-latest
37
+ steps:
38
+ - name: Checkout repository
39
+ uses: actions/checkout@v4
40
+
41
+ - name: Set up Python
42
+ uses: actions/setup-python@v5
43
+ with:
44
+ python-version: "3.13"
45
+
46
+ - name: Install uv
47
+ run: curl -LsSf https://astral.sh/uv/install.sh | sh
48
+
49
+ - name: Build package
50
+ run: uv build
51
+
52
+ - name: Publish to PyPI
53
+ run: uv publish --token ${{ secrets.PYPI_TOKEN }}
54
+
55
+ push-to-ghcr:
56
+ needs: publish-pypi
57
+ runs-on: ubuntu-latest
58
+ permissions:
59
+ contents: read
60
+ packages: write
61
+ steps:
62
+ - name: Checkout repository
63
+ uses: actions/checkout@v4
64
+
65
+ - name: Log in to GitHub Container Registry
66
+ uses: docker/login-action@v3
67
+ with:
68
+ registry: ghcr.io
69
+ username: ${{ github.actor }}
70
+ password: ${{ secrets.GITHUB_TOKEN }}
71
+
72
+ - name: Extract metadata (tags and labels)
73
+ id: meta
74
+ uses: docker/metadata-action@v5
75
+ with:
76
+ images: ghcr.io/${{ github.repository }}
77
+ tags: |
78
+ type=semver,pattern={{version}}
79
+ type=semver,pattern=latest
80
+
81
+ - name: Build and push Docker image
82
+ uses: docker/build-push-action@v5
83
+ with:
84
+ context: .
85
+ file: ./dockerfile
86
+ push: true
87
+ tags: ${{ steps.meta.outputs.tags }}
88
+ labels: ${{ steps.meta.outputs.labels }}
89
+ build-args: |
90
+ VERSION=${{ steps.meta.outputs.version }}
@@ -0,0 +1,29 @@
1
+ name: Run Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - name: Checkout repository
15
+ uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.13"
21
+
22
+ - name: Install uv
23
+ run: curl -LsSf https://astral.sh/uv/install.sh | sh
24
+
25
+ - name: Install dependencies
26
+ run: uv sync --all-extras --all-groups --no-editable
27
+
28
+ - name: Run tests with coverage
29
+ run: uv run pytest -m "unit" --cov=beaver --cov-report=xml --cov-report=term
@@ -0,0 +1,14 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ *.db*
12
+ .env
13
+ .issues-sync-state
14
+ .coverage
@@ -0,0 +1,7 @@
1
+ {
2
+ "python.testing.pytestArgs": [
3
+ "tests"
4
+ ],
5
+ "python.testing.unittestEnabled": false,
6
+ "python.testing.pytestEnabled": true
7
+ }
@@ -0,0 +1,478 @@
1
+ Metadata-Version: 2.4
2
+ Name: beaver-db
3
+ Version: 0.24.5
4
+ Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
+ License-File: LICENSE
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3.13
9
+ Classifier: Topic :: Database
10
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
11
+ Requires-Python: >=3.13
12
+ Requires-Dist: numpy>=2.3.4
13
+ Requires-Dist: rich>=14.2.0
14
+ Requires-Dist: typer>=0.20.0
15
+ Provides-Extra: full
16
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'full'
17
+ Provides-Extra: remote
18
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'remote'
19
+ Description-Content-Type: text/markdown
20
+
21
+ <div style="text-align: center;">
22
+ <img src="https://github.com/syalia-srl/beaver/blob/main/logo.png?raw=true" width="256px">
23
+ </div>
24
+
25
+ ---
26
+
27
+ <!-- Project badges -->
28
+ ![PyPI - Version](https://img.shields.io/pypi/v/beaver-db)
29
+ ![PyPi - Python Version](https://img.shields.io/pypi/pyversions/beaver-db)
30
+ ![Github - Open Issues](https://img.shields.io/github/issues-raw/apiad/beaver)
31
+ ![PyPi - Downloads (Monthly)](https://img.shields.io/pypi/dm/beaver-db)
32
+ ![Github - Commits](https://img.shields.io/github/commit-activity/m/apiad/beaver)
33
+
34
+ -----
35
+
36
+ > A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
37
+
38
+ `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
39
+
40
+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
41
+
42
+ ## Design Philosophy
43
+
44
+ `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
45
+
46
+ - **Minimalistic**: The core library has minimal dependencies (`numpy`, `rich`, `typer`). The REST server and client are available as an optional feature.
47
+ - **Schemaless**: Flexible data storage without rigid schemas across all modalities.
48
+ - **Synchronous, Multi-Process, and Thread-Safe**: Designed for simplicity and safety in multi-threaded and multi-process environments.
49
+ - **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
50
+ - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. Vector search is a built-in feature accelerated by a multi-process-safe, in-memory `numpy` index.
51
+ - **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
52
+
53
+ ## Core Features
54
+
55
+ - **Sync/Async High-Efficiency Pub/Sub**: A powerful, thread- and process-safe publish-subscribe system for real-time messaging with a fan-out architecture. Sync by default, but with an `as_async` wrapper for async applications.
56
+ - **Namespaced Key-Value Dictionaries**: A Pythonic, dictionary-like interface for storing any JSON-serializable object within separate namespaces with optional TTL for cache implementations.
57
+ - **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
58
+ - **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
59
+ - **Inter-Process Locking**: A robust, deadlock-proof, and fair (FIFO) distributed lock (`db.lock()`) to coordinate multiple processes and prevent race conditions.
60
+ - **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
61
+ - **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
62
+ - **High-Performance Vector Storage & Search**: Store vector embeddings and perform fast, multi-process-safe linear searches using an in-memory `numpy`-based index.
63
+ - **Full-Text and Fuzzy Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine, enhanced with optional fuzzy search for typo-tolerant matching.
64
+ - **Knowledge Graph**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
65
+ - **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
66
+ - **Built-in REST API Server (Optional)**: Instantly serve your database over a RESTful API with automatic OpenAPI documentation using FastAPI.
67
+ - **Full-Featured CLI Client**: Interact with your database directly from the command line for administrative tasks and data exploration.
68
+ - **Optional Type-Safety:** Although the database is schemaless, you can use a minimalistic typing system for automatic serialization and deserialization that is Pydantic-compatible out of the box.
69
+ - **Data Export & Backups:** Dump any dictionary, list, collection, queue, blob, or log structure to a portable JSON file with a single `.dump()` command.
70
+
71
+ ## How Beaver is Implemented
72
+
73
+ BeaverDB is architected as a set of targeted wrappers around a standard SQLite database. The core `BeaverDB` class manages a single connection to the SQLite file and initializes all the necessary tables for the various features.
74
+
75
+ When you call a method like `db.dict("my_dict")` or `db.collection("my_docs")`, you get back a specialized manager object (`DictManager`, `CollectionManager`, etc.) that provides a clean, Pythonic API for that specific data modality. These managers translate the simple method calls (e.g., `my_dict["key"] = "value"`) into the appropriate SQL queries, handling all the complexity of data serialization, indexing, and transaction management behind the scenes. This design provides a minimal and intuitive API surface while leveraging the power and reliability of SQLite.
76
+
77
+ The vector store in BeaverDB is designed for simplicity and multi-process safety, using an in-memory `numpy` index with a log-based synchronization mechanism. Here's a look at the core ideas behind its implementation:
78
+
79
+ - **In-Memory Delta-Index System**: Each process maintains its own in-memory `numpy` index. This index is split into two tiers to balance fast writes with read efficiency:
80
+ - **Base Index (`N-matrix`):** A `numpy` array holding the compacted, main set of vectors. This is loaded from disk once.
81
+ - **Delta Index (`k-matrix`):** A small, in-memory `numpy` array holding all new vectors that have been added since the last compaction.
82
+ - **Tombstones:** A set of deleted vector IDs that are filtered out at search time.
83
+ - **Fast, Log-Based Sync**: All vector additions (`index()`) and deletions (`drop()`) are O(1) writes to a central SQLite log table (`_vector_change_log`). When another process performs a search, it first checks this log and performs a fast O(k) "delta-sync" to update its in-memory `k-matrix` and tombstones, rather than re-loading the entire N-matrix.
84
+ - **Automatic Compaction**: When the delta index (`k-matrix`) grows too large, a compaction process is triggered to rebuild the main `N-matrix` from the database and clear the log. This ensures search performance remains fast over time.
85
+
86
+ This delta-index approach allows BeaverDB to provide a vector search experience that has extremely fast O(1) writes and O(k) cross-process synchronization, sacrificing O(log N) search time for a simpler, dependency-free O(N+k) linear scan. This aligns with the "simplicity-first" philosophy of the library.
87
+
88
+ ## Installation
89
+
90
+ Install the core library:
91
+
92
+ ```bash
93
+ pip install beaver-db
94
+ ```
95
+
96
+ This includes `numpy` (for vector search) and `rich`/`typer` (for the CLI).
97
+
98
+ To include optional features, you can install them as extras:
99
+
100
+ ```bash
101
+ # For the REST API server and client
102
+ pip install "beaver-db[remote]"
103
+
104
+ # To install all optional features at once
105
+ pip install "beaver-db[full]"
106
+ ```
107
+
108
+ ### Running with Docker
109
+
110
+ For a fully embedded and lightweight solution, you can run the BeaverDB REST API server using Docker. This is the easiest way to get a self-hosted instance up and running.
111
+
112
+ ```bash
113
+ docker pull ghcr.io/syalia-srl/beaver:latest
114
+ docker run -p 8000:8000 -v $(pwd)/data:/app ghcr.io/syalia-srl/beaver
115
+ ```
116
+
117
+ This command will start the BeaverDB server, and your database file will be stored in the data directory on your host machine. You can access the API at [http://localhost:8000](http://localhost:8000).
118
+
119
+ ## Quickstart
120
+
121
+ Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
122
+
123
+ ```python
124
+ from beaver import BeaverDB, Document
125
+
126
+ # 1. Initialize the database
127
+ db = BeaverDB("data.db")
128
+
129
+ # 2. Use a namespaced dictionary for app configuration
130
+ config = db.dict("app_config")
131
+ config["theme"] = "dark"
132
+ print(f"Theme set to: {config['theme']}")
133
+
134
+ # 3. Use a persistent list to manage a task queue
135
+ tasks = db.list("daily_tasks")
136
+ tasks.push("Write the project report")
137
+ tasks.push("Deploy the new feature")
138
+ print(f"First task is: {tasks[0]}")
139
+
140
+ # 4. Use a collection for document storage and search
141
+ articles = db.collection("articles")
142
+ doc = Document(
143
+ id="sqlite-001",
144
+ content="SQLite is a powerful embedded database ideal for local apps."
145
+ )
146
+ articles.index(doc)
147
+
148
+ # Perform a full-text search
149
+ results = articles.match(query="database")
150
+ top_doc, rank = results[0]
151
+ print(f"FTS Result: '{top_doc.content}'")
152
+
153
+ db.close()
154
+ ```
155
+
156
+ ## Built-in Server and CLI
157
+
158
+ Beaver comes with a built-in REST API server and a powerful, full-featured command-line client, allowing you to interact with your database without writing any code.
159
+
160
+ ### REST API Server
161
+
162
+ You can instantly expose all of your database's functionality over a RESTful API. This is perfect for building quick prototypes, microservices, or for interacting with your data from other languages.
163
+
164
+ **1. Start the server**
165
+
166
+ ```bash
167
+ # Start the server for your database file
168
+ beaver serve --database data.db --port 8000
169
+ ```
170
+
171
+ This starts a `FastAPI` server. You can now access the interactive API documentation at `http://127.0.0.1:8000/docs`.
172
+
173
+ **2. Interact with the API**
174
+
175
+ Here are a couple of examples using `curl`:
176
+
177
+ ```bash
178
+ # Set a value in the 'app_config' dictionary
179
+ curl -X PUT http://127.0.0.1:8000/dicts/app_config/api_key \
180
+ -H "Content-Type: application/json" \
181
+ -d '"your-secret-api-key"'
182
+
183
+ # Get the value back
184
+ curl http://127.0.0.1:8000/dicts/app_config/api_key
185
+ # Output: "your-secret-api-key"
186
+ ```
187
+
188
+ ### Full-Featured CLI Client
189
+
190
+ The CLI client allows you to call any BeaverDB method directly from your terminal. Built with `typer` and `rich`, it provides a user-friendly, task-oriented interface with beautiful output.
191
+
192
+ ```bash
193
+ # Get a value from a dictionary
194
+ beaver dict app_config get theme
195
+
196
+ # Set a value (JSON is automatically parsed)
197
+ beaver dict app_config set user '{"name": "Alice", "id": 123}'
198
+
199
+ # Push an item to a list
200
+ beaver list daily_tasks push "Review PRs"
201
+
202
+ # Watch a live, aggregated dashboard of a log
203
+ beaver log system_metrics watch
204
+
205
+ # Run a script protected by a distributed lock
206
+ beaver lock my-cron-job run bash -c 'run_daily_report.sh'
207
+ ```
208
+
209
+ ## Data Export for Backups
210
+
211
+ All data structures (`dict`, `list`, `collection`, `queue`, `log`, and `blobs`) support a `.dump()` method for easy backups and migration. You can either write the data directly to a JSON file or get it as a Python dictionary.
212
+
213
+ ```python
214
+ import json
215
+ from beaver import BeaverDB
216
+
217
+ db = BeaverDB("my_app.db")
218
+ config = db.dict("app_config")
219
+
220
+ # Add some data
221
+ config["theme"] = "dark"
222
+ config["user_id"] = 456
223
+
224
+ # Dump the dictionary's contents to a JSON file
225
+ with open("config_backup.json", "w") as f:
226
+ config.dump(f)
227
+
228
+ # 'config_backup.json' now contains:
229
+ # {
230
+ # "metadata": {
231
+ # "type": "Dict",
232
+ # "name": "app_config",
233
+ # "count": 2,
234
+ # "dump_date": "2025-11-02T09:05:10.123456Z"
235
+ # },
236
+ # "items": [
237
+ # {"key": "theme", "value": "dark"},
238
+ # {"key": "user_id", "value": 456}
239
+ # ]
240
+ # }
241
+
242
+ # You can also get the dump as a Python object
243
+ dump_data = config.dump()
244
+ ```
245
+
246
+ You can also use the CLI to dump data:
247
+
248
+ ```bash
249
+ beaver --database data.db collection my_documents dump > my_documents.json
250
+ ```
251
+
252
+ ## Things You Can Build with Beaver
253
+
254
+ Here are a few ideas to inspire your next project, showcasing how to combine Beaver's features to build powerful local applications.
255
+
256
+ ### 1. AI Agent Task Management
257
+
258
+ Use a **persistent priority queue** to manage tasks for an AI agent. This ensures the agent always works on the most important task first, even if the application restarts.
259
+
260
+ ```python
261
+ tasks = db.queue("agent_tasks")
262
+
263
+ # Tasks are added with a priority (a lower number means a higher priority)
264
+ tasks.put({"action": "summarize_news"}, priority=10)
265
+ tasks.put({"action": "respond_to_user"}, priority=1)
266
+ tasks.put({"action": "run_backup"}, priority=20)
267
+
268
+ # The agent retrieves the highest-priority task
269
+ next_task = tasks.get() # -> Returns the "respond_to_user" task
270
+ print(f"Agent's next task: {next_task.data['action']}")
271
+ ```
272
+
273
+ ### 2. User Authentication and Profile Store
274
+
275
+ Use a **namespaced dictionary** to create a simple and secure user store. The key can be the username, and the value can be a dictionary containing the hashed password and other profile information.
276
+
277
+ ```python
278
+ users = db.dict("user_profiles")
279
+
280
+ # Create a new user
281
+ users["alice"] = {
282
+ "hashed_password": "...",
283
+ "email": "alice@example.com",
284
+ "permissions": ["read", "write"]
285
+ }
286
+
287
+ # Retrieve a user's profile
288
+ alice_profile = users.get("alice")
289
+ ```
290
+
291
+ ### 3. Chatbot Conversation History
292
+
293
+ A **persistent list** is perfect for storing the history of a conversation. Each time the user or the bot sends a message, just `push` it to the list. This maintains a chronological record of the entire dialogue.
294
+
295
+ ```python
296
+ chat_history = db.list("conversation_with_user_123")
297
+
298
+ chat_history.push({"role": "user", "content": "Hello, Beaver!"})
299
+ chat_history.push({"role": "assistant", "content": "Hello! How can I help you today?"})
300
+
301
+ # Retrieve the full conversation
302
+ for message in chat_history:
303
+ print(f"{message['role']}: {message['content']}")
304
+ ```
305
+
306
+ ### 4. Build a RAG (Retrieval-Augmented Generation) System
307
+
308
+ Combine **vector search** and **full-text search** to build a powerful RAG pipeline for your local documents. The vector search uses a multi-process-safe, in-memory `numpy` index that supports incremental additions without downtime.
309
+
310
+ ```python
311
+ # Get context for a user query like "fast python web frameworks"
312
+ vector_results = [doc for doc, _ in docs.search(vector=query_vector)]
313
+ text_results = [doc for doc, _ in docs.match(query="python web framework")]
314
+
315
+ # Combine and rerank for the best context
316
+ from beaver.collections import rerank
317
+ best_context = rerank(vector_results, text_results, weights=[0.6, 0.4])
318
+ ```
319
+
320
+ ### 5. Caching for Expensive API Calls
321
+
322
+ Leverage a **dictionary with a TTL (Time-To-Live)** to cache the results of slow network requests. This can dramatically speed up your application and reduce your reliance on external services.
323
+
324
+ ```python
325
+ api_cache = db.dict("external_api_cache")
326
+
327
+ # Check the cache first
328
+ response = api_cache.get("weather_new_york")
329
+ if response is None:
330
+ # If not in cache, make the real API call
331
+ response = make_slow_weather_api_call("New York")
332
+ # Cache the result for 1 hour
333
+ api_cache.set("weather_new_york", response, ttl_seconds=3600)
334
+ ```
335
+
336
+ ### 6. Real-time Event-Driven Systems
337
+
338
+ Use the **high-efficiency pub/sub system** to build applications where different components react to events in real-time. This is perfect for decoupled systems, real-time UIs, or monitoring services.
339
+
340
+ ```python
341
+ # In one process or thread (e.g., a monitoring service)
342
+ system_events = db.channel("system_events")
343
+ system_events.publish({"event": "user_login", "user_id": "alice"})
344
+
345
+ # In another process or thread (e.g., a UI updater or logger)
346
+ with db.channel("system_events").subscribe() as listener:
347
+ for message in listener.listen():
348
+ print(f"Event received: {message}")
349
+ # >> Event received: {'event': 'user_login', 'user_id': 'alice'}
350
+ ```
351
+
352
+ ### 7. Storing User-Uploaded Content
353
+
354
+ Use the simple blob store to save files like user avatars, attachments, or generated reports directly in the database. This keeps all your data in one portable file.
355
+
356
+ ```python
357
+ attachments = db.blobs("user_uploads")
358
+
359
+ # Store a user's avatar
360
+ with open("avatar.png", "rb") as f:
361
+ avatar_bytes = f.read()
362
+
363
+ attachments.put(
364
+ key="user_123_avatar.png",
365
+ data=avatar_bytes,
366
+ metadata={"mimetype": "image/png"}
367
+ )
368
+
369
+ # Retrieve it later
370
+ avatar = attachments.get("user_123_avatar.png")
371
+ ```
372
+
373
+ ### 8. Real-time Application Monitoring
374
+
375
+ Use the **time-indexed log** to monitor your application's health in real-time. The `live()` method provides a continuously updating, aggregated view of your log data, perfect for building simple dashboards directly in your terminal.
376
+
377
+ ```python
378
+ from datetime import timedelta
379
+ import statistics
380
+
381
+ logs = db.log("system_metrics")
382
+
383
+ def summarize(window):
384
+ values = [log.get("value", 0) for log in window]
385
+ return {"mean": statistics.mean(values), "count": len(values)}
386
+
387
+ live_summary = logs.live(
388
+ window=timedelta(seconds=10),
389
+ period=timedelta(seconds=1),
390
+ aggregator=summarize
391
+ )
392
+
393
+ for summary in live_summary:
394
+ print(f"Live Stats (10s window): Count={summary['count']}, Mean={summary['mean']:.2f}")
395
+ ```
396
+
397
+ ### 9. Coordinate Distributed Web Scrapers
398
+
399
+ Run multiple scraper processes in parallel and use `db.lock()` to coordinate them. You can ensure only one process refreshes a shared API token or sitemap, preventing race conditions and rate-limiting.
400
+
401
+ ```python
402
+ import time
403
+
404
+ scrapers_state = db.dict("scraper_state")
405
+
406
+ last_refresh = scrapers_state.get("last_sitemap_refresh", 0)
407
+ if time.time() - last_refresh > 3600: # Only refresh once per hour
408
+ try:
409
+ # Try to get a lock to refresh the shared sitemap, but don't wait long
410
+ with db.lock("refresh_sitemap", timeout=1):
411
+ # We got the lock. Check if it's time to refresh.
412
+ print(f"PID {os.getpid()} is refreshing the sitemap...")
413
+ scrapers_state["sitemap"] = ["/page1", "/page2"] # Your fetch_sitemap()
414
+ scrapers_state["last_sitemap_refresh"] = time.time()
415
+
416
+ except TimeoutError:
417
+ # Another process is already refreshing, so we can skip
418
+ print(f"PID {os.getpid()} letting other process handle refresh.")
419
+
420
+ # All processes can now safely use the shared sitemap
421
+ sitemap = scrapers_state.get("sitemap")
422
+ # ... proceed with scraping ...
423
+ ```
424
+
425
+ ## Type-Safe Data Models
426
+
427
+ For enhanced data integrity and a better developer experience, BeaverDB supports type-safe operations for all modalities. By associating a model with these data structures, you get automatic serialization and deserialization, complete with autocompletion in your editor.
428
+
429
+ This feature is designed to be flexible and works seamlessly with two kinds of models:
430
+
431
+ - **Pydantic Models**: If you're already using Pydantic, your `BaseModel` classes will work out of the box.
432
+ - **Lightweight `beaver.Model`**: For a zero-dependency solution, you can inherit from the built-in `beaver.Model` class, which is a standard Python class with serialization methods automatically included.
433
+
434
+ Here’s a quick example of how to use it:
435
+
436
+ ```python
437
+ from beaver import BeaverDB, Model
438
+
439
+ # Inherit from beaver.Model for a lightweight, dependency-free model
440
+ class User(Model):
441
+ name: str
442
+ email: str
443
+
444
+ db = BeaverDB("user_data.db")
445
+
446
+ # Associate the User model with a dictionary
447
+ users = db.dict("user_profiles", model=User)
448
+
449
+ # BeaverDB now handles serialization automatically
450
+ users["alice"] = User(name="Alice", email="alice@example.com")
451
+
452
+ # The retrieved object is a proper instance of the User class
453
+ retrieved_user = users["alice"]
454
+ # Your editor will provide autocompletion here
455
+ print(f"Retrieved: {retrieved_user.name}")
456
+ ```
457
+
458
+ In the same way you can have typed message payloads in `db.channel`, typed metadata in `db.blobs`, and custom document types in `db.collection`, as well as custom types in lists and queues.
459
+
460
+ Basically, everywhere you can store or get some object in BeaverDB, you can use a typed version by adding `model=MyClass` to the corresponding wrapper method in `BeaverDB` and enjoy first-class type safety and inference.
461
+
462
+ ## Documentation
463
+
464
+ For a complete API reference, in-depth guides, and more examples, please visit the official documentation at:
465
+
466
+ [**https://syalia.com/beaver**](https://syalia.com/beaver)
467
+
468
+ Also, check the [examples](./examples) folder for a comprehensive list of working examples using `beaver`.
469
+
470
+ ## Roadmap
471
+
472
+ `beaver` is roughly feature-complete, but there are still some features and improvements planned for future releases, mostly directed to improving developer experience.
473
+
474
+ If you think of something that would make `beaver` more useful for your use case, please open an issue and/or submit a pull request.
475
+
476
+ ## License
477
+
478
+ This project is licensed under the MIT License.