beaver-db 0.16.5__tar.gz → 0.16.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

Files changed (48) hide show
  1. beaver_db-0.16.7/.gitignore +12 -0
  2. beaver_db-0.16.7/.python-version +1 -0
  3. {beaver_db-0.16.5 → beaver_db-0.16.7}/PKG-INFO +18 -9
  4. {beaver_db-0.16.5 → beaver_db-0.16.7}/README.md +13 -3
  5. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/channels.py +10 -0
  6. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/queues.py +17 -2
  7. beaver_db-0.16.7/design.md +118 -0
  8. beaver_db-0.16.7/examples/async_pubsub.py +48 -0
  9. beaver_db-0.16.7/examples/blobs.py +51 -0
  10. beaver_db-0.16.7/examples/cache.py +64 -0
  11. beaver_db-0.16.7/examples/fts.py +92 -0
  12. beaver_db-0.16.7/examples/fuzzy.py +69 -0
  13. beaver_db-0.16.7/examples/general_test.py +184 -0
  14. beaver_db-0.16.7/examples/graph.py +72 -0
  15. beaver_db-0.16.7/examples/kvstore.py +52 -0
  16. beaver_db-0.16.7/examples/list.py +61 -0
  17. beaver_db-0.16.7/examples/logs.py +105 -0
  18. beaver_db-0.16.7/examples/pqueue.py +62 -0
  19. beaver_db-0.16.7/examples/producer_consumer.py +89 -0
  20. beaver_db-0.16.7/examples/publisher.py +43 -0
  21. beaver_db-0.16.7/examples/pubsub.py +84 -0
  22. beaver_db-0.16.7/examples/rerank.py +66 -0
  23. beaver_db-0.16.7/examples/stress_vectors.py +106 -0
  24. beaver_db-0.16.7/examples/subscriber.py +33 -0
  25. beaver_db-0.16.7/examples/textual_chat.css +48 -0
  26. beaver_db-0.16.7/examples/textual_chat.py +217 -0
  27. beaver_db-0.16.7/examples/type_hints.py +35 -0
  28. beaver_db-0.16.7/examples/vector.py +50 -0
  29. beaver_db-0.16.7/makefile +15 -0
  30. {beaver_db-0.16.5 → beaver_db-0.16.7}/pyproject.toml +5 -1
  31. beaver_db-0.16.7/roadmap.md +105 -0
  32. beaver_db-0.16.7/uv.lock +238 -0
  33. beaver_db-0.16.5/beaver_db.egg-info/PKG-INFO +0 -334
  34. beaver_db-0.16.5/beaver_db.egg-info/SOURCES.txt +0 -19
  35. beaver_db-0.16.5/beaver_db.egg-info/dependency_links.txt +0 -1
  36. beaver_db-0.16.5/beaver_db.egg-info/requires.txt +0 -3
  37. beaver_db-0.16.5/beaver_db.egg-info/top_level.txt +0 -1
  38. beaver_db-0.16.5/setup.cfg +0 -4
  39. {beaver_db-0.16.5 → beaver_db-0.16.7}/LICENSE +0 -0
  40. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/__init__.py +0 -0
  41. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/blobs.py +0 -0
  42. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/collections.py +0 -0
  43. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/core.py +0 -0
  44. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/dicts.py +0 -0
  45. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/lists.py +0 -0
  46. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/logs.py +0 -0
  47. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/types.py +0 -0
  48. {beaver_db-0.16.5 → beaver_db-0.16.7}/beaver/vectors.py +0 -0
@@ -0,0 +1,12 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ *.db*
12
+ .env
@@ -0,0 +1 @@
1
+ 3.13
@@ -1,18 +1,17 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beaver-db
3
- Version: 0.16.5
3
+ Version: 0.16.7
4
4
  Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
- Classifier: Programming Language :: Python :: 3.13
5
+ License-File: LICENSE
6
6
  Classifier: License :: OSI Approved :: MIT License
7
7
  Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3.13
8
9
  Classifier: Topic :: Database
9
10
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
10
11
  Requires-Python: >=3.13
11
- Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
12
  Provides-Extra: faiss
14
- Requires-Dist: faiss-cpu>=1.12.0; extra == "faiss"
15
- Dynamic: license-file
13
+ Requires-Dist: faiss-cpu>=1.12.0; extra == 'faiss'
14
+ Description-Content-Type: text/markdown
16
15
 
17
16
  # beaver 🦫
18
17
 
@@ -27,15 +26,17 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
27
26
 
28
27
  `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
29
28
 
29
+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
30
+
30
31
  ## Design Philosophy
31
32
 
32
33
  `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
33
34
 
34
- - **Minimalistic**: Uses only Python's standard libraries (`sqlite3`) and `numpy`/`faiss-cpu`.
35
+ - **Minimalistic**: The core library has zero external dependencies. Vector search capabilities, which require `numpy` and `faiss-cpu`, are available as an optional feature.
35
36
  - **Schemaless**: Flexible data storage without rigid schemas across all modalities.
36
37
  - **Synchronous, Multi-Process, and Thread-Safe**: Designed for simplicity and safety in multi-threaded and multi-process environments.
37
38
  - **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
38
- - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. Vector search is accelerated with a high-performance, persistent `faiss` index.
39
+ - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. Vector search is an optional feature accelerated with a high-performance, persistent `faiss` index.
39
40
  - **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
40
41
 
41
42
  ## Core Features
@@ -46,7 +47,7 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
46
47
  - **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
47
48
  - **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
48
49
  - **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
49
- - **High-Performance Vector Storage & Search**: Store vector embeddings and perform fast, crash-safe approximate nearest neighbor searches using a `faiss`-based hybrid index.
50
+ - **High-Performance Vector Storage & Search (Optional)**: Store vector embeddings and perform fast approximate nearest neighbor searches using a `faiss`-based hybrid index.
50
51
  - **Full-Text and Fuzzy Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine, enhanced with optional fuzzy search for typo-tolerant matching.
51
52
  - **Knowledge Graph**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
52
53
  - **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
@@ -71,10 +72,18 @@ This hybrid approach allows BeaverDB to provide a vector search experience that
71
72
 
72
73
  ## Installation
73
74
 
75
+ Install the core, dependency-free library:
76
+
74
77
  ```bash
75
78
  pip install beaver-db
76
79
  ```
77
80
 
81
+ If you want vector search capabilities, install the `faiss` extra:
82
+
83
+ ```bash
84
+ pip install "beaver-db[faiss]"
85
+ ```
86
+
78
87
  ## Quickstart
79
88
 
80
89
  Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
@@ -11,15 +11,17 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
11
11
 
12
12
  `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
13
13
 
14
+ > If you like beaver's minimalist, no-bullshit philosophy, check out [castor](https://github.com/apiad/castor) for an equally minimalistic approach to task orchestration.
15
+
14
16
  ## Design Philosophy
15
17
 
16
18
  `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
17
19
 
18
- - **Minimalistic**: Uses only Python's standard libraries (`sqlite3`) and `numpy`/`faiss-cpu`.
20
+ - **Minimalistic**: The core library has zero external dependencies. Vector search capabilities, which require `numpy` and `faiss-cpu`, are available as an optional feature.
19
21
  - **Schemaless**: Flexible data storage without rigid schemas across all modalities.
20
22
  - **Synchronous, Multi-Process, and Thread-Safe**: Designed for simplicity and safety in multi-threaded and multi-process environments.
21
23
  - **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
22
- - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. Vector search is accelerated with a high-performance, persistent `faiss` index.
24
+ - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications. Vector search is an optional feature accelerated with a high-performance, persistent `faiss` index.
23
25
  - **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
24
26
 
25
27
  ## Core Features
@@ -30,7 +32,7 @@ A fast, single-file, multi-modal database for Python, built with the standard `s
30
32
  - **Persistent Priority Queue**: A high-performance, persistent priority queue perfect for task orchestration across multiple processes. Also with optional async support.
31
33
  - **Time-Indexed Log for Monitoring**: A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view of the most recent events for real-time dashboards.
32
34
  - **Simple Blob Storage**: A dictionary-like interface for storing medium-sized binary files (like PDFs or images) directly in the database, ensuring transactional integrity with your other data.
33
- - **High-Performance Vector Storage & Search**: Store vector embeddings and perform fast, crash-safe approximate nearest neighbor searches using a `faiss`-based hybrid index.
35
+ - **High-Performance Vector Storage & Search (Optional)**: Store vector embeddings and perform fast approximate nearest neighbor searches using a `faiss`-based hybrid index.
34
36
  - **Full-Text and Fuzzy Search**: Automatically index and search through document metadata using SQLite's powerful FTS5 engine, enhanced with optional fuzzy search for typo-tolerant matching.
35
37
  - **Knowledge Graph**: Create relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
36
38
  - **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
@@ -55,10 +57,18 @@ This hybrid approach allows BeaverDB to provide a vector search experience that
55
57
 
56
58
  ## Installation
57
59
 
60
+ Install the core, dependency-free library:
61
+
58
62
  ```bash
59
63
  pip install beaver-db
60
64
  ```
61
65
 
66
+ If you want vector search capabilities, install the `faiss` extra:
67
+
68
+ ```bash
69
+ pip install "beaver-db[faiss]"
70
+ ```
71
+
62
72
  ## Quickstart
63
73
 
64
74
  Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
@@ -190,6 +190,16 @@ class ChannelManager[T]:
190
190
 
191
191
  self._listeners.clear()
192
192
 
193
+ def prune(self):
194
+ """
195
+ Cleans the log history for the channel effectively
196
+ deleting all previous logs.
197
+
198
+ Useful for reducing the database once logs are not needed.
199
+ """
200
+ with self._conn:
201
+ self._conn.execute("DELETE FROM beaver_pubsub_log WHERE channel_name = ?", (self._name,))
202
+
193
203
  def _polling_loop(self):
194
204
  """
195
205
  The main loop for the background thread.
@@ -25,6 +25,12 @@ class AsyncQueueManager[T]:
25
25
  """Asynchronously adds an item to the queue with a specific priority."""
26
26
  await asyncio.to_thread(self._queue.put, data, priority)
27
27
 
28
+ async def peek(self) -> QueueItem[T] | None:
29
+ """
30
+ Asynchronously returns the first item without removing it, if any, otherwise returns None.
31
+ """
32
+ return await asyncio.to_thread(self._queue.peek)
33
+
28
34
  @overload
29
35
  async def get(self, block: Literal[True] = True, timeout: float | None = None) -> QueueItem[T]: ...
30
36
  @overload
@@ -77,7 +83,7 @@ class QueueManager[T]:
77
83
  (self._name, priority, time.time(), self._serialize(data)),
78
84
  )
79
85
 
80
- def _get_item_atomically(self) -> QueueItem[T] | None:
86
+ def _get_item_atomically(self, pop:bool=True) -> QueueItem[T] | None:
81
87
  """
82
88
  Performs a single, atomic attempt to retrieve and remove the
83
89
  highest-priority item from the queue. Returns None if the queue is empty.
@@ -100,12 +106,21 @@ class QueueManager[T]:
100
106
  return None
101
107
 
102
108
  rowid, priority, timestamp, data = result
103
- cursor.execute("DELETE FROM beaver_priority_queues WHERE rowid = ?", (rowid,))
109
+
110
+ if pop:
111
+ cursor.execute("DELETE FROM beaver_priority_queues WHERE rowid = ?", (rowid,))
104
112
 
105
113
  return QueueItem(
106
114
  priority=priority, timestamp=timestamp, data=self._deserialize(data)
107
115
  )
108
116
 
117
+ def peek(self) -> QueueItem[T] | None:
118
+ """
119
+ Retrieves the first item of the queue.
120
+ If the queue is empty, returns None.
121
+ """
122
+ return self._get_item_atomically(pop=False)
123
+
109
124
  @overload
110
125
  def get(self, block: Literal[True] = True, timeout: float | None = None) -> QueueItem[T]: ...
111
126
  @overload
@@ -0,0 +1,118 @@
1
+ # BeaverDB Design Document
2
+
3
+ Version: 1.1
4
+ Status: In Progress
5
+ Last Updated: September 18, 2025
6
+
7
+ ## 1. Introduction & Vision
8
+
9
+ `beaver-db` is a local-first, embedded, multi-modal database for Python. Its primary motivation is to provide a simple, single-file solution for modern applications that need to handle complex data types like vectors, documents, and graphs without the overhead of a database server.
10
+
11
+ The vision for `beaver-db` is to be the go-to "good enough" database for prototypes, desktop utilities, and small-scale applications. It empowers developers to start quickly with a simple API but provides sufficient power and performance so that they do not need to immediately upgrade to a more complex database system as their application's features grow.
12
+
13
+ ## 2. Guiding Principles
14
+
15
+ These principles guide all development and design decisions for beaver-db.
16
+
17
+ * **Local-First & Embedded:** The database will always be a single SQLite file. This is the non-negotiable source of truth. Temporary files for journaling or syncing are acceptable, but the core architecture is serverless. Any feature that requires a client-server model is out of scope.
18
+ * **Standard SQLite Compatibility:** The .db file generated by beaver-db must always be a valid SQLite file that can be opened and queried by any standard SQLite tool. This ensures data portability and interoperability.
19
+ * **Minimal & Cross-Platform Dependencies:** New dependencies beyond numpy and scipy will only be considered if they provide a monumental performance improvement and are fully cross-platform, with a guarantee of easy installation for all users. The core library should strive to use Python's standard library wherever possible.
20
+ * **Simplicity and Pythonic API:** The library must present a simple, intuitive, and "Pythonic" interface. We will always prefer simple function calls with clear parameters over custom Domain Specific Languages (DSLs) or expression parsing. The user should not have to learn a new query language.
21
+ * **Developer Experience & Minimal API Surface:** The primary user experience goal is to provide a clean and minimal public API.
22
+ * **User-facing Classes:** The user should only ever need to instantiate BeaverDB and Document directly.
23
+ * **Fluent Interface:** All other functionalities (like list or collection operations) are exposed through methods on the BeaverDB object (e.g., `db.list("my_list")`, `db.collection("my_docs")`). These methods return internal wrapper objects (`ListWrapper`, `CollectionWrapper`) that provide a rich, fluent interface, preventing the need for the user to learn and manage a large number of classes.
24
+ * **API Design:** Public functions will have short, descriptive names and well-documented parameters, adhering to Python's conventions.
25
+ * **Synchronous Core with Async Potential:** The core library is built on a synchronous foundation, reflecting the synchronous nature of the underlying `sqlite3` driver. This ensures thread safety and simplicity. An async-compatible API may be introduced in the future, but it will likely be a wrapper around the synchronous core (e.g., using a thread pool).
26
+ * **Convention over Configuration:** Features should work well out-of-the-box with sensible defaults. While configuration options can be provided, the user should not be required to tweak many parameters to get good performance and functionality.
27
+
28
+ ## 3. Architecture & Core Components
29
+
30
+ `beaver-db` is architected as a set of targeted wrappers around a standard SQLite database, with an in-memory component for performance-critical tasks like vector search.
31
+
32
+ ### 3.1. Core Engine (BeaverDB)
33
+
34
+ * **Connection Management:** The BeaverDB class manages a single connection to the SQLite database file.
35
+ * **Concurrency:** It enables `PRAGMA journal_mode=WAL;` (Write-Ahead Logging) by default. This provides a good level of concurrency between one writer and multiple readers, which is a common pattern for the target applications.
36
+ * **Schema Management:** All tables created and managed by the library are prefixed with `beaver_` (or `_beaver_` for internal helpers). This avoids conflicts with user-defined tables within the same database file. The schema is evolved by adding new `beaver_*` tables as new features are introduced.
37
+
38
+ ### 3.2. Data Models & Features
39
+
40
+ ### Key-Value Dictionaries (DictWrapper)
41
+
42
+ * **Implementation**: A single table (`beaver_dicts`) stores key-value pairs partitioned by a `dict_name` (TEXT). The primary key is a composite of `(dict_name, key)`.
43
+ * **Design**: The `db.dict("namespace")` method returns a `DictWrapper` that provides a complete and standard Pythonic dictionary-like interface. This includes subscripting (`__getitem__`, `__setitem__`), explicit `get()`/`set()` methods, and iterators (`keys()`, `values()`, `items()`). This feature is ideal for managing structured configurations or namespaced key-value data while adhering to the principle of a Simple and Pythonic API.
44
+
45
+ #### Lists (ListWrapper)
46
+
47
+ * **Implementation:** A table (`beaver_lists`) storing list_name, item_order (REAL), and item_value (TEXT).
48
+ * **Design:** The item_order is a floating-point number. This allows for O(1) insertions in the middle of the list by calculating the midpoint between two existing order values. This avoids re-indexing all subsequent items. The ListWrapper provides a rich, Pythonic API (`__len__`, `__getitem__`, `push`, `pop`, etc.).
49
+
50
+ #### Pub/Sub System
51
+
52
+ * **Implementation:** A log table (`beaver_pubsub_log`) that stores messages with a timestamp, channel name, and payload.
53
+ * **Design:** The current SubWrapper is a synchronous iterator that polls the table for new messages since the last seen timestamp. This design is simple and robust. A more performant, event-driven mechanism could be explored in the future, but only if it does not violate the principles of simplicity and minimal dependencies.
54
+
55
+ #### Collections (CollectionWrapper)
56
+
57
+ This is the most complex component, supporting documents, vectors, text, and graphs.
58
+
59
+ * **Document Storage:** Documents are stored in the beaver_collections table. Each document has a collection, item_id, optional item_vector (BLOB), and metadata (TEXT, as JSON). The Document class is a flexible container with no enforced schema.
60
+ * **Vector Search (ANN):**
61
+ * **Indexing:** Vector embeddings are stored as raw bytes (BLOBs) in the beaver_collections table.
62
+ * **Search Algorithm:** For performance, an in-memory k-d tree (`scipy.spatial.cKDTree`) is used for Approximate Nearest Neighbor (ANN) search.
63
+ * **Stale Index Handling:** The database maintains a version number for each collection in beaver_collection_versions. This version is incremented on every index or drop operation. The CollectionWrapper caches the version of its in-memory index and automatically triggers a `refresh()` if it detects its version is older than the database version.
64
+ * **Design Trade-offs:** This in-memory approach is extremely fast for datasets that fit in RAM. The library will not have hard-coded scalability limits, but performance testing will establish practical boundaries which will be published in the documentation.
65
+ * **Full-Text Search (FTS):**
66
+ * **Implementation:** Uses a virtual table (`beaver_fts_index`) powered by SQLite's FTS5 extension.
67
+ * **Indexing:** When a Document is indexed, its metadata is flattened into key-value pairs (e.g., author.name becomes author_name). All string values are automatically inserted into the FTS index.
68
+ * **Querying:** The `match()` method provides a simple interface for searching. It supports FTS5 syntax (like "OR", "AND") passed directly in the query string. The goal is to provide powerful search out-of-the-box without requiring complex configuration.
69
+ * **Graph Engine:**
70
+ * **Implementation:** Relationships are stored as directed edges in the beaver_edges table, linking a source_item_id to a target_item_id with a label.
71
+ * **Design:** The API is purely functional and Pythonic. It does not use a custom graph query language.
72
+ * `connect()`: Creates a directed edge.
73
+ * `neighbors()`: Retrieves immediate (1-hop) neighbors.
74
+ * `walk()`: Uses a recursive Common Table Expression (CTE) in SQL to perform efficient multi-hop graph traversals (BFS). This is powerful yet keeps the implementation clean and contained within SQL.
75
+
76
+ ### 3.3. Reliability and Data Handling
77
+
78
+ * **Atomicity and Transactions:** Correctness is paramount. Any public method that performs multiple database modifications (e.g., collection.index, which writes to the documents, FTS, and version tables) **must** be wrapped in a single, atomic database transaction. If any step of the operation fails, the entire transaction will be rolled back, ensuring the database is never left in an inconsistent state.
79
+ * **Error Handling:** The library will use standard Python exceptions whenever possible (e.g., TypeError, ValueError, sqlite3.Error). Custom exceptions will be avoided to ensure the API feels native and predictable to Python developers.
80
+ * **Data Serialization:**
81
+ * The default serialization format is JSON.
82
+ * The library will provide built-in, standardized converters for common Python types not native to JSON, such as `datetime` (to ISO 8601 strings) and `bytes` (to a standard encoding like base64).
83
+ * For custom user-defined objects, it is the user's responsibility to serialize them into a JSON-compatible format (e.g., a string or a dictionary) before storing them.
84
+
85
+ ## 4. Roadmap & Future Development
86
+
87
+ ### 4.1. Immediate Priorities
88
+
89
+ The primary focus for the near future is on stability and usability.
90
+
91
+ 1. **Comprehensive Unit Testing:** Increase test coverage to ensure all features are robust and reliable.
92
+ 2. **Elaborate Examples:** Create more examples that demonstrate how to combine features (e.g., a hybrid search that uses FTS, graph traversal, and vector search together).
93
+ 3. **Performance Benchmarking:** Develop a standardized suite of performance tests to track regressions and identify optimization opportunities. The results will be used to document the practical scalability limits of the library for various use cases.
94
+
95
+ ### 4.2. Long-Term Vision
96
+
97
+ The library is approaching feature-completeness for its core domain. The long-term vision is to make it a stable, trusted, and well-documented tool for its niche.
98
+
99
+ * **Stability:** The API will soon be stabilized, with a commitment to backward compatibility as defined in the versioning policy.
100
+ * **New Modalities:** In the distant future, other data modalities could be considered if they fit the embedded, single-file philosophy.
101
+
102
+ ### 4.3. Explicitly Out of Scope
103
+
104
+ To maintain focus and simplicity, the following features will **not** be implemented:
105
+
106
+ * Client-server architecture.
107
+ * Replication or distributed operation.
108
+ * Multi-file database formats.
109
+ * Any feature that makes the database file incompatible with standard SQLite tools.
110
+ * Custom query languages or DSLs.
111
+
112
+ ### 4.4. Versioning and Backward Compatibility
113
+
114
+ The project will adhere to **Semantic Versioning (Major.Minor.Patch)**.
115
+
116
+ * **Patch Releases (x.y.Z):** For bug fixes and non-breaking changes. No API or schema changes are permitted.
117
+ * **Minor Releases (x.Y.z):** For adding new, backward-compatible features. Schema additions (e.g., new `beaver_*` tables) are allowed, but schema modifications that break older code are not. Code written for version x.z will be able to open and read a database file from version x.y where z > y.
118
+ * **Major Releases (X.y.z):** For introducing breaking changes to the API or the database schema. There is no guarantee of backward compatibility between major versions, and a migration path will not be provided automatically.
@@ -0,0 +1,48 @@
1
+ import asyncio
2
+ from beaver import BeaverDB
3
+
4
+ async def listener_task(name: str, db: BeaverDB):
5
+ """An async task that listens for messages on a channel."""
6
+ print(f"[{name}] Starting and subscribing to 'async_events'.")
7
+ async_channel = db.channel("async_events").as_async()
8
+
9
+ async with async_channel.subscribe() as listener:
10
+ async for message in listener.listen():
11
+ print(f"[{name}] Received -> {message}")
12
+
13
+ async def publisher_task(db: BeaverDB):
14
+ """An async task that publishes several messages."""
15
+ print("[Publisher] Starting.")
16
+ async_channel = db.channel("async_events").as_async()
17
+
18
+ await asyncio.sleep(1)
19
+ print("[Publisher] Publishing message 1: User Login")
20
+ await async_channel.publish({"event": "user_login", "user": "alice"})
21
+
22
+ await asyncio.sleep(0.5)
23
+ print("[Publisher] Publishing message 2: System Alert")
24
+ await async_channel.publish({"event": "alert", "level": "high"})
25
+
26
+ print("[Publisher] Finished publishing.")
27
+
28
+ async def main():
29
+ """Sets up and runs the concurrent async publisher and listeners."""
30
+ db = BeaverDB("async_demo.db")
31
+
32
+ # Create and run the listener and publisher tasks concurrently
33
+ listener_a = asyncio.create_task(listener_task("Listener-A", db))
34
+ listener_b = asyncio.create_task(listener_task("Listener-B", db))
35
+ publisher = asyncio.create_task(publisher_task(db))
36
+
37
+ await publisher
38
+ await asyncio.sleep(2) # Wait for messages to be processed
39
+
40
+ # In a real app, you might cancel the listeners or have a shutdown event
41
+ listener_a.cancel()
42
+ listener_b.cancel()
43
+ db.close()
44
+
45
+ if __name__ == "__main__":
46
+ print("--- BeaverDB Async Pub/Sub Demo ---")
47
+ asyncio.run(main())
48
+ print("\nDemo finished successfully.")
@@ -0,0 +1,51 @@
1
+ from beaver import BeaverDB
2
+
3
+
4
+ def blob_store_demo():
5
+ """Demonstrates the basic functionality of the blob store."""
6
+ print("--- Running Blob Store Demo ---")
7
+ db = BeaverDB("demo.db")
8
+
9
+ # 1. Get a handle to a named blob store
10
+ attachments = db.blobs("email_attachments")
11
+
12
+ # 2. Create some sample binary data
13
+ file_content = "This is the content of a virtual text file."
14
+ file_bytes = file_content.encode("utf-8")
15
+ file_key = "emails/user123/attachment_01.txt"
16
+
17
+ # 3. Store the blob with some metadata
18
+ print(f"Storing blob with key: '{file_key}'")
19
+ attachments.put(
20
+ key=file_key,
21
+ data=file_bytes,
22
+ metadata={"mimetype": "text/plain", "sender": "alice@example.com"},
23
+ )
24
+
25
+ # 4. Check for the blob's existence
26
+ if file_key in attachments:
27
+ print(f"Verified that key '{file_key}' exists in the store.")
28
+
29
+ # 5. Retrieve the blob
30
+ blob = attachments.get(file_key)
31
+ if blob:
32
+ print("\n--- Retrieved Blob ---")
33
+ print(f" Key: {blob.key}")
34
+ print(f" Metadata: {blob.metadata}")
35
+ print(f" Data (decoded): '{blob.data.decode('utf-8')}'")
36
+ assert blob.data == file_bytes
37
+
38
+ # 6. Delete the blob
39
+ print("\nDeleting blob...")
40
+ attachments.delete(file_key)
41
+
42
+ # 7. Verify deletion
43
+ if file_key not in attachments:
44
+ print(f"Verified that key '{file_key}' has been deleted.")
45
+
46
+ db.close()
47
+ print("\n--- Demo Finished Successfully ---")
48
+
49
+
50
+ if __name__ == "__main__":
51
+ blob_store_demo()
@@ -0,0 +1,64 @@
1
+ import time
2
+ from beaver import BeaverDB
3
+
4
+ def expensive_api_call(prompt: str):
5
+ """A mock function that simulates a slow API call."""
6
+ print(f"--- Making expensive API call for: '{prompt}' ---")
7
+ time.sleep(2) # Simulate network latency
8
+ if prompt == "capital of Ecuador":
9
+ return "Quito"
10
+ return "Data not found"
11
+
12
+ def cache_demo():
13
+ """Demonstrates using a namespaced dictionary as a persistent cache with TTL."""
14
+ print("--- Running Cache Demo ---")
15
+ db = BeaverDB("demo.db")
16
+
17
+ # Use a dictionary as a cache. Items will expire after 10 seconds.
18
+ api_cache = db.dict("api_cache")
19
+
20
+ prompt = "capital of Ecuador"
21
+
22
+ # --- 1. First Call (Cache Miss) ---
23
+ print("\nAttempt 1: Key is not in cache.")
24
+ response = api_cache.get(prompt)
25
+ if response is None:
26
+ print("Cache miss.")
27
+ response = expensive_api_call(prompt)
28
+ # Set the value in the cache with a 10-second TTL
29
+ api_cache.set(prompt, response, ttl_seconds=10)
30
+
31
+ print(f"Response: {response}")
32
+
33
+ # --- 2. Second Call (Cache Hit) ---
34
+ print("\nAttempt 2: Making the same request within 5 seconds.")
35
+ time.sleep(5)
36
+ response = api_cache.get(prompt)
37
+ if response is None:
38
+ print("Cache miss.")
39
+ response = expensive_api_call(prompt)
40
+ api_cache.set(prompt, response, ttl_seconds=10)
41
+ else:
42
+ print("Cache hit!")
43
+
44
+ print(f"Response: {response}")
45
+
46
+ # --- 3. Third Call (Cache Expired) ---
47
+ print("\nAttempt 3: Waiting for 12 seconds for the cache to expire.")
48
+ time.sleep(12)
49
+ response = api_cache.get(prompt)
50
+ if response is None:
51
+ print("Cache miss (key expired).")
52
+ response = expensive_api_call(prompt)
53
+ api_cache.set(prompt, response, ttl_seconds=10)
54
+ else:
55
+ print("Cache hit!")
56
+
57
+ print(f"Response: {response}")
58
+
59
+ db.close()
60
+ print("\n--- Demo Finished ---")
61
+
62
+
63
+ if __name__ == "__main__":
64
+ cache_demo()
@@ -0,0 +1,92 @@
1
+ from beaver import BeaverDB, Document
2
+
3
def full_text_search_demo():
    """
    Demonstrates the full-text search (FTS) functionality in BeaverDB.

    Indexes three articles with nested author metadata, then runs a
    general FTS query, a field-scoped query, and an OR-operator query.
    """
    print("--- Running Full-Text Search Demo ---")

    # We use a separate database file for this demo
    db = BeaverDB("demo.db")
    articles = db.collection("tech_articles")

    # --- 1. Create and Index Documents with Nested Metadata ---
    # Calling articles.index() makes each document's text fields
    # available to full-text search.
    print("Indexing documents...")

    sample_docs = [
        Document(
            id="py-001",
            content="Python is a versatile programming language. Its use in data science is notable.",
            author={"name": "Jane Doe", "email": "jane.doe@example.com"},
            category="Programming Languages",
        ),
        Document(
            id="sql-002",
            content="SQLite is a powerful embedded database. It is ideal for local applications.",
            author={"name": "John Smith", "handle": "@john_smith_sql"},
            category="Databases",
        ),
        Document(
            id="py-dev-003",
            content="Web application development with Python is made easier with frameworks like Django.",
            # Same author as py-001
            author={"name": "Jane Doe", "email": "jane.doe@example.com"},
            category="Web Development",
        ),
    ]
    for sample in sample_docs:
        articles.index(sample)

    print("Documents indexed.\n")

    # --- 2. Perform a General Full-Text Search ---
    # Searching for "data" across ALL text fields should match both
    # "data science" (py-001) and "Databases" (sql-002).
    print("--- General Search for 'data' ---")
    for hit, score in articles.match("data", top_k=5):
        print(f" - Document ID: {hit.id}, Relevance (Rank): {score:.4f}")
        print(f" Content: '{hit.content[:40]}...'")
        print(f" Author: {hit.author['name']}")

    # --- 3. Perform a Targeted Full-Text Search on a Specific Field ---
    # Restricting the query to the flattened 'author.name' field should
    # return only the two documents written by Jane Doe.
    print("\n--- Specific Search for 'Jane' in the 'author__name' field ---")
    for hit, score in articles.match("Jane", on="author.name", top_k=5):
        print(f" - Document ID: {hit.id}, Relevance (Rank): {score:.4f}")
        print(f" Author Found: {hit.author['name']}")
        print(f" Category: {hit.category}")

    # --- 4. Use FTS5 Operators ---
    # FTS5 query syntax supports boolean operators such as OR.
    print("\n--- Search with 'OR' Operator: 'SQLite OR Django' ---")
    for hit, score in articles.match("SQLite OR Django", top_k=5):
        print(f" - Document ID: {hit.id}, Relevance (Rank): {score:.4f}")
        print(f" Content: '{hit.content}'")

    db.close()
    print("\n--- Demo Finished ---")
89
+
90
+
91
# Allow running this example directly as a script.
if __name__ == "__main__":
    full_text_search_demo()
@@ -0,0 +1,69 @@
1
+ from beaver import BeaverDB, Document
2
+
3
+
4
def fuzzy_search_demo():
    """Demonstrates the unified FTS and fuzzy search functionality.

    Indexes documents with fuzzy indexing enabled, then shows exact FTS
    matching (fuzziness=0) and typo-tolerant matching, both restricted to
    one field and across all fields.
    """
    print("--- Running Fuzzy Search Demo ---")
    db = BeaverDB("fuzzy_demo.db")
    articles = db.collection("tech_articles")

    # Clean up from previous runs.
    # Materialize the iterator first: dropping documents while iterating
    # the live collection mutates it mid-iteration, which can skip entries
    # or invalidate the underlying cursor.
    for doc in list(articles):
        articles.drop(doc)

    # --- 1. Index Documents with Fuzzy Indexing Enabled ---
    print("Indexing documents with fuzzy=True...")
    docs_to_index = [
        Document(
            id="py-001",
            content="Python is a versatile programming language.",
            author={"name": "Jane Doe", "handle": "janedoe_py"},
        ),
        Document(
            id="sql-002",
            content="SQLite is a powerful embedded database.",
            author={"name": "Jon Smith", "handle": "jonsmith_sql"},
        ),
        Document(
            id="py-dev-003",
            content="Web development with python is easier with frameworks.",
            author={"name": "Jane Doe", "handle": "janedoe_py"},
        ),
    ]

    for doc in docs_to_index:
        # Index all string fields for FTS and also create a fuzzy index for them
        articles.index(doc, fts=True, fuzzy=True)

    # --- 2. Standard FTS Search (fuzziness=0) ---
    # With fuzziness=0 the match is exact-token FTS; the rank is a relevance score.
    print("\n--- Standard FTS for 'python' ---")
    exact_results = articles.match("python", fuzziness=0)
    for doc, rank in exact_results:
        print(f" - Found ID: {doc.id} (Rank: {rank:.2f})")

    # --- 3. Fuzzy Search on a Specific Field ---
    # Search for "Jhn Smith" with a typo. It should find "Jon Smith".
    print("\n--- Fuzzy Search for 'Jhn Smith' on 'author.name' ---")
    fuzzy_results = articles.match(
        "Jhn Smith",
        on=["author.name"],
        fuzziness=2
    )
    for doc, distance in fuzzy_results:
        print(f" - Found: '{doc.author['name']}' (Distance: {distance})")
        assert doc.id == "sql-002"

    # --- 4. Fuzzy Search Across All Fields ---
    # Search for "prgramming" with a typo. It should find the doc with "programming".
    print("\n--- Fuzzy Search for 'prgramming' across all fields ---")
    fuzzy_all_fields = articles.match("prgramming", fuzziness=2)
    for doc, distance in fuzzy_all_fields:
        print(f" - Found in content: '{doc.content}' (Distance: {distance})")
        assert doc.id == "py-001"

    db.close()
    print("\n--- Demo Finished ---")
66
+
67
+
68
# Allow running this example directly as a script.
if __name__ == "__main__":
    fuzzy_search_demo()