beaver-db 2.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
beaver/sketches.py ADDED
@@ -0,0 +1,307 @@
1
+ import math
2
+ import hashlib
3
+ import struct
4
+ import asyncio
5
+ from typing import (
6
+ Any,
7
+ Iterator,
8
+ Optional,
9
+ Protocol,
10
+ runtime_checkable,
11
+ TYPE_CHECKING,
12
+ Self,
13
+ )
14
+
15
+ from pydantic import BaseModel
16
+
17
+ from .manager import AsyncBeaverBase, atomic, emits
18
+ from .locks import AsyncBeaverLock
19
+
20
+ if TYPE_CHECKING:
21
+ from .core import AsyncBeaverDB
22
+
23
+
24
def _calculate_hll_precision(error_rate: float) -> int:
    """Translate a target error rate into a HyperLogLog precision ``p``.

    The raw value ``2 * log2(1.04 / error_rate)`` is rounded up and then
    clamped so the result always lies in ``[4, 18]``.

    Raises:
        ValueError: If ``error_rate`` is not strictly between 0 and 1.
    """
    within_bounds = 0 < error_rate < 1
    if not within_bounds:
        raise ValueError("Error rate must be between 0 and 1")

    lowest, highest = 4, 18
    wanted = math.ceil(2 * math.log2(1.04 / error_rate))
    if wanted < lowest:
        return lowest
    if wanted > highest:
        return highest
    return wanted
30
+
31
+
32
def _calculate_bloom_params(capacity: int, error_rate: float) -> tuple[int, int]:
    """Size a Bloom filter for ``capacity`` items at the given error rate.

    Returns:
        ``(bits, hashes)``: the bit-array length and the number of hash
        probes per item, each rounded up to a whole number.

    Raises:
        ValueError: If ``capacity`` is not positive or ``error_rate`` is not
            strictly between 0 and 1.
    """
    if capacity <= 0:
        raise ValueError("Capacity must be positive")
    if not 0 < error_rate < 1:
        raise ValueError("Error rate must be between 0 and 1")

    ln2 = math.log(2)
    exact_bits = -(capacity * math.log(error_rate)) / (ln2**2)
    exact_hashes = (exact_bits / capacity) * ln2
    return math.ceil(exact_bits), math.ceil(exact_hashes)
42
+
43
+
44
+ class ApproximateSet:
45
+ """
46
+ A unified probabilistic data structure combining HyperLogLog and Bloom Filter.
47
+ Pure Python implementation (CPU-bound).
48
+ """
49
+
50
+ def __init__(
51
+ self,
52
+ capacity: int = 1_000_000,
53
+ error_rate: float = 0.01,
54
+ data: bytes | None = None,
55
+ ):
56
+ self.capacity = capacity
57
+ self.error_rate = error_rate
58
+
59
+ # 1. Configure HyperLogLog
60
+ self.p = _calculate_hll_precision(error_rate)
61
+ self.m = 1 << self.p
62
+ self.alpha = self._get_alpha(self.m)
63
+
64
+ # 2. Configure Bloom Filter
65
+ self.bloom_bits, self.bloom_k = _calculate_bloom_params(capacity, error_rate)
66
+ self.bloom_bytes_len = (self.bloom_bits + 7) // 8
67
+
68
+ # 3. Initialize or Load Storage
69
+ expected_size = self.m + self.bloom_bytes_len
70
+
71
+ if data:
72
+ if len(data) != expected_size:
73
+ raise ValueError(
74
+ f"Corrupted sketch data. Expected {expected_size} bytes, got {len(data)}"
75
+ )
76
+ self._data = bytearray(data)
77
+ else:
78
+ self._data = bytearray(expected_size)
79
+
80
+ def _get_alpha(self, m: int) -> float:
81
+ if m == 16:
82
+ return 0.673
83
+ elif m == 32:
84
+ return 0.697
85
+ elif m == 64:
86
+ return 0.709
87
+ return 0.7213 / (1 + 1.079 / m)
88
+
89
+ def add(self, item_bytes: bytes):
90
+ self._add_hll(item_bytes)
91
+ self._add_bloom(item_bytes)
92
+
93
+ def _add_hll(self, item_bytes: bytes):
94
+ h = hashlib.sha1(item_bytes).digest()
95
+ x = struct.unpack("<Q", h[:8])[0]
96
+ j = x & (self.m - 1)
97
+ w = x >> self.p
98
+ rank = 1
99
+ while w & 1 == 0 and rank <= (64 - self.p):
100
+ rank += 1
101
+ w >>= 1
102
+ if rank > self._data[j]:
103
+ self._data[j] = rank
104
+
105
+ def _add_bloom(self, item_bytes: bytes):
106
+ h = hashlib.md5(item_bytes).digest()
107
+ h1, h2 = struct.unpack("<QQ", h)
108
+ offset = self.m
109
+ for i in range(self.bloom_k):
110
+ bit_idx = (h1 + i * h2) % self.bloom_bits
111
+ byte_idx = offset + (bit_idx // 8)
112
+ mask = 1 << (bit_idx % 8)
113
+ self._data[byte_idx] |= mask
114
+
115
+ def __contains__(self, item_bytes: bytes) -> bool:
116
+ h = hashlib.md5(item_bytes).digest()
117
+ h1, h2 = struct.unpack("<QQ", h)
118
+ offset = self.m
119
+ for i in range(self.bloom_k):
120
+ bit_idx = (h1 + i * h2) % self.bloom_bits
121
+ byte_idx = offset + (bit_idx // 8)
122
+ mask = 1 << (bit_idx % 8)
123
+ if not (self._data[byte_idx] & mask):
124
+ return False
125
+ return True
126
+
127
+ def __len__(self) -> int:
128
+ zeros = 0
129
+ sum_inv = 0.0
130
+ for i in range(self.m):
131
+ val = self._data[i]
132
+ if val == 0:
133
+ zeros += 1
134
+ sum_inv += 2.0 ** (-val)
135
+ E = self.alpha * (self.m**2) / sum_inv
136
+ if E <= 2.5 * self.m:
137
+ if zeros > 0:
138
+ E = self.m * math.log(self.m / zeros)
139
+ return int(E)
140
+
141
+ def to_bytes(self) -> bytes:
142
+ return bytes(self._data)
143
+
144
+
145
+ class AsyncSketchBatch[T: BaseModel]:
146
+ """Async Context manager for batched updates to an ApproximateSet."""
147
+
148
+ def __init__(self, manager: "AsyncBeaverSketch[T]"):
149
+ self._manager = manager
150
+ self._pending_items: list[Any] = []
151
+
152
+ def add(self, item: Any):
153
+ """Adds an item to the pending batch buffer."""
154
+ self._pending_items.append(item)
155
+
156
+ async def __aenter__(self):
157
+ if self._manager._sketch is None:
158
+ await self._manager._ensure_sketch()
159
+
160
+ return self
161
+
162
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
163
+ if not self._pending_items:
164
+ return
165
+
166
+ # Atomic Bulk Update: Lock -> Reload -> Modify -> Save
167
+ async with self._manager._internal_lock:
168
+ async with self._manager._db.transaction():
169
+ # 1. Reload latest state from DB
170
+ await self._manager._reload()
171
+
172
+ # 2. Update in-memory (CPU bound, could offload to thread if huge)
173
+ for item in self._pending_items:
174
+ serialized_item = self._manager._serialize(item)
175
+ item_bytes = serialized_item.encode("utf-8")
176
+ self._manager._sketch.add(item_bytes)
177
+
178
+ # 3. Save back to DB
179
+ await self._manager._save()
180
+
181
+ self._pending_items.clear()
182
+
183
+
184
@runtime_checkable
class IBeaverSketch[T: BaseModel](Protocol):
    """Protocol exposed to the user via BeaverBridge.

    Declares the sketch operations with plain (non-async) signatures.
    NOTE(review): AsyncBeaverSketch in this module implements add/contains/
    count/clear as coroutines and defines no __len__/__contains__; presumably
    BeaverBridge adapts the async manager to this surface - confirm.
    """

    # Record `item` in the sketch.
    def add(self, item: T) -> None: ...
    # Approximate membership test for `item`.
    def contains(self, item: T) -> bool: ...
    # Approximate number of distinct items added.
    def count(self) -> int: ...
    # Reset the sketch to empty.
    def clear(self) -> None: ...
    # Return a batch object that buffers items and applies them together.
    def batched(self) -> AsyncSketchBatch[T]: ...
    # Operator forms (`len(sketch)`, `item in sketch`).
    # NOTE(review): presumably equivalent to count()/contains() - confirm.
    def __len__(self) -> int: ...
    def __contains__(self, item: T) -> bool: ...
195
+
196
+
197
+ class AsyncBeaverSketch[T: BaseModel](AsyncBeaverBase[T]):
198
+ """
199
+ Manages a persistent ApproximateSet (Bloom + HLL).
200
+ """
201
+
202
+ def __init__(
203
+ self,
204
+ name: str,
205
+ db: "AsyncBeaverDB",
206
+ capacity: int = 1_000_000,
207
+ error_rate: float = 0.01,
208
+ model: type[T] | None = None,
209
+ ):
210
+ super().__init__(name, db, model=model)
211
+ self._capacity = capacity
212
+ self._error_rate = error_rate
213
+ self._sketch: ApproximateSet | None = None
214
+
215
+ async def _ensure_sketch(self):
216
+ """Loads the sketch from DB or creates it if it doesn't exist."""
217
+ cursor = await self.connection.execute(
218
+ "SELECT capacity, error_rate, data FROM __beaver_sketches__ WHERE name = ?",
219
+ (self._name,),
220
+ )
221
+ row = await cursor.fetchone()
222
+
223
+ if row:
224
+ db_cap, db_err, db_data = row["capacity"], row["error_rate"], row["data"]
225
+ # Allow small float tolerance
226
+ if db_cap != self._capacity or abs(db_err - self._error_rate) > 1e-9:
227
+ raise ValueError(
228
+ f"Sketch '{self._name}' exists with capacity={db_cap}, error={db_err}. "
229
+ f"Cannot load with requested capacity={self._capacity}, error={self._error_rate}."
230
+ )
231
+ self._sketch = ApproximateSet(
232
+ self._capacity, self._error_rate, data=db_data
233
+ )
234
+ else:
235
+ self._sketch = ApproximateSet(self._capacity, self._error_rate)
236
+ await self._save()
237
+
238
+ async def _reload(self):
239
+ """Reloads the binary data from the database."""
240
+ cursor = await self.connection.execute(
241
+ "SELECT data FROM __beaver_sketches__ WHERE name = ?", (self._name,)
242
+ )
243
+ row = await cursor.fetchone()
244
+ if row:
245
+ self._sketch._data = bytearray(row["data"])
246
+
247
+ async def _save(self):
248
+ """Persists the current in-memory sketch to the database."""
249
+ if self._sketch:
250
+ await self.connection.execute(
251
+ """
252
+ INSERT OR REPLACE INTO __beaver_sketches__ (name, type, capacity, error_rate, data)
253
+ VALUES (?, 'approx_set', ?, ?, ?)
254
+ """,
255
+ (self._name, self._capacity, self._error_rate, self._sketch.to_bytes()),
256
+ )
257
+
258
+ @atomic
259
+ async def add(self, item: T):
260
+ """
261
+ Adds a single item to the sketch atomically.
262
+ """
263
+ if self._sketch is None:
264
+ await self._ensure_sketch()
265
+
266
+ serialized_item = self._serialize(item)
267
+ item_bytes = serialized_item.encode("utf-8")
268
+
269
+ await self._reload()
270
+ self._sketch.add(item_bytes)
271
+ await self._save()
272
+
273
+ async def contains(self, item: T) -> bool:
274
+ """
275
+ Checks membership using the local cached state.
276
+ Note: Does not strictly reload from DB for performance reasons.
277
+ """
278
+ if self._sketch is None:
279
+ await self._ensure_sketch()
280
+
281
+ serialized_item = self._serialize(item)
282
+ item_bytes = serialized_item.encode("utf-8")
283
+ return item_bytes in self._sketch
284
+
285
+ async def count(self) -> int:
286
+ """Returns approximate cardinality using local cached state."""
287
+ if self._sketch is None:
288
+ await self._ensure_sketch()
289
+
290
+ return len(self._sketch)
291
+
292
+ def batched(self) -> AsyncSketchBatch[T]:
293
+ """Returns an async context manager for batched updates."""
294
+ # Initialize lazily if needed
295
+ if self._sketch is None:
296
+ # We can't await here in a sync method, so we rely on _init or first usage
297
+ pass
298
+
299
+ return AsyncSketchBatch(self)
300
+
301
+ async def clear(self):
302
+ """Resets the sketch to empty."""
303
+ if self._sketch is None:
304
+ await self._ensure_sketch()
305
+
306
+ self._sketch = ApproximateSet(self._capacity, self._error_rate)
307
+ await self._save()
beaver/types.py ADDED
@@ -0,0 +1,32 @@
1
+ import json
2
+ import sqlite3
3
+ from typing import Any, Callable, Optional, Protocol, Type, Self, runtime_checkable
4
+
5
+ from .cache import ICache
6
+
7
+
8
class IDatabase(Protocol):
    """Structural interface of the database object.

    Only signatures are declared here; the behaviour is defined by whichever
    concrete class satisfies this protocol, which is not part of this module.
    """

    # Underlying SQLite connection, exposed as a read-only property.
    @property
    def connection(self) -> sqlite3.Connection: ...
    # Return the cache associated with `key`.
    # NOTE(review): what `key` identifies is not visible here - confirm.
    def cache(self, key: str) -> "ICache": ...
    # Return an instance of `cls` for `name`; `model` and extra keyword
    # arguments are accepted alongside.
    # NOTE(review): presumably one shared instance per (cls, name) - confirm.
    def singleton[T, M](
        self, cls: Type[M], name: str, model: Type[T] | None = None, **kwargs
    ) -> M: ...
    # Emit `event` on `topic` with `payload`.
    # NOTE(review): the meaning of the bool result is not visible here.
    def emit(self, topic: str, event: str, payload: dict) -> bool: ...
    # Register `callback` for `event` on `topic`.
    def on(
        self,
        topic: str,
        event: str,
        callback: Callable,
    ): ...
    # Counterpart of `on`, taking the same arguments.
    # NOTE(review): presumably unregisters `callback` - confirm.
    def off(
        self,
        topic: str,
        event: str,
        callback: Callable,
    ): ...
28
+
29
+
30
@runtime_checkable
class IResourceManager(Protocol):
    """Anything that exposes a ``close()`` method.

    Being ``runtime_checkable``, ``isinstance(obj, IResourceManager)`` only
    checks that a ``close`` attribute is present.
    """

    # Takes no arguments; no return type is declared.
    def close(self): ...
beaver/vectors.py ADDED
@@ -0,0 +1,198 @@
1
+ import json
2
+ import math
3
+ import struct
4
+ from typing import (
5
+ List,
6
+ Tuple,
7
+ Iterator,
8
+ AsyncIterator,
9
+ Protocol,
10
+ runtime_checkable,
11
+ TYPE_CHECKING,
12
+ Any,
13
+ )
14
+
15
+ from pydantic import BaseModel
16
+
17
+ from .manager import AsyncBeaverBase, atomic, emits
18
+
19
+ if TYPE_CHECKING:
20
+ from .core import AsyncBeaverDB
21
+
22
+
23
class VectorItem[T](BaseModel):
    """Represents a stored vector with metadata."""

    # Item identifier (the `item_id` column of the vectors table).
    id: str
    # The vector's components.
    vector: List[float]
    # Optional metadata attached to the vector; None when nothing was stored.
    metadata: T | None = None
    # Cosine similarity to the query when produced by `search`; items returned
    # by `get` or by iteration keep the default of 0.
    score: float = 0
30
+
31
+
32
@runtime_checkable
class IBeaverVectors[T](Protocol):
    """Protocol exposed to the user via BeaverBridge.

    Declares the vector-store operations with plain (non-async) signatures.
    NOTE(review): AsyncBeaverVectors in this module implements these as
    coroutines and offers __aiter__ rather than __iter__; presumably
    BeaverBridge adapts between the two - confirm.
    """

    # Store `vector` (and optional `metadata`) under `id`.
    def set(self, id: str, vector: List[float], metadata: T | None = None) -> None: ...
    # Fetch the item stored under `id`.
    # NOTE(review): declared to return None for a missing id, but
    # AsyncBeaverVectors.get raises KeyError instead - confirm which is intended.
    def get(self, id: str) -> VectorItem[T] | None: ...
    # Remove the item stored under `id`.
    def delete(self, id: str) -> None: ...

    # Return up to `k` items most similar to `vector`.
    def search(self, vector: List[float], k: int = 10) -> List[VectorItem[T]]: ...

    # Number of stored items.
    def count(self) -> int: ...
    # Remove all stored items.
    def clear(self) -> None: ...
    # Iterate over all stored items.
    def __iter__(self) -> Iterator[VectorItem[T]]: ...
45
+
46
+
47
+ class AsyncBeaverVectors[T: BaseModel](AsyncBeaverBase[T]):
48
+ """
49
+ A simple, persistent vector store.
50
+
51
+ Performs exact Nearest Neighbor search by doing a full scan
52
+ and computing distances in memory.
53
+
54
+ Table managed:
55
+ - __beaver_vectors__ (collection, item_id, vector, metadata)
56
+ """
57
+
58
+ def __init__(self, name: str, db: "AsyncBeaverDB", model: type[T] | None = None):
59
+ super().__init__(name, db, model)
60
+ # T is the metadata model
61
+ self._meta_model = model
62
+
63
+ def _serialize_vector(self, vector: List[float]) -> bytes:
64
+ """Packs a list of floats into binary data."""
65
+ # Use 'f' for float (4 bytes) or 'd' for double (8 bytes).
66
+ # 'f' is standard for most embeddings.
67
+ return struct.pack(f"{len(vector)}f", *vector)
68
+
69
+ def _deserialize_vector(self, data: bytes) -> List[float]:
70
+ """Unpacks binary data into a list of floats."""
71
+ count = len(data) // 4
72
+ return list(struct.unpack(f"{count}f", data))
73
+
74
+ def _cosine_similarity(self, v1: List[float], v2: List[float]) -> float:
75
+ """Computes Cosine Similarity between two vectors."""
76
+ if len(v1) != len(v2):
77
+ return -1.0 # Dimension mismatch punishment
78
+
79
+ dot_product = sum(a * b for a, b in zip(v1, v2))
80
+ norm_v1 = math.sqrt(sum(a * a for a in v1))
81
+ norm_v2 = math.sqrt(sum(b * b for b in v2))
82
+
83
+ if norm_v1 == 0 or norm_v2 == 0:
84
+ return 0.0
85
+
86
+ return dot_product / (norm_v1 * norm_v2)
87
+
88
+ @emits("set", payload=lambda id, *args, **kwargs: dict(id=id))
89
+ @atomic
90
+ async def set(self, id: str, vector: List[float], metadata: T | None = None):
91
+ """
92
+ Stores a vector and optional metadata.
93
+ """
94
+ vec_blob = self._serialize_vector(vector)
95
+
96
+ # Serialize metadata using base manager logic
97
+ meta_json = self._serialize(metadata) if metadata else None
98
+
99
+ await self.connection.execute(
100
+ """
101
+ INSERT OR REPLACE INTO __beaver_vectors__
102
+ (collection, item_id, vector, metadata)
103
+ VALUES (?, ?, ?, ?)
104
+ """,
105
+ (self._name, id, vec_blob, meta_json),
106
+ )
107
+
108
+ @atomic
109
+ async def get(self, id: str) -> VectorItem[T]:
110
+ """Retrieves a vector item by ID."""
111
+ cursor = await self.connection.execute(
112
+ "SELECT vector, metadata FROM __beaver_vectors__ WHERE collection = ? AND item_id = ?",
113
+ (self._name, id),
114
+ )
115
+ row = await cursor.fetchone()
116
+
117
+ if not row:
118
+ raise KeyError(id)
119
+
120
+ vector = self._deserialize_vector(row["vector"])
121
+ meta_val = self._deserialize(row["metadata"]) if row["metadata"] else None
122
+
123
+ return VectorItem(id=id, vector=vector, metadata=meta_val)
124
+
125
+ @emits("delete", payload=lambda id, *args, **kwargs: dict(id=id))
126
+ @atomic
127
+ async def delete(self, id: str):
128
+ """Deletes a vector item."""
129
+ await self.connection.execute(
130
+ "DELETE FROM __beaver_vectors__ WHERE collection = ? AND item_id = ?",
131
+ (self._name, id),
132
+ )
133
+
134
+ async def search(self, vector: List[float], k: int = 10) -> List[VectorItem[T]]:
135
+ """
136
+ Performs exact KNN search using Cosine Similarity.
137
+ Scans the entire table for this collection.
138
+ """
139
+ query_vec = vector
140
+
141
+ # 1. Fetch ALL vectors (Full Scan)
142
+ # Optimization: We could stream this if memory is an issue,
143
+ # but for a simple store, fetching all is fine.
144
+ cursor = await self.connection.execute(
145
+ "SELECT item_id, vector, metadata FROM __beaver_vectors__ WHERE collection = ?",
146
+ (self._name,),
147
+ )
148
+
149
+ candidates = []
150
+ async for row in cursor:
151
+ # CPU Bound work inside the loop
152
+ row_vec = self._deserialize_vector(row["vector"])
153
+ score = self._cosine_similarity(query_vec, row_vec)
154
+
155
+ candidates.append((score, row))
156
+
157
+ # 2. Sort by Score Descending
158
+ candidates.sort(key=lambda x: x[0], reverse=True)
159
+
160
+ # 3. Take Top K and Hydrate
161
+ top_k = candidates[:k]
162
+ results = []
163
+
164
+ for score, row in top_k:
165
+ # Reconstruct item
166
+ vec = self._deserialize_vector(row["vector"])
167
+ meta_val = self._deserialize(row["metadata"]) if row["metadata"] else None
168
+
169
+ item = VectorItem(
170
+ id=row["item_id"], vector=vec, metadata=meta_val, score=score
171
+ )
172
+ results.append(item)
173
+
174
+ return results
175
+
176
+ async def count(self) -> int:
177
+ cursor = await self.connection.execute(
178
+ "SELECT COUNT(*) FROM __beaver_vectors__ WHERE collection = ?",
179
+ (self._name,),
180
+ )
181
+ result = await cursor.fetchone()
182
+ return result[0] if result else 0
183
+
184
+ @atomic
185
+ async def clear(self):
186
+ await self.connection.execute(
187
+ "DELETE FROM __beaver_vectors__ WHERE collection = ?", (self._name,)
188
+ )
189
+
190
+ async def __aiter__(self):
191
+ cursor = await self.connection.execute(
192
+ "SELECT item_id, vector, metadata FROM __beaver_vectors__ WHERE collection = ?",
193
+ (self._name,),
194
+ )
195
+ async for row in cursor:
196
+ vec = self._deserialize_vector(row["vector"])
197
+ meta = self._deserialize(row["metadata"]) if row["metadata"] else None
198
+ yield VectorItem(id=row["item_id"], vector=vec, metadata=meta)
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: beaver-db
3
+ Version: 2.0rc2
4
+ Summary: Fast, async-native, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
+ License-File: LICENSE
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3.12
9
+ Classifier: Topic :: Database
10
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
11
+ Requires-Python: >=3.12
12
+ Requires-Dist: aiosqlite>=0.21.0
13
+ Requires-Dist: numpy>=2.3.4
14
+ Requires-Dist: pydantic>=2.12.3
15
+ Requires-Dist: rich>=14.2.0
16
+ Requires-Dist: typer>=0.20.0
17
+ Provides-Extra: full
18
+ Requires-Dist: cryptography>=46.0.3; extra == 'full'
19
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'full'
20
+ Provides-Extra: remote
21
+ Requires-Dist: fastapi[standard]>=0.118.0; extra == 'remote'
22
+ Provides-Extra: security
23
+ Requires-Dist: cryptography>=46.0.3; extra == 'security'
24
+ Description-Content-Type: text/markdown
25
+
26
+ <div style="text-align: center;">
27
+ <img src="https://github.com/syalia-srl/beaver/blob/main/logo.png?raw=true" width="256px">
28
+ </div>
29
+
30
+ ---
31
+
32
+ <!-- Project badges -->
33
+ ![PyPI - Version](https://img.shields.io/pypi/v/beaver-db)
34
+ ![PyPi - Python Version](https://img.shields.io/pypi/pyversions/beaver-db)
35
+ ![Github - Open Issues](https://img.shields.io/github/issues-raw/syalia-srl/beaver)
36
+ ![PyPi - Downloads (Monthly)](https://img.shields.io/pypi/dm/beaver-db)
37
+ ![Github - Commits](https://img.shields.io/github/commit-activity/m/syalia-srl/beaver)
38
+
39
+ -----
40
+
41
+ `beaver` is a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
42
+
43
+ ## Design Philosophy
44
+
45
+ `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
46
+
47
+ * **Minimal Dependencies**: The core library has minimal dependencies (`aiosqlite`, `numpy`, `pydantic`, `rich`, `typer`). Advanced features (like the REST server) are optional extras.
48
+ * **Safe Concurrency**: Thread-safe and multi-process-safe by default, with robust inter-process locking.
49
+ * **Local-First**: A single, portable SQLite file is the default.
50
+ * **Fast & Performant**: Zero network latency for local operations and an optional, in-memory read cache.
51
+ * **Standard SQLite**: The database file is 100% compatible with any standard SQLite tool, ensuring data portability.
52
+ * **Pythonic API**: Designed to feel like a natural extension of your code, using standard Python data structures and Pydantic models.
53
+
54
+ ## Installation
55
+
56
+ Install the core library:
57
+
58
+ ```bash
59
+ pip install beaver-db
60
+ ```
61
+
62
+ To include optional features, you can install them as extras:
63
+
64
+ ```bash
65
+ # For the REST API server and client
66
+ pip install "beaver-db[remote]"
67
+
68
+ # To install all optional features at once
69
+ pip install "beaver-db[full]"
70
+ ```
71
+
72
+ ### Docker
73
+
74
+ You can also run the BeaverDB REST API server using Docker.
75
+
76
+ ```bash
77
+ docker pull ghcr.io/syalia-srl/beaver:latest
78
+ docker run -p 8000:8000 -v $(pwd)/data:/app ghcr.io/syalia-srl/beaver
79
+ ```
80
+
81
+ ## Quickstart
82
+
83
+ Get up and running in 30 seconds. This example showcases a dictionary, a list, and full-text search in a single script.
84
+
85
+ ```python
86
+ from beaver import BeaverDB, Document
87
+
88
+ # 1. Initialize the database
89
+ db = BeaverDB("data.db")
90
+
91
+ # 2. Use a namespaced dictionary for app configuration
92
+ config = db.dict("app_config")
93
+ config["theme"] = "dark"
94
+ print(f"Theme set to: {config['theme']}")
95
+
96
+ # 3. Use a persistent list to manage a task queue
97
+ tasks = db.list("daily_tasks")
98
+ tasks.push("Write the project report")
99
+ tasks.push("Deploy the new feature")
100
+ print(f"First task is: {tasks[0]}")
101
+
102
+ # 4. Use a collection for document storage and search
103
+ articles = db.collection("articles")
104
+ doc = Document(
105
+ id="sqlite-001",
106
+ body="SQLite is a powerful embedded database ideal for local apps.",
107
+ )
108
+ articles.index(doc)
109
+
110
+ # Perform a full-text search
111
+ results = articles.match(query="database")
112
+ top_doc, rank = results[0]
113
+ print(f"FTS Result: '{top_doc.body}'")
114
+
115
+ db.close()
116
+ ```
117
+
118
+ ## Features
119
+
120
+ * [**Key-Value Dictionaries**](https://syalia.com/beaver/guide-dicts-blobs.html): A Pythonic, dictionary-like interface for storing any JSON-serializable object or Pydantic model within separate namespaces. Includes TTL support for caching.
121
+ * [**Blob Storage**](https://syalia.com/beaver/guide-dicts-blobs.html): A dictionary-like interface for storing binary data (e.g., images, PDFs) with associated JSON metadata.
122
+ * [**Persistent Lists**](https://syalia.com/beaver/guide-lists-queues.html): A full-featured, persistent Python list supporting `push`, `pop`, `prepend`, `deque`, slicing, and in-place updates.
123
+ * [**Persistent Priority Queue**](https://syalia.com/beaver/guide-lists-queues.html): A high-performance, persistent priority queue perfect for task orchestration across multiple processes.
124
+ * **Probabilistic Sketches**: Track cardinality and membership for millions of items in constant space using HyperLogLog and Bloom Filters.
125
+ * [**Document Collections**](https://syalia.com/beaver/guide-collections.html): Store rich documents combining a vector embedding and Pydantic-based metadata.
126
+ * [**Vector Search**](https://syalia.com/beaver/guide-collections.html%23vector-search): Fast, multi-process-safe linear vector search using an in-memory `numpy`-based index.
127
+ * [**Full-Text & Fuzzy Search**](https://syalia.com/beaver/guide-collections.html%23full-text-fuzzy-search): Automatically index and search through document metadata using SQLite's FTS5 engine, with optional fuzzy search for typo-tolerant matching.
128
+ * [**Knowledge Graph**](https://syalia.com/beaver/guide-collections.html%23knowledge-graph): Create directed, labeled relationships between documents and traverse the graph to find neighbors or perform multi-hop walks.
129
+ * [**Pub/Sub System**](https://syalia.com/beaver/guide-realtime.html): A powerful, thread and process-safe publish-subscribe system for real-time messaging with a fan-out architecture.
130
+ * [**Time-Indexed Logs**](https://syalia.com/beaver/guide-realtime.html): A specialized data structure for structured, time-series logs. Query historical data by time range or create a live, aggregated view.
131
+ * [**Event-Driven Callbacks**](https://syalia.com/beaver/guide-realtime.html): Listen for database changes in real-time. Subscribe to events on specific managers to trigger workflows or update UIs.
132
+ * [**Inter-Process Locking**](https://syalia.com/beaver/guide-concurrency.html): Robust, deadlock-proof locks. Use `db.lock('task_name')` to coordinate arbitrary scripts, or `with db.list('my_list') as l:` to perform atomic, multi-step operations.
133
+ * [**Pydantic Support**](https://syalia.com/beaver/dev-architecture.html%23type-safe-models): Optionally associate `pydantic.BaseModel`s with any data structure for automatic, recursive data validation and (de)serialization.
134
+ * [**Deployment**](https://syalia.com/beaver/guide-deployment.html): Instantly serve your database over a RESTful API with `beaver serve` and interact with it via the `beaver` CLI.
135
+ * [**Data Export & Backups**](https://syalia.com/beaver/guide-deployment.html): Dump any data structure to a portable JSON file with a single `.dump()` command.
136
+
137
+ ## Documentation
138
+
139
+ For a complete API reference, in-depth guides, and more examples, please visit the official documentation at:
140
+
141
+ [**https://syalia.com/beaver**](https://syalia.com/beaver)
142
+
143
+ ## Contributing
144
+
145
+ Contributions are welcome! If you think of something that would make `beaver` more useful for your use case, please open an issue or submit a pull request.
146
+
147
+ ## License
148
+
149
+ This project is licensed under the MIT License.