beaver-db 2.0rc2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
beaver/core.py ADDED
@@ -0,0 +1,646 @@
+ import asyncio
+ import threading
+ import warnings
+ import weakref
+ from typing import Any, Callable, Self, Type, AsyncContextManager
+
+ import aiosqlite
+ from pydantic import BaseModel
+
+ from .blobs import AsyncBeaverBlob, IBeaverBlob
+ from .bridge import BeaverBridge
+ from .cache import DummyCache, LocalCache
+ from .channels import AsyncBeaverChannel, IBeaverChannel
+ from .dicts import AsyncBeaverDict, IBeaverDict
+ from .docs import AsyncBeaverDocuments, IBeaverDocuments
+ from .events import AsyncBeaverEvents, IBeaverEvents
+ from .graphs import AsyncBeaverGraph, IBeaverGraph
+ from .lists import AsyncBeaverList, IBeaverList
+ from .locks import AsyncBeaverLock, IBeaverLock
+ from .logs import AsyncBeaverLog, IBeaverLog
+ from .manager import AsyncBeaverBase
+ from .queues import AsyncBeaverQueue, IBeaverQueue
+ from .sketches import AsyncBeaverSketch, IBeaverSketch
+ from .vectors import AsyncBeaverVectors, IBeaverVectors
+
+
+ class Event(BaseModel):
+     topic: str
+     event: str
+     payload: dict
+
+
+ class Transaction:
+     """
+     A Reentrant Async Context Manager for database transactions.
+
+     If a task already holds the transaction lock (nested @atomic calls),
+     this acts as a pass-through. The actual BEGIN/COMMIT only happens
+     at the outermost level.
+     """
+
+     def __init__(self, db: "AsyncBeaverDB"):
+         self.db = db
+         self._is_root = False
+
+     async def __aenter__(self):
+         current_task = asyncio.current_task()
+
+         # 1. Check Reentrancy: Do we already own the transaction?
+         if self.db._tx_owner_task == current_task:
+             self._is_root = False
+             return self
+
+         # 2. Acquire Lock (Wait for other tasks)
+         await self.db._tx_lock.acquire()
+
+         # 3. Mark ownership and start DB transaction
+         self.db._tx_owner_task = current_task
+         self._is_root = True
+
+         # Using isolation_level=None globally means we MUST start manually
+         await self.db.connection.execute("BEGIN IMMEDIATE")
+         return self
+
+     async def __aexit__(self, exc_type, exc, tb):
+         # Only the root transaction performs the Commit/Rollback and Release
+         if not self._is_root:
+             return
+
+         try:
+             if exc_type:
+                 await self.db.connection.rollback()
+             else:
+                 await self.db.connection.commit()
+         finally:
+             # Always clear ownership and release lock
+             self.db._tx_owner_task = None
+             self.db._tx_lock.release()
+
+
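Editor's note: to make the reentrancy contract of `Transaction` concrete, here is a minimal usage sketch. It is not part of the package; it assumes the wheel installs as the `beaver` package, and the `transfer`/`audit` helpers plus the literal row values are invented for illustration. Only APIs visible in this file (`connect()`, `transaction()`, the raw `connection`, and the `__beaver_*__` tables created further down) are used.

```python
import asyncio
import time

from beaver.core import AsyncBeaverDB  # module path taken from this diff


async def audit(db: AsyncBeaverDB) -> None:
    # Nested use: this task already owns the transaction, so the context
    # manager is a pass-through -- no second BEGIN, no early COMMIT.
    async with db.transaction():
        await db.connection.execute(
            "INSERT INTO __beaver_logs__ (log_name, timestamp, data) VALUES (?, ?, ?)",
            ("audit", time.time(), "{}"),
        )


async def transfer(db: AsyncBeaverDB) -> None:
    # Outermost use: acquires the asyncio lock and issues BEGIN IMMEDIATE.
    async with db.transaction():
        await db.connection.execute(
            "INSERT OR REPLACE INTO __beaver_dicts__ (dict_name, key, value) VALUES (?, ?, ?)",
            ("accounts", "alice", "100"),
        )
        await audit(db)  # joins the same transaction
    # Leaving the outer context commits; an exception inside rolls everything back.


async def main() -> None:
    db = await AsyncBeaverDB("example.db").connect()
    try:
        await transfer(db)
    finally:
        await db.close()


asyncio.run(main())
```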
+ class AsyncBeaverDB:
+     """
+     The Async-First Core Engine of BeaverDB.
+
+     This class manages the single aiosqlite connection and strictly runs
+     within an asyncio event loop. It is NOT thread-safe; it is designed
+     to be owned by a single thread (the Reactor Thread).
+     """
+
+     def __init__(
+         self,
+         db_path: str,
+         /,
+         *,
+         connection_timeout: float = 30.0,
+         cache_timeout: float = 0.0,
+         pragma_wal: bool = True,
+         pragma_synchronous: bool = False,
+         pragma_temp_memory: bool = True,
+         pragma_mmap_size: int = 256 * 1024 * 1024,
+     ):
+         self._db_path = db_path
+         self._timeout = connection_timeout
+         self._cache_timeout = cache_timeout
+
+         # The Single Source of Truth Connection
+         self._connection: aiosqlite.Connection | None = None
+
+         # Transaction Serializer Lock
+         # Ensures that "check-then-act" operations (like locks) are atomic
+         # relative to other tasks on this loop.
+         # Locking Primitives
+         self._tx_lock = asyncio.Lock()
+         self._tx_owner_task: asyncio.Task | None = None  # Track owner for reentrancy
+
+         # Manager Singleton Cache
+         self._manager_cache: dict[tuple[type, str], Any] = {}
+
+         # Store pragma settings
+         self._pragma_wal = pragma_wal
+         self._pragma_synchronous = pragma_synchronous
+         self._pragma_temp_memory = pragma_temp_memory
+         self._pragma_mmap_size = pragma_mmap_size
+
+         # Pub/Sub Registry (To be reimplemented in Phase 4)
+         # self._event_callbacks: dict[str, list[Callable]] = {}
+
+     async def connect(self) -> Self:
+         """
+         Initializes the async database connection and creates tables.
+         Must be awaited before using the DB.
+         """
+         if self._connection is not None:
+             return self
+
+         self._connection = await aiosqlite.connect(
+             self._db_path,
+             timeout=self._timeout,
+             # We will manage transactions manually via .transaction()
+             isolation_level=None,
+         )
+         self._connection.row_factory = aiosqlite.Row
+
+         # Apply Pragmas
+         if self._pragma_wal:
+             await self._connection.execute("PRAGMA journal_mode = WAL;")
+
+         if self._pragma_synchronous:
+             await self._connection.execute("PRAGMA synchronous = FULL;")
+         else:
+             await self._connection.execute("PRAGMA synchronous = NORMAL;")
+
+         if self._pragma_temp_memory:
+             await self._connection.execute("PRAGMA temp_store = MEMORY;")
+
+         if self._pragma_mmap_size > 0:
+             await self._connection.execute(
+                 f"PRAGMA mmap_size = {self._pragma_mmap_size};"
+             )
+
+         await self._create_all_tables()
+         # await self._check_version()
+
+         return self
+
+     async def close(self):
+         """Closes the database connection."""
+         if self._connection:
+             await self._connection.close()
+             self._connection = None
+
+         # Clear cache to allow GC
+         self._manager_cache.clear()
+
+     async def __aenter__(self):
+         return await self.connect()
+
+     async def __aexit__(self, *args, **kwargs):
+         await self.close()
+
+     @property
+     def connection(self) -> aiosqlite.Connection:
+         """
+         Returns the raw aiosqlite connection.
+         Raises an error if not connected.
+         """
+         if self._connection is None:
+             raise ConnectionError(
+                 "AsyncBeaverDB is not connected. Await .connect() first."
+             )
+
+         return self._connection
+
+     def transaction(self) -> AsyncContextManager:
+         """
+         Returns an async context manager for an atomic transaction.
+         Use: async with db.transaction(): ...
+         """
+         return Transaction(self)
+
+     async def _create_all_tables(self):
+         """Initializes all required tables with the new __beaver__ naming convention."""
+         # Note: We use execute() directly here as these are DDL statements
+         # and don't strictly require the transaction lock (sqlite handles DDL locking).
+
+         c = self.connection
+
+         # Blobs
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_blobs__ (
+                 store_name TEXT NOT NULL,
+                 key TEXT NOT NULL,
+                 data BLOB NOT NULL,
+                 metadata TEXT,
+                 PRIMARY KEY (store_name, key)
+             )
+             """
+         )
+
+         # Cache Versioning
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_manager_versions__ (
+                 namespace TEXT PRIMARY KEY,
+                 version INTEGER NOT NULL DEFAULT 0
+             )
+             """
+         )
+
+         # Dicts
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_dicts__ (
+                 dict_name TEXT NOT NULL,
+                 key TEXT NOT NULL,
+                 value TEXT NOT NULL,
+                 expires_at REAL,
+                 PRIMARY KEY (dict_name, key)
+             )
+             """
+         )
+
+         # Lists
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_lists__ (
+                 list_name TEXT NOT NULL,
+                 item_order REAL NOT NULL,
+                 item_value TEXT NOT NULL,
+                 PRIMARY KEY (list_name, item_order)
+             )
+             """
+         )
+
+         # Locks
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_lock_waiters__ (
+                 lock_name TEXT NOT NULL,
+                 waiter_id TEXT NOT NULL,
+                 requested_at REAL NOT NULL,
+                 expires_at REAL NOT NULL,
+                 PRIMARY KEY (lock_name, requested_at)
+             )
+             """
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_lock_expires ON __beaver_lock_waiters__ (lock_name, expires_at)"
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_lock_waiter_id ON __beaver_lock_waiters__ (lock_name, waiter_id)"
+         )
+
+         # Logs
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_logs__ (
+                 log_name TEXT NOT NULL,
+                 timestamp REAL NOT NULL,
+                 data TEXT NOT NULL,
+                 PRIMARY KEY (log_name, timestamp)
+             )
+             """
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON __beaver_logs__ (log_name, timestamp)"
+         )
+
+         # Priority Queues
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_priority_queues__ (
+                 queue_name TEXT NOT NULL,
+                 priority REAL NOT NULL,
+                 timestamp REAL NOT NULL,
+                 data TEXT NOT NULL
+             )
+             """
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_priority_queue_order ON __beaver_priority_queues__ (queue_name, priority ASC, timestamp ASC)"
+         )
+
+         # PubSub
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_pubsub_log__ (
+                 timestamp REAL PRIMARY KEY,
+                 channel_name TEXT NOT NULL,
+                 message_payload TEXT NOT NULL
+             )
+             """
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp ON __beaver_pubsub_log__ (channel_name, timestamp)"
+         )
+
+         # Sketches
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_sketches__ (
+                 name TEXT PRIMARY KEY,
+                 type TEXT NOT NULL,
+                 capacity INTEGER NOT NULL,
+                 error_rate REAL NOT NULL,
+                 data BLOB NOT NULL
+             )
+             """
+         )
+
+         # Main Storage
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_documents__ (
+                 collection TEXT NOT NULL,
+                 item_id TEXT NOT NULL,
+                 data TEXT NOT NULL,
+                 PRIMARY KEY (collection, item_id)
+             )
+             """
+         )
+
+         # FTS Index
+         await c.execute(
+             """
+             CREATE VIRTUAL TABLE IF NOT EXISTS __beaver_fts_index__ USING fts5(
+                 collection,
+                 item_id,
+                 field_path,
+                 field_content,
+                 tokenize = 'porter'
+             )
+             """
+         )
+
+         # Fuzzy Index (Trigrams)
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_trigrams__ (
+                 collection TEXT NOT NULL,
+                 item_id TEXT NOT NULL,
+                 trigram TEXT NOT NULL,
+                 PRIMARY KEY (collection, item_id, trigram)
+             )
+             """
+         )
+         await c.execute(
+             "CREATE INDEX IF NOT EXISTS idx_trigram_lookup ON __beaver_trigrams__ (collection, trigram)"
+         )
+
+         # Edges
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_edges__ (
+                 collection TEXT NOT NULL,
+                 source_item_id TEXT NOT NULL,
+                 target_item_id TEXT NOT NULL,
+                 label TEXT NOT NULL,
+                 metadata TEXT,
+                 PRIMARY KEY (collection, source_item_id, target_item_id, label)
+             )
+             """
+         )
+
+         # Index for reverse lookups (parents)
+         await c.execute(
+             """
+             CREATE INDEX IF NOT EXISTS idx_edges_target
+             ON __beaver_edges__ (collection, target_item_id)
+             """
+         )
+
+         # Vectors (Simple Store)
+         await c.execute(
+             """
+             CREATE TABLE IF NOT EXISTS __beaver_vectors__ (
+                 collection TEXT NOT NULL,
+                 item_id TEXT NOT NULL,
+                 vector BLOB NOT NULL,
+                 metadata TEXT,
+                 PRIMARY KEY (collection, item_id)
+             )
+             """
+         )
+
+         await self.connection.commit()
+
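Editor's note: since `connect()` runs the method above, the `__beaver_*__` naming convention can be sanity-checked straight from `sqlite_master`. A small sketch, not shipped with the package; it assumes the wheel installs as the `beaver` package and that the host Python's SQLite build includes FTS5 (needed for `__beaver_fts_index__`).

```python
import asyncio

from beaver.core import AsyncBeaverDB  # module path taken from this diff


async def main() -> None:
    # An in-memory database is enough to see the created schema.
    async with AsyncBeaverDB(":memory:") as db:
        async with db.connection.execute(
            "SELECT name, type FROM sqlite_master "
            "WHERE name GLOB '__beaver_*' ORDER BY name"
        ) as cursor:
            async for row in cursor:
                print(row["name"], row["type"])


asyncio.run(main())
```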
+     def singleton[T: AsyncBeaverBase](self, cls: type[T], name, **kwargs) -> T:
+         """
+         Factory method to get a singleton manager.
+         Since this runs on the event loop, no locks are needed.
+         """
+         cache_key = (cls, name)
+
+         if cache_key not in self._manager_cache:
+             # We pass 'self' (AsyncBeaverDB) as the db interface
+             instance = cls(name=name, db=self, **kwargs)
+             self._manager_cache[cache_key] = instance
+
+         return self._manager_cache[cache_key]
+
+     # --- Factory Methods (Internal) ---
+     # These return the raw Async Managers.
+     # Note: These manager classes will be refactored in Phase 3.
+
+     def dict[T: BaseModel](
+         self, name: str, model: type[T] | None = None, secret: str | None = None
+     ) -> AsyncBeaverDict[T]:
+         return self.singleton(AsyncBeaverDict, name, model=model, secret=secret)
+
+     def list[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> AsyncBeaverList[T]:
+         return self.singleton(AsyncBeaverList, name, model=model)
+
+     def queue[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> AsyncBeaverQueue[T]:
+         return self.singleton(AsyncBeaverQueue, name, model=model)
+
+     def docs[T: BaseModel](
+         self, name: str, model: Type[T] | None = None
+     ) -> AsyncBeaverDocuments[T]:
+         return self.singleton(AsyncBeaverDocuments, name, model=model)
+
+     def graphs[T: BaseModel](
+         self, name: str, model: Type[T] | None = None
+     ) -> AsyncBeaverGraph[T]:
+         return self.singleton(AsyncBeaverGraph, name, model=model)
+
+     def channel[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> AsyncBeaverChannel[T]:
+         return self.singleton(AsyncBeaverChannel, name, model=model)
+
+     def blob[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> AsyncBeaverBlob[T]:
+         return self.singleton(AsyncBeaverBlob, name, model=model)
+
+     def log[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> AsyncBeaverLog[T]:
+         return self.singleton(AsyncBeaverLog, name, model=model)
+
+     def lock(
+         self, name: str, timeout=None, lock_ttl=60.0, poll_interval=0.1
+     ) -> AsyncBeaverLock:
+         return AsyncBeaverLock(self, name, timeout, lock_ttl, poll_interval)
+
+     def sketch[T: BaseModel](
+         self,
+         name: str,
+         capacity=1_000_000,
+         error_rate=0.01,
+         model: type[T] | None = None,
+     ) -> AsyncBeaverSketch[T]:
+         return self.singleton(
+             AsyncBeaverSketch,
+             name,
+             capacity=capacity,
+             error_rate=error_rate,
+             model=model,
+         )
+
+     def graph[T: BaseModel](
+         self, name, model: type[T] | None = None
+     ) -> AsyncBeaverGraph[T]:
+         return self.singleton(
+             AsyncBeaverGraph,
+             name,
+             model=model,
+         )
+
+     def events[T: BaseModel](
+         self, name, model: type[T] | None = None
+     ) -> AsyncBeaverEvents:
+         return self.singleton(
+             AsyncBeaverEvents,
+             name,
+             model=model,
+         )
+
+     def vectors[T: BaseModel](
+         self, name, model: type[T] | None = None
+     ) -> AsyncBeaverVectors:
+         return self.singleton(AsyncBeaverVectors, name, model=model)
+
+     def cache(self, key: str = "global"):
+         # Temporary stub: Caching will be revisited
+         return DummyCache.singleton()
+
+
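Editor's note: before the synchronous facade below, a short sketch of driving the async engine directly. Not part of the package; the `Settings` model and file name are invented, the import path assumes the wheel installs as the `beaver` package, and only constructor parameters and factory methods defined in this class are used. The `assert` relies on the `singleton()` cache above.

```python
import asyncio

from pydantic import BaseModel

from beaver.core import AsyncBeaverDB  # module path taken from this diff


class Settings(BaseModel):
    theme: str
    retries: int


async def main() -> None:
    # Defaults give WAL mode, synchronous=NORMAL and a 256 MiB mmap;
    # both overrides here are ordinary keyword-only constructor arguments.
    async with AsyncBeaverDB(
        "app.db",
        connection_timeout=10.0,
        pragma_synchronous=False,
    ) as db:
        settings = db.dict("settings", model=Settings)

        # Factories are memoised per (manager class, name), so the same
        # object comes back for repeated calls on this engine instance.
        assert db.dict("settings", model=Settings) is settings

        # The raw aiosqlite connection stays available for ad-hoc queries.
        async with db.connection.execute(
            "SELECT COUNT(*) FROM __beaver_dicts__"
        ) as cursor:
            row = await cursor.fetchone()
            print("stored dict rows:", row[0])


asyncio.run(main())
```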
+ class BeaverDB:
+     """
+     The Synchronous Facade (Portal).
+
+     This class starts a background thread with an asyncio loop and
+     proxies all requests to the AsyncBeaverDB engine via BeaverBridge.
+     """
+
+     def __init__(self, db_path: str, /, **kwargs):
+         # 1. Start the Reactor Thread
+         self._loop = asyncio.new_event_loop()
+         self._thread = threading.Thread(
+             target=self._loop.run_forever, daemon=True, name="BeaverDB-Reactor"
+         )
+         self._thread.start()
+
+         # 2. Initialize the Engine on the Reactor Thread
+         async def init_engine():
+             db = AsyncBeaverDB(db_path, **kwargs)
+             await db.connect()
+             return db
+
+         future = asyncio.run_coroutine_threadsafe(init_engine(), self._loop)
+         self._async_db = future.result()
+         self._closed = False
+
+     def close(self):
+         """Shuts down the reactor thread and closes the DB."""
+         if self._closed:
+             return
+
+         async def shutdown():
+             await self._async_db.close()
+
+         future = asyncio.run_coroutine_threadsafe(shutdown(), self._loop)
+         future.result()
+
+         self._loop.call_soon_threadsafe(self._loop.stop)
+         self._thread.join(timeout=1.0)
+         self._closed = True
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.close()
+
+     def _get_manager(self, method_name: str, *args, **kwargs) -> Any:
+         """
+         Helper to invoke a factory method on the Async Engine and wrap the result.
+         Executing on the loop ensures the singleton cache is accessed safely.
+         """
+
+         async def factory_call():
+             method = getattr(self._async_db, method_name)
+             return method(*args, **kwargs)
+
+         future = asyncio.run_coroutine_threadsafe(factory_call(), self._loop)
+         async_manager = future.result()
+
+         # Wrap the Async Manager in the Bridge
+         return BeaverBridge(async_manager, self._loop)
+
+     # --- Public API (Proxies) ---
+
+     def dict[T: BaseModel](
+         self, name: str, model: type[T] | None = None, secret: str | None = None
+     ) -> IBeaverDict[T]:
+         return self._get_manager("dict", name, model, secret)
+
+     def list[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverList[T]:
+         return self._get_manager("list", name, model)
+
+     def queue[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverQueue[T]:
+         return self._get_manager("queue", name, model)
+
+     def docs[T: BaseModel](
+         self, name: str, model: Type[T] | None = None
+     ) -> IBeaverDocuments[T]:
+         return self._get_manager("docs", name, model)
+
+     def graph[T: BaseModel](
+         self, name: str, model: Type[T] | None = None
+     ) -> IBeaverGraph[T]:
+         return self._get_manager("graph", name, model)
+
+     def vectors[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverVectors[T]:
+         return self._get_manager("vectors", name, model)
+
+     def channel[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverChannel[T]:
+         return self._get_manager("channel", name, model)
+
+     def blob[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverBlob[T]:
+         return self._get_manager("blob", name, model)
+
+     def log[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverLog[T]:
+         return self._get_manager("log", name, model)
+
+     def lock(
+         self,
+         name: str,
+         timeout: float | None = None,
+         ttl: float = 60.0,
+         poll_interval: float = 0.1,
+     ) -> IBeaverLock:
+         return self._get_manager("lock", name, timeout, ttl, poll_interval)
+
+     def sketch[T: BaseModel](
+         self,
+         name: str,
+         capacity: int = 1_000_000,
+         error_rate: float = 0.01,
+         model: type[T] | None = None,
+     ) -> IBeaverSketch[T]:
+         return self._get_manager("sketch", name, capacity, error_rate, model)
+
+     def events[T: BaseModel](
+         self, name: str, model: type[T] | None = None
+     ) -> IBeaverEvents[T]:
+         return self._get_manager("events", name, model)
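Editor's note: closing with a usage sketch for the synchronous facade. Again not part of the package; the `Task` model and file name are invented and the import path assumes the wheel installs as the `beaver` package. Only the constructor, the context-manager protocol, and the factory proxies defined above are exercised; the bridged managers' own methods live in other modules and are not shown here.

```python
from pydantic import BaseModel

from beaver.core import BeaverDB  # module path taken from this diff


class Task(BaseModel):
    title: str
    priority: int


# __init__ spins up the "BeaverDB-Reactor" thread and connects the async
# engine on it; close() (called via __exit__) shuts both down again.
with BeaverDB("app.db") as db:
    # Each factory call runs on the reactor loop and hands back a
    # BeaverBridge proxy wrapping the cached async manager.
    tasks = db.queue("tasks", model=Task)
    release_lock = db.lock("release", timeout=5.0, ttl=30.0)

    print(type(tasks), type(release_lock))
```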