beaver-db 2.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
beaver/docs.py ADDED
@@ -0,0 +1,459 @@
1
+ import json
2
+ import uuid
3
+ import asyncio
4
+ from typing import (
5
+ Any,
6
+ Iterator,
7
+ AsyncIterator,
8
+ List,
9
+ Literal,
10
+ Protocol,
11
+ runtime_checkable,
12
+ TYPE_CHECKING,
13
+ overload,
14
+ )
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from .queries import Filter
19
+ from .manager import AsyncBeaverBase, atomic, emits
20
+
21
+ if TYPE_CHECKING:
22
+ from .core import AsyncBeaverDB
23
+
24
+
25
+ class Document[T](BaseModel):
26
+ """
27
+ Minimal document container.
28
+ """
29
+
30
+ id: str = Field(default_factory=lambda: uuid.uuid4().hex)
31
+ body: T
32
+ score: float | None = None
33
+
34
+
35
def _flatten_document(
    data: Any, parent_key: str = "", sep: str = "."
) -> Iterator[tuple[str, str]]:
    """
    Walk a nested structure and yield a (path, text) pair for each string leaf.

    Pydantic models are dumped to plain dictionaries first. Dictionary keys are
    appended to the current path using ``sep``; items of a list reuse the path
    of the list itself. Leaves that are not strings produce nothing.
    """
    if isinstance(data, BaseModel):
        data = data.model_dump()

    # Plain string leaf: emit it under the path accumulated so far.
    if isinstance(data, str):
        yield parent_key, data
        return

    # Mapping: descend into every value, extending the path with its key.
    if isinstance(data, dict):
        for key, value in data.items():
            child_path = f"{parent_key}{sep}{key}" if parent_key else key
            yield from _flatten_document(value, child_path, sep=sep)
        return

    # Sequence: strings are emitted directly, containers are descended into,
    # and every other item type is ignored.
    if isinstance(data, list):
        for item in data:
            if isinstance(item, str):
                yield parent_key, item
            elif isinstance(item, (dict, list)):
                yield from _flatten_document(item, parent_key, sep=sep)
56
+
57
+
58
+ class DocumentQuery[T]:
59
+ """
60
+ A fluent query builder for searching and filtering documents.
61
+ """
62
+
63
+ def __init__(self, manager: "AsyncBeaverDocuments[T]"):
64
+ self._manager = manager
65
+ self._search_query: str | None = None
66
+ self._search_fields: List[str] | None = None
67
+ self._fuzzy_query: str | None = None
68
+ self._filters: list[Filter] = []
69
+ self._sort_fields: list[tuple[str, str]] = []
70
+ self._limit: int | None = None
71
+ self._offset: int | None = None
72
+
73
+ def fts(self, query: str, on: List[str] | None = None) -> "DocumentQuery[T]":
74
+ """Adds a Full-Text Search (FTS) clause."""
75
+ self._search_query = query
76
+ self._search_fields = on
77
+ return self
78
+
79
+ def fuzzy(self, query: str) -> "DocumentQuery[T]":
80
+ """Adds a Fuzzy Search clause."""
81
+ self._fuzzy_query = query
82
+ return self
83
+
84
+ def where(self, *expressions) -> "DocumentQuery[T]":
85
+ """Adds a metadata filter."""
86
+ for o in expressions:
87
+ if not isinstance(o, Filter):
88
+ raise TypeError(
89
+ f"Expression {o} is invalid. Use `query(Model)` to create valid filters."
90
+ )
91
+
92
+ self._filters.extend(expressions)
93
+ return self
94
+
95
+ def sort(self, **kwargs: Literal["ASC", "DESC"]) -> "DocumentQuery[T]":
96
+ """Sorts by a metadata field."""
97
+ self._sort_fields.extend(kwargs.items())
98
+ return self
99
+
100
+ def limit(self, limit: int) -> "DocumentQuery[T]":
101
+ self._limit = limit
102
+ return self
103
+
104
+ def offset(self, offset: int) -> "DocumentQuery[T]":
105
+ self._offset = offset
106
+ return self
107
+
108
+ async def execute(self) -> List[Document[T]]:
109
+ """Executes the built query and returns the results."""
110
+ return await self._manager._execute_query(self)
111
+
112
+ def __await__(self):
113
+ """Allows `await docs.search(...)` directly."""
114
+ return self.execute().__await__()
115
+
116
+ async def __aiter__(self) -> AsyncIterator[Document[T]]:
117
+ """Allows `async for doc in docs.search(...)`."""
118
+ results = await self.execute()
119
+ for doc in results:
120
+ yield doc
121
+
122
+
123
+ @runtime_checkable
124
+ class IBeaverDocuments[D: BaseModel](Protocol):
125
+ """Protocol exposed to the user via BeaverBridge."""
126
+
127
+ def index(
128
+ self, document: D | None = None, id: str | None = None, body: Any | None = None
129
+ ) -> Document[D]: ...
130
+ def get(self, id: str) -> D | None: ...
131
+ def drop(self, id_or_document: str | D) -> None: ...
132
+ def get_many(self, ids: List[str]) -> List[D]: ...
133
+
134
+ # Query API
135
+ def query(self) -> DocumentQuery[D]: ...
136
+ def search(
137
+ self, query: str, on: List[str] | None = None, fuzzy: bool = False
138
+ ) -> List[Document[D]]: ...
139
+
140
+ def count(self) -> int: ...
141
+ def clear(self) -> None: ...
142
+ def __iter__(self) -> Iterator[D]: ...
143
+
144
+
145
+ class AsyncBeaverDocuments[T: BaseModel](AsyncBeaverBase[T]):
146
+ """
147
+ Manages document storage, field-aware Full-Text Search, and Fuzzy Search.
148
+
149
+ Tables:
150
+ - __beaver_documents__ (collection, item_id, data)
151
+ - __beaver_fts_index__ (collection, item_id, field_path, field_content)
152
+ - __beaver_trigrams__ (collection, item_id, trigram)
153
+ """
154
+
155
+ def __init__(self, name: str, db: "AsyncBeaverDB", model: type[T] | None = None):
156
+ super().__init__(name, db, model)
157
+ self._doc_model = Document[model] if model else Document[Any]
158
+
159
+ def _normalize_doc(self, document, id, body) -> Document[T]:
160
+ """Helper to unify flexible arguments into a Document instance."""
161
+ if document:
162
+ if not isinstance(document, Document):
163
+ return self._doc_model(body=document, id=id or uuid.uuid4().hex)
164
+ return document
165
+
166
+ if body is not None:
167
+ return self._doc_model(id=id or uuid.uuid4().hex, body=body)
168
+
169
+ raise ValueError("Must provide either 'document' or 'body'.")
170
+
171
+ @emits("index", payload=lambda *args, **kwargs: dict())
172
+ @atomic
173
+ async def index(
174
+ self,
175
+ document: Document[T] | None = None,
176
+ id: str | None = None,
177
+ body: T | None = None,
178
+ fts: bool = True,
179
+ fuzzy: bool = False,
180
+ ) -> Document[T]:
181
+ """
182
+ Inserts or updates a document, indexing text fields for FTS and Trigrams.
183
+ """
184
+ doc = self._normalize_doc(document, id, body)
185
+
186
+ # 1. Main Storage (Full JSON)
187
+ if isinstance(doc.body, BaseModel):
188
+ body_json = doc.body.model_dump_json()
189
+ else:
190
+ body_json = json.dumps(doc.body)
191
+
192
+ await self.connection.execute(
193
+ """
194
+ INSERT OR REPLACE INTO __beaver_documents__ (collection, item_id, data)
195
+ VALUES (?, ?, ?)
196
+ """,
197
+ (self._name, doc.id, body_json),
198
+ )
199
+
200
+ # 2. FTS Update (Flatten -> Delete Old -> Insert New)
201
+ await self.connection.execute(
202
+ "DELETE FROM __beaver_fts_index__ WHERE collection = ? AND item_id = ?",
203
+ (self._name, doc.id),
204
+ )
205
+
206
+ fts_rows = []
207
+ for field_path, content in _flatten_document(doc.body):
208
+ if content.strip():
209
+ fts_rows.append((self._name, doc.id, field_path, content))
210
+
211
+ if fts:
212
+ if fts_rows:
213
+ await self.connection.executemany(
214
+ """
215
+ INSERT INTO __beaver_fts_index__ (collection, item_id, field_path, field_content)
216
+ VALUES (?, ?, ?, ?)
217
+ """,
218
+ fts_rows,
219
+ )
220
+
221
+ # 3. Fuzzy Index Update (Trigrams)
222
+ await self.connection.execute(
223
+ "DELETE FROM __beaver_trigrams__ WHERE collection = ? AND item_id = ?",
224
+ (self._name, doc.id),
225
+ )
226
+
227
+ if fuzzy:
228
+ # Index trigrams for the whole document content (concatenated)
229
+ # or specific fields? For simplicity, we index all text content found.
230
+ # This allows fuzzy matching on any text field.
231
+ full_text = " ".join(row[3] for row in fts_rows)
232
+ if full_text:
233
+ await self._index_trigrams(doc.id, full_text)
234
+
235
+ return doc
236
+
237
+ async def _index_trigrams(self, item_id: str, text: str):
238
+ """Generates and stores trigrams for fuzzy search."""
239
+ clean_text = text.lower()
240
+ if len(clean_text) < 3:
241
+ return
242
+
243
+ trigrams = set(clean_text[i : i + 3] for i in range(len(clean_text) - 2))
244
+
245
+ if trigrams:
246
+ await self.connection.executemany(
247
+ """
248
+ INSERT OR IGNORE INTO __beaver_trigrams__ (collection, item_id, trigram)
249
+ VALUES (?, ?, ?)
250
+ """,
251
+ [(self._name, item_id, t) for t in trigrams],
252
+ )
253
+
254
+ @atomic
255
+ async def get(self, id: str) -> Document[T]:
256
+ """Retrieves a document by ID."""
257
+ cursor = await self.connection.execute(
258
+ "SELECT data FROM __beaver_documents__ WHERE collection = ? AND item_id = ?",
259
+ (self._name, id),
260
+ )
261
+ row = await cursor.fetchone()
262
+
263
+ if not row:
264
+ raise KeyError(id)
265
+
266
+ body_val = json.loads(row["data"])
267
+ return self._doc_model(id=id, body=body_val)
268
+
269
+ async def get_many(self, ids: List[str]) -> List[Document[T]]:
270
+ """Batch retrieval helper."""
271
+ if not ids:
272
+ return []
273
+
274
+ placeholders = ",".join("?" * len(ids))
275
+ cursor = await self.connection.execute(
276
+ f"SELECT item_id, data FROM __beaver_documents__ WHERE collection = ? AND item_id IN ({placeholders})",
277
+ (self._name, *ids),
278
+ )
279
+
280
+ results = []
281
+ async for row in cursor:
282
+ body_val = json.loads(row["data"])
283
+ results.append(self._doc_model(id=row["item_id"], body=body_val))
284
+ return results
285
+
286
+ @emits("drop", payload=lambda val, *args, **kwargs: dict(target=str(val)))
287
+ @atomic
288
+ async def drop(self, id_or_document: str | Document[T]):
289
+ """Deletes a document by ID or instance."""
290
+ doc_id = (
291
+ id_or_document.id
292
+ if isinstance(id_or_document, Document)
293
+ else id_or_document
294
+ )
295
+
296
+ await self.connection.execute(
297
+ "DELETE FROM __beaver_documents__ WHERE collection = ? AND item_id = ?",
298
+ (self._name, doc_id),
299
+ )
300
+ await self.connection.execute(
301
+ "DELETE FROM __beaver_fts_index__ WHERE collection = ? AND item_id = ?",
302
+ (self._name, doc_id),
303
+ )
304
+ await self.connection.execute(
305
+ "DELETE FROM __beaver_trigrams__ WHERE collection = ? AND item_id = ?",
306
+ (self._name, doc_id),
307
+ )
308
+
309
+ # --- Query API ---
310
+
311
+ def query(self) -> DocumentQuery[T]:
312
+ return DocumentQuery(self)
313
+
314
+ async def search(
315
+ self, query: str, on: List[str] | None = None, fuzzy: bool = False
316
+ ):
317
+ if fuzzy:
318
+ return await self.query().fuzzy(query).execute()
319
+ else:
320
+ return await self.query().fts(query, on=on).execute()
321
+
322
+ async def _execute_query(self, q: DocumentQuery) -> List[Document[T]]:
323
+ """
324
+ Compiles the DocumentQuery into SQL and executes it.
325
+ """
326
+ parts = ["SELECT d.item_id, d.data"]
327
+ params = []
328
+
329
+ # Scoring column
330
+ if q._search_query:
331
+ parts.append(", MIN(f.rank) as score")
332
+ elif q._fuzzy_query:
333
+ parts.append(", count_matches as score")
334
+ else:
335
+ parts.append(", NULL as score")
336
+
337
+ parts.append("FROM __beaver_documents__ d")
338
+
339
+ # JOINS
340
+ if q._search_query:
341
+ parts.append(
342
+ "JOIN __beaver_fts_index__ f ON d.collection = f.collection AND d.item_id = f.item_id"
343
+ )
344
+
345
+ if q._fuzzy_query:
346
+ # Fuzzy Logic: Find IDs with matching trigrams, count matches, and join back
347
+ clean_query = q._fuzzy_query.lower()
348
+ query_trigrams = [
349
+ clean_query[i : i + 3] for i in range(len(clean_query) - 2)
350
+ ]
351
+
352
+ if not query_trigrams:
353
+ return [] # Query too short for fuzzy
354
+
355
+ placeholders = ",".join("?" * len(query_trigrams))
356
+
357
+ # Subquery to rank by trigram matches
358
+ subquery = f"""
359
+ JOIN (
360
+ SELECT item_id, COUNT(*) as count_matches
361
+ FROM __beaver_trigrams__
362
+ WHERE collection = ? AND trigram IN ({placeholders})
363
+ GROUP BY item_id
364
+ ) t ON d.item_id = t.item_id
365
+ """
366
+ parts.append(subquery)
367
+ params.append(self._name)
368
+ params.extend(query_trigrams)
369
+
370
+ # WHERE clauses
371
+ where = ["d.collection = ?"]
372
+ params.append(self._name)
373
+
374
+ if q._search_query:
375
+ where.append("__beaver_fts_index__ MATCH ?")
376
+ params.append(q._search_query)
377
+
378
+ if q._search_fields:
379
+ placeholders = ",".join("?" * len(q._search_fields))
380
+ where.append(f"f.field_path IN ({placeholders})")
381
+ params.extend(q._search_fields)
382
+
383
+ if q._filters:
384
+ for filter in q._filters:
385
+ where.append(
386
+ f"json_extract(d.data, '$.{filter.path}') {filter.operator} ?"
387
+ )
388
+ params.append(filter.value)
389
+
390
+ parts.append("WHERE " + " AND ".join(where))
391
+
392
+ # GROUP BY (Required for FTS when matching multiple fields to deduplicate docs)
393
+ if q._search_query:
394
+ parts.append("GROUP BY d.item_id")
395
+
396
+ # ORDER BY
397
+ if q._search_query:
398
+ parts.append(
399
+ "ORDER BY score"
400
+ ) # FTS rank (lower is better usually, but here handled by sqlite)
401
+ elif q._fuzzy_query:
402
+ parts.append("ORDER BY score DESC") # More trigram matches = better
403
+ elif q._sort_fields:
404
+ sort_expr = ", ".join(
405
+ f"json_extract(d.data, '$.{field}') {order}"
406
+ for field, order in q._sort_fields
407
+ )
408
+ parts.append(f"ORDER BY {sort_expr}")
409
+ else:
410
+ parts.append("ORDER BY d.item_id")
411
+
412
+ # LIMIT
413
+ if q._limit is not None:
414
+ parts.append("LIMIT ?")
415
+ params.append(q._limit)
416
+ if q._offset is not None:
417
+ parts.append("OFFSET ?")
418
+ params.append(q._offset)
419
+
420
+ sql = " ".join(parts)
421
+ cursor = await self.connection.execute(sql, tuple(params))
422
+
423
+ results = []
424
+ async for row in cursor:
425
+ body_val = json.loads(row["data"])
426
+ score = row["score"]
427
+ doc = self._doc_model(id=row["item_id"], body=body_val, score=score)
428
+ results.append(doc)
429
+
430
+ return results
431
+
432
+ async def count(self) -> int:
433
+ cursor = await self.connection.execute(
434
+ "SELECT COUNT(*) FROM __beaver_documents__ WHERE collection = ?",
435
+ (self._name,),
436
+ )
437
+ result = await cursor.fetchone()
438
+ return result[0] if result else 0
439
+
440
+ @atomic
441
+ async def clear(self):
442
+ await self.connection.execute(
443
+ "DELETE FROM __beaver_documents__ WHERE collection = ?", (self._name,)
444
+ )
445
+ await self.connection.execute(
446
+ "DELETE FROM __beaver_fts_index__ WHERE collection = ?", (self._name,)
447
+ )
448
+ await self.connection.execute(
449
+ "DELETE FROM __beaver_trigrams__ WHERE collection = ?", (self._name,)
450
+ )
451
+
452
+ async def __aiter__(self) -> AsyncIterator[Document[T]]:
453
+ cursor = await self.connection.execute(
454
+ "SELECT item_id, data FROM __beaver_documents__ WHERE collection = ?",
455
+ (self._name,),
456
+ )
457
+ async for row in cursor:
458
+ body_val = json.loads(row["data"])
459
+ yield self._doc_model(id=row["item_id"], body=body_val)
beaver/events.py ADDED
@@ -0,0 +1,155 @@
1
+ import asyncio
2
+ import time
3
+ import inspect
4
+ import json
5
+ import uuid
6
+ from typing import (
7
+ Any,
8
+ Callable,
9
+ Protocol,
10
+ runtime_checkable,
11
+ TYPE_CHECKING,
12
+ Generic,
13
+ TypeVar,
14
+ )
15
+ import weakref
16
+
17
+ from pydantic import BaseModel, Field
18
+
19
+ from .manager import AsyncBeaverBase, atomic
20
+ from .channels import AsyncBeaverChannel
21
+
22
+ if TYPE_CHECKING:
23
+ from .core import AsyncBeaverDB
24
+
25
+ T = TypeVar("T")
26
+
27
+
28
+ class Event[T](BaseModel):
29
+ """
30
+ A type-safe envelope for events.
31
+
32
+ Attributes:
33
+ id: Unique event ID.
34
+ event: The event name/topic.
35
+ payload: The actual data (typed).
36
+ timestamp: When the event was created.
37
+ """
38
+
39
+ id: str = Field(default_factory=lambda: uuid.uuid4().hex)
40
+ event: str
41
+ payload: T
42
+ timestamp: float = Field(default_factory=time.time)
43
+
44
+
45
class EventHandler:
    """
    Handle given back by `AsyncBeaverEvents.attach()`.

    It remembers which callback was registered for which event so the caller
    can later unregister exactly that listener via `off()`.
    """

    def __init__(self, manager: "AsyncBeaverEvents", event: str, callback: Callable):
        # Only a weak reference is kept, so the handle never keeps the
        # manager alive on its own.
        self._manager_ref = weakref.ref(manager)
        self._event = event
        self._callback = callback
        self._closed = False

    async def off(self):
        """
        Detach the callback from the manager.

        Calling this more than once is a no-op. If the manager has already
        been garbage-collected the handle is just marked as closed.
        """
        if not self._closed:
            owner = self._manager_ref()
            if owner:
                await owner.detach(self._event, self._callback)
            self._closed = True
73
+
74
+
75
+ @runtime_checkable
76
+ class IBeaverEvents[T](Protocol):
77
+ """Protocol exposed to the user via BeaverBridge."""
78
+
79
+ def attach(
80
+ self, event: str, callback: Callable[[Event[T]], Any]
81
+ ) -> EventHandler: ...
82
+ def detach(self, event: str, callback: Callable[[Event[T]], Any]) -> None: ...
83
+ def emit(self, event: str, payload: T) -> None: ...
84
+
85
+
86
+ class AsyncBeaverEvents[T: BaseModel](AsyncBeaverBase[T]):
87
+ """
88
+ A standalone Event Bus manager.
89
+ Implements the Observer Pattern on top of AsyncBeaverChannel.
90
+ """
91
+
92
+ def __init__(self, name: str, db: "AsyncBeaverDB", model: type[T] | None = None):
93
+ super().__init__(name, db, model)
94
+ self._callbacks: dict[str, list[Callable]] = {}
95
+ self._listening = False
96
+ self._listener_task: asyncio.Task | None = None
97
+
98
+ # Internal channel for broadcasting events
99
+ self._channel_name = f"__events_{self._name}__"
100
+ self._channel: AsyncBeaverChannel[Event[T]] = db.channel(
101
+ self._channel_name, model=Event[model] if model else Event
102
+ )
103
+
104
+ async def _ensure_listener(self):
105
+ """Starts the background dispatch loop if not running."""
106
+ if self._listening:
107
+ return
108
+
109
+ self._listening = True
110
+ self._listener_task = asyncio.create_task(self._dispatch_loop())
111
+
112
+ async def _dispatch_loop(self):
113
+ """Consumes messages from the channel and executes callbacks."""
114
+ # Subscribe to the underlying channel
115
+ async for msg in self._channel.subscribe():
116
+ # Unwrap the envelope (which is a raw dict from channel)
117
+ event = msg.payload
118
+
119
+ # Validate envelope structure
120
+ event_name = event.event
121
+
122
+ # Execute Callbacks
123
+ for callback in self._callbacks.get(event_name, []):
124
+ if inspect.iscoroutinefunction(callback):
125
+ # Run async callbacks concurrently
126
+ asyncio.create_task(callback(event))
127
+ else:
128
+ # Run sync callbacks directly
129
+ callback(event)
130
+
131
+ async def attach(self, event: str, callback: Callable[[Event[T]], Any]):
132
+ """Attaches a callback to an event."""
133
+ await self._ensure_listener()
134
+
135
+ if event not in self._callbacks:
136
+ self._callbacks[event] = []
137
+
138
+ if callback not in self._callbacks[event]:
139
+ self._callbacks[event].append(callback)
140
+
141
+ return EventHandler(self, event, callback)
142
+
143
+ async def detach(self, event: str, callback: Callable[[Event[T]], Any]):
144
+ """Detaches a callback."""
145
+ if event in self._callbacks:
146
+ if callback in self._callbacks[event]:
147
+ self._callbacks[event].remove(callback)
148
+
149
+ @atomic
150
+ async def emit(self, event: str, payload: T):
151
+ """
152
+ Emits an event.
153
+ """
154
+ # Publish to the underlying channel
155
+ await self._channel.publish(Event(event=event, payload=payload))