beaver-db 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

beaver/__init__.py CHANGED
@@ -1 +1 @@
1
- from .core import BeaverDB, Subscriber
1
+ from .core import BeaverDB, Document
beaver/core.py CHANGED
@@ -1,4 +1,6 @@
1
1
  import asyncio
2
+ import uuid
3
+ import numpy as np
2
4
  import json
3
5
  import sqlite3
4
6
  import time
@@ -26,43 +28,83 @@ class BeaverDB:
26
28
  self._create_pubsub_table()
27
29
  self._create_kv_table()
28
30
  self._create_list_table()
31
+ self._create_collections_table()
32
+ self._create_fts_table() # <-- Nueva llamada
33
+
34
+ def _create_fts_table(self):
35
+ """Creates the virtual FTS table for full text search."""
36
+ with self._conn:
37
+ self._conn.execute(
38
+ """
39
+ CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
40
+ collection,
41
+ item_id,
42
+ field_path,
43
+ field_content,
44
+ tokenize = 'porter'
45
+ )
46
+ """
47
+ )
29
48
 
30
49
  def _create_pubsub_table(self):
31
50
  """Creates the pub/sub log table if it doesn't exist."""
32
51
  with self._conn:
33
- self._conn.execute("""
52
+ self._conn.execute(
53
+ """
34
54
  CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
35
55
  timestamp REAL PRIMARY KEY,
36
56
  channel_name TEXT NOT NULL,
37
57
  message_payload TEXT NOT NULL
38
58
  )
39
- """)
40
- self._conn.execute("""
59
+ """
60
+ )
61
+ self._conn.execute(
62
+ """
41
63
  CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
42
64
  ON beaver_pubsub_log (channel_name, timestamp)
43
- """)
65
+ """
66
+ )
44
67
 
45
68
  def _create_kv_table(self):
46
69
  """Creates the key-value store table if it doesn't exist."""
47
70
  with self._conn:
48
- self._conn.execute("""
71
+ self._conn.execute(
72
+ """
49
73
  CREATE TABLE IF NOT EXISTS _beaver_kv_store (
50
74
  key TEXT PRIMARY KEY,
51
75
  value TEXT NOT NULL
52
76
  )
53
- """)
77
+ """
78
+ )
54
79
 
55
80
  def _create_list_table(self):
56
81
  """Creates the lists table if it doesn't exist."""
57
82
  with self._conn:
58
- self._conn.execute("""
83
+ self._conn.execute(
84
+ """
59
85
  CREATE TABLE IF NOT EXISTS beaver_lists (
60
86
  list_name TEXT NOT NULL,
61
87
  item_order REAL NOT NULL,
62
88
  item_value TEXT NOT NULL,
63
89
  PRIMARY KEY (list_name, item_order)
64
90
  )
65
- """)
91
+ """
92
+ )
93
+
94
+ def _create_collections_table(self):
95
+ """Creates the collections table if it doesn't exist."""
96
+ with self._conn:
97
+ self._conn.execute(
98
+ """
99
+ CREATE TABLE IF NOT EXISTS beaver_collections (
100
+ collection TEXT NOT NULL,
101
+ item_id TEXT NOT NULL,
102
+ item_vector BLOB,
103
+ metadata TEXT,
104
+ PRIMARY KEY (collection, item_id)
105
+ )
106
+ """
107
+ )
66
108
 
67
109
  def close(self):
68
110
  """Closes the database connection."""
@@ -94,7 +136,7 @@ class BeaverDB:
94
136
  with self._conn:
95
137
  self._conn.execute(
96
138
  "INSERT OR REPLACE INTO _beaver_kv_store (key, value) VALUES (?, ?)",
97
- (key, json_value)
139
+ (key, json_value),
98
140
  )
99
141
 
100
142
  def get(self, key: str) -> Any:
@@ -120,7 +162,7 @@ class BeaverDB:
120
162
  cursor.close()
121
163
 
122
164
  if result:
123
- return json.loads(result['value'])
165
+ return json.loads(result["value"])
124
166
  return None
125
167
 
126
168
  # --- List Methods ---
@@ -139,6 +181,10 @@ class BeaverDB:
139
181
  raise TypeError("List name must be a non-empty string.")
140
182
  return ListWrapper(name, self._conn)
141
183
 
184
+ def collection(self, name: str) -> "CollectionWrapper":
185
+ """Returns a wrapper for interacting with a vector collection."""
186
+ return CollectionWrapper(name, self._conn)
187
+
142
188
  # --- Asynchronous Pub/Sub Methods ---
143
189
 
144
190
  async def publish(self, channel_name: str, payload: Any):
@@ -153,16 +199,14 @@ class BeaverDB:
153
199
  except TypeError as e:
154
200
  raise TypeError("Message payload must be JSON-serializable.") from e
155
201
 
156
- await asyncio.to_thread(
157
- self._write_publish_to_db, channel_name, json_payload
158
- )
202
+ await asyncio.to_thread(self._write_publish_to_db, channel_name, json_payload)
159
203
 
160
204
  def _write_publish_to_db(self, channel_name, json_payload):
161
205
  """The synchronous part of the publish operation."""
162
206
  with self._conn:
163
207
  self._conn.execute(
164
208
  "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
165
- (time.time(), channel_name, json_payload)
209
+ (time.time(), channel_name, json_payload),
166
210
  )
167
211
 
168
212
  def subscribe(self, channel_name: str) -> "Subscriber":
@@ -182,7 +226,9 @@ class ListWrapper:
182
226
  def __len__(self) -> int:
183
227
  """Returns the number of items in the list (e.g., `len(my_list)`)."""
184
228
  cursor = self._conn.cursor()
185
- cursor.execute("SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,))
229
+ cursor.execute(
230
+ "SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
231
+ )
186
232
  count = cursor.fetchone()[0]
187
233
  cursor.close()
188
234
  return count
@@ -203,9 +249,9 @@ class ListWrapper:
203
249
  cursor = self._conn.cursor()
204
250
  cursor.execute(
205
251
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
206
- (self._name, limit, start)
252
+ (self._name, limit, start),
207
253
  )
208
- results = [json.loads(row['item_value']) for row in cursor.fetchall()]
254
+ results = [json.loads(row["item_value"]) for row in cursor.fetchall()]
209
255
  cursor.close()
210
256
  return results
211
257
 
@@ -219,11 +265,11 @@ class ListWrapper:
219
265
  cursor = self._conn.cursor()
220
266
  cursor.execute(
221
267
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
222
- (self._name, offset)
268
+ (self._name, offset),
223
269
  )
224
270
  result = cursor.fetchone()
225
271
  cursor.close()
226
- return json.loads(result['item_value']) if result else None
272
+ return json.loads(result["item_value"]) if result else None
227
273
 
228
274
  else:
229
275
  raise TypeError("List indices must be integers or slices.")
@@ -233,7 +279,7 @@ class ListWrapper:
233
279
  cursor = self._conn.cursor()
234
280
  cursor.execute(
235
281
  "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
236
- (self._name, index)
282
+ (self._name, index),
237
283
  )
238
284
  result = cursor.fetchone()
239
285
  cursor.close()
@@ -247,26 +293,32 @@ class ListWrapper:
247
293
  """Pushes an item to the end of the list."""
248
294
  with self._conn:
249
295
  cursor = self._conn.cursor()
250
- cursor.execute("SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,))
296
+ cursor.execute(
297
+ "SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
298
+ (self._name,),
299
+ )
251
300
  max_order = cursor.fetchone()[0] or 0.0
252
301
  new_order = max_order + 1.0
253
302
 
254
303
  cursor.execute(
255
304
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
256
- (self._name, new_order, json.dumps(value))
305
+ (self._name, new_order, json.dumps(value)),
257
306
  )
258
307
 
259
308
  def prepend(self, value: Any):
260
309
  """Prepends an item to the beginning of the list."""
261
310
  with self._conn:
262
311
  cursor = self._conn.cursor()
263
- cursor.execute("SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,))
312
+ cursor.execute(
313
+ "SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
314
+ (self._name,),
315
+ )
264
316
  min_order = cursor.fetchone()[0] or 0.0
265
317
  new_order = min_order - 1.0
266
318
 
267
319
  cursor.execute(
268
320
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
269
- (self._name, new_order, json.dumps(value))
321
+ (self._name, new_order, json.dumps(value)),
270
322
  )
271
323
 
272
324
  def insert(self, index: int, value: Any):
@@ -288,7 +340,7 @@ class ListWrapper:
288
340
  with self._conn:
289
341
  self._conn.execute(
290
342
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
291
- (self._name, new_order, json.dumps(value))
343
+ (self._name, new_order, json.dumps(value)),
292
344
  )
293
345
 
294
346
  def pop(self) -> Any:
@@ -297,14 +349,16 @@ class ListWrapper:
297
349
  cursor = self._conn.cursor()
298
350
  cursor.execute(
299
351
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1",
300
- (self._name,)
352
+ (self._name,),
301
353
  )
302
354
  result = cursor.fetchone()
303
355
  if not result:
304
356
  return None
305
357
 
306
358
  rowid_to_delete, value_to_return = result
307
- cursor.execute("DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,))
359
+ cursor.execute(
360
+ "DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
361
+ )
308
362
  return json.loads(value_to_return)
309
363
 
310
364
  def deque(self) -> Any:
@@ -313,14 +367,16 @@ class ListWrapper:
313
367
  cursor = self._conn.cursor()
314
368
  cursor.execute(
315
369
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1",
316
- (self._name,)
370
+ (self._name,),
317
371
  )
318
372
  result = cursor.fetchone()
319
373
  if not result:
320
374
  return None
321
375
 
322
376
  rowid_to_delete, value_to_return = result
323
- cursor.execute("DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,))
377
+ cursor.execute(
378
+ "DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
379
+ )
324
380
  return json.loads(value_to_return)
325
381
 
326
382
 
@@ -330,7 +386,9 @@ class Subscriber(AsyncIterator):
330
386
  Designed to be used with 'async with'.
331
387
  """
332
388
 
333
- def __init__(self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1):
389
+ def __init__(
390
+ self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
391
+ ):
334
392
  self._conn = conn
335
393
  self._channel = channel_name
336
394
  self._poll_interval = poll_interval
@@ -342,9 +400,7 @@ class Subscriber(AsyncIterator):
342
400
  """Background task that polls the database for new messages."""
343
401
  while True:
344
402
  try:
345
- new_messages = await asyncio.to_thread(
346
- self._fetch_new_messages_from_db
347
- )
403
+ new_messages = await asyncio.to_thread(self._fetch_new_messages_from_db)
348
404
  if new_messages:
349
405
  for msg in new_messages:
350
406
  payload = json.loads(msg["message_payload"])
@@ -362,7 +418,7 @@ class Subscriber(AsyncIterator):
362
418
  cursor = self._conn.cursor()
363
419
  cursor.execute(
364
420
  "SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
365
- (self._channel, self._last_seen_timestamp)
421
+ (self._channel, self._last_seen_timestamp),
366
422
  )
367
423
  results = cursor.fetchall()
368
424
  cursor.close()
@@ -385,3 +441,198 @@ class Subscriber(AsyncIterator):
385
441
  async def __anext__(self) -> Any:
386
442
  """Allows 'async for' to pull messages from the internal queue."""
387
443
  return await self._queue.get()
444
+
445
+
446
+ class Document:
447
+ """A data class for a vector and its metadata, with a unique ID."""
448
+
449
+ def __init__(
450
+ self, embedding: list[float] | None = None, id: str | None = None, **metadata
451
+ ):
452
+ self.id = id or str(uuid.uuid4())
453
+
454
+ if embedding is None:
455
+ self.embedding = None
456
+ else:
457
+ if not isinstance(embedding, list) or not all(
458
+ isinstance(x, (int, float)) for x in embedding
459
+ ):
460
+ raise TypeError("Embedding must be a list of numbers.")
461
+
462
+ self.embedding = np.array(embedding, dtype=np.float32)
463
+
464
+ for key, value in metadata.items():
465
+ setattr(self, key, value)
466
+
467
+ def to_dict(self) -> dict[str, Any]:
468
+ """Serializes metadata to a dictionary."""
469
+ metadata = self.__dict__.copy()
470
+ # Exclude internal attributes from the metadata payload
471
+ metadata.pop("embedding", None)
472
+ metadata.pop("id", None)
473
+ return metadata
474
+
475
+ def __repr__(self):
476
+ metadata_str = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
477
+ return f"Document(id='{self.id}', {metadata_str})"
478
+
479
+
480
+ class CollectionWrapper:
481
+ """A wrapper for vector collection operations with upsert logic."""
482
+
483
+ def __init__(self, name: str, conn: sqlite3.Connection):
484
+ self._name = name
485
+ self._conn = conn
486
+
487
+ # Inside the CollectionWrapper class in beaver/core.py
488
+
489
+ def _flatten_metadata(self, metadata: dict, prefix: str = "") -> dict[str, str]:
490
+ """
491
+ Flattens a nested dictionary, keeping only the string-typed values.
492
+ Example: {'a': {'b': 'c'}} -> {'a__b': 'c'}
493
+ """
494
+ flat_dict = {}
495
+ for key, value in metadata.items():
496
+ new_key = f"{prefix}__{key}" if prefix else key
497
+ if isinstance(value, dict):
498
+ flat_dict.update(self._flatten_metadata(value, new_key))
499
+ elif isinstance(value, str):
500
+ flat_dict[new_key] = value
501
+ return flat_dict
502
+
503
+ def index(self, document: Document, *, fts: bool = True):
504
+ """
505
+ Indexes a Document, performing an upsert and updating the FTS index.
506
+ """
507
+ with self._conn:
508
+ if fts:
509
+ self._conn.execute(
510
+ "DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?",
511
+ (self._name, document.id),
512
+ )
513
+
514
+ string_fields = self._flatten_metadata(document.to_dict())
515
+
516
+ if string_fields:
517
+ fts_data = [
518
+ (self._name, document.id, path, content)
519
+ for path, content in string_fields.items()
520
+ ]
521
+ self._conn.executemany(
522
+ "INSERT INTO beaver_fts_index (collection, item_id, field_path, field_content) VALUES (?, ?, ?, ?)",
523
+ fts_data,
524
+ )
525
+
526
+ self._conn.execute(
527
+ "INSERT OR REPLACE INTO beaver_collections (collection, item_id, item_vector, metadata) VALUES (?, ?, ?, ?)",
528
+ (
529
+ self._name,
530
+ document.id,
531
+ document.embedding.tobytes() if document.embedding is not None else None,
532
+ json.dumps(document.to_dict()),
533
+ ),
534
+ )
535
+
536
+ def search(
537
+ self, vector: list[float], top_k: int = 10
538
+ ) -> list[tuple[Document, float]]:
539
+ """
540
+ Performs a vector search and returns Document objects.
541
+ """
542
+ query_vector = np.array(vector, dtype=np.float32)
543
+
544
+ cursor = self._conn.cursor()
545
+ cursor.execute(
546
+ "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
547
+ (self._name,),
548
+ )
549
+
550
+ all_docs_data = cursor.fetchall()
551
+ cursor.close()
552
+
553
+ if not all_docs_data:
554
+ return []
555
+
556
+ results = []
557
+ for row in all_docs_data:
558
+ if row["item_vector"] is None:
559
+ continue # Skip documents without embeddings
560
+
561
+ doc_id = row["item_id"]
562
+ embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
563
+ metadata = json.loads(row["metadata"])
564
+
565
+ distance = np.linalg.norm(embedding - query_vector)
566
+
567
+ # Reconstruct the Document object with its original ID
568
+ doc = Document(id=doc_id, embedding=list(embedding), **metadata)
569
+ results.append((doc, float(distance)))
570
+
571
+ results.sort(key=lambda x: x[1])
572
+ return results[:top_k]
573
+
574
+ def match(
575
+ self, query: str, on_field: str | None = None, top_k: int = 10
576
+ ) -> list[tuple[Document, float]]:
577
+ """
578
+ Performs a full-text search over the indexed metadata fields.
579
+
580
+ Args:
581
+ query: The search expression (e.g. "cat", "dog OR rabbit").
582
+ on_field: Optional, the specific field to search in (e.g. "details__title").
583
+ top_k: The maximum number of results to return.
584
+
585
+ Returns:
586
+ A list of (Document, relevance_score) tuples.
587
+ """
588
+ cursor = self._conn.cursor()
589
+
590
+ sql_query = """
591
+ SELECT
592
+ t1.item_id, t1.item_vector, t1.metadata, fts.rank
593
+ FROM beaver_collections AS t1
594
+ JOIN (
595
+ SELECT DISTINCT item_id, rank
596
+ FROM beaver_fts_index
597
+ WHERE beaver_fts_index MATCH ?
598
+ ORDER BY rank
599
+ LIMIT ?
600
+ ) AS fts ON t1.item_id = fts.item_id
601
+ WHERE t1.collection = ?
602
+ ORDER BY fts.rank
603
+ """
604
+
605
+ params = []
606
+ field_filter_sql = ""
607
+
608
+ if on_field:
609
+ field_filter_sql = "AND field_path = ?"
610
+ params.append(on_field)
611
+ else:
612
+ # Search across all fields
613
+ params.append(query)
614
+
615
+ sql_query = sql_query.format(field_filter_sql)
616
+ params.extend([top_k, self._name])
617
+
618
+ cursor.execute(sql_query, tuple(params))
619
+
620
+ results = []
621
+ for row in cursor.fetchall():
622
+ doc_id = row["item_id"]
623
+
624
+ if row["item_vector"] is None:
625
+ embedding = None
626
+ else:
627
+ embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
628
+
629
+ metadata = json.loads(row["metadata"])
630
+ rank = row["rank"]
631
+
632
+ doc = Document(id=doc_id, embedding=embedding, **metadata)
633
+ results.append((doc, rank))
634
+
635
+ results.sort(key=lambda x: x[1])
636
+ cursor.close()
637
+
638
+ return results
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: beaver-db
3
+ Version: 0.4.0
4
+ Summary: Asynchronous, embedded, modern DB based on SQLite.
5
+ Requires-Python: >=3.13
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: numpy>=2.3.3
8
+
9
+ # beaver 🦫
10
+
11
+ A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
12
+
13
+ `beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
14
+
15
+ ## Design Philosophy
16
+
17
+ `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
18
+
19
+ - **Minimalistic & Near-Zero-Dependency**: Uses only Python's standard libraries (`sqlite3`, `asyncio`) plus a single external dependency, `numpy`.
20
+ - **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Other features like key-value, list, and vector operations are synchronous for ease of use.
21
+ - **Built for Local Applications**: Perfect for local AI tools, RAG prototypes, chatbots, and desktop utilities that need persistent, structured data without network overhead.
22
+ - **Fast by Default**: It's built on SQLite, which is famously fast and reliable for local applications.
23
+
24
+ ## Core Features
25
+
26
+ - **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
27
+ - **Persistent Key-Value Store**: A simple `set`/`get` interface for storing any JSON-serializable object.
28
+ - **Pythonic List Management**: A fluent, Redis-like interface for managing persistent, ordered lists.
29
+ - **Vector Storage & Search**: Store vector embeddings and perform simple, brute-force k-nearest neighbor searches, ideal for small-scale RAG.
30
+ - **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install beaver-db
36
+ ```
37
+
38
+ ## Quickstart & API Guide
39
+
40
+ ### Initialization
41
+
42
+ All you need to do is import and instantiate the `BeaverDB` class with a file path.
43
+
44
+ ```python
45
+ from beaver import BeaverDB, Document
46
+
47
+ db = BeaverDB("my_application.db")
48
+ ```
49
+
50
+ ### Key-Value Store
51
+
52
+ Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
53
+
54
+ ```python
55
+ # Set a value
56
+ db.set("app_config", {"theme": "dark", "user_id": 123})
57
+
58
+ # Get a value
59
+ config = db.get("app_config")
60
+ print(f"Theme: {config['theme']}") # Output: Theme: dark
61
+ ```
62
+
63
+ ### List Management
64
+
65
+ Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
66
+
67
+ ```python
68
+ tasks = db.list("daily_tasks")
69
+ tasks.push("Write the project report")
70
+ tasks.prepend("Plan the day's agenda")
71
+ print(f"The first task is: {tasks[0]}")
72
+ ```
73
+
74
+ ### Vector Storage & Search
75
+
76
+ Store `Document` objects containing vector embeddings and metadata. The search is a linear scan, which is sufficient for small-to-medium collections.
77
+
78
+ ```python
79
+ # Get a handle to a collection
80
+ docs = db.collection("my_documents")
81
+
82
+ # Create and index a document (ID will be a UUID)
83
+ doc1 = Document(embedding=[0.1, 0.2, 0.7], text="A cat sat on the mat.")
84
+ docs.index(doc1)
85
+
86
+ # Create and index a document with a specific ID (for upserting)
87
+ doc2 = Document(id="article-42", embedding=[0.9, 0.1, 0.1], text="A dog chased a ball.")
88
+ docs.index(doc2)
89
+
90
+ # Search for the 2 most similar documents
91
+ query_vector = [0.15, 0.25, 0.65]
92
+ results = docs.search(vector=query_vector, top_k=2)
93
+
94
+ # Results are a list of (Document, distance) tuples
95
+ top_document, distance = results[0]
96
+ print(f"Closest document: {top_document.text} (distance: {distance:.4f})")
97
+ ```
98
+
99
+ ### Asynchronous Pub/Sub
100
+
101
+ Publish events from one part of your app and listen in another using `asyncio`.
102
+
103
+ ```python
104
+ import asyncio
105
+
106
+ async def listener():
107
+ async with db.subscribe("system_events") as sub:
108
+ async for message in sub:
109
+ print(f"LISTENER: Received event -> {message['event']}")
110
+
111
+ async def publisher():
112
+ await asyncio.sleep(1)
113
+ await db.publish("system_events", {"event": "user_login", "user": "alice"})
114
+
115
+ # To run them concurrently:
116
+ # asyncio.run(main())  # where `async def main(): await asyncio.gather(listener(), publisher())`
117
+ ```
118
+
119
+ ## Roadmap
120
+
121
+ `beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
122
+
123
+ - **More Efficient Vector Search**: Integrate a nearest-neighbor index such as `scipy.spatial.cKDTree` (a k-d tree that also supports approximate queries) to improve search speed on larger datasets.
124
+ - **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
125
+ - **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks with standard SQL.
126
+
127
+ ## License
128
+
129
+ This project is licensed under the MIT License.
@@ -0,0 +1,6 @@
1
+ beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
2
+ beaver/core.py,sha256=i2rBoUM1rq_j1xM3w4xW4c9e2eI8Ce6BeJ8rE8jQ-fI,21928
3
+ beaver_db-0.4.0.dist-info/METADATA,sha256=7VzqxHKU-Ft1QVAfVvywt4e50C3QWxS7FUpKIaQEJKk,4865
4
+ beaver_db-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ beaver_db-0.4.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
6
+ beaver_db-0.4.0.dist-info/RECORD,,
@@ -1,109 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: beaver-db
3
- Version: 0.2.0
4
- Summary: Asynchronous, embedded, modern DB based on SQLite.
5
- Requires-Python: >=3.13
6
- Description-Content-Type: text/markdown
7
-
8
- # beaver 🦫
9
-
10
- A fast, single-file, multi-modal database for Python, built with the standard sqlite3 library.
11
-
12
- `beaver` is the Backend for Embedded Asynchronous Vector & Event Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
13
-
14
- Design Philosophy
15
- `beaver` is built with a minimalistic philosophy for small, local use cases where a full-blown database server would be overkill.
16
-
17
- - **Minimalistic & Zero-Dependency**: Uses only Python's standard libraries (sqlite3, asyncio). No external packages are required, making it incredibly lightweight and portable.
18
- - **Async-First (When It Matters)**: The pub/sub system is fully asynchronous for high-performance, real-time messaging. Simpler features like key-value and list operations remain synchronous for ease of use.
19
- - **Built for Local Applications**: Perfect for local AI tools, chatbots (streaming tokens), task management apps, desktop utilities, and prototypes that need persistent, structured data without network overhead.
20
- - **Fast by Default**: It's built on SQLite, which is famously fast, reliable, and will likely serve your needs for a long way before you need a "professional" database.
21
-
22
- ## Core Features
23
-
24
- - **Asynchronous Pub/Sub**: A fully asynchronous, Redis-like publish-subscribe system for real-time messaging.
25
- - **Persistent Key-Value Store**: A simple set/get interface for storing configuration, session data, or any other JSON-serializable object.
26
- - **Pythonic List Management**: A fluent, Redis-like interface (db.list("name").push()) for managing persistent, ordered lists with support for indexing and slicing.
27
- - **Single-File & Portable**: All data is stored in a single SQLite file, making it incredibly easy to move, back up, or embed in your application.
28
-
29
- ## Installation
30
-
31
- ```bash
32
- pip install beaver-db
33
- ```
34
-
35
- ## Quickstart & API Guide
36
-
37
- ### 1. Initialization
38
-
39
- All you need to do is import and instantiate the BeaverDB class with a file path.
40
-
41
- ```python
42
- from beaver import BeaverDB
43
-
44
- db = BeaverDB("my_application.db")
45
- ```
46
-
47
- ### 2. Key-Value Store
48
-
49
- Use `set()` and `get()` for simple data storage. The value can be any JSON-encodable object.
50
-
51
- ```python
52
- # Set a value
53
- db.set("app_config", {"theme": "dark", "user_id": 123})
54
-
55
- # Get a value
56
- config = db.get("app_config")
57
- print(f"Theme: {config['theme']}") # Output: Theme: dark
58
- ```
59
-
60
- ### 3. List Management
61
-
62
- Get a list wrapper with `db.list()` and use Pythonic methods to manage it.
63
-
64
- ```python
65
- # Get a wrapper for the 'tasks' list
66
- tasks = db.list("daily_tasks")
67
-
68
- # Push items to the list
69
- tasks.push("Write the project report")
70
- tasks.push("Send follow-up emails")
71
- tasks.prepend("Plan the day's agenda") # Push to the front
72
-
73
- # Use len() and indexing (including slices!)
74
- print(f"There are {len(tasks)} tasks.")
75
- print(f"The first task is: {tasks[0]}")
76
- print(f"The rest is: {tasks[1:]}")
77
- ```
78
-
79
- ### 4. Asynchronous Pub/Sub
80
-
81
- Publish events from one part of your app and listen in another using asyncio.
82
-
83
- ```python
84
- import asyncio
85
-
86
- async def listener():
87
- async with db.subscribe("system_events") as sub:
88
- async for message in sub:
89
- print(f"LISTENER: Received event -> {message['event']}")
90
-
91
- async def publisher():
92
- await asyncio.sleep(1)
93
- await db.publish("system_events", {"event": "user_login", "user": "alice"})
94
-
95
- # To run them concurrently:
96
- # asyncio.run(asyncio.gather(listener(), publisher()))
97
- ```
98
-
99
- ## Roadmap
100
-
101
- `beaver` aims to be a complete, self-contained data toolkit. The following features are planned:
102
-
103
- - **Vector Storage & Search**: Store NumPy vector embeddings and perform efficient k-nearest neighbor (k-NN) searches using `scipy.spatial.cKDTree`.
104
- - **JSON Document Store with Full-Text Search**: Store flexible JSON documents and get powerful full-text search across all text fields, powered by SQLite's FTS5 extension.
105
- - **Standard Relational Interface**: While `beaver` provides high-level features, you can always use the same SQLite file for normal relational tasks (e.g., managing users, products) with standard SQL.
106
-
107
- ## License
108
-
109
- This project is licensed under the MIT License.
@@ -1,6 +0,0 @@
1
- beaver/__init__.py,sha256=pE1JdpHVni2Hv6igs5VrKPlHkkKMik3ZiwhR23KRBkk,38
2
- beaver/core.py,sha256=NQizE87jkR7DwbcZoIX61rdVe7z6bAarBUbo-oUc8SI,13720
3
- beaver_db-0.2.0.dist-info/METADATA,sha256=DGdk2Il2CFRtfM2BzY7Z2dYUxy1UQeKdVKnIVa_5wzg,4295
4
- beaver_db-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- beaver_db-0.2.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
6
- beaver_db-0.2.0.dist-info/RECORD,,