beaver-db 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

beaver/core.py CHANGED
@@ -29,56 +29,82 @@ class BeaverDB:
29
29
  self._create_kv_table()
30
30
  self._create_list_table()
31
31
  self._create_collections_table()
32
+ self._create_fts_table() # <-- Nueva llamada
33
+
34
+ def _create_fts_table(self):
35
+ """Creates the virtual FTS table for full text search."""
36
+ with self._conn:
37
+ self._conn.execute(
38
+ """
39
+ CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
40
+ collection,
41
+ item_id,
42
+ field_path,
43
+ field_content,
44
+ tokenize = 'porter'
45
+ )
46
+ """
47
+ )
32
48
 
33
49
  def _create_pubsub_table(self):
34
50
  """Creates the pub/sub log table if it doesn't exist."""
35
51
  with self._conn:
36
- self._conn.execute("""
52
+ self._conn.execute(
53
+ """
37
54
  CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
38
55
  timestamp REAL PRIMARY KEY,
39
56
  channel_name TEXT NOT NULL,
40
57
  message_payload TEXT NOT NULL
41
58
  )
42
- """)
43
- self._conn.execute("""
59
+ """
60
+ )
61
+ self._conn.execute(
62
+ """
44
63
  CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
45
64
  ON beaver_pubsub_log (channel_name, timestamp)
46
- """)
65
+ """
66
+ )
47
67
 
48
68
  def _create_kv_table(self):
49
69
  """Creates the key-value store table if it doesn't exist."""
50
70
  with self._conn:
51
- self._conn.execute("""
71
+ self._conn.execute(
72
+ """
52
73
  CREATE TABLE IF NOT EXISTS _beaver_kv_store (
53
74
  key TEXT PRIMARY KEY,
54
75
  value TEXT NOT NULL
55
76
  )
56
- """)
77
+ """
78
+ )
57
79
 
58
80
  def _create_list_table(self):
59
81
  """Creates the lists table if it doesn't exist."""
60
82
  with self._conn:
61
- self._conn.execute("""
83
+ self._conn.execute(
84
+ """
62
85
  CREATE TABLE IF NOT EXISTS beaver_lists (
63
86
  list_name TEXT NOT NULL,
64
87
  item_order REAL NOT NULL,
65
88
  item_value TEXT NOT NULL,
66
89
  PRIMARY KEY (list_name, item_order)
67
90
  )
68
- """)
91
+ """
92
+ )
69
93
 
70
94
  def _create_collections_table(self):
71
95
  """Creates the collections table if it doesn't exist."""
72
96
  with self._conn:
73
- self._conn.execute("""
97
+ self._conn.execute(
98
+ """
74
99
  CREATE TABLE IF NOT EXISTS beaver_collections (
75
100
  collection TEXT NOT NULL,
76
101
  item_id TEXT NOT NULL,
77
- item_vector BLOB NOT NULL,
102
+ item_vector BLOB,
78
103
  metadata TEXT,
79
104
  PRIMARY KEY (collection, item_id)
80
105
  )
81
- """)
106
+ """
107
+ )
82
108
 
83
109
  def close(self):
84
110
  """Closes the database connection."""
@@ -110,7 +136,7 @@ class BeaverDB:
110
136
  with self._conn:
111
137
  self._conn.execute(
112
138
  "INSERT OR REPLACE INTO _beaver_kv_store (key, value) VALUES (?, ?)",
113
- (key, json_value)
139
+ (key, json_value),
114
140
  )
115
141
 
116
142
  def get(self, key: str) -> Any:
@@ -136,7 +162,7 @@ class BeaverDB:
136
162
  cursor.close()
137
163
 
138
164
  if result:
139
- return json.loads(result['value'])
165
+ return json.loads(result["value"])
140
166
  return None
141
167
 
142
168
  # --- List Methods ---
@@ -173,16 +199,14 @@ class BeaverDB:
173
199
  except TypeError as e:
174
200
  raise TypeError("Message payload must be JSON-serializable.") from e
175
201
 
176
- await asyncio.to_thread(
177
- self._write_publish_to_db, channel_name, json_payload
178
- )
202
+ await asyncio.to_thread(self._write_publish_to_db, channel_name, json_payload)
179
203
 
180
204
  def _write_publish_to_db(self, channel_name, json_payload):
181
205
  """The synchronous part of the publish operation."""
182
206
  with self._conn:
183
207
  self._conn.execute(
184
208
  "INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
185
- (time.time(), channel_name, json_payload)
209
+ (time.time(), channel_name, json_payload),
186
210
  )
187
211
 
188
212
  def subscribe(self, channel_name: str) -> "Subscriber":
@@ -202,7 +226,9 @@ class ListWrapper:
202
226
  def __len__(self) -> int:
203
227
  """Returns the number of items in the list (e.g., `len(my_list)`)."""
204
228
  cursor = self._conn.cursor()
205
- cursor.execute("SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,))
229
+ cursor.execute(
230
+ "SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
231
+ )
206
232
  count = cursor.fetchone()[0]
207
233
  cursor.close()
208
234
  return count
@@ -223,9 +249,9 @@ class ListWrapper:
223
249
  cursor = self._conn.cursor()
224
250
  cursor.execute(
225
251
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
226
- (self._name, limit, start)
252
+ (self._name, limit, start),
227
253
  )
228
- results = [json.loads(row['item_value']) for row in cursor.fetchall()]
254
+ results = [json.loads(row["item_value"]) for row in cursor.fetchall()]
229
255
  cursor.close()
230
256
  return results
231
257
 
@@ -239,11 +265,11 @@ class ListWrapper:
239
265
  cursor = self._conn.cursor()
240
266
  cursor.execute(
241
267
  "SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
242
- (self._name, offset)
268
+ (self._name, offset),
243
269
  )
244
270
  result = cursor.fetchone()
245
271
  cursor.close()
246
- return json.loads(result['item_value']) if result else None
272
+ return json.loads(result["item_value"]) if result else None
247
273
 
248
274
  else:
249
275
  raise TypeError("List indices must be integers or slices.")
@@ -253,7 +279,7 @@ class ListWrapper:
253
279
  cursor = self._conn.cursor()
254
280
  cursor.execute(
255
281
  "SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
256
- (self._name, index)
282
+ (self._name, index),
257
283
  )
258
284
  result = cursor.fetchone()
259
285
  cursor.close()
@@ -267,26 +293,32 @@ class ListWrapper:
267
293
  """Pushes an item to the end of the list."""
268
294
  with self._conn:
269
295
  cursor = self._conn.cursor()
270
- cursor.execute("SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,))
296
+ cursor.execute(
297
+ "SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
298
+ (self._name,),
299
+ )
271
300
  max_order = cursor.fetchone()[0] or 0.0
272
301
  new_order = max_order + 1.0
273
302
 
274
303
  cursor.execute(
275
304
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
276
- (self._name, new_order, json.dumps(value))
305
+ (self._name, new_order, json.dumps(value)),
277
306
  )
278
307
 
279
308
  def prepend(self, value: Any):
280
309
  """Prepends an item to the beginning of the list."""
281
310
  with self._conn:
282
311
  cursor = self._conn.cursor()
283
- cursor.execute("SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?", (self._name,))
312
+ cursor.execute(
313
+ "SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
314
+ (self._name,),
315
+ )
284
316
  min_order = cursor.fetchone()[0] or 0.0
285
317
  new_order = min_order - 1.0
286
318
 
287
319
  cursor.execute(
288
320
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
289
- (self._name, new_order, json.dumps(value))
321
+ (self._name, new_order, json.dumps(value)),
290
322
  )
291
323
 
292
324
  def insert(self, index: int, value: Any):
@@ -308,7 +340,7 @@ class ListWrapper:
308
340
  with self._conn:
309
341
  self._conn.execute(
310
342
  "INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
311
- (self._name, new_order, json.dumps(value))
343
+ (self._name, new_order, json.dumps(value)),
312
344
  )
313
345
 
314
346
  def pop(self) -> Any:
@@ -317,14 +349,16 @@ class ListWrapper:
317
349
  cursor = self._conn.cursor()
318
350
  cursor.execute(
319
351
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1",
320
- (self._name,)
352
+ (self._name,),
321
353
  )
322
354
  result = cursor.fetchone()
323
355
  if not result:
324
356
  return None
325
357
 
326
358
  rowid_to_delete, value_to_return = result
327
- cursor.execute("DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,))
359
+ cursor.execute(
360
+ "DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
361
+ )
328
362
  return json.loads(value_to_return)
329
363
 
330
364
  def deque(self) -> Any:
@@ -333,14 +367,16 @@ class ListWrapper:
333
367
  cursor = self._conn.cursor()
334
368
  cursor.execute(
335
369
  "SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1",
336
- (self._name,)
370
+ (self._name,),
337
371
  )
338
372
  result = cursor.fetchone()
339
373
  if not result:
340
374
  return None
341
375
 
342
376
  rowid_to_delete, value_to_return = result
343
- cursor.execute("DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,))
377
+ cursor.execute(
378
+ "DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
379
+ )
344
380
  return json.loads(value_to_return)
345
381
 
346
382
 
@@ -350,7 +386,9 @@ class Subscriber(AsyncIterator):
350
386
  Designed to be used with 'async with'.
351
387
  """
352
388
 
353
- def __init__(self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1):
389
+ def __init__(
390
+ self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
391
+ ):
354
392
  self._conn = conn
355
393
  self._channel = channel_name
356
394
  self._poll_interval = poll_interval
@@ -362,9 +400,7 @@ class Subscriber(AsyncIterator):
362
400
  """Background task that polls the database for new messages."""
363
401
  while True:
364
402
  try:
365
- new_messages = await asyncio.to_thread(
366
- self._fetch_new_messages_from_db
367
- )
403
+ new_messages = await asyncio.to_thread(self._fetch_new_messages_from_db)
368
404
  if new_messages:
369
405
  for msg in new_messages:
370
406
  payload = json.loads(msg["message_payload"])
@@ -382,7 +418,7 @@ class Subscriber(AsyncIterator):
382
418
  cursor = self._conn.cursor()
383
419
  cursor.execute(
384
420
  "SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
385
- (self._channel, self._last_seen_timestamp)
421
+ (self._channel, self._last_seen_timestamp),
386
422
  )
387
423
  results = cursor.fetchall()
388
424
  cursor.close()
@@ -409,12 +445,21 @@ class Subscriber(AsyncIterator):
409
445
 
410
446
  class Document:
411
447
  """A data class for a vector and its metadata, with a unique ID."""
412
- def __init__(self, embedding: list[float], id: str|None = None, **metadata):
413
- if not isinstance(embedding, list) or not all(isinstance(x, (int, float)) for x in embedding):
414
- raise TypeError("Embedding must be a list of numbers.")
415
448
 
449
+ def __init__(
450
+ self, embedding: list[float] | None = None, id: str | None = None, **metadata
451
+ ):
416
452
  self.id = id or str(uuid.uuid4())
417
- self.embedding = np.array(embedding, dtype=np.float32)
453
+
454
+ if embedding is None:
455
+ self.embedding = None
456
+ else:
457
+ if not isinstance(embedding, list) or not all(
458
+ isinstance(x, (int, float)) for x in embedding
459
+ ):
460
+ raise TypeError("Embedding must be a list of numbers.")
461
+
462
+ self.embedding = np.array(embedding, dtype=np.float32)
418
463
 
419
464
  for key, value in metadata.items():
420
465
  setattr(self, key, value)
@@ -423,42 +468,74 @@ class Document:
423
468
  """Serializes metadata to a dictionary."""
424
469
  metadata = self.__dict__.copy()
425
470
  # Exclude internal attributes from the metadata payload
426
- metadata.pop('embedding', None)
427
- metadata.pop('id', None)
471
+ metadata.pop("embedding", None)
472
+ metadata.pop("id", None)
428
473
  return metadata
429
474
 
430
475
  def __repr__(self):
431
- metadata_str = ', '.join(f"{k}={v!r}" for k, v in self.to_dict().items())
476
+ metadata_str = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
432
477
  return f"Document(id='{self.id}', {metadata_str})"
433
478
 
434
479
 
435
480
  class CollectionWrapper:
436
481
  """A wrapper for vector collection operations with upsert logic."""
482
+
437
483
  def __init__(self, name: str, conn: sqlite3.Connection):
438
484
  self._name = name
439
485
  self._conn = conn
440
486
 
441
- def index(self, document: Document):
442
- """
443
- Indexes a Document, performing an upsert based on the document's ID.
444
- If the ID exists, the record is replaced.
445
- If the ID is new (or auto-generated), a new record is inserted.
487
+ # Dentro de la clase CollectionWrapper en beaver/core.py
446
488
 
447
- Args:
448
- document: The Document object to index.
489
+ def _flatten_metadata(self, metadata: dict, prefix: str = "") -> dict[str, str]:
490
+ """
491
+ Aplana un diccionario anidado y filtra solo los valores de tipo string.
492
+ Ejemplo: {'a': {'b': 'c'}} -> {'a__b': 'c'}
493
+ """
494
+ flat_dict = {}
495
+ for key, value in metadata.items():
496
+ new_key = f"{prefix}__{key}" if prefix else key
497
+ if isinstance(value, dict):
498
+ flat_dict.update(self._flatten_metadata(value, new_key))
499
+ elif isinstance(value, str):
500
+ flat_dict[new_key] = value
501
+ return flat_dict
502
+
503
+ def index(self, document: Document, *, fts: bool = True):
504
+ """
505
+ Indexa un Document, realizando un upsert y actualizando el índice FTS.
449
506
  """
450
507
  with self._conn:
508
+ if fts:
509
+ self._conn.execute(
510
+ "DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?",
511
+ (self._name, document.id),
512
+ )
513
+
514
+ string_fields = self._flatten_metadata(document.to_dict())
515
+
516
+ if string_fields:
517
+ fts_data = [
518
+ (self._name, document.id, path, content)
519
+ for path, content in string_fields.items()
520
+ ]
521
+ self._conn.executemany(
522
+ "INSERT INTO beaver_fts_index (collection, item_id, field_path, field_content) VALUES (?, ?, ?, ?)",
523
+ fts_data,
524
+ )
525
+
451
526
  self._conn.execute(
452
527
  "INSERT OR REPLACE INTO beaver_collections (collection, item_id, item_vector, metadata) VALUES (?, ?, ?, ?)",
453
528
  (
454
529
  self._name,
455
530
  document.id,
456
- document.embedding.tobytes(),
457
- json.dumps(document.to_dict())
458
- )
531
+ document.embedding.tobytes() if document.embedding is not None else None,
532
+ json.dumps(document.to_dict()),
533
+ ),
459
534
  )
460
535
 
461
- def search(self, vector: list[float], top_k: int = 10) -> list[tuple[Document, float]]:
536
+ def search(
537
+ self, vector: list[float], top_k: int = 10
538
+ ) -> list[tuple[Document, float]]:
462
539
  """
463
540
  Performs a vector search and returns Document objects.
464
541
  """
@@ -467,7 +544,7 @@ class CollectionWrapper:
467
544
  cursor = self._conn.cursor()
468
545
  cursor.execute(
469
546
  "SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
470
- (self._name,)
547
+ (self._name,),
471
548
  )
472
549
 
473
550
  all_docs_data = cursor.fetchall()
@@ -478,9 +555,12 @@ class CollectionWrapper:
478
555
 
479
556
  results = []
480
557
  for row in all_docs_data:
481
- doc_id = row['item_id']
482
- embedding = np.frombuffer(row['item_vector'], dtype=np.float32).tolist()
483
- metadata = json.loads(row['metadata'])
558
+ if row["item_vector"] is None:
559
+ continue # Skip documents without embeddings
560
+
561
+ doc_id = row["item_id"]
562
+ embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
563
+ metadata = json.loads(row["metadata"])
484
564
 
485
565
  distance = np.linalg.norm(embedding - query_vector)
486
566
 
@@ -490,3 +570,69 @@ class CollectionWrapper:
490
570
 
491
571
  results.sort(key=lambda x: x[1])
492
572
  return results[:top_k]
573
+
574
+ def match(
575
+ self, query: str, on_field: str | None = None, top_k: int = 10
576
+ ) -> list[tuple[Document, float]]:
577
+ """
578
+ Realiza una búsqueda de texto completo en los campos de metadatos indexados.
579
+
580
+ Args:
581
+ query: La expresión de búsqueda (ej. "gato", "perro OR conejo").
582
+ on_field: Opcional, el campo específico donde buscar (ej. "details__title").
583
+ top_k: El número máximo de resultados a devolver.
584
+
585
+ Returns:
586
+ Una lista de tuplas (Documento, puntuación_de_relevancia).
587
+ """
588
+ cursor = self._conn.cursor()
589
+
590
+ sql_query = """
591
+ SELECT
592
+ t1.item_id, t1.item_vector, t1.metadata, fts.rank
593
+ FROM beaver_collections AS t1
594
+ JOIN (
595
+ SELECT DISTINCT item_id, rank
596
+ FROM beaver_fts_index
597
+ WHERE beaver_fts_index MATCH ?
598
+ ORDER BY rank
599
+ LIMIT ?
600
+ ) AS fts ON t1.item_id = fts.item_id
601
+ WHERE t1.collection = ?
602
+ ORDER BY fts.rank
603
+ """
604
+
605
+ params = []
606
+ field_filter_sql = ""
607
+
608
+ if on_field:
609
+ field_filter_sql = "AND field_path = ?"
610
+ params.append(on_field)
611
+ else:
612
+ # Búsqueda en todos los campos
613
+ params.append(query)
614
+
615
+ sql_query = sql_query.format(field_filter_sql)
616
+ params.extend([top_k, self._name])
617
+
618
+ cursor.execute(sql_query, tuple(params))
619
+
620
+ results = []
621
+ for row in cursor.fetchall():
622
+ doc_id = row["item_id"]
623
+
624
+ if row["item_vector"] is None:
625
+ embedding = None
626
+ else:
627
+ embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
628
+
629
+ metadata = json.loads(row["metadata"])
630
+ rank = row["rank"]
631
+
632
+ doc = Document(id=doc_id, embedding=embedding, **metadata)
633
+ results.append((doc, rank))
634
+
635
+ results.sort(key=lambda x: x[1])
636
+ cursor.close()
637
+
638
+ return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beaver-db
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Asynchronous, embedded, modern DB based on SQLite.
5
5
  Requires-Python: >=3.13
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,6 @@
1
+ beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
2
+ beaver/core.py,sha256=i2rBoUM1rq_j1xM3w4xW4c9e2eI8Ce6BeJ8rE8jQ-fI,21928
3
+ beaver_db-0.4.0.dist-info/METADATA,sha256=7VzqxHKU-Ft1QVAfVvywt4e50C3QWxS7FUpKIaQEJKk,4865
4
+ beaver_db-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ beaver_db-0.4.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
6
+ beaver_db-0.4.0.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
2
- beaver/core.py,sha256=I-_i8AshcNor1OZxoEtNjzLXCy1Byuxvo84y9K4AV_Q,17518
3
- beaver_db-0.3.0.dist-info/METADATA,sha256=_Hy3Fq64IDahqm3K0vuPvvZRTmtO0sU-tEGlmEjLNpE,4865
4
- beaver_db-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- beaver_db-0.3.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
6
- beaver_db-0.3.0.dist-info/RECORD,,