beaver-db 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of beaver-db might be problematic. Click here for more details.

beaver/collections.py CHANGED
@@ -175,6 +175,9 @@ class CollectionWrapper:
175
175
  if not self._kdtree:
176
176
  return []
177
177
 
178
+ if top_k > len(self._doc_ids):
179
+ top_k = len(self._doc_ids)
180
+
178
181
  distances, indices = self._kdtree.query(
179
182
  np.array(vector, dtype=np.float32), k=top_k
180
183
  )
@@ -325,3 +328,58 @@ class CollectionWrapper:
325
328
  )
326
329
  for row in rows
327
330
  ]
331
+
332
+
333
def rerank(
    results: list[list[Document]],
    weights: list[float] | None = None,
    k: int = 60,
) -> list[Document]:
    """
    Fuse several ranked result lists into one using Reciprocal Rank Fusion (RRF).

    Each document contributes ``weight / (k + rank)`` for every list it appears
    in, where ``rank`` is its 1-based position in that list. Contributions are
    summed per document ID and the documents are returned by descending total.
    Documents are identified by their ``.id`` attribute; when the same ID occurs
    more than once, the first object encountered is the one returned.

    Args:
        results: Search result lists, each ordered from best to worst match.
        weights: One weight per result list. If None, every list is weighted
            1.0.
        k: Smoothing constant of the RRF formula; larger values flatten the
            difference between high and low ranks. Must be non-negative.

    Returns:
        A single list of unique documents sorted by fused score, highest first.
        Documents with equal scores keep the order in which they were first
        seen.

    Raises:
        ValueError: If the number of weights differs from the number of result
            lists, or if ``k`` is negative.
    """
    if not results:
        return []

    if weights is None:
        weights = [1.0] * len(results)

    if len(results) != len(weights):
        raise ValueError("The number of result lists must match the number of weights.")

    # A negative k can make (k + rank) zero or negative, which either divides
    # by zero or inverts the ranking; reject it up front.
    if k < 0:
        raise ValueError("k must be non-negative.")

    rrf_scores: dict[str, float] = {}
    doc_store: dict[str, Document] = {}

    for result_list, weight in zip(results, weights):
        # Ranks start at 1, as in the RRF formula. Starting at 0 would make
        # the top hit divide by k alone and fail outright for k == 0.
        for rank, doc in enumerate(result_list, start=1):
            doc_id = doc.id
            # Keep the first object seen for each ID.
            doc_store.setdefault(doc_id, doc)
            rrf_scores[doc_id] = rrf_scores.get(doc_id, 0.0) + weight / (k + rank)

    # sorted() is stable even with reverse=True, so ties keep first-seen order.
    sorted_doc_ids = sorted(rrf_scores, key=rrf_scores.__getitem__, reverse=True)

    return [doc_store[doc_id] for doc_id in sorted_doc_ids]
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beaver-db
3
- Version: 0.5.0
4
- Summary: Asynchronous, embedded, modern DB based on SQLite.
3
+ Version: 0.5.2
4
+ Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
5
5
  Requires-Python: >=3.13
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: numpy>=2.3.3
@@ -11,7 +11,7 @@ Requires-Dist: scipy>=1.16.2
11
11
 
12
12
  A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
13
13
 
14
- `beaver` is the **B**ackend for **E**mbedded **A**synchronous **V**ector & **E**vent Retrieval. It's an industrious, all-in-one database designed to manage complex, modern data types without requiring a database server.
14
+ `beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
15
15
 
16
16
  ## Design Philosophy
17
17
 
@@ -1,9 +1,9 @@
1
1
  beaver/__init__.py,sha256=-z5Gj6YKMOswpJOOn5Gej8z5i6k3c0Xs00DIYLA-bMI,75
2
- beaver/collections.py,sha256=fP1xkmo-XXlk3H_lPRiqFhtizINQn8192wOtXFlkTK4,12811
2
+ beaver/collections.py,sha256=4rdGMTD7ex4SQUH52WIHOZhOdeWe7Nqvm9TPg7flv_g,15059
3
3
  beaver/core.py,sha256=sk0Z_k7EcORe6bN8CfPukGX7eAfmCGSX_B37KpJmQJ4,7279
4
4
  beaver/lists.py,sha256=JG1JOkaYCUldADUzPJhaNi93w-k3S8mUzcCw574uht4,5915
5
5
  beaver/subscribers.py,sha256=tCty2iDbeE9IXcPicbxj2CB5gqfLufMB9-nLQwqNBUU,1944
6
- beaver_db-0.5.0.dist-info/METADATA,sha256=VpZkov21mBFSTvgSsw6evkQZ9awqrMI42CQJ_RD3nDo,5829
7
- beaver_db-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- beaver_db-0.5.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
9
- beaver_db-0.5.0.dist-info/RECORD,,
6
+ beaver_db-0.5.2.dist-info/METADATA,sha256=ij6szwN6Ee0MutGlFgszF2vqddR5y8OlA9O4R5Y4Nbo,5904
7
+ beaver_db-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ beaver_db-0.5.2.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
9
+ beaver_db-0.5.2.dist-info/RECORD,,