beaver-db 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/collections.py +58 -0
- {beaver_db-0.5.0.dist-info → beaver_db-0.5.2.dist-info}/METADATA +3 -3
- {beaver_db-0.5.0.dist-info → beaver_db-0.5.2.dist-info}/RECORD +5 -5
- {beaver_db-0.5.0.dist-info → beaver_db-0.5.2.dist-info}/WHEEL +0 -0
- {beaver_db-0.5.0.dist-info → beaver_db-0.5.2.dist-info}/top_level.txt +0 -0
beaver/collections.py
CHANGED
|
@@ -175,6 +175,9 @@ class CollectionWrapper:
|
|
|
175
175
|
if not self._kdtree:
|
|
176
176
|
return []
|
|
177
177
|
|
|
178
|
+
if top_k > len(self._doc_ids):
|
|
179
|
+
top_k = len(self._doc_ids)
|
|
180
|
+
|
|
178
181
|
distances, indices = self._kdtree.query(
|
|
179
182
|
np.array(vector, dtype=np.float32), k=top_k
|
|
180
183
|
)
|
|
@@ -325,3 +328,58 @@ class CollectionWrapper:
|
|
|
325
328
|
)
|
|
326
329
|
for row in rows
|
|
327
330
|
]
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def rerank(
    results: list[list[Document]],
    weights: list[float] | None = None,
    k: int = 60
) -> list[Document]:
    """
    Fuse several ranked result lists into one using Reciprocal Rank Fusion (RRF).

    Each document earns ``weight * 1 / (k + rank)`` from every list it appears
    in, where ``rank`` is its 0-based position in that list. The contributions
    are summed per document id and the documents are returned from the highest
    total to the lowest.

    Only the ``.id`` attribute of each document is read. When the same id
    appears more than once, the first object encountered is the one returned.

    Args:
        results (list[list[Document]]): A list of search result lists, where each
            inner list contains Document objects ordered from best to worst.
        weights (list[float], optional): One weight per result list. If None,
            every list is weighted 1.0. Defaults to None.
        k (int, optional): Smoothing constant of the RRF formula. Must be
            positive because ranks start at 0. Defaults to 60.

    Returns:
        list[Document]: A single list of unique Document objects, sorted by
            their fused score in descending order. Documents with equal scores
            keep the order in which they were first seen.

    Raises:
        ValueError: If the number of weights differs from the number of result
            lists, or if ``k`` is not positive.
    """
    if not results:
        return []

    # Assign a default weight of 1.0 to every list if none are provided
    if weights is None:
        weights = [1.0] * len(results)

    if len(results) != len(weights):
        raise ValueError("The number of result lists must match the number of weights.")

    # Ranks are 0-based, so the top hit divides by k alone: k == 0 would raise
    # ZeroDivisionError and a negative k would yield meaningless scores.
    if k <= 0:
        raise ValueError("k must be a positive number.")

    # Running fused score and the first-seen document object, both keyed by id
    rrf_scores: dict[str, float] = {}
    doc_store: dict[str, Document] = {}

    for result_list, weight in zip(results, weights):
        for rank, doc in enumerate(result_list):
            doc_id = doc.id

            # Keep the first object seen for this id; later duplicates only add score
            doc_store.setdefault(doc_id, doc)

            # Reciprocal rank contribution of this list, scaled by its weight
            score = weight * (1 / (k + rank))
            rrf_scores[doc_id] = rrf_scores.get(doc_id, 0.0) + score

    # Stable sort: ids with equal scores stay in first-seen order
    sorted_doc_ids = sorted(rrf_scores, key=rrf_scores.get, reverse=True)

    return [doc_store[doc_id] for doc_id in sorted_doc_ids]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: beaver-db
|
|
3
|
-
Version: 0.5.0
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.2
|
|
4
|
+
Summary: Fast, embedded, and multi-modal DB based on SQLite for AI-powered applications.
|
|
5
5
|
Requires-Python: >=3.13
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: numpy>=2.3.3
|
|
@@ -11,7 +11,7 @@ Requires-Dist: scipy>=1.16.2
|
|
|
11
11
|
|
|
12
12
|
A fast, single-file, multi-modal database for Python, built with the standard `sqlite3` library.
|
|
13
13
|
|
|
14
|
-
`beaver` is the **B**ackend for **E**mbedded **A**
|
|
14
|
+
`beaver` is the **B**ackend for **E**mbedded, **A**ll-in-one **V**ector, **E**ntity, and **R**elationship storage. It's a simple, local, and embedded database designed to manage complex, modern data types without requiring a database server, built on top of SQLite.
|
|
15
15
|
|
|
16
16
|
## Design Philosophy
|
|
17
17
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
beaver/__init__.py,sha256=-z5Gj6YKMOswpJOOn5Gej8z5i6k3c0Xs00DIYLA-bMI,75
|
|
2
|
-
beaver/collections.py,sha256=
|
|
2
|
+
beaver/collections.py,sha256=4rdGMTD7ex4SQUH52WIHOZhOdeWe7Nqvm9TPg7flv_g,15059
|
|
3
3
|
beaver/core.py,sha256=sk0Z_k7EcORe6bN8CfPukGX7eAfmCGSX_B37KpJmQJ4,7279
|
|
4
4
|
beaver/lists.py,sha256=JG1JOkaYCUldADUzPJhaNi93w-k3S8mUzcCw574uht4,5915
|
|
5
5
|
beaver/subscribers.py,sha256=tCty2iDbeE9IXcPicbxj2CB5gqfLufMB9-nLQwqNBUU,1944
|
|
6
|
-
beaver_db-0.5.
|
|
7
|
-
beaver_db-0.5.
|
|
8
|
-
beaver_db-0.5.
|
|
9
|
-
beaver_db-0.5.
|
|
6
|
+
beaver_db-0.5.2.dist-info/METADATA,sha256=ij6szwN6Ee0MutGlFgszF2vqddR5y8OlA9O4R5Y4Nbo,5904
|
|
7
|
+
beaver_db-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
beaver_db-0.5.2.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
9
|
+
beaver_db-0.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|