bayesian_bm25_rs 0.1.1__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bayesian_bm25_rs
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Classifier: License :: Other/Proprietary License
|
|
5
|
+
Classifier: Programming Language :: Rust
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Summary: Bayesian BM25 scoring and experimental validation (Rust core + Python bindings)
|
|
16
|
+
Keywords: bm25,information-retrieval,search,ranking,bayesian,hybrid-search
|
|
17
|
+
License: UNLICENSED
|
|
18
|
+
Requires-Python: >=3.8
|
|
19
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
20
|
+
Project-URL: Homepage, https://github.com/sigridjineth/bayesian_bm25_rs
|
|
21
|
+
Project-URL: Issues, https://github.com/sigridjineth/bayesian_bm25_rs/issues
|
|
22
|
+
Project-URL: Repository, https://github.com/sigridjineth/bayesian_bm25_rs
|
|
23
|
+
|
|
24
|
+
# bb25 (Bayesian BM25)
|
|
25
|
+
|
|
26
|
+
bb25 is a fast, self-contained implementation of BM25 scoring with Bayesian calibration, exposed through a minimal Python API. It also includes a small reference corpus and an experiment suite so you can validate the expected numerical properties.
|
|
27
|
+
|
|
28
|
+
- PyPI package name: `bayesian_bm25_rs`
|
|
29
|
+
- Python import name: `bb25`
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
pip install bayesian_bm25_rs
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick start
|
|
38
|
+
|
|
39
|
+
### Use the built-in corpus and queries
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
import bb25 as bb
|
|
43
|
+
|
|
44
|
+
corpus = bb.build_default_corpus()
|
|
45
|
+
docs = corpus.documents()
|
|
46
|
+
queries = bb.build_default_queries()
|
|
47
|
+
|
|
48
|
+
bm25 = bb.BM25Scorer(corpus, 1.2, 0.75)
|
|
49
|
+
score = bm25.score(queries[0].terms, docs[0])
|
|
50
|
+
print("score0", score)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Build your own corpus
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
import bb25 as bb
|
|
57
|
+
|
|
58
|
+
corpus = bb.Corpus()
|
|
59
|
+
corpus.add_document("d1", "neural networks for ranking", [0.1] * 8)
|
|
60
|
+
corpus.add_document("d2", "bm25 is a strong baseline", [0.2] * 8)
|
|
61
|
+
corpus.build_index() # must be called before creating scorers
|
|
62
|
+
|
|
63
|
+
bm25 = bb.BM25Scorer(corpus, 1.2, 0.75)
|
|
64
|
+
print(bm25.idf("bm25"))
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Bayesian calibration + hybrid fusion
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
import bb25 as bb
|
|
71
|
+
|
|
72
|
+
corpus = bb.build_default_corpus()
|
|
73
|
+
docs = corpus.documents()
|
|
74
|
+
queries = bb.build_default_queries()
|
|
75
|
+
|
|
76
|
+
bm25 = bb.BM25Scorer(corpus, 1.2, 0.75)
|
|
77
|
+
bayes = bb.BayesianBM25Scorer(bm25, 1.0, 0.5)
|
|
78
|
+
vector = bb.VectorScorer()
|
|
79
|
+
hybrid = bb.HybridScorer(bayes, vector)
|
|
80
|
+
|
|
81
|
+
q = queries[0]
|
|
82
|
+
prob_or = hybrid.score_or(q.terms, q.embedding, docs[0])
|
|
83
|
+
prob_and = hybrid.score_and(q.terms, q.embedding, docs[0])
|
|
84
|
+
print("OR", prob_or, "AND", prob_and)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Run the experiments
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
import bb25 as bb
|
|
91
|
+
|
|
92
|
+
results = bb.run_experiments()
|
|
93
|
+
print(all(r.passed for r in results))
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Build from source (Rust)
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
make build
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## PyPI publishing
|
|
103
|
+
|
|
104
|
+
Build a wheel with maturin:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
python -m pip install maturin
|
|
108
|
+
maturin build --release
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
For Pyodide builds, see `docs/pyodide.md`.
|
|
112
|
+
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
bayesian_bm25_rs-0.1.1.dist-info/METADATA,sha256=ZDcmTY4LQxiDJNpyaPsFWWRqBVqqTHAiZzxpD7zXHtg,2917
|
|
2
|
+
bayesian_bm25_rs-0.1.1.dist-info/WHEEL,sha256=qr8B0oB3ZR0cwIaW0mxUADH9b4sNRMHcgJlpYNTDAPw,105
|
|
3
|
+
bb25/__init__.py,sha256=cAbH7A3F8yvvXg52Ovx7vuohGYPBlaSlFhTY4WSQ3u0,99
|
|
4
|
+
bb25/bb25.cpython-312-darwin.so,sha256=-UQrS59CLTNeAFSdwGAep9W1rBwxa6BcwYAvojywEWs,988800
|
|
5
|
+
bayesian_bm25_rs-0.1.1.dist-info/RECORD,,
|
bb25/__init__.py
ADDED
|
Binary file
|