binaryfuse 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ # This file is autogenerated by the Binary Fuse Filter CI/CD setup task
2
+
3
+ name: CI
4
+
5
+ on:
6
+ push:
7
+ branches:
8
+ - main
9
+ - master
10
+ tags:
11
+ - '*'
12
+ pull_request:
13
+ workflow_dispatch:
14
+
15
+ permissions:
16
+ contents: read
17
+
18
+ jobs:
19
+ linux:
20
+ runs-on: ubuntu-latest
21
+ strategy:
22
+ matrix:
23
+ target: [x86_64, x86, aarch64, armv7, s390x, ppc64le]
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: actions/setup-python@v5
27
+ with:
28
+ python-version: '3.10'
29
+ - name: Build wheels
30
+ uses: PyO3/maturin-action@v1
31
+ with:
32
+ target: ${{ matrix.target }}
33
+ args: --release --out dist --interpreter 3.10
34
+ sccache: 'true'
35
+ manylinux: auto
36
+ - name: Upload wheels
37
+ uses: actions/upload-artifact@v4
38
+ with:
39
+ name: wheels-linux-${{ matrix.target }}
40
+ path: dist
41
+
42
+ windows:
43
+ runs-on: windows-latest
44
+ strategy:
45
+ matrix:
46
+ target: [x64, x86]
47
+ steps:
48
+ - uses: actions/checkout@v4
49
+ - uses: actions/setup-python@v5
50
+ with:
51
+ python-version: '3.10'
52
+ architecture: ${{ matrix.target }}
53
+ - name: Build wheels
54
+ uses: PyO3/maturin-action@v1
55
+ with:
56
+ target: ${{ matrix.target }}
57
+ args: --release --out dist --find-interpreter
58
+ sccache: 'true'
59
+ - name: Upload wheels
60
+ uses: actions/upload-artifact@v4
61
+ with:
62
+ name: wheels-windows-${{ matrix.target }}
63
+ path: dist
64
+
65
+ macos:
66
+ runs-on: macos-latest
67
+ strategy:
68
+ matrix:
69
+ target: [x86_64, aarch64]
70
+ steps:
71
+ - uses: actions/checkout@v4
72
+ - uses: actions/setup-python@v5
73
+ with:
74
+ python-version: '3.10'
75
+ - name: Build wheels
76
+ uses: PyO3/maturin-action@v1
77
+ with:
78
+ target: ${{ matrix.target }}
79
+ args: --release --out dist --find-interpreter
80
+ sccache: 'true'
81
+ - name: Upload wheels
82
+ uses: actions/upload-artifact@v4
83
+ with:
84
+ name: wheels-macos-${{ matrix.target }}
85
+ path: dist
86
+
87
+ sdist:
88
+ runs-on: ubuntu-latest
89
+ steps:
90
+ - uses: actions/checkout@v4
91
+ - name: Build sdist
92
+ uses: PyO3/maturin-action@v1
93
+ with:
94
+ command: sdist
95
+ args: --out dist
96
+ - name: Upload sdist
97
+ uses: actions/upload-artifact@v4
98
+ with:
99
+ name: wheels-sdist
100
+ path: dist
101
+
102
+ release:
103
+ name: Release
104
+ runs-on: ubuntu-latest
105
+ if: "startsWith(github.ref, 'refs/tags/')"
106
+ needs: [linux, windows, macos, sdist]
107
+ environment: pypi
108
+ permissions:
109
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
110
+ contents: read
111
+ steps:
112
+ - uses: actions/download-artifact@v4
113
+ with:
114
+ pattern: wheels-*
115
+ path: dist
116
+ merge-multiple: true
117
+ - name: Publish to PyPI
118
+ uses: pypa/gh-action-pypi-publish@release/v1
119
+ with:
120
+ skip-existing: true
121
+ # To use API Token, uncomment the line below and set the secret in your repo
122
+ # password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,14 @@
1
+ /target
2
+ /.venv
3
+ /python/binary_fuse/__pycache__
4
+ /__pycache__
5
+ **/*.pyc
6
+ **/*.pyo
7
+ **/*.pyd
8
+ .pytest_cache
9
+ .uv_cache
10
+ *.egg-info
11
+ dist/
12
+ build/
13
+ Cargo.lock
14
+ uv.lock
@@ -0,0 +1,23 @@
1
+ # CHANGELOG
2
+
3
+
4
+
5
+ ## v0.1.0 (2026-01-23)
6
+
7
+ ### Documentation
8
+
9
+ * docs: update package name in README and add version bump workflow ([`0500160`](https://github.com/NivekNey/binaryfuse/commit/0500160632789b1bcfd3aa1a1fc8b28dca974e0e))
10
+
11
+ ### Feature
12
+
13
+ * feat(ci): enable true minimal-touch CD with python-semantic-release ([`03c2555`](https://github.com/NivekNey/binaryfuse/commit/03c25555ee340a4f4ecadc566f75d59ffd1a5c56))
14
+
15
+ ### Fix
16
+
17
+ * fix(ci): restrict linux wheel build to CPython 3.10 to avoid PyPy incompatibility ([`da8f882`](https://github.com/NivekNey/binaryfuse/commit/da8f882e7dda1084f6bdca9af7062d4df95322a8))
18
+
19
+ * fix(build): remove conflicting binary target from Cargo.toml to ensure stable wheel builds ([`f1b2c75`](https://github.com/NivekNey/binaryfuse/commit/f1b2c75c1f179dde8bfa06fc65718ff083c60b74))
20
+
21
+ ### Unknown
22
+
23
+ * Initial commit: Binary Fuse Filter implementation with Python bindings and CI/CD ([`c2d617d`](https://github.com/NivekNey/binaryfuse/commit/c2d617dc91b828d67851534fadbbda445f76a96a))
@@ -0,0 +1,321 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "autocfg"
7
+ version = "1.5.0"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
10
+
11
+ [[package]]
12
+ name = "binaryfuse"
13
+ version = "0.1.1"
14
+ dependencies = [
15
+ "pyo3",
16
+ "rand",
17
+ "rand_chacha",
18
+ "serde",
19
+ "serde_json",
20
+ ]
21
+
22
+ [[package]]
23
+ name = "cfg-if"
24
+ version = "1.0.4"
25
+ source = "registry+https://github.com/rust-lang/crates.io-index"
26
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
27
+
28
+ [[package]]
29
+ name = "getrandom"
30
+ version = "0.2.17"
31
+ source = "registry+https://github.com/rust-lang/crates.io-index"
32
+ checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
33
+ dependencies = [
34
+ "cfg-if",
35
+ "libc",
36
+ "wasi",
37
+ ]
38
+
39
+ [[package]]
40
+ name = "heck"
41
+ version = "0.5.0"
42
+ source = "registry+https://github.com/rust-lang/crates.io-index"
43
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
44
+
45
+ [[package]]
46
+ name = "indoc"
47
+ version = "2.0.7"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
50
+ dependencies = [
51
+ "rustversion",
52
+ ]
53
+
54
+ [[package]]
55
+ name = "itoa"
56
+ version = "1.0.17"
57
+ source = "registry+https://github.com/rust-lang/crates.io-index"
58
+ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
59
+
60
+ [[package]]
61
+ name = "libc"
62
+ version = "0.2.180"
63
+ source = "registry+https://github.com/rust-lang/crates.io-index"
64
+ checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
65
+
66
+ [[package]]
67
+ name = "memchr"
68
+ version = "2.7.6"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
71
+
72
+ [[package]]
73
+ name = "memoffset"
74
+ version = "0.9.1"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
77
+ dependencies = [
78
+ "autocfg",
79
+ ]
80
+
81
+ [[package]]
82
+ name = "once_cell"
83
+ version = "1.21.3"
84
+ source = "registry+https://github.com/rust-lang/crates.io-index"
85
+ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
86
+
87
+ [[package]]
88
+ name = "portable-atomic"
89
+ version = "1.13.0"
90
+ source = "registry+https://github.com/rust-lang/crates.io-index"
91
+ checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950"
92
+
93
+ [[package]]
94
+ name = "ppv-lite86"
95
+ version = "0.2.21"
96
+ source = "registry+https://github.com/rust-lang/crates.io-index"
97
+ checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
98
+ dependencies = [
99
+ "zerocopy",
100
+ ]
101
+
102
+ [[package]]
103
+ name = "proc-macro2"
104
+ version = "1.0.106"
105
+ source = "registry+https://github.com/rust-lang/crates.io-index"
106
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
107
+ dependencies = [
108
+ "unicode-ident",
109
+ ]
110
+
111
+ [[package]]
112
+ name = "pyo3"
113
+ version = "0.22.6"
114
+ source = "registry+https://github.com/rust-lang/crates.io-index"
115
+ checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
116
+ dependencies = [
117
+ "cfg-if",
118
+ "indoc",
119
+ "libc",
120
+ "memoffset",
121
+ "once_cell",
122
+ "portable-atomic",
123
+ "pyo3-build-config",
124
+ "pyo3-ffi",
125
+ "pyo3-macros",
126
+ "unindent",
127
+ ]
128
+
129
+ [[package]]
130
+ name = "pyo3-build-config"
131
+ version = "0.22.6"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
134
+ dependencies = [
135
+ "once_cell",
136
+ "target-lexicon",
137
+ ]
138
+
139
+ [[package]]
140
+ name = "pyo3-ffi"
141
+ version = "0.22.6"
142
+ source = "registry+https://github.com/rust-lang/crates.io-index"
143
+ checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
144
+ dependencies = [
145
+ "libc",
146
+ "pyo3-build-config",
147
+ ]
148
+
149
+ [[package]]
150
+ name = "pyo3-macros"
151
+ version = "0.22.6"
152
+ source = "registry+https://github.com/rust-lang/crates.io-index"
153
+ checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
154
+ dependencies = [
155
+ "proc-macro2",
156
+ "pyo3-macros-backend",
157
+ "quote",
158
+ "syn",
159
+ ]
160
+
161
+ [[package]]
162
+ name = "pyo3-macros-backend"
163
+ version = "0.22.6"
164
+ source = "registry+https://github.com/rust-lang/crates.io-index"
165
+ checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
166
+ dependencies = [
167
+ "heck",
168
+ "proc-macro2",
169
+ "pyo3-build-config",
170
+ "quote",
171
+ "syn",
172
+ ]
173
+
174
+ [[package]]
175
+ name = "quote"
176
+ version = "1.0.44"
177
+ source = "registry+https://github.com/rust-lang/crates.io-index"
178
+ checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
179
+ dependencies = [
180
+ "proc-macro2",
181
+ ]
182
+
183
+ [[package]]
184
+ name = "rand"
185
+ version = "0.8.5"
186
+ source = "registry+https://github.com/rust-lang/crates.io-index"
187
+ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
188
+ dependencies = [
189
+ "libc",
190
+ "rand_chacha",
191
+ "rand_core",
192
+ ]
193
+
194
+ [[package]]
195
+ name = "rand_chacha"
196
+ version = "0.3.1"
197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
198
+ checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
199
+ dependencies = [
200
+ "ppv-lite86",
201
+ "rand_core",
202
+ ]
203
+
204
+ [[package]]
205
+ name = "rand_core"
206
+ version = "0.6.4"
207
+ source = "registry+https://github.com/rust-lang/crates.io-index"
208
+ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
209
+ dependencies = [
210
+ "getrandom",
211
+ ]
212
+
213
+ [[package]]
214
+ name = "rustversion"
215
+ version = "1.0.22"
216
+ source = "registry+https://github.com/rust-lang/crates.io-index"
217
+ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
218
+
219
+ [[package]]
220
+ name = "serde"
221
+ version = "1.0.228"
222
+ source = "registry+https://github.com/rust-lang/crates.io-index"
223
+ checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
224
+ dependencies = [
225
+ "serde_core",
226
+ "serde_derive",
227
+ ]
228
+
229
+ [[package]]
230
+ name = "serde_core"
231
+ version = "1.0.228"
232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
233
+ checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
234
+ dependencies = [
235
+ "serde_derive",
236
+ ]
237
+
238
+ [[package]]
239
+ name = "serde_derive"
240
+ version = "1.0.228"
241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
242
+ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
243
+ dependencies = [
244
+ "proc-macro2",
245
+ "quote",
246
+ "syn",
247
+ ]
248
+
249
+ [[package]]
250
+ name = "serde_json"
251
+ version = "1.0.149"
252
+ source = "registry+https://github.com/rust-lang/crates.io-index"
253
+ checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
254
+ dependencies = [
255
+ "itoa",
256
+ "memchr",
257
+ "serde",
258
+ "serde_core",
259
+ "zmij",
260
+ ]
261
+
262
+ [[package]]
263
+ name = "syn"
264
+ version = "2.0.114"
265
+ source = "registry+https://github.com/rust-lang/crates.io-index"
266
+ checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
267
+ dependencies = [
268
+ "proc-macro2",
269
+ "quote",
270
+ "unicode-ident",
271
+ ]
272
+
273
+ [[package]]
274
+ name = "target-lexicon"
275
+ version = "0.12.16"
276
+ source = "registry+https://github.com/rust-lang/crates.io-index"
277
+ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
278
+
279
+ [[package]]
280
+ name = "unicode-ident"
281
+ version = "1.0.22"
282
+ source = "registry+https://github.com/rust-lang/crates.io-index"
283
+ checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
284
+
285
+ [[package]]
286
+ name = "unindent"
287
+ version = "0.2.4"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
290
+
291
+ [[package]]
292
+ name = "wasi"
293
+ version = "0.11.1+wasi-snapshot-preview1"
294
+ source = "registry+https://github.com/rust-lang/crates.io-index"
295
+ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
296
+
297
+ [[package]]
298
+ name = "zerocopy"
299
+ version = "0.8.33"
300
+ source = "registry+https://github.com/rust-lang/crates.io-index"
301
+ checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd"
302
+ dependencies = [
303
+ "zerocopy-derive",
304
+ ]
305
+
306
+ [[package]]
307
+ name = "zerocopy-derive"
308
+ version = "0.8.33"
309
+ source = "registry+https://github.com/rust-lang/crates.io-index"
310
+ checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1"
311
+ dependencies = [
312
+ "proc-macro2",
313
+ "quote",
314
+ "syn",
315
+ ]
316
+
317
+ [[package]]
318
+ name = "zmij"
319
+ version = "1.0.16"
320
+ source = "registry+https://github.com/rust-lang/crates.io-index"
321
+ checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65"
@@ -0,0 +1,31 @@
1
+ [package]
2
+ name = "binaryfuse"
3
+ version = "0.1.1"
4
+ edition = "2021"
5
+ authors = ["Kevin Yen <kevinyen@gmail.com>"]
6
+ description = "A fast and space-efficient Binary Fuse Filter implementation with Python bindings."
7
+ license = "MIT"
8
+ readme = "README.md"
9
+
10
+ [lib]
11
+ name = "binary_fuse"
12
+ crate-type = ["cdylib", "rlib"]
13
+
14
+ [dependencies]
15
+ pyo3 = { version = "0.22", features = ["abi3-py38"], optional = true }
16
+ rand = "0.8"
17
+ serde = { version = "1.0", features = ["derive"] }
18
+ serde_json = "1.0"
19
+
20
+ [features]
21
+ extension-module = ["pyo3", "pyo3/extension-module"]
22
+ default = ["extension-module"]
23
+
24
+
25
+
26
+ [dev-dependencies]
27
+ rand_chacha = "0.3"
28
+
29
+ [profile.release]
30
+ lto = true
31
+ codegen-units = 1
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: binaryfuse
3
+ Version: 0.1.1
4
+ Requires-Dist: pytest>=8.3.0 ; extra == 'test'
5
+ Provides-Extra: test
6
+ Summary: A fast and space-efficient Binary Fuse Filter implementation with Python bindings.
7
+ Author-email: Kevin Yen <kevinyen@gmail.com>
8
+ License: MIT
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
11
+
12
+ # Binary Fuse Filter
13
+
14
+ A fast and space-efficient Binary Fuse Filter implementation in Rust with Python bindings. Based on the paper "Binary Fuse Filters: Fast and Smaller Than Xor Filters" by Thomas Mueller Graf and Daniel Lemire.
15
+
16
+ Binary Fuse Filters are approximate set membership structures (like Bloom filters) but are smaller and faster for lookups. They are particularly effective when the set of keys is known in advance (static filters).
17
+
18
+ ## Features
19
+
20
+ - **High Performance**: Implementation in Rust for maximum speed.
21
+ - **Space Efficient**: Only ~1.15x the theoretical minimum size, i.e. about 15% overhead (better than Bloom/Xor filters).
22
+ - **Python Bindings**: Easy to use from Python with `BinaryFuse8` (8-bit fingerprints, ~0.4% FPR) and `BinaryFuse16` (16-bit fingerprints, ~0.0015% FPR).
23
+ - **Serialization**: Full support for Python's `pickle` module.
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install binaryfuse
29
+ ```
30
+
31
+
32
+ ## Usage (Python)
33
+
34
+ ```python
35
+ from binary_fuse import BinaryFuse8
36
+
37
+ # List of integer keys
38
+ keys = [1, 2, 3, 100, 200, 300]
39
+
40
+ # Build the filter
41
+ bf = BinaryFuse8(keys)
42
+
43
+ # Check membership
44
+ print(1 in bf) # True
45
+ print(101 in bf) # False (most likely)
46
+
47
+ # Serialization
48
+ import pickle
49
+ data = pickle.dumps(bf)
50
+ bf2 = pickle.loads(data)
51
+ print(1 in bf2) # True
52
+ ```
53
+
54
+ ### Compatibility & Zero-Compile Installation
55
+ We ship **`abi3` wheels** (Stable ABI), which means:
56
+ * **One binary works everywhere**: A single wheel supports **all Python versions from 3.8 to 3.13+**.
57
+ * **No compilation needed**: Works out-of-the-box on Linux (glibc/musl), macOS (Intel/Apple Silicon), and Windows.
58
+ * **Future-proof**: The wheel you install today will likely work on Python 3.14+ without any updates.
59
+
60
+ ## Technical Details
61
+
62
+ Binary Fuse Filters use a fused segment layout where each key is mapped to three locations ($h_0, h_1, h_2$) in a linear array. The locations are chosen from overlapping segments, which improves the success rate of the construction algorithm compared to standard Xor filters.
63
+
64
+ This implementation uses:
65
+ - **Arity 3**: Each key maps to 3 locations.
66
+ - **32 Segments**: Standard for modern Binary Fuse Filter implementations.
67
+ - **Hashing**: Robust mixing of 64-bit hashes to ensure uniform distribution.
68
+
69
+ ### Comparative Results (100,000 keys)
70
+
71
+ | Filter | Build (s) | Hit (s) | Miss (s) | FPR (%) | Size (KB) | Bits/Key |
72
+ | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
73
+ | **BinaryFuse8 (Ours)** | **0.0170** | **0.0029** | **0.0032** | **0.37%** | 131.0 | 10.7 |
74
+ | **BinaryFuse16 (Ours)**| **0.0132** | **0.0029** | **0.0030** | **0.00%** | 262.0 | 21.4 |
75
+ | rbloom (Rust Bloom) | 0.0053 | 0.0019 | 0.0026 | 1.00% | 117.0 | 9.6 |
76
+ | Fuse8 (Lemire C) | 0.0195 | 0.0203 | 0.0206 | 0.40% | 1056.0* | N/A |
77
+ | pybloom-live (Py) | 0.0915 | 0.0829 | 0.0602 | 0.99% | 117.3 | 9.6 |
78
+
79
+ *\* Note: Some C-binding sizes (like Fuse8) are estimated via RSS delta and may be inaccurate.*
80
+
81
+ ### Why use Binary Fuse Filter?
82
+
83
+ While standard Bloom filters (like `rbloom`) are slightly faster for lookups (~20ns vs ~30ns), **Binary Fuse Filters are significantly more space-efficient for the same accuracy**.
84
+
85
+ * **Accuracy vs Size**: For roughly the same memory (~120-130KB), our `BinaryFuse8` offers **0.37%** False Positive Rate, while the Bloom filter has **1.00%** error. You get **2.7x better accuracy** for the same cost.
86
+ * **To match our accuracy**: A Bloom filter would need ~11.5 bits/key instead of 9.6, making it ~20% larger than it is at 1.00% FPR and ~7% larger than our filter (10.7 bits/key).
87
+ * **Immutability**: Binary Fuse filters are static. If you need to add items incrementally, use a Bloom filter. If you have a static dataset (e.g., a daily blocklist, a dictionary), Binary Fuse is mathematically superior.
88
+
89
+ ## Thread Safety
90
+
91
+ Our implementation is **thread-safe for concurrent lookups**.
92
+ Because the filter is immutable after construction, multiple Python threads can perform membership tests (`key in bf`) simultaneously. The underlying Rust logic does not require any write locks or synchronization, making it ideal for high-concurrency environments.
93
+
94
+ ## License
95
+
96
+ MIT
97
+
@@ -0,0 +1,85 @@
1
+ # Binary Fuse Filter
2
+
3
+ A fast and space-efficient Binary Fuse Filter implementation in Rust with Python bindings. Based on the paper "Binary Fuse Filters: Fast and Smaller Than Xor Filters" by Thomas Mueller Graf and Daniel Lemire.
4
+
5
+ Binary Fuse Filters are approximate set membership structures (like Bloom filters) but are smaller and faster for lookups. They are particularly effective when the set of keys is known in advance (static filters).
6
+
7
+ ## Features
8
+
9
+ - **High Performance**: Implementation in Rust for maximum speed.
10
+ - **Space Efficient**: Only ~1.15x the theoretical minimum size, i.e. about 15% overhead (better than Bloom/Xor filters).
11
+ - **Python Bindings**: Easy to use from Python with `BinaryFuse8` (8-bit fingerprints, ~0.4% FPR) and `BinaryFuse16` (16-bit fingerprints, ~0.0015% FPR).
12
+ - **Serialization**: Full support for Python's `pickle` module.
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install binaryfuse
18
+ ```
19
+
20
+
21
+ ## Usage (Python)
22
+
23
+ ```python
24
+ from binary_fuse import BinaryFuse8
25
+
26
+ # List of integer keys
27
+ keys = [1, 2, 3, 100, 200, 300]
28
+
29
+ # Build the filter
30
+ bf = BinaryFuse8(keys)
31
+
32
+ # Check membership
33
+ print(1 in bf) # True
34
+ print(101 in bf) # False (most likely)
35
+
36
+ # Serialization
37
+ import pickle
38
+ data = pickle.dumps(bf)
39
+ bf2 = pickle.loads(data)
40
+ print(1 in bf2) # True
41
+ ```
42
+
43
+ ### Compatibility & Zero-Compile Installation
44
+ We ship **`abi3` wheels** (Stable ABI), which means:
45
+ * **One binary works everywhere**: A single wheel supports **all Python versions from 3.8 to 3.13+**.
46
+ * **No compilation needed**: Works out-of-the-box on Linux (glibc/musl), macOS (Intel/Apple Silicon), and Windows.
47
+ * **Future-proof**: The wheel you install today will likely work on Python 3.14+ without any updates.
48
+
49
+ ## Technical Details
50
+
51
+ Binary Fuse Filters use a fused segment layout where each key is mapped to three locations ($h_0, h_1, h_2$) in a linear array. The locations are chosen from overlapping segments, which improves the success rate of the construction algorithm compared to standard Xor filters.
52
+
53
+ This implementation uses:
54
+ - **Arity 3**: Each key maps to 3 locations.
55
+ - **32 Segments**: Standard for modern Binary Fuse Filter implementations.
56
+ - **Hashing**: Robust mixing of 64-bit hashes to ensure uniform distribution.
57
+
58
+ ### Comparative Results (100,000 keys)
59
+
60
+ | Filter | Build (s) | Hit (s) | Miss (s) | FPR (%) | Size (KB) | Bits/Key |
61
+ | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
62
+ | **BinaryFuse8 (Ours)** | **0.0170** | **0.0029** | **0.0032** | **0.37%** | 131.0 | 10.7 |
63
+ | **BinaryFuse16 (Ours)**| **0.0132** | **0.0029** | **0.0030** | **0.00%** | 262.0 | 21.4 |
64
+ | rbloom (Rust Bloom) | 0.0053 | 0.0019 | 0.0026 | 1.00% | 117.0 | 9.6 |
65
+ | Fuse8 (Lemire C) | 0.0195 | 0.0203 | 0.0206 | 0.40% | 1056.0* | N/A |
66
+ | pybloom-live (Py) | 0.0915 | 0.0829 | 0.0602 | 0.99% | 117.3 | 9.6 |
67
+
68
+ *\* Note: Some C-binding sizes (like Fuse8) are estimated via RSS delta and may be inaccurate.*
69
+
70
+ ### Why use Binary Fuse Filter?
71
+
72
+ While standard Bloom filters (like `rbloom`) are slightly faster for lookups (~20ns vs ~30ns), **Binary Fuse Filters are significantly more space-efficient for the same accuracy**.
73
+
74
+ * **Accuracy vs Size**: For roughly the same memory (~120-130KB), our `BinaryFuse8` offers **0.37%** False Positive Rate, while the Bloom filter has **1.00%** error. You get **2.7x better accuracy** for the same cost.
75
+ * **To match our accuracy**: A Bloom filter would need ~11.5 bits/key instead of 9.6, making it ~20% larger than it is at 1.00% FPR and ~7% larger than our filter (10.7 bits/key).
76
+ * **Immutability**: Binary Fuse filters are static. If you need to add items incrementally, use a Bloom filter. If you have a static dataset (e.g., a daily blocklist, a dictionary), Binary Fuse is mathematically superior.
77
+
78
+ ## Thread Safety
79
+
80
+ Our implementation is **thread-safe for concurrent lookups**.
81
+ Because the filter is immutable after construction, multiple Python threads can perform membership tests (`key in bf`) simultaneously. The underlying Rust logic does not require any write locks or synchronization, making it ideal for high-concurrency environments.
82
+
83
+ ## License
84
+
85
+ MIT