bloomkit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +207 -0
- package/dist/index.cjs +376 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +214 -0
- package/dist/index.d.ts +214 -0
- package/dist/index.js +341 -0
- package/dist/index.js.map +1 -0
- package/package.json +50 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/** @internal Compute the optimal bit-array size `m` for `n` expected items and a target false-positive rate `fpr`. */
|
|
2
|
+
declare function optimalM(n: number, fpr: number): number;
|
|
3
|
+
/** @internal Compute the optimal number of hash functions `k` for a bit array of `m` bits holding `n` items. */
|
|
4
|
+
declare function optimalK(m: number, n: number): number;
|
|
5
|
+
interface BloomFilterOptions {
|
|
6
|
+
/**
|
|
7
|
+
* Expected number of items that will be inserted.
|
|
8
|
+
* Used to size the filter optimally.
|
|
9
|
+
*/
|
|
10
|
+
capacity: number;
|
|
11
|
+
/**
|
|
12
|
+
* Target false-positive rate (0 < errorRate < 1). Default: 0.01 (1%).
|
|
13
|
+
*/
|
|
14
|
+
errorRate?: number;
|
|
15
|
+
}
|
|
16
|
+
interface BloomFilterJSON {
|
|
17
|
+
type: "BloomFilter";
|
|
18
|
+
capacity: number;
|
|
19
|
+
errorRate: number;
|
|
20
|
+
m: number;
|
|
21
|
+
k: number;
|
|
22
|
+
bits: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Standard Bloom filter — space-efficient probabilistic set membership.
|
|
26
|
+
*
|
|
27
|
+
* - `add(item)` always works correctly.
|
|
28
|
+
* - `has(item)` may return `true` for items not added (false positive; rate ≤ `errorRate` as long as no more than `capacity` items have been added).
|
|
29
|
+
* - `has(item)` never returns `false` for items that were added.
|
|
30
|
+
* - Items cannot be removed (use `CountingBloomFilter` for deletions).
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* const bf = new BloomFilter({ capacity: 1_000_000, errorRate: 0.01 });
|
|
34
|
+
* bf.add("hello");
|
|
35
|
+
* bf.has("hello"); // true
|
|
36
|
+
* bf.has("world"); // false (with high probability)
|
|
37
|
+
*/
|
|
38
|
+
declare class BloomFilter {
|
|
39
|
+
readonly capacity: number;
|
|
40
|
+
readonly errorRate: number;
|
|
41
|
+
/** Bit-array size. */
|
|
42
|
+
readonly m: number;
|
|
43
|
+
/** Number of hash functions. */
|
|
44
|
+
readonly k: number;
|
|
45
|
+
private readonly _bits;
|
|
46
|
+
private _count;
|
|
47
|
+
constructor(options: BloomFilterOptions);
|
|
48
|
+
/** Add an item to the filter. */
|
|
49
|
+
add(item: string): this;
|
|
50
|
+
/**
|
|
51
|
+
* Test membership. Returns `true` if item *may* have been added,
|
|
52
|
+
* `false` if it *definitely* has not.
|
|
53
|
+
*/
|
|
54
|
+
has(item: string): boolean;
|
|
55
|
+
/** Number of items added (may exceed capacity). */
|
|
56
|
+
get size(): number;
|
|
57
|
+
/** Current estimated false-positive rate based on items inserted. */
|
|
58
|
+
get currentFPR(): number;
|
|
59
|
+
/** Reset the filter. */
|
|
60
|
+
clear(): this;
|
|
61
|
+
/** Serialize to a plain object for JSON.stringify. */
|
|
62
|
+
toJSON(): BloomFilterJSON;
|
|
63
|
+
/** Restore a BloomFilter from the output of `toJSON`. */
|
|
64
|
+
static fromJSON(data: BloomFilterJSON): BloomFilter;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
interface CountingBloomFilterOptions {
|
|
68
|
+
/** Expected number of items. */
|
|
69
|
+
capacity: number;
|
|
70
|
+
/** Target false-positive rate (default: 0.01). */
|
|
71
|
+
errorRate?: number;
|
|
72
|
+
/** Counter width in bits — 4 (default) supports up to 15 adds per cell; 8 presumably raises that to 255 (2^bits − 1). */
|
|
73
|
+
counterBits?: 4 | 8;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Counting Bloom filter — supports deletion by maintaining per-cell counters
|
|
77
|
+
* instead of single bits.
|
|
78
|
+
*
|
|
79
|
+
* - `add(item)` increments k counters.
|
|
80
|
+
* - `remove(item)` decrements k counters. Do NOT remove items that were never added.
|
|
81
|
+
* - `has(item)` returns `true` iff all k counters are > 0.
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* const cbf = new CountingBloomFilter({ capacity: 10_000 });
|
|
85
|
+
* cbf.add("user:42");
|
|
86
|
+
* cbf.has("user:42"); // true
|
|
87
|
+
* cbf.remove("user:42");
|
|
88
|
+
* cbf.has("user:42"); // false
|
|
89
|
+
*/
|
|
90
|
+
declare class CountingBloomFilter {
|
|
91
|
+
readonly capacity: number;
|
|
92
|
+
readonly errorRate: number;
|
|
93
|
+
readonly m: number;
|
|
94
|
+
readonly k: number;
|
|
95
|
+
private readonly _counters;
|
|
96
|
+
private readonly _counterBits;
|
|
97
|
+
private readonly _maxCount;
|
|
98
|
+
private _count;
|
|
99
|
+
constructor(options: CountingBloomFilterOptions);
|
|
100
|
+
private _get;
|
|
101
|
+
private _increment;
|
|
102
|
+
private _decrement;
|
|
103
|
+
/** Add an item to the filter. */
|
|
104
|
+
add(item: string): this;
|
|
105
|
+
/**
|
|
106
|
+
* Remove an item from the filter.
|
|
107
|
+
* Only remove items you previously added — removing items never added leads
|
|
108
|
+
* to false negatives.
|
|
109
|
+
*/
|
|
110
|
+
remove(item: string): this;
|
|
111
|
+
/** Test membership. */
|
|
112
|
+
has(item: string): boolean;
|
|
113
|
+
/** Number of items currently in the filter (approximate). */
|
|
114
|
+
get size(): number;
|
|
115
|
+
/** Reset the filter. */
|
|
116
|
+
clear(): this;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
interface ScalableBloomFilterOptions {
|
|
120
|
+
/**
|
|
121
|
+
* Initial capacity (items before first resize). Default: 1000.
|
|
122
|
+
*/
|
|
123
|
+
initialCapacity?: number;
|
|
124
|
+
/**
|
|
125
|
+
* Target overall false-positive rate. Default: 0.01.
|
|
126
|
+
* Each sub-filter uses `errorRate * tighteningRatio^i` so the series
|
|
127
|
+
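* converges to a total FPR ≤ `errorRate`. NOTE(review): a plain geometric series sums to `errorRate / (1 - tighteningRatio)`, so this bound presumably relies on the base rate being pre-scaled by `(1 - tighteningRatio)` — confirm against the implementation.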
|
|
128
|
+
*/
|
|
129
|
+
errorRate?: number;
|
|
130
|
+
/**
|
|
131
|
+
* Scale factor: each new sub-filter has `scaleFactor × previous capacity`.
|
|
132
|
+
* Common values: 2 (default) or 4.
|
|
133
|
+
*/
|
|
134
|
+
scaleFactor?: number;
|
|
135
|
+
/**
|
|
136
|
+
* Ratio by which each sub-filter tightens its FPR. Default: 0.9.
|
|
137
|
+
* Must be in (0, 1).
|
|
138
|
+
*/
|
|
139
|
+
tighteningRatio?: number;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Scalable Bloom filter — grows automatically as items are added, so you
|
|
143
|
+
* don't need to know the final set size upfront.
|
|
144
|
+
*
|
|
145
|
+
* Maintains a series of standard BloomFilters; when the current one is full,
|
|
146
|
+
* a new one is created with higher capacity and tighter FPR.
|
|
147
|
+
*
|
|
148
|
+
* Port of Python pybloom-live's `ScalableBloomFilter`.
|
|
149
|
+
*
|
|
150
|
+
* @example
|
|
151
|
+
* const sbf = new ScalableBloomFilter({ errorRate: 0.01 });
|
|
152
|
+
* for (const id of millionIds) sbf.add(id);
|
|
153
|
+
* sbf.has(millionIds[0]); // true — added items are always found, even with 1M+ items
|
|
154
|
+
*/
|
|
155
|
+
declare class ScalableBloomFilter {
|
|
156
|
+
private readonly _initialCapacity;
|
|
157
|
+
private readonly _errorRate;
|
|
158
|
+
private readonly _scaleFactor;
|
|
159
|
+
private readonly _tighteningRatio;
|
|
160
|
+
private _filters;
|
|
161
|
+
private _count;
|
|
162
|
+
constructor(options?: ScalableBloomFilterOptions);
|
|
163
|
+
private _createFilter;
|
|
164
|
+
/** Add an item. The filter grows automatically when the current slice is full. */
|
|
165
|
+
add(item: string): this;
|
|
166
|
+
/**
|
|
167
|
+
* Test membership — checks all sub-filters.
|
|
168
|
+
* Returns `true` if item may have been added, `false` if definitely not.
|
|
169
|
+
*/
|
|
170
|
+
has(item: string): boolean;
|
|
171
|
+
/** Total number of items added (approximate). */
|
|
172
|
+
get size(): number;
|
|
173
|
+
/** Number of internal sub-filters created so far. */
|
|
174
|
+
get filterCount(): number;
|
|
175
|
+
/** Total bits allocated across all sub-filters. */
|
|
176
|
+
get bitsAllocated(): number;
|
|
177
|
+
/** Target false-positive rate. */
|
|
178
|
+
get errorRate(): number;
|
|
179
|
+
/** Reset the filter. */
|
|
180
|
+
clear(): this;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* MurmurHash3 (32-bit, x86) — fast non-cryptographic hash.
|
|
185
|
+
* Returns an unsigned 32-bit integer.
|
|
186
|
+
*/
|
|
187
|
+
declare function murmur3(key: string, seed?: number): number;
|
|
188
|
+
/**
|
|
189
|
+
* FNV-1a (32-bit) — second independent hash for double-hashing.
|
|
190
|
+
*/
|
|
191
|
+
declare function fnv1a(key: string, seed?: number): number;
|
|
192
|
+
/**
|
|
193
|
+
* Generate `k` hash positions in `[0, m)` using double hashing.
|
|
194
|
+
* g_i(x) = (h1(x) + i * h2(x)) mod m
|
|
195
|
+
*/
|
|
196
|
+
declare function hashPositions(key: string, k: number, m: number): number[];
|
|
197
|
+
|
|
198
|
+
/** Compact fixed-size bit array backed by Uint32Array. */
|
|
199
|
+
declare class BitArray {
|
|
200
|
+
private readonly _buf;
|
|
201
|
+
readonly size: number;
|
|
202
|
+
constructor(size: number);
|
|
203
|
+
set(index: number): void;
|
|
204
|
+
get(index: number): boolean;
|
|
205
|
+
clear(): void;
|
|
206
|
+
/** Number of bits set to 1 (popcount). */
|
|
207
|
+
popcount(): number;
|
|
208
|
+
/** Export as a base64-encoded string for serialization. */
|
|
209
|
+
toBase64(): string;
|
|
210
|
+
/** Restore from a base64 string produced by `toBase64`. */
|
|
211
|
+
static fromBase64(b64: string, size: number): BitArray;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
export { BitArray, BloomFilter, type BloomFilterJSON, type BloomFilterOptions, CountingBloomFilter, type CountingBloomFilterOptions, ScalableBloomFilter, type ScalableBloomFilterOptions, fnv1a, hashPositions, murmur3, optimalK, optimalM };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/** @internal Compute the optimal bit-array size `m` for `n` expected items and a target false-positive rate `fpr`. */
|
|
2
|
+
declare function optimalM(n: number, fpr: number): number;
|
|
3
|
+
/** @internal Compute the optimal number of hash functions `k` for a bit array of `m` bits holding `n` items. */
|
|
4
|
+
declare function optimalK(m: number, n: number): number;
|
|
5
|
+
interface BloomFilterOptions {
|
|
6
|
+
/**
|
|
7
|
+
* Expected number of items that will be inserted.
|
|
8
|
+
* Used to size the filter optimally.
|
|
9
|
+
*/
|
|
10
|
+
capacity: number;
|
|
11
|
+
/**
|
|
12
|
+
* Target false-positive rate (0 < errorRate < 1). Default: 0.01 (1%).
|
|
13
|
+
*/
|
|
14
|
+
errorRate?: number;
|
|
15
|
+
}
|
|
16
|
+
interface BloomFilterJSON {
|
|
17
|
+
type: "BloomFilter";
|
|
18
|
+
capacity: number;
|
|
19
|
+
errorRate: number;
|
|
20
|
+
m: number;
|
|
21
|
+
k: number;
|
|
22
|
+
bits: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Standard Bloom filter — space-efficient probabilistic set membership.
|
|
26
|
+
*
|
|
27
|
+
* - `add(item)` always works correctly.
|
|
28
|
+
* - `has(item)` may return `true` for items not added (false positive; rate ≤ `errorRate` as long as no more than `capacity` items have been added).
|
|
29
|
+
* - `has(item)` never returns `false` for items that were added.
|
|
30
|
+
* - Items cannot be removed (use `CountingBloomFilter` for deletions).
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* const bf = new BloomFilter({ capacity: 1_000_000, errorRate: 0.01 });
|
|
34
|
+
* bf.add("hello");
|
|
35
|
+
* bf.has("hello"); // true
|
|
36
|
+
* bf.has("world"); // false (with high probability)
|
|
37
|
+
*/
|
|
38
|
+
declare class BloomFilter {
|
|
39
|
+
readonly capacity: number;
|
|
40
|
+
readonly errorRate: number;
|
|
41
|
+
/** Bit-array size. */
|
|
42
|
+
readonly m: number;
|
|
43
|
+
/** Number of hash functions. */
|
|
44
|
+
readonly k: number;
|
|
45
|
+
private readonly _bits;
|
|
46
|
+
private _count;
|
|
47
|
+
constructor(options: BloomFilterOptions);
|
|
48
|
+
/** Add an item to the filter. */
|
|
49
|
+
add(item: string): this;
|
|
50
|
+
/**
|
|
51
|
+
* Test membership. Returns `true` if item *may* have been added,
|
|
52
|
+
* `false` if it *definitely* has not.
|
|
53
|
+
*/
|
|
54
|
+
has(item: string): boolean;
|
|
55
|
+
/** Number of items added (may exceed capacity). */
|
|
56
|
+
get size(): number;
|
|
57
|
+
/** Current estimated false-positive rate based on items inserted. */
|
|
58
|
+
get currentFPR(): number;
|
|
59
|
+
/** Reset the filter. */
|
|
60
|
+
clear(): this;
|
|
61
|
+
/** Serialize to a plain object for JSON.stringify. */
|
|
62
|
+
toJSON(): BloomFilterJSON;
|
|
63
|
+
/** Restore a BloomFilter from the output of `toJSON`. */
|
|
64
|
+
static fromJSON(data: BloomFilterJSON): BloomFilter;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
interface CountingBloomFilterOptions {
|
|
68
|
+
/** Expected number of items. */
|
|
69
|
+
capacity: number;
|
|
70
|
+
/** Target false-positive rate (default: 0.01). */
|
|
71
|
+
errorRate?: number;
|
|
72
|
+
/** Counter width in bits — 4 (default) supports up to 15 adds per cell; 8 presumably raises that to 255 (2^bits − 1). */
|
|
73
|
+
counterBits?: 4 | 8;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Counting Bloom filter — supports deletion by maintaining per-cell counters
|
|
77
|
+
* instead of single bits.
|
|
78
|
+
*
|
|
79
|
+
* - `add(item)` increments k counters.
|
|
80
|
+
* - `remove(item)` decrements k counters. Do NOT remove items that were never added.
|
|
81
|
+
* - `has(item)` returns `true` iff all k counters are > 0.
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* const cbf = new CountingBloomFilter({ capacity: 10_000 });
|
|
85
|
+
* cbf.add("user:42");
|
|
86
|
+
* cbf.has("user:42"); // true
|
|
87
|
+
* cbf.remove("user:42");
|
|
88
|
+
* cbf.has("user:42"); // false
|
|
89
|
+
*/
|
|
90
|
+
declare class CountingBloomFilter {
|
|
91
|
+
readonly capacity: number;
|
|
92
|
+
readonly errorRate: number;
|
|
93
|
+
readonly m: number;
|
|
94
|
+
readonly k: number;
|
|
95
|
+
private readonly _counters;
|
|
96
|
+
private readonly _counterBits;
|
|
97
|
+
private readonly _maxCount;
|
|
98
|
+
private _count;
|
|
99
|
+
constructor(options: CountingBloomFilterOptions);
|
|
100
|
+
private _get;
|
|
101
|
+
private _increment;
|
|
102
|
+
private _decrement;
|
|
103
|
+
/** Add an item to the filter. */
|
|
104
|
+
add(item: string): this;
|
|
105
|
+
/**
|
|
106
|
+
* Remove an item from the filter.
|
|
107
|
+
* Only remove items you previously added — removing items never added leads
|
|
108
|
+
* to false negatives.
|
|
109
|
+
*/
|
|
110
|
+
remove(item: string): this;
|
|
111
|
+
/** Test membership. */
|
|
112
|
+
has(item: string): boolean;
|
|
113
|
+
/** Number of items currently in the filter (approximate). */
|
|
114
|
+
get size(): number;
|
|
115
|
+
/** Reset the filter. */
|
|
116
|
+
clear(): this;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
interface ScalableBloomFilterOptions {
|
|
120
|
+
/**
|
|
121
|
+
* Initial capacity (items before first resize). Default: 1000.
|
|
122
|
+
*/
|
|
123
|
+
initialCapacity?: number;
|
|
124
|
+
/**
|
|
125
|
+
* Target overall false-positive rate. Default: 0.01.
|
|
126
|
+
* Each sub-filter uses `errorRate * tighteningRatio^i` so the series
|
|
127
|
+
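* converges to a total FPR ≤ `errorRate`. NOTE(review): a plain geometric series sums to `errorRate / (1 - tighteningRatio)`, so this bound presumably relies on the base rate being pre-scaled by `(1 - tighteningRatio)` — confirm against the implementation.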
|
|
128
|
+
*/
|
|
129
|
+
errorRate?: number;
|
|
130
|
+
/**
|
|
131
|
+
* Scale factor: each new sub-filter has `scaleFactor × previous capacity`.
|
|
132
|
+
* Common values: 2 (default) or 4.
|
|
133
|
+
*/
|
|
134
|
+
scaleFactor?: number;
|
|
135
|
+
/**
|
|
136
|
+
* Ratio by which each sub-filter tightens its FPR. Default: 0.9.
|
|
137
|
+
* Must be in (0, 1).
|
|
138
|
+
*/
|
|
139
|
+
tighteningRatio?: number;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Scalable Bloom filter — grows automatically as items are added, so you
|
|
143
|
+
* don't need to know the final set size upfront.
|
|
144
|
+
*
|
|
145
|
+
* Maintains a series of standard BloomFilters; when the current one is full,
|
|
146
|
+
* a new one is created with higher capacity and tighter FPR.
|
|
147
|
+
*
|
|
148
|
+
* Port of Python pybloom-live's `ScalableBloomFilter`.
|
|
149
|
+
*
|
|
150
|
+
* @example
|
|
151
|
+
* const sbf = new ScalableBloomFilter({ errorRate: 0.01 });
|
|
152
|
+
* for (const id of millionIds) sbf.add(id);
|
|
153
|
+
* sbf.has(millionIds[0]); // true — added items are always found, even with 1M+ items
|
|
154
|
+
*/
|
|
155
|
+
declare class ScalableBloomFilter {
|
|
156
|
+
private readonly _initialCapacity;
|
|
157
|
+
private readonly _errorRate;
|
|
158
|
+
private readonly _scaleFactor;
|
|
159
|
+
private readonly _tighteningRatio;
|
|
160
|
+
private _filters;
|
|
161
|
+
private _count;
|
|
162
|
+
constructor(options?: ScalableBloomFilterOptions);
|
|
163
|
+
private _createFilter;
|
|
164
|
+
/** Add an item. The filter grows automatically when the current slice is full. */
|
|
165
|
+
add(item: string): this;
|
|
166
|
+
/**
|
|
167
|
+
* Test membership — checks all sub-filters.
|
|
168
|
+
* Returns `true` if item may have been added, `false` if definitely not.
|
|
169
|
+
*/
|
|
170
|
+
has(item: string): boolean;
|
|
171
|
+
/** Total number of items added (approximate). */
|
|
172
|
+
get size(): number;
|
|
173
|
+
/** Number of internal sub-filters created so far. */
|
|
174
|
+
get filterCount(): number;
|
|
175
|
+
/** Total bits allocated across all sub-filters. */
|
|
176
|
+
get bitsAllocated(): number;
|
|
177
|
+
/** Target false-positive rate. */
|
|
178
|
+
get errorRate(): number;
|
|
179
|
+
/** Reset the filter. */
|
|
180
|
+
clear(): this;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* MurmurHash3 (32-bit, x86) — fast non-cryptographic hash.
|
|
185
|
+
* Returns an unsigned 32-bit integer.
|
|
186
|
+
*/
|
|
187
|
+
declare function murmur3(key: string, seed?: number): number;
|
|
188
|
+
/**
|
|
189
|
+
* FNV-1a (32-bit) — second independent hash for double-hashing.
|
|
190
|
+
*/
|
|
191
|
+
declare function fnv1a(key: string, seed?: number): number;
|
|
192
|
+
/**
|
|
193
|
+
* Generate `k` hash positions in `[0, m)` using double hashing.
|
|
194
|
+
* g_i(x) = (h1(x) + i * h2(x)) mod m
|
|
195
|
+
*/
|
|
196
|
+
declare function hashPositions(key: string, k: number, m: number): number[];
|
|
197
|
+
|
|
198
|
+
/** Compact fixed-size bit array backed by Uint32Array. */
|
|
199
|
+
declare class BitArray {
|
|
200
|
+
private readonly _buf;
|
|
201
|
+
readonly size: number;
|
|
202
|
+
constructor(size: number);
|
|
203
|
+
set(index: number): void;
|
|
204
|
+
get(index: number): boolean;
|
|
205
|
+
clear(): void;
|
|
206
|
+
/** Number of bits set to 1 (popcount). */
|
|
207
|
+
popcount(): number;
|
|
208
|
+
/** Export as a base64-encoded string for serialization. */
|
|
209
|
+
toBase64(): string;
|
|
210
|
+
/** Restore from a base64 string produced by `toBase64`. */
|
|
211
|
+
static fromBase64(b64: string, size: number): BitArray;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
export { BitArray, BloomFilter, type BloomFilterJSON, type BloomFilterOptions, CountingBloomFilter, type CountingBloomFilterOptions, ScalableBloomFilter, type ScalableBloomFilterOptions, fnv1a, hashPositions, murmur3, optimalK, optimalM };
|