probe-filters 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,298 @@
1
+ # probe-filters
2
+
3
+ **probe-filters** — zero-false-negative, bounded-false-positive point, range, spatial, and temporal approximate membership filters with typed-array performance and expansion without key retention.
4
+
5
+ ```bash
6
+ npm install probe-filters
7
+ ```
8
+
9
+ ```js
10
+ import { PointFilter, RangeFilter, SpatialFilter, TemporalFilter, MultiFilter } from 'probe-filters';
11
+ ```
12
+
13
+
14
+ probe-filters provides four approximate membership filters and a composite `MultiFilter` facade. Every filter guarantees:
15
+
16
+ | Property | Meaning |
17
+ |----------|---------|
18
+ | **no false negatives** | `insert(k)` ⇒ `query(k)` always returns true |
19
+ | **bounded false positives** | tunable fingerprint size controls FPR |
20
+ | **no key storage** | all filters use fixed-size structural storage, independent of key count |
21
+ | **online expansion** | capacity doubles without access to original keys |
22
+
23
+ ### Filter types
24
+
25
+ **PointFilter** — Aleph quotient filter (SIGMOD 2025). 32-bit mother-hash slots with Zeno fractional-growth expansion (Kim et al., SIGMOD 2026). Handles deletions via void-entry tombstoning with queued duplicate removal on expansion. Packed RSQF metadata at 2.125 bits/slot.
26
+
27
+ **RangeFilter** — Aeris keepsake-box partition-count filter (Chesetti et al., SIGMOD 2026). Keys fall into fixed-width partitions. Each partition tracks a fingerprint and insertion count. Range queries check partition-level fingerprints; adaptation splits keepsake boxes and extends fingerprint length. No per-key storage — 1000 keys in the same partition occupy 1 entry.
28
+
29
+ **SpatialFilter** — Morton/Z-order curve backed by RangeFilter. 2D coordinates encode to 1D Morton codes. Bounding-box queries decompose into contiguous Morton intervals via recursive quad walk, then delegate to the RangeFilter backend. No cells, no per-coordinate storage.
30
+
31
+ **TemporalFilter** — ring-bucket PointFilters. Events inserted into time-aligned buckets indexed by `floor(timestampMs / bucketDurationMs)`. Aging buckets fall out of the retention horizon. Sparsely serialized — only populated buckets are written.
32
+
33
+ ### MultiFilter facade
34
+
35
+ Composes all four filters behind a single interface with `set/get/has/remove` aliases and `mergeFrom` UNION merge across all enabled dimensions.
36
+
37
+ ## CONFIGURATION
38
+
39
+ ### PointFilter(options)
40
+
41
+ | Option | Type | Default | Description |
42
+ |--------|------|---------|-------------|
43
+ | `initialCapacity` | number | 256 | Initial slot count (power of 2) |
44
+ | `fingerprintSize` | number | 12 | Fingerprint bits per slot |
45
+ | `expansionThreshold` | number | 0.95 | Load factor triggering expansion |
46
+
47
+ ```js
48
+ const pf = new PointFilter({ initialCapacity: 1024, fingerprintSize: 14 });
49
+ ```
50
+
51
+ ### RangeFilter(options)
52
+
53
+ | Option | Type | Default | Description |
54
+ |--------|------|---------|-------------|
55
+ | `partitionSize` | number | 32 | Keys per partition |
56
+ | `fingerprintBits` | number | 8 | Initial fingerprint bits per partition |
57
+ | `maxFingerprintBits` | number | 24 | Maximum fingerprint bits after adaptation |
58
+
59
+ ```js
60
+ const rf = new RangeFilter({ partitionSize: 64, fingerprintBits: 10 });
61
+ ```
62
+
63
+ ### SpatialFilter(options)
64
+
65
+ | Option | Type | Default | Description |
66
+ |--------|------|---------|-------------|
67
+ | `bitsPerCoordinate` | number | 16 | Precision bits per axis (grid size = 2^bits) |
68
+ | `coordinateSystem` | string | `'integer'` | `'integer'`, `'float'`, `'latlon'`, or custom |
69
+ | `bounds` | array | — | Float coordinate bounds: `[[minX, minY], [maxX, maxY]]` |
70
+ | `coordinateCodec` | object | — | Custom `{ normalizePoint, decodePoint, bounds }` |
71
+ | `rangeOptions` | object | — | Passed through to internal RangeFilter |
72
+
73
+ ```js
74
+ const sf = new SpatialFilter({ bitsPerCoordinate: 16 });
75
+
76
+ const sfFloat = new SpatialFilter({
77
+ coordinateSystem: 'float', bounds: [[0, 0], [1, 1]], bitsPerCoordinate: 16,
78
+ });
79
+
80
+ const sfGPS = new SpatialFilter({ coordinateSystem: 'latlon', bitsPerCoordinate: 16 });
81
+ ```
82
+
83
+ ### TemporalFilter(options)
84
+
85
+ | Option | Type | Default | Description |
86
+ |--------|------|---------|-------------|
87
+ | `bucketDurationMs` | number | 60_000 | Width of each time bucket (ms) |
88
+ | `retentionDurationMs` | number | 86_400_000 | How far back events are retained (ms) |
89
+ | `filterOptions` | object | — | Passed through to internal PointFilter per bucket |
90
+
91
+ ```js
92
+ const tf = new TemporalFilter({
93
+ bucketDurationMs: 60_000, // 1 minute buckets
94
+ retentionDurationMs: 7 * 24 * 60 * 60_000, // 7 day retention
95
+ filterOptions: { initialCapacity: 64, fingerprintSize: 8 },
96
+ });
97
+ ```
98
+
99
+ ### MultiFilter(options)
100
+
101
+ | Option | Type | Default | Description |
102
+ |--------|------|---------|-------------|
103
+ | `point` | boolean | true | Enable PointFilter; `false` to disable |
104
+ | `range` | boolean | true | Enable RangeFilter; `false` to disable |
105
+ | `spatial` | boolean | true | Enable SpatialFilter; `false` to disable |
106
+ | `temporal` | boolean | true | Enable TemporalFilter; `false` to disable |
107
+ | `pointOptions` | object | — | Options forwarded to PointFilter |
108
+ | `rangeOptions` | object | — | Options forwarded to RangeFilter |
109
+ | `spatialOptions` | object | — | Options forwarded to SpatialFilter |
110
+ | `temporalOptions` | object | — | Options forwarded to TemporalFilter |
111
+
112
+ Top-level options without a prefix are forwarded to PointFilter (backward compatibility):
113
+ ```js
114
+ const mf = new MultiFilter({ fingerprintSize: 10 }); // → pointOptions.fingerprintSize
115
+ ```
116
+
117
+ ## API
118
+
119
+ ### PointFilter
120
+
121
+ ```
122
+ insert(key) → void Insert a key
123
+ query(key) → boolean Test membership (no false negatives)
124
+ delete(key) → boolean Remove a key (tombstone)
125
+ rejuvenate(key) → boolean Move key to newer fingerprint to reduce persistent FPs
126
+ expand() → void Double capacity (Zeno fractional-growth)
127
+ getStats() → object { capacity, loadFactor, expansions, ... }
128
+ serialize() → ArrayBuffer Compact binary with CRC32 integrity
129
+ static deserialize(buffer[, options])
130
+ ```
131
+
132
+ ### RangeFilter
133
+
134
+ ```
135
+ insert(key) → void Insert a key (increments partition count)
136
+ delete(key) → boolean Decrement count; remove partition at 0
137
+ queryRange(start, end) → boolean Test any key in [start, end]
138
+ adaptFalsePositive(start, end) → boolean Split partitions, extend fingerprints to fix FPs
139
+ expand() → void Shrink fingerprints, rejuvenate all partitions
140
+ getStats() → object { partitions, keepsakeBoxes, expansionLevel, ... }
141
+ serialize() → ArrayBuffer Compact binary with CRC32 integrity
142
+ static deserialize(buffer)
143
+ ```
144
+
145
+ ### SpatialFilter
146
+
147
+ ```
148
+ insert(point) → void [x, y] point (integer, float, or lat/lon)
149
+ insertBox(min, max[, id]) → void Bounding box shape
150
+ insertCircle(center, radius[, id]) → void Circle shape
151
+ query(point) → boolean Single-point membership
152
+ queryBox(min, max) → boolean Bounding-box membership
153
+ adaptFalsePositiveBox(min, max) → boolean Decompose box to Morton intervals, adapt
154
+ getStats() → object { partitions, bitsPerCoordinate, gridSize, ... }
155
+ serialize() → ArrayBuffer Compact binary with CRC32 integrity (wraps RangeFilter)
156
+ static deserialize(buffer)
157
+ ```
158
+
159
+ ### TemporalFilter
160
+ (time units: ms, timestamps: epoch ms)
161
+
162
+ ```
163
+ insertAt(key, timestampMs) → void
164
+ queryWithinLast(key, durationMs[, nowMs]) → boolean
165
+ queryAgo(key, ageMs, toleranceMs[, nowMs]) → boolean
166
+ queryBetweenAges(key, minAgeMs, maxAgeMs[, nowMs]) → boolean
167
+ adaptFalsePositiveWithinLast(key, durationMs[, nowMs]) → boolean
168
+ adaptFalsePositiveAgo(key, ageMs, toleranceMs[, nowMs]) → boolean
169
+ adaptFalsePositiveBetweenAges(key, minAgeMs, maxAgeMs[, nowMs]) → boolean
170
+ getStats([nowMs]) → object { bucketDurationMs, retentionDurationMs, activeBuckets }
171
+ serialize() → ArrayBuffer Compact binary with CRC32 integrity (populated-bucket-only)
172
+ static deserialize(buffer)
173
+ ```
174
+
175
+ ### MultiFilter
176
+
177
+ MultiFilter exposes the same methods as each sub-filter, prefixed where needed:
178
+
179
+ ```
180
+ set(key) / get(key) / has(key) / remove(key) Point aliases
181
+ setRangeKey(key) / hasRange(start, end) / removeRangeKey(key) Range aliases
182
+ setSpatialPoint(point) / hasBox(min, max) Spatial aliases
183
+ setTemporalKey(key, ts) / queryWithinLast(key, dur[, now]) Temporal aliases
184
+ adaptFalsePositive(query, kind) kind ∈ {'point','range','spatial','temporal'}
185
+ mergeFrom(other, operator) 'union'
186
+ serialize() / static deserialize(buffer)
187
+ ```
188
+
189
+ Spatial shape insertion: `insertSpatialBox(min, max)` / `insertSpatialCircle(center, radius)`.
190
+
191
+ Temporal adaptation: `adaptTemporalFalsePositiveWithinLast/Ago/BetweenAges`.
192
+
193
+ ## SERIALIZATION
194
+
195
+ All filters implement `serialize()` → `ArrayBuffer` and `static deserialize(buffer[, options])`. Every buffer is magic-number-prefixed and carries a 4-byte IEEE 802.3 CRC32 trailing checksum. Corrupted buffers throw on deserialization.
196
+
197
+ Low-level `BinaryWriter`/`BinaryReader` primitives and `wrapCRC32`/`unwrapCRC32` are available via `probe-filters/serialization`.
198
+
199
+ ## PERFORMANCE
200
+
201
+ Mean per-operation latency from `npm run benchmark` (Node.js, 5000-element point/range, 2500 spatial, 4000 temporal):
202
+
203
+ | Operation | Mean | p50 | p95 |
204
+ |-----------|------|-----|-----|
205
+ | PointFilter query (hit) | 10.9 μs | 1.5 μs | 50.4 μs |
206
+ | PointFilter query (miss) | 7.0 μs | 6.3 μs | 11.4 μs |
207
+ | RangeFilter queryRange (hit) | 5.8 μs | 5.2 μs | 10.0 μs |
208
+ | RangeFilter queryRange (miss) | 4.1 μs | 3.7 μs | 5.9 μs |
209
+ | SpatialFilter queryBox | 9.0 μs | 6.1 μs | 16.4 μs |
210
+ | TemporalFilter queryWithinLast | 43.6 μs | 41.7 μs | 62.0 μs |
211
+ | PointFilter serialize | 1.0 ms | 0.4 ms | 2.8 ms |
212
+ | PointFilter deserialize | 1.2 ms | 0.8 ms | 3.4 ms |
213
+ | RangeFilter serialize | 1.0 ms | 0.8 ms | 2.0 ms |
214
+ | RangeFilter deserialize | 5.3 ms | 4.2 ms | 9.7 ms |
215
+ | SpatialFilter serialize | 1.0 ms | 0.6 ms | 1.7 ms |
216
+ | SpatialFilter deserialize | 4.2 ms | 3.0 ms | 10.4 ms |
217
+ | TemporalFilter serialize | 3.2 ms | 2.7 ms | 6.2 ms |
218
+ | TemporalFilter deserialize | 5.5 ms | 4.4 ms | 12.7 ms |
219
+ | MultiFilter serialize | 6.9 ms | 4.6 ms | 14.8 ms |
220
+ | MultiFilter deserialize | 12.9 ms | 10.4 ms | 20.3 ms |
221
+ | mergeFrom UNION (2×2000) | 32.9 ms | 30.6 ms | 45.9 ms |
222
+
223
+ Each serialized buffer includes a 4-byte CRC32 integrity check. All measurements are single-key operations (no batching), so they are directly comparable across filter types.
224
+
225
+ ## EXAMPLES
226
+
227
+ ### LSM-tree SSTable filter
228
+
229
+ ```js
230
+ const sstable = new MultiFilter({
231
+ pointOptions: { initialCapacity: 1_000_000, fingerprintSize: 14 },
232
+ rangeOptions: { partitionSize: 256, fingerprintBits: 10 },
233
+ });
234
+ sstable.set('row:42');
235
+ sstable.setRangeKey(42);
236
+ sstable.has('row:42'); // true — skip disk read
237
+ sstable.hasRange(40, 50); // true — interval covered
238
+ ```
239
+
240
+ ### Distributed cache summary exchange
241
+
242
+ ```js
243
+ const local = new MultiFilter({ pointOptions: { fingerprintSize: 12 } });
244
+ local.set('key-a'); local.set('key-b');
245
+
246
+ const remote = new MultiFilter({ pointOptions: { fingerprintSize: 12 } });
247
+ remote.set('key-c');
248
+
249
+ // Merge remote summary into local
250
+ local.mergeFrom(remote, 'union');
251
+ local.has('key-c'); // true
252
+
253
+ // Transmit over wire
254
+ const wire = local.serialize();
255
+ // ... send wire ...
256
+ const peer = MultiFilter.deserialize(wire);
257
+ ```
258
+
259
+ ### Spatial pre-filter for geometry queries
260
+
261
+ ```js
262
+ const spatial = new SpatialFilter({
263
+ coordinateSystem: 'latlon', bitsPerCoordinate: 16,
264
+ });
265
+
266
+ spatial.insert([34.0522, -118.2437]); // Los Angeles
267
+ spatial.insert([40.7128, -74.0060]); // New York
268
+
269
+ spatial.queryBox([33.0, -120.0], [35.0, -117.0]); // true — LA in box
270
+ spatial.adaptFalsePositiveBox([37.0, -123.0], [38.0, -121.0]); // fix San Francisco FP
271
+ ```
272
+
273
+ ### Event recency tracking
274
+
275
+ ```js
276
+ const temporal = new TemporalFilter({
277
+ bucketDurationMs: 10_000, // 10s buckets
278
+ retentionDurationMs: 3_600_000, // 1 hour retention
279
+ });
280
+
281
+ temporal.insertAt('sensor:42', Date.now());
282
+ temporal.insertAt('sensor:42', Date.now() - 45_000);
283
+
284
+ temporal.queryWithinLast('sensor:42', 60_000); // true — seen in last minute
285
+ temporal.queryBetweenAges('sensor:42', 30_000, 90_000); // true — seen 30-90s ago
286
+ ```
287
+
288
+ ## SEE ALSO
289
+
290
+ **probe-maplets** — key-value maplet extensions providing value aggregation on top of probe-filters filters.
291
+
292
+ Papers:
293
+ - Yuvaraj Chesetti, Navid Eslami, Huanchen Zhang, Niv Dayan, and Prashant Pandey. 2026. *Aeris Filter: A Strongly and Monotonically Adaptive Range Filter*. Proc. ACM Manag. Data 4, 1 (SIGMOD), Article 7. https://doi.org/10.1145/3786621 — RangeFilter and SpatialFilter keepsake-box engine
294
+ - Hyuhng Min Kim, Navid Eslami, and Niv Dayan. 2026. *Zeno Filter: To Infinity in Tiny Steps*. Proc. ACM Manag. Data 4, 3 (SIGMOD), Article 251. https://doi.org/10.1145/3802128 — PointFilter fractional-growth expansion policy
295
+ - *Aleph Filter: A Dynamically Expanding Quotient Filter* (SIGMOD 2025) — PointFilter quotient-filter engine
296
+ - *Time To Replace Your Filter: How Maplets Simplify System Design* (arXiv:2510.05518) — companion key-value layer
297
+
298
+ *Liberated from Tenere Labs Monolith, May 2026.*
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "probe-filters",
3
+ "version": "1.0.0",
4
+ "description": "Dynamic approximate membership filters for point, range, spatial, and temporal queries (Aleph, Aeris, Zeno engines)",
5
+ "main": "src/index.js",
6
+ "type": "module",
7
+ "exports": {
8
+ ".": "./src/index.js",
9
+ "./serialization": "./src/serialization.js"
10
+ },
11
+ "files": [
12
+ "src/",
13
+ "README.md",
14
+ "LICENSE"
15
+ ],
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "git+https://github.com/JDvorak/probe-filters.git"
19
+ },
20
+ "scripts": {
21
+ "test": "npm run test:contract && npm run test:property && npm run test:e2e",
22
+ "test:contract": "node --experimental-vm-modules node_modules/.bin/tape tests/contract.test.js",
23
+ "test:property": "node --experimental-vm-modules node_modules/.bin/tape tests/property.test.js",
24
+ "test:e2e": "node --experimental-vm-modules node_modules/.bin/tape tests/e2e.test.js",
25
+ "benchmark": "node tests/benchmark.js"
26
+ },
27
+ "keywords": [
28
+ "filter",
29
+ "quotient-filter",
30
+ "infini-filter",
31
+ "aleph-filter",
32
+ "aeris-filter",
33
+ "zeno-filter",
34
+ "range-filter",
35
+ "spatial-filter",
36
+ "temporal-filter",
37
+ "adaptive-filter",
38
+ "bloom-filter",
39
+ "probabilistic-data-structure"
40
+ ],
41
+ "author": "",
42
+ "license": "MIT",
43
+ "devDependencies": {
44
+ "fast-check": "^3.0.0",
45
+ "tape": "^5.0.0"
46
+ },
47
+ "engines": {
48
+ "node": ">=14.0.0"
49
+ }
50
+ }
package/src/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export { PointFilter } from './pointFilter.js';
2
+ export { RangeFilter } from './rangeFilter.js';
3
+ export { SpatialFilter } from './spatialFilter.js';
4
+ export { TemporalFilter } from './temporalFilter.js';
5
+ export { MultiFilter } from './multiFilter.js';
@@ -0,0 +1,16 @@
/**
 * Supported filter merge operators, keyed by their canonical upper-case name.
 * Each key maps to an identical string value.
 */
const FILTER_MERGE_OPERATORS = {
  UNION: 'UNION',
};

/**
 * Normalizes a merge operator to its canonical upper-case name.
 *
 * The operator is coerced with `String()` and upper-cased, so `'union'`,
 * `'Union'` and `'UNION'` are all accepted.
 *
 * @param {*} [operator='UNION'] - Operator name; `undefined` selects UNION.
 * @returns {string} The matching key of `FILTER_MERGE_OPERATORS`.
 * @throws {Error} If the coerced, upper-cased name is not an own key of
 *   `FILTER_MERGE_OPERATORS`.
 */
function normalizeFilterMergeOperator(operator = FILTER_MERGE_OPERATORS.UNION) {
  // Coerce once and reuse the result in the error message. Interpolating
  // `operator` itself into a template literal throws TypeError for Symbols,
  // while String() converts them, so the raw value must not be interpolated.
  const label = String(operator);
  const normalized = label.toUpperCase();

  // Own-property check: only keys declared on the table itself count,
  // never anything inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(FILTER_MERGE_OPERATORS, normalized)) {
    throw new Error(`Unsupported filter merge operator: ${label}`);
  }
  return normalized;
}

export {
  FILTER_MERGE_OPERATORS,
  normalizeFilterMergeOperator,
};