probe-filters 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,298 @@
1
+ import { djb2 } from './pointFilter.js';
2
+ import { BinaryWriter, BinaryReader, MAGIC, wrapCRC32, unwrapCRC32 } from './serialization.js';
3
+
4
/**
 * Rounds a value up to a power of two.
 *
 * @param {number} value - The size to round up.
 * @returns {number} 1 when value is <= 1, otherwise 2 raised to the ceiling
 *   of log2(value).
 */
function nextPowerOfTwo(value) {
  // Anything at or below 1 maps to 2 ** 0.
  const exponent = value <= 1 ? 0 : Math.ceil(Math.log2(value));
  return 2 ** exponent;
}
8
+
9
/**
 * Validates a RangeFilter key and returns it as an unsigned 32-bit integer.
 *
 * Keys above 0xFFFFFFFF are rejected instead of being wrapped by `>>> 0`:
 * wrapping would silently alias 2 ** 32 to 0, and an ascending range that
 * crosses the 32-bit boundary would be reported as "start > end".
 *
 * @param {number} key - Candidate key.
 * @returns {number} The key as an unsigned 32-bit integer.
 * @throws {Error} If the key is not a non-negative integer, or does not fit
 *   in 32 bits.
 */
function normalizeKey(key) {
  if (!Number.isInteger(key) || key < 0) {
    throw new Error('RangeFilter keys must be non-negative integers.');
  }
  if (key > 0xFFFFFFFF) {
    throw new Error('RangeFilter keys must fit in an unsigned 32-bit integer.');
  }
  // `>>> 0` is kept so that -0 is normalized to +0.
  return key >>> 0;
}
15
+
16
/**
 * Approximate range-membership filter.
 *
 * Keys are grouped into fixed-size partitions. Each partition that holds at
 * least one key is tracked by an entry and filed into a "keepsake box" keyed
 * by `(fingerprint length, fingerprint)`, where the fingerprint is the low
 * `length` bits of the partition's hash. A range query reports a hit when any
 * partition it overlaps has a box matching its hash prefix.
 */
export class RangeFilter {
  /**
   * @param {object} [options]
   * @param {number} [options.maxRangeLength=1024] - Used only to derive the
   *   default partition size (rounded up to a power of two).
   * @param {number} [options.partitionSize] - Keys per partition; must be a
   *   positive integer.
   * @param {number} [options.fingerprintBits=12] - Fingerprint length given to
   *   new partitions; must be a positive integer.
   * @param {number} [options.maxFingerprintBits=24] - Longest fingerprint a
   *   query scans for and a split may produce. Raised to `fingerprintBits`
   *   when smaller.
   * @param {Function} [options.hashFunction=djb2] - Called as
   *   `hashFunction(String(partition), 0)`.
   * @throws {Error} If a numeric option is not a positive integer.
   */
  constructor(options = {}) {
    const maxRangeLength = options.maxRangeLength ?? 1024;
    const partitionSize = options.partitionSize ?? nextPowerOfTwo(maxRangeLength);
    const fingerprintBits = options.fingerprintBits ?? 12;
    const maxFingerprintBits = options.maxFingerprintBits ?? 24;

    // A zero, negative or fractional partition size breaks the key -> partition
    // mapping (division by zero) and cannot round-trip through the u32 field
    // used by serialize().
    if (!Number.isInteger(partitionSize) || partitionSize < 1) {
      throw new Error('RangeFilter partitionSize must be a positive integer.');
    }
    // _matchingBoxes() only probes integer lengths starting at 1, so any other
    // fingerprint length would file entries where no query can ever find them.
    if (!Number.isInteger(fingerprintBits) || fingerprintBits < 1) {
      throw new Error('RangeFilter fingerprintBits must be a positive integer.');
    }
    if (!Number.isInteger(maxFingerprintBits) || maxFingerprintBits < 1) {
      throw new Error('RangeFilter maxFingerprintBits must be a positive integer.');
    }

    this.partitionSize = partitionSize;
    this.partitionBits = Math.log2(partitionSize);
    this.fingerprintBits = fingerprintBits;
    // Queries scan lengths 1..maxFingerprintBits; new entries start at
    // fingerprintBits, so the scan limit must never be below it or freshly
    // inserted keys would be reported as absent (a false negative).
    this.maxFingerprintBits = Math.max(maxFingerprintBits, fingerprintBits);
    this.hashFunction = options.hashFunction ?? djb2;
    this.expansionLevel = 0;
    // partition id -> entry { partition, length, fingerprint, falsePositiveCount, age, count }
    this.partitions = new Map();
    // "length:fingerprint" -> box { length, fingerprint, partitions, extensionBits, age }
    this.boxes = new Map();
    // partition id -> Set of box keys that currently hold the partition
    this.partitionToBoxKeys = new Map();
  }

  /**
   * @param {number} key - Normalized key.
   * @returns {number} Index of the partition containing the key (uint32).
   */
  _partitionOf(key) {
    return Math.floor(key / this.partitionSize) >>> 0;
  }

  /**
   * @param {number} partition - Partition index.
   * @returns {number} The hash function's result for the partition, as uint32.
   */
  _partitionHash(partition) {
    return this.hashFunction(String(partition), 0) >>> 0;
  }

  /**
   * @param {number} partition - Partition index.
   * @param {number} length - Fingerprint length in bits.
   * @returns {number} The low `length` bits of the partition hash, or 0 when
   *   `length` is not positive.
   */
  _fingerprint(partition, length) {
    if (length <= 0) return 0;
    const hash = this._partitionHash(partition);
    return hash % (2 ** length);
  }

  /**
   * @param {number} length - Fingerprint length in bits.
   * @param {number} fingerprint - Fingerprint value.
   * @returns {string} Key used in `this.boxes`.
   */
  _boxKey(length, fingerprint) {
    return `${length}:${fingerprint}`;
  }

  /**
   * Returns the box for `(length, fingerprint)`, creating and registering an
   * empty one when none exists.
   *
   * @param {number} length - Fingerprint length in bits.
   * @param {number} fingerprint - Fingerprint value.
   * @returns {object} The box.
   */
  _getOrCreateBox(length, fingerprint) {
    const key = this._boxKey(length, fingerprint);
    let box = this.boxes.get(key);
    if (!box) {
      box = {
        length,
        fingerprint,
        partitions: new Map(),
        extensionBits: Math.max(0, length - this.fingerprintBits),
        age: 0,
      };
      this.boxes.set(key, box);
    }
    return box;
  }

  /**
   * Removes an entry from the box named by its current length/fingerprint and
   * from the reverse map, dropping the box or reverse-map set once empty.
   * Does nothing when that box is not registered.
   *
   * @param {object} entry - Partition entry.
   */
  _removePartitionFromBox(entry) {
    const key = this._boxKey(entry.length, entry.fingerprint);
    const box = this.boxes.get(key);
    if (!box) return;

    box.partitions.delete(entry.partition);
    if (box.partitions.size === 0) {
      this.boxes.delete(key);
    }

    const boxKeys = this.partitionToBoxKeys.get(entry.partition);
    if (boxKeys) {
      boxKeys.delete(key);
      if (boxKeys.size === 0) this.partitionToBoxKeys.delete(entry.partition);
    }
  }

  /**
   * Recomputes the entry's fingerprint for its current length, files the entry
   * in the matching box and records the box key in the reverse map.
   *
   * @param {object} entry - Partition entry; `entry.fingerprint` is overwritten.
   */
  _placePartition(entry) {
    entry.fingerprint = this._fingerprint(entry.partition, entry.length);
    const box = this._getOrCreateBox(entry.length, entry.fingerprint);
    box.partitions.set(entry.partition, entry);

    const boxKey = this._boxKey(entry.length, entry.fingerprint);
    let boxKeys = this.partitionToBoxKeys.get(entry.partition);
    if (!boxKeys) {
      boxKeys = new Set();
      this.partitionToBoxKeys.set(entry.partition, boxKeys);
    }
    boxKeys.add(boxKey);
  }

  /**
   * Records a key. The first key of a partition creates its entry; later keys
   * in the same partition only increase the entry's count.
   *
   * @param {number} key - Key to insert.
   * @throws {Error} If the key is rejected by normalizeKey.
   */
  insert(key) {
    const normalized = normalizeKey(key);
    const partition = this._partitionOf(normalized);

    let entry = this.partitions.get(partition);
    if (!entry) {
      entry = {
        partition,
        length: this.fingerprintBits,
        fingerprint: this._fingerprint(partition, this.fingerprintBits),
        falsePositiveCount: 0,
        age: 0,
        count: 1,
      };
      this.partitions.set(partition, entry);
      this._placePartition(entry);
    } else {
      entry.count++;
    }
  }

  /**
   * Decrements the count of the key's partition and removes the partition once
   * the count reaches zero.
   *
   * @param {number} key - Key to remove.
   * @returns {boolean} false when the key's partition is not tracked,
   *   true otherwise.
   * @throws {Error} If the key is rejected by normalizeKey.
   */
  delete(key) {
    const normalized = normalizeKey(key);
    const partition = this._partitionOf(normalized);

    const entry = this.partitions.get(partition);
    if (!entry) return false;

    entry.count--;
    if (entry.count <= 0) {
      this._removePartitionFromBox(entry);
      this.partitions.delete(partition);
    }
    return true;
  }

  /**
   * Lists every partition overlapped by the inclusive range [start, end].
   *
   * @param {number} start - First key of the range.
   * @param {number} end - Last key of the range.
   * @returns {Array<{partition: number}>} One element per overlapped partition.
   * @throws {Error} If a bound is rejected by normalizeKey or start > end.
   */
  _subqueries(start, end) {
    const normalizedStart = normalizeKey(start);
    const normalizedEnd = normalizeKey(end);
    if (normalizedStart > normalizedEnd) {
      throw new Error('Range start must be <= range end.');
    }

    const startPartition = this._partitionOf(normalizedStart);
    const endPartition = this._partitionOf(normalizedEnd);
    const subqueries = [];

    for (let partition = startPartition; partition <= endPartition; partition++) {
      subqueries.push({ partition });
    }

    return subqueries;
  }

  /**
   * Finds every registered box whose fingerprint equals the partition's hash
   * prefix, probing each length from 1 to `maxFingerprintBits`.
   *
   * @param {number} partition - Partition index.
   * @returns {object[]} Matching boxes, ordered by increasing length.
   */
  _matchingBoxes(partition) {
    const matches = [];
    const hash = this._partitionHash(partition);
    for (let length = 1; length <= this.maxFingerprintBits; length++) {
      const fingerprint = hash % (2 ** length);
      const box = this.boxes.get(this._boxKey(length, fingerprint));
      if (box) matches.push(box);
    }
    return matches;
  }

  /**
   * Tests whether the inclusive range [start, end] may contain an inserted key.
   *
   * @param {number} start - First key of the range.
   * @param {number} end - Last key of the range.
   * @returns {boolean} true when any overlapped partition matches a box.
   * @throws {Error} If a bound is rejected by normalizeKey or start > end.
   */
  queryRange(start, end) {
    for (const subquery of this._subqueries(start, end)) {
      if (this._matchingBoxes(subquery.partition).length > 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Lengthens the fingerprints of every entry in `box` until none of them
   * shares a fingerprint with `queryPartition`, then re-files the entries.
   *
   * @param {object} box - Box that matched the query partition.
   * @param {number} queryPartition - Partition that produced the false positive.
   * @returns {boolean} false when no length up to `maxFingerprintBits`
   *   separates the entries from the query (nothing is changed), else true.
   */
  _splitBoxAwayFromQuery(box, queryPartition) {
    let newLength = box.length + 1;
    const entries = Array.from(box.partitions.values());

    // Search for the shortest longer fingerprint at which no entry in the box
    // still collides with the query partition.
    while (newLength <= this.maxFingerprintBits) {
      const queryFingerprint = this._fingerprint(queryPartition, newLength);
      const stillCollides = entries.some(
        (entry) =>
          entry.partition !== queryPartition &&
          this._fingerprint(entry.partition, newLength) === queryFingerprint,
      );
      if (!stillCollides) break;
      newLength++;
    }

    if (newLength > this.maxFingerprintBits) {
      return false;
    }

    // Unregister the old box and its reverse-map references.
    const oldKey = this._boxKey(box.length, box.fingerprint);
    this.boxes.delete(oldKey);
    for (const entry of entries) {
      const boxKeys = this.partitionToBoxKeys.get(entry.partition);
      if (boxKeys) {
        boxKeys.delete(oldKey);
        if (boxKeys.size === 0) this.partitionToBoxKeys.delete(entry.partition);
      }
    }

    // Re-file every entry under the longer fingerprint.
    for (const entry of entries) {
      entry.length = Math.max(entry.length, newLength);
      entry.falsePositiveCount++;
      entry.age = 0;
      this._placePartition(entry);
    }

    return true;
  }

  /**
   * Reacts to a range reported as a false positive: every box that matches an
   * overlapped partition without actually holding that partition is split
   * away from it.
   *
   * @param {number} start - First key of the range.
   * @param {number} end - Last key of the range.
   * @returns {boolean} true when at least one box was split.
   * @throws {Error} If a bound is rejected by normalizeKey or start > end.
   */
  adaptFalsePositive(start, end) {
    let adapted = false;

    for (const subquery of this._subqueries(start, end)) {
      for (const box of this._matchingBoxes(subquery.partition)) {
        // The box really holds this partition: a true positive, keep it.
        if (box.partitions.has(subquery.partition)) continue;
        adapted = this._splitBoxAwayFromQuery(box, subquery.partition) || adapted;
      }
    }

    return adapted;
  }

  /**
   * Advances the expansion level and rebuilds all boxes. Every entry ages by
   * one and gives up one fingerprint bit; an entry already at length 1 is
   * reset to `fingerprintBits` with age 0 instead.
   */
  expand() {
    this.expansionLevel++;
    this.boxes.clear();
    this.partitionToBoxKeys.clear();

    for (const entry of this.partitions.values()) {
      entry.age++;

      if (entry.length > 1) {
        entry.length--;
      } else {
        entry.length = this.fingerprintBits;
        entry.age = 0;
      }

      this._placePartition(entry);
    }
  }

  /**
   * @returns {{partitions: number, keepsakeBoxes: number, expansionLevel: number,
   *   maxAge: number, maxLength: number}} Current sizes plus the largest entry
   *   age and fingerprint length (both 0 when the filter is empty).
   */
  getStats() {
    let maxAge = 0;
    let maxLength = 0;
    for (const entry of this.partitions.values()) {
      maxAge = Math.max(maxAge, entry.age);
      maxLength = Math.max(maxLength, entry.length);
    }

    return {
      partitions: this.partitions.size,
      keepsakeBoxes: this.boxes.size,
      expansionLevel: this.expansionLevel,
      maxAge,
      maxLength,
    };
  }

  /**
   * Encodes the filter as: MAGIC.RANGE, partitionSize, fingerprintBits,
   * maxFingerprintBits, expansionLevel, partition count (all u32), then per
   * partition key(u32) length(u8) count(u32) falsePositiveCount(u32) age(u32),
   * and passes the result through wrapCRC32. The hash function is not stored.
   *
   * @returns {*} Whatever wrapCRC32 returns for the encoded payload.
   */
  serialize() {
    const writer = new BinaryWriter();
    writer.uint32(MAGIC.RANGE);
    writer.uint32(this.partitionSize);
    writer.uint32(this.fingerprintBits);
    writer.uint32(this.maxFingerprintBits);
    writer.uint32(this.expansionLevel);

    const partitionEntries = Array.from(this.partitions.entries());
    writer.uint32(partitionEntries.length);
    for (const [key, entry] of partitionEntries) {
      writer.uint32(key);
      writer.uint8(entry.length ?? 0);
      writer.uint32(entry.count ?? 1);
      writer.uint32(entry.falsePositiveCount ?? 0);
      writer.uint32(entry.age ?? 0);
    }

    return wrapCRC32(writer.toArrayBuffer());
  }

  /**
   * Rebuilds a filter from the output of serialize(). Fingerprints, boxes and
   * the reverse map are recomputed through _placePartition().
   *
   * @param {*} buffer - Serialized filter, as accepted by unwrapCRC32.
   * @param {object} [options] - Constructor options; they take precedence over
   *   the stored sizes. Pass `hashFunction` here when the filter was built
   *   with a non-default one, since it is not part of the binary.
   * @returns {RangeFilter} The restored filter.
   * @throws {Error} If the payload does not start with MAGIC.RANGE, or if
   *   unwrapCRC32 or the constructor rejects the input.
   */
  static deserialize(buffer, options = {}) {
    buffer = unwrapCRC32(buffer);
    const reader = new BinaryReader(buffer);
    if (reader.uint32() !== MAGIC.RANGE) throw new Error('Not a RangeFilter binary.');

    // Reads must happen in exactly the order serialize() wrote the fields.
    const partitionSize = reader.uint32();
    const fingerprintBits = reader.uint32();
    const maxFingerprintBits = reader.uint32();
    const filter = new RangeFilter({
      partitionSize,
      fingerprintBits,
      maxFingerprintBits,
      ...options,
    });
    filter.expansionLevel = reader.uint32();

    const partitionCount = reader.uint32();
    for (let i = 0; i < partitionCount; i++) {
      const key = reader.uint32();
      const length = reader.uint8();
      const count = reader.uint32();
      const falsePositiveCount = reader.uint32();
      const age = reader.uint32();

      const entry = {
        partition: key,
        fingerprint: 0, // recomputed by _placePartition()
        length,
        count,
        falsePositiveCount,
        age,
      };
      filter.partitions.set(key, entry);
      filter._placePartition(entry);
    }

    return filter;
  }
}
@@ -0,0 +1,214 @@
1
+ /**
2
+ * Serialization primitives for probe-filters and probe-maplets.
3
+ *
4
+ * Wire format for every filter/maplet:
5
+ * [4-byte MAGIC] [payload ...] [4-byte IEEE 802.3 CRC32]
6
+ *
7
+ * PointFilter payload:
8
+ * capacity(u32) elements(u32) slotAddrBits(u32) fpSize(u32) curFpLen(u32)
9
+ * deletionQ: count(u32) [motherHash(u32)...]
10
+ * rejuvenQ: count(u32) [motherHash(u32)...]
11
+ * liveSlots: count(u32) indexWidth(u8) // indexWidth = 2 if capacity≤64K else 4
12
+ * [index(u16|u32) meta(u8) motherHash(u32)...]
13
+ * meta = (state << 6) | fingerprintLength // state = 2 bits, fpLen = 6 bits
14
+ * fingerprints[] and canonicalSlots[] are reconstructed from motherHash on load:
15
+ * canonicalSlot = motherHash & (capacity - 1)
16
+ * fingerprint = (motherHash >>> slotAddrBits) & mask[fpLen]
17
+ * RSQF packed metadata (occupied, shifted, continuation, runends, runCounts,
18
+ * blockOffsets) is rebuilt via _rebuildPackedMetadata() on deserialize.
19
+ *
20
+ * RangeFilter payload:
21
+ * partitionSize(u32) fpBits(u32) maxFpBits(u32) expansionLevel(u32)
22
+ * partitions: count(u32) [key(u32) length(u8) count(u32) falsePositiveCount(u32) age(u32)...]
23
+ * fingerprint is recomputed from partition hash; keepsake boxes and
24
+ * partitionToBoxKeys reverse map are rebuilt via _placePartition() on load.
25
+ *
26
+ * SpatialFilter payload:
27
+ * bitsPerCoordinate(u32) [RangeFilter binary (wrapped bytes)...]
28
+ *
29
+ * TemporalFilter payload:
30
+ * bucketDurationMs(f64) retentionDurationMs(f64) maxBuckets(u32)
31
+ * buckets: count(u32) [bucketId(u32) [PointFilter binary (wrapped bytes)]...]
32
+ *
33
+ * MultiFilter payload:
34
+ * parts: count(u32) [typeLen(u32) type(utf8) [sub-filter binary (bytes)]...]
35
+ *
36
+ * Maplet wire formats follow the same pattern with [MAGIC] [filter binary (bytes)]
37
+ * [domainType(string)] [domain state] [values: key → encoded state].
38
+ */
39
+
40
/**
 * Magic numbers that open every serialized structure. Each value is the
 * big-endian packing of a four-character ASCII tag (for example 'RANG' is
 * 0x52414E47).
 */
const MAGIC = (() => {
  // Packs a four-character ASCII tag into one unsigned 32-bit number.
  const pack = (tag) => [...tag].reduce((word, ch) => word * 256 + ch.charCodeAt(0), 0);

  return {
    POINT: pack('APTF'), // Aleph Point Filter (PointFilter)
    RANGE: pack('RANG'), // RangeFilter
    SPATIAL: pack('SPTL'), // SpatialFilter
    TEMPORAL: pack('TEMP'), // TemporalFilter
    MULTI: pack('MULT'), // MultiFilter
    // NOTE(review): originally annotated "MultiMaplet", the same label as
    // MAPLET_MULTI below; confirm which type actually uses this tag.
    MAPLET: pack('MAPL'),
    MAPLET_POINT: pack('MPNT'), // PointMaplet
    MAPLET_RANGE: pack('MRNG'), // RangeMaplet
    MAPLET_SPATIAL: pack('MSPL'), // SpatialMaplet
    MAPLET_TEMPORAL: pack('MTMP'), // TemporalMaplet
    MAPLET_MULTI: pack('MMLT'), // MultiMaplet
    LIST: pack('LIST'), // type-tagged list
  };
})();

export { MAGIC };
56
+
57
/**
 * Append-only little-endian binary encoder backed by a growable ArrayBuffer.
 * Every write method returns the writer so calls can be chained.
 */
export class BinaryWriter {
  /**
   * @param {number} [initialCapacity=1024] - Initial buffer size in bytes.
   *   Zero is allowed; the buffer grows on the first write.
   */
  constructor(initialCapacity = 1024) {
    this.buffer = new ArrayBuffer(initialCapacity);
    this.dataView = new DataView(this.buffer);
    this.offset = 0;
  }

  /**
   * Ensures `needed` more bytes fit after the current offset, doubling the
   * capacity until they do and copying the bytes written so far.
   *
   * @param {number} needed - Number of bytes about to be written.
   */
  _grow(needed) {
    const required = this.offset + needed;
    if (required <= this.buffer.byteLength) return;

    // Start from at least 1: doubling a zero capacity never makes progress
    // and would spin forever.
    let newSize = Math.max(this.buffer.byteLength, 1);
    while (newSize < required) newSize *= 2;

    const newBuffer = new ArrayBuffer(newSize);
    new Uint8Array(newBuffer).set(new Uint8Array(this.buffer, 0, this.offset));
    this.buffer = newBuffer;
    this.dataView = new DataView(this.buffer);
  }

  /**
   * @param {number} value - Value to write as one unsigned byte.
   * @returns {BinaryWriter} this
   */
  uint8(value) {
    this._grow(1);
    this.dataView.setUint8(this.offset, value >>> 0);
    this.offset += 1;
    return this;
  }

  /**
   * @param {number} value - Value to write as a little-endian uint16.
   * @returns {BinaryWriter} this
   */
  uint16(value) {
    this._grow(2);
    this.dataView.setUint16(this.offset, value >>> 0, true);
    this.offset += 2;
    return this;
  }

  /**
   * @param {number} value - Value to write as a little-endian uint32.
   * @returns {BinaryWriter} this
   */
  uint32(value) {
    this._grow(4);
    this.dataView.setUint32(this.offset, value >>> 0, true);
    this.offset += 4;
    return this;
  }

  /**
   * @param {number} value - Value to write as a little-endian float64.
   * @returns {BinaryWriter} this
   */
  float64(value) {
    this._grow(8);
    this.dataView.setFloat64(this.offset, value, true);
    this.offset += 8;
    return this;
  }

  /**
   * Writes a u32 byte count followed by the bytes themselves.
   *
   * @param {Uint8Array|ArrayBuffer|ArrayBufferView|number[]} array - Bytes to
   *   append.
   * @returns {BinaryWriter} this
   */
  bytes(array) {
    const len = Array.isArray(array) ? array.length : array.byteLength ?? array.length;
    this.uint32(len);
    this._grow(len);
    if (array instanceof Uint8Array) {
      new Uint8Array(this.buffer, this.offset, len).set(array);
    } else {
      // Views expose buffer/byteOffset; an ArrayBuffer or plain array is
      // handed to the Uint8Array constructor directly.
      const view = new Uint8Array(array.buffer ?? array, array.byteOffset ?? 0, len);
      new Uint8Array(this.buffer, this.offset, len).set(view);
    }
    this.offset += len;
    return this;
  }

  /**
   * Writes a u32 byte count followed by the UTF-8 encoding of the string.
   *
   * @param {string} str - Text to append.
   * @returns {BinaryWriter} this
   */
  string(str) {
    const encoded = new TextEncoder().encode(str);
    this.uint32(encoded.length);
    this._grow(encoded.length);
    new Uint8Array(this.buffer, this.offset, encoded.length).set(encoded);
    this.offset += encoded.length;
    return this;
  }

  /**
   * @returns {ArrayBuffer} A copy of exactly the bytes written so far.
   */
  toArrayBuffer() {
    return this.buffer.slice(0, this.offset);
  }
}
130
+
131
/**
 * Sequential little-endian binary decoder; the counterpart of BinaryWriter.
 */
export class BinaryReader {
  /**
   * @param {ArrayBuffer|ArrayBufferView} buffer - Bytes to read. A view that
   *   covers only part of its underlying buffer (a subarray, or a pooled
   *   Node.js Buffer) is copied so that offset 0 is the first byte of the
   *   view rather than of the buffer it sits in.
   */
  constructor(buffer) {
    if (buffer instanceof ArrayBuffer) {
      this.buffer = buffer;
    } else if (buffer.byteOffset === 0 && buffer.byteLength === buffer.buffer.byteLength) {
      this.buffer = buffer.buffer;
    } else {
      this.buffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
    }
    this.dataView = new DataView(this.buffer);
    this.offset = 0;
  }

  /** @returns {number} The next unsigned byte. */
  uint8() {
    const value = this.dataView.getUint8(this.offset);
    this.offset += 1;
    return value;
  }

  /** @returns {number} The next little-endian uint16. */
  uint16() {
    const value = this.dataView.getUint16(this.offset, true);
    this.offset += 2;
    return value;
  }

  /** @returns {number} The next little-endian uint32. */
  uint32() {
    const value = this.dataView.getUint32(this.offset, true);
    this.offset += 4;
    return value;
  }

  /** @returns {number} The next little-endian float64. */
  float64() {
    const value = this.dataView.getFloat64(this.offset, true);
    this.offset += 8;
    return value;
  }

  /**
   * Reads a u32 byte count, then decodes that many bytes as UTF-8.
   *
   * @returns {string} The decoded text.
   * @throws {RangeError} If the count runs past the end of the buffer.
   */
  string() {
    const len = this.uint32();
    const bytes = new Uint8Array(this.buffer, this.offset, len);
    this.offset += len;
    return new TextDecoder().decode(bytes);
  }

  /**
   * Reads a u32 byte count, then returns that many bytes.
   *
   * @returns {Uint8Array} A copy backed by its own ArrayBuffer (byteOffset 0).
   * @throws {RangeError} If the count runs past the end of the buffer.
   */
  bytes() {
    const len = this.uint32();
    // Creating the view first bounds-checks `len` before any allocation, so a
    // corrupt length cannot trigger a huge ArrayBuffer; slice() then copies
    // the bytes into a fresh buffer.
    const copy = new Uint8Array(this.buffer, this.offset, len).slice();
    this.offset += len;
    return copy;
  }

  /** @returns {boolean} true once every byte has been consumed. */
  done() {
    return this.offset >= this.buffer.byteLength;
  }
}
181
+
182
/**
 * Byte-wise lookup table for the reflected CRC-32 polynomial 0xEDB88320.
 * Entries are stored as signed 32-bit integers.
 */
const CRC32_TABLE = Int32Array.from({ length: 256 }, (_, index) => {
  let remainder = index;
  // Process the eight bits of the index, lowest bit first.
  for (let bit = 0; bit < 8; bit += 1) {
    const shifted = remainder >>> 1;
    remainder = (remainder & 1) === 1 ? shifted ^ 0xEDB88320 : shifted;
  }
  return remainder;
});
191
+
192
/**
 * Computes the CRC-32 checksum of a byte sequence using CRC32_TABLE.
 *
 * @param {Uint8Array|number[]} data - Indexable bytes with a `length`.
 * @returns {number} The checksum as an unsigned 32-bit integer.
 */
function crc32(data) {
  let register = 0xFFFFFFFF;
  for (let index = 0; index < data.length; index += 1) {
    const slot = (register ^ data[index]) & 0xFF;
    register = CRC32_TABLE[slot] ^ (register >>> 8);
  }
  // Final inversion, then reinterpret the signed result as unsigned.
  return ~register >>> 0;
}
197
+
198
/**
 * Appends the payload's CRC-32 as a 4-byte little-endian trailer.
 *
 * @param {ArrayBuffer|Uint8Array} buffer - Payload bytes.
 * @returns {ArrayBuffer} A new buffer holding the payload followed by its
 *   checksum.
 */
export function wrapCRC32(buffer) {
  const payload = new Uint8Array(buffer);
  const checksum = crc32(payload);

  const framed = new Uint8Array(payload.length + 4);
  framed.set(payload, 0);

  // The trailer occupies the last four bytes of the framed buffer.
  const trailer = new DataView(framed.buffer, payload.length, 4);
  trailer.setUint32(0, checksum, true);
  return framed.buffer;
}
206
+
207
/**
 * Verifies and strips the 4-byte little-endian CRC-32 trailer.
 *
 * @param {ArrayBuffer|Uint8Array} buffer - Payload followed by its checksum.
 * @returns {ArrayBuffer} A copy of the payload without the trailer.
 * @throws {Error} If the input is shorter than four bytes or the stored
 *   checksum does not match the payload.
 */
export function unwrapCRC32(buffer) {
  // Re-view Uint8Array input in place (honouring its offset); wrap anything else.
  const framed = buffer instanceof Uint8Array
    ? new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
    : new Uint8Array(buffer);
  if (framed.length < 4) throw new Error('Buffer too small for CRC32');

  const payloadLength = framed.length - 4;
  const payload = framed.subarray(0, payloadLength);
  const stored = new DataView(framed.buffer, framed.byteOffset, framed.byteLength)
    .getUint32(payloadLength, true);
  if (crc32(payload) !== stored) throw new Error('CRC32 mismatch: data may be corrupted');

  const begin = payload.byteOffset;
  return payload.buffer.slice(begin, begin + payload.byteLength);
}