probe-filters 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,871 @@
1
+ /**
2
+ * PointFilter: An infinitely expandable filter with constant time operations.
3
+ * Based on the paper "Aleph Filter: To Infinity in Constant Time" by Dayan, Bercea, Pagh.
4
+ * PVLDB, 17(11): 3644 - 3656, 2024. doi:10.14778/3681954.3682027
5
+ */
6
+
7
+ import { PackedBitset } from './packedBitset.js';
8
+ import { BinaryWriter, BinaryReader, MAGIC, wrapCRC32, unwrapCRC32 } from './serialization.js';
9
+
10
+ const EMPTY = 0;
11
+ const OCCUPIED = 1;
12
+ const VOID = 2;
13
+ const TOMBSTONE = 3;
14
+ const NO_CANONICAL = 0xffffffff;
15
+
16
/**
 * Hash a string to an unsigned 32-bit integer.
 *
 * A seeded djb2 pass (hash * 33 + code unit) runs over the UTF-16 code units
 * of `key`; the result is then scrambled by two xor-shift / multiply rounds
 * and a final xor-shift so that nearby inputs spread over the whole range.
 *
 * @param {string} key - Text to hash; read via `length` and `charCodeAt`.
 * @param {number} [seed=0] - Added to the djb2 start value 5381.
 * @returns {number} Integer in the range [0, 2^32 - 1].
 */
export function djb2(key, seed = 0) {
  let hash = (5381 + seed) | 0;

  // hash * 33 + code unit, wrapped to a signed 32-bit integer on every step.
  for (let position = 0, end = key.length; position < end; position += 1) {
    hash = (Math.imul(hash, 33) + key.charCodeAt(position)) | 0;
  }

  hash ^= hash >>> 16;
  hash = Math.imul(hash, 0x85ebca6b);
  hash ^= hash >>> 13;
  hash = Math.imul(hash, 0xc2b2ae35);
  hash ^= hash >>> 16;

  // Reinterpret the signed result as unsigned.
  return hash >>> 0;
}
27
+
28
+ export class PointFilter {
29
+ constructor(options = {}) {
30
+ this.options = {
31
+ initialCapacity: 256,
32
+ fingerprintSize: 12,
33
+ estimatedMaxNFactor: null,
34
+ hashFunction: djb2,
35
+ expansionThreshold: 0.9,
36
+ ...options,
37
+ };
38
+
39
+ if (Math.log2(this.options.initialCapacity) % 1 !== 0) {
40
+ throw new Error("initialCapacity must be a power of 2.");
41
+ }
42
+ if (this.options.initialCapacity <= 0 || this.options.initialCapacity > 0x40000000) {
43
+ throw new Error("initialCapacity must fit in a 32-bit typed-array address space.");
44
+ }
45
+ if (this.options.fingerprintSize <= 0) {
46
+ throw new Error("fingerprintSize must be positive.");
47
+ }
48
+
49
+ const initialAddressBits = Math.log2(this.options.initialCapacity);
50
+ if (initialAddressBits + this.options.fingerprintSize > 32) {
51
+ throw new Error("initialCapacity and fingerprintSize must fit in one 32-bit hash.");
52
+ }
53
+
54
+ this.currentCapacity = this.options.initialCapacity;
55
+ this.numElements = 0;
56
+ this.currentExpansionLevel = 0;
57
+ this.bitsForSlotAddress = initialAddressBits;
58
+ this.currentFingerprintLengthForNewEntries = this.options.fingerprintSize;
59
+ this.fingerprintMasks = new Uint32Array(33);
60
+ this.fingerprintMasks[0] = 0;
61
+ for (let bits = 1; bits <= 32; bits++) {
62
+ this.fingerprintMasks[bits] = bits === 32 ? 0xffffffff : ((2 ** bits) - 1) >>> 0;
63
+ }
64
+ this._scratchCanonicalSlots = null;
65
+ this._scratchFingerprints = null;
66
+ this._scratchStates = null;
67
+ this._scratchFingerprintLengths = null;
68
+ this._scratchMotherHashes = null;
69
+ this._scratchOrder = null;
70
+
71
+ this.secondaryHashTable = {
72
+ slots: [],
73
+ capacity: this.options.initialCapacity / 2,
74
+ numElements: 0,
75
+ };
76
+ this.auxiliaryHashTables = [];
77
+ this.deletionQueue = [];
78
+ this.rejuvenationQueue = [];
79
+
80
+ this._allocateStorage(this.currentCapacity);
81
+ }
82
+
83
+ _allocateStorage(capacity) {
84
+ this.fingerprints = new Uint32Array(capacity);
85
+ this.canonicalSlots = new Uint32Array(capacity);
86
+ this.canonicalSlots.fill(NO_CANONICAL);
87
+ this.states = new Uint8Array(capacity);
88
+ this.fingerprintLengths = new Uint8Array(capacity);
89
+ this.motherHashes = new Uint32Array(capacity);
90
+ this.occupied = new PackedBitset(capacity);
91
+ this.shifted = new PackedBitset(capacity);
92
+ this.continuation = new PackedBitset(capacity);
93
+ this.runends = new PackedBitset(capacity);
94
+ this.runCounts = new Uint32Array(capacity);
95
+ this.blockOffsets = new Uint8Array(Math.ceil(capacity / 64));
96
+
97
+ this.mainHashTable = {
98
+ fingerprints: this.fingerprints,
99
+ canonicalSlots: this.canonicalSlots,
100
+ states: this.states,
101
+ fingerprintLengths: this.fingerprintLengths,
102
+ motherHashes: this.motherHashes,
103
+ occupied: this.occupied,
104
+ shifted: this.shifted,
105
+ continuation: this.continuation,
106
+ runends: this.runends,
107
+ runCounts: this.runCounts,
108
+ blockOffsets: this.blockOffsets,
109
+ capacity,
110
+ };
111
+ }
112
+
113
+ _isLiveEntry(index) {
114
+ return this.states[index] !== EMPTY && this.states[index] !== TOMBSTONE;
115
+ }
116
+
117
+ _rebuildPackedMetadata() {
118
+ this.occupied.clear();
119
+ this.shifted.clear();
120
+ this.continuation.clear();
121
+ this.runends.clear();
122
+ this.runCounts.fill(0);
123
+ this.blockOffsets.fill(0);
124
+
125
+ for (let index = 0; index < this.currentCapacity; index++) {
126
+ if (!this._isLiveEntry(index)) continue;
127
+
128
+ const canonicalSlot = this.canonicalSlots[index];
129
+ if (canonicalSlot === NO_CANONICAL) continue;
130
+
131
+ this.occupied.set(canonicalSlot, 1);
132
+ this.runCounts[canonicalSlot]++;
133
+ this.shifted.set(index, canonicalSlot !== index);
134
+
135
+ const previous = (index - 1 + this.currentCapacity) % this.currentCapacity;
136
+ if (this._isLiveEntry(previous) && this.canonicalSlots[previous] === canonicalSlot) {
137
+ this.continuation.set(index, 1);
138
+ }
139
+
140
+ const next = (index + 1) % this.currentCapacity;
141
+ if (!this._isLiveEntry(next) || this.canonicalSlots[next] !== canonicalSlot) {
142
+ this.runends.set(index, 1);
143
+ }
144
+ }
145
+
146
+ for (let block = 0; block < this.blockOffsets.length; block++) {
147
+ const blockStart = block * 64;
148
+ let offset = 0;
149
+ while (blockStart + offset < this.currentCapacity && offset < 255 && this.shifted.get(blockStart + offset) === 1) {
150
+ offset++;
151
+ }
152
+ this.blockOffsets[block] = offset;
153
+ }
154
+ }
155
+
156
+ _ensureCompactionScratch(capacity) {
157
+ if (this._scratchCanonicalSlots && this._scratchCanonicalSlots.length >= capacity) {
158
+ return;
159
+ }
160
+
161
+ this._scratchCanonicalSlots = new Uint32Array(capacity);
162
+ this._scratchFingerprints = new Uint32Array(capacity);
163
+ this._scratchStates = new Uint8Array(capacity);
164
+ this._scratchFingerprintLengths = new Uint8Array(capacity);
165
+ this._scratchMotherHashes = new Uint32Array(capacity);
166
+ this._scratchOrder = new Uint32Array(capacity);
167
+ }
168
+
169
+ _collectLiveEntriesIntoScratch() {
170
+ this._ensureCompactionScratch(this.currentCapacity);
171
+
172
+ let count = 0;
173
+ for (let index = 0; index < this.currentCapacity; index++) {
174
+ if (!this._isLiveEntry(index)) continue;
175
+
176
+ this._scratchCanonicalSlots[count] = this.canonicalSlots[index];
177
+ this._scratchFingerprints[count] = this.fingerprints[index];
178
+ this._scratchStates[count] = this.states[index];
179
+ this._scratchFingerprintLengths[count] = this.fingerprintLengths[index];
180
+ this._scratchMotherHashes[count] = this.motherHashes[index];
181
+ this._scratchOrder[count] = count;
182
+ count++;
183
+ }
184
+
185
+ return count;
186
+ }
187
+
188
+ _clearStorage() {
189
+ this.fingerprints.fill(0);
190
+ this.canonicalSlots.fill(NO_CANONICAL);
191
+ this.states.fill(EMPTY);
192
+ this.fingerprintLengths.fill(0);
193
+ this.motherHashes.fill(0);
194
+ this.runCounts.fill(0);
195
+ }
196
+
197
+ _compactRuns() {
198
+ const count = this._collectLiveEntriesIntoScratch();
199
+ const order = this._scratchOrder.subarray(0, count);
200
+
201
+ order.sort((a, b) => {
202
+ const canonicalA = this._scratchCanonicalSlots[a];
203
+ const canonicalB = this._scratchCanonicalSlots[b];
204
+ if (canonicalA !== canonicalB) return canonicalA - canonicalB;
205
+
206
+ const stateA = this._scratchStates[a];
207
+ const stateB = this._scratchStates[b];
208
+ if (stateA !== stateB) return stateA - stateB;
209
+
210
+ const fingerprintA = this._scratchFingerprints[a];
211
+ const fingerprintB = this._scratchFingerprints[b];
212
+ if (fingerprintA !== fingerprintB) return fingerprintA - fingerprintB;
213
+
214
+ return this._scratchFingerprintLengths[b] - this._scratchFingerprintLengths[a];
215
+ });
216
+
217
+ this._clearStorage();
218
+
219
+ for (let orderIndex = 0; orderIndex < count; orderIndex++) {
220
+ const scratchIndex = order[orderIndex];
221
+ const canonicalSlot = this._scratchCanonicalSlots[scratchIndex];
222
+ let placed = false;
223
+ for (let distance = 0; distance < this.currentCapacity; distance++) {
224
+ const index = (canonicalSlot + distance) % this.currentCapacity;
225
+ if (this.states[index] === EMPTY) {
226
+ this._writeSlot(
227
+ index,
228
+ canonicalSlot,
229
+ this._scratchFingerprints[scratchIndex],
230
+ this._scratchStates[scratchIndex],
231
+ this._scratchFingerprintLengths[scratchIndex],
232
+ this._scratchMotherHashes[scratchIndex]
233
+ );
234
+ placed = true;
235
+ break;
236
+ }
237
+ }
238
+ if (!placed) {
239
+ throw new Error('Filter is full while compacting RSQF runs.');
240
+ }
241
+ }
242
+
243
+ this._rebuildPackedMetadata();
244
+ }
245
+
246
+ _runEndByRank(canonicalSlot) {
247
+ if (this.occupied.get(canonicalSlot) === 0) return -1;
248
+
249
+ let runStart = -1;
250
+ for (let distance = 0; distance < this.currentCapacity; distance++) {
251
+ const index = (canonicalSlot + distance) % this.currentCapacity;
252
+ if (this._isLiveEntry(index) && this.canonicalSlots[index] === canonicalSlot) {
253
+ runStart = index;
254
+ break;
255
+ }
256
+ }
257
+
258
+ if (runStart === -1) return -1;
259
+
260
+ let current = runStart;
261
+ for (let steps = 0; steps < this.currentCapacity; steps++) {
262
+ const next = (current + 1) % this.currentCapacity;
263
+ if (!this._isLiveEntry(next) || this.canonicalSlots[next] !== canonicalSlot) {
264
+ return current;
265
+ }
266
+ current = next;
267
+ }
268
+
269
+ return runStart;
270
+ }
271
+
272
+ _runStartByRank(canonicalSlot) {
273
+ if (this.occupied.get(canonicalSlot) === 0) return -1;
274
+
275
+ for (let distance = 0; distance < this.currentCapacity; distance++) {
276
+ const index = (canonicalSlot + distance) % this.currentCapacity;
277
+ if (this._isLiveEntry(index) && this.canonicalSlots[index] === canonicalSlot) {
278
+ return index;
279
+ }
280
+ }
281
+
282
+ return -1;
283
+ }
284
+
285
+ _forEachSlotInRun(canonicalSlot, visitor) {
286
+ const runStart = this._runStartByRank(canonicalSlot);
287
+ if (runStart === -1) return false;
288
+
289
+ let current = runStart;
290
+ for (let steps = 0; steps < this.currentCapacity; steps++) {
291
+ if (visitor(current) === true) return true;
292
+ if (this.runends.get(current) === 1) break;
293
+ current = (current + 1) % this.currentCapacity;
294
+ }
295
+
296
+ return false;
297
+ }
298
+
299
+ metadataBitsPerSlot() {
300
+ return 2.125;
301
+ }
302
+
303
+ debugMetadata(index) {
304
+ return {
305
+ occupied: this.occupied.get(index),
306
+ shifted: this.shifted.get(index),
307
+ continuation: this.continuation.get(index),
308
+ runend: this.runends.get(index),
309
+ blockOffset: this.blockOffsets[index >>> 6],
310
+ };
311
+ }
312
+
313
+ validatePackedInvariants() {
314
+ for (let index = 0; index < this.currentCapacity; index++) {
315
+ if (this._isLiveEntry(index) && this.canonicalSlots[index] !== NO_CANONICAL && this.occupied.get(this.canonicalSlots[index]) !== 1) {
316
+ return false;
317
+ }
318
+
319
+ if (this.runends.get(index) === 1 && !this._isLiveEntry(index)) {
320
+ return false;
321
+ }
322
+ }
323
+
324
+ for (let index = 0; index < this.currentCapacity; index++) {
325
+ if (this.occupied.get(index) === 0) continue;
326
+ const runStart = this._runStartByRank(index);
327
+ const runEnd = this._runEndByRank(index);
328
+ if (runStart === -1 || runEnd === -1) return false;
329
+
330
+ let foundEntry = false;
331
+ let current = runStart;
332
+ for (let steps = 0; steps < this.currentCapacity; steps++) {
333
+ if (!this._isLiveEntry(current)) return false;
334
+ if (this.canonicalSlots[current] === index) foundEntry = true;
335
+ if (current === runEnd) break;
336
+ current = (current + 1) % this.currentCapacity;
337
+ }
338
+ if (!foundEntry) return false;
339
+ }
340
+
341
+ return true;
342
+ }
343
+
344
+ _getMotherHash(element) {
345
+ return this.options.hashFunction(String(element), 0) >>> 0;
346
+ }
347
+
348
+ _getSlotAndFingerprint(motherHash, currentFingerprintBits, currentAddressBits) {
349
+ const addressSpace = 2 ** currentAddressBits;
350
+ const canonicalSlot = motherHash % addressSpace;
351
+ const shiftedHash = Math.floor(motherHash / addressSpace);
352
+ const fingerprintSpace = 2 ** currentFingerprintBits;
353
+ const fingerprint = shiftedHash % fingerprintSpace;
354
+
355
+ return { canonicalSlot, fingerprint };
356
+ }
357
+
358
+ _fingerprintMatches(index, queryFingerprint) {
359
+ const state = this.states[index];
360
+ if (state === VOID) return true;
361
+
362
+ const length = this.fingerprintLengths[index];
363
+ if (length === 0) return false;
364
+
365
+ const mask = this.fingerprintMasks[length];
366
+ return this.fingerprints[index] === (queryFingerprint & mask);
367
+ }
368
+
369
+ _findSlot(canonicalSlot, fingerprint, allowVoid = true) {
370
+ let found = -1;
371
+
372
+ this._forEachSlotInRun(canonicalSlot, (index) => {
373
+ const state = this.states[index];
374
+
375
+ if ((allowVoid && state === VOID) || this._fingerprintMatches(index, fingerprint)) {
376
+ found = index;
377
+ return true;
378
+ }
379
+
380
+ return false;
381
+ });
382
+
383
+ return { found, insertionPoint: found === -1 ? this._findReusableSlot(canonicalSlot) : -1 };
384
+ }
385
+
386
+ _findReusableSlot(canonicalSlot) {
387
+ for (let distance = 0; distance < this.currentCapacity; distance++) {
388
+ const index = (canonicalSlot + distance) % this.currentCapacity;
389
+ if (this.states[index] === EMPTY || this.states[index] === TOMBSTONE) {
390
+ return index;
391
+ }
392
+ }
393
+ return -1;
394
+ }
395
+
396
+ _setOccupiedFromCount(canonicalSlot) {
397
+ this.occupied.set(canonicalSlot, this.runCounts[canonicalSlot] > 0 ? 1 : 0);
398
+ }
399
+
400
+ _copySlot(fromIndex, toIndex) {
401
+ this.fingerprints[toIndex] = this.fingerprints[fromIndex];
402
+ this.canonicalSlots[toIndex] = this.canonicalSlots[fromIndex];
403
+ this.states[toIndex] = this.states[fromIndex];
404
+ this.fingerprintLengths[toIndex] = this.fingerprintLengths[fromIndex];
405
+ this.motherHashes[toIndex] = this.motherHashes[fromIndex];
406
+ }
407
+
408
+ _clearSlot(index) {
409
+ this.fingerprints[index] = 0;
410
+ this.canonicalSlots[index] = NO_CANONICAL;
411
+ this.states[index] = EMPTY;
412
+ this.fingerprintLengths[index] = 0;
413
+ this.motherHashes[index] = 0;
414
+ }
415
+
416
+ _findRunInsertIndex(canonicalSlot) {
417
+ if (this.occupied.get(canonicalSlot) === 1) {
418
+ const ownRunEnd = this._runEndByRank(canonicalSlot);
419
+ if (ownRunEnd !== -1) {
420
+ return (ownRunEnd + 1) % this.currentCapacity;
421
+ }
422
+ return canonicalSlot;
423
+ }
424
+
425
+ for (let distance = 1; distance <= this.currentCapacity; distance++) {
426
+ const previousCanonical = (canonicalSlot - distance + this.currentCapacity) % this.currentCapacity;
427
+ if (this.occupied.get(previousCanonical) === 0) continue;
428
+
429
+ const previousRunEnd = this._runEndByRank(previousCanonical);
430
+ if (previousRunEnd !== -1) {
431
+ return (previousRunEnd + 1) % this.currentCapacity;
432
+ }
433
+ }
434
+
435
+ return canonicalSlot;
436
+ }
437
+
438
+ _shiftRightForInsertion(targetIndex) {
439
+ const reusableIndex = this._findReusableSlot(targetIndex);
440
+ if (reusableIndex === -1) return -1;
441
+
442
+ let hole = reusableIndex;
443
+ while (hole !== targetIndex) {
444
+ const previous = (hole - 1 + this.currentCapacity) % this.currentCapacity;
445
+ this._copySlot(previous, hole);
446
+ hole = previous;
447
+ }
448
+
449
+ this._clearSlot(targetIndex);
450
+ return reusableIndex;
451
+ }
452
+
453
+ _shiftLeftAfterDeletion(deletedIndex) {
454
+ let hole = deletedIndex;
455
+ let next = (hole + 1) % this.currentCapacity;
456
+
457
+ while (this._isLiveEntry(next)) {
458
+ this._copySlot(next, hole);
459
+ hole = next;
460
+ next = (next + 1) % this.currentCapacity;
461
+ }
462
+
463
+ this._clearSlot(hole);
464
+ return hole;
465
+ }
466
+
467
+ _forEachIndexInWrappedRange(startIndex, endIndex, visitor) {
468
+ let current = startIndex;
469
+ for (let steps = 0; steps < this.currentCapacity; steps++) {
470
+ visitor(current);
471
+ if (current === endIndex) return;
472
+ current = (current + 1) % this.currentCapacity;
473
+ }
474
+ }
475
+
476
+ _recomputeMetadataIndex(index) {
477
+ if (!this._isLiveEntry(index)) {
478
+ this.shifted.set(index, 0);
479
+ this.continuation.set(index, 0);
480
+ this.runends.set(index, 0);
481
+ return;
482
+ }
483
+
484
+ const canonicalSlot = this.canonicalSlots[index];
485
+ this.shifted.set(index, canonicalSlot !== index ? 1 : 0);
486
+
487
+ const previous = (index - 1 + this.currentCapacity) % this.currentCapacity;
488
+ const continuation = this._isLiveEntry(previous) && this.canonicalSlots[previous] === canonicalSlot;
489
+ this.continuation.set(index, continuation ? 1 : 0);
490
+
491
+ const next = (index + 1) % this.currentCapacity;
492
+ const runEnd = !this._isLiveEntry(next) || this.canonicalSlots[next] !== canonicalSlot;
493
+ this.runends.set(index, runEnd ? 1 : 0);
494
+ }
495
+
496
+ _recomputeBlockOffset(block) {
497
+ const blockStart = block * 64;
498
+ let offset = 0;
499
+ while (blockStart + offset < this.currentCapacity && offset < 255 && this.shifted.get(blockStart + offset) === 1) {
500
+ offset++;
501
+ }
502
+ this.blockOffsets[block] = offset;
503
+ }
504
+
505
+ _recomputeMetadataWindow(startIndex, endIndex) {
506
+ this._forEachIndexInWrappedRange(startIndex, endIndex, (index) => {
507
+ this._recomputeMetadataIndex(index);
508
+ });
509
+
510
+ const startBlock = startIndex >>> 6;
511
+ const endBlock = endIndex >>> 6;
512
+ if (startBlock <= endBlock) {
513
+ for (let block = startBlock; block <= endBlock; block++) {
514
+ this._recomputeBlockOffset(block);
515
+ }
516
+ } else {
517
+ for (let block = startBlock; block < this.blockOffsets.length; block++) {
518
+ this._recomputeBlockOffset(block);
519
+ }
520
+ for (let block = 0; block <= endBlock; block++) {
521
+ this._recomputeBlockOffset(block);
522
+ }
523
+ }
524
+ }
525
+
526
+ _writeSlot(index, canonicalSlot, fingerprint, state, fingerprintLength, motherHash = 0) {
527
+ this.fingerprints[index] = fingerprint >>> 0;
528
+ this.canonicalSlots[index] = canonicalSlot >>> 0;
529
+ this.states[index] = state;
530
+ this.fingerprintLengths[index] = fingerprintLength;
531
+ this.motherHashes[index] = motherHash >>> 0;
532
+ }
533
+
534
+ _performInsertion(canonicalSlot, fingerprint, state = OCCUPIED, fingerprintLength = this.currentFingerprintLengthForNewEntries, motherHash = 0) {
535
+ const { found } = this._findLongestMatchingSlot(canonicalSlot, fingerprint, false, motherHash);
536
+ if (found !== -1 && this.motherHashes[found] === (motherHash >>> 0)) {
537
+ return false;
538
+ }
539
+
540
+ let insertionPoint = this._findRunInsertIndex(canonicalSlot);
541
+ let reusableIndex = this._shiftRightForInsertion(insertionPoint);
542
+
543
+ if (reusableIndex === -1) {
544
+ this.expand();
545
+ insertionPoint = this._findRunInsertIndex(canonicalSlot);
546
+ reusableIndex = this._shiftRightForInsertion(insertionPoint);
547
+ if (reusableIndex === -1) {
548
+ throw new Error("Filter is full after expansion.");
549
+ }
550
+ }
551
+
552
+ this._writeSlot(insertionPoint, canonicalSlot, fingerprint, state, fingerprintLength, motherHash);
553
+ this.runCounts[canonicalSlot]++;
554
+ this._setOccupiedFromCount(canonicalSlot);
555
+
556
+ const windowStart = (insertionPoint - 1 + this.currentCapacity) % this.currentCapacity;
557
+ const windowEnd = (reusableIndex + 1) % this.currentCapacity;
558
+ this._recomputeMetadataWindow(windowStart, windowEnd);
559
+
560
+ this.numElements++;
561
+ return true;
562
+ }
563
+
564
+ _performInsertionNoCompact(canonicalSlot, fingerprint, state = OCCUPIED, fingerprintLength = this.currentFingerprintLengthForNewEntries, motherHash = 0) {
565
+ const insertionPoint = this._findReusableSlot(canonicalSlot);
566
+ if (insertionPoint === -1) {
567
+ throw new Error("Filter is full while batching expansion insertions.");
568
+ }
569
+
570
+ this._writeSlot(insertionPoint, canonicalSlot, fingerprint, state, fingerprintLength, motherHash);
571
+ this.runCounts[canonicalSlot]++;
572
+ this._setOccupiedFromCount(canonicalSlot);
573
+ this.numElements++;
574
+ return true;
575
+ }
576
+
577
+ insert(element) {
578
+ if ((this.numElements + 1) / this.currentCapacity >= this.options.expansionThreshold) {
579
+ this.expand();
580
+ }
581
+
582
+ const motherHash = this._getMotherHash(element);
583
+ const fingerprintBits = Math.min(this.currentFingerprintLengthForNewEntries, 32 - this.bitsForSlotAddress);
584
+ if (fingerprintBits <= 0) {
585
+ throw new Error("Cannot insert with zero fingerprint bits under the 32-bit hash constraint.");
586
+ }
587
+
588
+ const addressSpace = 2 ** this.bitsForSlotAddress;
589
+ const canonicalSlot = motherHash % addressSpace;
590
+ const shiftedHash = Math.floor(motherHash / addressSpace);
591
+ const fingerprintSpace = 2 ** fingerprintBits;
592
+ const fingerprint = shiftedHash % fingerprintSpace;
593
+
594
+ this._performInsertion(canonicalSlot, fingerprint, OCCUPIED, fingerprintBits, motherHash);
595
+ }
596
+
597
+ query(element) {
598
+ const motherHash = this._getMotherHash(element);
599
+ const fingerprintBits = Math.min(this.options.fingerprintSize, 32 - this.bitsForSlotAddress);
600
+ if (fingerprintBits < 0) return false;
601
+ const addressSpace = 2 ** this.bitsForSlotAddress;
602
+ const canonicalSlot = motherHash % addressSpace;
603
+ const shiftedHash = Math.floor(motherHash / addressSpace);
604
+ const fingerprintSpace = 2 ** fingerprintBits;
605
+ const fingerprint = shiftedHash % fingerprintSpace;
606
+ return this._queryWithParams(canonicalSlot, fingerprint);
607
+ }
608
+
609
+ _queryWithParams(canonicalSlot, fingerprint) {
610
+ return this._findSlot(canonicalSlot, fingerprint, true).found !== -1;
611
+ }
612
+
613
+ delete(element) {
614
+ const motherHash = this._getMotherHash(element);
615
+ const fingerprintBits = Math.min(this.options.fingerprintSize, 32 - this.bitsForSlotAddress);
616
+ if (fingerprintBits < 0) return false;
617
+ const addressSpace = 2 ** this.bitsForSlotAddress;
618
+ const canonicalSlot = motherHash % addressSpace;
619
+ const shiftedHash = Math.floor(motherHash / addressSpace);
620
+ const fingerprintSpace = 2 ** fingerprintBits;
621
+ const fingerprint = shiftedHash % fingerprintSpace;
622
+ return this._deleteWithParams(canonicalSlot, fingerprint, motherHash);
623
+ }
624
+
625
+ _deleteWithParams(canonicalSlot, fingerprint, motherHash = 0) {
626
+ const { found } = this._findLongestMatchingSlot(canonicalSlot, fingerprint, true, motherHash);
627
+ if (found === -1) {
628
+ return false;
629
+ }
630
+
631
+ if (this.states[found] === VOID) {
632
+ this.deletionQueue.push(motherHash >>> 0);
633
+ }
634
+
635
+ this.runCounts[canonicalSlot]--;
636
+ this._setOccupiedFromCount(canonicalSlot);
637
+
638
+ const leftBoundary = found;
639
+ const rightBoundary = this._shiftLeftAfterDeletion(found);
640
+ const windowStart = (leftBoundary - 1 + this.currentCapacity) % this.currentCapacity;
641
+ const windowEnd = (rightBoundary + 1) % this.currentCapacity;
642
+ this._recomputeMetadataWindow(windowStart, windowEnd);
643
+
644
+ this.numElements--;
645
+
646
+ return true;
647
+ }
648
+
649
+ _findLongestMatchingSlot(canonicalSlot, fingerprint, allowVoid = true, motherHash = null) {
650
+ let found = -1;
651
+ let bestLength = -1;
652
+ let foundExactMotherHash = false;
653
+ let insertionPoint = -1;
654
+ const normalizedMotherHash = motherHash === null ? null : motherHash >>> 0;
655
+
656
+ this._forEachSlotInRun(canonicalSlot, (index) => {
657
+ const state = this.states[index];
658
+ const exactMotherHash = normalizedMotherHash !== null && this.motherHashes[index] === normalizedMotherHash;
659
+
660
+ if (state === VOID && allowVoid && (!foundExactMotherHash || exactMotherHash) && bestLength < 0) {
661
+ found = index;
662
+ bestLength = 0;
663
+ foundExactMotherHash = exactMotherHash;
664
+ return false;
665
+ }
666
+
667
+ if (state === OCCUPIED && this._fingerprintMatches(index, fingerprint) &&
668
+ ((exactMotherHash && !foundExactMotherHash) || (!foundExactMotherHash && this.fingerprintLengths[index] > bestLength))) {
669
+ found = index;
670
+ bestLength = this.fingerprintLengths[index];
671
+ foundExactMotherHash = exactMotherHash;
672
+ }
673
+
674
+ return false;
675
+ });
676
+
677
+ return { found, insertionPoint };
678
+ }
679
+
680
+ expand() {
681
+ const oldFingerprints = this.fingerprints;
682
+ const oldCanonicalSlots = this.canonicalSlots;
683
+ const oldStates = this.states;
684
+ const oldFingerprintLengths = this.fingerprintLengths;
685
+ const oldMotherHashes = this.motherHashes;
686
+ const oldCapacity = this.currentCapacity;
687
+ const oldBitsForSlotAddress = this.bitsForSlotAddress;
688
+ const deletedVoidHashes = new Set(this.deletionQueue);
689
+
690
+ if (oldBitsForSlotAddress >= 31) {
691
+ throw new Error("Cannot expand further within the 32-bit address constraint.");
692
+ }
693
+
694
+ this.currentCapacity *= 2;
695
+ this.bitsForSlotAddress++;
696
+ this.currentExpansionLevel++;
697
+ this._allocateStorage(this.currentCapacity);
698
+ this.numElements = 0;
699
+
700
+ for (let slotIndex = 0; slotIndex < oldCapacity; slotIndex++) {
701
+ const state = oldStates[slotIndex];
702
+ if (state === EMPTY || state === TOMBSTONE) {
703
+ continue;
704
+ }
705
+ if (deletedVoidHashes.has(oldMotherHashes[slotIndex])) {
706
+ continue;
707
+ }
708
+
709
+ const oldCanonical = oldCanonicalSlots[slotIndex];
710
+ if (oldCanonical === NO_CANONICAL) {
711
+ continue;
712
+ }
713
+
714
+ if (state === VOID || oldFingerprintLengths[slotIndex] === 0) {
715
+ this._performInsertionNoCompact(oldCanonical, 0, VOID, 0, oldMotherHashes[slotIndex]);
716
+ this._performInsertionNoCompact(oldCanonical + oldCapacity, 0, VOID, 0, oldMotherHashes[slotIndex]);
717
+ continue;
718
+ }
719
+
720
+ const oldFingerprint = oldFingerprints[slotIndex] >>> 0;
721
+ const newAddressBit = oldFingerprint & 1;
722
+ const newCanonical = oldCanonical + (newAddressBit * oldCapacity);
723
+ const newFingerprint = oldFingerprint >>> 1;
724
+ const newLength = oldFingerprintLengths[slotIndex] - 1;
725
+
726
+ if (newLength === 0) {
727
+ this._performInsertionNoCompact(newCanonical, 0, VOID, 0, oldMotherHashes[slotIndex]);
728
+ } else {
729
+ this._performInsertionNoCompact(newCanonical, newFingerprint, OCCUPIED, newLength, oldMotherHashes[slotIndex]);
730
+ }
731
+ }
732
+
733
+ this.deletionQueue = [];
734
+ this._compactRuns();
735
+ }
736
+
737
+ rejuvenate(element) {
738
+ const motherHash = this._getMotherHash(element);
739
+ const fingerprintBits = Math.min(this.options.fingerprintSize, 32 - this.bitsForSlotAddress);
740
+ if (fingerprintBits < 0) return false;
741
+ const addressSpace = 2 ** this.bitsForSlotAddress;
742
+ const canonicalSlot = motherHash % addressSpace;
743
+ const shiftedHash = Math.floor(motherHash / addressSpace);
744
+ const fingerprintSpace = 2 ** fingerprintBits;
745
+ const fingerprint = shiftedHash % fingerprintSpace;
746
+ return this._rejuvenateWithParams(canonicalSlot, fingerprint, motherHash);
747
+ }
748
+
749
+ _rejuvenateWithParams(canonicalSlot, fingerprint, motherHash) {
750
+ const { found } = this._findLongestMatchingSlot(canonicalSlot, fingerprint, true, motherHash);
751
+ if (found === -1) {
752
+ return false;
753
+ }
754
+
755
+ const fullFingerprintBits = Math.min(this.options.fingerprintSize, 32 - this.bitsForSlotAddress);
756
+ if (fullFingerprintBits <= 0) {
757
+ return false;
758
+ }
759
+
760
+ const { fingerprint: fullFingerprint } = this._getSlotAndFingerprint(
761
+ motherHash,
762
+ fullFingerprintBits,
763
+ this.bitsForSlotAddress
764
+ );
765
+
766
+ this.fingerprints[found] = fullFingerprint >>> 0;
767
+ this.states[found] = OCCUPIED;
768
+ this.fingerprintLengths[found] = fullFingerprintBits;
769
+ this.motherHashes[found] = motherHash >>> 0;
770
+ this.rejuvenationQueue.push(motherHash >>> 0);
771
+ return true;
772
+ }
773
+
774
+ getStats() {
775
+ return {
776
+ capacity: this.currentCapacity,
777
+ numElements: this.numElements,
778
+ expansionLevel: this.currentExpansionLevel,
779
+ bitsForSlotAddress: this.bitsForSlotAddress,
780
+ fingerprintSize: this.options.fingerprintSize,
781
+ currentFingerprintLength: this.currentFingerprintLengthForNewEntries,
782
+ loadFactor: this.numElements / this.currentCapacity,
783
+ deletionQueueSize: this.deletionQueue.length,
784
+ rejuvenationQueueSize: this.rejuvenationQueue.length,
785
+ };
786
+ }
787
+
788
+ serialize() {
789
+ const writer = new BinaryWriter();
790
+ writer.uint32(MAGIC.POINT);
791
+ writer.uint32(this.currentCapacity);
792
+ writer.uint32(this.numElements);
793
+ writer.uint32(this.bitsForSlotAddress);
794
+ writer.uint32(this.options.fingerprintSize);
795
+ writer.uint32(this.currentFingerprintLengthForNewEntries);
796
+ writer.uint32(this.deletionQueue.length);
797
+ for (const entry of this.deletionQueue) writer.uint32(entry);
798
+ writer.uint32(this.rejuvenationQueue.length);
799
+ for (const entry of this.rejuvenationQueue) writer.uint32(entry);
800
+
801
+ const live = [];
802
+ for (let i = 0; i < this.currentCapacity; i++) {
803
+ if (this._isLiveEntry(i)) live.push(i);
804
+ }
805
+
806
+ writer.uint32(live.length);
807
+ writer.uint8(this.currentCapacity <= 0xffff ? 2 : 4);
808
+ for (const index of live) {
809
+ if (this.currentCapacity <= 0xffff) writer.uint16(index);
810
+ else writer.uint32(index);
811
+ writer.uint8((this.states[index] << 6) | this.fingerprintLengths[index]);
812
+ writer.uint32(this.motherHashes[index]);
813
+ }
814
+ return wrapCRC32(writer.toArrayBuffer());
815
+ }
816
+
817
+ static deserialize(buffer, options = {}) {
818
+ buffer = unwrapCRC32(buffer);
819
+ const reader = new BinaryReader(buffer);
820
+ const magic = reader.uint32();
821
+ if (magic !== MAGIC.POINT) throw new Error('Not a PointFilter binary.');
822
+
823
+ const capacity = reader.uint32();
824
+ const numElements = reader.uint32();
825
+ const bitsForSlotAddress = reader.uint32();
826
+ const fingerprintSize = reader.uint32();
827
+ const currentFingerprintLength = reader.uint32();
828
+
829
+ const initialCapacity = 2 ** Math.ceil(Math.log2(capacity));
830
+ const filter = new PointFilter({
831
+ initialCapacity,
832
+ fingerprintSize,
833
+ ...options,
834
+ });
835
+ filter.currentCapacity = capacity;
836
+ filter.numElements = numElements;
837
+ filter.currentFingerprintLengthForNewEntries = currentFingerprintLength;
838
+ filter.bitsForSlotAddress = bitsForSlotAddress;
839
+
840
+ if (capacity !== initialCapacity) {
841
+ filter._allocateStorage(capacity);
842
+ }
843
+
844
+ const dqLen = reader.uint32();
845
+ filter.deletionQueue = [];
846
+ for (let i = 0; i < dqLen; i++) filter.deletionQueue.push(reader.uint32());
847
+
848
+ const rqLen = reader.uint32();
849
+ filter.rejuvenationQueue = [];
850
+ for (let i = 0; i < rqLen; i++) filter.rejuvenationQueue.push(reader.uint32());
851
+
852
+ const liveCount = reader.uint32();
853
+ const indexBytes = reader.uint8();
854
+ if (indexBytes !== 2 && indexBytes !== 4) throw new Error('Unsupported PointFilter index width.');
855
+ for (let i = 0; i < liveCount; i++) {
856
+ const index = indexBytes === 2 ? reader.uint16() : reader.uint32();
857
+ const meta = reader.uint8();
858
+ const state = meta >>> 6;
859
+ const fingerprintLength = meta & 0x3f;
860
+ const motherHash = reader.uint32();
861
+ filter.states[index] = state;
862
+ filter.fingerprintLengths[index] = fingerprintLength;
863
+ filter.motherHashes[index] = motherHash;
864
+ filter.canonicalSlots[index] = motherHash & (capacity - 1);
865
+ filter.fingerprints[index] = (motherHash >>> bitsForSlotAddress) & filter.fingerprintMasks[fingerprintLength];
866
+ }
867
+ filter._rebuildPackedMetadata();
868
+
869
+ return filter;
870
+ }
871
+ }