@sepiariver/unique-set 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -4
- package/{index.d.ts → dist/index.d.mts} +15 -34
- package/dist/index.mjs +182 -0
- package/{src/index.js → index.ts} +74 -45
- package/package.json +12 -13
- package/temp.cjs +10 -0
- package/temp.mjs +10 -0
- package/tsconfig.json +23 -0
- package/index.js +0 -1
package/README.md
CHANGED
|
@@ -4,15 +4,25 @@ Extends the native `Set` class to deeply compare using [fast-deep-equal](https:/
|
|
|
4
4
|
|
|
5
5
|
Supports ESM and CommonJS.
|
|
6
6
|
|
|
7
|
+
```js
|
|
8
|
+
import { BloomSet, UniqueSet } from '@sepiariver/unique-set';
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
```js
|
|
12
|
+
const { BloomSet, UniqueSet } = require('@sepiariver/unique-set');
|
|
13
|
+
```
|
|
14
|
+
|
|
7
15
|
WARNING: This version exports 2 classes instead of a single default class, breaking backward compatibility with version 1.
|
|
8
16
|
|
|
9
|
-
The
|
|
17
|
+
The overridden methods iterate through the elements of the `UniqueSet` deeply comparing equality until existence is found. If no elements match, the entire `UniqueSet` would have been iterated. However fast `fast-deep-equal` is [reported to be](https://github.com/epoberezkin/fast-deep-equal?tab=readme-ov-file#performance-benchmark), its time complexity is dependent on the depth of objects being compared. Calling it in a loop makes performance many, many times worse than the native `Set`.
|
|
18
|
+
|
|
19
|
+
_For datasets greater than a thousand elements, there is probably a better way to achieve what you're trying to do._ Otherwise, `UniqueSet` is convenient.
|
|
10
20
|
|
|
11
|
-
UPDATE
|
|
21
|
+
**UPDATE:** Version 2 ships with `BloomSet`, which uses a Bloom filter to greatly optimize absence checks, falling back to `fast-deep-equal` to validate potential false positives. This class is useful for larger datasets, up to the tens of thousands or even 100k depending largely on configuration. It performs about 3-10 times faster than `UniqueSet` for datasets greater than 1000 elements. With fewer than a few hundred (~400) elements, `UniqueSet` can be faster—it all depends on your dataset and config options. In all scenarios except the absolute best case, BloomSet is still orders of magnitude slower than the native `Set`, but if deep equality is required, this is a decent option.
|
|
12
22
|
|
|
13
|
-
|
|
23
|
+
Highly recommended: experiment with config options to find the best performance for your use case.
|
|
14
24
|
|
|
15
|
-
|
|
25
|
+
IMPORTANT: The `delete` method is unmodified in both classes. In the case of duplicate objects that are equivalent but have different references, the results of `delete` operations may be unexpected.
|
|
16
26
|
|
|
17
27
|
## Config Options
|
|
18
28
|
|
|
@@ -1,34 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
*/
|
|
5
|
-
export class UniqueSet<T> extends Set<T> {
|
|
6
|
-
/**
|
|
7
|
-
* Creates a new `UniqueSet` instance.
|
|
8
|
-
* @param iterable Optional: an iterable with which to initialize the UniqueSet.
|
|
9
|
-
* @throws TypeError If the input is not iterable.
|
|
10
|
-
*/
|
|
1
|
+
/** A `Set` extension that ensures uniqueness of items using deep equality checks. */
|
|
2
|
+
declare class UniqueSet<T> extends Set<T> {
|
|
3
|
+
/*** @throws TypeError If the input is not iterable. */
|
|
11
4
|
constructor(iterable?: Iterable<T>);
|
|
12
|
-
|
|
13
5
|
/**
|
|
14
6
|
* Determines whether an object is in the UniqueSet using deep equality.
|
|
15
7
|
* @param o The object to check for presence in the UniqueSet.
|
|
16
8
|
* @returns `true` if the object is found, `false` otherwise.
|
|
17
9
|
*/
|
|
18
10
|
has(o: T): boolean;
|
|
19
|
-
|
|
20
11
|
/**
|
|
21
12
|
* Adds a new object to the UniqueSet if it is not already present.
|
|
22
13
|
* @param o The object to add to the UniqueSet.
|
|
23
14
|
* @returns The `UniqueSet` instance, allowing for chaining.
|
|
24
15
|
*/
|
|
25
16
|
add(o: T): this;
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
*/
|
|
31
|
-
export class BloomSet<T> extends Set<T> {
|
|
17
|
+
}
|
|
18
|
+
/** A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy. */
|
|
19
|
+
declare class BloomSet<T> extends Set<T> {
|
|
20
|
+
#private;
|
|
32
21
|
/**
|
|
33
22
|
* Creates a new `BloomSet` instance.
|
|
34
23
|
* @param iterable Optional: an iterable object with which to initialize the BloomSet.
|
|
@@ -37,24 +26,16 @@ declare module "unique-set" {
|
|
|
37
26
|
* @param options.hashCount The number of hash functions to use. Defaults to 7.
|
|
38
27
|
* @throws TypeError If the input is not iterable.
|
|
39
28
|
*/
|
|
40
|
-
constructor(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
);
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* Determines existence of an object in the BloomSet using the Bloom filter and deep equality.
|
|
47
|
-
* @param o The object to check for presence in the BloomSet.
|
|
48
|
-
* @returns `true` if the object is found, `false` otherwise.
|
|
49
|
-
*/
|
|
29
|
+
constructor(iterable?: Iterable<T>, options?: {
|
|
30
|
+
size?: number;
|
|
31
|
+
hashCount?: number;
|
|
32
|
+
});
|
|
33
|
+
/** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
|
|
50
34
|
has(o: T): boolean;
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Adds a new object to the BloomSet if it is not already present.
|
|
54
|
-
* @param o The object to add to the BloomSet.
|
|
35
|
+
/** Adds a new object to the BloomSet if it is not already present.
|
|
55
36
|
* @returns The `BloomSet` instance, allowing for chaining.
|
|
56
37
|
*/
|
|
57
38
|
add(o: T): this;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
39
|
}
|
|
40
|
+
|
|
41
|
+
export { BloomSet, UniqueSet };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
// index.ts
|
|
2
|
+
import equal from "fast-deep-equal/es6/index.js";
|
|
3
|
+
/** A `Set` extension that ensures uniqueness of items using deep equality checks. */
var UniqueSet = class extends Set {
  /**
   * Creates a new `UniqueSet`, inserting every item of `iterable` through
   * `add` so that deeply-equal duplicates are dropped.
   * @param iterable Optional: an iterable with which to initialize the UniqueSet.
   * @throws TypeError If the input is not iterable.
   */
  constructor(iterable = []) {
    // `null` bypasses the default parameter, so it is rejected explicitly to
    // get the documented error instead of an incidental property-access one.
    if (iterable == null || typeof iterable[Symbol.iterator] !== "function") {
      throw new TypeError("UniqueSet requires an iterable");
    }
    // Call super() with no arguments and add the items ourselves, so each one
    // goes through the deep-equality check in `add`.
    super();
    for (const item of iterable) {
      this.add(item);
    }
  }
  /**
   * Determines whether an object is in the UniqueSet using deep equality.
   * Every stored element is compared until a match is found, so a miss
   * walks the whole set.
   * @param o The object to check for presence in the UniqueSet.
   * @returns `true` if the object is found, `false` otherwise.
   */
  has(o) {
    for (const i of this) {
      if (equal(o, i)) {
        return true;
      }
    }
    return false;
  }
  /**
   * Adds a new object to the UniqueSet if it is not already present.
   * @param o The object to add to the UniqueSet.
   * @returns The `UniqueSet` instance, allowing for chaining.
   */
  add(o) {
    if (!this.has(o)) {
      super.add(o);
    }
    return this;
  }
};
|
|
39
|
+
/** A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy. */
var BloomSet = class extends Set {
  // Bit-packed filter storage: one bit per slot, eight slots per byte.
  #bitArray;
  // Number of addressable filter slots; every hash is reduced modulo this value.
  #aSize;
  // Number of slot indexes derived for each item.
  #hashCount;
  /**
   * Creates a new `BloomSet` instance.
   * @param iterable Optional: an iterable object with which to initialize the BloomSet.
   * @param options Bloom filter configuration options. The object is only read, never modified.
   * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
   *   Rounded up to the next prime; invalid values fall back to the default.
   * @param options.hashCount The number of hash functions to use. Defaults to 7.
   *   Invalid values fall back to the default.
   * @throws TypeError If the input is not iterable.
   */
  constructor(iterable = [], options = {}) {
    // `null` bypasses the default parameter, so it is rejected explicitly to
    // get the documented error instead of an incidental property-access one.
    if (iterable == null || typeof iterable[Symbol.iterator] !== "function") {
      throw new TypeError("BloomSet requires an iterable");
    }
    super();
    if (!options || typeof options !== "object") {
      options = {};
    }
    // Defaults are resolved into locals so the caller's object is left untouched.
    const size = this.#positiveIntOr(options.size, 6553577);
    this.#aSize = this.#findNextPrime(size);
    this.#hashCount = this.#positiveIntOr(options.hashCount, 7);
    // Size the storage from the prime-adjusted slot count, not the requested
    // size: hashes range over [0, #aSize), and a slot past the end of the
    // array could never be set, which would make `has` miss stored items.
    this.#bitArray = new Uint8Array(Math.ceil(this.#aSize / 8));
    for (const item of iterable) {
      this.add(item);
    }
  }
  /**
   * Returns `value` rounded up to an integer when it is a finite number
   * greater than zero, otherwise `fallback`.
   * @internal
   */
  #positiveIntOr(value, fallback) {
    // NaN and Infinity are rejected here: they would otherwise yield a NaN
    // modulus or a loop that never terminates.
    if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
      return fallback;
    }
    return Math.ceil(value);
  }
  /**
   * Returns the smallest prime greater than or equal to `num` (2 for anything below 2).
   * @internal
   */
  #findNextPrime(num) {
    if (num < 2) return 2;
    if (num % 2 === 0) num++; // Only odd candidates need testing.
    while (!this.#isPrime(num)) {
      num += 2;
    }
    return num;
  }
  /**
   * Trial-division primality test over candidates of the form 6k ± 1.
   * @internal
   */
  #isPrime(num) {
    if (num < 2) return false;
    if (num === 2 || num === 3) return true;
    if (num % 2 === 0 || num % 3 === 0) return false;
    const sqrt = Math.floor(Math.sqrt(num));
    for (let i = 5; i <= sqrt; i += 6) {
      if (num % i === 0 || num % (i + 2) === 0) return false;
    }
    return true;
  }
  /**
   * Produces the string that is hashed for `item`. Two values that compare
   * deeply equal must serialize identically, otherwise the filter would
   * report a stored item as absent; unequal values may share a string, which
   * only costs a fallback comparison.
   * @internal
   */
  #serialize(item) {
    if (typeof item === "number" && isNaN(item)) {
      return "NaN";
    }
    if (item && typeof item === "object") {
      if (Array.isArray(item)) {
        // Array.from visits holes as `undefined`, matching what index access
        // returns; `map` would skip them and give `[ ,1]` and `[undefined,1]`
        // different strings.
        return `[${Array.from(item, (entry) => this.#serialize(entry)).join(",")}]`;
      }
      // Keys are ordered by code unit so the result never depends on insertion
      // order. A locale-aware comparison can rank two distinct keys as equal,
      // which would leave their relative order up to insertion order.
      const body = Object.entries(item)
        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
        .map(([k, v]) => `${k}:${this.#serialize(v)}`)
        .join(",");
      return `{${body}}`;
    }
    return String(item);
  }
  /**
   * Derives `#hashCount` slot indexes for `item`, each in [0, #aSize).
   * @internal
   */
  #hashes(item) {
    const hashes = [];
    const str = this.#serialize(item);
    let hash = this.#fnv1a(str); // Base hash
    for (let i = 0; i < this.#hashCount; i++) {
      hash %= this.#aSize; // Keep the index within the filter.
      hashes.push(hash);
      // Mix the previous index into the next one and wrap to unsigned 32 bits.
      hash = (hash ^ (hash >>> 13)) * 3266489909;
      hash >>>= 0;
    }
    return hashes;
  }
  /**
   * FNV-1a style hash over the UTF-16 code units of `str`, returned as an
   * unsigned 32-bit integer.
   * @internal
   */
  #fnv1a(str) {
    if (typeof str !== "string") {
      str = String(str);
    }
    let hash = 2166136261;
    for (let i = 0; i < str.length; i++) {
      hash ^= str.charCodeAt(i);
      hash = (hash * 16777619) >>> 0;
    }
    return hash >>> 0;
  }
  /**
   * Marks every slot in `hashes` as occupied.
   * @internal
   */
  #setBits(hashes) {
    for (const hash of hashes) {
      const index = Math.floor(hash / 8);
      const bit = hash % 8;
      this.#bitArray[index] |= 1 << bit;
    }
  }
  /**
   * Returns `true` only when every slot in `hashes` is occupied.
   * @internal
   */
  #checkBits(hashes) {
    for (const hash of hashes) {
      const index = Math.floor(hash / 8);
      const bit = hash % 8;
      if (!(this.#bitArray[index] & (1 << bit))) {
        return false;
      }
    }
    return true;
  }
  /**
   * Membership test using hashes that were already computed, so `add` does
   * not have to serialize and hash the item a second time.
   * @internal
   */
  #lookup(o, hashes) {
    if (!this.#checkBits(hashes)) {
      return false; // Definitely not in the set.
    }
    // The filter can report false positives; confirm with deep equality.
    for (const i of this) {
      if (equal(o, i)) {
        return true;
      }
    }
    return false;
  }
  /**
   * Determines existence of an object in the BloomSet using the Bloom filter and deep equality.
   * @param o The object to check for presence in the BloomSet.
   * @returns `true` if the object is found, `false` otherwise.
   */
  has(o) {
    return this.#lookup(o, this.#hashes(o));
  }
  /**
   * Adds a new object to the BloomSet if it is not already present.
   * @param o The object to add to the BloomSet.
   * @returns The `BloomSet` instance, allowing for chaining.
   */
  add(o) {
    const hashes = this.#hashes(o);
    if (!this.#lookup(o, hashes)) {
      this.#setBits(hashes);
      super.add(o);
    }
    return this;
  }
};
|
|
179
|
+
export {
|
|
180
|
+
BloomSet,
|
|
181
|
+
UniqueSet
|
|
182
|
+
};
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
import equal from "fast-deep-equal";
|
|
1
|
+
import equal from "fast-deep-equal/es6/index.js";
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
/** A `Set` extension that ensures uniqueness of items using deep equality checks. */
|
|
4
|
+
export class UniqueSet<T> extends Set<T> {
|
|
5
|
+
/*** @throws TypeError If the input is not iterable. */
|
|
6
|
+
constructor(iterable: Iterable<T> = []) {
|
|
5
7
|
if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
|
|
6
8
|
throw new TypeError("UniqueSet requires an iterable");
|
|
7
9
|
}
|
|
@@ -10,8 +12,12 @@ export class UniqueSet extends Set {
|
|
|
10
12
|
this.add(item);
|
|
11
13
|
}
|
|
12
14
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
+
/**
|
|
16
|
+
* Determines whether an object is in the UniqueSet using deep equality.
|
|
17
|
+
* @param o The object to check for presence in the UniqueSet.
|
|
18
|
+
* @returns `true` if the object is found, `false` otherwise.
|
|
19
|
+
*/
|
|
20
|
+
has(o: T): boolean {
|
|
15
21
|
for (const i of this) {
|
|
16
22
|
if (equal(o, i)) {
|
|
17
23
|
return true;
|
|
@@ -19,8 +25,12 @@ export class UniqueSet extends Set {
|
|
|
19
25
|
}
|
|
20
26
|
return false;
|
|
21
27
|
}
|
|
22
|
-
|
|
23
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Adds a new object to the UniqueSet if it is not already present.
|
|
30
|
+
* @param o The object to add to the UniqueSet.
|
|
31
|
+
* @returns The `UniqueSet` instance, allowing for chaining.
|
|
32
|
+
*/
|
|
33
|
+
add(o: T): this {
|
|
24
34
|
if (!this.has(o)) {
|
|
25
35
|
super.add(o);
|
|
26
36
|
}
|
|
@@ -28,8 +38,23 @@ export class UniqueSet extends Set {
|
|
|
28
38
|
}
|
|
29
39
|
}
|
|
30
40
|
|
|
31
|
-
|
|
32
|
-
|
|
41
|
+
/** A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy. */
|
|
42
|
+
export class BloomSet<T> extends Set<T> {
|
|
43
|
+
#bitArray: Uint8Array;
|
|
44
|
+
#aSize: number;
|
|
45
|
+
#hashCount: number;
|
|
46
|
+
/**
|
|
47
|
+
* Creates a new `BloomSet` instance.
|
|
48
|
+
* @param iterable Optional: an iterable object with which to initialize the BloomSet.
|
|
49
|
+
* @param options Bloom filter configuration options.
|
|
50
|
+
* @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
|
|
51
|
+
* @param options.hashCount The number of hash functions to use. Defaults to 7.
|
|
52
|
+
* @throws TypeError If the input is not iterable.
|
|
53
|
+
*/
|
|
54
|
+
constructor(
|
|
55
|
+
iterable: Iterable<T> = [],
|
|
56
|
+
options: { size?: number; hashCount?: number } = {}
|
|
57
|
+
) {
|
|
33
58
|
if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
|
|
34
59
|
throw new TypeError("BloomSet requires an iterable");
|
|
35
60
|
}
|
|
@@ -39,37 +64,41 @@ export class BloomSet extends Set {
|
|
|
39
64
|
options = {};
|
|
40
65
|
}
|
|
41
66
|
|
|
42
|
-
|
|
67
|
+
options.hashCount ??= 7;
|
|
68
|
+
options.size ??= 6553577;
|
|
69
|
+
|
|
70
|
+
let { size, hashCount } = options;
|
|
43
71
|
|
|
44
72
|
if (typeof size !== "number" || size <= 0) {
|
|
45
73
|
size = 6553577; // Targeting < 1 collision per 100,000 elements, ~819 KB memory, needs 7 hashes
|
|
46
74
|
}
|
|
47
|
-
this
|
|
75
|
+
this.#aSize = this.#findNextPrime(size);
|
|
48
76
|
|
|
49
77
|
if (typeof hashCount !== "number" || hashCount <= 0) {
|
|
50
78
|
hashCount = 7;
|
|
51
79
|
}
|
|
52
|
-
this
|
|
53
|
-
|
|
54
|
-
this.bitArray = new Uint8Array(Math.ceil(size / 8));
|
|
80
|
+
this.#hashCount = hashCount;
|
|
81
|
+
this.#bitArray = new Uint8Array(Math.ceil(size / 8));
|
|
55
82
|
|
|
56
83
|
for (const item of iterable) {
|
|
57
84
|
this.add(item);
|
|
58
85
|
}
|
|
59
86
|
}
|
|
60
87
|
|
|
61
|
-
|
|
88
|
+
/** @internal */
|
|
89
|
+
#findNextPrime(num: number) {
|
|
62
90
|
if (num < 2) return 2;
|
|
63
91
|
if (num % 2 === 0) num++; // Odd numbers only
|
|
64
92
|
|
|
65
|
-
while (!this
|
|
93
|
+
while (!this.#isPrime(num)) {
|
|
66
94
|
num += 2; // Odd numbers only
|
|
67
95
|
}
|
|
68
96
|
|
|
69
97
|
return num;
|
|
70
98
|
}
|
|
71
99
|
|
|
72
|
-
|
|
100
|
+
/** @internal */
|
|
101
|
+
#isPrime(num: number) {
|
|
73
102
|
if (num < 2) return false;
|
|
74
103
|
if (num === 2 || num === 3) return true;
|
|
75
104
|
if (num % 2 === 0 || num % 3 === 0) return false;
|
|
@@ -82,13 +111,14 @@ export class BloomSet extends Set {
|
|
|
82
111
|
return true;
|
|
83
112
|
}
|
|
84
113
|
|
|
85
|
-
|
|
114
|
+
/** @internal */
|
|
115
|
+
#serialize(item: T | number | object): string {
|
|
86
116
|
if (typeof item === "number" && isNaN(item)) {
|
|
87
117
|
return "NaN";
|
|
88
118
|
}
|
|
89
119
|
|
|
90
120
|
if (item && typeof item === "object") {
|
|
91
|
-
const serialize = this.
|
|
121
|
+
const serialize = this.#serialize.bind(this);
|
|
92
122
|
if (Array.isArray(item)) {
|
|
93
123
|
return `[${item.map(serialize).join(",")}]`;
|
|
94
124
|
} else {
|
|
@@ -102,14 +132,15 @@ export class BloomSet extends Set {
|
|
|
102
132
|
return String(item);
|
|
103
133
|
}
|
|
104
134
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
const
|
|
108
|
-
|
|
135
|
+
/** @internal */
|
|
136
|
+
#hashes(item: T) {
|
|
137
|
+
const hashes: number[] = [];
|
|
138
|
+
const str = this.#serialize(item);
|
|
139
|
+
let hash = this.#fnv1a(str); // Base hash
|
|
109
140
|
|
|
110
141
|
// Bloom into hashCount hash values
|
|
111
|
-
for (let i = 0; i < this
|
|
112
|
-
hash %= this
|
|
142
|
+
for (let i = 0; i < this.#hashCount; i++) {
|
|
143
|
+
hash %= this.#aSize; // Ensure within bounds
|
|
113
144
|
// Track
|
|
114
145
|
hashes.push(hash);
|
|
115
146
|
// Modify
|
|
@@ -120,7 +151,8 @@ export class BloomSet extends Set {
|
|
|
120
151
|
return hashes;
|
|
121
152
|
}
|
|
122
153
|
|
|
123
|
-
|
|
154
|
+
/** @internal */
|
|
155
|
+
#fnv1a(str: string) {
|
|
124
156
|
if (typeof str !== "string") {
|
|
125
157
|
str = String(str);
|
|
126
158
|
}
|
|
@@ -132,28 +164,30 @@ export class BloomSet extends Set {
|
|
|
132
164
|
return hash >>> 0;
|
|
133
165
|
}
|
|
134
166
|
|
|
135
|
-
|
|
167
|
+
/** @internal */
|
|
168
|
+
#setBits(hashes: number[]): void {
|
|
136
169
|
for (const hash of hashes) {
|
|
137
170
|
const index = Math.floor(hash / 8);
|
|
138
171
|
const bit = hash % 8;
|
|
139
|
-
this
|
|
172
|
+
this.#bitArray[index]! |= 1 << bit;
|
|
140
173
|
}
|
|
141
174
|
}
|
|
142
175
|
|
|
143
|
-
|
|
176
|
+
/** @internal */
|
|
177
|
+
#checkBits(hashes: number[]): boolean {
|
|
144
178
|
for (const hash of hashes) {
|
|
145
179
|
const index = Math.floor(hash / 8);
|
|
146
180
|
const bit = hash % 8;
|
|
147
|
-
if (!(this
|
|
181
|
+
if (!(this.#bitArray[index]! & (1 << bit))) {
|
|
148
182
|
return false;
|
|
149
183
|
}
|
|
150
184
|
}
|
|
151
185
|
return true;
|
|
152
186
|
}
|
|
153
|
-
|
|
154
|
-
has(o) {
|
|
155
|
-
const hashes = this
|
|
156
|
-
if (!this
|
|
187
|
+
/** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
|
|
188
|
+
has(o: T): boolean {
|
|
189
|
+
const hashes = this.#hashes(o);
|
|
190
|
+
if (!this.#checkBits(hashes)) {
|
|
157
191
|
return false; // Definitely not in the set
|
|
158
192
|
}
|
|
159
193
|
// Fall back to fast-deep-equal for false positives
|
|
@@ -164,20 +198,15 @@ export class BloomSet extends Set {
|
|
|
164
198
|
}
|
|
165
199
|
return false;
|
|
166
200
|
}
|
|
167
|
-
|
|
168
|
-
|
|
201
|
+
/** Adds a new object to the BloomSet if it is not already present.
|
|
202
|
+
* @returns The `BloomSet` instance, allowing for chaining.
|
|
203
|
+
*/
|
|
204
|
+
add(o: T): this {
|
|
169
205
|
if (!this.has(o)) {
|
|
170
|
-
const hashes = this
|
|
171
|
-
this
|
|
206
|
+
const hashes = this.#hashes(o);
|
|
207
|
+
this.#setBits(hashes);
|
|
172
208
|
super.add(o);
|
|
173
209
|
}
|
|
174
210
|
return this;
|
|
175
211
|
}
|
|
176
212
|
}
|
|
177
|
-
|
|
178
|
-
if (typeof module !== "undefined" && module.exports) {
|
|
179
|
-
module.exports = {
|
|
180
|
-
UniqueSet,
|
|
181
|
-
BloomSet,
|
|
182
|
-
};
|
|
183
|
-
}
|
package/package.json
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sepiariver/unique-set",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.1",
|
|
4
4
|
"description": "Extends the native Set class to deeply compare using fast-deep-equal, with optional Bloom filter optimization. This version exports 2 classes instead of a default, breaking b/c with version 1.",
|
|
5
|
-
"main": "index.js",
|
|
6
|
-
"module": "dist/index.
|
|
7
|
-
"types": "index.d.
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/index.mjs",
|
|
7
|
+
"types": "dist/index.d.mts",
|
|
8
8
|
"exports": {
|
|
9
|
-
"require": "./index.js",
|
|
10
|
-
"import": "./dist/index.
|
|
9
|
+
"require": "./dist/index.js",
|
|
10
|
+
"import": "./dist/index.mjs"
|
|
11
11
|
},
|
|
12
12
|
"scripts": {
|
|
13
|
-
"test": "
|
|
14
|
-
"
|
|
13
|
+
"test": "npm run build && vitest",
|
|
14
|
+
"lint": "tsc",
|
|
15
|
+
"build": "tsup index.ts --format esm --dts"
|
|
15
16
|
},
|
|
16
17
|
"repository": {
|
|
17
18
|
"type": "git",
|
|
@@ -35,10 +36,8 @@
|
|
|
35
36
|
"fast-deep-equal": "^3.1.3"
|
|
36
37
|
},
|
|
37
38
|
"devDependencies": {
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"babel-jest": "^27.5.1",
|
|
42
|
-
"jest": "^27.5.1"
|
|
39
|
+
"tsup": "^8.3.5",
|
|
40
|
+
"typescript": "^5.7.2",
|
|
41
|
+
"vitest": "^2.1.8"
|
|
43
42
|
}
|
|
44
43
|
}
|
package/temp.cjs
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// CommonJS smoke check: load the built package and exercise both classes once.
const dist = require("./dist/index.js");

// A value added to a BloomSet must be reported as present.
const bloomSet = new dist.BloomSet().add("foo");
console.log(bloomSet.has("foo")); // true

// Adding the same value twice to a UniqueSet must store it only once.
const uniqueSet = new dist.UniqueSet().add("foo").add("foo");
console.log(uniqueSet.size); // 1
|
package/temp.mjs
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// ESM smoke check: load the ES module build and exercise both classes once.
// The import targets dist/index.mjs, the file package.json maps to the
// "import" condition, so this script runs the build ESM consumers receive.
import { BloomSet, UniqueSet } from "./dist/index.mjs";

// A value added to a BloomSet must be reported as present.
const bloom = new BloomSet();
bloom.add("foo");
console.log(bloom.has("foo")); // true

// Adding the same value twice to a UniqueSet must store it only once.
const unique = new UniqueSet();
unique.add("foo");
unique.add("foo");
console.log(unique.size); // 1
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
/* Language and Environment */
|
|
4
|
+
"target": "ES2023", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
|
5
|
+
|
|
6
|
+
/* Modules */
|
|
7
|
+
"module": "ES2022", /* Specify what module code is generated. */
|
|
8
|
+
"removeComments": false, /* Disable emitting comments. */
|
|
9
|
+
"moduleResolution": "bundler",
|
|
10
|
+
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
|
11
|
+
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
|
12
|
+
/* Interop Constraints */
|
|
13
|
+
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
|
14
|
+
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
|
15
|
+
|
|
16
|
+
/* Type Checking */
|
|
17
|
+
"strict": true, /* Enable all strict type-checking options. */
|
|
18
|
+
/* Completeness */
|
|
19
|
+
"skipLibCheck": true, /* Skip type checking all .d.ts files. */
|
|
20
|
+
"noUncheckedIndexedAccess": true,
|
|
21
|
+
"noEmit": true
|
|
22
|
+
}
|
|
23
|
+
}
|
package/index.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
module.exports = require("./dist");
|