@sepiariver/unique-set 2.0.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,182 +1,160 @@
1
1
  // index.ts
2
2
  import equal from "fast-deep-equal/es6/index.js";
3
- var UniqueSet = class extends Set {
4
- /*** @throws TypeError If the input is not iterable. */
5
- constructor(iterable = []) {
6
- if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
7
- throw new TypeError("UniqueSet requires an iterable");
8
- }
9
- super();
10
- for (const item of iterable) {
11
- this.add(item);
12
- }
3
+ var _f64 = new Float64Array(1);
4
+ var _u8 = new Uint8Array(_f64.buffer);
5
+ var structuralHash = (value) => {
6
+ return _shash(value, 2166136261) >>> 0;
7
+ };
8
+ var _mix = (hash, byte) => {
9
+ return Math.imul(hash ^ byte, 16777619);
10
+ };
11
+ var _mixStr = (hash, str) => {
12
+ for (let i = 0; i < str.length; i++) {
13
+ hash = Math.imul(hash ^ str.charCodeAt(i), 16777619);
13
14
  }
14
- /**
15
- * Determines whether an object is in the UniqueSet using deep equality.
16
- * @param o The object to check for presence in the UniqueSet.
17
- * @returns `true` if the object is found, `false` otherwise.
18
- */
19
- has(o) {
20
- for (const i of this) {
21
- if (equal(o, i)) {
22
- return true;
23
- }
24
- }
25
- return false;
15
+ return hash;
16
+ };
17
+ var _shash = (value, hash) => {
18
+ if (value === null) return _mix(hash, 0);
19
+ if (value === void 0) return _mix(hash, 1);
20
+ switch (typeof value) {
21
+ case "boolean":
22
+ return _mix(hash, value ? 3 : 2);
23
+ case "number":
24
+ hash = _mix(hash, 5);
25
+ if (isNaN(value)) return _mix(hash, 4);
26
+ if (value === 0) return _mix(hash, 48);
27
+ _f64[0] = value;
28
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]);
29
+ return hash;
30
+ case "string":
31
+ hash = _mix(hash, 6);
32
+ return _mixStr(hash, value);
33
+ case "bigint":
34
+ hash = _mix(hash, 7);
35
+ return _mixStr(hash, value.toString());
36
+ case "function":
37
+ case "symbol":
38
+ hash = _mix(hash, 8);
39
+ return _mixStr(hash, String(value));
40
+ default:
41
+ break;
26
42
  }
27
- /**
28
- * Adds a new object to the UniqueSet if it is not already present.
29
- * @param o The object to add to the UniqueSet.
30
- * @returns The `UniqueSet` instance, allowing for chaining.
31
- */
32
- add(o) {
33
- if (!this.has(o)) {
34
- super.add(o);
35
- }
36
- return this;
43
+ if (Array.isArray(value)) {
44
+ hash = _mix(hash, 16);
45
+ for (let i = 0; i < value.length; i++) hash = _shash(value[i], hash);
46
+ return hash;
47
+ }
48
+ if (value instanceof Map) {
49
+ hash = _mix(hash, 17);
50
+ const entries = Array.from(value.entries()).sort(
51
+ ([a], [b]) => String(a).localeCompare(String(b))
52
+ );
53
+ for (const [k, v] of entries) {
54
+ hash = _shash(k, hash);
55
+ hash = _shash(v, hash);
56
+ }
57
+ return hash;
37
58
  }
59
+ if (value instanceof Set) {
60
+ hash = _mix(hash, 18);
61
+ for (const v of value) hash = _shash(v, hash);
62
+ return hash;
63
+ }
64
+ if (value instanceof Date) {
65
+ hash = _mix(hash, 20);
66
+ _f64[0] = value.getTime();
67
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]);
68
+ return hash;
69
+ }
70
+ if (value instanceof RegExp) {
71
+ hash = _mix(hash, 21);
72
+ return _mixStr(hash, value.toString());
73
+ }
74
+ hash = _mix(hash, 19);
75
+ const keys = Object.keys(value).sort();
76
+ for (const key of keys) {
77
+ hash = _mixStr(hash, key);
78
+ hash = _shash(value[key], hash);
79
+ }
80
+ return hash;
38
81
  };
39
- var BloomSet = class extends Set {
40
- #bitArray;
41
- #aSize;
42
- #hashCount;
43
- /**
44
- * Creates a new `BloomSet` instance.
45
- * @param iterable Optional: an iterable object with which to initialize the BloomSet.
46
- * @param options Bloom filter configuration options.
47
- * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
48
- * @param options.hashCount The number of hash functions to use. Defaults to 7.
49
- * @throws TypeError If the input is not iterable.
50
- */
51
- constructor(iterable = [], options = {}) {
82
+ var MapSet = class {
83
+ #map;
84
+ #size;
85
+ constructor(iterable = []) {
52
86
  if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
53
- throw new TypeError("BloomSet requires an iterable");
87
+ throw new TypeError("MapSet requires an iterable");
54
88
  }
55
- super();
56
- if (!options || typeof options !== "object") {
57
- options = {};
58
- }
59
- options.hashCount ??= 7;
60
- options.size ??= 6553577;
61
- let { size, hashCount } = options;
62
- if (typeof size !== "number" || size <= 0) {
63
- size = 6553577;
64
- }
65
- this.#aSize = this.#findNextPrime(size);
66
- if (typeof hashCount !== "number" || hashCount <= 0) {
67
- hashCount = 7;
68
- }
69
- this.#hashCount = hashCount;
70
- this.#bitArray = new Uint8Array(Math.ceil(size / 8));
89
+ this.#map = /* @__PURE__ */ new Map();
90
+ this.#size = 0;
71
91
  for (const item of iterable) {
72
92
  this.add(item);
73
93
  }
74
94
  }
75
- /** @internal */
76
- #findNextPrime(num) {
77
- if (num < 2) return 2;
78
- if (num % 2 === 0) num++;
79
- while (!this.#isPrime(num)) {
80
- num += 2;
81
- }
82
- return num;
83
- }
84
- /** @internal */
85
- #isPrime(num) {
86
- if (num < 2) return false;
87
- if (num === 2 || num === 3) return true;
88
- if (num % 2 === 0 || num % 3 === 0) return false;
89
- const sqrt = Math.floor(Math.sqrt(num));
90
- for (let i = 5; i <= sqrt; i += 6) {
91
- if (num % i === 0 || num % (i + 2) === 0) return false;
92
- }
93
- return true;
94
- }
95
- /** @internal */
96
- #serialize(item) {
97
- if (typeof item === "number" && isNaN(item)) {
98
- return "NaN";
99
- }
100
- if (item && typeof item === "object") {
101
- const serialize = this.#serialize.bind(this);
102
- if (Array.isArray(item)) {
103
- return `[${item.map(serialize).join(",")}]`;
104
- } else {
105
- return `{${Object.entries(item).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${k}:${serialize(v)}`).join(",")}}`;
95
+ add(value) {
96
+ const hash = structuralHash(value);
97
+ const bucket = this.#map.get(hash);
98
+ if (!bucket) {
99
+ this.#map.set(hash, [value]);
100
+ this.#size++;
101
+ } else {
102
+ for (const item of bucket) {
103
+ if (equal(value, item)) return this;
106
104
  }
105
+ bucket.push(value);
106
+ this.#size++;
107
107
  }
108
- return String(item);
109
- }
110
- /** @internal */
111
- #hashes(item) {
112
- const hashes = [];
113
- const str = this.#serialize(item);
114
- let hash = this.#fnv1a(str);
115
- for (let i = 0; i < this.#hashCount; i++) {
116
- hash %= this.#aSize;
117
- hashes.push(hash);
118
- hash = (hash ^ hash >>> 13) * 3266489909;
119
- hash >>>= 0;
120
- }
121
- return hashes;
108
+ return this;
122
109
  }
123
- /** @internal */
124
- #fnv1a(str) {
125
- if (typeof str !== "string") {
126
- str = String(str);
127
- }
128
- let hash = 2166136261;
129
- for (let i = 0; i < str.length; i++) {
130
- hash ^= str.charCodeAt(i);
131
- hash = hash * 16777619 >>> 0;
132
- }
133
- return hash >>> 0;
134
- }
135
- /** @internal */
136
- #setBits(hashes) {
137
- for (const hash of hashes) {
138
- const index = Math.floor(hash / 8);
139
- const bit = hash % 8;
140
- this.#bitArray[index] |= 1 << bit;
110
+ has(value) {
111
+ const hash = structuralHash(value);
112
+ const bucket = this.#map.get(hash);
113
+ if (!bucket) return false;
114
+ for (const item of bucket) {
115
+ if (equal(value, item)) return true;
141
116
  }
117
+ return false;
142
118
  }
143
- /** @internal */
144
- #checkBits(hashes) {
145
- for (const hash of hashes) {
146
- const index = Math.floor(hash / 8);
147
- const bit = hash % 8;
148
- if (!(this.#bitArray[index] & 1 << bit)) {
149
- return false;
119
+ delete(value) {
120
+ const hash = structuralHash(value);
121
+ const bucket = this.#map.get(hash);
122
+ if (!bucket) return false;
123
+ for (let i = 0; i < bucket.length; i++) {
124
+ if (equal(value, bucket[i])) {
125
+ bucket.splice(i, 1);
126
+ if (bucket.length === 0) this.#map.delete(hash);
127
+ this.#size--;
128
+ return true;
150
129
  }
151
130
  }
152
- return true;
131
+ return false;
153
132
  }
154
- /** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
155
- has(o) {
156
- const hashes = this.#hashes(o);
157
- if (!this.#checkBits(hashes)) {
158
- return false;
159
- }
160
- for (const i of this) {
161
- if (equal(o, i)) {
162
- return true;
133
+ get size() {
134
+ return this.#size;
135
+ }
136
+ clear() {
137
+ this.#map.clear();
138
+ this.#size = 0;
139
+ }
140
+ forEach(callback, thisArg) {
141
+ for (const bucket of this.#map.values()) {
142
+ for (const value of bucket) {
143
+ callback.call(thisArg, value, value, this);
163
144
  }
164
145
  }
165
- return false;
166
146
  }
167
- /** Adds a new object to the BloomSet if it is not already present.
168
- * @returns The `BloomSet` instance, allowing for chaining.
169
- */
170
- add(o) {
171
- if (!this.has(o)) {
172
- const hashes = this.#hashes(o);
173
- this.#setBits(hashes);
174
- super.add(o);
147
+ *values() {
148
+ for (const bucket of this.#map.values()) {
149
+ yield* bucket;
175
150
  }
176
- return this;
151
+ }
152
+ *[Symbol.iterator]() {
153
+ yield* this.values();
177
154
  }
178
155
  };
179
156
  export {
180
- BloomSet,
181
- UniqueSet
157
+ MapSet,
158
+ MapSet as UniqueSet,
159
+ structuralHash
182
160
  };
package/index.ts CHANGED
@@ -1,212 +1,182 @@
1
1
  import equal from "fast-deep-equal/es6/index.js";
2
2
 
3
- /** A `Set` extension that ensures uniqueness of items using deep equality checks. */
4
- export class UniqueSet<T> extends Set<T> {
5
- /*** @throws TypeError If the input is not iterable. */
6
- constructor(iterable: Iterable<T> = []) {
7
- if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
8
- throw new TypeError("UniqueSet requires an iterable");
9
- }
10
- super();
11
- for (const item of iterable) {
12
- this.add(item);
13
- }
3
+ /**
4
+ * Streaming structural hash — computes a 32-bit FNV-1a hash by traversing
5
+ * the value directly, without allocating an intermediate string.
6
+ */
7
+ const _f64 = new Float64Array(1);
8
+ const _u8 = new Uint8Array(_f64.buffer);
9
+
10
+ export const structuralHash = (value: unknown): number => {
11
+ return _shash(value, 0x811c9dc5) >>> 0;
12
+ };
13
+
14
+ const _mix = (hash: number, byte: number): number => {
15
+ return Math.imul(hash ^ byte, 0x01000193);
16
+ };
17
+
18
+ const _mixStr = (hash: number, str: string): number => {
19
+ for (let i = 0; i < str.length; i++) {
20
+ hash = Math.imul(hash ^ str.charCodeAt(i), 0x01000193);
14
21
  }
15
- /**
16
- * Determines whether an object is in the UniqueSet using deep equality.
17
- * @param o The object to check for presence in the UniqueSet.
18
- * @returns `true` if the object is found, `false` otherwise.
19
- */
20
- has(o: T): boolean {
21
- for (const i of this) {
22
- if (equal(o, i)) {
23
- return true;
24
- }
25
- }
26
- return false;
22
+ return hash;
23
+ };
24
+
25
+ const _shash = (value: unknown, hash: number): number => {
26
+ if (value === null) return _mix(hash, 0x00);
27
+ if (value === undefined) return _mix(hash, 0x01);
28
+
29
+ switch (typeof value) {
30
+ case "boolean":
31
+ return _mix(hash, value ? 0x03 : 0x02);
32
+ case "number":
33
+ hash = _mix(hash, 0x05);
34
+ if (isNaN(value)) return _mix(hash, 0x04);
35
+ if (value === 0) return _mix(hash, 0x30); // normalize 0 and -0
36
+ _f64[0] = value;
37
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]!);
38
+ return hash;
39
+ case "string":
40
+ hash = _mix(hash, 0x06);
41
+ return _mixStr(hash, value);
42
+ case "bigint":
43
+ hash = _mix(hash, 0x07);
44
+ return _mixStr(hash, value.toString());
45
+ case "function":
46
+ case "symbol":
47
+ hash = _mix(hash, 0x08);
48
+ return _mixStr(hash, String(value));
49
+ default:
50
+ break;
27
51
  }
28
- /**
29
- * Adds a new object to the UniqueSet if it is not already present.
30
- * @param o The object to add to the UniqueSet.
31
- * @returns The `UniqueSet` instance, allowing for chaining.
32
- */
33
- add(o: T): this {
34
- if (!this.has(o)) {
35
- super.add(o);
36
- }
37
- return this;
38
- }
39
- }
40
52
 
41
- /** A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy. */
42
- export class BloomSet<T> extends Set<T> {
43
- #bitArray: Uint8Array;
44
- #aSize: number;
45
- #hashCount: number;
46
- /**
47
- * Creates a new `BloomSet` instance.
48
- * @param iterable Optional: an iterable object with which to initialize the BloomSet.
49
- * @param options Bloom filter configuration options.
50
- * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
51
- * @param options.hashCount The number of hash functions to use. Defaults to 7.
52
- * @throws TypeError If the input is not iterable.
53
- */
54
- constructor(
55
- iterable: Iterable<T> = [],
56
- options: { size?: number; hashCount?: number } = {}
57
- ) {
58
- if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
59
- throw new TypeError("BloomSet requires an iterable");
60
- }
61
- super();
62
-
63
- if (!options || typeof options !== "object") {
64
- options = {};
65
- }
66
-
67
- options.hashCount ??= 7;
68
- options.size ??= 6553577;
53
+ if (Array.isArray(value)) {
54
+ hash = _mix(hash, 0x10);
55
+ for (let i = 0; i < value.length; i++) hash = _shash(value[i], hash);
56
+ return hash;
57
+ }
58
+ if (value instanceof Map) {
59
+ hash = _mix(hash, 0x11);
60
+ const entries = Array.from(value.entries()).sort(([a], [b]) =>
61
+ String(a).localeCompare(String(b))
62
+ );
63
+ for (const [k, v] of entries) {
64
+ hash = _shash(k, hash);
65
+ hash = _shash(v, hash);
66
+ }
67
+ return hash;
68
+ }
69
+ if (value instanceof Set) {
70
+ hash = _mix(hash, 0x12);
71
+ for (const v of value) hash = _shash(v, hash);
72
+ return hash;
73
+ }
74
+ if (value instanceof Date) {
75
+ hash = _mix(hash, 0x14);
76
+ _f64[0] = value.getTime();
77
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]!);
78
+ return hash;
79
+ }
80
+ if (value instanceof RegExp) {
81
+ hash = _mix(hash, 0x15);
82
+ return _mixStr(hash, value.toString());
83
+ }
69
84
 
70
- let { size, hashCount } = options;
85
+ // Plain object sort keys for order-independence
86
+ hash = _mix(hash, 0x13);
87
+ const keys = Object.keys(value as object).sort();
88
+ for (const key of keys) {
89
+ hash = _mixStr(hash, key);
90
+ hash = _shash((value as Record<string, unknown>)[key], hash);
91
+ }
92
+ return hash;
93
+ };
71
94
 
72
- if (typeof size !== "number" || size <= 0) {
73
- size = 6553577; // Targeting < 1 collision per 100,000 elements, ~819 KB memory, needs 7 hashes
74
- }
75
- this.#aSize = this.#findNextPrime(size);
95
+ export class MapSet<T> {
96
+ #map: Map<number, T[]>;
97
+ #size: number;
76
98
 
77
- if (typeof hashCount !== "number" || hashCount <= 0) {
78
- hashCount = 7;
99
+ constructor(iterable: Iterable<T> = []) {
100
+ if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
101
+ throw new TypeError("MapSet requires an iterable");
79
102
  }
80
- this.#hashCount = hashCount;
81
- this.#bitArray = new Uint8Array(Math.ceil(size / 8));
82
-
103
+ this.#map = new Map();
104
+ this.#size = 0;
83
105
  for (const item of iterable) {
84
106
  this.add(item);
85
107
  }
86
108
  }
87
109
 
88
- /** @internal */
89
- #findNextPrime(num: number) {
90
- if (num < 2) return 2;
91
- if (num % 2 === 0) num++; // Odd numbers only
92
-
93
- while (!this.#isPrime(num)) {
94
- num += 2; // Odd numbers only
110
+ add(value: T): this {
111
+ const hash = structuralHash(value);
112
+ const bucket = this.#map.get(hash);
113
+ if (!bucket) {
114
+ this.#map.set(hash, [value]);
115
+ this.#size++;
116
+ } else {
117
+ for (const item of bucket) {
118
+ if (equal(value, item)) return this;
119
+ }
120
+ bucket.push(value);
121
+ this.#size++;
95
122
  }
96
-
97
- return num;
123
+ return this;
98
124
  }
99
125
 
100
- /** @internal */
101
- #isPrime(num: number) {
102
- if (num < 2) return false;
103
- if (num === 2 || num === 3) return true;
104
- if (num % 2 === 0 || num % 3 === 0) return false;
105
-
106
- const sqrt = Math.floor(Math.sqrt(num));
107
- for (let i = 5; i <= sqrt; i += 6) {
108
- if (num % i === 0 || num % (i + 2) === 0) return false;
126
+ has(value: T): boolean {
127
+ const hash = structuralHash(value);
128
+ const bucket = this.#map.get(hash);
129
+ if (!bucket) return false;
130
+ for (const item of bucket) {
131
+ if (equal(value, item)) return true;
109
132
  }
110
-
111
- return true;
133
+ return false;
112
134
  }
113
135
 
114
- /** @internal */
115
- #serialize(item: T | number | object): string {
116
- if (typeof item === "number" && isNaN(item)) {
117
- return "NaN";
118
- }
119
-
120
- if (item && typeof item === "object") {
121
- const serialize = this.#serialize.bind(this);
122
- if (Array.isArray(item)) {
123
- return `[${item.map(serialize).join(",")}]`;
124
- } else {
125
- return `{${Object.entries(item)
126
- .sort(([a], [b]) => a.localeCompare(b))
127
- .map(([k, v]) => `${k}:${serialize(v)}`)
128
- .join(",")}}`;
136
+ delete(value: T): boolean {
137
+ const hash = structuralHash(value);
138
+ const bucket = this.#map.get(hash);
139
+ if (!bucket) return false;
140
+ for (let i = 0; i < bucket.length; i++) {
141
+ if (equal(value, bucket[i])) {
142
+ bucket.splice(i, 1);
143
+ if (bucket.length === 0) this.#map.delete(hash);
144
+ this.#size--;
145
+ return true;
129
146
  }
130
147
  }
131
-
132
- return String(item);
133
- }
134
-
135
- /** @internal */
136
- #hashes(item: T) {
137
- const hashes: number[] = [];
138
- const str = this.#serialize(item);
139
- let hash = this.#fnv1a(str); // Base hash
140
-
141
- // Bloom into hashCount hash values
142
- for (let i = 0; i < this.#hashCount; i++) {
143
- hash %= this.#aSize; // Ensure within bounds
144
- // Track
145
- hashes.push(hash);
146
- // Modify
147
- hash = (hash ^ (hash >>> 13)) * 0xc2b2ae35;
148
- hash >>>= 0; // Ensure unsigned 32-bit integer
149
- }
150
-
151
- return hashes;
148
+ return false;
152
149
  }
153
150
 
154
- /** @internal */
155
- #fnv1a(str: string) {
156
- if (typeof str !== "string") {
157
- str = String(str);
158
- }
159
- let hash = 2166136261; // FNV offset basis for 32-bit
160
- for (let i = 0; i < str.length; i++) {
161
- hash ^= str.charCodeAt(i);
162
- hash = (hash * 16777619) >>> 0; // Multiply by the FNV prime and ensure 32-bit unsigned
163
- }
164
- return hash >>> 0;
151
+ get size(): number {
152
+ return this.#size;
165
153
  }
166
154
 
167
- /** @internal */
168
- #setBits(hashes: number[]): void {
169
- for (const hash of hashes) {
170
- const index = Math.floor(hash / 8);
171
- const bit = hash % 8;
172
- this.#bitArray[index]! |= 1 << bit;
173
- }
155
+ clear(): void {
156
+ this.#map.clear();
157
+ this.#size = 0;
174
158
  }
175
159
 
176
- /** @internal */
177
- #checkBits(hashes: number[]): boolean {
178
- for (const hash of hashes) {
179
- const index = Math.floor(hash / 8);
180
- const bit = hash % 8;
181
- if (!(this.#bitArray[index]! & (1 << bit))) {
182
- return false;
160
+ forEach(
161
+ callback: (value: T, valueAgain: T, set: this) => void,
162
+ thisArg?: any
163
+ ): void {
164
+ for (const bucket of this.#map.values()) {
165
+ for (const value of bucket) {
166
+ callback.call(thisArg, value, value, this);
183
167
  }
184
168
  }
185
- return true;
186
169
  }
187
- /** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
188
- has(o: T): boolean {
189
- const hashes = this.#hashes(o);
190
- if (!this.#checkBits(hashes)) {
191
- return false; // Definitely not in the set
192
- }
193
- // Fall back to fast-deep-equal for false positives
194
- for (const i of this) {
195
- if (equal(o, i)) {
196
- return true;
197
- }
170
+
171
+ *values(): IterableIterator<T> {
172
+ for (const bucket of this.#map.values()) {
173
+ yield* bucket;
198
174
  }
199
- return false;
200
175
  }
201
- /** Adds a new object to the BloomSet if it is not already present.
202
- * @returns The `BloomSet` instance, allowing for chaining.
203
- */
204
- add(o: T): this {
205
- if (!this.has(o)) {
206
- const hashes = this.#hashes(o);
207
- this.#setBits(hashes);
208
- super.add(o);
209
- }
210
- return this;
176
+
177
+ *[Symbol.iterator](): IterableIterator<T> {
178
+ yield* this.values();
211
179
  }
212
180
  }
181
+
182
+ export { MapSet as UniqueSet };