@sepiariver/unique-set 2.0.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,178 +1,160 @@
1
1
  // index.ts
2
2
  import equal from "fast-deep-equal/es6/index.js";
3
- var serialize = (item) => {
4
- if (typeof item === "number" && isNaN(item)) {
5
- return "NaN";
6
- }
7
- if (item && typeof item === "object") {
8
- if (Array.isArray(item)) {
9
- return `[${item.map(serialize).join("")}]`;
10
- } else {
11
- return `{${Object.entries(item).sort(([a], [b]) => a.localeCompare(b)).map(([k, v]) => `${k}:${serialize(v)}`).join("")}}`;
12
- }
13
- }
14
- return String(item);
3
+ var _f64 = new Float64Array(1);
4
+ var _u8 = new Uint8Array(_f64.buffer);
5
+ var structuralHash = (value) => {
6
+ return _shash(value, 2166136261) >>> 0;
15
7
  };
16
- var fnv1a = (str) => {
17
- if (typeof str !== "string") {
18
- str = String(str);
19
- }
20
- let hash = 2166136261;
8
+ var _mix = (hash, byte) => {
9
+ return Math.imul(hash ^ byte, 16777619);
10
+ };
11
+ var _mixStr = (hash, str) => {
21
12
  for (let i = 0; i < str.length; i++) {
22
- hash ^= str.charCodeAt(i);
23
- hash = hash * 16777619 >>> 0;
13
+ hash = Math.imul(hash ^ str.charCodeAt(i), 16777619);
24
14
  }
25
- return hash >>> 0;
15
+ return hash;
26
16
  };
27
- var findNextPrime = (num) => {
28
- if (num < 2) return 2;
29
- if ((num & 1) === 0) num++;
30
- while (!isPrime(num)) {
31
- num += 2;
17
+ var _shash = (value, hash) => {
18
+ if (value === null) return _mix(hash, 0);
19
+ if (value === void 0) return _mix(hash, 1);
20
+ switch (typeof value) {
21
+ case "boolean":
22
+ return _mix(hash, value ? 3 : 2);
23
+ case "number":
24
+ hash = _mix(hash, 5);
25
+ if (isNaN(value)) return _mix(hash, 4);
26
+ if (value === 0) return _mix(hash, 48);
27
+ _f64[0] = value;
28
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]);
29
+ return hash;
30
+ case "string":
31
+ hash = _mix(hash, 6);
32
+ return _mixStr(hash, value);
33
+ case "bigint":
34
+ hash = _mix(hash, 7);
35
+ return _mixStr(hash, value.toString());
36
+ case "function":
37
+ case "symbol":
38
+ hash = _mix(hash, 8);
39
+ return _mixStr(hash, String(value));
40
+ default:
41
+ break;
32
42
  }
33
- return num;
34
- };
35
- var isPrime = (num) => {
36
- if (num < 2) return false;
37
- if (num === 2 || num === 3) return true;
38
- if ((num & 1) === 0) return false;
39
- if (num % 3 === 0) return false;
40
- const sqrt = Math.sqrt(num);
41
- for (let i = 5; i <= sqrt; i += 6) {
42
- if (num % i === 0 || num % (i + 2) === 0) return false;
43
- }
44
- return true;
43
+ if (Array.isArray(value)) {
44
+ hash = _mix(hash, 16);
45
+ for (let i = 0; i < value.length; i++) hash = _shash(value[i], hash);
46
+ return hash;
47
+ }
48
+ if (value instanceof Map) {
49
+ hash = _mix(hash, 17);
50
+ const entries = Array.from(value.entries()).sort(
51
+ ([a], [b]) => String(a).localeCompare(String(b))
52
+ );
53
+ for (const [k, v] of entries) {
54
+ hash = _shash(k, hash);
55
+ hash = _shash(v, hash);
56
+ }
57
+ return hash;
58
+ }
59
+ if (value instanceof Set) {
60
+ hash = _mix(hash, 18);
61
+ for (const v of value) hash = _shash(v, hash);
62
+ return hash;
63
+ }
64
+ if (value instanceof Date) {
65
+ hash = _mix(hash, 20);
66
+ _f64[0] = value.getTime();
67
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]);
68
+ return hash;
69
+ }
70
+ if (value instanceof RegExp) {
71
+ hash = _mix(hash, 21);
72
+ return _mixStr(hash, value.toString());
73
+ }
74
+ hash = _mix(hash, 19);
75
+ const keys = Object.keys(value).sort();
76
+ for (const key of keys) {
77
+ hash = _mixStr(hash, key);
78
+ hash = _shash(value[key], hash);
79
+ }
80
+ return hash;
45
81
  };
46
- var UniqueSet = class extends Set {
47
- /*** @throws TypeError If the input is not iterable. */
82
+ var MapSet = class {
83
+ #map;
84
+ #size;
48
85
  constructor(iterable = []) {
49
86
  if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
50
- throw new TypeError("UniqueSet requires an iterable");
87
+ throw new TypeError("MapSet requires an iterable");
51
88
  }
52
- super();
89
+ this.#map = /* @__PURE__ */ new Map();
90
+ this.#size = 0;
53
91
  for (const item of iterable) {
54
92
  this.add(item);
55
93
  }
56
94
  }
57
- /**
58
- * Determines whether an object is in the UniqueSet using deep equality.
59
- * @param o The object to check for presence in the UniqueSet.
60
- * @returns `true` if the object is found, `false` otherwise.
61
- */
62
- has(o) {
63
- for (const i of this) {
64
- if (equal(o, i)) {
65
- return true;
95
+ add(value) {
96
+ const hash = structuralHash(value);
97
+ const bucket = this.#map.get(hash);
98
+ if (!bucket) {
99
+ this.#map.set(hash, [value]);
100
+ this.#size++;
101
+ } else {
102
+ for (const item of bucket) {
103
+ if (equal(value, item)) return this;
66
104
  }
67
- }
68
- return false;
69
- }
70
- /**
71
- * Adds a new object to the UniqueSet if it is not already present.
72
- * @param o The object to add to the UniqueSet.
73
- * @returns The `UniqueSet` instance, allowing for chaining.
74
- */
75
- add(o) {
76
- if (!this.has(o)) {
77
- super.add(o);
105
+ bucket.push(value);
106
+ this.#size++;
78
107
  }
79
108
  return this;
80
109
  }
81
- };
82
- var BloomSet = class extends Set {
83
- #bitArray;
84
- #aSize;
85
- #hashCount;
86
- /**
87
- * Creates a new `BloomSet` instance.
88
- * @param iterable Optional: an iterable object with which to initialize the BloomSet.
89
- * @param options Bloom filter configuration options.
90
- * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
91
- * @param options.hashCount The number of hash functions to use. Defaults to 7.
92
- * @throws TypeError If the input is not iterable.
93
- */
94
- constructor(iterable = [], options = {}) {
95
- if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
96
- throw new TypeError("BloomSet requires an iterable");
97
- }
98
- super();
99
- if (!options || typeof options !== "object") {
100
- options = {};
101
- }
102
- options.hashCount ??= 7;
103
- options.size ??= 6553577;
104
- let { size, hashCount } = options;
105
- if (typeof size !== "number" || size <= 0) {
106
- size = 6553577;
107
- }
108
- this.#aSize = findNextPrime(size);
109
- if (typeof hashCount !== "number" || hashCount <= 0) {
110
- hashCount = 7;
111
- }
112
- this.#hashCount = hashCount;
113
- this.#bitArray = new Uint8Array(Math.ceil(size / 8));
114
- for (const item of iterable) {
115
- this.add(item);
116
- }
117
- }
118
- /** @internal */
119
- #hashes(item) {
120
- const hashes = [];
121
- const str = serialize(item);
122
- let hash = fnv1a(str);
123
- for (let i = 0; i < this.#hashCount; i++) {
124
- hash %= this.#aSize;
125
- hashes.push(hash);
126
- hash = (hash ^ hash >>> 13) * 3266489909;
127
- hash >>>= 0;
128
- }
129
- return hashes;
130
- }
131
- /** @internal */
132
- #setBits(hashes) {
133
- for (const hash of hashes) {
134
- const index = Math.floor(hash / 8);
135
- const bit = hash % 8;
136
- this.#bitArray[index] |= 1 << bit;
110
+ has(value) {
111
+ const hash = structuralHash(value);
112
+ const bucket = this.#map.get(hash);
113
+ if (!bucket) return false;
114
+ for (const item of bucket) {
115
+ if (equal(value, item)) return true;
137
116
  }
117
+ return false;
138
118
  }
139
- /** @internal */
140
- #checkBits(hashes) {
141
- for (const hash of hashes) {
142
- const index = Math.floor(hash / 8);
143
- const bit = hash % 8;
144
- if (!(this.#bitArray[index] & 1 << bit)) {
145
- return false;
119
+ delete(value) {
120
+ const hash = structuralHash(value);
121
+ const bucket = this.#map.get(hash);
122
+ if (!bucket) return false;
123
+ for (let i = 0; i < bucket.length; i++) {
124
+ if (equal(value, bucket[i])) {
125
+ bucket.splice(i, 1);
126
+ if (bucket.length === 0) this.#map.delete(hash);
127
+ this.#size--;
128
+ return true;
146
129
  }
147
130
  }
148
- return true;
131
+ return false;
149
132
  }
150
- /** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
151
- has(o) {
152
- const hashes = this.#hashes(o);
153
- if (!this.#checkBits(hashes)) {
154
- return false;
155
- }
156
- for (const i of this) {
157
- if (equal(o, i)) {
158
- return true;
133
+ get size() {
134
+ return this.#size;
135
+ }
136
+ clear() {
137
+ this.#map.clear();
138
+ this.#size = 0;
139
+ }
140
+ forEach(callback, thisArg) {
141
+ for (const bucket of this.#map.values()) {
142
+ for (const value of bucket) {
143
+ callback.call(thisArg, value, value, this);
159
144
  }
160
145
  }
161
- return false;
162
146
  }
163
- /** Adds a new object to the BloomSet if it is not already present.
164
- * @returns The `BloomSet` instance, allowing for chaining.
165
- */
166
- add(o) {
167
- if (!this.has(o)) {
168
- const hashes = this.#hashes(o);
169
- this.#setBits(hashes);
170
- super.add(o);
147
+ *values() {
148
+ for (const bucket of this.#map.values()) {
149
+ yield* bucket;
171
150
  }
172
- return this;
151
+ }
152
+ *[Symbol.iterator]() {
153
+ yield* this.values();
173
154
  }
174
155
  };
175
156
  export {
176
- BloomSet,
177
- UniqueSet
157
+ MapSet,
158
+ MapSet as UniqueSet,
159
+ structuralHash
178
160
  };
package/index.ts CHANGED
@@ -1,210 +1,182 @@
1
1
  import equal from "fast-deep-equal/es6/index.js";
2
2
 
3
- /** Utility functions */
4
-
5
- const serialize = (item: any | number | object): string => {
6
- if (typeof item === "number" && isNaN(item)) {
7
- return "NaN";
8
- }
9
-
10
- if (item && typeof item === "object") {
11
- if (Array.isArray(item)) {
12
- return `[${item.map(serialize).join("")}]`;
13
- } else {
14
- return `{${Object.entries(item)
15
- .sort(([a], [b]) => a.localeCompare(b))
16
- .map(([k, v]) => `${k}:${serialize(v)}`)
17
- .join("")}}`;
18
- }
19
- }
3
+ /**
4
+ * Streaming structural hash — computes a 32-bit FNV-1a hash by traversing
5
+ * the value directly, without allocating an intermediate string.
6
+ */
7
+ const _f64 = new Float64Array(1);
8
+ const _u8 = new Uint8Array(_f64.buffer);
9
+
10
+ export const structuralHash = (value: unknown): number => {
11
+ return _shash(value, 0x811c9dc5) >>> 0;
12
+ };
20
13
 
21
- return String(item);
14
+ const _mix = (hash: number, byte: number): number => {
15
+ return Math.imul(hash ^ byte, 0x01000193);
22
16
  };
23
17
 
24
- const fnv1a = (str: string) => {
25
- if (typeof str !== "string") {
26
- str = String(str);
27
- }
28
- let hash = 2166136261; // FNV offset basis for 32-bit
18
+ const _mixStr = (hash: number, str: string): number => {
29
19
  for (let i = 0; i < str.length; i++) {
30
- hash ^= str.charCodeAt(i);
31
- hash = (hash * 16777619) >>> 0; // Multiply by the FNV prime and ensure 32-bit unsigned
20
+ hash = Math.imul(hash ^ str.charCodeAt(i), 0x01000193);
32
21
  }
33
- return hash >>> 0;
22
+ return hash;
34
23
  };
35
24
 
36
- const findNextPrime = (num: number) => {
37
- if (num < 2) return 2;
38
- if ((num & 1) === 0) num++; // Odd numbers only
39
-
40
- while (!isPrime(num)) {
41
- num += 2; // Odd numbers only
25
+ const _shash = (value: unknown, hash: number): number => {
26
+ if (value === null) return _mix(hash, 0x00);
27
+ if (value === undefined) return _mix(hash, 0x01);
28
+
29
+ switch (typeof value) {
30
+ case "boolean":
31
+ return _mix(hash, value ? 0x03 : 0x02);
32
+ case "number":
33
+ hash = _mix(hash, 0x05);
34
+ if (isNaN(value)) return _mix(hash, 0x04);
35
+ if (value === 0) return _mix(hash, 0x30); // normalize 0 and -0
36
+ _f64[0] = value;
37
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]!);
38
+ return hash;
39
+ case "string":
40
+ hash = _mix(hash, 0x06);
41
+ return _mixStr(hash, value);
42
+ case "bigint":
43
+ hash = _mix(hash, 0x07);
44
+ return _mixStr(hash, value.toString());
45
+ case "function":
46
+ case "symbol":
47
+ hash = _mix(hash, 0x08);
48
+ return _mixStr(hash, String(value));
49
+ default:
50
+ break;
42
51
  }
43
52
 
44
- return num;
45
- };
46
-
47
- const isPrime = (num: number): boolean => {
48
- if (num < 2) return false;
49
- if (num === 2 || num === 3) return true;
50
- if ((num & 1) === 0) return false;
51
- if (num % 3 === 0) return false;
52
-
53
- const sqrt = Math.sqrt(num);
54
- for (let i = 5; i <= sqrt; i += 6) {
55
- if (num % i === 0 || num % (i + 2) === 0) return false;
53
+ if (Array.isArray(value)) {
54
+ hash = _mix(hash, 0x10);
55
+ for (let i = 0; i < value.length; i++) hash = _shash(value[i], hash);
56
+ return hash;
57
+ }
58
+ if (value instanceof Map) {
59
+ hash = _mix(hash, 0x11);
60
+ const entries = Array.from(value.entries()).sort(([a], [b]) =>
61
+ String(a).localeCompare(String(b))
62
+ );
63
+ for (const [k, v] of entries) {
64
+ hash = _shash(k, hash);
65
+ hash = _shash(v, hash);
66
+ }
67
+ return hash;
68
+ }
69
+ if (value instanceof Set) {
70
+ hash = _mix(hash, 0x12);
71
+ for (const v of value) hash = _shash(v, hash);
72
+ return hash;
73
+ }
74
+ if (value instanceof Date) {
75
+ hash = _mix(hash, 0x14);
76
+ _f64[0] = value.getTime();
77
+ for (let i = 0; i < 8; i++) hash = _mix(hash, _u8[i]!);
78
+ return hash;
79
+ }
80
+ if (value instanceof RegExp) {
81
+ hash = _mix(hash, 0x15);
82
+ return _mixStr(hash, value.toString());
56
83
  }
57
84
 
58
- return true;
85
+ // Plain object — sort keys for order-independence
86
+ hash = _mix(hash, 0x13);
87
+ const keys = Object.keys(value as object).sort();
88
+ for (const key of keys) {
89
+ hash = _mixStr(hash, key);
90
+ hash = _shash((value as Record<string, unknown>)[key], hash);
91
+ }
92
+ return hash;
59
93
  };
60
94
 
61
- /** A `Set` extension that ensures uniqueness of items using deep equality checks. */
62
- export class UniqueSet<T> extends Set<T> {
63
- /*** @throws TypeError If the input is not iterable. */
95
+ export class MapSet<T> {
96
+ #map: Map<number, T[]>;
97
+ #size: number;
98
+
64
99
  constructor(iterable: Iterable<T> = []) {
65
100
  if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
66
- throw new TypeError("UniqueSet requires an iterable");
101
+ throw new TypeError("MapSet requires an iterable");
67
102
  }
68
- super();
103
+ this.#map = new Map();
104
+ this.#size = 0;
69
105
  for (const item of iterable) {
70
106
  this.add(item);
71
107
  }
72
108
  }
73
- /**
74
- * Determines whether an object is in the UniqueSet using deep equality.
75
- * @param o The object to check for presence in the UniqueSet.
76
- * @returns `true` if the object is found, `false` otherwise.
77
- */
78
- has(o: T): boolean {
79
- for (const i of this) {
80
- if (equal(o, i)) {
81
- return true;
109
+
110
+ add(value: T): this {
111
+ const hash = structuralHash(value);
112
+ const bucket = this.#map.get(hash);
113
+ if (!bucket) {
114
+ this.#map.set(hash, [value]);
115
+ this.#size++;
116
+ } else {
117
+ for (const item of bucket) {
118
+ if (equal(value, item)) return this;
82
119
  }
83
- }
84
- return false;
85
- }
86
- /**
87
- * Adds a new object to the UniqueSet if it is not already present.
88
- * @param o The object to add to the UniqueSet.
89
- * @returns The `UniqueSet` instance, allowing for chaining.
90
- */
91
- add(o: T): this {
92
- if (!this.has(o)) {
93
- super.add(o);
120
+ bucket.push(value);
121
+ this.#size++;
94
122
  }
95
123
  return this;
96
124
  }
97
- }
98
125
 
99
- /** A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy. */
100
- export class BloomSet<T> extends Set<T> {
101
- #bitArray: Uint8Array;
102
- #aSize: number;
103
- #hashCount: number;
104
- /**
105
- * Creates a new `BloomSet` instance.
106
- * @param iterable Optional: an iterable object with which to initialize the BloomSet.
107
- * @param options Bloom filter configuration options.
108
- * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
109
- * @param options.hashCount The number of hash functions to use. Defaults to 7.
110
- * @throws TypeError If the input is not iterable.
111
- */
112
- constructor(
113
- iterable: Iterable<T> = [],
114
- options: { size?: number; hashCount?: number } = {}
115
- ) {
116
- if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
117
- throw new TypeError("BloomSet requires an iterable");
118
- }
119
- super();
120
-
121
- if (!options || typeof options !== "object") {
122
- options = {};
123
- }
124
-
125
- options.hashCount ??= 7;
126
- options.size ??= 6553577;
127
-
128
- let { size, hashCount } = options;
129
-
130
- if (typeof size !== "number" || size <= 0) {
131
- size = 6553577; // Targeting < 1 collision per 100,000 elements, ~819 KB memory, needs 7 hashes
132
- }
133
- this.#aSize = findNextPrime(size);
134
-
135
- if (typeof hashCount !== "number" || hashCount <= 0) {
136
- hashCount = 7;
137
- }
138
- this.#hashCount = hashCount;
139
- this.#bitArray = new Uint8Array(Math.ceil(size / 8));
140
-
141
- for (const item of iterable) {
142
- this.add(item);
126
+ has(value: T): boolean {
127
+ const hash = structuralHash(value);
128
+ const bucket = this.#map.get(hash);
129
+ if (!bucket) return false;
130
+ for (const item of bucket) {
131
+ if (equal(value, item)) return true;
143
132
  }
133
+ return false;
144
134
  }
145
135
 
146
- /** @internal */
147
- #hashes(item: T) {
148
- const hashes: number[] = [];
149
- const str = serialize(item);
150
- let hash = fnv1a(str); // Base hash
151
-
152
- // Bloom into hashCount hash values
153
- for (let i = 0; i < this.#hashCount; i++) {
154
- hash %= this.#aSize; // Ensure within bounds
155
- // Track
156
- hashes.push(hash);
157
- // Modify
158
- hash = (hash ^ (hash >>> 13)) * 0xc2b2ae35;
159
- hash >>>= 0; // Ensure unsigned 32-bit integer
136
+ delete(value: T): boolean {
137
+ const hash = structuralHash(value);
138
+ const bucket = this.#map.get(hash);
139
+ if (!bucket) return false;
140
+ for (let i = 0; i < bucket.length; i++) {
141
+ if (equal(value, bucket[i])) {
142
+ bucket.splice(i, 1);
143
+ if (bucket.length === 0) this.#map.delete(hash);
144
+ this.#size--;
145
+ return true;
146
+ }
160
147
  }
148
+ return false;
149
+ }
161
150
 
162
- return hashes;
151
+ get size(): number {
152
+ return this.#size;
163
153
  }
164
154
 
165
- /** @internal */
166
- #setBits(hashes: number[]): void {
167
- for (const hash of hashes) {
168
- const index = Math.floor(hash / 8);
169
- const bit = hash % 8;
170
- this.#bitArray[index]! |= 1 << bit;
171
- }
155
+ clear(): void {
156
+ this.#map.clear();
157
+ this.#size = 0;
172
158
  }
173
159
 
174
- /** @internal */
175
- #checkBits(hashes: number[]): boolean {
176
- for (const hash of hashes) {
177
- const index = Math.floor(hash / 8);
178
- const bit = hash % 8;
179
- if (!(this.#bitArray[index]! & (1 << bit))) {
180
- return false;
160
+ forEach(
161
+ callback: (value: T, valueAgain: T, set: this) => void,
162
+ thisArg?: any
163
+ ): void {
164
+ for (const bucket of this.#map.values()) {
165
+ for (const value of bucket) {
166
+ callback.call(thisArg, value, value, this);
181
167
  }
182
168
  }
183
- return true;
184
169
  }
185
- /** Determines existence of an object in the BloomSet using the Bloom filter and deep equality */
186
- has(o: T): boolean {
187
- const hashes = this.#hashes(o);
188
- if (!this.#checkBits(hashes)) {
189
- return false; // Definitely not in the set
190
- }
191
- // Fall back to fast-deep-equal for false positives
192
- for (const i of this) {
193
- if (equal(o, i)) {
194
- return true;
195
- }
170
+
171
+ *values(): IterableIterator<T> {
172
+ for (const bucket of this.#map.values()) {
173
+ yield* bucket;
196
174
  }
197
- return false;
198
175
  }
199
- /** Adds a new object to the BloomSet if it is not already present.
200
- * @returns The `BloomSet` instance, allowing for chaining.
201
- */
202
- add(o: T): this {
203
- if (!this.has(o)) {
204
- const hashes = this.#hashes(o);
205
- this.#setBits(hashes);
206
- super.add(o);
207
- }
208
- return this;
176
+
177
+ *[Symbol.iterator](): IterableIterator<T> {
178
+ yield* this.values();
209
179
  }
210
180
  }
181
+
182
+ export { MapSet as UniqueSet };