@sepiariver/unique-set 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,10 +1,76 @@
1
1
  # @sepiariver/unique-set
2
2
 
3
- Extends the `has` and `add` methods on the native JavaScript `Set` object to use [fast-deep-equal](https://www.npmjs.com/package/fast-deep-equal) as the equality algorithm.
3
+ Extends the native `Set` class to deeply compare using [fast-deep-equal](https://www.npmjs.com/package/fast-deep-equal), with optional Bloom filter optimization.
4
4
 
5
- The extended methods iterate through the elements of the `UniqueSet` until equality is found. If no elements match, the entire `UniqueSet` would have been iterated to determine so. However fast `fast-deep-equal` is, calling it in a loop like this makes performance many times poorer than the native `Set`. For datasets greater than a thousand elements, there is probably a better way to achieve what you're trying to do. Otherwise, `UniqueSet` is convenient.
5
+ Supports ESM and CommonJS.
6
6
 
7
- Requires @babel/core 7+
7
+ WARNING: This version exports two named classes (`UniqueSet` and `BloomSet`) instead of a single default class, which breaks backward compatibility with version 1.
8
+
9
+ The extended methods iterate through the elements of the `UniqueSet` until equality is found. If no elements match, the entire `UniqueSet` would have been iterated. However fast `fast-deep-equal` is, calling it in a loop like this makes performance many, many times worse than the native `Set`. For datasets greater than a thousand elements, there is probably a better way to achieve what you're trying to do. Otherwise, `UniqueSet` is convenient.
10
+
11
+ UPDATE: Version 2 ships with `BloomSet`, in which equality checks are optimized with a Bloom filter. This class is useful for larger datasets, performing about 3-10 times faster than `UniqueSet` for datasets greater than 1000 elements. For fewer than a few hundred (~400) elements, `UniqueSet` is faster. `BloomSet`'s probabilistic false positives are covered by a fallback to `fast-deep-equal`. `BloomSet` is still orders of magnitude slower than the native `Set`, but if deep equality is required, this is a decent option.
12
+
13
+ Experiment with configurations to find the best performance for your use case.
14
+
15
+ NOTE: The `delete` method is unmodified. In the case of duplicate objects that are equivalent but have different references, the results of `delete` operations may be unexpected.
16
+
17
+ ## Config Options
18
+
19
+ ### Constructor Signature
20
+
21
+ `new BloomSet(iterable = [], options = { size, hashCount });`
22
+
23
+ ### Options
24
+
25
+ The options object allows you to customize the behavior and performance of the BloomSet. The following properties can be configured:
26
+
27
+ #### 1. size (number)
28
+
29
+ Description: Specifies the size of the bit array used internally by the Bloom filter. This directly impacts the memory usage and false positive probability.
30
+
31
+ Default: 6,553,577 (a prime number using roughly 800 KB of memory).
32
+
33
+ Recommendations:
34
+
35
+ For datasets with ~100,000 elements, this default size provides excellent performance (compared against `UniqueSet`) with minimal (< 1) false positives.
36
+
37
+ Larger datasets may require increasing the size for lower false positive rates. Remember though, false positives are mitigated by a fallback to `fast-deep-equal`, so you may be able to squeeze more performance from a higher tolerance for false positives, depending on your dataset.
38
+
39
+ #### 2. hashCount (number)
40
+
41
+ Description: Specifies the number of hash functions used by the Bloom filter. This impacts both the false positive probability and the computational cost of adding/checking elements.
42
+
43
+ Default: 7
44
+
45
+ ### Examples
46
+
47
+ Default Configuration:
48
+
49
+ ```js
50
+ const bloomSet = new BloomSet();
51
+ bloomSet.add("example");
52
+ console.log(bloomSet.has("example")); // true
53
+ ```
54
+
55
+ Custom Configuration for Larger Datasets:
56
+
57
+ For example, a bit array size of 28,755,000 uses roughly 3.5 MB of memory, and this configuration is robust for datasets of around 1M elements. In practice, using `BloomSet` with that many elements is rarely worthwhile because of the performance cost of the deep equality checks.
58
+
59
+ ```js
60
+ const bloomSet = new BloomSet([], { size: 28755000, hashCount: 20 });
61
+ bloomSet.add("custom");
62
+ console.log(bloomSet.has("custom")); // true
63
+ ```
64
+
65
+ ### Considerations
66
+
67
+ - Memory Usage: The bit array uses size / 8 bytes of memory. Even at 800 KB, initializing 1250 BloomSets in the same scope would use a gigabyte of memory.
68
+ - False Positive Rate: The probability of a false positive is influenced by size, hashCount, and the number of elements. Adjust these values to balance performance and accuracy for your dataset.
69
+
70
+ #### Further Tuning
71
+
72
+ - Use a larger size for datasets exceeding 100,000 elements.
73
+ - Reduce hashCount if performance is critical and your dataset contains very few duplicates.
8
74
 
9
75
  ## Installation
10
76
 
@@ -15,7 +81,7 @@ npm install @sepiariver/unique-set
15
81
  ## Usage
16
82
 
17
83
  ```js
18
- const UniqueSet = require('@sepiariver/unique-set');
84
+ const { BloomSet, UniqueSet } = require('@sepiariver/unique-set');
19
85
 
20
86
  const data = [
21
87
  "string",
@@ -38,13 +104,21 @@ const data = [
38
104
  [1, 2, 3],
39
105
  ];
40
106
 
41
- let unique1 = new UniqueSet();
107
+ const unique1 = new UniqueSet();
42
108
  data.forEach((el) => {
43
109
  unique1.add(el);
44
110
  });
45
- let unique2 = new UniqueSet(data);
111
+ const unique2 = new UniqueSet(data);
46
112
  console.log(unique1.size); // 6 instead of 8 with Set
47
113
  console.log(unique2.size); // 6
114
+
115
+ const bloom1 = new BloomSet();
116
+ data.forEach((el) => {
117
+ bloom1.add(el);
118
+ });
119
+ const bloom2 = new BloomSet(data);
120
+ console.log(bloom1.size); // 6 instead of 8 with Set
121
+ console.log(bloom2.size); // 6
48
122
  ```
49
123
 
50
124
  ## Testing
@@ -55,4 +129,8 @@ console.log(unique2.size); // 6
55
129
 
56
130
  ## Contributing
57
131
 
58
- Submit pull requests to https://github.com/sepiariver/unique-set/pulls
132
+ Submit pull requests to <https://github.com/sepiariver/unique-set/pulls>
133
+
134
+ ## Issues
135
+
136
+ Issue reporting is encouraged: <https://github.com/sepiariver/unique-set/issues>
package/dist/index.js CHANGED
@@ -1,12 +1,27 @@
1
1
  "use strict";
2
2
 
3
- function _typeof(obj) { "@babel/helpers - typeof"; return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (obj) { return typeof obj; } : function (obj) { return obj && "function" == typeof Symbol && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }, _typeof(obj); }
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.UniqueSet = exports.BloomSet = void 0;
4
7
 
5
8
  var _fastDeepEqual = _interopRequireDefault(require("fast-deep-equal"));
6
9
 
7
10
  function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
8
11
 
9
- function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
12
+ function _slicedToArray(arr, i) { return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _unsupportedIterableToArray(arr, i) || _nonIterableRest(); }
13
+
14
+ function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); }
15
+
16
+ function _iterableToArrayLimit(arr, i) { var _i = arr == null ? null : typeof Symbol !== "undefined" && arr[Symbol.iterator] || arr["@@iterator"]; if (_i == null) return; var _arr = []; var _n = true; var _d = false; var _s, _e; try { for (_i = _i.call(arr); !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"] != null) _i["return"](); } finally { if (_d) throw _e; } } return _arr; }
17
+
18
+ function _arrayWithHoles(arr) { if (Array.isArray(arr)) return arr; }
19
+
20
+ function _typeof(obj) { "@babel/helpers - typeof"; return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (obj) { return typeof obj; } : function (obj) { return obj && "function" == typeof Symbol && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }, _typeof(obj); }
21
+
22
+ function _readOnlyError(name) { throw new TypeError("\"" + name + "\" is read-only"); }
23
+
24
+ function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e2) { throw _e2; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e3) { didErr = true; err = _e3; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
10
25
 
11
26
  function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
12
27
 
@@ -18,6 +33,10 @@ function _defineProperties(target, props) { for (var i = 0; i < props.length; i+
18
33
 
19
34
  function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); Object.defineProperty(Constructor, "prototype", { writable: false }); return Constructor; }
20
35
 
36
+ function _get() { if (typeof Reflect !== "undefined" && Reflect.get) { _get = Reflect.get; } else { _get = function _get(target, property, receiver) { var base = _superPropBase(target, property); if (!base) return; var desc = Object.getOwnPropertyDescriptor(base, property); if (desc.get) { return desc.get.call(arguments.length < 3 ? target : receiver); } return desc.value; }; } return _get.apply(this, arguments); }
37
+
38
+ function _superPropBase(object, property) { while (!Object.prototype.hasOwnProperty.call(object, property)) { object = _getPrototypeOf(object); if (object === null) break; } return object; }
39
+
21
40
  function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); Object.defineProperty(subClass, "prototype", { writable: false }); if (superClass) _setPrototypeOf(subClass, superClass); }
22
41
 
23
42
  function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }
@@ -44,33 +63,54 @@ var UniqueSet = /*#__PURE__*/function (_Set) {
44
63
  var _super = _createSuper(UniqueSet);
45
64
 
46
65
  function UniqueSet() {
66
+ var _this;
67
+
68
+ var iterable = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
69
+
47
70
  _classCallCheck(this, UniqueSet);
48
71
 
49
- for (var _len = arguments.length, args = new Array(_len), _key = 0; _key < _len; _key++) {
50
- args[_key] = arguments[_key];
72
+ if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
73
+ throw new TypeError("UniqueSet requires an iterable");
74
+ }
75
+
76
+ _this = _super.call(this);
77
+
78
+ var _iterator = _createForOfIteratorHelper(iterable),
79
+ _step;
80
+
81
+ try {
82
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
83
+ var item = _step.value;
84
+
85
+ _this.add(item);
86
+ }
87
+ } catch (err) {
88
+ _iterator.e(err);
89
+ } finally {
90
+ _iterator.f();
51
91
  }
52
92
 
53
- return _super.call.apply(_super, [this].concat(args));
93
+ return _this;
54
94
  }
55
95
 
56
96
  _createClass(UniqueSet, [{
57
97
  key: "has",
58
98
  value: function has(o) {
59
- var _iterator = _createForOfIteratorHelper(this),
60
- _step;
99
+ var _iterator2 = _createForOfIteratorHelper(this),
100
+ _step2;
61
101
 
62
102
  try {
63
- for (_iterator.s(); !(_step = _iterator.n()).done;) {
64
- var i = _step.value;
103
+ for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
104
+ var i = _step2.value;
65
105
 
66
106
  if ((0, _fastDeepEqual["default"])(o, i)) {
67
107
  return true;
68
108
  }
69
109
  }
70
110
  } catch (err) {
71
- _iterator.e(err);
111
+ _iterator2.e(err);
72
112
  } finally {
73
- _iterator.f();
113
+ _iterator2.f();
74
114
  }
75
115
 
76
116
  return false;
@@ -79,12 +119,271 @@ var UniqueSet = /*#__PURE__*/function (_Set) {
79
119
  key: "add",
80
120
  value: function add(o) {
81
121
  if (!this.has(o)) {
82
- Set.prototype.add.call(this, o);
122
+ _get(_getPrototypeOf(UniqueSet.prototype), "add", this).call(this, o);
83
123
  }
124
+
125
+ return this;
84
126
  }
85
127
  }]);
86
128
 
87
129
  return UniqueSet;
88
130
  }( /*#__PURE__*/_wrapNativeSuper(Set));
89
131
 
90
- module.exports = UniqueSet;
132
+ exports.UniqueSet = UniqueSet;
133
+
134
+ var BloomSet = /*#__PURE__*/function (_Set2) {
135
+ _inherits(BloomSet, _Set2);
136
+
137
+ var _super2 = _createSuper(BloomSet);
138
+
139
+ function BloomSet() {
140
+ var _this2;
141
+
142
+ var iterable = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
143
+ var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
144
+
145
+ _classCallCheck(this, BloomSet);
146
+
147
+ if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
148
+ throw new TypeError("BloomSet requires an iterable");
149
+ }
150
+
151
+ _this2 = _super2.call(this);
152
+
153
+ if (!options || _typeof(options) !== "object") {
154
+ options = {};
155
+ }
156
+
157
+ var _options = options,
158
+ _options$size = _options.size,
159
+ size = _options$size === void 0 ? 6553577 : _options$size,
160
+ _options$hashCount = _options.hashCount,
161
+ hashCount = _options$hashCount === void 0 ? 7 : _options$hashCount;
162
+
163
+ if (typeof size !== "number" || size <= 0) {
164
+ 6553577, _readOnlyError("size"); // Targeting < 1 collision per 100,000 elements, ~819 KB memory, needs 7 hashes
165
+ }
166
+
167
+ _this2.aSize = _this2._findNextPrime(size);
168
+
169
+ if (typeof hashCount !== "number" || hashCount <= 0) {
170
+ 7, _readOnlyError("hashCount");
171
+ }
172
+
173
+ _this2.hashCount = hashCount;
174
+ _this2.bitArray = new Uint8Array(Math.ceil(size / 8));
175
+
176
+ var _iterator3 = _createForOfIteratorHelper(iterable),
177
+ _step3;
178
+
179
+ try {
180
+ for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) {
181
+ var item = _step3.value;
182
+
183
+ _this2.add(item);
184
+ }
185
+ } catch (err) {
186
+ _iterator3.e(err);
187
+ } finally {
188
+ _iterator3.f();
189
+ }
190
+
191
+ return _this2;
192
+ }
193
+
194
+ _createClass(BloomSet, [{
195
+ key: "_findNextPrime",
196
+ value: function _findNextPrime(num) {
197
+ if (num < 2) return 2;
198
+ if (num % 2 === 0) num++; // Odd numbers only
199
+
200
+ while (!this._isPrime(num)) {
201
+ num += 2; // Odd numbers only
202
+ }
203
+
204
+ return num;
205
+ }
206
+ }, {
207
+ key: "_isPrime",
208
+ value: function _isPrime(num) {
209
+ if (num < 2) return false;
210
+ if (num === 2 || num === 3) return true;
211
+ if (num % 2 === 0 || num % 3 === 0) return false;
212
+ var sqrt = Math.floor(Math.sqrt(num));
213
+
214
+ for (var i = 5; i <= sqrt; i += 6) {
215
+ if (num % i === 0 || num % (i + 2) === 0) return false;
216
+ }
217
+
218
+ return true;
219
+ }
220
+ }, {
221
+ key: "_serialize",
222
+ value: function _serialize(item) {
223
+ if (typeof item === "number" && isNaN(item)) {
224
+ return "NaN";
225
+ }
226
+
227
+ if (item && _typeof(item) === "object") {
228
+ var serialize = this._serialize.bind(this);
229
+
230
+ if (Array.isArray(item)) {
231
+ return "[".concat(item.map(serialize).join(","), "]");
232
+ } else {
233
+ return "{".concat(Object.entries(item).sort(function (_ref, _ref2) {
234
+ var _ref3 = _slicedToArray(_ref, 1),
235
+ a = _ref3[0];
236
+
237
+ var _ref4 = _slicedToArray(_ref2, 1),
238
+ b = _ref4[0];
239
+
240
+ return a.localeCompare(b);
241
+ }).map(function (_ref5) {
242
+ var _ref6 = _slicedToArray(_ref5, 2),
243
+ k = _ref6[0],
244
+ v = _ref6[1];
245
+
246
+ return "".concat(k, ":").concat(serialize(v));
247
+ }).join(","), "}");
248
+ }
249
+ }
250
+
251
+ return String(item);
252
+ }
253
+ }, {
254
+ key: "_hashes",
255
+ value: function _hashes(item) {
256
+ var hashes = [];
257
+
258
+ var str = this._serialize(item);
259
+
260
+ var hash = this._fnv1a(str); // Base hash
261
+ // Bloom into hashCount hash values
262
+
263
+
264
+ for (var i = 0; i < this.hashCount; i++) {
265
+ hash %= this.aSize; // Ensure within bounds
266
+ // Track
267
+
268
+ hashes.push(hash); // Modify
269
+
270
+ hash = (hash ^ hash >>> 13) * 0xc2b2ae35;
271
+ hash >>>= 0; // Ensure unsigned 32-bit integer
272
+ }
273
+
274
+ return hashes;
275
+ }
276
+ }, {
277
+ key: "_fnv1a",
278
+ value: function _fnv1a(str) {
279
+ if (typeof str !== "string") {
280
+ str = String(str);
281
+ }
282
+
283
+ var hash = 2166136261; // FNV offset basis for 32-bit
284
+
285
+ for (var i = 0; i < str.length; i++) {
286
+ hash ^= str.charCodeAt(i);
287
+ hash = hash * 16777619 >>> 0; // Multiply by the FNV prime and ensure 32-bit unsigned
288
+ }
289
+
290
+ return hash >>> 0;
291
+ }
292
+ }, {
293
+ key: "_setBits",
294
+ value: function _setBits(hashes) {
295
+ var _iterator4 = _createForOfIteratorHelper(hashes),
296
+ _step4;
297
+
298
+ try {
299
+ for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) {
300
+ var hash = _step4.value;
301
+ var index = Math.floor(hash / 8);
302
+ var bit = hash % 8;
303
+ this.bitArray[index] |= 1 << bit;
304
+ }
305
+ } catch (err) {
306
+ _iterator4.e(err);
307
+ } finally {
308
+ _iterator4.f();
309
+ }
310
+ }
311
+ }, {
312
+ key: "_checkBits",
313
+ value: function _checkBits(hashes) {
314
+ var _iterator5 = _createForOfIteratorHelper(hashes),
315
+ _step5;
316
+
317
+ try {
318
+ for (_iterator5.s(); !(_step5 = _iterator5.n()).done;) {
319
+ var hash = _step5.value;
320
+ var index = Math.floor(hash / 8);
321
+ var bit = hash % 8;
322
+
323
+ if (!(this.bitArray[index] & 1 << bit)) {
324
+ return false;
325
+ }
326
+ }
327
+ } catch (err) {
328
+ _iterator5.e(err);
329
+ } finally {
330
+ _iterator5.f();
331
+ }
332
+
333
+ return true;
334
+ }
335
+ }, {
336
+ key: "has",
337
+ value: function has(o) {
338
+ var hashes = this._hashes(o);
339
+
340
+ if (!this._checkBits(hashes)) {
341
+ return false; // Definitely not in the set
342
+ } // Fall back to fast-deep-equal for false positives
343
+
344
+
345
+ var _iterator6 = _createForOfIteratorHelper(this),
346
+ _step6;
347
+
348
+ try {
349
+ for (_iterator6.s(); !(_step6 = _iterator6.n()).done;) {
350
+ var i = _step6.value;
351
+
352
+ if ((0, _fastDeepEqual["default"])(o, i)) {
353
+ return true;
354
+ }
355
+ }
356
+ } catch (err) {
357
+ _iterator6.e(err);
358
+ } finally {
359
+ _iterator6.f();
360
+ }
361
+
362
+ return false;
363
+ }
364
+ }, {
365
+ key: "add",
366
+ value: function add(o) {
367
+ if (!this.has(o)) {
368
+ var hashes = this._hashes(o);
369
+
370
+ this._setBits(hashes);
371
+
372
+ _get(_getPrototypeOf(BloomSet.prototype), "add", this).call(this, o);
373
+ }
374
+
375
+ return this;
376
+ }
377
+ }]);
378
+
379
+ return BloomSet;
380
+ }( /*#__PURE__*/_wrapNativeSuper(Set));
381
+
382
+ exports.BloomSet = BloomSet;
383
+
384
+ if (typeof module !== "undefined" && module.exports) {
385
+ module.exports = {
386
+ UniqueSet: UniqueSet,
387
+ BloomSet: BloomSet
388
+ };
389
+ }
package/index.d.ts ADDED
@@ -0,0 +1,60 @@
1
// The ambient module name must match the specifier consumers import
// (the published package name), otherwise these typings never attach.
declare module "@sepiariver/unique-set" {
  /**
   * A `Set` extension that ensures uniqueness of items using deep equality checks.
   */
  export class UniqueSet<T> extends Set<T> {
    /**
     * Creates a new `UniqueSet` instance.
     * @param iterable Optional: an iterable with which to initialize the UniqueSet.
     * @throws TypeError If the input is not iterable.
     */
    constructor(iterable?: Iterable<T>);

    /**
     * Determines whether an object is in the UniqueSet using deep equality.
     * @param o The object to check for presence in the UniqueSet.
     * @returns `true` if the object is found, `false` otherwise.
     */
    has(o: T): boolean;

    /**
     * Adds a new object to the UniqueSet if it is not already present.
     * @param o The object to add to the UniqueSet.
     * @returns The `UniqueSet` instance, allowing for chaining.
     */
    add(o: T): this;
  }

  /**
   * A `Set` extension that uses a Bloom filter for fast existence checks combined with deep equality for accuracy.
   */
  export class BloomSet<T> extends Set<T> {
    /**
     * Creates a new `BloomSet` instance.
     * @param iterable Optional: an iterable object with which to initialize the BloomSet.
     * @param options Bloom filter configuration options.
     * @param options.size The size of the Bloom filter's bit array. Defaults to 6553577.
     * @param options.hashCount The number of hash functions to use. Defaults to 7.
     * @throws TypeError If the input is not iterable.
     */
    constructor(
      iterable?: Iterable<T>,
      options?: { size?: number; hashCount?: number }
    );

    /**
     * Determines existence of an object in the BloomSet using the Bloom filter and deep equality.
     * @param o The object to check for presence in the BloomSet.
     * @returns `true` if the object is found, `false` otherwise.
     */
    has(o: T): boolean;

    /**
     * Adds a new object to the BloomSet if it is not already present.
     * @param o The object to add to the BloomSet.
     * @returns The `BloomSet` instance, allowing for chaining.
     */
    add(o: T): this;
  }
}
package/package.json CHANGED
@@ -1,8 +1,14 @@
1
1
  {
2
2
  "name": "@sepiariver/unique-set",
3
- "version": "1.1.0",
4
- "description": "Extends the has and add methods on the native JavaScript Set object to deeply compare using fast-deep-equal",
3
+ "version": "2.0.0",
4
+ "description": "Extends the native Set class to deeply compare using fast-deep-equal, with optional Bloom filter optimization. This version exports 2 classes instead of a default, breaking b/c with version 1.",
5
5
  "main": "index.js",
6
+ "module": "dist/index.js",
7
+ "types": "index.d.ts",
8
+ "exports": {
9
+ "require": "./index.js",
10
+ "import": "./dist/index.js"
11
+ },
6
12
  "scripts": {
7
13
  "test": "jest",
8
14
  "build": "babel src -d dist"
@@ -14,7 +20,7 @@
14
20
  "keywords": [
15
21
  "Set",
16
22
  "unique",
17
- "object",
23
+ "bloom",
18
24
  "deep",
19
25
  "compare",
20
26
  "equal"
@@ -29,8 +35,8 @@
29
35
  "fast-deep-equal": "^3.1.3"
30
36
  },
31
37
  "devDependencies": {
32
- "@babel/cli": "^7.17.0",
33
- "@babel/core": "^7.17.2",
38
+ "@babel/cli": "^7.17.6",
39
+ "@babel/core": "^7.17.8",
34
40
  "@babel/preset-env": "^7.16.11",
35
41
  "babel-jest": "^27.5.1",
36
42
  "jest": "^27.5.1"
package/src/index.js CHANGED
@@ -1,9 +1,16 @@
1
1
  import equal from "fast-deep-equal";
2
2
 
3
- class UniqueSet extends Set {
4
- constructor(...args) {
5
- super(...args);
3
+ export class UniqueSet extends Set {
4
+ constructor(iterable = []) {
5
+ if (!Array.isArray(iterable) && !iterable[Symbol.iterator]) {
6
+ throw new TypeError("UniqueSet requires an iterable");
7
+ }
8
+ super();
9
+ for (const item of iterable) {
10
+ this.add(item);
11
+ }
6
12
  }
13
+
7
14
  has(o) {
8
15
  for (const i of this) {
9
16
  if (equal(o, i)) {
@@ -12,11 +19,165 @@ class UniqueSet extends Set {
12
19
  }
13
20
  return false;
14
21
  }
22
+
15
23
  add(o) {
16
24
  if (!this.has(o)) {
17
- Set.prototype.add.call(this, o);
25
+ super.add(o);
18
26
  }
27
+ return this;
19
28
  }
20
29
  }
21
30
 
22
- module.exports = UniqueSet;
31
/**
 * A `Set` subclass that treats deeply-equal values as duplicates and uses a
 * Bloom filter to skip the linear deep-equality scan whenever a value is
 * definitely not present.
 *
 * Only `has` and `add` are overridden. `delete` and `clear` are inherited
 * unchanged, so filter bits are never cleared; stale bits only cause extra
 * deep-equality scans, never wrong answers.
 */
export class BloomSet extends Set {
  /**
   * @param {Iterable<*>} [iterable=[]] Initial values; each one goes through `add`.
   * @param {{size?: number, hashCount?: number}} [options={}] Filter tuning.
   *   `size` is the requested number of filter bits (rounded up to the next
   *   prime); `hashCount` is the number of bit positions derived per value.
   *   Missing, non-numeric, non-finite or non-positive values fall back to the
   *   defaults (6553577 bits, 7 hashes).
   * @throws {TypeError} If `iterable` is not iterable.
   */
  constructor(iterable = [], options = {}) {
    if (iterable == null || typeof iterable[Symbol.iterator] !== "function") {
      throw new TypeError("BloomSet requires an iterable");
    }
    super();

    // Targeting < 1 collision per 100,000 elements, ~819 KB memory, needs 7 hashes
    const defaultSize = 6553577;
    const defaultHashCount = 7;

    const { size, hashCount } =
      options !== null && typeof options === "object" ? options : {};

    // Number.isFinite also rejects NaN/Infinity, which would otherwise poison
    // every modulo (NaN) or hang the prime search (Infinity).
    const requestedBits =
      Number.isFinite(size) && size > 0 ? Math.ceil(size) : defaultSize;

    this.aSize = this._findNextPrime(requestedBits);
    this.hashCount =
      Number.isFinite(hashCount) && hashCount > 0
        ? Math.ceil(hashCount)
        : defaultHashCount;

    // Hashes are reduced modulo aSize (>= the requested size), so the bit
    // array must cover aSize bits or some positions would fall off the end.
    this.bitArray = new Uint8Array(Math.ceil(this.aSize / 8));

    for (const item of iterable) {
      this.add(item);
    }
  }

  /**
   * Returns the smallest prime that is >= `num` (2 for anything below 2).
   * @param {number} num Integer lower bound.
   * @returns {number}
   */
  _findNextPrime(num) {
    if (num < 2) return 2;
    if (num % 2 === 0) num++; // Odd numbers only

    while (!this._isPrime(num)) {
      num += 2; // Odd numbers only
    }

    return num;
  }

  /**
   * Trial-division primality test using the 6k +/- 1 stepping.
   * @param {number} num Integer to test.
   * @returns {boolean}
   */
  _isPrime(num) {
    if (num < 2) return false;
    if (num === 2 || num === 3) return true;
    if (num % 2 === 0 || num % 3 === 0) return false;

    const limit = Math.floor(Math.sqrt(num));
    for (let i = 5; i <= limit; i += 6) {
      if (num % i === 0 || num % (i + 2) === 0) return false;
    }

    return true;
  }

  /**
   * Builds a string key for hashing. Values that are deep-equal must produce
   * the same key (otherwise the filter would report false negatives); values
   * that are not deep-equal may share a key, which only costs a fallback scan.
   * @param {*} item
   * @returns {string}
   */
  _serialize(item) {
    if (item !== null && typeof item === "object") {
      if (Array.isArray(item)) {
        // Array.from visits holes as undefined (Array#map would skip them),
        // so sparse and dense arrays holding the same values share a key.
        const parts = Array.from(item, (el) => this._serialize(el));
        return `[${parts.join(",")}]`;
      }

      // Sort keys by code unit so insertion order never affects the key;
      // a locale-aware compare may treat distinct keys as equal.
      const body = Object.entries(item)
        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
        .map(([k, v]) => `${k}:${this._serialize(v)}`)
        .join(",");
      return `{${body}}`;
    }

    // Primitives (including NaN -> "NaN") and functions.
    return String(item);
  }

  /**
   * Derives `hashCount` bit positions in the range [0, aSize) for a value.
   * @param {*} item
   * @returns {number[]}
   */
  _hashes(item) {
    const hashes = [];
    let hash = this._fnv1a(this._serialize(item)); // Base hash

    for (let i = 0; i < this.hashCount; i++) {
      hash %= this.aSize; // Ensure within bounds
      hashes.push(hash);
      // Mix for the next position. Math.imul keeps the multiply in exact
      // 32-bit arithmetic; a plain `*` overflows 2^53 and drops low bits.
      hash = Math.imul(hash ^ (hash >>> 13), 0xc2b2ae35) >>> 0;
    }

    return hashes;
  }

  /**
   * 32-bit FNV-1a hash over the UTF-16 code units of `str`.
   * @param {string} str Non-strings are coerced with `String`.
   * @returns {number} Unsigned 32-bit integer.
   */
  _fnv1a(str) {
    if (typeof str !== "string") {
      str = String(str);
    }
    let hash = 0x811c9dc5; // FNV offset basis for 32-bit
    for (let i = 0; i < str.length; i++) {
      hash ^= str.charCodeAt(i);
      hash = Math.imul(hash, 0x01000193); // FNV prime, exact 32-bit multiply
    }
    return hash >>> 0;
  }

  /**
   * Sets the filter bit for every position in `hashes`.
   * @param {number[]} hashes Bit positions as produced by `_hashes`.
   */
  _setBits(hashes) {
    for (const position of hashes) {
      this.bitArray[position >>> 3] |= 1 << (position & 7);
    }
  }

  /**
   * @param {number[]} hashes Bit positions as produced by `_hashes`.
   * @returns {boolean} `true` only if every position is set in the filter.
   */
  _checkBits(hashes) {
    for (const position of hashes) {
      if (!(this.bitArray[position >>> 3] & (1 << (position & 7)))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Linear scan of the stored values using deep equality.
   * @param {*} o
   * @returns {boolean}
   */
  _scanForEqual(o) {
    for (const member of this) {
      if (equal(o, member)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Reports whether a deep-equal value is already stored.
   * @param {*} o
   * @returns {boolean}
   */
  has(o) {
    // A filter miss means "definitely absent"; a hit may be a false positive,
    // so it is confirmed with the deep-equality scan.
    return this._checkBits(this._hashes(o)) && this._scanForEqual(o);
  }

  /**
   * Stores `o` unless a deep-equal value is already present.
   * @param {*} o
   * @returns {this} The set, for chaining.
   */
  add(o) {
    // Hash once and reuse the positions for both the lookup and the insert.
    const hashes = this._hashes(o);
    if (!this._checkBits(hashes) || !this._scanForEqual(o)) {
      this._setBits(hashes);
      super.add(o);
    }
    return this;
  }
}
177
+
178
+ if (typeof module !== "undefined" && module.exports) {
179
+ module.exports = {
180
+ UniqueSet,
181
+ BloomSet,
182
+ };
183
+ }