@sepiariver/unique-set 3.1.3 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PERF.md +26 -24
- package/README.md +11 -4
- package/dist/index.js +4 -14
- package/dist/index.mjs +1 -1
- package/index.ts +1 -1
- package/package.json +4 -3
package/PERF.md
CHANGED
|
@@ -2,42 +2,44 @@
|
|
|
2
2
|
|
|
3
3
|
Benchmarks run with `npm run bench` on Node.js v20.18.1, Apple M2 Pro.
|
|
4
4
|
|
|
5
|
+
Comparison between **UniqueSet** (`@sepiariver/unique-set`), **DeepSet** (`deep-equality-data-structures`), and the native `Set`.
|
|
6
|
+
|
|
5
7
|
## Flat Data (`bench.spec.ts`)
|
|
6
8
|
|
|
7
9
|
Mixed strings, flat objects (2 keys), and 2-element arrays with ~10-15% duplicate rate.
|
|
8
10
|
|
|
9
|
-
| Items |
|
|
10
|
-
| ------: |
|
|
11
|
-
| 400 |
|
|
12
|
-
| 1,000 |
|
|
13
|
-
| 20,000 |
|
|
14
|
-
| 100,000 |
|
|
11
|
+
| Items | UniqueSet | DeepSet | Native Set |
|
|
12
|
+
| ------: | --------: | --------: | ---------: |
|
|
13
|
+
| 400 | 0.96 ms | 8.13 ms | 0.07 ms |
|
|
14
|
+
| 1,000 | 0.60 ms | 9.33 ms | 0.08 ms |
|
|
15
|
+
| 20,000 | 6.12 ms | 116.00 ms | 1.00 ms |
|
|
16
|
+
| 100,000 | 20.83 ms | 517.00 ms | 4.15 ms |
|
|
15
17
|
|
|
16
18
|
## Nested Data (`bench-nested.spec.ts`)
|
|
17
19
|
|
|
18
20
|
Deeply nested objects (3-4 levels), nested arrays with objects, and mixed structures.
|
|
19
21
|
|
|
20
|
-
### `add()`
|
|
22
|
+
### `add()` — insert all items
|
|
21
23
|
|
|
22
|
-
| Items |
|
|
23
|
-
| ------: |
|
|
24
|
-
| 400 |
|
|
25
|
-
| 1,000 |
|
|
26
|
-
| 20,000 |
|
|
27
|
-
| 100,000 |
|
|
24
|
+
| Items | UniqueSet | DeepSet | Native Set |
|
|
25
|
+
| ------: | --------: | ----------: | ---------: |
|
|
26
|
+
| 400 | 1.90 ms | 14.88 ms | 0.05 ms |
|
|
27
|
+
| 1,000 | 0.72 ms | 22.39 ms | 0.05 ms |
|
|
28
|
+
| 20,000 | 9.73 ms | 423.00 ms | 0.89 ms |
|
|
29
|
+
| 100,000 | 57.18 ms | 2,130.00 ms | 3.88 ms |
|
|
28
30
|
|
|
29
|
-
### `has()`
|
|
31
|
+
### `has()` — query all items (50% hits, 50% misses)
|
|
30
32
|
|
|
31
|
-
| Items |
|
|
32
|
-
| ------: |
|
|
33
|
-
| 400 |
|
|
34
|
-
| 1,000 |
|
|
35
|
-
| 20,000 |
|
|
36
|
-
| 100,000 |
|
|
33
|
+
| Items | UniqueSet.has() | DeepSet.has() | Queries | Hits |
|
|
34
|
+
| ------: | --------------: | ------------: | ------: | -----: |
|
|
35
|
+
| 400 | 0.63 ms | 8.93 ms | 457 | 228 |
|
|
36
|
+
| 1,000 | 0.73 ms | 21.69 ms | 1,144 | 572 |
|
|
37
|
+
| 20,000 | 12.83 ms | 425.00 ms | 22,892 | 11,446 |
|
|
38
|
+
| 100,000 | 61.26 ms | 2,111.00 ms | 114,458 | 57,229 |
|
|
37
39
|
|
|
38
40
|
## Notes
|
|
39
41
|
|
|
40
|
-
- Native `Set
|
|
41
|
-
-
|
|
42
|
-
-
|
|
43
|
-
-
|
|
42
|
+
- **Native `Set`** uses reference equality and cannot deduplicate objects/arrays by value.
|
|
43
|
+
- **UniqueSet** (this package) uses a streaming 32-bit FNV-1a structural hash with `fast-equals` only as fallback for hash collisions. O(1) average for both `add()` and `has()`.
|
|
44
|
+
- **DeepSet** (`deep-equality-data-structures`) hashes values with `object-hash` (MD5 by default) for O(1) lookups. The performance gap comes from MD5 being a cryptographic hash and `object-hash` serializing values before hashing.
|
|
45
|
+
- UniqueSet is roughly **25–35x faster** than DeepSet on nested data at scale, while both produce identical deduplication results.
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @sepiariver/unique-set
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
UniqueSet is a highly performant `Set`-like collection for deep-equality workloads. It uses a streaming structural hash to avoid most deep equality checks, falling back to a deep comparison with [fast-equals](https://www.npmjs.com/package/fast-equals) only when hash collisions occur.
|
|
4
4
|
|
|
5
5
|
Supports ESM and CommonJS. Thanks [@sakgoyal](https://github.com/sakgoyal) for contributing to and instigating ESM support.
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@ WARNING: Version 3 includes breaking changes. Older versions are deprecated.
|
|
|
16
16
|
|
|
17
17
|
Configuration options from previous versions are no longer supported. Usage is identical to the native `Set` class.
|
|
18
18
|
|
|
19
|
-
IMPORTANT: `MapSet` and `UniqueSet` are the same class (`UniqueSet` is an alias). The `delete` method uses deep equality, so `delete({a: 1})` will remove a previously added `{a: 1}` even if it's a different reference.
|
|
19
|
+
IMPORTANT: `MapSet` and `UniqueSet` are the same class (`UniqueSet` is an alias). The `delete` method uses deep equality, so `delete({a: 1})` will remove a previously added `{a: 1}` even if it's a different reference. See "Considerations" below for more details on equality semantics.
|
|
20
20
|
|
|
21
21
|
## API
|
|
22
22
|
|
|
@@ -63,9 +63,16 @@ set.size; // 1
|
|
|
63
63
|
|
|
64
64
|
### Considerations
|
|
65
65
|
|
|
66
|
+
- **Performance**: See [PERF.md](PERF.md) for benchmarks. UniqueSet is optimized for deep equality with O(1) average complexity for both `add()` and `has()`, and benchmarks roughly _25–35x faster_ than DeepSet (`deep-equality-data-structures`), the other deep-equality `Set`-like implementation measured, on nested data at scale.
|
|
66
67
|
- **Memory**: Each unique value is stored once, bucketed by a 32-bit structural hash. Overhead is minimal: one `Map` entry plus a small array per hash bucket, with >99% of buckets containing exactly one item at typical sizes.
|
|
67
|
-
- **Collisions**: At 20,000 items, roughly 47 hash collisions are expected (birthday paradox on 32-bit). Collisions are handled correctly via `fast-
|
|
68
|
-
- **Equality semantics**:
|
|
68
|
+
- **Collisions**: Hash collisions are rare. For an ideal 32-bit hash the birthday bound predicts about n²/2³³ colliding pairs, i.e. roughly 0.05 at 20,000 items and about 1 at 100,000 items. Collisions are handled correctly via `fast-equals`. They add a small cost but never affect correctness.
|
|
69
|
+
- **Equality semantics**: The structural hash and `fast-equals` apply the same deep-value rules throughout, so hashing and equality checking are fully aligned:
|
|
70
|
+
- **Plain objects**: Key order is ignored.
|
|
71
|
+
- **Arrays**: Element order matters (hash is sequential; equality is index-by-index).
|
|
72
|
+
- **`Set` values**: Insertion order is ignored. `new Set([1, 2])` and `new Set([2, 1])` are treated as equal, including Sets containing objects (both layers use deep comparison).
|
|
73
|
+
- **`Map` values**: Insertion order is ignored. Both keys and values are compared by deep equality.
|
|
74
|
+
- **Primitives**: `NaN` is treated as equal to `NaN` (unlike JavaScript's `===`). `0` and `-0` are treated as equal.
|
|
75
|
+
- **Functions and symbols**: Compared by reference. They hash by their string representation (`String(value)`), so same-source functions may land in the same bucket, but `fast-equals` uses `===` for the final check.
|
|
69
76
|
|
|
70
77
|
## Installation
|
|
71
78
|
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
4
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
5
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
6
|
var __export = (target, all) => {
|
|
9
7
|
for (var name in all)
|
|
@@ -17,14 +15,6 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
17
15
|
}
|
|
18
16
|
return to;
|
|
19
17
|
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
19
|
|
|
30
20
|
// index.ts
|
|
@@ -35,7 +25,7 @@ __export(unique_set_exports, {
|
|
|
35
25
|
structuralHash: () => structuralHash
|
|
36
26
|
});
|
|
37
27
|
module.exports = __toCommonJS(unique_set_exports);
|
|
38
|
-
var
|
|
28
|
+
var import_fast_equals = require("fast-equals");
|
|
39
29
|
var _f64 = new Float64Array(1);
|
|
40
30
|
var _u8 = new Uint8Array(_f64.buffer);
|
|
41
31
|
var structuralHash = (value) => {
|
|
@@ -139,7 +129,7 @@ var MapSet = class {
|
|
|
139
129
|
this.#size++;
|
|
140
130
|
} else {
|
|
141
131
|
for (const item of bucket) {
|
|
142
|
-
if ((0,
|
|
132
|
+
if ((0, import_fast_equals.deepEqual)(value, item)) return this;
|
|
143
133
|
}
|
|
144
134
|
bucket.push(value);
|
|
145
135
|
this.#size++;
|
|
@@ -151,7 +141,7 @@ var MapSet = class {
|
|
|
151
141
|
const bucket = this.#map.get(hash);
|
|
152
142
|
if (!bucket) return false;
|
|
153
143
|
for (const item of bucket) {
|
|
154
|
-
if ((0,
|
|
144
|
+
if ((0, import_fast_equals.deepEqual)(value, item)) return true;
|
|
155
145
|
}
|
|
156
146
|
return false;
|
|
157
147
|
}
|
|
@@ -160,7 +150,7 @@ var MapSet = class {
|
|
|
160
150
|
const bucket = this.#map.get(hash);
|
|
161
151
|
if (!bucket) return false;
|
|
162
152
|
for (let i = 0; i < bucket.length; i++) {
|
|
163
|
-
if ((0,
|
|
153
|
+
if ((0, import_fast_equals.deepEqual)(value, bucket[i])) {
|
|
164
154
|
bucket.splice(i, 1);
|
|
165
155
|
if (bucket.length === 0) this.#map.delete(hash);
|
|
166
156
|
this.#size--;
|
package/dist/index.mjs
CHANGED
package/index.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sepiariver/unique-set",
|
|
3
|
-
"version": "3.
|
|
4
|
-
"description": "A Set-like collection that deduplicates by deep value equality using a streaming structural hash and fast-
|
|
3
|
+
"version": "3.2.0",
|
|
4
|
+
"description": "A Set-like collection that deduplicates by deep value equality using a streaming structural hash and fast-equals.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
7
7
|
"types": "dist/index.d.mts",
|
|
@@ -35,9 +35,10 @@
|
|
|
35
35
|
},
|
|
36
36
|
"homepage": "https://github.com/sepiariver/unique-set#readme",
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"fast-
|
|
38
|
+
"fast-equals": "^6.0.0"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
+
"deep-equality-data-structures": "^2.0.0",
|
|
41
42
|
"tsup": "^8.3.5",
|
|
42
43
|
"typescript": "^5.7.2",
|
|
43
44
|
"vitest": "^2.1.8"
|