@sepiariver/unique-set 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PERF.md +26 -24
- package/README.md +11 -4
- package/dist/index.js +24 -31
- package/dist/index.mjs +21 -18
- package/index.ts +22 -18
- package/package.json +4 -3
package/PERF.md
CHANGED
|
@@ -2,42 +2,44 @@
|
|
|
2
2
|
|
|
3
3
|
Benchmarks run with `npm run bench` on Node.js v20.18.1, Apple M2 Pro.
|
|
4
4
|
|
|
5
|
+
Comparison between **UniqueSet** (`@sepiariver/unique-set`), **DeepSet** (`deep-equality-data-structures`), and the native `Set`.
|
|
6
|
+
|
|
5
7
|
## Flat Data (`bench.spec.ts`)
|
|
6
8
|
|
|
7
9
|
Mixed strings, flat objects (2 keys), and 2-element arrays with ~10-15% duplicate rate.
|
|
8
10
|
|
|
9
|
-
| Items |
|
|
10
|
-
| ------: |
|
|
11
|
-
| 400 |
|
|
12
|
-
| 1,000 |
|
|
13
|
-
| 20,000 |
|
|
14
|
-
| 100,000 |
|
|
11
|
+
| Items | UniqueSet | DeepSet | Native Set |
|
|
12
|
+
| ------: | --------: | --------: | ---------: |
|
|
13
|
+
| 400 | 0.96 ms | 8.13 ms | 0.07 ms |
|
|
14
|
+
| 1,000 | 0.60 ms | 9.33 ms | 0.08 ms |
|
|
15
|
+
| 20,000 | 6.12 ms | 116.00 ms | 1.00 ms |
|
|
16
|
+
| 100,000 | 20.83 ms | 517.00 ms | 4.15 ms |
|
|
15
17
|
|
|
16
18
|
## Nested Data (`bench-nested.spec.ts`)
|
|
17
19
|
|
|
18
20
|
Deeply nested objects (3-4 levels), nested arrays with objects, and mixed structures.
|
|
19
21
|
|
|
20
|
-
### `add()`
|
|
22
|
+
### `add()` — insert all items
|
|
21
23
|
|
|
22
|
-
| Items |
|
|
23
|
-
| ------: |
|
|
24
|
-
| 400 |
|
|
25
|
-
| 1,000 |
|
|
26
|
-
| 20,000 |
|
|
27
|
-
| 100,000 |
|
|
24
|
+
| Items | UniqueSet | DeepSet | Native Set |
|
|
25
|
+
| ------: | --------: | ----------: | ---------: |
|
|
26
|
+
| 400 | 1.90 ms | 14.88 ms | 0.05 ms |
|
|
27
|
+
| 1,000 | 0.72 ms | 22.39 ms | 0.05 ms |
|
|
28
|
+
| 20,000 | 9.73 ms | 423.00 ms | 0.89 ms |
|
|
29
|
+
| 100,000 | 57.18 ms | 2,130.00 ms | 3.88 ms |
|
|
28
30
|
|
|
29
|
-
### `has()`
|
|
31
|
+
### `has()` — query all items (50% hits, 50% misses)
|
|
30
32
|
|
|
31
|
-
| Items |
|
|
32
|
-
| ------: |
|
|
33
|
-
| 400 |
|
|
34
|
-
| 1,000 |
|
|
35
|
-
| 20,000 |
|
|
36
|
-
| 100,000 |
|
|
33
|
+
| Items | UniqueSet.has() | DeepSet.has() | Queries | Hits |
|
|
34
|
+
| ------: | --------------: | ------------: | ------: | -----: |
|
|
35
|
+
| 400 | 0.63 ms | 8.93 ms | 457 | 228 |
|
|
36
|
+
| 1,000 | 0.73 ms | 21.69 ms | 1,144 | 572 |
|
|
37
|
+
| 20,000 | 12.83 ms | 425.00 ms | 22,892 | 11,446 |
|
|
38
|
+
| 100,000 | 61.26 ms | 2,111.00 ms | 114,458 | 57,229 |
|
|
37
39
|
|
|
38
40
|
## Notes
|
|
39
41
|
|
|
40
|
-
- Native `Set
|
|
41
|
-
-
|
|
42
|
-
-
|
|
43
|
-
-
|
|
42
|
+
- **Native `Set`** uses reference equality and cannot deduplicate objects/arrays by value.
|
|
43
|
+
- **UniqueSet** (this package) uses a streaming 32-bit FNV-1a structural hash with `fast-equals` only as fallback for hash collisions. O(1) average for both `add()` and `has()`.
|
|
44
|
+
- **DeepSet** (`deep-equality-data-structures`) hashes values with `object-hash` (MD5 by default) for O(1) lookups. The performance gap comes from MD5 being a cryptographic hash and `object-hash` serializing values before hashing.
|
|
45
|
+
- UniqueSet is roughly **25–35x faster** than DeepSet on nested data at scale, while both produce identical deduplication results.
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @sepiariver/unique-set
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
UniqueSet is a high-performance `Set`-like collection for deep-equality workloads. It uses a streaming structural hash to avoid most deep equality checks, and only falls back to a deep comparison with [fast-equals](https://www.npmjs.com/package/fast-equals) for values whose hashes match (true duplicates or hash collisions).
|
|
4
4
|
|
|
5
5
|
Supports ESM and CommonJS. Thanks [@sakgoyal](https://github.com/sakgoyal) for contributing to and instigating ESM support.
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@ WARNING: Version 3 includes breaking changes. Older versions are deprecated.
|
|
|
16
16
|
|
|
17
17
|
Configuration options from previous versions are no longer supported. Usage is identical to the native `Set` class.
|
|
18
18
|
|
|
19
|
-
IMPORTANT: `MapSet` and `UniqueSet` are the same class (`UniqueSet` is an alias). The `delete` method uses deep equality, so `delete({a: 1})` will remove a previously added `{a: 1}` even if it's a different reference.
|
|
19
|
+
IMPORTANT: `MapSet` and `UniqueSet` are the same class (`UniqueSet` is an alias). The `delete` method uses deep equality, so `delete({a: 1})` will remove a previously added `{a: 1}` even if it's a different reference. See "Considerations" below for more details on equality semantics.
|
|
20
20
|
|
|
21
21
|
## API
|
|
22
22
|
|
|
@@ -63,9 +63,16 @@ set.size; // 1
|
|
|
63
63
|
|
|
64
64
|
### Considerations
|
|
65
65
|
|
|
66
|
+
- **Performance**: See [PERF.md](PERF.md) for benchmarks. UniqueSet is optimized for deep equality with O(1) average complexity for both `add()` and `has()`. In those benchmarks it is roughly _25-35x faster_ than `DeepSet` (`deep-equality-data-structures`), another deep-equality `Set`-like implementation, especially on nested data at scale.
|
|
66
67
|
- **Memory**: Each unique value is stored once, bucketed by a 32-bit structural hash. Overhead is minimal: one `Map` entry plus a small array per hash bucket, with >99% of buckets containing exactly one item at typical sizes.
|
|
67
|
-
- **Collisions**: At 20,000 items, roughly 47 hash collisions are expected (birthday paradox on 32-bit). Collisions are handled correctly via `fast-
|
|
68
|
-
- **Equality semantics**:
|
|
68
|
+
- **Collisions**: At 20,000 items, an ideal 32-bit hash is expected to produce only about 0.05 colliding pairs (birthday bound: n²/2³³ ≈ 0.047), so collisions are rare. When they do occur, they are handled correctly via `fast-equals`. They add a small cost but never affect correctness.
|
|
69
|
+
- **Equality semantics**: Both the structural hash and `fast-equals` use deep value comparison throughout, so they are fully aligned.
|
|
70
|
+
- **Plain objects**: Key order is ignored.
|
|
71
|
+
- **Arrays**: Element order matters (hash is sequential; equality is index-by-index).
|
|
72
|
+
- **`Set` values**: Insertion order is ignored. `new Set([1, 2])` and `new Set([2, 1])` are treated as equal, including Sets containing objects (both layers use deep comparison).
|
|
73
|
+
- **`Map` values**: Insertion order is ignored. Both keys and values are compared by deep equality.
|
|
74
|
+
- **Primitives**: `NaN` is treated as equal to `NaN` (unlike the `===` operator). `0` and `-0` are treated as equal.
|
|
75
|
+
- **Functions and symbols**: Compared by reference. They hash by their string representation (`String(value)`), so same-source functions may land in the same bucket, but `fast-equals` uses `===` for the final check.
|
|
69
76
|
|
|
70
77
|
## Installation
|
|
71
78
|
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
4
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
5
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
6
|
var __export = (target, all) => {
|
|
9
7
|
for (var name in all)
|
|
@@ -17,14 +15,6 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
17
15
|
}
|
|
18
16
|
return to;
|
|
19
17
|
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
19
|
|
|
30
20
|
// index.ts
|
|
@@ -35,7 +25,7 @@ __export(unique_set_exports, {
|
|
|
35
25
|
structuralHash: () => structuralHash
|
|
36
26
|
});
|
|
37
27
|
module.exports = __toCommonJS(unique_set_exports);
|
|
38
|
-
var
|
|
28
|
+
var import_fast_equals = require("fast-equals");
|
|
39
29
|
var _f64 = new Float64Array(1);
|
|
40
30
|
var _u8 = new Uint8Array(_f64.buffer);
|
|
41
31
|
var structuralHash = (value) => {
|
|
@@ -82,20 +72,20 @@ var _shash = (value, hash) => {
|
|
|
82
72
|
return hash;
|
|
83
73
|
}
|
|
84
74
|
if (value instanceof Map) {
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
hash = _shash(k, hash);
|
|
91
|
-
hash = _shash(v, hash);
|
|
75
|
+
let mapHash = 0;
|
|
76
|
+
for (const [k, v] of value) {
|
|
77
|
+
let entryHash = _shash(k, 2166136261);
|
|
78
|
+
entryHash = _shash(v, entryHash);
|
|
79
|
+
mapHash = mapHash + entryHash | 0;
|
|
92
80
|
}
|
|
93
|
-
return hash;
|
|
81
|
+
return _mix(hash, mapHash);
|
|
94
82
|
}
|
|
95
83
|
if (value instanceof Set) {
|
|
96
|
-
|
|
97
|
-
for (const v of value)
|
|
98
|
-
|
|
84
|
+
let setHash = 0;
|
|
85
|
+
for (const v of value) {
|
|
86
|
+
setHash = setHash + _shash(v, 2166136261) | 0;
|
|
87
|
+
}
|
|
88
|
+
return _mix(hash, setHash);
|
|
99
89
|
}
|
|
100
90
|
if (value instanceof Date) {
|
|
101
91
|
hash = _mix(hash, 20);
|
|
@@ -108,12 +98,15 @@ var _shash = (value, hash) => {
|
|
|
108
98
|
return _mixStr(hash, value.toString());
|
|
109
99
|
}
|
|
110
100
|
hash = _mix(hash, 19);
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
101
|
+
let objHash = 0;
|
|
102
|
+
const keys = Object.keys(value);
|
|
103
|
+
for (let i = 0; i < keys.length; i++) {
|
|
104
|
+
const key = keys[i];
|
|
105
|
+
let pairHash = _mixStr(2166136261, key);
|
|
106
|
+
pairHash = _shash(value[key], pairHash);
|
|
107
|
+
objHash = objHash + pairHash | 0;
|
|
108
|
+
}
|
|
109
|
+
return _mix(hash, objHash);
|
|
117
110
|
};
|
|
118
111
|
var MapSet = class {
|
|
119
112
|
#map;
|
|
@@ -136,7 +129,7 @@ var MapSet = class {
|
|
|
136
129
|
this.#size++;
|
|
137
130
|
} else {
|
|
138
131
|
for (const item of bucket) {
|
|
139
|
-
if ((0,
|
|
132
|
+
if ((0, import_fast_equals.deepEqual)(value, item)) return this;
|
|
140
133
|
}
|
|
141
134
|
bucket.push(value);
|
|
142
135
|
this.#size++;
|
|
@@ -148,7 +141,7 @@ var MapSet = class {
|
|
|
148
141
|
const bucket = this.#map.get(hash);
|
|
149
142
|
if (!bucket) return false;
|
|
150
143
|
for (const item of bucket) {
|
|
151
|
-
if ((0,
|
|
144
|
+
if ((0, import_fast_equals.deepEqual)(value, item)) return true;
|
|
152
145
|
}
|
|
153
146
|
return false;
|
|
154
147
|
}
|
|
@@ -157,7 +150,7 @@ var MapSet = class {
|
|
|
157
150
|
const bucket = this.#map.get(hash);
|
|
158
151
|
if (!bucket) return false;
|
|
159
152
|
for (let i = 0; i < bucket.length; i++) {
|
|
160
|
-
if ((0,
|
|
153
|
+
if ((0, import_fast_equals.deepEqual)(value, bucket[i])) {
|
|
161
154
|
bucket.splice(i, 1);
|
|
162
155
|
if (bucket.length === 0) this.#map.delete(hash);
|
|
163
156
|
this.#size--;
|
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// index.ts
|
|
2
|
-
import equal from "fast-
|
|
2
|
+
import { deepEqual as equal } from "fast-equals";
|
|
3
3
|
var _f64 = new Float64Array(1);
|
|
4
4
|
var _u8 = new Uint8Array(_f64.buffer);
|
|
5
5
|
var structuralHash = (value) => {
|
|
@@ -46,20 +46,20 @@ var _shash = (value, hash) => {
|
|
|
46
46
|
return hash;
|
|
47
47
|
}
|
|
48
48
|
if (value instanceof Map) {
|
|
49
|
-
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
hash = _shash(k, hash);
|
|
55
|
-
hash = _shash(v, hash);
|
|
49
|
+
let mapHash = 0;
|
|
50
|
+
for (const [k, v] of value) {
|
|
51
|
+
let entryHash = _shash(k, 2166136261);
|
|
52
|
+
entryHash = _shash(v, entryHash);
|
|
53
|
+
mapHash = mapHash + entryHash | 0;
|
|
56
54
|
}
|
|
57
|
-
return hash;
|
|
55
|
+
return _mix(hash, mapHash);
|
|
58
56
|
}
|
|
59
57
|
if (value instanceof Set) {
|
|
60
|
-
|
|
61
|
-
for (const v of value)
|
|
62
|
-
|
|
58
|
+
let setHash = 0;
|
|
59
|
+
for (const v of value) {
|
|
60
|
+
setHash = setHash + _shash(v, 2166136261) | 0;
|
|
61
|
+
}
|
|
62
|
+
return _mix(hash, setHash);
|
|
63
63
|
}
|
|
64
64
|
if (value instanceof Date) {
|
|
65
65
|
hash = _mix(hash, 20);
|
|
@@ -72,12 +72,15 @@ var _shash = (value, hash) => {
|
|
|
72
72
|
return _mixStr(hash, value.toString());
|
|
73
73
|
}
|
|
74
74
|
hash = _mix(hash, 19);
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
75
|
+
let objHash = 0;
|
|
76
|
+
const keys = Object.keys(value);
|
|
77
|
+
for (let i = 0; i < keys.length; i++) {
|
|
78
|
+
const key = keys[i];
|
|
79
|
+
let pairHash = _mixStr(2166136261, key);
|
|
80
|
+
pairHash = _shash(value[key], pairHash);
|
|
81
|
+
objHash = objHash + pairHash | 0;
|
|
82
|
+
}
|
|
83
|
+
return _mix(hash, objHash);
|
|
81
84
|
};
|
|
82
85
|
var MapSet = class {
|
|
83
86
|
#map;
|
package/index.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import equal from "fast-
|
|
1
|
+
import { deepEqual as equal } from "fast-equals";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Streaming structural hash — computes a 32-bit FNV-1a hash by traversing
|
|
@@ -56,20 +56,20 @@ const _shash = (value: unknown, hash: number): number => {
|
|
|
56
56
|
return hash;
|
|
57
57
|
}
|
|
58
58
|
if (value instanceof Map) {
|
|
59
|
-
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
hash = _shash(k, hash);
|
|
65
|
-
hash = _shash(v, hash);
|
|
59
|
+
let mapHash = 0;
|
|
60
|
+
for (const [k, v] of value) {
|
|
61
|
+
let entryHash = _shash(k, 0x811c9dc5);
|
|
62
|
+
entryHash = _shash(v, entryHash);
|
|
63
|
+
mapHash = (mapHash + entryHash) | 0; // order-independent hash by summing entry hashes (32-bit)
|
|
66
64
|
}
|
|
67
|
-
return hash;
|
|
65
|
+
return _mix(hash, mapHash);
|
|
68
66
|
}
|
|
69
67
|
if (value instanceof Set) {
|
|
70
|
-
|
|
71
|
-
for (const v of value)
|
|
72
|
-
|
|
68
|
+
let setHash = 0;
|
|
69
|
+
for (const v of value) {
|
|
70
|
+
setHash = (setHash + _shash(v, 0x811c9dc5)) | 0; // order-independent hash by summing element hashes (32-bit)
|
|
71
|
+
}
|
|
72
|
+
return _mix(hash, setHash);
|
|
73
73
|
}
|
|
74
74
|
if (value instanceof Date) {
|
|
75
75
|
hash = _mix(hash, 0x14);
|
|
@@ -82,14 +82,18 @@ const _shash = (value: unknown, hash: number): number => {
|
|
|
82
82
|
return _mixStr(hash, value.toString());
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
// Plain object
|
|
85
|
+
// Plain object: order-independent
|
|
86
86
|
hash = _mix(hash, 0x13);
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
let objHash = 0;
|
|
88
|
+
const keys = Object.keys(value as object);
|
|
89
|
+
for (let i = 0; i < keys.length; i++) {
|
|
90
|
+
const key = keys[i]!;
|
|
91
|
+
let pairHash = _mixStr(0x811c9dc5, key);
|
|
92
|
+
pairHash = _shash((value as Record<string, unknown>)[key], pairHash);
|
|
93
|
+
objHash = (objHash + pairHash) | 0;
|
|
91
94
|
}
|
|
92
|
-
|
|
95
|
+
|
|
96
|
+
return _mix(hash, objHash);
|
|
93
97
|
};
|
|
94
98
|
|
|
95
99
|
export class MapSet<T> {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sepiariver/unique-set",
|
|
3
|
-
"version": "3.
|
|
4
|
-
"description": "A Set-like collection that deduplicates by deep value equality using a streaming structural hash and fast-
|
|
3
|
+
"version": "3.2.0",
|
|
4
|
+
"description": "A Set-like collection that deduplicates by deep value equality using a streaming structural hash and fast-equals.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
7
7
|
"types": "dist/index.d.mts",
|
|
@@ -35,9 +35,10 @@
|
|
|
35
35
|
},
|
|
36
36
|
"homepage": "https://github.com/sepiariver/unique-set#readme",
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"fast-
|
|
38
|
+
"fast-equals": "^6.0.0"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
+
"deep-equality-data-structures": "^2.0.0",
|
|
41
42
|
"tsup": "^8.3.5",
|
|
42
43
|
"typescript": "^5.7.2",
|
|
43
44
|
"vitest": "^2.1.8"
|