genome 1.0.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +60 -249
- package/genome_rs.d.ts +93 -0
- package/genome_rs.js +9 -0
- package/genome_rs_bg.js +288 -0
- package/genome_rs_bg.wasm +0 -0
- package/package.json +50 -94
- package/dist/hash.d.ts +0 -67
- package/dist/hash.d.ts.map +0 -1
- package/dist/id-indexhash.d.ts +0 -21
- package/dist/id-indexhash.d.ts.map +0 -1
- package/dist/id-multiset.d.ts +0 -6
- package/dist/id-multiset.d.ts.map +0 -1
- package/dist/id-multiset32.d.ts +0 -2
- package/dist/id-multiset32.d.ts.map +0 -1
- package/dist/index.cjs +0 -449
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.ts +0 -272
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -405
- package/dist/index.js.map +0 -1
- package/dist/largeObj.d.ts +0 -119
- package/dist/largeObj.d.ts.map +0 -1
package/LICENSE
CHANGED
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,286 +1,97 @@
|
|
|
1
1
|

|
|
2
|
-

|
|
3
|
+

|
|
3
4
|

|
|
4
5
|

|
|
6
|
+
# `genome`
|
|
5
7
|
|
|
8
|
+
Deterministic structural hashing for JSON values. Generates hierarchical
|
|
9
|
+
IDs that capture the shape of an object — same structure, same ID,
|
|
10
|
+
regardless of values.
|
|
6
11
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
A lightweight, robust library for generating unique identifiers for JavaScript/TypeScript objects based on their structure rather than requiring explicit string keys.
|
|
10
|
-
|
|
11
|
-
## Purpose
|
|
12
|
-
|
|
13
|
-
This library provides a solution for scenarios where you need to:
|
|
14
|
-
|
|
15
|
-
- Persist and rehydrate state without requiring explicit string keys
|
|
16
|
-
- Identify structurally identical objects across different instances
|
|
17
|
-
- Match objects by their shape rather than by identity or manual keys
|
|
18
|
-
- Detect circular references safely
|
|
19
|
-
|
|
20
|
-
## Installation
|
|
12
|
+
## Install
|
|
21
13
|
|
|
14
|
+
**[npm (WASM) ↗](https://www.npmjs.com/package/genome)**
|
|
22
15
|
```bash
|
|
23
|
-
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## Basic Usage
|
|
27
|
-
```typescript
|
|
28
|
-
import { generateStructureId, getCompactId } from 'genome';
|
|
29
|
-
|
|
30
|
-
// Example object
|
|
31
|
-
const user = {
|
|
32
|
-
name: 'John',
|
|
33
|
-
age: 30,
|
|
34
|
-
preferences: {
|
|
35
|
-
theme: 'dark',
|
|
36
|
-
notifications: true
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// Generate a unique ID based on the object structure and its properties' types
|
|
41
|
-
const id = generateStructureId(user) // L0:3713-L1:5761-L2:13827
|
|
42
|
-
|
|
43
|
-
// Generate a unique ID and then hash that value
|
|
44
|
-
const hashed = getCompactId(user) // cd76ea96
|
|
45
|
-
|
|
46
|
-
// Get info on what a structure would be without generating
|
|
47
|
-
const {
|
|
48
|
-
id, // L0:541598767187353870402585606-L1:1547425049106725343623905933-L2:10
|
|
49
|
-
levels, // 3
|
|
50
|
-
collisions // 0 ID collisions - same object structure already ran through generator
|
|
51
|
-
} = getStructureInfo(user)
|
|
52
|
-
|
|
53
|
-
// Get info for a compact id
|
|
54
|
-
const {
|
|
55
|
-
id, // cd76ea96
|
|
56
|
-
levels, // 3
|
|
57
|
-
collisions // 0 ID collisions - same object structure already ran through generator
|
|
58
|
-
} = getCompactInfo(user)
|
|
59
|
-
|
|
60
|
-
// get all of the data being stored about the state - debugging info
|
|
61
|
-
const allStructureStateData = exportStructureState()
|
|
16
|
+
npm install genome
|
|
62
17
|
```
|
|
63
18
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
Generates a unique ID string based on the structure of the provided object.
|
|
69
|
-
|
|
70
|
-
- **Parameters**:
|
|
71
|
-
- `obj`: The object to generate an ID for.
|
|
72
|
-
- `config` (optional): Configuration options for ID generation.
|
|
73
|
-
- **Returns**: A string representing the structure ID.
|
|
74
|
-
|
|
75
|
-
### `getStructureInfo(obj: Record<string, any>, config?: StructureIdConfig): { id: string; levels: number; collisionCount: number; }`
|
|
76
|
-
|
|
77
|
-
Provides additional information about the object's structure.
|
|
78
|
-
|
|
79
|
-
- **Parameters**:
|
|
80
|
-
- `obj`: The object to analyze.
|
|
81
|
-
- `config` (optional): Configuration options for ID generation.
|
|
82
|
-
- **Returns**: An object containing:
|
|
83
|
-
- `id`: The structure ID.
|
|
84
|
-
- `levels`: The number of nesting levels in the object.
|
|
85
|
-
- `collisionCount`: The number of times this structure has been encountered.
|
|
86
|
-
|
|
87
|
-
### `setStructureIdConfig(config: StructureIdConfig): void`
|
|
88
|
-
|
|
89
|
-
Sets global configuration options for structure ID generation.
|
|
90
|
-
|
|
91
|
-
- **Parameters**:
|
|
92
|
-
- `config`: The configuration object.
|
|
93
|
-
|
|
94
|
-
### `getStructureIdConfig(): StructureIdConfig`
|
|
95
|
-
|
|
96
|
-
Gets the current global configuration.
|
|
97
|
-
|
|
98
|
-
- **Returns**: A copy of the current global configuration object.
|
|
99
|
-
|
|
100
|
-
### `resetState(): void`
|
|
101
|
-
|
|
102
|
-
Resets the internal state of the library, clearing all cached property mappings.
|
|
103
|
-
|
|
104
|
-
**Note**: You typically don't need to call this unless you want to start fresh with property-to-bit mappings.
|
|
105
|
-
|
|
106
|
-
### Configuration Options
|
|
107
|
-
|
|
108
|
-
The `StructureIdConfig` object supports the following options:
|
|
109
|
-
|
|
110
|
-
```typescript
|
|
111
|
-
interface StructureIdConfig {
|
|
112
|
-
newIdOnCollision?: boolean;
|
|
113
|
-
}
|
|
19
|
+
**[Rust ↗](https://crates.io/crates/genome-rs)**
|
|
20
|
+
```toml
|
|
21
|
+
[dependencies]
|
|
22
|
+
genome = "1.0.0"
|
|
114
23
|
```
|
|
115
24
|
|
|
116
|
-
|
|
25
|
+
## Usage
|
|
117
26
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
// Generate a unique ID for each object, even with the same structure
|
|
122
|
-
const config = { newIdOnCollision: true };
|
|
27
|
+
**TypeScript / JavaScript**
|
|
28
|
+
```ts
|
|
29
|
+
import init, { hash, signature, compare, compareValues, setConfig, reset } from 'genome'
|
|
123
30
|
|
|
124
|
-
|
|
125
|
-
|
|
31
|
+
// `init()` loads and compiles the WASM binary — call it once before using any other functions.
|
|
32
|
+
await init()
|
|
126
33
|
|
|
127
|
-
const
|
|
128
|
-
const
|
|
34
|
+
const id = hash(JSON.stringify({ id: 1, name: "alice" }))
|
|
35
|
+
const score = compare(id, hash(JSON.stringify({ id: 2, name: "bob" })))
|
|
129
36
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
console.log(id1 === id2); // false (even though structure is identical)
|
|
37
|
+
// setConfig(newIdOnCollision, ignoreArrayLength, ignoreValueTypes)
|
|
38
|
+
setConfig(false, true, false)
|
|
133
39
|
```
|
|
134
40
|
|
|
135
|
-
|
|
41
|
+
**Rust**
|
|
42
|
+
```rust
|
|
43
|
+
use genome::{Genome, GenomeConfig};
|
|
44
|
+
use serde_json::json;
|
|
136
45
|
|
|
137
|
-
|
|
138
|
-
// Set globally
|
|
139
|
-
setStructureIdConfig({ newIdOnCollision: true });
|
|
46
|
+
let mut g = Genome::new(GenomeConfig::default());
|
|
140
47
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const id2 = generateStructureId(obj2);
|
|
48
|
+
let id1 = g.hash(&json!({ "id": 1, "name": "alice" }));
|
|
49
|
+
let id2 = g.hash(&json!({ "id": 2, "name": "bob" }));
|
|
144
50
|
|
|
145
|
-
|
|
146
|
-
const id3 = generateStructureId(obj3, { newIdOnCollision: false });
|
|
51
|
+
assert_eq!(id1, id2); // same structure
|
|
147
52
|
```
|
|
148
53
|
|
|
149
|
-
##
|
|
150
|
-
|
|
151
|
-
The `genome` library shines in scenarios where you need to identify and match objects based on their structure rather than explicit keys or instance identity. Here are some ideal use cases:
|
|
152
|
-
|
|
153
|
-
### State Management Without Explicit Keys
|
|
54
|
+
## Config
|
|
154
55
|
|
|
155
|
-
|
|
56
|
+
| Option | Type | Default | Description |
|
|
57
|
+
|--------|------|---------|-------------|
|
|
58
|
+
| `newIdOnCollision` | bool | false | Give structurally identical values distinct IDs |
|
|
59
|
+
| `ignoreArrayLength` | bool | false | Treat arrays with different lengths but same element shapes as equivalent |
|
|
60
|
+
| `ignoreValueTypes` | bool | false | Treat all scalar types as equivalent — only key names and depth matter |
|
|
156
61
|
|
|
157
|
-
|
|
158
|
-
// Instead of this:
|
|
159
|
-
const componentKey = "user-preferences-panel";
|
|
160
|
-
storeState(componentKey, preferences);
|
|
161
|
-
// Later:
|
|
162
|
-
const savedState = getState(componentKey);
|
|
163
|
-
|
|
164
|
-
// You can do this:
|
|
165
|
-
const structureId = generateStructureId(preferences);
|
|
166
|
-
storeState(structureId, preferences);
|
|
167
|
-
// Later:
|
|
168
|
-
const savedState = getState(generateStructureId(preferences));
|
|
169
|
-
```
|
|
62
|
+
## API
|
|
170
63
|
|
|
171
|
-
###
|
|
64
|
+
### `hash(json)`
|
|
65
|
+
Accepts a JSON string. Use `JSON.stringify()` before passing.
|
|
172
66
|
|
|
173
|
-
|
|
67
|
+
### `signature(json)`
|
|
68
|
+
Returns the structural fingerprint. In default mode returns the full ID.
|
|
69
|
+
When `newIdOnCollision` is true, strips L0 and returns L1+ only.
|
|
174
70
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
function expensiveCalculation(data: SomeComplexObject) {
|
|
179
|
-
const structureId = generateStructureId(data);
|
|
180
|
-
|
|
181
|
-
if (memoizedResults.has(structureId)) {
|
|
182
|
-
return memoizedResults.get(structureId);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
const result = /* complex calculation */;
|
|
186
|
-
memoizedResults.set(structureId, result);
|
|
187
|
-
return result;
|
|
188
|
-
}
|
|
189
|
-
```
|
|
190
|
-
|
|
191
|
-
### Normalizing Data for Storage
|
|
192
|
-
|
|
193
|
-
When storing objects in databases or state management systems, you can use structural IDs to create consistent references:
|
|
194
|
-
|
|
195
|
-
```ts
|
|
196
|
-
function normalizeForStorage(entities: Record<string, unknown>[]) {
|
|
197
|
-
const normalizedEntities: Record<string, any> = {};
|
|
198
|
-
|
|
199
|
-
for (const entity of entities) {
|
|
200
|
-
const id = generateStructureId(entity);
|
|
201
|
-
normalizedEntities[id] = entity;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
return normalizedEntities;
|
|
205
|
-
}
|
|
206
|
-
```
|
|
71
|
+
### `compare(idA, idB) → number`
|
|
72
|
+
Compares two structure IDs and returns a similarity score from 0.0 to 1.0.
|
|
207
73
|
|
|
208
|
-
###
|
|
74
|
+
### `compareValues(a, b)`
|
|
75
|
+
Compares two JSON strings structurally and returns a similarity score.
|
|
209
76
|
|
|
210
|
-
|
|
77
|
+
### `seed(sig, count)`
|
|
78
|
+
Seeds the collision counter for a known signature. `count` is a `bigint` (e.g. `3n`). Useful for restoring persisted state.
|
|
211
79
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
return generateStructureId(oldObj) !== generateStructureId(newObj);
|
|
215
|
-
}
|
|
216
|
-
```
|
|
80
|
+
### `reset()`
|
|
81
|
+
Clears all internal state.
|
|
217
82
|
|
|
218
|
-
|
|
83
|
+
## How it works
|
|
219
84
|
|
|
220
|
-
|
|
85
|
+
genome builds a hierarchical hash where each level (`L0`, `L1`, `L2`...)
|
|
86
|
+
represents a depth in the object tree. Two objects with the same keys,
|
|
87
|
+
same nesting structure, and same value types will always produce the same ID
|
|
88
|
+
regardless of the actual values stored.
|
|
221
89
|
|
|
222
|
-
```ts
|
|
223
|
-
function deduplicateByStructure<T>(objects: T[]): T[] {
|
|
224
|
-
const uniqueStructures = new Map<string, T>();
|
|
225
|
-
|
|
226
|
-
for (const obj of objects) {
|
|
227
|
-
const id = generateStructureId(obj as Record<string, unknown>);
|
|
228
|
-
if (!uniqueStructures.has(id)) {
|
|
229
|
-
uniqueStructures.set(id, obj);
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
return Array.from(uniqueStructures.values());
|
|
234
|
-
}
|
|
235
90
|
```
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
When you need to uniquely identify each object instance, even if they share the same structure, you can use the `newIdOnCollision` option:
|
|
240
|
-
|
|
241
|
-
```ts
|
|
242
|
-
function assignUniqueIds<T>(objects: T[]): Map<T, string> {
|
|
243
|
-
const idMap = new Map<T, string>();
|
|
244
|
-
const config = { newIdOnCollision: true };
|
|
245
|
-
|
|
246
|
-
for (const obj of objects) {
|
|
247
|
-
const id = generateStructureId(obj as Record<string, unknown>, config);
|
|
248
|
-
idMap.set(obj, id);
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
return idMap;
|
|
252
|
-
}
|
|
91
|
+
{ id: 1, name: "alice", address: { city: "NYC" } }
|
|
92
|
+
└── L0: root level (keys: id, name, address + their types)
|
|
93
|
+
└── L1: depth 1 (keys inside address: city + its type)
|
|
253
94
|
```
|
|
254
|
-
### Benefits Over Manual Key Management
|
|
255
|
-
|
|
256
|
-
- Automatic: No need to manually specify and maintain string keys
|
|
257
|
-
- Consistent: Same structure always generates the same ID
|
|
258
|
-
- Structural: Changes to object structure are automatically reflected in the ID
|
|
259
|
-
- Safe: Handles circular references without issues
|
|
260
|
-
- Deterministic: Property order doesn't affect the generated ID
|
|
261
|
-
|
|
262
|
-
## How It Works
|
|
263
|
-
|
|
264
|
-
The library uses a bit-wise approach to generate structure IDs:
|
|
265
|
-
|
|
266
|
-
1. Each JavaScript type gets a unique bit value (`number`, `string`, `object`, etc.)
|
|
267
|
-
2. Each property name gets a unique bit value the first time it's encountered
|
|
268
|
-
3. These bit values are consistently used for the same types and property names
|
|
269
|
-
4. The object is traversed, and hash values are calculated for each level of nesting
|
|
270
|
-
5. The final ID is formed by combining these level hashes
|
|
271
|
-
|
|
272
|
-
This approach ensures:
|
|
273
|
-
- Identical structures get identical IDs
|
|
274
|
-
- Different structures get different IDs
|
|
275
|
-
- The algorithm works correctly with circular references
|
|
276
|
-
- Property order doesn't affect the generated ID
|
|
277
|
-
|
|
278
|
-
## Performance Considerations
|
|
279
|
-
|
|
280
|
-
- The library maintains a global mapping of property names to bit values, which grows as more unique property names are encountered
|
|
281
|
-
- For very large or complex objects, the bit values might become quite large (using BigInt internally)
|
|
282
|
-
- Circular references are handled efficiently without stack overflows
|
|
283
95
|
|
|
284
96
|
## License
|
|
285
|
-
|
|
286
|
-
MIT
|
|
97
|
+
MIT
|
package/genome_rs.d.ts
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Compares two structure ID strings and returns a similarity score
|
|
6
|
+
* between 0.0 (completely different) and 1.0 (identical).
|
|
7
|
+
*
|
|
8
|
+
* ```js
|
|
9
|
+
* import { compare } from 'genome'
|
|
10
|
+
* const score = compare("L0:100-L1:200", "L0:100-L1:200") // 1.0
|
|
11
|
+
* ```
|
|
12
|
+
*/
|
|
13
|
+
export function compare(id_a: string, id_b: string): number;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Compares two JSON strings structurally and returns a similarity score.
|
|
17
|
+
*
|
|
18
|
+
* ```js
|
|
19
|
+
* import { compareValues } from 'genome'
|
|
20
|
+
* const score = compareValues(
|
|
21
|
+
* JSON.stringify({ id: 1, name: "alice" }),
|
|
22
|
+
* JSON.stringify({ id: 2, name: "bob" }),
|
|
23
|
+
* ) // 1.0 — same structure
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export function compareValues(a: string, b: string): number;
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Generates a deterministic hierarchical structure ID for a JSON string.
|
|
30
|
+
*
|
|
31
|
+
* ```js
|
|
32
|
+
* import { hash } from 'genome'
|
|
33
|
+
* const id = hash(JSON.stringify({ id: 1, name: "alice" }))
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export function hash(json: string): string;
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Hashes a string with xxHash32 and returns a hex string.
|
|
40
|
+
*
|
|
41
|
+
* ```js
|
|
42
|
+
* import { hashStr } from 'genome'
|
|
43
|
+
* const hex = hashStr("hello", 0)
|
|
44
|
+
* ```
|
|
45
|
+
*/
|
|
46
|
+
export function hashStr(input: string, seed: number): string;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Resets all internal state — clears the key cache and collision counters.
|
|
50
|
+
*
|
|
51
|
+
* ```js
|
|
52
|
+
* import { reset } from 'genome'
|
|
53
|
+
* reset()
|
|
54
|
+
* ```
|
|
55
|
+
*/
|
|
56
|
+
export function reset(): void;
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Seeds the collision counter for a known signature.
|
|
60
|
+
* Use this to restore persisted counter state.
|
|
61
|
+
*
|
|
62
|
+
* ```js
|
|
63
|
+
* import { seed } from 'genome'
|
|
64
|
+
* seed("L1:12345-L2:67890", 3n)
|
|
65
|
+
* ```
|
|
66
|
+
*/
|
|
67
|
+
export function seed(signature: string, count: bigint): void;
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Sets the global config. Call this before using any other functions
|
|
71
|
+
* if you need non-default behaviour.
|
|
72
|
+
*
|
|
73
|
+
* ```js
|
|
74
|
+
* import { setConfig, hash } from 'genome'
|
|
75
|
+
*
|
|
76
|
+
* setConfig(false, true, true) // (newIdOnCollision, ignoreArrayLength, ignoreValueTypes)
|
|
77
|
+
* const id = hash(JSON.stringify({ items: [1, 2, 3] }))
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
export function setConfig(new_id_on_collision: boolean, ignore_array_length: boolean, ignore_value_types: boolean): void;
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Returns the structural signature for a JSON string.
|
|
84
|
+
*
|
|
85
|
+
* In default mode returns the full ID. When `newIdOnCollision` is true
|
|
86
|
+
* (set via `setConfig`), strips L0 and returns L1+ only.
|
|
87
|
+
*
|
|
88
|
+
* ```js
|
|
89
|
+
* import { signature } from 'genome'
|
|
90
|
+
* const sig = signature(JSON.stringify({ id: 1, name: "alice" }))
|
|
91
|
+
* ```
|
|
92
|
+
*/
|
|
93
|
+
export function signature(json: string): string;
|
package/genome_rs.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* @ts-self-types="./genome_rs.d.ts" */
|
|
2
|
+
|
|
3
|
+
import * as wasm from "./genome_rs_bg.wasm";
|
|
4
|
+
import { __wbg_set_wasm } from "./genome_rs_bg.js";
|
|
5
|
+
__wbg_set_wasm(wasm);
|
|
6
|
+
wasm.__wbindgen_start();
|
|
7
|
+
export {
|
|
8
|
+
compare, compareValues, hash, hashStr, reset, seed, setConfig, signature
|
|
9
|
+
} from "./genome_rs_bg.js";
|