lavinhash 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 LavinHash Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,501 @@
1
+ # LavinHash
2
+
3
+ High-performance fuzzy hashing library implementing the Dual-Layer Adaptive Hashing (DLAH) algorithm for detecting file and text similarity.
4
+
5
+ ## Overview
6
+
7
+ LavinHash is a Rust-based fuzzy hashing library that analyzes both structural patterns and content features to compute similarity scores between data. The library uses a dual-layer approach that separates structural similarity (topology) from content similarity (semantic features), providing accurate similarity detection even for modified or partially similar data.
8
+
9
+ **Key Features:**
10
+
11
+ - Dual-layer similarity analysis (structure + content)
12
+ - Adaptive scaling for constant-time comparison regardless of file size
13
+ - Cross-platform support (Linux, macOS, Windows, WebAssembly)
14
+ - High performance with SIMD optimizations and parallel processing
15
+ - Multiple language bindings (JavaScript/TypeScript, with more planned)
16
+ - Deterministic hashing across all platforms
17
+
18
+ ## Installation
19
+
20
+ ### JavaScript/TypeScript (npm)
21
+
22
+ ```bash
23
+ npm install lavinhash
24
+ ```
25
+
26
+ ### Rust (crates.io)
27
+
28
+ ```toml
29
+ [dependencies]
30
+ lavinhash = "1.0"
31
+ ```
32
+
33
+ ### Building from Source
34
+
35
+ ```bash
36
+ git clone https://github.com/RafaCalRob/LavinHash.git
37
+ cd LavinHash
38
+ cargo build --release
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ### React (Vite, Create React App, Next.js)
44
+
45
+ ```javascript
46
+ import { wasm_compare_data } from 'lavinhash';
47
+
48
+ function App() {
49
+ const checkSimilarity = () => {
50
+ const encoder = new TextEncoder();
51
+ const text1 = encoder.encode("The quick brown fox jumps over the lazy dog");
52
+ const text2 = encoder.encode("The quick brown fox leaps over the lazy dog");
53
+
54
+ const similarity = wasm_compare_data(text1, text2);
55
+ console.log(`Similarity: ${similarity}%`); // Output: Similarity: 95%
56
+ };
57
+
58
+ return <button onClick={checkSimilarity}>Check Similarity</button>;
59
+ }
60
+ ```
61
+
62
+ ### Angular
63
+
64
+ ```typescript
65
+ import { Component } from '@angular/core';
66
+ import { wasm_compare_data } from 'lavinhash';
67
+
68
+ @Component({
69
+ selector: 'app-root',
70
+ template: '<button (click)="checkSimilarity()">Check Similarity</button>'
71
+ })
72
+ export class AppComponent {
73
+ checkSimilarity() {
74
+ const encoder = new TextEncoder();
75
+ const text1 = encoder.encode("Sample text");
76
+ const text2 = encoder.encode("Sample text modified");
77
+
78
+ const similarity = wasm_compare_data(text1, text2);
79
+ console.log(`Similarity: ${similarity}%`);
80
+ }
81
+ }
82
+ ```
83
+
84
+ ### Vue 3 (Vite, Nuxt 3)
85
+
86
+ ```vue
87
+ <script setup>
88
+ import { wasm_compare_data } from 'lavinhash';
89
+
90
+ const checkSimilarity = () => {
91
+ const encoder = new TextEncoder();
92
+ const text1 = encoder.encode("Sample text");
93
+ const text2 = encoder.encode("Sample text modified");
94
+
95
+ const similarity = wasm_compare_data(text1, text2);
96
+ console.log(`Similarity: ${similarity}%`);
97
+ };
98
+ </script>
99
+
100
+ <template>
101
+ <button @click="checkSimilarity">Check Similarity</button>
102
+ </template>
103
+ ```
104
+
105
+ ### Vanilla JavaScript (with bundler)
106
+
107
+ ```javascript
108
+ import { wasm_compare_data } from 'lavinhash';
109
+
110
+ const encoder = new TextEncoder();
111
+ const text1 = encoder.encode("Sample text");
112
+ const text2 = encoder.encode("Sample text modified");
113
+
114
+ const similarity = wasm_compare_data(text1, text2);
115
+ console.log(`Similarity: ${similarity}%`);
116
+ ```
117
+
118
+ ### Rust
119
+
120
+ ```rust
121
+ use lavinhash::{generate_hash, compare_hashes, HashConfig};
122
+
123
+ fn main() -> Result<(), Box<dyn std::error::Error>> {
124
+ let data1 = b"Document content version 1";
125
+ let data2 = b"Document content version 2";
126
+
127
+ let config = HashConfig::default();
128
+
129
+ let hash1 = generate_hash(data1, &config)?;
130
+ let hash2 = generate_hash(data2, &config)?;
131
+
132
+ let similarity = compare_hashes(&hash1, &hash2, 0.3);
133
+ println!("Similarity: {}%", similarity);
134
+
135
+ Ok(())
136
+ }
137
+ ```
138
+
139
+ ## API Reference
140
+
141
+ ### JavaScript/WASM API
142
+
143
+ #### `wasm_generate_hash(data: Uint8Array): Uint8Array`
144
+
145
+ Generates a fuzzy hash fingerprint from input data.
146
+
147
+ **Parameters:**
148
+ - `data`: Input data as Uint8Array
149
+
150
+ **Returns:**
151
+ - Serialized fingerprint (approximately 1KB)
152
+
153
+ **Example:**
154
+ ```javascript
155
+ const data = new TextEncoder().encode("Text to hash");
156
+ const hash = wasm_generate_hash(data);
157
+ console.log(`Hash size: ${hash.length} bytes`);
158
+ ```
159
+
160
+ #### `wasm_compare_hashes(hash_a: Uint8Array, hash_b: Uint8Array): number`
161
+
162
+ Compares two previously generated hashes.
163
+
164
+ **Parameters:**
165
+ - `hash_a`: First fingerprint
166
+ - `hash_b`: Second fingerprint
167
+
168
+ **Returns:**
169
+ - Similarity score (0-100)
170
+
171
+ **Example:**
172
+ ```javascript
173
+ const hash1 = wasm_generate_hash(data1);
174
+ const hash2 = wasm_generate_hash(data2);
175
+ const similarity = wasm_compare_hashes(hash1, hash2);
176
+ ```
177
+
178
+ #### `wasm_compare_data(data_a: Uint8Array, data_b: Uint8Array): number`
179
+
180
+ Generates hashes and compares in a single operation.
181
+
182
+ **Parameters:**
183
+ - `data_a`: First data array
184
+ - `data_b`: Second data array
185
+
186
+ **Returns:**
187
+ - Similarity score (0-100)
188
+
189
+ **Example:**
190
+ ```javascript
191
+ const similarity = wasm_compare_data(text1, text2);
192
+ ```
193
+
194
+ ### Rust API
195
+
196
+ #### `generate_hash(data: &[u8], config: &HashConfig) -> Result<FuzzyFingerprint, FingerprintError>`
197
+
198
+ Generates a fuzzy hash from input data.
199
+
200
+ **Parameters:**
201
+ - `data`: Input data slice
202
+ - `config`: Configuration options
203
+
204
+ **Returns:**
205
+ - `Ok(FuzzyFingerprint)`: Generated fingerprint
206
+ - `Err(FingerprintError)`: Error if data is invalid
207
+
208
+ #### `compare_hashes(hash_a: &FuzzyFingerprint, hash_b: &FuzzyFingerprint, alpha: f32) -> u8`
209
+
210
+ Compares two fingerprints.
211
+
212
+ **Parameters:**
213
+ - `hash_a`: First fingerprint
214
+ - `hash_b`: Second fingerprint
215
+ - `alpha`: Weight coefficient (0.0-1.0, default 0.3)
216
+
217
+ **Returns:**
218
+ - Similarity score (0-100)
219
+
220
+ #### `HashConfig`
221
+
222
+ Configuration structure for hash generation.
223
+
224
+ **Fields:**
225
+ - `enable_parallel: bool` - Enable parallel processing for large files (default: true)
226
+ - `alpha: f32` - Weight for structure vs content (default: 0.3)
227
+ - `min_modulus: u64` - Feature density control (default: 16)
228
+
229
+ **Example:**
230
+ ```rust
231
+ let mut config = HashConfig::default();
232
+ config.alpha = 0.5; // 50% structure, 50% content
233
+ config.enable_parallel = false; // Disable parallel processing
234
+ ```
235
+
236
+ ## Algorithm Details
237
+
238
+ ### DLAH (Dual-Layer Adaptive Hashing)
239
+
240
+ LavinHash implements a three-phase pipeline:
241
+
242
+ **Phase I: Adaptive Normalization**
243
+ - Case folding (A-Z to a-z)
244
+ - Whitespace normalization
245
+ - Control character filtering
246
+ - Zero-copy iterator-based processing
247
+
248
+ **Phase II: Structural Hash**
249
+ - Shannon entropy calculation with adaptive block sizing
250
+ - Quantization to 4-bit nibbles
251
+ - Compact vector representation
252
+ - Levenshtein distance for comparison
253
+
254
+ **Phase III: Content Hash**
255
+ - BuzHash rolling hash algorithm
256
+ - Adaptive modulus scaling
257
+ - 8192-bit Bloom filter (1KB)
258
+ - Jaccard similarity for comparison
259
+
260
+ ### Similarity Formula
261
+
262
+ ```
263
+ Similarity = α × Levenshtein(Structure) + (1-α) × Jaccard(Content)
264
+ ```
265
+
266
+ Where:
267
+ - `α = 0.3` (default) - 30% weight to structure, 70% to content
268
+ - Levenshtein: Structural similarity derived from the normalized edit distance on entropy vectors (a smaller distance yields a higher score)
269
+ - Jaccard: Set similarity on Bloom filter features
270
+
271
+ ### Performance Characteristics
272
+
273
+ **Time Complexity:**
274
+ - Hash generation: O(n) where n is data size
275
+ - Hash comparison: O(1) - constant time regardless of file size
276
+
277
+ **Space Complexity:**
278
+ - Fingerprint size: ~1KB + O(log n) structural data
279
+ - Memory usage: O(1) for comparison, O(n) for generation
280
+
281
+ **Throughput:**
282
+ - Single-threaded: ~500 MB/s
283
+ - Multi-threaded: ~2 GB/s (files larger than 1MB)
284
+
285
+ ## Configuration
286
+
287
+ ### Basic Configuration
288
+
289
+ ```rust
290
+ use lavinhash::HashConfig;
291
+
292
+ let config = HashConfig {
293
+ enable_parallel: true,
294
+ alpha: 0.3,
295
+ min_modulus: 16,
296
+ };
297
+ ```
298
+
299
+ ### Advanced Configuration
300
+
301
+ **Adjusting Structure vs Content Weight:**
302
+
303
+ ```rust
304
+ // More weight to structure (topology)
305
+ config.alpha = 0.5; // 50% structure, 50% content
306
+
307
+ // More weight to content (features)
308
+ config.alpha = 0.1; // 10% structure, 90% content
309
+ ```
310
+
311
+ **Controlling Feature Density:**
312
+
313
+ ```rust
314
+ // Higher sensitivity (more features)
315
+ config.min_modulus = 8;
316
+
317
+ // Lower sensitivity (fewer features)
318
+ config.min_modulus = 32;
319
+ ```
320
+
321
+ **Parallel Processing:**
322
+
323
+ ```rust
324
+ // Force sequential processing
325
+ config.enable_parallel = false;
326
+
327
+ // Enable automatic parallel processing for files > 1MB
328
+ config.enable_parallel = true;
329
+ ```
330
+
331
+ ## Use Cases
332
+
333
+ ### Document Similarity Detection
334
+
335
+ Compare different versions of documents to detect modifications and measure similarity.
336
+
337
+ ```javascript
338
+ import { wasm_compare_data } from 'lavinhash';
339
+
340
+ // In a React/Vue/Angular app with file upload
341
+ async function compareDocuments(file1, file2) {
342
+ const buffer1 = await file1.arrayBuffer();
343
+ const buffer2 = await file2.arrayBuffer();
344
+
345
+ const data1 = new Uint8Array(buffer1);
346
+ const data2 = new Uint8Array(buffer2);
347
+
348
+ const similarity = wasm_compare_data(data1, data2);
349
+ console.log(`Similarity: ${similarity}%`);
350
+ return similarity;
351
+ }
352
+ ```
353
+
354
+ ### Duplicate Detection
355
+
356
+ Identify duplicate or near-duplicate files in large datasets.
357
+
358
+ ```rust
359
+ let files = vec![file1, file2, file3];
360
+ let hashes: Vec<_> = files.iter()
361
+ .map(|f| generate_hash(f, &config).unwrap())
362
+ .collect();
363
+
364
+ for i in 0..hashes.len() {
365
+ for j in i+1..hashes.len() {
366
+ let sim = compare_hashes(&hashes[i], &hashes[j], 0.3);
367
+ if sim > 90 {
368
+ println!("Files {} and {} are similar: {}%", i, j, sim);
369
+ }
370
+ }
371
+ }
372
+ ```
373
+
374
+ ### Version Tracking
375
+
376
+ Track changes between different versions of files or content.
377
+
378
+ ```javascript
379
+ import { wasm_generate_hash, wasm_compare_hashes } from 'lavinhash';
380
+
381
+ // Compare multiple file versions
382
+ async function trackVersions(files) {
383
+ const encoder = new TextEncoder();
384
+
385
+ const hashes = files.map(content => {
386
+ const data = encoder.encode(content);
387
+ return wasm_generate_hash(data);
388
+ });
389
+
390
+ const results = [];
391
+ for (let i = 0; i < hashes.length - 1; i++) {
392
+ const sim = wasm_compare_hashes(hashes[i], hashes[i + 1]);
393
+ results.push({
394
+ from: `v${i+1}`,
395
+ to: `v${i+2}`,
396
+ similarity: sim
397
+ });
398
+ }
399
+
400
+ return results;
401
+ }
402
+ ```
403
+
404
+ ## Framework Compatibility
405
+
406
+ LavinHash works seamlessly with all modern JavaScript frameworks and build tools:
407
+
408
+ - **React**: Vite, Create React App, Next.js, Remix
409
+ - **Angular**: Angular CLI (v12+)
410
+ - **Vue**: Vue 3, Nuxt 3, Vite
411
+ - **Svelte**: SvelteKit, Vite
412
+ - **Build Tools**: Webpack 5+, Vite, Rollup, Parcel, esbuild
413
+
414
+ The library uses ES modules and is optimized for modern bundlers.
415
+
416
+ ## Building WASM
417
+
418
+ To build the WebAssembly bindings:
419
+
420
+ ```bash
421
+ # Install wasm-pack
422
+ cargo install wasm-pack
423
+
424
+ # Build for modern bundlers (React, Angular, Vue, etc.)
425
+ wasm-pack build --target bundler --out-dir pkg --out-name lavinhash
426
+
427
+ # The compiled files will be in the pkg/ directory
428
+ ```
429
+
430
+ ## Testing
431
+
432
+ ### Rust Tests
433
+
434
+ ```bash
435
+ # Run all tests
436
+ cargo test
437
+
438
+ # Run tests with output
439
+ cargo test -- --nocapture
440
+
441
+ # Run specific test
442
+ cargo test test_generate_hash_basic
443
+ ```
444
+
445
+ ### Benchmarks
446
+
447
+ ```bash
448
+ # Run benchmarks
449
+ cargo bench
450
+ ```
451
+
452
+ ## Technical Specifications
453
+
454
+ **Fingerprint Format:**
455
+
456
+ ```
457
+ Offset | Field | Type | Size
458
+ -------|------------------|----------|-------------
459
+ 0x00 | Magic | u8 | 1 byte (0x48)
460
+ 0x01 | Version | u8 | 1 byte (0x01)
461
+ 0x02 | Struct Length | u16 LE | 2 bytes
462
+ 0x04 | Content Bloom | u64[128] | 1024 bytes
463
+ 0x404 | Structural Data | u8[] | Variable
464
+ ```
465
+
466
+ **Cross-Platform Determinism:**
467
+ - Identical input produces identical hash on all platforms
468
+ - Little-endian byte ordering
469
+ - IEEE 754 floating-point compliance
470
+
471
+ **Thread Safety:**
472
+ - Hash generation is thread-safe
473
+ - Parallel processing uses Rayon for data parallelism
474
+ - No global state or locks
475
+
476
+ ## Examples
477
+
478
+ See the `examples/` directory for complete working examples:
479
+
480
+ - `basic_usage.rs` - Rust usage examples
481
+ - `javascript_example.js` - Node.js integration
482
+ - `browser_example.html` - Browser-based demo
483
+
484
+ ## Documentation
485
+
486
+ - **API Documentation**: Available at [docs.rs/lavinhash](https://docs.rs/lavinhash)
487
+ - **Technical Specification**: See `docs/TECHNICAL.md` in the repository
488
+ - **Contributing Guide**: See `CONTRIBUTING.md`
489
+
490
+ ## License
491
+
492
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
493
+
494
+ ## Repository
495
+
496
+ Source code: [https://github.com/RafaCalRob/LavinHash](https://github.com/RafaCalRob/LavinHash)
497
+
498
+ ## Support
499
+
500
+ For bug reports and feature requests, please open an issue on GitHub:
501
+ [https://github.com/RafaCalRob/LavinHash/issues](https://github.com/RafaCalRob/LavinHash/issues)
package/lavinhash.d.ts ADDED
@@ -0,0 +1,47 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ /**
5
+ * Initialize the WASM module (calls the module's exported `init` function).
6
+ */
7
+ export function init(): void;
8
+
9
+ /**
10
+ * Generate hash and compare in one step (WASM wrapper)
11
+ *
12
+ * # Arguments
13
+ * * `data_a` - First data as Uint8Array (raw input bytes, not a fingerprint)
14
+ * * `data_b` - Second data as Uint8Array (raw input bytes, not a fingerprint)
15
+ *
16
+ * # Returns
17
+ * Similarity score 0-100; throws if the WASM call reports an error
18
+ */
19
+ export function wasm_compare_data(data_a: Uint8Array, data_b: Uint8Array): number;
20
+
21
+ /**
22
+ * Compare two fuzzy hashes (WASM wrapper)
23
+ *
24
+ * # Arguments
25
+ * * `hash_a` - First fingerprint (serialized), e.g. the output of `wasm_generate_hash`
26
+ * * `hash_b` - Second fingerprint (serialized), e.g. the output of `wasm_generate_hash`
27
+ *
28
+ * # Returns
29
+ * Similarity score 0-100; throws if the WASM call reports an error
30
+ */
31
+ export function wasm_compare_hashes(hash_a: Uint8Array, hash_b: Uint8Array): number;
32
+
33
+ /**
34
+ * Get fingerprint size in bytes for the serialized fingerprint `hash` (WASM wrapper); throws if the WASM call reports an error
35
+ */
36
+ export function wasm_fingerprint_size(hash: Uint8Array): number;
37
+
38
+ /**
39
+ * Generate a fuzzy hash from data (WASM wrapper)
40
+ *
41
+ * # Arguments
42
+ * * `data` - Input data as Uint8Array
43
+ *
44
+ * # Returns
45
+ * Serialized fingerprint as a new Uint8Array (copied out of WASM memory); throws if the WASM call reports an error
46
+ */
47
+ export function wasm_generate_hash(data: Uint8Array): Uint8Array;
package/lavinhash.js ADDED
@@ -0,0 +1,5 @@
1
+ import * as wasm from "./lavinhash_bg.wasm"; // the compiled WASM binary, imported as a module namespace
2
+ export * from "./lavinhash_bg.js"; // re-export everything the glue module exports
3
+ import { __wbg_set_wasm } from "./lavinhash_bg.js";
4
+ __wbg_set_wasm(wasm); // hand the WASM exports to the glue code before any wrapper is called
5
+ wasm.__wbindgen_start(); // call the module's __wbindgen_start export once at load time
package/lavinhash_bg.js ADDED
@@ -0,0 +1,294 @@
1
+ let wasm; // WASM exports object; stays undefined until __wbg_set_wasm() is called
2
+ export function __wbg_set_wasm(val) { // Install the WASM exports object that the functions in this file call into.
3
+ wasm = val;
4
+ }
5
+
6
+ function addHeapObject(obj) { // Store `obj` in the `heap` table and return the slot index it was placed in.
7
+ if (heap_next === heap.length) heap.push(heap.length + 1); // no free slot left: append one whose "next free" link points just past the new end
8
+ const idx = heap_next;
9
+ heap_next = heap[idx]; // a free slot holds the index of the next free slot
10
+
11
+ heap[idx] = obj;
12
+ return idx;
13
+ }
14
+
15
+ function dropObject(idx) { // Release heap slot `idx` by making it the new head of the free list.
16
+ if (idx < 132) return; // slots 0..131 are the 128 initial placeholders plus undefined/null/true/false and are never released
17
+ heap[idx] = heap_next; // the freed slot now stores the previous free-list head
18
+ heap_next = idx;
19
+ }
20
+
21
+ function getArrayU8FromWasm0(ptr, len) { // Return a Uint8Array view (subarray, not a copy) of `len` bytes of WASM memory starting at `ptr`.
22
+ ptr = ptr >>> 0; // coerce to an unsigned 32-bit value
23
+ return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
24
+ }
25
+
26
+ let cachedDataViewMemory0 = null; // lazily created DataView over wasm.memory.buffer
27
+ function getDataViewMemory0() { // Return a DataView over WASM memory, recreating the cached one when it is stale.
28
+ if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) { // stale when missing, detached, or (where `detached` is unavailable) backed by a different buffer than wasm.memory
29
+ cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
30
+ }
31
+ return cachedDataViewMemory0;
32
+ }
33
+
34
+ function getStringFromWasm0(ptr, len) { // Decode `len` bytes of WASM memory at `ptr` into a JS string via decodeText().
35
+ ptr = ptr >>> 0; // coerce to an unsigned 32-bit value
36
+ return decodeText(ptr, len);
37
+ }
38
+
39
+ let cachedUint8ArrayMemory0 = null; // lazily created Uint8Array over wasm.memory.buffer
40
+ function getUint8ArrayMemory0() { // Return a Uint8Array over WASM memory, rebuilding the cached view when needed.
41
+ if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) { // NOTE(review): a zero byteLength presumably means the old buffer was detached (e.g. after memory growth) — confirm
42
+ cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
43
+ }
44
+ return cachedUint8ArrayMemory0;
45
+ }
46
+
47
+ function getObject(idx) { return heap[idx]; } // Look up the JS value stored in heap slot `idx` without releasing the slot.
48
+
49
+ let heap = new Array(128).fill(undefined); // table of JS values referenced by index; slots 0..127 start as undefined placeholders
50
+ heap.push(undefined, null, true, false); // slots 128..131 hold these four fixed values
51
+
52
+ let heap_next = heap.length; // head of the free list; starts at 132, so the first addHeapObject() call appends a slot
53
+
54
+ function passArray8ToWasm0(arg, malloc) { // Copy the bytes of `arg` into WASM memory obtained from `malloc`; returns the pointer and records the length in WASM_VECTOR_LEN.
55
+ const ptr = malloc(arg.length * 1, 1) >>> 0; // NOTE(review): arguments look like (size, alignment) — confirm against the WASM export
56
+ getUint8ArrayMemory0().set(arg, ptr / 1);
57
+ WASM_VECTOR_LEN = arg.length; // callers read the length from this module-level variable right after the call
58
+ return ptr;
59
+ }
60
+
61
+ function passStringToWasm0(arg, malloc, realloc) { // Write the JS string `arg` into WASM memory as UTF-8; returns the pointer and stores the byte length in WASM_VECTOR_LEN.
62
+ if (realloc === undefined) { // no realloc available: encode the whole string first, then allocate exactly buf.length bytes
63
+ const buf = cachedTextEncoder.encode(arg);
64
+ const ptr = malloc(buf.length, 1) >>> 0;
65
+ getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
66
+ WASM_VECTOR_LEN = buf.length;
67
+ return ptr;
68
+ }
69
+
70
+ let len = arg.length; // initial allocation size: one byte per UTF-16 code unit
71
+ let ptr = malloc(len, 1) >>> 0;
72
+
73
+ const mem = getUint8ArrayMemory0();
74
+
75
+ let offset = 0;
76
+
77
+ for (; offset < len; offset++) { // fast path: copy code units one at a time while they are ASCII
78
+ const code = arg.charCodeAt(offset);
79
+ if (code > 0x7F) break; // first non-ASCII code unit ends the fast path
80
+ mem[ptr + offset] = code;
81
+ }
82
+ if (offset !== len) { // the fast path stopped early, so the rest must go through the encoder
83
+ if (offset !== 0) {
84
+ arg = arg.slice(offset); // keep only the part that has not been written yet
85
+ }
86
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; // grow to offset + 3 bytes per remaining code unit (also updates `len`); NOTE(review): 3 is presumably the worst-case UTF-8 size per UTF-16 code unit — confirm
87
+ const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len); // fetch the memory view again after realloc rather than reusing `mem`
88
+ const ret = cachedTextEncoder.encodeInto(arg, view);
89
+
90
+ offset += ret.written;
91
+ ptr = realloc(ptr, len, offset, 1) >>> 0; // resize the allocation to the number of bytes actually written
92
+ }
93
+
94
+ WASM_VECTOR_LEN = offset; // total bytes written, read by the caller right after this returns
95
+ return ptr;
96
+ }
97
+
98
+ function takeObject(idx) { // Return the value stored in heap slot `idx` and release the slot.
99
+ const ret = getObject(idx); // read the value before dropObject() overwrites the slot with a free-list link
100
+ dropObject(idx);
101
+ return ret;
102
+ }
103
+
104
+ let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }); // shared UTF-8 decoder, created with the ignoreBOM and fatal options enabled
105
+ cachedTextDecoder.decode(); // NOTE(review): empty decode call, presumably to initialize the decoder eagerly — confirm
106
+ const MAX_SAFARI_DECODE_BYTES = 2146435072; // NOTE(review): judging by the name, a byte budget after which Safari needs a fresh TextDecoder — confirm
107
+ let numBytesDecoded = 0; // running total of bytes passed to the current decoder
108
+ function decodeText(ptr, len) { // Decode `len` bytes of WASM memory at `ptr` with the shared decoder and return the string.
109
+ numBytesDecoded += len;
110
+ if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) { // budget reached: replace the decoder and restart the count with this call's bytes
111
+ cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
112
+ cachedTextDecoder.decode();
113
+ numBytesDecoded = len;
114
+ }
115
+ return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
116
+ }
117
+
118
+ const cachedTextEncoder = new TextEncoder(); // shared encoder used by passStringToWasm0()
119
+
120
+ if (!('encodeInto' in cachedTextEncoder)) { // fallback for environments whose TextEncoder has no encodeInto()
121
+ cachedTextEncoder.encodeInto = function (arg, view) {
122
+ const buf = cachedTextEncoder.encode(arg); // encode into a temporary buffer, then copy it into `view`
123
+ view.set(buf);
124
+ return {
125
+ read: arg.length,
126
+ written: buf.length // passStringToWasm0() advances its offset by this value
127
+ };
128
+ }
129
+ }
130
+
131
+ let WASM_VECTOR_LEN = 0; // length of the most recent buffer written by passArray8ToWasm0()/passStringToWasm0(); read by callers right after those calls
132
+
133
+ /**
134
+ * Initialize the WASM module by calling its exported `init` function.
135
+ */
136
+ export function init() {
137
+ wasm.init();
138
+ }
139
+
140
+ /**
141
+ * Generate hash and compare in one step (WASM wrapper)
142
+ *
143
+ * # Arguments
144
+ * * `data_a` - First data as Uint8Array
145
+ * * `data_b` - Second data as Uint8Array
146
+ *
147
+ * # Returns
148
+ * Similarity score 0-100; throws the error object handed back by WASM when the call fails
149
+ * @param {Uint8Array} data_a
150
+ * @param {Uint8Array} data_b
151
+ * @returns {number}
152
+ */
153
+ export function wasm_compare_data(data_a, data_b) {
154
+ try {
155
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // reserve 16 bytes that the WASM call fills with its results
156
+ const ptr0 = passArray8ToWasm0(data_a, wasm.__wbindgen_export2); // copy data_a into WASM memory
157
+ const len0 = WASM_VECTOR_LEN; // must be read before the next passArray8ToWasm0() call overwrites it
158
+ const ptr1 = passArray8ToWasm0(data_b, wasm.__wbindgen_export2); // copy data_b into WASM memory
159
+ const len1 = WASM_VECTOR_LEN;
160
+ wasm.wasm_compare_data(retptr, ptr0, len0, ptr1, len1);
161
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result value
162
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // heap index of the error object, used only when r2 is set
163
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // non-zero when the call failed
164
+ if (r2) {
165
+ throw takeObject(r1);
166
+ }
167
+ return r0;
168
+ } finally {
169
+ wasm.__wbindgen_add_to_stack_pointer(16); // always give back the 16 bytes reserved above
170
+ }
171
+ }
172
+
173
+ /**
174
+ * Compare two fuzzy hashes (WASM wrapper)
175
+ *
176
+ * # Arguments
177
+ * * `hash_a` - First fingerprint (serialized)
178
+ * * `hash_b` - Second fingerprint (serialized)
179
+ *
180
+ * # Returns
181
+ * Similarity score 0-100; throws the error object handed back by WASM when the call fails
182
+ * @param {Uint8Array} hash_a
183
+ * @param {Uint8Array} hash_b
184
+ * @returns {number}
185
+ */
186
+ export function wasm_compare_hashes(hash_a, hash_b) {
187
+ try {
188
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // reserve 16 bytes that the WASM call fills with its results
189
+ const ptr0 = passArray8ToWasm0(hash_a, wasm.__wbindgen_export2); // copy hash_a into WASM memory
190
+ const len0 = WASM_VECTOR_LEN; // must be read before the next passArray8ToWasm0() call overwrites it
191
+ const ptr1 = passArray8ToWasm0(hash_b, wasm.__wbindgen_export2); // copy hash_b into WASM memory
192
+ const len1 = WASM_VECTOR_LEN;
193
+ wasm.wasm_compare_hashes(retptr, ptr0, len0, ptr1, len1);
194
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result value
195
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // heap index of the error object, used only when r2 is set
196
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // non-zero when the call failed
197
+ if (r2) {
198
+ throw takeObject(r1);
199
+ }
200
+ return r0;
201
+ } finally {
202
+ wasm.__wbindgen_add_to_stack_pointer(16); // always give back the 16 bytes reserved above
203
+ }
204
+ }
205
+
206
+ /**
207
+ * Get fingerprint size in bytes (WASM wrapper); throws the error object handed back by WASM when the call fails
208
+ * @param {Uint8Array} hash
209
+ * @returns {number}
210
+ */
211
+ export function wasm_fingerprint_size(hash) {
212
+ try {
213
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // reserve 16 bytes that the WASM call fills with its results
214
+ const ptr0 = passArray8ToWasm0(hash, wasm.__wbindgen_export2); // copy the fingerprint into WASM memory
215
+ const len0 = WASM_VECTOR_LEN;
216
+ wasm.wasm_fingerprint_size(retptr, ptr0, len0);
217
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // result value
218
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // heap index of the error object, used only when r2 is set
219
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // non-zero when the call failed
220
+ if (r2) {
221
+ throw takeObject(r1);
222
+ }
223
+ return r0 >>> 0; // reinterpret the signed 32-bit read as an unsigned value
224
+ } finally {
225
+ wasm.__wbindgen_add_to_stack_pointer(16); // always give back the 16 bytes reserved above
226
+ }
227
+ }
228
+
229
+ /**
230
+ * Generate a fuzzy hash from data (WASM wrapper)
231
+ *
232
+ * # Arguments
233
+ * * `data` - Input data as Uint8Array
234
+ *
235
+ * # Returns
236
+ * Serialized fingerprint as Uint8Array (a copy, independent of WASM memory); throws the error object handed back by WASM when the call fails
237
+ * @param {Uint8Array} data
238
+ * @returns {Uint8Array}
239
+ */
240
+ export function wasm_generate_hash(data) {
241
+ try {
242
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); // reserve 16 bytes that the WASM call fills with its results
243
+ const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_export2); // copy the input into WASM memory
244
+ const len0 = WASM_VECTOR_LEN;
245
+ wasm.wasm_generate_hash(retptr, ptr0, len0);
246
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); // pointer to the fingerprint bytes
247
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); // length of the fingerprint in bytes
248
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); // heap index of the error object, used only when r3 is set
249
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true); // non-zero when the call failed
250
+ if (r3) {
251
+ throw takeObject(r2);
252
+ }
253
+ var v2 = getArrayU8FromWasm0(r0, r1).slice(); // slice() copies the bytes out of WASM memory
254
+ wasm.__wbindgen_export(r0, r1 * 1, 1); // NOTE(review): presumably frees the WASM-side buffer now that it has been copied — confirm
255
+ return v2;
256
+ } finally {
257
+ wasm.__wbindgen_add_to_stack_pointer(16); // always give back the 16 bytes reserved above
258
+ }
259
+ }
260
+
261
+ export function __wbg_error_7534b8e9a36f1ab4(arg0, arg1) { // Import for the WASM side: print the string at (arg0 = pointer, arg1 = length) with console.error.
262
+ let deferred0_0;
263
+ let deferred0_1;
264
+ try {
265
+ deferred0_0 = arg0;
266
+ deferred0_1 = arg1;
267
+ console.error(getStringFromWasm0(arg0, arg1));
268
+ } finally {
269
+ wasm.__wbindgen_export(deferred0_0, deferred0_1, 1); // runs even if logging throws; NOTE(review): presumably frees the string buffer — confirm
270
+ }
271
+ };
272
+
273
+ export function __wbg_new_8a6f238a6ece86ea() { // Import for the WASM side: create a JS Error and return its heap slot index.
274
+ const ret = new Error();
275
+ return addHeapObject(ret);
276
+ };
277
+
278
+ export function __wbg_stack_0ed75d68575b0f3c(arg0, arg1) { // Import for the WASM side: write the `.stack` string of heap object `arg1` into WASM memory and store its (pointer, length) at `arg0`.
279
+ const ret = getObject(arg1).stack;
280
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_export2, wasm.__wbindgen_export3); // passed as the malloc and realloc arguments
281
+ const len1 = WASM_VECTOR_LEN;
282
+ getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); // length goes at byte offset 4
283
+ getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true); // pointer goes at byte offset 0
284
+ };
285
+
286
+ export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) { // Decode the string at (arg0 = pointer, arg1 = length) and return the heap slot index holding it.
287
+ // Cast intrinsic for `Ref(String) -> Externref`.
288
+ const ret = getStringFromWasm0(arg0, arg1);
289
+ return addHeapObject(ret);
290
+ };
291
+
292
+ export function __wbindgen_object_drop_ref(arg0) { // Import for the WASM side: release heap slot `arg0`; the stored value is discarded.
293
+ takeObject(arg0);
294
+ };
Binary file
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "lavinhash",
3
+ "type": "module",
4
+ "collaborators": [
5
+ "LavinHash Contributors"
6
+ ],
7
+ "description": "High-performance fuzzy hashing library implementing the DLAH (Dual-Layer Adaptive Hashing) algorithm",
8
+ "version": "1.0.0",
9
+ "license": "MIT",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/RafaCalRob/lavinhash"
13
+ },
14
+ "files": [
15
+ "lavinhash_bg.wasm",
16
+ "lavinhash.js",
17
+ "lavinhash_bg.js",
18
+ "lavinhash.d.ts"
19
+ ],
20
+ "main": "lavinhash.js",
21
+ "homepage": "https://bdovenbird.com/lavinhash/",
22
+ "types": "lavinhash.d.ts",
23
+ "sideEffects": [
24
+ "./lavinhash.js",
25
+ "./snippets/*"
26
+ ],
27
+ "keywords": [
28
+ "fuzzy-hashing",
29
+ "similarity",
30
+ "hash",
31
+ "fingerprint",
32
+ "dlah",
33
+ "wasm",
34
+ "webassembly",
35
+ "rust",
36
+ "duplicate-detection",
37
+ "text-similarity",
38
+ "file-similarity",
39
+ "content-hashing",
40
+ "bloom-filter",
41
+ "react",
42
+ "angular",
43
+ "vue",
44
+ "typescript"
45
+ ]
46
+ }