elid 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +126 -0
- package/elid.d.ts +276 -0
- package/elid.js +5 -0
- package/elid_bg.js +812 -0
- package/elid_bg.wasm +0 -0
- package/package.json +32 -0
package/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ELID - Efficient Levenshtein and String Similarity Library
|
|
2
|
+
|
|
3
|
+
[CI status](https://forge.blackleafdigital.com/BlackLeafDigital/ELID/actions)
|
|
4
|
+
[License: MIT OR Apache-2.0](LICENSE-MIT)
|
|
5
|
+
|
|
6
|
+
A fast, zero-dependency Rust library for computing string similarity metrics with bindings for Python, JavaScript (WASM), and C.
|
|
7
|
+
|
|
8
|
+
## Algorithms
|
|
9
|
+
|
|
10
|
+
| Algorithm | Type | Best For |
|
|
11
|
+
|-----------|------|----------|
|
|
12
|
+
| **Levenshtein** | Edit distance | General-purpose comparison, spell checking |
|
|
13
|
+
| **Normalized Levenshtein** | Similarity (0-1) | When you need a percentage match |
|
|
14
|
+
| **Jaro** | Similarity (0-1) | Short strings |
|
|
15
|
+
| **Jaro-Winkler** | Similarity (0-1) | Names and record linkage |
|
|
16
|
+
| **Hamming** | Distance | Fixed-length strings, DNA, error codes |
|
|
17
|
+
| **OSA** | Edit distance | Typo detection (counts transpositions) |
|
|
18
|
+
| **SimHash** | LSH fingerprint | Database-queryable similarity, near-duplicate detection |
|
|
19
|
+
| **Best Match** | Composite (0-1) | When unsure which algorithm fits |
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
### Rust
|
|
24
|
+
|
|
25
|
+
```toml
|
|
26
|
+
[dependencies]
|
|
27
|
+
elid = "0.1.0"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Python
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install elid
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### JavaScript (WASM)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npm install elid
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### C/C++
|
|
43
|
+
|
|
44
|
+
Build with `cargo build --release --features ffi` to get `libelid.so` and `elid.h`.
|
|
45
|
+
|
|
46
|
+
## Quick Start
|
|
47
|
+
|
|
48
|
+
```rust
|
|
49
|
+
use elid::*;
|
|
50
|
+
|
|
51
|
+
// Edit distance
|
|
52
|
+
let distance = levenshtein("kitten", "sitting"); // 3
|
|
53
|
+
|
|
54
|
+
// Normalized similarity (0.0 to 1.0)
|
|
55
|
+
let similarity = normalized_levenshtein("hello", "hallo"); // 0.8
|
|
56
|
+
|
|
57
|
+
// Name matching
|
|
58
|
+
let similarity = jaro_winkler("Martha", "Marhta"); // 0.961
|
|
59
|
+
|
|
60
|
+
// SimHash for database queries
|
|
61
|
+
let hash = simhash("iPhone 14");
|
|
62
|
+
let sim = simhash_similarity("iPhone 14", "iPhone 15"); // ~0.92
|
|
63
|
+
|
|
64
|
+
// Find best match in a list
|
|
65
|
+
let candidates = vec!["apple", "application", "apply"];
|
|
66
|
+
let (idx, score) = find_best_match("app", &candidates);
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Python
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
import elid
|
|
73
|
+
|
|
74
|
+
elid.levenshtein("kitten", "sitting") # 3
|
|
75
|
+
elid.jaro_winkler("martha", "marhta") # 0.961
|
|
76
|
+
elid.simhash_similarity("iPhone 14", "iPhone 15") # 0.922
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### JavaScript
|
|
80
|
+
|
|
81
|
+
```javascript
|
|
82
|
+
import { levenshtein, jaroWinkler, simhashSimilarity } from 'elid';
|
|
83
|
+
|
|
84
|
+
// No init() call is required: the package exports no initializer, so the functions can be called directly.
|
|
85
|
+
levenshtein("kitten", "sitting"); // 3
|
|
86
|
+
jaroWinkler("martha", "marhta"); // 0.961
|
|
87
|
+
simhashSimilarity("iPhone 14", "iPhone 15"); // 0.922
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Configuration
|
|
91
|
+
|
|
92
|
+
Use `SimilarityOpts` for case-insensitive or whitespace-trimmed comparisons:
|
|
93
|
+
|
|
94
|
+
```rust
|
|
95
|
+
use elid::{levenshtein_with_opts, SimilarityOpts};
|
|
96
|
+
|
|
97
|
+
let opts = SimilarityOpts {
|
|
98
|
+
case_sensitive: false,
|
|
99
|
+
trim_whitespace: true,
|
|
100
|
+
..Default::default()
|
|
101
|
+
};
|
|
102
|
+
let distance = levenshtein_with_opts(" HELLO ", "hello", &opts); // 0
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Performance
|
|
106
|
+
|
|
107
|
+
- Zero external dependencies for core algorithms
|
|
108
|
+
- O(min(m,n)) space-optimized Levenshtein
|
|
109
|
+
- 1.4M+ string comparisons per second (Python benchmarks)
|
|
110
|
+
- ~96KB WASM binary
|
|
111
|
+
|
|
112
|
+
## Building
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
git clone https://forge.blackleafdigital.com/BlackLeafDigital/ELID.git
|
|
116
|
+
cd ELID
|
|
117
|
+
|
|
118
|
+
cargo build --release
|
|
119
|
+
cargo test
|
|
120
|
+
cargo bench
|
|
121
|
+
cargo run --example basic_usage
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## License
|
|
125
|
+
|
|
126
|
+
Dual-licensed under [MIT](LICENSE-MIT) or [Apache-2.0](LICENSE-APACHE) at your option.
|
package/elid.d.ts
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
/**
|
|
4
|
+
* Compute the SimHash fingerprint of a string.
|
|
5
|
+
*
|
|
6
|
+
* Returns a 64-bit hash where similar strings produce similar numbers.
|
|
7
|
+
* Use this for database queries by storing the hash and querying by numeric range.
|
|
8
|
+
*
|
|
9
|
+
* # JavaScript Example
|
|
10
|
+
*
|
|
11
|
+
* ```javascript
|
|
12
|
+
* import { simhash } from 'elid';
|
|
13
|
+
*
|
|
14
|
+
* const hash1 = simhash("iPhone 14");
|
|
15
|
+
* const hash2 = simhash("iPhone 15");
|
|
16
|
+
* const hash3 = simhash("Galaxy S23");
|
|
17
|
+
*
|
|
18
|
+
* // hash1 and hash2 will be numerically close
|
|
19
|
+
* // hash3 will be numerically distant
|
|
20
|
+
*
|
|
21
|
+
* // Store in database as bigint:
|
|
22
|
+
* // { name: "iPhone 14", simhash: hash1 }
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export function simhash(text: string): number;
|
|
26
|
+
/**
|
|
27
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
28
|
+
*
|
|
29
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
30
|
+
*
|
|
31
|
+
* # JavaScript Example
|
|
32
|
+
*
|
|
33
|
+
* ```javascript
|
|
34
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
35
|
+
*
|
|
36
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
37
|
+
* console.log(similarity); // ~0.8
|
|
38
|
+
* ```
|
|
39
|
+
*/
|
|
40
|
+
export function normalizedLevenshtein(a: string, b: string): number;
|
|
41
|
+
/**
|
|
42
|
+
* Compute the best matching similarity between two strings.
|
|
43
|
+
*
|
|
44
|
+
* Runs multiple algorithms and returns the highest score.
|
|
45
|
+
*
|
|
46
|
+
* # JavaScript Example
|
|
47
|
+
*
|
|
48
|
+
* ```javascript
|
|
49
|
+
* import { bestMatch } from 'elid';
|
|
50
|
+
*
|
|
51
|
+
* const score = bestMatch("hello", "hallo");
|
|
52
|
+
* console.log(score); // ~0.8
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export function bestMatch(a: string, b: string): number;
|
|
56
|
+
/**
|
|
57
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
58
|
+
*
|
|
59
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
60
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
61
|
+
*
|
|
62
|
+
* # JavaScript Example
|
|
63
|
+
*
|
|
64
|
+
* ```javascript
|
|
65
|
+
* import { jaroWinkler } from 'elid';
|
|
66
|
+
*
|
|
67
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
68
|
+
* console.log(similarity); // ~0.961
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
export function jaroWinkler(a: string, b: string): number;
|
|
72
|
+
/**
|
|
73
|
+
* Compute the Hamming distance between two SimHash values.
|
|
74
|
+
*
|
|
75
|
+
* Returns the number of differing bits. Lower values = higher similarity.
|
|
76
|
+
*
|
|
77
|
+
* # JavaScript Example
|
|
78
|
+
*
|
|
79
|
+
* ```javascript
|
|
80
|
+
* import { simhash, simhashDistance } from 'elid';
|
|
81
|
+
*
|
|
82
|
+
* const hash1 = simhash("iPhone 14");
|
|
83
|
+
* const hash2 = simhash("iPhone 15");
|
|
84
|
+
* const distance = simhashDistance(hash1, hash2);
|
|
85
|
+
*
|
|
86
|
+
* console.log(distance); // Low number = similar
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
export function simhashDistance(hash1: number, hash2: number): number;
|
|
90
|
+
/**
|
|
91
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
92
|
+
*
|
|
93
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
94
|
+
*
|
|
95
|
+
* # JavaScript Example
|
|
96
|
+
*
|
|
97
|
+
* ```javascript
|
|
98
|
+
* import { simhashSimilarity } from 'elid';
|
|
99
|
+
*
|
|
100
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
101
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
102
|
+
*
|
|
103
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
104
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
105
|
+
* ```
|
|
106
|
+
*/
|
|
107
|
+
export function simhashSimilarity(a: string, b: string): number;
|
|
108
|
+
/**
|
|
109
|
+
* Compute Levenshtein distance with custom options.
|
|
110
|
+
*
|
|
111
|
+
* # JavaScript Example
|
|
112
|
+
*
|
|
113
|
+
* ```javascript
|
|
114
|
+
* import { levenshteinWithOpts, SimilarityOptions } from 'elid';
|
|
115
|
+
*
|
|
116
|
+
* const opts = new SimilarityOptions();
|
|
117
|
+
* opts.setCaseSensitive(false);
|
|
118
|
+
* opts.setTrimWhitespace(true);
|
|
119
|
+
*
|
|
120
|
+
* const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
|
|
121
|
+
* console.log(distance); // 0
|
|
122
|
+
* ```
|
|
123
|
+
*/
|
|
124
|
+
export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
|
|
125
|
+
/**
|
|
126
|
+
* Compute the Hamming distance between two strings.
|
|
127
|
+
*
|
|
128
|
+
* Returns the number of positions at which the characters differ.
|
|
129
|
+
* Returns `undefined` if strings have different lengths.
|
|
130
|
+
*
|
|
131
|
+
* # JavaScript Example
|
|
132
|
+
*
|
|
133
|
+
* ```javascript
|
|
134
|
+
* import { hamming } from 'elid';
|
|
135
|
+
*
|
|
136
|
+
* const distance = hamming("karolin", "kathrin");
|
|
137
|
+
* console.log(distance); // 3
|
|
138
|
+
*
|
|
139
|
+
* const invalid = hamming("hello", "world!");
|
|
140
|
+
* console.log(invalid); // undefined
|
|
141
|
+
* ```
|
|
142
|
+
*/
|
|
143
|
+
export function hamming(a: string, b: string): number | undefined;
|
|
144
|
+
/**
|
|
145
|
+
* Compute the Jaro similarity between two strings.
|
|
146
|
+
*
|
|
147
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
148
|
+
* Particularly effective for short strings like names.
|
|
149
|
+
*
|
|
150
|
+
* # JavaScript Example
|
|
151
|
+
*
|
|
152
|
+
* ```javascript
|
|
153
|
+
* import { jaro } from 'elid';
|
|
154
|
+
*
|
|
155
|
+
* const similarity = jaro("martha", "marhta");
|
|
156
|
+
* console.log(similarity); // ~0.944
|
|
157
|
+
* ```
|
|
158
|
+
*/
|
|
159
|
+
export function jaro(a: string, b: string): number;
|
|
160
|
+
/**
|
|
161
|
+
* Compute the Levenshtein distance between two strings.
|
|
162
|
+
*
|
|
163
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
164
|
+
*
|
|
165
|
+
* # JavaScript Example
|
|
166
|
+
*
|
|
167
|
+
* ```javascript
|
|
168
|
+
* import { levenshtein } from 'elid';
|
|
169
|
+
*
|
|
170
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
171
|
+
* console.log(distance); // 3
|
|
172
|
+
* ```
|
|
173
|
+
*/
|
|
174
|
+
export function levenshtein(a: string, b: string): number;
|
|
175
|
+
/**
|
|
176
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
177
|
+
*
|
|
178
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
179
|
+
*
|
|
180
|
+
* # JavaScript Example
|
|
181
|
+
*
|
|
182
|
+
* ```javascript
|
|
183
|
+
* import { osaDistance } from 'elid';
|
|
184
|
+
*
|
|
185
|
+
* const distance = osaDistance("ca", "ac");
|
|
186
|
+
* console.log(distance); // 1 (transposition)
|
|
187
|
+
* ```
|
|
188
|
+
*/
|
|
189
|
+
export function osaDistance(a: string, b: string): number;
|
|
190
|
+
/**
|
|
191
|
+
* Find all matches above a threshold score.
|
|
192
|
+
*
|
|
193
|
+
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
194
|
+
*
|
|
195
|
+
* # JavaScript Example
|
|
196
|
+
*
|
|
197
|
+
* ```javascript
|
|
198
|
+
* import { findMatchesAboveThreshold } from 'elid';
|
|
199
|
+
*
|
|
200
|
+
* const candidates = ["apple", "application", "apply", "banana"];
|
|
201
|
+
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
202
|
+
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
203
|
+
* ```
|
|
204
|
+
*/
|
|
205
|
+
export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
|
|
206
|
+
/**
|
|
207
|
+
* Find all hashes within a given distance threshold.
|
|
208
|
+
*
|
|
209
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
210
|
+
*
|
|
211
|
+
* # JavaScript Example
|
|
212
|
+
*
|
|
213
|
+
* ```javascript
|
|
214
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
215
|
+
*
|
|
216
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
217
|
+
* const hashes = Float64Array.from(candidates, s => simhash(s));
|
|
218
|
+
*
|
|
219
|
+
* const queryHash = simhash("iPhone 14");
|
|
220
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
221
|
+
*
|
|
222
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
223
|
+
* ```
|
|
224
|
+
*/
|
|
225
|
+
export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
|
|
226
|
+
/**
|
|
227
|
+
* Find the best match for a query string in an array of candidates.
|
|
228
|
+
*
|
|
229
|
+
* Returns an object with the index and similarity score of the best match.
|
|
230
|
+
*
|
|
231
|
+
* # JavaScript Example
|
|
232
|
+
*
|
|
233
|
+
* ```javascript
|
|
234
|
+
* import { findBestMatch } from 'elid';
|
|
235
|
+
*
|
|
236
|
+
* const candidates = ["apple", "application", "apply"];
|
|
237
|
+
* const result = findBestMatch("app", candidates);
|
|
238
|
+
* console.log(result); // { index: 0, score: 0.907 }
|
|
239
|
+
* ```
|
|
240
|
+
*/
|
|
241
|
+
export function findBestMatch(query: string, candidates: string[]): object;
|
|
242
|
+
/**
|
|
243
|
+
* Options for configuring string similarity algorithms
|
|
244
|
+
*/
|
|
245
|
+
export class SimilarityOptions {
|
|
246
|
+
free(): void;
|
|
247
|
+
[Symbol.dispose](): void;
|
|
248
|
+
/**
|
|
249
|
+
* Set prefix scale
|
|
250
|
+
*/
|
|
251
|
+
setPrefixScale(value: number): void;
|
|
252
|
+
/**
|
|
253
|
+
* Set case sensitivity
|
|
254
|
+
*/
|
|
255
|
+
setCaseSensitive(value: boolean): void;
|
|
256
|
+
/**
|
|
257
|
+
* Set whitespace trimming
|
|
258
|
+
*/
|
|
259
|
+
setTrimWhitespace(value: boolean): void;
|
|
260
|
+
/**
|
|
261
|
+
* Create a new SimilarityOptions with default values
|
|
262
|
+
*/
|
|
263
|
+
constructor();
|
|
264
|
+
/**
|
|
265
|
+
* Case-sensitive comparison (default: true)
|
|
266
|
+
*/
|
|
267
|
+
case_sensitive: boolean;
|
|
268
|
+
/**
|
|
269
|
+
* Trim whitespace before comparison (default: false)
|
|
270
|
+
*/
|
|
271
|
+
trim_whitespace: boolean;
|
|
272
|
+
/**
|
|
273
|
+
* Prefix scale for Jaro-Winkler (default: 0.1, max: 0.25)
|
|
274
|
+
*/
|
|
275
|
+
prefix_scale: number;
|
|
276
|
+
}
|
package/elid.js
ADDED
package/elid_bg.js
ADDED
|
@@ -0,0 +1,812 @@
|
|
|
1
|
+
// Handle to the WebAssembly exports object. It stays undefined until
// __wbg_set_wasm() is called and is read by the wrappers in this module.
let wasm;

/**
 * Install the WebAssembly exports object that this module calls into.
 *
 * @param {object} val - Exports object to store; it is kept as-is, without validation.
 */
export function __wbg_set_wasm(val) {
    wasm = val;
}
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
// Most recently created byte view over `wasm.memory.buffer`, or null before first use.
let cachedUint8ArrayMemory0 = null;

/**
 * Return a Uint8Array view over the WASM linear memory, reusing the cached
 * view while it is still usable.
 *
 * The view is rebuilt when none exists yet or when the cached one reports a
 * byteLength of 0 (presumably because its backing buffer was detached, e.g.
 * after the memory grew — confirm).
 *
 * @returns {Uint8Array}
 */
function getUint8ArrayMemory0() {
    const needsRefresh =
        cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    }
    return cachedUint8ArrayMemory0;
}
|
|
15
|
+
|
|
16
|
+
// Once this many bytes have been decoded by one decoder, a fresh decoder is
// created (judging by the name, a workaround for a Safari limit — confirm).
const MAX_SAFARI_DECODE_BYTES = 2146435072;

// Running total of bytes handed to the current decoder.
let numBytesDecoded = 0;

// Strict UTF-8 decoder: malformed input throws and a BOM is not stripped.
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });

// One empty decode() call is issued right after construction.
cachedTextDecoder.decode();

/**
 * Decode `len` bytes of WASM memory starting at `ptr` as UTF-8.
 *
 * @param {number} ptr - Byte offset into WASM memory.
 * @param {number} len - Number of bytes to decode.
 * @returns {string}
 */
function decodeText(ptr, len) {
    const runningTotal = numBytesDecoded + len;
    if (runningTotal < MAX_SAFARI_DECODE_BYTES) {
        numBytesDecoded = runningTotal;
    } else {
        // Budget exhausted: swap in a new decoder and restart the count at this call's size.
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        numBytesDecoded = len;
    }
    const bytes = getUint8ArrayMemory0().subarray(ptr, ptr + len);
    return cachedTextDecoder.decode(bytes);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Read a UTF-8 string out of WASM memory.
 *
 * @param {number} ptr - Byte offset; coerced to an unsigned 32-bit integer before use.
 * @param {number} len - Number of bytes to decode.
 * @returns {string}
 */
function getStringFromWasm0(ptr, len) {
    return decodeText(ptr >>> 0, len);
}
|
|
36
|
+
|
|
37
|
+
function debugString(val) {
|
|
38
|
+
// primitive types
|
|
39
|
+
const type = typeof val;
|
|
40
|
+
if (type == 'number' || type == 'boolean' || val == null) {
|
|
41
|
+
return `${val}`;
|
|
42
|
+
}
|
|
43
|
+
if (type == 'string') {
|
|
44
|
+
return `"${val}"`;
|
|
45
|
+
}
|
|
46
|
+
if (type == 'symbol') {
|
|
47
|
+
const description = val.description;
|
|
48
|
+
if (description == null) {
|
|
49
|
+
return 'Symbol';
|
|
50
|
+
} else {
|
|
51
|
+
return `Symbol(${description})`;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
if (type == 'function') {
|
|
55
|
+
const name = val.name;
|
|
56
|
+
if (typeof name == 'string' && name.length > 0) {
|
|
57
|
+
return `Function(${name})`;
|
|
58
|
+
} else {
|
|
59
|
+
return 'Function';
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
// objects
|
|
63
|
+
if (Array.isArray(val)) {
|
|
64
|
+
const length = val.length;
|
|
65
|
+
let debug = '[';
|
|
66
|
+
if (length > 0) {
|
|
67
|
+
debug += debugString(val[0]);
|
|
68
|
+
}
|
|
69
|
+
for(let i = 1; i < length; i++) {
|
|
70
|
+
debug += ', ' + debugString(val[i]);
|
|
71
|
+
}
|
|
72
|
+
debug += ']';
|
|
73
|
+
return debug;
|
|
74
|
+
}
|
|
75
|
+
// Test for built-in
|
|
76
|
+
const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val));
|
|
77
|
+
let className;
|
|
78
|
+
if (builtInMatches && builtInMatches.length > 1) {
|
|
79
|
+
className = builtInMatches[1];
|
|
80
|
+
} else {
|
|
81
|
+
// Failed to match the standard '[object ClassName]'
|
|
82
|
+
return toString.call(val);
|
|
83
|
+
}
|
|
84
|
+
if (className == 'Object') {
|
|
85
|
+
// we're a user defined class or Object
|
|
86
|
+
// JSON.stringify avoids problems with cycles, and is generally much
|
|
87
|
+
// easier than looping through ownProperties of `val`.
|
|
88
|
+
try {
|
|
89
|
+
return 'Object(' + JSON.stringify(val) + ')';
|
|
90
|
+
} catch (_) {
|
|
91
|
+
return 'Object';
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
// errors
|
|
95
|
+
if (val instanceof Error) {
|
|
96
|
+
return `${val.name}: ${val.message}\n${val.stack}`;
|
|
97
|
+
}
|
|
98
|
+
// TODO we could test for more things here, like `Set`s and `Map`s.
|
|
99
|
+
return className;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Byte length of the value most recently written into WASM memory by a
// pass*ToWasm helper; callers read it right after the helper returns.
let WASM_VECTOR_LEN = 0;

// Shared encoder used for every string handed to WASM.
const cachedTextEncoder = new TextEncoder();

// Fallback for runtimes whose TextEncoder lacks encodeInto(): encode the whole
// string, copy it into the destination view, and report the counts.
if (!('encodeInto' in cachedTextEncoder)) {
    cachedTextEncoder.encodeInto = (arg, view) => {
        const encoded = cachedTextEncoder.encode(arg);
        view.set(encoded);
        return { read: arg.length, written: encoded.length };
    };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Copy a JavaScript string into WASM memory as UTF-8 and return its address.
 *
 * The number of bytes written is published through `WASM_VECTOR_LEN`.
 *
 * Without `realloc`, the string is fully encoded first and an exactly sized
 * buffer is allocated. With `realloc`, a buffer of `arg.length` bytes is
 * allocated, the leading ASCII run is copied code unit by code unit, and if a
 * non-ASCII code unit is hit the buffer is grown to fit up to three bytes per
 * remaining code unit, filled via `encodeInto`, then shrunk to the final size.
 *
 * @param {string} arg - String to copy.
 * @param {Function} malloc - Called as `malloc(size, 1)`; returns the buffer address.
 * @param {Function} [realloc] - Called as `realloc(ptr, oldSize, newSize, 1)`; returns the new address.
 * @returns {number} Address of the encoded bytes as an unsigned 32-bit integer.
 */
function passStringToWasm0(arg, malloc, realloc) {
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const base = malloc(encoded.length, 1) >>> 0;
        getUint8ArrayMemory0().subarray(base, base + encoded.length).set(encoded);
        WASM_VECTOR_LEN = encoded.length;
        return base;
    }

    let capacity = arg.length;
    let base = malloc(capacity, 1) >>> 0;

    const mem = getUint8ArrayMemory0();

    // Fast path: copy code units directly for as long as they are ASCII.
    let written = 0;
    while (written < capacity) {
        const code = arg.charCodeAt(written);
        if (code > 0x7F) break;
        mem[base + written] = code;
        written++;
    }

    if (written !== capacity) {
        // Slow path: encode whatever follows the ASCII prefix.
        const rest = written !== 0 ? arg.slice(written) : arg;
        const previousCapacity = capacity;
        capacity = written + rest.length * 3;
        base = realloc(base, previousCapacity, capacity, 1) >>> 0;
        // The view is fetched again after realloc rather than reusing `mem`.
        const view = getUint8ArrayMemory0().subarray(base + written, base + capacity);
        written += cachedTextEncoder.encodeInto(rest, view).written;
        // Give back the part of the buffer that was not needed.
        base = realloc(base, capacity, written, 1) >>> 0;
    }

    WASM_VECTOR_LEN = written;
    return base;
}
|
|
155
|
+
|
|
156
|
+
// Most recently created DataView over `wasm.memory.buffer`, or null before first use.
let cachedDataViewMemory0 = null;

/**
 * Return a DataView over the WASM linear memory, reusing the cached view while
 * it still refers to a usable buffer.
 *
 * The view is rebuilt when none exists, when its buffer reports
 * `detached === true`, or — where `detached` is undefined — when its buffer is
 * no longer the same object as `wasm.memory.buffer`.
 *
 * @returns {DataView}
 */
function getDataViewMemory0() {
    let needsRefresh = cachedDataViewMemory0 === null;
    if (!needsRefresh) {
        const buffer = cachedDataViewMemory0.buffer;
        needsRefresh =
            buffer.detached === true ||
            (buffer.detached === undefined && buffer !== wasm.memory.buffer);
    }
    if (needsRefresh) {
        cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
    }
    return cachedDataViewMemory0;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Report whether a value is exactly `null` or exactly `undefined`.
 *
 * @param {*} x - Value to test.
 * @returns {boolean}
 */
function isLikeNone(x) {
    return x === null || x === undefined;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Store a JavaScript value in the WASM externref table.
 *
 * @param {*} obj - Value to store.
 * @returns {number} Index returned by `wasm.__externref_table_alloc()` at which the value was stored.
 */
function addToExternrefTable0(obj) {
    const slot = wasm.__externref_table_alloc();
    wasm.__wbindgen_externrefs.set(slot, obj);
    return slot;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Call `f` with the current `this` and the given argument list.
 *
 * If `f` throws, the thrown value is placed in the externref table and its
 * index is passed to `wasm.__wbindgen_exn_store`; nothing is rethrown and the
 * function then returns undefined.
 *
 * @param {Function} f - Function to invoke.
 * @param {ArrayLike<*>} args - Arguments forwarded via `f.apply`.
 * @returns {*} Whatever `f` returns, or undefined after a caught exception.
 */
function handleError(f, args) {
    try {
        return f.apply(this, args);
    } catch (thrown) {
        wasm.__wbindgen_exn_store(addToExternrefTable0(thrown));
    }
}
|
|
183
|
+
/**
 * Compute the SimHash fingerprint of a string.
 *
 * The text is copied into WASM memory and the value produced by the `simhash`
 * export is returned unchanged. Upstream describes the result as a 64-bit hash
 * where similar strings produce similar numbers, meant to be stored and
 * queried by numeric range.
 *
 * NOTE(review): a JavaScript `number` represents integers exactly only up to
 * 2^53 - 1 — confirm how the 64-bit fingerprint is mapped onto it.
 *
 * @example
 * import { simhash } from 'elid';
 *
 * const hash1 = simhash("iPhone 14");
 * const hash2 = simhash("iPhone 15");
 * const hash3 = simhash("Galaxy S23");
 *
 * // hash1 and hash2 will be numerically close
 * // hash3 will be numerically distant
 *
 * // Store in database as bigint:
 * // { name: "iPhone 14", simhash: hash1 }
 *
 * @param {string} text - String to fingerprint.
 * @returns {number} Fingerprint as returned by the WASM export.
 */
export function simhash(text) {
    const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const textLen = WASM_VECTOR_LEN;
    return wasm.simhash(textPtr, textLen);
}
|
|
213
|
+
|
|
214
|
+
/**
 * Compute the normalized Levenshtein similarity between two strings.
 *
 * Both strings are copied into WASM memory and the result of the
 * `normalizedLevenshtein` export is returned unchanged. Upstream documents the
 * range as 0.0 (completely different) to 1.0 (identical).
 *
 * @example
 * import { normalizedLevenshtein } from 'elid';
 *
 * const similarity = normalizedLevenshtein("hello", "hallo");
 * console.log(similarity); // ~0.8
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
export function normalizedLevenshtein(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.normalizedLevenshtein(aPtr, aLen, bPtr, bLen);
}
|
|
239
|
+
|
|
240
|
+
/**
 * Compute the best matching similarity between two strings.
 *
 * Both strings are copied into WASM memory and the result of the `bestMatch`
 * export is returned unchanged. Upstream describes it as running multiple
 * algorithms and returning the highest score.
 *
 * @example
 * import { bestMatch } from 'elid';
 *
 * const score = bestMatch("hello", "hallo");
 * console.log(score); // ~0.8
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
export function bestMatch(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.bestMatch(aPtr, aLen, bPtr, bLen);
}
|
|
265
|
+
|
|
266
|
+
/**
 * Compute the Jaro-Winkler similarity between two strings.
 *
 * Both strings are copied into WASM memory and the result of the `jaroWinkler`
 * export is returned unchanged. Upstream documents the range as 0.0
 * (completely different) to 1.0 (identical), with more favorable ratings for
 * strings that share a common prefix.
 *
 * @example
 * import { jaroWinkler } from 'elid';
 *
 * const similarity = jaroWinkler("martha", "marhta");
 * console.log(similarity); // ~0.961
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
export function jaroWinkler(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.jaroWinkler(aPtr, aLen, bPtr, bLen);
}
|
|
292
|
+
|
|
293
|
+
/**
 * Compute the Hamming distance between two SimHash values.
 *
 * The two hashes are passed straight to the `simhashDistance` export and the
 * result is coerced to an unsigned 32-bit integer. Upstream describes the
 * result as the number of differing bits; lower values mean higher similarity.
 *
 * @example
 * import { simhash, simhashDistance } from 'elid';
 *
 * const hash1 = simhash("iPhone 14");
 * const hash2 = simhash("iPhone 15");
 * const distance = simhashDistance(hash1, hash2);
 *
 * console.log(distance); // Low number = similar
 *
 * @param {number} hash1 - First SimHash value.
 * @param {number} hash2 - Second SimHash value.
 * @returns {number} Distance as an unsigned 32-bit integer.
 */
export function simhashDistance(hash1, hash2) {
    return wasm.simhashDistance(hash1, hash2) >>> 0;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Compute the normalized SimHash similarity between two strings.
 *
 * Both strings are copied into WASM memory and the result of the
 * `simhashSimilarity` export is returned unchanged. Upstream documents the
 * range as 0.0 (completely different) to 1.0 (identical).
 *
 * @example
 * import { simhashSimilarity } from 'elid';
 *
 * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
 * console.log(similarity); // ~0.9 (very similar)
 *
 * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
 * console.log(similarity2); // ~0.4 (different)
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
export function simhashSimilarity(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.simhashSimilarity(aPtr, aLen, bPtr, bLen);
}
|
|
346
|
+
|
|
347
|
+
/**
 * Ensure that `instance` is an instance of `klass`.
 *
 * @param {*} instance - Value to check.
 * @param {Function} klass - Constructor the value must be an instance of.
 * @throws {Error} With message `expected instance of <klass.name>` when the check fails.
 */
function _assertClass(instance, klass) {
    if (instance instanceof klass) {
        return;
    }
    throw new Error(`expected instance of ${klass.name}`);
}
|
|
352
|
+
/**
 * Compute Levenshtein distance with custom options.
 *
 * Both strings are copied into WASM memory, `opts` is checked to be a
 * `SimilarityOptions` instance, and the result of the `levenshteinWithOpts`
 * export is coerced to an unsigned 32-bit integer.
 *
 * NOTE(review): `opts.__destroy_into_raw()` is called before the WASM call;
 * judging by its name the underlying object is handed over to WASM, so `opts`
 * is presumably unusable afterwards — confirm against the SimilarityOptions
 * class before reusing an options object across calls.
 *
 * @example
 * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
 *
 * const opts = new SimilarityOptions();
 * opts.setCaseSensitive(false);
 * opts.setTrimWhitespace(true);
 *
 * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
 * console.log(distance); // 0
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @param {SimilarityOptions} opts - Comparison options.
 * @returns {number} Distance as an unsigned 32-bit integer.
 * @throws {Error} When `opts` is not a `SimilarityOptions` instance.
 */
export function levenshteinWithOpts(a, b, opts) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    // Validation happens only after both strings have been written to WASM memory.
    _assertClass(opts, SimilarityOptions);
    const optsPtr = opts.__destroy_into_raw();
    return wasm.levenshteinWithOpts(aPtr, aLen, bPtr, bLen, optsPtr) >>> 0;
}
|
|
382
|
+
|
|
383
|
+
/**
 * Compute the Hamming distance between two strings.
 *
 * Both strings are copied into WASM memory and passed to the `hamming` export.
 * The export's value 0x100000001 is treated as "no result" and mapped to
 * `undefined`; any other value is returned unchanged. Upstream describes the
 * result as the number of positions at which the characters differ, with no
 * result when the strings have different lengths.
 *
 * @example
 * import { hamming } from 'elid';
 *
 * const distance = hamming("karolin", "kathrin");
 * console.log(distance); // 3
 *
 * const invalid = hamming("hello", "world!");
 * console.log(invalid); // undefined
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number | undefined} The distance, or `undefined` when the export reports no result.
 */
export function hamming(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    const raw = wasm.hamming(aPtr, aLen, bPtr, bLen);
    if (raw === 0x100000001) {
        // Sentinel value used by the binding for "no value".
        return undefined;
    }
    return raw;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Compute the Jaro similarity between two strings.
 *
 * Both strings are copied into WASM memory and the result of the `jaro` export
 * is returned unchanged. Upstream documents the range as 0.0 (completely
 * different) to 1.0 (identical) and recommends it for short strings such as
 * names.
 *
 * @example
 * import { jaro } from 'elid';
 *
 * const similarity = jaro("martha", "marhta");
 * console.log(similarity); // ~0.944
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
export function jaro(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.jaro(aPtr, aLen, bPtr, bLen);
}
|
|
439
|
+
|
|
440
|
+
/**
 * Levenshtein distance between two strings.
 *
 * This is the minimum number of single-character edits needed to transform
 * one string into the other.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { levenshtein } from 'elid';
 *
 * const distance = levenshtein("kitten", "sitting");
 * console.log(distance); // 3
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function levenshtein(a, b) {
    // WASM_VECTOR_LEN is overwritten by every string copy, so capture it
    // right after each call.
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    const distance = wasm.levenshtein(aPtr, aLen, bPtr, bLen);
    // Reinterpret the raw result as an unsigned 32-bit integer.
    return distance >>> 0;
}
|
|
465
|
+
|
|
466
|
+
/**
 * OSA (Optimal String Alignment) distance between two strings.
 *
 * Like Levenshtein, except that a transposition also counts as a single
 * operation.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { osaDistance } from 'elid';
 *
 * const distance = osaDistance("ca", "ac");
 * console.log(distance); // 1 (transposition)
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function osaDistance(a, b) {
    // WASM_VECTOR_LEN is overwritten by every string copy, so capture it
    // right after each call.
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    const distance = wasm.osaDistance(aPtr, aLen, bPtr, bLen);
    // Reinterpret the raw result as an unsigned 32-bit integer.
    return distance >>> 0;
}
|
|
491
|
+
|
|
492
|
+
// Copies an array of JS values into wasm memory as a run of 32-bit
// little-endian slots, one per element, each holding the value returned by
// addToExternrefTable0 for that element. Sets WASM_VECTOR_LEN to the element
// count and returns the start pointer.
function passArrayJsValueToWasm0(array, malloc) {
    const SLOT_BYTES = 4;
    const base = malloc(array.length * SLOT_BYTES, SLOT_BYTES) >>> 0;
    let idx = 0;
    while (idx < array.length) {
        const tableIndex = addToExternrefTable0(array[idx]);
        // Fetch the view on every iteration, exactly as the slot is written.
        getDataViewMemory0().setUint32(base + SLOT_BYTES * idx, tableIndex, true);
        idx++;
    }
    WASM_VECTOR_LEN = array.length;
    return base;
}
|
|
501
|
+
/**
 * Find every candidate whose score is above a threshold.
 *
 * Returns an array of objects, each carrying the index and score of a
 * candidate above the threshold.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { findMatchesAboveThreshold } from 'elid';
 *
 * const candidates = ["apple", "application", "apply", "banana"];
 * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
 * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
 * ```
 * @param {string} query
 * @param {string[]} candidates
 * @param {number} threshold
 * @returns {any}
 */
export function findMatchesAboveThreshold(query, candidates, threshold) {
    // WASM_VECTOR_LEN is rewritten by each pass* helper; read it right away.
    const queryPtr = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const queryLen = WASM_VECTOR_LEN;
    const candidatesPtr = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
    const candidatesLen = WASM_VECTOR_LEN;
    return wasm.findMatchesAboveThreshold(queryPtr, queryLen, candidatesPtr, candidatesLen, threshold);
}
|
|
528
|
+
|
|
529
|
+
// Lazily created Float64Array view over the wasm memory buffer.
let cachedFloat64ArrayMemory0 = null;

// Returns the cached Float64Array view, rebuilding it when none exists yet or
// when the cached view reports a byteLength of 0 (presumably because its
// backing buffer was replaced, e.g. after memory growth -- TODO confirm).
function getFloat64ArrayMemory0() {
    const needsRefresh =
        cachedFloat64ArrayMemory0 === null || cachedFloat64ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedFloat64ArrayMemory0 = new Float64Array(wasm.memory.buffer);
    }
    return cachedFloat64ArrayMemory0;
}
|
|
537
|
+
|
|
538
|
+
// Copies a sequence of numbers into wasm memory as 64-bit floats. Sets
// WASM_VECTOR_LEN to the element count and returns the start pointer.
function passArrayF64ToWasm0(arg, malloc) {
    const ELEMENT_BYTES = 8;
    // Allocate first, then fetch the view, in that order.
    const base = malloc(arg.length * ELEMENT_BYTES, ELEMENT_BYTES) >>> 0;
    const memory = getFloat64ArrayMemory0();
    memory.set(arg, base / ELEMENT_BYTES);
    WASM_VECTOR_LEN = arg.length;
    return base;
}
|
|
544
|
+
|
|
545
|
+
// Lazily created Uint32Array view over the wasm memory buffer.
let cachedUint32ArrayMemory0 = null;

// Returns the cached Uint32Array view, rebuilding it when none exists yet or
// when the cached view reports a byteLength of 0 (presumably because its
// backing buffer was replaced, e.g. after memory growth -- TODO confirm).
function getUint32ArrayMemory0() {
    const needsRefresh =
        cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
    }
    return cachedUint32ArrayMemory0;
}
|
|
553
|
+
|
|
554
|
+
// Returns a Uint32Array subarray (a view, not a copy) of `len` elements
// starting at byte address `ptr` in wasm memory.
function getArrayU32FromWasm0(ptr, len) {
    // Treat the pointer as unsigned, then convert bytes to a u32 element index.
    const firstElement = (ptr >>> 0) / 4;
    return getUint32ArrayMemory0().subarray(firstElement, firstElement + len);
}
|
|
558
|
+
/**
 * Find all hashes that lie within a given distance threshold of a query hash.
 *
 * Handy for database-style lookups: compute the hashes once up front, then
 * search them for similar entries.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { simhash, findSimilarHashes } from 'elid';
 *
 * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
 * const hashes = Float64Array.from(candidates, s => simhash(s));
 *
 * const queryHash = simhash("iPhone 14");
 * const matches = findSimilarHashes(queryHash, hashes, 10);
 *
 * console.log(matches); // [0, 1] - indices of similar items
 * ```
 * @param {number} query_hash
 * @param {Float64Array} candidate_hashes
 * @param {number} max_distance
 * @returns {Uint32Array}
 */
export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
    const hashesPtr = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
    const hashesLen = WASM_VECTOR_LEN;

    // The export hands back a [pointer, length] pair describing a u32 buffer.
    const result = wasm.findSimilarHashes(query_hash, hashesPtr, hashesLen, max_distance);
    const outPtr = result[0];
    const outLen = result[1];

    // Copy the indices out of wasm memory before releasing the buffer.
    const indices = getArrayU32FromWasm0(outPtr, outLen).slice();
    wasm.__wbindgen_free(outPtr, outLen * 4, 4);
    return indices;
}
|
|
589
|
+
|
|
590
|
+
/**
 * Find the candidate that best matches a query string.
 *
 * Returns an object holding the index and similarity score of the best match.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { findBestMatch } from 'elid';
 *
 * const candidates = ["apple", "application", "apply"];
 * const result = findBestMatch("app", candidates);
 * console.log(result); // { index: 0, score: 0.907 }
 * ```
 * @param {string} query
 * @param {string[]} candidates
 * @returns {object}
 */
export function findBestMatch(query, candidates) {
    // WASM_VECTOR_LEN is rewritten by each pass* helper; read it right away.
    const queryPtr = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const queryLen = WASM_VECTOR_LEN;
    const candidatesPtr = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
    const candidatesLen = WASM_VECTOR_LEN;
    return wasm.findBestMatch(queryPtr, queryLen, candidatesPtr, candidatesLen);
}
|
|
616
|
+
|
|
617
|
+
// Registry that frees the wasm-side SimilarityOptions when its JS wrapper is
// garbage collected. Where FinalizationRegistry is unavailable, a stand-in
// with no-op register/unregister methods is used instead.
const SimilarityOptionsFinalization = (typeof FinalizationRegistry !== 'undefined')
    ? new FinalizationRegistry((ptr) => wasm.__wbg_similarityoptions_free(ptr >>> 0, 1))
    : { register: () => {}, unregister: () => {} };
|
|
620
|
+
/**
 * Options for configuring string similarity algorithms.
 *
 * Each instance wraps a pointer (`__wbg_ptr`) to an object owned by the wasm
 * module; every accessor and setter forwards to a wasm export with that
 * pointer.
 */
export class SimilarityOptions {
    /**
     * Create a new SimilarityOptions with default values
     */
    constructor() {
        this.__wbg_ptr = wasm.similarityoptions_new() >>> 0;
        // `this` doubles as the unregister token used by __destroy_into_raw().
        SimilarityOptionsFinalization.register(this, this.__wbg_ptr, this);
    }

    /**
     * Detach this wrapper from its wasm object: zero the stored pointer,
     * unregister from the finalization registry, and return the old pointer.
     */
    __destroy_into_raw() {
        const rawPtr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        SimilarityOptionsFinalization.unregister(this);
        return rawPtr;
    }

    /**
     * Release the wasm-side object held by this wrapper.
     */
    free() {
        wasm.__wbg_similarityoptions_free(this.__destroy_into_raw(), 0);
    }

    /**
     * Set prefix scale
     * @param {number} value
     */
    setPrefixScale(value) {
        wasm.similarityoptions_setPrefixScale(this.__wbg_ptr, value);
    }

    /**
     * Set case sensitivity
     * @param {boolean} value
     */
    setCaseSensitive(value) {
        wasm.similarityoptions_setCaseSensitive(this.__wbg_ptr, value);
    }

    /**
     * Set whitespace trimming
     * @param {boolean} value
     */
    setTrimWhitespace(value) {
        wasm.similarityoptions_setTrimWhitespace(this.__wbg_ptr, value);
    }

    /**
     * Case-sensitive comparison (default: true)
     * @returns {boolean}
     */
    get case_sensitive() {
        // The export returns a number; any non-zero value is reported as true.
        return wasm.__wbg_get_similarityoptions_case_sensitive(this.__wbg_ptr) !== 0;
    }

    /**
     * Case-sensitive comparison (default: true)
     * @param {boolean} arg0
     */
    set case_sensitive(arg0) {
        wasm.__wbg_set_similarityoptions_case_sensitive(this.__wbg_ptr, arg0);
    }

    /**
     * Trim whitespace before comparison (default: false)
     * @returns {boolean}
     */
    get trim_whitespace() {
        // The export returns a number; any non-zero value is reported as true.
        return wasm.__wbg_get_similarityoptions_trim_whitespace(this.__wbg_ptr) !== 0;
    }

    /**
     * Trim whitespace before comparison (default: false)
     * @param {boolean} arg0
     */
    set trim_whitespace(arg0) {
        wasm.__wbg_set_similarityoptions_trim_whitespace(this.__wbg_ptr, arg0);
    }

    /**
     * Prefix scale for Jaro-Winkler (default: 0.1, max: 0.25)
     * @returns {number}
     */
    get prefix_scale() {
        return wasm.__wbg_get_similarityoptions_prefix_scale(this.__wbg_ptr);
    }

    /**
     * Prefix scale for Jaro-Winkler (default: 0.1, max: 0.25)
     * @param {number} arg0
     */
    set prefix_scale(arg0) {
        wasm.__wbg_set_similarityoptions_prefix_scale(this.__wbg_ptr, arg0);
    }
}

// Where Symbol.dispose exists, make it an alias of free().
if (Symbol.dispose) SimilarityOptions.prototype[Symbol.dispose] = SimilarityOptions.prototype.free;
|
|
713
|
+
|
|
714
|
+
// Import shim: builds (does not throw) an Error whose message is the string
// read from wasm memory at (arg0, arg1) by getStringFromWasm0.
export function __wbg_Error_e83987f665cf5504(arg0, arg1) {
    const message = getStringFromWasm0(arg0, arg1);
    return Error(message);
}
|
|
718
|
+
|
|
719
|
+
// Import shim: renders `arg1` with debugString, copies the text into wasm
// memory, and writes the result as two little-endian int32 values at `arg0`:
// the length at offset 4, then the pointer at offset 0.
export function __wbg___wbindgen_debug_string_df47ffb5e35e6763(arg0, arg1) {
    const text = debugString(arg1);
    const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const textLen = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4, textLen, true);
    getDataViewMemory0().setInt32(arg0, textPtr, true);
}
|
|
726
|
+
|
|
727
|
+
// Import shim: reports whether `arg0` is a primitive string.
export function __wbg___wbindgen_is_string_fbb76cb2940daafd(arg0) {
    return typeof arg0 === 'string';
}
|
|
731
|
+
|
|
732
|
+
// Import shim: if `arg1` is a primitive string, copies it into wasm memory.
// It then writes two little-endian int32 values at `arg0`: the current
// WASM_VECTOR_LEN at offset 4, and the pointer (0 when nothing was copied)
// at offset 0.
export function __wbg___wbindgen_string_get_e4f06c90489ad01b(arg0, arg1) {
    const maybeString = typeof arg1 === 'string' ? arg1 : undefined;
    let strPtr = 0;
    if (!isLikeNone(maybeString)) {
        strPtr = passStringToWasm0(maybeString, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    }
    // Read unconditionally, whether or not a string was copied above.
    const strLen = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4, strLen, true);
    getDataViewMemory0().setInt32(arg0, strPtr, true);
}
|
|
740
|
+
|
|
741
|
+
// Import shim: throws an Error whose message is the string read from wasm
// memory at (arg0, arg1) by getStringFromWasm0.
export function __wbg___wbindgen_throw_b855445ff6a94295(arg0, arg1) {
    const message = getStringFromWasm0(arg0, arg1);
    throw new Error(message);
}
|
|
744
|
+
|
|
745
|
+
// Import shim: returns a fresh, empty plain object.
export function __wbg_new_1acc0b6eea89d040() {
    return {};
}
|
|
749
|
+
|
|
750
|
+
// Import shim: returns a fresh, empty Map.
export function __wbg_new_68651c719dcda04e() {
    return new Map();
}
|
|
754
|
+
|
|
755
|
+
// Import shim: returns a fresh, empty Array.
export function __wbg_new_e17d9f43105b08be() {
    return [];
}
|
|
759
|
+
|
|
760
|
+
export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
|
|
761
|
+
arg0[arg1] = arg2;
|
|
762
|
+
};
|
|
763
|
+
|
|
764
|
+
// Import shim: calls `arg0.set(arg1, arg2)` and returns whatever that call
// returns.
export function __wbg_set_907fb406c34a251d(arg0, arg1, arg2) {
    return arg0.set(arg1, arg2);
}
|
|
768
|
+
|
|
769
|
+
export function __wbg_set_c213c871859d6500(arg0, arg1, arg2) {
|
|
770
|
+
arg0[arg1 >>> 0] = arg2;
|
|
771
|
+
};
|
|
772
|
+
|
|
773
|
+
// Import shim: runs Reflect.set(arg0, arg1, arg2) through handleError,
// forwarding this function's own `arguments` object to it.
export function __wbg_set_c2abbebe8b9ebee1() {
    const reflectSet = function (arg0, arg1, arg2) {
        return Reflect.set(arg0, arg1, arg2);
    };
    return handleError(reflectSet, arguments);
}
|
|
777
|
+
|
|
778
|
+
// Cast intrinsic for `Ref(String) -> Externref`: returns the string that
// getStringFromWasm0 reads from wasm memory at (arg0, arg1).
export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
    return getStringFromWasm0(arg0, arg1);
}
|
|
783
|
+
|
|
784
|
+
// Cast intrinsic for `U64 -> Externref`: wraps the BigInt argument into the
// unsigned 64-bit range and returns it.
export function __wbindgen_cast_4625c577ab2ec9ee(arg0) {
    return BigInt.asUintN(64, arg0);
}
|
|
789
|
+
|
|
790
|
+
// Cast intrinsic for `I64 -> Externref`: the argument is returned unchanged.
export function __wbindgen_cast_9ae0607507abb057(arg0) {
    return arg0;
}
|
|
795
|
+
|
|
796
|
+
// Cast intrinsic for `F64 -> Externref`: the argument is returned unchanged.
export function __wbindgen_cast_d6cd19b81560fd6e(arg0) {
    return arg0;
}
|
|
801
|
+
|
|
802
|
+
// Seeds the externref table: grows it by four slots, sets slot 0 to
// `undefined`, then fills the four new slots with undefined, null, true and
// false, in that order.
export function __wbindgen_init_externref_table() {
    const externrefs = wasm.__wbindgen_externrefs;
    // grow() yields the index of the first newly added slot.
    const firstNew = externrefs.grow(4);
    externrefs.set(0, undefined);
    const presets = [undefined, null, true, false];
    for (let i = 0; i < presets.length; i++) {
        externrefs.set(firstNew + i, presets[i]);
    }
}
|
|
812
|
+
|
package/elid_bg.wasm
ADDED
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "elid",
|
|
3
|
+
"collaborators": [
|
|
4
|
+
"ELID Contributors"
|
|
5
|
+
],
|
|
6
|
+
"description": "A fast and efficient string similarity library",
|
|
7
|
+
"version": "0.1.0",
|
|
8
|
+
"license": "MIT OR Apache-2.0",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "https://forge.blackleafdigital.com/BlackLeafDigital/ELID"
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"elid_bg.wasm",
|
|
15
|
+
"elid.js",
|
|
16
|
+
"elid_bg.js",
|
|
17
|
+
"elid.d.ts"
|
|
18
|
+
],
|
|
19
|
+
"module": "elid.js",
|
|
20
|
+
"types": "elid.d.ts",
|
|
21
|
+
"sideEffects": [
|
|
22
|
+
"./elid.js",
|
|
23
|
+
"./snippets/*"
|
|
24
|
+
],
|
|
25
|
+
"keywords": [
|
|
26
|
+
"string",
|
|
27
|
+
"similarity",
|
|
28
|
+
"levenshtein",
|
|
29
|
+
"fuzzy",
|
|
30
|
+
"distance"
|
|
31
|
+
]
|
|
32
|
+
}
|