elid 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/elid.d.ts +265 -265
- package/elid_bg.js +467 -467
- package/elid_bg.wasm +0 -0
- package/package.json +1 -1
package/elid.d.ts
CHANGED
|
@@ -1,35 +1,28 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
/**
|
|
4
|
-
* Compute the
|
|
4
|
+
* Compute the SimHash fingerprint of a string.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
6
|
+
* Returns a 64-bit hash where similar strings produce similar numbers.
|
|
7
|
+
* Use this for database queries by storing the hash and querying by numeric range.
|
|
7
8
|
*
|
|
8
9
|
* # JavaScript Example
|
|
9
10
|
*
|
|
10
11
|
* ```javascript
|
|
11
|
-
* import {
|
|
12
|
-
*
|
|
13
|
-
* const distance = osaDistance("ca", "ac");
|
|
14
|
-
* console.log(distance); // 1 (transposition)
|
|
15
|
-
* ```
|
|
16
|
-
*/
|
|
17
|
-
export function osaDistance(a: string, b: string): number;
|
|
18
|
-
/**
|
|
19
|
-
* Compute the Levenshtein distance between two strings.
|
|
20
|
-
*
|
|
21
|
-
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
12
|
+
* import { simhash } from 'elid';
|
|
22
13
|
*
|
|
23
|
-
*
|
|
14
|
+
* const hash1 = simhash("iPhone 14");
|
|
15
|
+
* const hash2 = simhash("iPhone 15");
|
|
16
|
+
* const hash3 = simhash("Galaxy S23");
|
|
24
17
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
18
|
+
* // hash1 and hash2 will be numerically close
|
|
19
|
+
* // hash3 will be numerically distant
|
|
27
20
|
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
21
|
+
* // Store in database as bigint:
|
|
22
|
+
* // { name: "iPhone 14", simhash: hash1 }
|
|
30
23
|
* ```
|
|
31
24
|
*/
|
|
32
|
-
export function
|
|
25
|
+
export function simhash(text: string): number;
|
|
33
26
|
/**
|
|
34
27
|
* Find all matches above a threshold score.
|
|
35
28
|
*
|
|
@@ -47,130 +40,186 @@ export function levenshtein(a: string, b: string): number;
|
|
|
47
40
|
*/
|
|
48
41
|
export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
|
|
49
42
|
/**
|
|
50
|
-
*
|
|
43
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
51
44
|
*
|
|
52
|
-
*
|
|
45
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
46
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
47
|
+
*
|
|
48
|
+
* # Parameters
|
|
49
|
+
*
|
|
50
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
51
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
52
|
+
*
|
|
53
|
+
* # Returns
|
|
54
|
+
*
|
|
55
|
+
* A base32hex-encoded ELID string.
|
|
53
56
|
*
|
|
54
57
|
* # JavaScript Example
|
|
55
58
|
*
|
|
56
59
|
* ```javascript
|
|
57
|
-
* import {
|
|
60
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
58
61
|
*
|
|
59
|
-
*
|
|
60
|
-
* const
|
|
61
|
-
* const
|
|
62
|
+
* // Different sized embeddings from different models
|
|
63
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
64
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
62
65
|
*
|
|
63
|
-
*
|
|
66
|
+
* // Project both to 128-dim common space
|
|
67
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
68
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
69
|
+
*
|
|
70
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
71
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
72
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
73
|
+
* // Both have length 128
|
|
64
74
|
* ```
|
|
65
75
|
*/
|
|
66
|
-
export function
|
|
76
|
+
export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
|
|
67
77
|
/**
|
|
68
|
-
*
|
|
78
|
+
* Find all hashes within a given distance threshold.
|
|
69
79
|
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
80
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
81
|
+
*
|
|
82
|
+
* # JavaScript Example
|
|
83
|
+
*
|
|
84
|
+
* ```javascript
|
|
85
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
86
|
+
*
|
|
87
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
88
|
+
* const hashes = candidates.map(s => simhash(s));
|
|
89
|
+
*
|
|
90
|
+
* const queryHash = simhash("iPhone 14");
|
|
91
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
92
|
+
*
|
|
93
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
94
|
+
* ```
|
|
95
|
+
*/
|
|
96
|
+
export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
|
|
97
|
+
/**
|
|
98
|
+
* Compute the Jaro similarity between two strings.
|
|
99
|
+
*
|
|
100
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
101
|
+
* Particularly effective for short strings like names.
|
|
102
|
+
*
|
|
103
|
+
* # JavaScript Example
|
|
104
|
+
*
|
|
105
|
+
* ```javascript
|
|
106
|
+
* import { jaro } from 'elid';
|
|
107
|
+
*
|
|
108
|
+
* const similarity = jaro("martha", "marhta");
|
|
109
|
+
* console.log(similarity); // ~0.944
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
export function jaro(a: string, b: string): number;
|
|
113
|
+
/**
|
|
114
|
+
* Compute the best matching similarity between two strings.
|
|
115
|
+
*
|
|
116
|
+
* Runs multiple algorithms and returns the highest score.
|
|
117
|
+
*
|
|
118
|
+
* # JavaScript Example
|
|
119
|
+
*
|
|
120
|
+
* ```javascript
|
|
121
|
+
* import { bestMatch } from 'elid';
|
|
122
|
+
*
|
|
123
|
+
* const score = bestMatch("hello", "hallo");
|
|
124
|
+
* console.log(score); // ~0.8
|
|
125
|
+
* ```
|
|
126
|
+
*/
|
|
127
|
+
export function bestMatch(a: string, b: string): number;
|
|
128
|
+
/**
|
|
129
|
+
* Encode an embedding vector to an ELID string.
|
|
130
|
+
*
|
|
131
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
132
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
133
|
+
* similarity search.
|
|
72
134
|
*
|
|
73
135
|
* # Parameters
|
|
74
136
|
*
|
|
75
137
|
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
76
|
-
* - `
|
|
138
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
77
139
|
*
|
|
78
140
|
* # Returns
|
|
79
141
|
*
|
|
80
|
-
* A base32hex-encoded ELID string
|
|
142
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
81
143
|
*
|
|
82
144
|
* # JavaScript Example
|
|
83
145
|
*
|
|
84
146
|
* ```javascript
|
|
85
|
-
* import {
|
|
86
|
-
*
|
|
87
|
-
* const embedding = new Float64Array(768).fill(0.1);
|
|
88
|
-
*
|
|
89
|
-
* // Fit in 100 characters (e.g., for database column constraints)
|
|
90
|
-
* const elid = encodeElidMaxLength(embedding, 100);
|
|
91
|
-
* console.log(elid.length <= 100); // true
|
|
147
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
92
148
|
*
|
|
93
|
-
* //
|
|
94
|
-
* const
|
|
149
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
150
|
+
* const embedding = await getEmbedding("Hello world");
|
|
151
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
152
|
+
* console.log(elid); // "012345abcdef..."
|
|
95
153
|
* ```
|
|
96
154
|
*/
|
|
97
|
-
export function
|
|
155
|
+
export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
|
|
98
156
|
/**
|
|
99
|
-
* Compute
|
|
157
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
158
|
+
*
|
|
159
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
100
160
|
*
|
|
101
161
|
* # JavaScript Example
|
|
102
162
|
*
|
|
103
163
|
* ```javascript
|
|
104
|
-
* import {
|
|
164
|
+
* import { simhashSimilarity } from 'elid';
|
|
105
165
|
*
|
|
106
|
-
* const
|
|
107
|
-
*
|
|
108
|
-
* opts.setTrimWhitespace(true);
|
|
166
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
167
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
109
168
|
*
|
|
110
|
-
* const
|
|
111
|
-
* console.log(
|
|
169
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
170
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
112
171
|
* ```
|
|
113
172
|
*/
|
|
114
|
-
export function
|
|
173
|
+
export function simhashSimilarity(a: string, b: string): number;
|
|
115
174
|
/**
|
|
116
|
-
*
|
|
175
|
+
* Encode an embedding using lossless full vector encoding.
|
|
117
176
|
*
|
|
118
|
-
*
|
|
119
|
-
*
|
|
177
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
178
|
+
* This produces the largest output but allows exact reconstruction.
|
|
120
179
|
*
|
|
121
180
|
* # Parameters
|
|
122
181
|
*
|
|
123
|
-
* - `
|
|
182
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
124
183
|
*
|
|
125
184
|
* # Returns
|
|
126
185
|
*
|
|
127
|
-
*
|
|
186
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
128
187
|
*
|
|
129
188
|
* # JavaScript Example
|
|
130
189
|
*
|
|
131
190
|
* ```javascript
|
|
132
|
-
* import {
|
|
191
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
133
192
|
*
|
|
134
193
|
* const embedding = new Float64Array(768).fill(0.1);
|
|
194
|
+
* const elid = encodeElidLossless(embedding);
|
|
135
195
|
*
|
|
136
|
-
* //
|
|
137
|
-
* const
|
|
138
|
-
*
|
|
139
|
-
*
|
|
140
|
-
* // Lossless IS reversible
|
|
141
|
-
* const losslessElid = encodeElidLossless(embedding);
|
|
142
|
-
* console.log(isElidReversible(losslessElid)); // true
|
|
196
|
+
* // Later, recover the exact embedding
|
|
197
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
198
|
+
* // recovered is identical to embedding
|
|
143
199
|
* ```
|
|
144
200
|
*/
|
|
145
|
-
export function
|
|
201
|
+
export function encodeElidLossless(embedding: Float64Array): string;
|
|
146
202
|
/**
|
|
147
|
-
* Compute the
|
|
203
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
148
204
|
*
|
|
149
|
-
* Returns a
|
|
150
|
-
* Use this for database queries by storing the hash and querying by numeric range.
|
|
205
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
151
206
|
*
|
|
152
207
|
* # JavaScript Example
|
|
153
208
|
*
|
|
154
209
|
* ```javascript
|
|
155
|
-
* import {
|
|
156
|
-
*
|
|
157
|
-
* const hash1 = simhash("iPhone 14");
|
|
158
|
-
* const hash2 = simhash("iPhone 15");
|
|
159
|
-
* const hash3 = simhash("Galaxy S23");
|
|
160
|
-
*
|
|
161
|
-
* // hash1 and hash2 will be numerically close
|
|
162
|
-
* // hash3 will be numerically distant
|
|
210
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
163
211
|
*
|
|
164
|
-
*
|
|
165
|
-
* //
|
|
212
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
213
|
+
* console.log(similarity); // ~0.8
|
|
166
214
|
* ```
|
|
167
215
|
*/
|
|
168
|
-
export function
|
|
216
|
+
export function normalizedLevenshtein(a: string, b: string): number;
|
|
169
217
|
/**
|
|
170
|
-
* Decode an ELID string to
|
|
218
|
+
* Decode an ELID string back to an embedding vector.
|
|
171
219
|
*
|
|
172
|
-
*
|
|
173
|
-
*
|
|
220
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
221
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
222
|
+
* like Mini128, Morton, or Hilbert.
|
|
174
223
|
*
|
|
175
224
|
* # Parameters
|
|
176
225
|
*
|
|
@@ -178,18 +227,27 @@ export function simhash(text: string): number;
|
|
|
178
227
|
*
|
|
179
228
|
* # Returns
|
|
180
229
|
*
|
|
181
|
-
* A
|
|
230
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
231
|
+
* is not reversible.
|
|
232
|
+
*
|
|
233
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
234
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
182
235
|
*
|
|
183
236
|
* # JavaScript Example
|
|
184
237
|
*
|
|
185
238
|
* ```javascript
|
|
186
|
-
* import {
|
|
239
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
187
240
|
*
|
|
188
|
-
* const
|
|
189
|
-
*
|
|
241
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
242
|
+
* const elid = encodeElidLossless(embedding);
|
|
243
|
+
*
|
|
244
|
+
* if (isElidReversible(elid)) {
|
|
245
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
246
|
+
* console.log(recovered.length); // 768
|
|
247
|
+
* }
|
|
190
248
|
* ```
|
|
191
249
|
*/
|
|
192
|
-
export function
|
|
250
|
+
export function decodeElidToEmbedding(elid_str: string): any;
|
|
193
251
|
/**
|
|
194
252
|
* Get metadata about a FullVector ELID.
|
|
195
253
|
*
|
|
@@ -222,40 +280,92 @@ export function decodeElid(elid_str: string): Uint8Array;
|
|
|
222
280
|
*/
|
|
223
281
|
export function getElidMetadata(elid_str: string): any;
|
|
224
282
|
/**
|
|
225
|
-
*
|
|
283
|
+
* Decode an ELID string to raw bytes.
|
|
226
284
|
*
|
|
227
|
-
*
|
|
228
|
-
*
|
|
285
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
286
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
287
|
+
*
|
|
288
|
+
* # Parameters
|
|
289
|
+
*
|
|
290
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
291
|
+
*
|
|
292
|
+
* # Returns
|
|
293
|
+
*
|
|
294
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
295
|
+
*
|
|
296
|
+
* # JavaScript Example
|
|
297
|
+
*
|
|
298
|
+
* ```javascript
|
|
299
|
+
* import { decodeElid } from 'elid';
|
|
300
|
+
*
|
|
301
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
302
|
+
* console.log(bytes); // Uint8Array [...]
|
|
303
|
+
* ```
|
|
304
|
+
*/
|
|
305
|
+
export function decodeElid(elid_str: string): Uint8Array;
|
|
306
|
+
/**
|
|
307
|
+
* Compute the Levenshtein distance between two strings.
|
|
308
|
+
*
|
|
309
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
310
|
+
*
|
|
311
|
+
* # JavaScript Example
|
|
312
|
+
*
|
|
313
|
+
* ```javascript
|
|
314
|
+
* import { levenshtein } from 'elid';
|
|
315
|
+
*
|
|
316
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
317
|
+
* console.log(distance); // 3
|
|
318
|
+
* ```
|
|
319
|
+
*/
|
|
320
|
+
export function levenshtein(a: string, b: string): number;
|
|
321
|
+
/**
|
|
322
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
323
|
+
*
|
|
324
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
325
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
229
326
|
*
|
|
230
327
|
* # Parameters
|
|
231
328
|
*
|
|
232
|
-
* - `
|
|
233
|
-
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
329
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
234
330
|
*
|
|
235
331
|
* # Returns
|
|
236
332
|
*
|
|
237
|
-
*
|
|
333
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
238
334
|
*
|
|
239
335
|
* # JavaScript Example
|
|
240
336
|
*
|
|
241
337
|
* ```javascript
|
|
242
|
-
* import {
|
|
338
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
243
339
|
*
|
|
244
|
-
*
|
|
245
|
-
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
246
|
-
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
340
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
247
341
|
*
|
|
248
|
-
* //
|
|
249
|
-
* const
|
|
250
|
-
*
|
|
342
|
+
* // Mini128 is NOT reversible
|
|
343
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
344
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
251
345
|
*
|
|
252
|
-
* //
|
|
253
|
-
* const
|
|
254
|
-
*
|
|
255
|
-
* // Both have length 128
|
|
346
|
+
* // Lossless IS reversible
|
|
347
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
348
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
256
349
|
* ```
|
|
257
350
|
*/
|
|
258
|
-
export function
|
|
351
|
+
export function isElidReversible(elid_str: string): boolean;
|
|
352
|
+
/**
|
|
353
|
+
* Compute Levenshtein distance with custom options.
|
|
354
|
+
*
|
|
355
|
+
* # JavaScript Example
|
|
356
|
+
*
|
|
357
|
+
* ```javascript
|
|
358
|
+
* import { levenshteinWithOpts, SimilarityOptions } from 'elid';
|
|
359
|
+
*
|
|
360
|
+
* const opts = new SimilarityOptions();
|
|
361
|
+
* opts.setCaseSensitive(false);
|
|
362
|
+
* opts.setTrimWhitespace(true);
|
|
363
|
+
*
|
|
364
|
+
* const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
|
|
365
|
+
* console.log(distance); // 0
|
|
366
|
+
* ```
|
|
367
|
+
*/
|
|
368
|
+
export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
|
|
259
369
|
/**
|
|
260
370
|
* Compute the Hamming distance between two ELID strings.
|
|
261
371
|
*
|
|
@@ -308,164 +418,120 @@ export function elidHammingDistance(elid1: string, elid2: string): number;
|
|
|
308
418
|
*/
|
|
309
419
|
export function findBestMatch(query: string, candidates: string[]): object;
|
|
310
420
|
/**
|
|
311
|
-
*
|
|
312
|
-
*
|
|
313
|
-
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
314
|
-
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
315
|
-
* - 0.5 = half precision and/or half dimensions
|
|
316
|
-
* - 0.25 = quarter precision and/or quarter dimensions
|
|
317
|
-
*
|
|
318
|
-
* The algorithm optimizes for dimension reduction first (which preserves
|
|
319
|
-
* more geometric relationships) before reducing precision.
|
|
320
|
-
*
|
|
321
|
-
* # Parameters
|
|
322
|
-
*
|
|
323
|
-
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
324
|
-
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
325
|
-
*
|
|
326
|
-
* # Returns
|
|
421
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
327
422
|
*
|
|
328
|
-
*
|
|
423
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
329
424
|
*
|
|
330
425
|
* # JavaScript Example
|
|
331
426
|
*
|
|
332
427
|
* ```javascript
|
|
333
|
-
* import {
|
|
334
|
-
*
|
|
335
|
-
* const embedding = new Float64Array(768).fill(0.1);
|
|
336
|
-
*
|
|
337
|
-
* // 50% retention - good balance of size and fidelity
|
|
338
|
-
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
428
|
+
* import { osaDistance } from 'elid';
|
|
339
429
|
*
|
|
340
|
-
*
|
|
341
|
-
*
|
|
430
|
+
* const distance = osaDistance("ca", "ac");
|
|
431
|
+
* console.log(distance); // 1 (transposition)
|
|
342
432
|
* ```
|
|
343
433
|
*/
|
|
344
|
-
export function
|
|
434
|
+
export function osaDistance(a: string, b: string): number;
|
|
345
435
|
/**
|
|
346
|
-
* Compute the
|
|
436
|
+
* Compute the Hamming distance between two SimHash values.
|
|
347
437
|
*
|
|
348
|
-
* Returns
|
|
349
|
-
* Particularly effective for short strings like names.
|
|
438
|
+
* Returns the number of differing bits. Lower values = higher similarity.
|
|
350
439
|
*
|
|
351
440
|
* # JavaScript Example
|
|
352
441
|
*
|
|
353
442
|
* ```javascript
|
|
354
|
-
* import {
|
|
443
|
+
* import { simhash, simhashDistance } from 'elid';
|
|
355
444
|
*
|
|
356
|
-
* const
|
|
357
|
-
*
|
|
445
|
+
* const hash1 = simhash("iPhone 14");
|
|
446
|
+
* const hash2 = simhash("iPhone 15");
|
|
447
|
+
* const distance = simhashDistance(hash1, hash2);
|
|
448
|
+
*
|
|
449
|
+
* console.log(distance); // Low number = similar
|
|
358
450
|
* ```
|
|
359
451
|
*/
|
|
360
|
-
export function
|
|
452
|
+
export function simhashDistance(hash1: number, hash2: number): number;
|
|
361
453
|
/**
|
|
362
|
-
* Encode an embedding
|
|
454
|
+
* Encode an embedding with percentage-based compression.
|
|
363
455
|
*
|
|
364
|
-
*
|
|
365
|
-
*
|
|
456
|
+
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
457
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
458
|
+
* - 0.5 = half precision and/or half dimensions
|
|
459
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
460
|
+
*
|
|
461
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
462
|
+
* more geometric relationships) before reducing precision.
|
|
366
463
|
*
|
|
367
464
|
* # Parameters
|
|
368
465
|
*
|
|
369
466
|
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
467
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
370
468
|
*
|
|
371
469
|
* # Returns
|
|
372
470
|
*
|
|
373
|
-
* A base32hex-encoded ELID string
|
|
471
|
+
* A base32hex-encoded ELID string.
|
|
374
472
|
*
|
|
375
473
|
* # JavaScript Example
|
|
376
474
|
*
|
|
377
475
|
* ```javascript
|
|
378
|
-
* import {
|
|
476
|
+
* import { encodeElidCompressed } from 'elid';
|
|
379
477
|
*
|
|
380
478
|
* const embedding = new Float64Array(768).fill(0.1);
|
|
381
|
-
* const elid = encodeElidLossless(embedding);
|
|
382
479
|
*
|
|
383
|
-
* //
|
|
384
|
-
* const
|
|
385
|
-
*
|
|
480
|
+
* // 50% retention - good balance of size and fidelity
|
|
481
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
482
|
+
*
|
|
483
|
+
* // 25% retention - smaller but less accurate
|
|
484
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
386
485
|
* ```
|
|
387
486
|
*/
|
|
388
|
-
export function
|
|
487
|
+
export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
|
|
389
488
|
/**
|
|
390
|
-
*
|
|
489
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
391
490
|
*
|
|
392
|
-
*
|
|
393
|
-
*
|
|
394
|
-
* like Mini128, Morton, or Hilbert.
|
|
491
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
492
|
+
* the specified character limit while maximizing fidelity.
|
|
395
493
|
*
|
|
396
494
|
* # Parameters
|
|
397
495
|
*
|
|
398
|
-
* - `
|
|
496
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
497
|
+
* - `max_chars`: Maximum output string length in characters
|
|
399
498
|
*
|
|
400
499
|
* # Returns
|
|
401
500
|
*
|
|
402
|
-
* A
|
|
403
|
-
* is not reversible.
|
|
404
|
-
*
|
|
405
|
-
* Note: If dimension reduction was used during encoding, the decoded
|
|
406
|
-
* embedding will be in the reduced dimension space, not the original.
|
|
501
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
407
502
|
*
|
|
408
503
|
* # JavaScript Example
|
|
409
504
|
*
|
|
410
505
|
* ```javascript
|
|
411
|
-
* import {
|
|
506
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
412
507
|
*
|
|
413
508
|
* const embedding = new Float64Array(768).fill(0.1);
|
|
414
|
-
* const elid = encodeElidLossless(embedding);
|
|
415
|
-
*
|
|
416
|
-
* if (isElidReversible(elid)) {
|
|
417
|
-
* const recovered = decodeElidToEmbedding(elid);
|
|
418
|
-
* console.log(recovered.length); // 768
|
|
419
|
-
* }
|
|
420
|
-
* ```
|
|
421
|
-
*/
|
|
422
|
-
export function decodeElidToEmbedding(elid_str: string): any;
|
|
423
|
-
/**
|
|
424
|
-
* Encode an embedding vector to an ELID string.
|
|
425
|
-
*
|
|
426
|
-
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
427
|
-
* sortable identifier. The ELID preserves locality properties for efficient
|
|
428
|
-
* similarity search.
|
|
429
|
-
*
|
|
430
|
-
* # Parameters
|
|
431
|
-
*
|
|
432
|
-
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
433
|
-
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
434
|
-
*
|
|
435
|
-
* # Returns
|
|
436
|
-
*
|
|
437
|
-
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
438
|
-
*
|
|
439
|
-
* # JavaScript Example
|
|
440
509
|
*
|
|
441
|
-
*
|
|
442
|
-
*
|
|
510
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
511
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
512
|
+
* console.log(elid.length <= 100); // true
|
|
443
513
|
*
|
|
444
|
-
* //
|
|
445
|
-
* const
|
|
446
|
-
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
447
|
-
* console.log(elid); // "012345abcdef..."
|
|
514
|
+
* // Fit in 50 characters (more compression)
|
|
515
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
448
516
|
* ```
|
|
449
517
|
*/
|
|
450
|
-
export function
|
|
518
|
+
export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
|
|
451
519
|
/**
|
|
452
|
-
* Compute the
|
|
520
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
453
521
|
*
|
|
454
522
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
523
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
455
524
|
*
|
|
456
525
|
* # JavaScript Example
|
|
457
526
|
*
|
|
458
527
|
* ```javascript
|
|
459
|
-
* import {
|
|
460
|
-
*
|
|
461
|
-
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
462
|
-
* console.log(similarity); // ~0.9 (very similar)
|
|
528
|
+
* import { jaroWinkler } from 'elid';
|
|
463
529
|
*
|
|
464
|
-
* const
|
|
465
|
-
* console.log(
|
|
530
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
531
|
+
* console.log(similarity); // ~0.961
|
|
466
532
|
* ```
|
|
467
533
|
*/
|
|
468
|
-
export function
|
|
534
|
+
export function jaroWinkler(a: string, b: string): number;
|
|
469
535
|
/**
|
|
470
536
|
* Compute the Hamming distance between two strings.
|
|
471
537
|
*
|
|
@@ -485,72 +551,6 @@ export function simhashSimilarity(a: string, b: string): number;
|
|
|
485
551
|
* ```
|
|
486
552
|
*/
|
|
487
553
|
export function hamming(a: string, b: string): number | undefined;
|
|
488
|
-
/**
|
|
489
|
-
* Compute the best matching similarity between two strings.
|
|
490
|
-
*
|
|
491
|
-
* Runs multiple algorithms and returns the highest score.
|
|
492
|
-
*
|
|
493
|
-
* # JavaScript Example
|
|
494
|
-
*
|
|
495
|
-
* ```javascript
|
|
496
|
-
* import { bestMatch } from 'elid';
|
|
497
|
-
*
|
|
498
|
-
* const score = bestMatch("hello", "hallo");
|
|
499
|
-
* console.log(score); // ~0.8
|
|
500
|
-
* ```
|
|
501
|
-
*/
|
|
502
|
-
export function bestMatch(a: string, b: string): number;
|
|
503
|
-
/**
|
|
504
|
-
* Compute the Jaro-Winkler similarity between two strings.
|
|
505
|
-
*
|
|
506
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
507
|
-
* Gives more favorable ratings to strings with common prefixes.
|
|
508
|
-
*
|
|
509
|
-
* # JavaScript Example
|
|
510
|
-
*
|
|
511
|
-
* ```javascript
|
|
512
|
-
* import { jaroWinkler } from 'elid';
|
|
513
|
-
*
|
|
514
|
-
* const similarity = jaroWinkler("martha", "marhta");
|
|
515
|
-
* console.log(similarity); // ~0.961
|
|
516
|
-
* ```
|
|
517
|
-
*/
|
|
518
|
-
export function jaroWinkler(a: string, b: string): number;
|
|
519
|
-
/**
|
|
520
|
-
* Compute the normalized Levenshtein similarity between two strings.
|
|
521
|
-
*
|
|
522
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
523
|
-
*
|
|
524
|
-
* # JavaScript Example
|
|
525
|
-
*
|
|
526
|
-
* ```javascript
|
|
527
|
-
* import { normalizedLevenshtein } from 'elid';
|
|
528
|
-
*
|
|
529
|
-
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
530
|
-
* console.log(similarity); // ~0.8
|
|
531
|
-
* ```
|
|
532
|
-
*/
|
|
533
|
-
export function normalizedLevenshtein(a: string, b: string): number;
|
|
534
|
-
/**
|
|
535
|
-
* Find all hashes within a given distance threshold.
|
|
536
|
-
*
|
|
537
|
-
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
538
|
-
*
|
|
539
|
-
* # JavaScript Example
|
|
540
|
-
*
|
|
541
|
-
* ```javascript
|
|
542
|
-
* import { simhash, findSimilarHashes } from 'elid';
|
|
543
|
-
*
|
|
544
|
-
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
545
|
-
* const hashes = candidates.map(s => simhash(s));
|
|
546
|
-
*
|
|
547
|
-
* const queryHash = simhash("iPhone 14");
|
|
548
|
-
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
549
|
-
*
|
|
550
|
-
* console.log(matches); // [0, 1] - indices of similar items
|
|
551
|
-
* ```
|
|
552
|
-
*/
|
|
553
|
-
export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
|
|
554
554
|
/**
|
|
555
555
|
* Dimension handling mode for full vector encoding.
|
|
556
556
|
*
|