elid 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -6
- package/elid.d.ts +489 -87
- package/elid_bg.js +780 -182
- package/elid_bg.wasm +0 -0
- package/package.json +5 -5
package/elid.d.ts
CHANGED
|
@@ -1,58 +1,148 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
/**
|
|
4
|
-
* Compute the
|
|
4
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
6
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
7
7
|
*
|
|
8
8
|
* # JavaScript Example
|
|
9
9
|
*
|
|
10
10
|
* ```javascript
|
|
11
|
-
* import {
|
|
11
|
+
* import { osaDistance } from 'elid';
|
|
12
12
|
*
|
|
13
|
-
* const
|
|
14
|
-
* console.log(
|
|
13
|
+
* const distance = osaDistance("ca", "ac");
|
|
14
|
+
* console.log(distance); // 1 (transposition)
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
export function osaDistance(a: string, b: string): number;
|
|
18
|
+
/**
|
|
19
|
+
* Compute the Levenshtein distance between two strings.
|
|
15
20
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
21
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
22
|
+
*
|
|
23
|
+
* # JavaScript Example
|
|
24
|
+
*
|
|
25
|
+
* ```javascript
|
|
26
|
+
* import { levenshtein } from 'elid';
|
|
27
|
+
*
|
|
28
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
29
|
+
* console.log(distance); // 3
|
|
18
30
|
* ```
|
|
19
31
|
*/
|
|
20
|
-
export function
|
|
32
|
+
export function levenshtein(a: string, b: string): number;
|
|
21
33
|
/**
|
|
22
|
-
*
|
|
34
|
+
* Find all matches above a threshold score.
|
|
23
35
|
*
|
|
24
|
-
* Returns
|
|
25
|
-
* Gives more favorable ratings to strings with common prefixes.
|
|
36
|
+
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
26
37
|
*
|
|
27
38
|
* # JavaScript Example
|
|
28
39
|
*
|
|
29
40
|
* ```javascript
|
|
30
|
-
* import {
|
|
41
|
+
* import { findMatchesAboveThreshold } from 'elid';
|
|
31
42
|
*
|
|
32
|
-
* const
|
|
33
|
-
*
|
|
43
|
+
* const candidates = ["apple", "application", "apply", "banana"];
|
|
44
|
+
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
45
|
+
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
34
46
|
* ```
|
|
35
47
|
*/
|
|
36
|
-
export function
|
|
48
|
+
export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
|
|
37
49
|
/**
|
|
38
|
-
* Compute the Hamming distance between two
|
|
50
|
+
* Compute the Hamming distance between two SimHash values.
|
|
39
51
|
*
|
|
40
|
-
* Returns the number of
|
|
41
|
-
* Returns null if strings have different lengths.
|
|
52
|
+
* Returns the number of differing bits. Lower values = higher similarity.
|
|
42
53
|
*
|
|
43
54
|
* # JavaScript Example
|
|
44
55
|
*
|
|
45
56
|
* ```javascript
|
|
46
|
-
* import {
|
|
57
|
+
* import { simhash, simhashDistance } from 'elid';
|
|
47
58
|
*
|
|
48
|
-
* const
|
|
49
|
-
*
|
|
59
|
+
* const hash1 = simhash("iPhone 14");
|
|
60
|
+
* const hash2 = simhash("iPhone 15");
|
|
61
|
+
* const distance = simhashDistance(hash1, hash2);
|
|
50
62
|
*
|
|
51
|
-
*
|
|
52
|
-
* console.log(invalid); // null
|
|
63
|
+
* console.log(distance); // Low number = similar
|
|
53
64
|
* ```
|
|
54
65
|
*/
|
|
55
|
-
export function
|
|
66
|
+
export function simhashDistance(hash1: number, hash2: number): number;
|
|
67
|
+
/**
|
|
68
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
69
|
+
*
|
|
70
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
71
|
+
* the specified character limit while maximizing fidelity.
|
|
72
|
+
*
|
|
73
|
+
* # Parameters
|
|
74
|
+
*
|
|
75
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
76
|
+
* - `max_chars`: Maximum output string length in characters
|
|
77
|
+
*
|
|
78
|
+
* # Returns
|
|
79
|
+
*
|
|
80
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
81
|
+
*
|
|
82
|
+
* # JavaScript Example
|
|
83
|
+
*
|
|
84
|
+
* ```javascript
|
|
85
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
86
|
+
*
|
|
87
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
88
|
+
*
|
|
89
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
90
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
91
|
+
* console.log(elid.length <= 100); // true
|
|
92
|
+
*
|
|
93
|
+
* // Fit in 50 characters (more compression)
|
|
94
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
95
|
+
* ```
|
|
96
|
+
*/
|
|
97
|
+
export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
|
|
98
|
+
/**
|
|
99
|
+
* Compute Levenshtein distance with custom options.
|
|
100
|
+
*
|
|
101
|
+
* # JavaScript Example
|
|
102
|
+
*
|
|
103
|
+
* ```javascript
|
|
104
|
+
* import { levenshteinWithOpts, SimilarityOptions } from 'elid';
|
|
105
|
+
*
|
|
106
|
+
* const opts = new SimilarityOptions();
|
|
107
|
+
* opts.setCaseSensitive(false);
|
|
108
|
+
* opts.setTrimWhitespace(true);
|
|
109
|
+
*
|
|
110
|
+
* const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
|
|
111
|
+
* console.log(distance); // 0
|
|
112
|
+
* ```
|
|
113
|
+
*/
|
|
114
|
+
export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
|
|
115
|
+
/**
|
|
116
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
117
|
+
*
|
|
118
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
119
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
120
|
+
*
|
|
121
|
+
* # Parameters
|
|
122
|
+
*
|
|
123
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
124
|
+
*
|
|
125
|
+
* # Returns
|
|
126
|
+
*
|
|
127
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
128
|
+
*
|
|
129
|
+
* # JavaScript Example
|
|
130
|
+
*
|
|
131
|
+
* ```javascript
|
|
132
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
133
|
+
*
|
|
134
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
135
|
+
*
|
|
136
|
+
* // Mini128 is NOT reversible
|
|
137
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
138
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
139
|
+
*
|
|
140
|
+
* // Lossless IS reversible
|
|
141
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
142
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
143
|
+
* ```
|
|
144
|
+
*/
|
|
145
|
+
export function isElidReversible(elid_str: string): boolean;
|
|
56
146
|
/**
|
|
57
147
|
* Compute the SimHash fingerprint of a string.
|
|
58
148
|
*
|
|
@@ -77,103 +167,324 @@ export function hamming(a: string, b: string): number | undefined;
|
|
|
77
167
|
*/
|
|
78
168
|
export function simhash(text: string): number;
|
|
79
169
|
/**
|
|
80
|
-
*
|
|
170
|
+
* Decode an ELID string to raw bytes.
|
|
81
171
|
*
|
|
82
|
-
* Returns the
|
|
172
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
173
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
174
|
+
*
|
|
175
|
+
* # Parameters
|
|
176
|
+
*
|
|
177
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
178
|
+
*
|
|
179
|
+
* # Returns
|
|
180
|
+
*
|
|
181
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
83
182
|
*
|
|
84
183
|
* # JavaScript Example
|
|
85
184
|
*
|
|
86
185
|
* ```javascript
|
|
87
|
-
* import {
|
|
186
|
+
* import { decodeElid } from 'elid';
|
|
88
187
|
*
|
|
89
|
-
* const
|
|
90
|
-
* console.log(
|
|
188
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
189
|
+
* console.log(bytes); // Uint8Array [...]
|
|
91
190
|
* ```
|
|
92
191
|
*/
|
|
93
|
-
export function
|
|
192
|
+
export function decodeElid(elid_str: string): Uint8Array;
|
|
94
193
|
/**
|
|
95
|
-
*
|
|
194
|
+
* Get metadata about a FullVector ELID.
|
|
96
195
|
*
|
|
97
|
-
*
|
|
196
|
+
* Returns an object containing information about how the ELID was encoded,
|
|
197
|
+
* including original dimensions, precision, and dimension mode.
|
|
198
|
+
*
|
|
199
|
+
* # Parameters
|
|
200
|
+
*
|
|
201
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
202
|
+
*
|
|
203
|
+
* # Returns
|
|
204
|
+
*
|
|
205
|
+
* An object with metadata fields, or null if not a FullVector ELID.
|
|
98
206
|
*
|
|
99
207
|
* # JavaScript Example
|
|
100
208
|
*
|
|
101
209
|
* ```javascript
|
|
102
|
-
* import {
|
|
210
|
+
* import { encodeElidCompressed, getElidMetadata } from 'elid';
|
|
103
211
|
*
|
|
104
|
-
* const
|
|
105
|
-
* const
|
|
212
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
213
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
106
214
|
*
|
|
107
|
-
* const
|
|
108
|
-
*
|
|
215
|
+
* const meta = getElidMetadata(elid);
|
|
216
|
+
* if (meta) {
|
|
217
|
+
* console.log(meta.originalDims); // 768
|
|
218
|
+
* console.log(meta.encodedDims); // depends on compression
|
|
219
|
+
* console.log(meta.isLossless); // false
|
|
220
|
+
* }
|
|
221
|
+
* ```
|
|
222
|
+
*/
|
|
223
|
+
export function getElidMetadata(elid_str: string): any;
|
|
224
|
+
/**
|
|
225
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
109
226
|
*
|
|
110
|
-
*
|
|
227
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
228
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
229
|
+
*
|
|
230
|
+
* # Parameters
|
|
231
|
+
*
|
|
232
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
233
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
234
|
+
*
|
|
235
|
+
* # Returns
|
|
236
|
+
*
|
|
237
|
+
* A base32hex-encoded ELID string.
|
|
238
|
+
*
|
|
239
|
+
* # JavaScript Example
|
|
240
|
+
*
|
|
241
|
+
* ```javascript
|
|
242
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
243
|
+
*
|
|
244
|
+
* // Different sized embeddings from different models
|
|
245
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
246
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
247
|
+
*
|
|
248
|
+
* // Project both to 128-dim common space
|
|
249
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
250
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
251
|
+
*
|
|
252
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
253
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
254
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
255
|
+
* // Both have length 128
|
|
111
256
|
* ```
|
|
112
257
|
*/
|
|
113
|
-
export function
|
|
258
|
+
export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
|
|
114
259
|
/**
|
|
115
|
-
*
|
|
260
|
+
* Compute the Hamming distance between two ELID strings.
|
|
116
261
|
*
|
|
117
|
-
* Returns
|
|
262
|
+
* Returns the number of differing bits between two Mini128 ELIDs.
|
|
263
|
+
* This distance is proportional to the angular distance between the
|
|
264
|
+
* original embeddings (lower = more similar).
|
|
265
|
+
*
|
|
266
|
+
* # Requirements
|
|
267
|
+
*
|
|
268
|
+
* Both ELIDs must use the Mini128 profile.
|
|
269
|
+
*
|
|
270
|
+
* # Parameters
|
|
271
|
+
*
|
|
272
|
+
* - `elid1`: First ELID string
|
|
273
|
+
* - `elid2`: Second ELID string
|
|
274
|
+
*
|
|
275
|
+
* # Returns
|
|
276
|
+
*
|
|
277
|
+
* Hamming distance (0-128). 0 means identical, 128 means completely different.
|
|
118
278
|
*
|
|
119
279
|
* # JavaScript Example
|
|
120
280
|
*
|
|
121
281
|
* ```javascript
|
|
122
|
-
* import {
|
|
282
|
+
* import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
|
|
123
283
|
*
|
|
124
|
-
* const
|
|
125
|
-
* const
|
|
126
|
-
*
|
|
284
|
+
* const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
|
|
285
|
+
* const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
|
|
286
|
+
*
|
|
287
|
+
* const distance = elidHammingDistance(elid1, elid2);
|
|
288
|
+
* if (distance < 20) {
|
|
289
|
+
* console.log("Very similar embeddings!");
|
|
290
|
+
* }
|
|
127
291
|
* ```
|
|
128
292
|
*/
|
|
129
|
-
export function
|
|
293
|
+
export function elidHammingDistance(elid1: string, elid2: string): number;
|
|
130
294
|
/**
|
|
131
|
-
*
|
|
295
|
+
* Find the best match for a query string in an array of candidates.
|
|
132
296
|
*
|
|
133
|
-
*
|
|
297
|
+
* Returns an object with the index and similarity score of the best match.
|
|
134
298
|
*
|
|
135
299
|
* # JavaScript Example
|
|
136
300
|
*
|
|
137
301
|
* ```javascript
|
|
138
|
-
* import {
|
|
302
|
+
* import { findBestMatch } from 'elid';
|
|
139
303
|
*
|
|
140
|
-
* const
|
|
141
|
-
*
|
|
304
|
+
* const candidates = ["apple", "application", "apply"];
|
|
305
|
+
* const result = findBestMatch("app", candidates);
|
|
306
|
+
* console.log(result); // { index: 0, score: 0.907 }
|
|
142
307
|
* ```
|
|
143
308
|
*/
|
|
144
|
-
export function
|
|
309
|
+
export function findBestMatch(query: string, candidates: string[]): object;
|
|
145
310
|
/**
|
|
146
|
-
*
|
|
311
|
+
* Encode an embedding with percentage-based compression.
|
|
312
|
+
*
|
|
313
|
+
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
314
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
315
|
+
* - 0.5 = half precision and/or half dimensions
|
|
316
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
317
|
+
*
|
|
318
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
319
|
+
* more geometric relationships) before reducing precision.
|
|
320
|
+
*
|
|
321
|
+
* # Parameters
|
|
322
|
+
*
|
|
323
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
324
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
325
|
+
*
|
|
326
|
+
* # Returns
|
|
327
|
+
*
|
|
328
|
+
* A base32hex-encoded ELID string.
|
|
329
|
+
*
|
|
330
|
+
* # JavaScript Example
|
|
331
|
+
*
|
|
332
|
+
* ```javascript
|
|
333
|
+
* import { encodeElidCompressed } from 'elid';
|
|
334
|
+
*
|
|
335
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
336
|
+
*
|
|
337
|
+
* // 50% retention - good balance of size and fidelity
|
|
338
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
339
|
+
*
|
|
340
|
+
* // 25% retention - smaller but less accurate
|
|
341
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
342
|
+
* ```
|
|
343
|
+
*/
|
|
344
|
+
export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
|
|
345
|
+
/**
|
|
346
|
+
* Compute the Jaro similarity between two strings.
|
|
147
347
|
*
|
|
148
348
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
349
|
+
* Particularly effective for short strings like names.
|
|
149
350
|
*
|
|
150
351
|
* # JavaScript Example
|
|
151
352
|
*
|
|
152
353
|
* ```javascript
|
|
153
|
-
* import {
|
|
354
|
+
* import { jaro } from 'elid';
|
|
154
355
|
*
|
|
155
|
-
* const similarity =
|
|
156
|
-
* console.log(similarity); // ~0.
|
|
356
|
+
* const similarity = jaro("martha", "marhta");
|
|
357
|
+
* console.log(similarity); // ~0.944
|
|
157
358
|
* ```
|
|
158
359
|
*/
|
|
159
|
-
export function
|
|
360
|
+
export function jaro(a: string, b: string): number;
|
|
160
361
|
/**
|
|
161
|
-
*
|
|
362
|
+
* Encode an embedding using lossless full vector encoding.
|
|
363
|
+
*
|
|
364
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
365
|
+
* This produces the largest output but allows exact reconstruction.
|
|
366
|
+
*
|
|
367
|
+
* # Parameters
|
|
368
|
+
*
|
|
369
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
370
|
+
*
|
|
371
|
+
* # Returns
|
|
372
|
+
*
|
|
373
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
162
374
|
*
|
|
163
375
|
* # JavaScript Example
|
|
164
376
|
*
|
|
165
377
|
* ```javascript
|
|
166
|
-
* import {
|
|
378
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
167
379
|
*
|
|
168
|
-
* const
|
|
169
|
-
*
|
|
170
|
-
* opts.setTrimWhitespace(true);
|
|
380
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
381
|
+
* const elid = encodeElidLossless(embedding);
|
|
171
382
|
*
|
|
172
|
-
*
|
|
173
|
-
*
|
|
383
|
+
* // Later, recover the exact embedding
|
|
384
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
385
|
+
* // recovered matches embedding to 32-bit float precision
|
|
174
386
|
* ```
|
|
175
387
|
*/
|
|
176
|
-
export function
|
|
388
|
+
export function encodeElidLossless(embedding: Float64Array): string;
|
|
389
|
+
/**
|
|
390
|
+
* Decode an ELID string back to an embedding vector.
|
|
391
|
+
*
|
|
392
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
393
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
394
|
+
* like Mini128, Morton, or Hilbert.
|
|
395
|
+
*
|
|
396
|
+
* # Parameters
|
|
397
|
+
*
|
|
398
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
399
|
+
*
|
|
400
|
+
* # Returns
|
|
401
|
+
*
|
|
402
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
403
|
+
* is not reversible.
|
|
404
|
+
*
|
|
405
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
406
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
407
|
+
*
|
|
408
|
+
* # JavaScript Example
|
|
409
|
+
*
|
|
410
|
+
* ```javascript
|
|
411
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
412
|
+
*
|
|
413
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
414
|
+
* const elid = encodeElidLossless(embedding);
|
|
415
|
+
*
|
|
416
|
+
* if (isElidReversible(elid)) {
|
|
417
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
418
|
+
* console.log(recovered.length); // 768
|
|
419
|
+
* }
|
|
420
|
+
* ```
|
|
421
|
+
*/
|
|
422
|
+
export function decodeElidToEmbedding(elid_str: string): any;
|
|
423
|
+
/**
|
|
424
|
+
* Encode an embedding vector to an ELID string.
|
|
425
|
+
*
|
|
426
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
427
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
428
|
+
* similarity search.
|
|
429
|
+
*
|
|
430
|
+
* # Parameters
|
|
431
|
+
*
|
|
432
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
433
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
434
|
+
*
|
|
435
|
+
* # Returns
|
|
436
|
+
*
|
|
437
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
438
|
+
*
|
|
439
|
+
* # JavaScript Example
|
|
440
|
+
*
|
|
441
|
+
* ```javascript
|
|
442
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
443
|
+
*
|
|
444
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
445
|
+
* const embedding = await getEmbedding("Hello world");
|
|
446
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
447
|
+
* console.log(elid); // "012345abcdef..."
|
|
448
|
+
* ```
|
|
449
|
+
*/
|
|
450
|
+
export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
|
|
451
|
+
/**
|
|
452
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
453
|
+
*
|
|
454
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
455
|
+
*
|
|
456
|
+
* # JavaScript Example
|
|
457
|
+
*
|
|
458
|
+
* ```javascript
|
|
459
|
+
* import { simhashSimilarity } from 'elid';
|
|
460
|
+
*
|
|
461
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
462
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
463
|
+
*
|
|
464
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
465
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
466
|
+
* ```
|
|
467
|
+
*/
|
|
468
|
+
export function simhashSimilarity(a: string, b: string): number;
|
|
469
|
+
/**
|
|
470
|
+
* Compute the Hamming distance between two strings.
|
|
471
|
+
*
|
|
472
|
+
* Returns the number of positions at which the characters differ.
|
|
473
|
+
* Returns undefined if strings have different lengths.
|
|
474
|
+
*
|
|
475
|
+
* # JavaScript Example
|
|
476
|
+
*
|
|
477
|
+
* ```javascript
|
|
478
|
+
* import { hamming } from 'elid';
|
|
479
|
+
*
|
|
480
|
+
* const distance = hamming("karolin", "kathrin");
|
|
481
|
+
* console.log(distance); // 3
|
|
482
|
+
*
|
|
483
|
+
* const invalid = hamming("hello", "world!");
|
|
484
|
+
* console.log(invalid); // undefined
|
|
485
|
+
* ```
|
|
486
|
+
*/
|
|
487
|
+
export function hamming(a: string, b: string): number | undefined;
|
|
177
488
|
/**
|
|
178
489
|
* Compute the best matching similarity between two strings.
|
|
179
490
|
*
|
|
@@ -190,55 +501,146 @@ export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOption
|
|
|
190
501
|
*/
|
|
191
502
|
export function bestMatch(a: string, b: string): number;
|
|
192
503
|
/**
|
|
193
|
-
* Compute the
|
|
504
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
194
505
|
*
|
|
195
|
-
* Returns
|
|
506
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
507
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
196
508
|
*
|
|
197
509
|
* # JavaScript Example
|
|
198
510
|
*
|
|
199
511
|
* ```javascript
|
|
200
|
-
* import {
|
|
201
|
-
*
|
|
202
|
-
* const hash1 = simhash("iPhone 14");
|
|
203
|
-
* const hash2 = simhash("iPhone 15");
|
|
204
|
-
* const distance = simhashDistance(hash1, hash2);
|
|
512
|
+
* import { jaroWinkler } from 'elid';
|
|
205
513
|
*
|
|
206
|
-
*
|
|
514
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
515
|
+
* console.log(similarity); // ~0.961
|
|
207
516
|
* ```
|
|
208
517
|
*/
|
|
209
|
-
export function
|
|
518
|
+
export function jaroWinkler(a: string, b: string): number;
|
|
210
519
|
/**
|
|
211
|
-
* Compute the
|
|
520
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
212
521
|
*
|
|
213
522
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
214
|
-
* Particularly effective for short strings like names.
|
|
215
523
|
*
|
|
216
524
|
* # JavaScript Example
|
|
217
525
|
*
|
|
218
526
|
* ```javascript
|
|
219
|
-
* import {
|
|
527
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
220
528
|
*
|
|
221
|
-
* const similarity =
|
|
222
|
-
* console.log(similarity); // ~0.
|
|
529
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
530
|
+
* console.log(similarity); // ~0.8
|
|
223
531
|
* ```
|
|
224
532
|
*/
|
|
225
|
-
export function
|
|
533
|
+
export function normalizedLevenshtein(a: string, b: string): number;
|
|
226
534
|
/**
|
|
227
|
-
* Find
|
|
535
|
+
* Find all hashes within a given distance threshold.
|
|
228
536
|
*
|
|
229
|
-
*
|
|
537
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
230
538
|
*
|
|
231
539
|
* # JavaScript Example
|
|
232
540
|
*
|
|
233
541
|
* ```javascript
|
|
234
|
-
* import {
|
|
542
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
235
543
|
*
|
|
236
|
-
* const candidates = ["
|
|
237
|
-
* const
|
|
238
|
-
*
|
|
544
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
545
|
+
* const hashes = candidates.map(s => simhash(s));
|
|
546
|
+
*
|
|
547
|
+
* const queryHash = simhash("iPhone 14");
|
|
548
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
549
|
+
*
|
|
550
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
239
551
|
* ```
|
|
240
552
|
*/
|
|
241
|
-
export function
|
|
553
|
+
export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
|
|
554
|
+
/**
|
|
555
|
+
* Dimension handling mode for full vector encoding.
|
|
556
|
+
*
|
|
557
|
+
* Controls whether to preserve original dimensions, reduce them,
|
|
558
|
+
* or project to a common space for cross-dimensional comparison.
|
|
559
|
+
*
|
|
560
|
+
* # JavaScript Example
|
|
561
|
+
*
|
|
562
|
+
* ```javascript
|
|
563
|
+
* import { ElidDimensionMode, encodeElidFullVector } from 'elid';
|
|
564
|
+
*
|
|
565
|
+
* // Preserve all dimensions
|
|
566
|
+
* // Reduce to fewer dimensions for smaller output
|
|
567
|
+
* // Common space for comparing different-sized embeddings
|
|
568
|
+
* ```
|
|
569
|
+
*/
|
|
570
|
+
export enum ElidDimensionMode {
|
|
571
|
+
/**
|
|
572
|
+
* Preserve all original dimensions (no projection)
|
|
573
|
+
*/
|
|
574
|
+
Preserve = 0,
|
|
575
|
+
/**
|
|
576
|
+
* Reduce dimensions using random projection
|
|
577
|
+
*/
|
|
578
|
+
Reduce = 1,
|
|
579
|
+
/**
|
|
580
|
+
* Project to common space for cross-dimensional comparison
|
|
581
|
+
*/
|
|
582
|
+
Common = 2,
|
|
583
|
+
}
|
|
584
|
+
/**
|
|
585
|
+
* ELID encoding profile for vector embeddings.
|
|
586
|
+
*
|
|
587
|
+
* Choose a profile based on your use case:
|
|
588
|
+
* - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
|
|
589
|
+
* - `Morton10x10`: Z-order curve encoding, good for range queries
|
|
590
|
+
* - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
|
|
591
|
+
*
|
|
592
|
+
* # JavaScript Example
|
|
593
|
+
*
|
|
594
|
+
* ```javascript
|
|
595
|
+
* import { ElidProfile, encodeElid } from 'elid';
|
|
596
|
+
*
|
|
597
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
598
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
599
|
+
* ```
|
|
600
|
+
*/
|
|
601
|
+
export enum ElidProfile {
|
|
602
|
+
/**
|
|
603
|
+
* 128-bit SimHash (cosine similarity via Hamming distance)
|
|
604
|
+
*/
|
|
605
|
+
Mini128 = 0,
|
|
606
|
+
/**
|
|
607
|
+
* Morton/Z-order curve encoding (10 dims, 10 bits each)
|
|
608
|
+
*/
|
|
609
|
+
Morton10x10 = 1,
|
|
610
|
+
/**
|
|
611
|
+
* Hilbert curve encoding (10 dims, 10 bits each)
|
|
612
|
+
*/
|
|
613
|
+
Hilbert10x10 = 2,
|
|
614
|
+
}
|
|
615
|
+
/**
|
|
616
|
+
* Precision options for full vector encoding.
|
|
617
|
+
*
|
|
618
|
+
* Controls how many bits are used to represent each dimension value.
|
|
619
|
+
* Higher precision means more accurate reconstruction but larger output.
|
|
620
|
+
*
|
|
621
|
+
* # JavaScript Example
|
|
622
|
+
*
|
|
623
|
+
* ```javascript
|
|
624
|
+
* import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
|
|
625
|
+
*
|
|
626
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
627
|
+
* // Full32 = lossless, Half16 = smaller with minimal error
|
|
628
|
+
* ```
|
|
629
|
+
*/
|
|
630
|
+
export enum ElidVectorPrecision {
|
|
631
|
+
/**
|
|
632
|
+
* Full 32-bit float (lossless, 4 bytes per dimension)
|
|
633
|
+
*/
|
|
634
|
+
Full32 = 0,
|
|
635
|
+
/**
|
|
636
|
+
* 16-bit half-precision float (2 bytes per dimension)
|
|
637
|
+
*/
|
|
638
|
+
Half16 = 1,
|
|
639
|
+
/**
|
|
640
|
+
* 8-bit quantized (1 byte per dimension, ~1% error)
|
|
641
|
+
*/
|
|
642
|
+
Quant8 = 2,
|
|
643
|
+
}
|
|
242
644
|
/**
|
|
243
645
|
* Options for configuring string similarity algorithms
|
|
244
646
|
*/
|