elid 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -6
- package/elid.d.ts +485 -83
- package/elid_bg.js +770 -172
- package/elid_bg.wasm +0 -0
- package/package.json +5 -5
package/elid.d.ts
CHANGED
|
@@ -1,147 +1,204 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
/**
|
|
4
|
-
* Compute the
|
|
4
|
+
* Compute the SimHash fingerprint of a string.
|
|
5
5
|
*
|
|
6
|
-
* Returns a
|
|
6
|
+
* Returns a 64-bit hash where similar strings produce similar numbers.
|
|
7
|
+
* Use this for database queries by storing the hash and querying by numeric range.
|
|
7
8
|
*
|
|
8
9
|
* # JavaScript Example
|
|
9
10
|
*
|
|
10
11
|
* ```javascript
|
|
11
|
-
* import {
|
|
12
|
+
* import { simhash } from 'elid';
|
|
12
13
|
*
|
|
13
|
-
* const
|
|
14
|
-
*
|
|
14
|
+
* const hash1 = simhash("iPhone 14");
|
|
15
|
+
* const hash2 = simhash("iPhone 15");
|
|
16
|
+
* const hash3 = simhash("Galaxy S23");
|
|
15
17
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
+
* // hash1 and hash2 will be numerically close
|
|
19
|
+
* // hash3 will be numerically distant
|
|
20
|
+
*
|
|
21
|
+
* // Store in database as bigint:
|
|
22
|
+
* // { name: "iPhone 14", simhash: hash1 }
|
|
18
23
|
* ```
|
|
19
24
|
*/
|
|
20
|
-
export function
|
|
25
|
+
export function simhash(text: string): number;
|
|
21
26
|
/**
|
|
22
|
-
*
|
|
27
|
+
* Find all matches above a threshold score.
|
|
23
28
|
*
|
|
24
|
-
* Returns
|
|
25
|
-
* Gives more favorable ratings to strings with common prefixes.
|
|
29
|
+
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
26
30
|
*
|
|
27
31
|
* # JavaScript Example
|
|
28
32
|
*
|
|
29
33
|
* ```javascript
|
|
30
|
-
* import {
|
|
34
|
+
* import { findMatchesAboveThreshold } from 'elid';
|
|
31
35
|
*
|
|
32
|
-
* const
|
|
33
|
-
*
|
|
36
|
+
* const candidates = ["apple", "application", "apply", "banana"];
|
|
37
|
+
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
38
|
+
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
34
39
|
* ```
|
|
35
40
|
*/
|
|
36
|
-
export function
|
|
41
|
+
export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
|
|
37
42
|
/**
|
|
38
|
-
*
|
|
43
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
39
44
|
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
45
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
46
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
47
|
+
*
|
|
48
|
+
* # Parameters
|
|
49
|
+
*
|
|
50
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
51
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
52
|
+
*
|
|
53
|
+
* # Returns
|
|
54
|
+
*
|
|
55
|
+
* A base32hex-encoded ELID string.
|
|
42
56
|
*
|
|
43
57
|
* # JavaScript Example
|
|
44
58
|
*
|
|
45
59
|
* ```javascript
|
|
46
|
-
* import {
|
|
60
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
47
61
|
*
|
|
48
|
-
*
|
|
49
|
-
*
|
|
62
|
+
* // Different sized embeddings from different models
|
|
63
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
64
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
50
65
|
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
66
|
+
* // Project both to 128-dim common space
|
|
67
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
68
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
69
|
+
*
|
|
70
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
71
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
72
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
73
|
+
* // Both have length 128
|
|
53
74
|
* ```
|
|
54
75
|
*/
|
|
55
|
-
export function
|
|
76
|
+
export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
|
|
56
77
|
/**
|
|
57
|
-
*
|
|
78
|
+
* Find all hashes within a given distance threshold.
|
|
58
79
|
*
|
|
59
|
-
*
|
|
60
|
-
* Use this for database queries by storing the hash and querying by numeric range.
|
|
80
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
61
81
|
*
|
|
62
82
|
* # JavaScript Example
|
|
63
83
|
*
|
|
64
84
|
* ```javascript
|
|
65
|
-
* import { simhash } from 'elid';
|
|
85
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
66
86
|
*
|
|
67
|
-
* const
|
|
68
|
-
* const
|
|
69
|
-
* const hash3 = simhash("Galaxy S23");
|
|
87
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
88
|
+
* const hashes = candidates.map(s => simhash(s));
|
|
70
89
|
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
90
|
+
* const queryHash = simhash("iPhone 14");
|
|
91
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
73
92
|
*
|
|
74
|
-
* //
|
|
75
|
-
* // { name: "iPhone 14", simhash: hash1 }
|
|
93
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
76
94
|
* ```
|
|
77
95
|
*/
|
|
78
|
-
export function
|
|
96
|
+
export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
|
|
79
97
|
/**
|
|
80
|
-
* Compute the
|
|
98
|
+
* Compute the Jaro similarity between two strings.
|
|
81
99
|
*
|
|
82
|
-
* Returns
|
|
100
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
101
|
+
* Particularly effective for short strings like names.
|
|
83
102
|
*
|
|
84
103
|
* # JavaScript Example
|
|
85
104
|
*
|
|
86
105
|
* ```javascript
|
|
87
|
-
* import {
|
|
106
|
+
* import { jaro } from 'elid';
|
|
88
107
|
*
|
|
89
|
-
* const
|
|
90
|
-
* console.log(
|
|
108
|
+
* const similarity = jaro("martha", "marhta");
|
|
109
|
+
* console.log(similarity); // ~0.944
|
|
91
110
|
* ```
|
|
92
111
|
*/
|
|
93
|
-
export function
|
|
112
|
+
export function jaro(a: string, b: string): number;
|
|
94
113
|
/**
|
|
95
|
-
*
|
|
114
|
+
* Compute the best matching similarity between two strings.
|
|
96
115
|
*
|
|
97
|
-
*
|
|
116
|
+
* Runs multiple algorithms and returns the highest score.
|
|
98
117
|
*
|
|
99
118
|
* # JavaScript Example
|
|
100
119
|
*
|
|
101
120
|
* ```javascript
|
|
102
|
-
* import {
|
|
121
|
+
* import { bestMatch } from 'elid';
|
|
103
122
|
*
|
|
104
|
-
* const
|
|
105
|
-
*
|
|
123
|
+
* const score = bestMatch("hello", "hallo");
|
|
124
|
+
* console.log(score); // ~0.8
|
|
125
|
+
* ```
|
|
126
|
+
*/
|
|
127
|
+
export function bestMatch(a: string, b: string): number;
|
|
128
|
+
/**
|
|
129
|
+
* Encode an embedding vector to an ELID string.
|
|
106
130
|
*
|
|
107
|
-
*
|
|
108
|
-
*
|
|
131
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
132
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
133
|
+
* similarity search.
|
|
109
134
|
*
|
|
110
|
-
*
|
|
135
|
+
* # Parameters
|
|
136
|
+
*
|
|
137
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
138
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
139
|
+
*
|
|
140
|
+
* # Returns
|
|
141
|
+
*
|
|
142
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
143
|
+
*
|
|
144
|
+
* # JavaScript Example
|
|
145
|
+
*
|
|
146
|
+
* ```javascript
|
|
147
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
148
|
+
*
|
|
149
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
150
|
+
* const embedding = await getEmbedding("Hello world");
|
|
151
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
152
|
+
* console.log(elid); // "012345abcdef..."
|
|
111
153
|
* ```
|
|
112
154
|
*/
|
|
113
|
-
export function
|
|
155
|
+
export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
|
|
114
156
|
/**
|
|
115
|
-
*
|
|
157
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
116
158
|
*
|
|
117
|
-
* Returns
|
|
159
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
118
160
|
*
|
|
119
161
|
* # JavaScript Example
|
|
120
162
|
*
|
|
121
163
|
* ```javascript
|
|
122
|
-
* import {
|
|
164
|
+
* import { simhashSimilarity } from 'elid';
|
|
123
165
|
*
|
|
124
|
-
* const
|
|
125
|
-
*
|
|
126
|
-
*
|
|
166
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
167
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
168
|
+
*
|
|
169
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
170
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
127
171
|
* ```
|
|
128
172
|
*/
|
|
129
|
-
export function
|
|
173
|
+
export function simhashSimilarity(a: string, b: string): number;
|
|
130
174
|
/**
|
|
131
|
-
*
|
|
175
|
+
* Encode an embedding using lossless full vector encoding.
|
|
132
176
|
*
|
|
133
|
-
*
|
|
177
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
178
|
+
* This produces the largest output but allows exact reconstruction.
|
|
179
|
+
*
|
|
180
|
+
* # Parameters
|
|
181
|
+
*
|
|
182
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
183
|
+
*
|
|
184
|
+
* # Returns
|
|
185
|
+
*
|
|
186
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
134
187
|
*
|
|
135
188
|
* # JavaScript Example
|
|
136
189
|
*
|
|
137
190
|
* ```javascript
|
|
138
|
-
* import {
|
|
191
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
139
192
|
*
|
|
140
|
-
* const
|
|
141
|
-
*
|
|
193
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
194
|
+
* const elid = encodeElidLossless(embedding);
|
|
195
|
+
*
|
|
196
|
+
* // Later, recover the exact embedding
|
|
197
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
198
|
+
 * // recovered matches embedding to 32-bit float precision
|
|
142
199
|
* ```
|
|
143
200
|
*/
|
|
144
|
-
export function
|
|
201
|
+
export function encodeElidLossless(embedding: Float64Array): string;
|
|
145
202
|
/**
|
|
146
203
|
* Compute the normalized Levenshtein similarity between two strings.
|
|
147
204
|
*
|
|
@@ -157,6 +214,141 @@ export function osaDistance(a: string, b: string): number;
|
|
|
157
214
|
* ```
|
|
158
215
|
*/
|
|
159
216
|
export function normalizedLevenshtein(a: string, b: string): number;
|
|
217
|
+
/**
|
|
218
|
+
* Decode an ELID string back to an embedding vector.
|
|
219
|
+
*
|
|
220
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
221
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
222
|
+
* like Mini128, Morton, or Hilbert.
|
|
223
|
+
*
|
|
224
|
+
* # Parameters
|
|
225
|
+
*
|
|
226
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
227
|
+
*
|
|
228
|
+
* # Returns
|
|
229
|
+
*
|
|
230
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
231
|
+
* is not reversible.
|
|
232
|
+
*
|
|
233
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
234
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
235
|
+
*
|
|
236
|
+
* # JavaScript Example
|
|
237
|
+
*
|
|
238
|
+
* ```javascript
|
|
239
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
240
|
+
*
|
|
241
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
242
|
+
* const elid = encodeElidLossless(embedding);
|
|
243
|
+
*
|
|
244
|
+
* if (isElidReversible(elid)) {
|
|
245
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
246
|
+
* console.log(recovered.length); // 768
|
|
247
|
+
* }
|
|
248
|
+
* ```
|
|
249
|
+
*/
|
|
250
|
+
export function decodeElidToEmbedding(elid_str: string): any;
|
|
251
|
+
/**
|
|
252
|
+
* Get metadata about a FullVector ELID.
|
|
253
|
+
*
|
|
254
|
+
* Returns an object containing information about how the ELID was encoded,
|
|
255
|
+
* including original dimensions, precision, and dimension mode.
|
|
256
|
+
*
|
|
257
|
+
* # Parameters
|
|
258
|
+
*
|
|
259
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
260
|
+
*
|
|
261
|
+
* # Returns
|
|
262
|
+
*
|
|
263
|
+
* An object with metadata fields, or null if not a FullVector ELID.
|
|
264
|
+
*
|
|
265
|
+
* # JavaScript Example
|
|
266
|
+
*
|
|
267
|
+
* ```javascript
|
|
268
|
+
* import { encodeElidCompressed, getElidMetadata } from 'elid';
|
|
269
|
+
*
|
|
270
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
271
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
272
|
+
*
|
|
273
|
+
* const meta = getElidMetadata(elid);
|
|
274
|
+
* if (meta) {
|
|
275
|
+
* console.log(meta.originalDims); // 768
|
|
276
|
+
* console.log(meta.encodedDims); // depends on compression
|
|
277
|
+
* console.log(meta.isLossless); // false
|
|
278
|
+
* }
|
|
279
|
+
* ```
|
|
280
|
+
*/
|
|
281
|
+
export function getElidMetadata(elid_str: string): any;
|
|
282
|
+
/**
|
|
283
|
+
* Decode an ELID string to raw bytes.
|
|
284
|
+
*
|
|
285
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
286
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
287
|
+
*
|
|
288
|
+
* # Parameters
|
|
289
|
+
*
|
|
290
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
291
|
+
*
|
|
292
|
+
* # Returns
|
|
293
|
+
*
|
|
294
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
295
|
+
*
|
|
296
|
+
* # JavaScript Example
|
|
297
|
+
*
|
|
298
|
+
* ```javascript
|
|
299
|
+
* import { decodeElid } from 'elid';
|
|
300
|
+
*
|
|
301
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
302
|
+
* console.log(bytes); // Uint8Array [...]
|
|
303
|
+
* ```
|
|
304
|
+
*/
|
|
305
|
+
export function decodeElid(elid_str: string): Uint8Array;
|
|
306
|
+
/**
|
|
307
|
+
* Compute the Levenshtein distance between two strings.
|
|
308
|
+
*
|
|
309
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
310
|
+
*
|
|
311
|
+
* # JavaScript Example
|
|
312
|
+
*
|
|
313
|
+
* ```javascript
|
|
314
|
+
* import { levenshtein } from 'elid';
|
|
315
|
+
*
|
|
316
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
317
|
+
* console.log(distance); // 3
|
|
318
|
+
* ```
|
|
319
|
+
*/
|
|
320
|
+
export function levenshtein(a: string, b: string): number;
|
|
321
|
+
/**
|
|
322
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
323
|
+
*
|
|
324
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
325
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
326
|
+
*
|
|
327
|
+
* # Parameters
|
|
328
|
+
*
|
|
329
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
330
|
+
*
|
|
331
|
+
* # Returns
|
|
332
|
+
*
|
|
333
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
334
|
+
*
|
|
335
|
+
* # JavaScript Example
|
|
336
|
+
*
|
|
337
|
+
* ```javascript
|
|
338
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
339
|
+
*
|
|
340
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
341
|
+
*
|
|
342
|
+
* // Mini128 is NOT reversible
|
|
343
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
344
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
345
|
+
*
|
|
346
|
+
* // Lossless IS reversible
|
|
347
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
348
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
349
|
+
* ```
|
|
350
|
+
*/
|
|
351
|
+
export function isElidReversible(elid_str: string): boolean;
|
|
160
352
|
/**
|
|
161
353
|
* Compute Levenshtein distance with custom options.
|
|
162
354
|
*
|
|
@@ -175,20 +367,71 @@ export function normalizedLevenshtein(a: string, b: string): number;
|
|
|
175
367
|
*/
|
|
176
368
|
export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
|
|
177
369
|
/**
|
|
178
|
-
* Compute the
|
|
370
|
+
* Compute the Hamming distance between two ELID strings.
|
|
179
371
|
*
|
|
180
|
-
*
|
|
372
|
+
* Returns the number of differing bits between two Mini128 ELIDs.
|
|
373
|
+
* This distance is proportional to the angular distance between the
|
|
374
|
+
* original embeddings (lower = more similar).
|
|
375
|
+
*
|
|
376
|
+
* # Requirements
|
|
377
|
+
*
|
|
378
|
+
* Both ELIDs must use the Mini128 profile.
|
|
379
|
+
*
|
|
380
|
+
* # Parameters
|
|
381
|
+
*
|
|
382
|
+
* - `elid1`: First ELID string
|
|
383
|
+
* - `elid2`: Second ELID string
|
|
384
|
+
*
|
|
385
|
+
* # Returns
|
|
386
|
+
*
|
|
387
|
+
* Hamming distance (0-128). 0 means identical, 128 means completely different.
|
|
181
388
|
*
|
|
182
389
|
* # JavaScript Example
|
|
183
390
|
*
|
|
184
391
|
* ```javascript
|
|
185
|
-
* import {
|
|
392
|
+
* import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
|
|
186
393
|
*
|
|
187
|
-
* const
|
|
188
|
-
*
|
|
394
|
+
* const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
|
|
395
|
+
* const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
|
|
396
|
+
*
|
|
397
|
+
* const distance = elidHammingDistance(elid1, elid2);
|
|
398
|
+
* if (distance < 20) {
|
|
399
|
+
* console.log("Very similar embeddings!");
|
|
400
|
+
* }
|
|
189
401
|
* ```
|
|
190
402
|
*/
|
|
191
|
-
export function
|
|
403
|
+
export function elidHammingDistance(elid1: string, elid2: string): number;
|
|
404
|
+
/**
|
|
405
|
+
* Find the best match for a query string in an array of candidates.
|
|
406
|
+
*
|
|
407
|
+
* Returns an object with the index and similarity score of the best match.
|
|
408
|
+
*
|
|
409
|
+
* # JavaScript Example
|
|
410
|
+
*
|
|
411
|
+
* ```javascript
|
|
412
|
+
* import { findBestMatch } from 'elid';
|
|
413
|
+
*
|
|
414
|
+
* const candidates = ["apple", "application", "apply"];
|
|
415
|
+
* const result = findBestMatch("app", candidates);
|
|
416
|
+
* console.log(result); // { index: 0, score: 0.907 }
|
|
417
|
+
* ```
|
|
418
|
+
*/
|
|
419
|
+
export function findBestMatch(query: string, candidates: string[]): object;
|
|
420
|
+
/**
|
|
421
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
422
|
+
*
|
|
423
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
424
|
+
*
|
|
425
|
+
* # JavaScript Example
|
|
426
|
+
*
|
|
427
|
+
* ```javascript
|
|
428
|
+
* import { osaDistance } from 'elid';
|
|
429
|
+
*
|
|
430
|
+
* const distance = osaDistance("ca", "ac");
|
|
431
|
+
* console.log(distance); // 1 (transposition)
|
|
432
|
+
* ```
|
|
433
|
+
*/
|
|
434
|
+
export function osaDistance(a: string, b: string): number;
|
|
192
435
|
/**
|
|
193
436
|
* Compute the Hamming distance between two SimHash values.
|
|
194
437
|
*
|
|
@@ -208,37 +451,196 @@ export function bestMatch(a: string, b: string): number;
|
|
|
208
451
|
*/
|
|
209
452
|
export function simhashDistance(hash1: number, hash2: number): number;
|
|
210
453
|
/**
|
|
211
|
-
*
|
|
454
|
+
* Encode an embedding with percentage-based compression.
|
|
455
|
+
*
|
|
456
|
+
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
457
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
458
|
+
* - 0.5 = half precision and/or half dimensions
|
|
459
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
460
|
+
*
|
|
461
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
462
|
+
* more geometric relationships) before reducing precision.
|
|
463
|
+
*
|
|
464
|
+
* # Parameters
|
|
465
|
+
*
|
|
466
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
467
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
468
|
+
*
|
|
469
|
+
* # Returns
|
|
470
|
+
*
|
|
471
|
+
* A base32hex-encoded ELID string.
|
|
472
|
+
*
|
|
473
|
+
* # JavaScript Example
|
|
474
|
+
*
|
|
475
|
+
* ```javascript
|
|
476
|
+
* import { encodeElidCompressed } from 'elid';
|
|
477
|
+
*
|
|
478
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
479
|
+
*
|
|
480
|
+
* // 50% retention - good balance of size and fidelity
|
|
481
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
482
|
+
*
|
|
483
|
+
* // 25% retention - smaller but less accurate
|
|
484
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
485
|
+
* ```
|
|
486
|
+
*/
|
|
487
|
+
export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
|
|
488
|
+
/**
|
|
489
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
490
|
+
*
|
|
491
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
492
|
+
* the specified character limit while maximizing fidelity.
|
|
493
|
+
*
|
|
494
|
+
* # Parameters
|
|
495
|
+
*
|
|
496
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
497
|
+
* - `max_chars`: Maximum output string length in characters
|
|
498
|
+
*
|
|
499
|
+
* # Returns
|
|
500
|
+
*
|
|
501
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
502
|
+
*
|
|
503
|
+
* # JavaScript Example
|
|
504
|
+
*
|
|
505
|
+
* ```javascript
|
|
506
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
507
|
+
*
|
|
508
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
509
|
+
*
|
|
510
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
511
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
512
|
+
* console.log(elid.length <= 100); // true
|
|
513
|
+
*
|
|
514
|
+
* // Fit in 50 characters (more compression)
|
|
515
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
516
|
+
* ```
|
|
517
|
+
*/
|
|
518
|
+
export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
|
|
519
|
+
/**
|
|
520
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
212
521
|
*
|
|
213
522
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
214
|
-
*
|
|
523
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
215
524
|
*
|
|
216
525
|
* # JavaScript Example
|
|
217
526
|
*
|
|
218
527
|
* ```javascript
|
|
219
|
-
* import {
|
|
528
|
+
* import { jaroWinkler } from 'elid';
|
|
220
529
|
*
|
|
221
|
-
* const similarity =
|
|
222
|
-
* console.log(similarity); // ~0.
|
|
530
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
531
|
+
* console.log(similarity); // ~0.961
|
|
223
532
|
* ```
|
|
224
533
|
*/
|
|
225
|
-
export function
|
|
534
|
+
export function jaroWinkler(a: string, b: string): number;
|
|
226
535
|
/**
|
|
227
|
-
*
|
|
536
|
+
* Compute the Hamming distance between two strings.
|
|
228
537
|
*
|
|
229
|
-
* Returns
|
|
538
|
+
* Returns the number of positions at which the characters differ.
|
|
539
|
+
 * Returns undefined if strings have different lengths.
|
|
230
540
|
*
|
|
231
541
|
* # JavaScript Example
|
|
232
542
|
*
|
|
233
543
|
* ```javascript
|
|
234
|
-
* import {
|
|
544
|
+
* import { hamming } from 'elid';
|
|
235
545
|
*
|
|
236
|
-
* const
|
|
237
|
-
*
|
|
238
|
-
*
|
|
546
|
+
* const distance = hamming("karolin", "kathrin");
|
|
547
|
+
* console.log(distance); // 3
|
|
548
|
+
*
|
|
549
|
+
* const invalid = hamming("hello", "world!");
|
|
550
|
+
 * console.log(invalid); // undefined
|
|
239
551
|
* ```
|
|
240
552
|
*/
|
|
241
|
-
export function
|
|
553
|
+
export function hamming(a: string, b: string): number | undefined;
|
|
554
|
+
/**
|
|
555
|
+
* Dimension handling mode for full vector encoding.
|
|
556
|
+
*
|
|
557
|
+
* Controls whether to preserve original dimensions, reduce them,
|
|
558
|
+
* or project to a common space for cross-dimensional comparison.
|
|
559
|
+
*
|
|
560
|
+
* # JavaScript Example
|
|
561
|
+
*
|
|
562
|
+
* ```javascript
|
|
563
|
+
* import { ElidDimensionMode, encodeElidFullVector } from 'elid';
|
|
564
|
+
*
|
|
565
|
+
* // Preserve all dimensions
|
|
566
|
+
* // Reduce to fewer dimensions for smaller output
|
|
567
|
+
* // Common space for comparing different-sized embeddings
|
|
568
|
+
* ```
|
|
569
|
+
*/
|
|
570
|
+
export enum ElidDimensionMode {
|
|
571
|
+
/**
|
|
572
|
+
* Preserve all original dimensions (no projection)
|
|
573
|
+
*/
|
|
574
|
+
Preserve = 0,
|
|
575
|
+
/**
|
|
576
|
+
* Reduce dimensions using random projection
|
|
577
|
+
*/
|
|
578
|
+
Reduce = 1,
|
|
579
|
+
/**
|
|
580
|
+
* Project to common space for cross-dimensional comparison
|
|
581
|
+
*/
|
|
582
|
+
Common = 2,
|
|
583
|
+
}
|
|
584
|
+
/**
|
|
585
|
+
* ELID encoding profile for vector embeddings.
|
|
586
|
+
*
|
|
587
|
+
* Choose a profile based on your use case:
|
|
588
|
+
* - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
|
|
589
|
+
* - `Morton10x10`: Z-order curve encoding, good for range queries
|
|
590
|
+
* - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
|
|
591
|
+
*
|
|
592
|
+
* # JavaScript Example
|
|
593
|
+
*
|
|
594
|
+
* ```javascript
|
|
595
|
+
* import { ElidProfile, encodeElid } from 'elid';
|
|
596
|
+
*
|
|
597
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
598
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
599
|
+
* ```
|
|
600
|
+
*/
|
|
601
|
+
export enum ElidProfile {
|
|
602
|
+
/**
|
|
603
|
+
* 128-bit SimHash (cosine similarity via Hamming distance)
|
|
604
|
+
*/
|
|
605
|
+
Mini128 = 0,
|
|
606
|
+
/**
|
|
607
|
+
* Morton/Z-order curve encoding (10 dims, 10 bits each)
|
|
608
|
+
*/
|
|
609
|
+
Morton10x10 = 1,
|
|
610
|
+
/**
|
|
611
|
+
* Hilbert curve encoding (10 dims, 10 bits each)
|
|
612
|
+
*/
|
|
613
|
+
Hilbert10x10 = 2,
|
|
614
|
+
}
|
|
615
|
+
/**
|
|
616
|
+
* Precision options for full vector encoding.
|
|
617
|
+
*
|
|
618
|
+
* Controls how many bits are used to represent each dimension value.
|
|
619
|
+
* Higher precision means more accurate reconstruction but larger output.
|
|
620
|
+
*
|
|
621
|
+
* # JavaScript Example
|
|
622
|
+
*
|
|
623
|
+
* ```javascript
|
|
624
|
+
* import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
|
|
625
|
+
*
|
|
626
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
627
|
+
* // Full32 = lossless, Half16 = smaller with minimal error
|
|
628
|
+
* ```
|
|
629
|
+
*/
|
|
630
|
+
export enum ElidVectorPrecision {
|
|
631
|
+
/**
|
|
632
|
+
* Full 32-bit float (lossless, 4 bytes per dimension)
|
|
633
|
+
*/
|
|
634
|
+
Full32 = 0,
|
|
635
|
+
/**
|
|
636
|
+
* 16-bit half-precision float (2 bytes per dimension)
|
|
637
|
+
*/
|
|
638
|
+
Half16 = 1,
|
|
639
|
+
/**
|
|
640
|
+
* 8-bit quantized (1 byte per dimension, ~1% error)
|
|
641
|
+
*/
|
|
642
|
+
Quant8 = 2,
|
|
643
|
+
}
|
|
242
644
|
/**
|
|
243
645
|
* Options for configuring string similarity algorithms
|
|
244
646
|
*/
|